Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 1 | #ifndef AOS_EVENTS_LOGGING_LZMA_ENCODER_H_ |
| 2 | #define AOS_EVENTS_LOGGING_LZMA_ENCODER_H_ |
| 3 | |
Tyler Chatow | 7df6083 | 2021-07-15 21:18:36 -0700 | [diff] [blame] | 4 | #include <condition_variable> |
| 5 | #include <mutex> |
Austin Schuh | 60e7794 | 2022-05-16 17:48:24 -0700 | [diff] [blame] | 6 | #include <string_view> |
Tyler Chatow | 7df6083 | 2021-07-15 21:18:36 -0700 | [diff] [blame] | 7 | #include <thread> |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 8 | |
Tyler Chatow | 7df6083 | 2021-07-15 21:18:36 -0700 | [diff] [blame] | 9 | #include "absl/types/span.h" |
Philipp Schrader | 790cb54 | 2023-07-05 21:06:52 -0700 | [diff] [blame] | 10 | #include "flatbuffers/flatbuffers.h" |
| 11 | |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 12 | #include "aos/containers/resizeable_buffer.h" |
| 13 | #include "aos/events/logging/buffer_encoder.h" |
| 14 | #include "aos/events/logging/logger_generated.h" |
Tyler Chatow | 7df6083 | 2021-07-15 21:18:36 -0700 | [diff] [blame] | 15 | #include "lzma.h" |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 16 | |
| 17 | namespace aos::logger { |
| 18 | |
| 19 | // Encodes buffers using liblzma. |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 20 | class LzmaEncoder final : public DataEncoder { |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 21 | public: |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 22 | static constexpr std::string_view kExtension = ".xz"; |
| 23 | |
Austin Schuh | be91b34 | 2022-06-27 00:53:45 -0700 | [diff] [blame] | 24 | // Initializes the LZMA stream and encoder. The block size is the block size |
| 25 | // used by the multithreaded encoder for batching. A block size of 0 tells |
| 26 | // lzma to pick it's favorite block size. |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 27 | explicit LzmaEncoder(size_t max_message_size, uint32_t compression_preset, |
| 28 | size_t block_size = 0); |
Austin Schuh | c41603c | 2020-10-11 16:17:37 -0700 | [diff] [blame] | 29 | LzmaEncoder(const LzmaEncoder &) = delete; |
| 30 | LzmaEncoder(LzmaEncoder &&other) = delete; |
| 31 | LzmaEncoder &operator=(const LzmaEncoder &) = delete; |
| 32 | LzmaEncoder &operator=(LzmaEncoder &&other) = delete; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 33 | // Gracefully shuts down the encoder. |
| 34 | ~LzmaEncoder() final; |
| 35 | |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 36 | bool HasSpace(size_t /*request*/) const override { |
| 37 | // Since the underlying lzma encoder handles buffering, we always have |
| 38 | // space. |
| 39 | return true; |
| 40 | } |
Austin Schuh | 8bdfc49 | 2023-02-11 12:53:13 -0800 | [diff] [blame] | 41 | size_t space() const final { return input_buffer_.capacity(); } |
| 42 | size_t Encode(Copier *copy, size_t start_byte) final; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 43 | void Finish() final; |
| 44 | void Clear(int n) final; |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 45 | absl::Span<const absl::Span<const uint8_t>> queue() final; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 46 | size_t queued_bytes() const final; |
| 47 | size_t total_bytes() const final { return total_bytes_; } |
| 48 | size_t queue_size() const final { return queue_.size(); } |
| 49 | |
| 50 | private: |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 51 | static constexpr size_t kEncodedBufferSizeBytes{1024 * 128}; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 52 | |
| 53 | void RunLzmaCode(lzma_action action); |
| 54 | |
| 55 | lzma_stream stream_; |
| 56 | uint32_t compression_preset_; |
| 57 | std::vector<ResizeableBuffer> queue_; |
Austin Schuh | 0b0f8bb | 2023-03-24 15:09:08 -0700 | [diff] [blame] | 58 | // Since we pretty much just allocate a couple of buffers, then allocate and |
| 59 | // release them over and over with very similar memory usage and without much |
| 60 | // variation in the peak usage, put the allocate chunks in a free queue to |
| 61 | // reduce fragmentation. |
| 62 | std::vector<ResizeableBuffer> free_queue_; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 63 | bool finished_ = false; |
| 64 | // Total bytes that resulted from encoding raw data since the last call to |
| 65 | // Reset. |
| 66 | size_t total_bytes_ = 0; |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 67 | |
| 68 | // Buffer that messages get coppied into for encoding. |
| 69 | ResizeableBuffer input_buffer_; |
| 70 | |
| 71 | std::vector<absl::Span<const uint8_t>> return_queue_; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 72 | }; |
| 73 | |
| 74 | // Decompresses data with liblzma. |
| 75 | class LzmaDecoder final : public DataDecoder { |
| 76 | public: |
James Kuszmaul | dd0a504 | 2021-10-28 23:38:04 -0700 | [diff] [blame] | 77 | static constexpr std::string_view kExtension = ".xz"; |
| 78 | |
Austin Schuh | cd36842 | 2021-11-22 21:23:29 -0800 | [diff] [blame] | 79 | explicit LzmaDecoder(std::unique_ptr<DataDecoder> underlying_decoder, |
| 80 | bool quiet = false); |
| 81 | explicit LzmaDecoder(std::string_view filename, bool quiet = false) |
| 82 | : LzmaDecoder(std::make_unique<DummyDecoder>(filename), quiet) {} |
Austin Schuh | c41603c | 2020-10-11 16:17:37 -0700 | [diff] [blame] | 83 | LzmaDecoder(const LzmaDecoder &) = delete; |
| 84 | LzmaDecoder(LzmaDecoder &&other) = delete; |
| 85 | LzmaDecoder &operator=(const LzmaDecoder &) = delete; |
| 86 | LzmaDecoder &operator=(LzmaDecoder &&other) = delete; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 87 | ~LzmaDecoder(); |
| 88 | |
| 89 | size_t Read(uint8_t *begin, uint8_t *end) final; |
Tyler Chatow | 2015bc6 | 2021-08-04 21:15:09 -0700 | [diff] [blame] | 90 | std::string_view filename() const final { |
| 91 | return underlying_decoder_->filename(); |
| 92 | } |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 93 | |
| 94 | private: |
| 95 | // Size of temporary buffer to use. |
| 96 | static constexpr size_t kBufSize{256 * 1024}; |
| 97 | |
| 98 | // Temporary buffer for storing compressed data. |
| 99 | ResizeableBuffer compressed_data_; |
| 100 | // Used for reading data from the file. |
Tyler Chatow | 2015bc6 | 2021-08-04 21:15:09 -0700 | [diff] [blame] | 101 | std::unique_ptr<DataDecoder> underlying_decoder_; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 102 | // Stream for decompression. |
| 103 | lzma_stream stream_; |
| 104 | // The current action. This is LZMA_RUN until we've run out of data to read |
| 105 | // from the file. |
| 106 | lzma_action action_ = LZMA_RUN; |
| 107 | // Flag that represents whether or not all the data from the file has been |
| 108 | // successfully decoded. |
| 109 | bool finished_ = false; |
Austin Schuh | cd36842 | 2021-11-22 21:23:29 -0800 | [diff] [blame] | 110 | // Flag to signal how quiet to be when logging potential issues around |
| 111 | // truncation. |
| 112 | const bool quiet_ = false; |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 113 | }; |
| 114 | |
Tyler Chatow | 7df6083 | 2021-07-15 21:18:36 -0700 | [diff] [blame] | 115 | // Decompresses data with liblzma in a new thread, up to a maximum queue |
| 116 | // size. Calls to Read() will return data from the queue if available, |
| 117 | // or block until more data is queued or the stream finishes. |
| 118 | class ThreadedLzmaDecoder : public DataDecoder { |
| 119 | public: |
Austin Schuh | cd36842 | 2021-11-22 21:23:29 -0800 | [diff] [blame] | 120 | explicit ThreadedLzmaDecoder(std::string_view filename, bool quiet = false) |
| 121 | : ThreadedLzmaDecoder(std::make_unique<DummyDecoder>(filename), quiet) {} |
| 122 | explicit ThreadedLzmaDecoder(std::unique_ptr<DataDecoder> underlying_decoder, |
| 123 | bool quiet = false); |
Tyler Chatow | 7df6083 | 2021-07-15 21:18:36 -0700 | [diff] [blame] | 124 | ThreadedLzmaDecoder(const ThreadedLzmaDecoder &) = delete; |
| 125 | ThreadedLzmaDecoder &operator=(const ThreadedLzmaDecoder &) = delete; |
| 126 | |
| 127 | ~ThreadedLzmaDecoder(); |
| 128 | |
| 129 | size_t Read(uint8_t *begin, uint8_t *end) final; |
| 130 | |
Tyler Chatow | 2015bc6 | 2021-08-04 21:15:09 -0700 | [diff] [blame] | 131 | std::string_view filename() const final { return decoder_.filename(); } |
| 132 | |
Tyler Chatow | 7df6083 | 2021-07-15 21:18:36 -0700 | [diff] [blame] | 133 | private: |
| 134 | static constexpr size_t kBufSize{256 * 1024}; |
| 135 | static constexpr size_t kQueueSize{8}; |
| 136 | |
| 137 | LzmaDecoder decoder_; |
| 138 | |
| 139 | // Queue of decompressed data to return on calls to Read |
| 140 | std::vector<ResizeableBuffer> decoded_queue_; |
| 141 | |
| 142 | // Mutex to control access to decoded_queue_. |
| 143 | std::mutex decode_mutex_; |
| 144 | std::condition_variable continue_decoding_; |
| 145 | std::condition_variable queue_filled_; |
| 146 | |
| 147 | bool finished_ = false; |
| 148 | |
| 149 | std::thread decode_thread_; |
| 150 | }; |
| 151 | |
Brian Silverman | f59fe3f | 2020-09-22 21:04:09 -0700 | [diff] [blame] | 152 | } // namespace aos::logger |
| 153 | |
| 154 | #endif // AOS_EVENTS_LOGGING_LZMA_ENCODER_H_ |