James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 1 | #ifndef AOS_UTIL_MCAP_LOGGER_H_ |
| 2 | #define AOS_UTIL_MCAP_LOGGER_H_ |
| 3 | |
#include <stddef.h>
#include <stdint.h>

#include <fstream>
#include <iosfwd>
#include <map>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "nlohmann/json_fwd.hpp"

#include "aos/configuration_generated.h"
#include "aos/events/context.h"
#include "aos/events/event_loop.h"
#include "aos/fast_string_builder.h"
#include "aos/flatbuffer_utils.h"
#include "aos/flatbuffers.h"
#include "aos/time/time.h"
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 25 | |
| 26 | namespace aos { |
| 27 | |
| 28 | // Produces a JSON Schema (https://json-schema.org/) for a given flatbuffer |
| 29 | // type. If recursion_level is set, will include a $schema attribute indicating |
| 30 | // the schema definition being used (this is used to allow for recursion). |
| 31 | // |
| 32 | // Note that this is pretty bare-bones, so, e.g., we don't distinguish between |
| 33 | // structs and tables when generating the JSON schema, so we don't bother to |
| 34 | // mark struct fields as required. |
| 35 | enum class JsonSchemaRecursion { |
| 36 | kTopLevel, |
| 37 | kNested, |
| 38 | }; |
| 39 | nlohmann::json JsonSchemaForFlatbuffer( |
| 40 | const FlatbufferType &type, |
| 41 | JsonSchemaRecursion recursion_level = JsonSchemaRecursion::kTopLevel); |
| 42 | |
James Kuszmaul | 1e418f6 | 2023-02-26 14:40:20 -0800 | [diff] [blame] | 43 | // Returns the shortest possible alias for the specified channel on the |
| 44 | // specified node/application. |
| 45 | std::string ShortenedChannelName(const aos::Configuration *config, |
| 46 | const aos::Channel *channel, |
| 47 | std::string_view application_name, |
| 48 | const aos::Node *node); |
| 49 | |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 50 | // Generates an MCAP file, per the specification at |
| 51 | // https://github.com/foxglove/mcap/tree/main/docs/specification |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 52 | // This currently generates an uncompressed logfile with full message indexing |
| 53 | // available, to be able to support Foxglove fully. |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 54 | class McapLogger { |
| 55 | public: |
James Kuszmaul | c31d736 | 2022-05-27 14:20:04 -0700 | [diff] [blame] | 56 | // Whether to serialize the messages into the MCAP file as JSON or |
| 57 | // flatbuffers. |
| 58 | enum class Serialization { |
| 59 | kJson, |
| 60 | kFlatbuffer, |
| 61 | }; |
James Kuszmaul | 9f607c6 | 2022-10-27 17:01:55 -0700 | [diff] [blame] | 62 | // Whether to attempt to shorten channel names. |
| 63 | enum class CanonicalChannelNames { |
| 64 | // Just use the full, unambiguous, channel names. |
| 65 | kCanonical, |
| 66 | // Use GetChannelAliases() to determine the shortest possible name for the |
| 67 | // channel for the current node, and use that in the MCAP file. This makes |
| 68 | // it so that the channels in the resulting file are more likely to match |
| 69 | // the channel names that are used in "real" applications. |
| 70 | kShortened, |
| 71 | }; |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 72 | // Chunk compression to use in the MCAP file. |
| 73 | enum class Compression { |
| 74 | kNone, |
| 75 | kLz4, |
| 76 | }; |
James Kuszmaul | c31d736 | 2022-05-27 14:20:04 -0700 | [diff] [blame] | 77 | McapLogger(EventLoop *event_loop, const std::string &output_path, |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 78 | Serialization serialization, |
| 79 | CanonicalChannelNames canonical_channels, Compression compression); |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 80 | ~McapLogger(); |
| 81 | |
| 82 | private: |
| 83 | enum class OpCode { |
| 84 | kHeader = 0x01, |
| 85 | kFooter = 0x02, |
| 86 | kSchema = 0x03, |
| 87 | kChannel = 0x04, |
| 88 | kMessage = 0x05, |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 89 | kChunk = 0x06, |
| 90 | kMessageIndex = 0x07, |
| 91 | kChunkIndex = 0x08, |
| 92 | kAttachment = 0x09, |
| 93 | kAttachmentIndex = 0x0A, |
| 94 | kStatistics = 0x0B, |
| 95 | kMetadata = 0x0C, |
| 96 | kMetadataIndex = 0x0D, |
| 97 | kSummaryOffset = 0x0E, |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 98 | kDataEnd = 0x0F, |
| 99 | }; |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 100 | // Stores information associated with a SummaryOffset entry (an offset to the |
| 101 | // start of a section within Summary section, which allows readers to quickly |
| 102 | // find all the indices/channel definitions/etc. for a given log). |
| 103 | struct SummaryOffset { |
| 104 | OpCode op_code; |
| 105 | // Offset from the start of the file. |
| 106 | uint64_t offset; |
| 107 | // Total length of the section, in bytes. |
| 108 | uint64_t size; |
| 109 | }; |
| 110 | // Information needed to build a ChunkIndex entry. |
| 111 | struct ChunkIndex { |
| 112 | // Earliest and latest message times within the Chunk being referenced. |
| 113 | aos::monotonic_clock::time_point start_time; |
| 114 | aos::monotonic_clock::time_point end_time; |
| 115 | // Offset from the start of the file to the start of the relevant Chunk. |
| 116 | uint64_t offset; |
| 117 | // Total size of the Chunk, in bytes. |
| 118 | uint64_t chunk_size; |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 119 | // Total uncompressed size of the records portion of the Chunk, in bytes. |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 120 | uint64_t records_size; |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 121 | // Total size of the records portion of the Chunk, when compressed |
| 122 | uint64_t records_size_compressed; |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 123 | // Mapping of channel IDs to the MessageIndex entry for that channel within |
| 124 | // the referenced Chunk. The MessageIndex is referenced by an offset from |
| 125 | // the start of the file. |
| 126 | std::map<uint16_t, uint64_t> message_index_offsets; |
| 127 | // Total size, in bytes, of all the MessageIndex entries for this Chunk |
| 128 | // together (note that they are required to be contiguous). |
| 129 | uint64_t message_index_size; |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 130 | // Compression used in this Chunk. |
| 131 | Compression compression; |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 132 | }; |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 133 | // Maintains the state of a single Chunk. In order to maximize read |
| 134 | // performance, we currently maintain separate chunks for each channel so |
| 135 | // that, in order to read a given channel, only data associated with that |
| 136 | // channel nead be read. |
James Kuszmaul | 36a25f4 | 2022-10-28 10:18:00 -0700 | [diff] [blame] | 137 | struct ChunkStatus { |
| 138 | // Buffer containing serialized message data for the currently-being-built |
| 139 | // chunk. |
| 140 | std::stringstream data; |
| 141 | // Earliest message observed in this chunk. |
| 142 | std::optional<aos::monotonic_clock::time_point> earliest_message; |
| 143 | // Latest message observed in this chunk. |
| 144 | std::optional<aos::monotonic_clock::time_point> latest_message; |
| 145 | // MessageIndex's for each message. The std::map is indexed by channel ID. |
| 146 | // The vector is then a series of pairs of (timestamp, offset from start of |
| 147 | // data). |
| 148 | // Note that currently this will only ever have one entry, for the channel |
| 149 | // that this chunk corresponds to. However, the standard provides for there |
| 150 | // being more than one channel per chunk and so we still have some code that |
| 151 | // supports that. |
| 152 | std::map<uint16_t, std::vector<std::pair<uint64_t, uint64_t>>> |
| 153 | message_indices; |
| 154 | }; |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 155 | enum class RegisterHandlers { kYes, kNo }; |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 156 | // Helpers to write each type of relevant record. |
| 157 | void WriteMagic(); |
| 158 | void WriteHeader(); |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 159 | void WriteFooter(uint64_t summary_offset, uint64_t summary_offset_offset); |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 160 | void WriteDataEnd(); |
| 161 | void WriteSchema(const uint16_t id, const aos::Channel *channel); |
| 162 | void WriteChannel(const uint16_t id, const uint16_t schema_id, |
James Kuszmaul | e4aa01d | 2022-06-28 14:09:02 -0700 | [diff] [blame] | 163 | const aos::Channel *channel, |
| 164 | std::string_view override_name = ""); |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 165 | void WriteMessage(uint16_t channel_id, const Channel *channel, |
James Kuszmaul | 36a25f4 | 2022-10-28 10:18:00 -0700 | [diff] [blame] | 166 | const Context &context, ChunkStatus *chunk); |
| 167 | void WriteChunk(ChunkStatus *chunk); |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 168 | |
James Kuszmaul | bed2af0 | 2023-01-28 15:57:24 -0800 | [diff] [blame] | 169 | // Writes out the special configuration channel. This gets called right before |
| 170 | // the first actual message is written so that we can have a reasonable |
| 171 | // monotonic clock time. |
| 172 | void WriteConfigurationMessage(); |
| 173 | |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 174 | // The helpers for writing records which appear in the Summary section will |
| 175 | // return SummaryOffset's so that they can be referenced in the SummaryOffset |
| 176 | // section. |
| 177 | SummaryOffset WriteChunkIndices(); |
| 178 | SummaryOffset WriteStatistics(); |
| 179 | std::vector<SummaryOffset> WriteSchemasAndChannels( |
| 180 | RegisterHandlers register_handlers); |
| 181 | void WriteSummaryOffset(const SummaryOffset &offset); |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 182 | |
| 183 | // Writes an MCAP record to the output file. |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 184 | void WriteRecord(OpCode op, std::string_view record, std::ostream *ostream); |
| 185 | void WriteRecord(OpCode op, std::string_view record) { |
| 186 | WriteRecord(op, record, &output_); |
| 187 | } |
| 188 | // Adds an MCAP-spec string/byte-array/map/array of pairs/fixed-size integer |
| 189 | // to a buffer. |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 190 | static void AppendString(FastStringBuilder *builder, std::string_view string); |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 191 | static void AppendBytes(FastStringBuilder *builder, std::string_view bytes); |
| 192 | static void AppendChannelMap(FastStringBuilder *builder, |
| 193 | const std::map<uint16_t, uint64_t> &map); |
| 194 | static void AppendMessageIndices( |
| 195 | FastStringBuilder *builder, |
| 196 | const std::vector<std::pair<uint64_t, uint64_t>> &messages); |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 197 | static void AppendInt16(FastStringBuilder *builder, uint16_t val); |
| 198 | static void AppendInt32(FastStringBuilder *builder, uint32_t val); |
| 199 | static void AppendInt64(FastStringBuilder *builder, uint64_t val); |
| 200 | |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 201 | aos::EventLoop *event_loop_; |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 202 | std::ofstream output_; |
James Kuszmaul | c31d736 | 2022-05-27 14:20:04 -0700 | [diff] [blame] | 203 | const Serialization serialization_; |
James Kuszmaul | 9f607c6 | 2022-10-27 17:01:55 -0700 | [diff] [blame] | 204 | const CanonicalChannelNames canonical_channels_; |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 205 | const Compression compression_; |
James Kuszmaul | c31d736 | 2022-05-27 14:20:04 -0700 | [diff] [blame] | 206 | size_t total_message_bytes_ = 0; |
| 207 | std::map<const Channel *, size_t> total_channel_bytes_; |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 208 | FastStringBuilder string_builder_; |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 209 | |
| 210 | // Earliest message observed in this logfile. |
| 211 | std::optional<aos::monotonic_clock::time_point> earliest_message_; |
James Kuszmaul | 36a25f4 | 2022-10-28 10:18:00 -0700 | [diff] [blame] | 212 | // Latest message observed in this logfile. |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 213 | aos::monotonic_clock::time_point latest_message_ = |
| 214 | aos::monotonic_clock::min_time; |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 215 | // Count of all messages on each channel, indexed by channel ID. |
| 216 | std::map<uint16_t, uint64_t> message_counts_; |
James Kuszmaul | c31d736 | 2022-05-27 14:20:04 -0700 | [diff] [blame] | 217 | std::map<uint16_t, std::unique_ptr<RawFetcher>> fetchers_; |
James Kuszmaul | 36a25f4 | 2022-10-28 10:18:00 -0700 | [diff] [blame] | 218 | // All currently-being-built chunks. Indexed by channel ID. This is used to |
| 219 | // segregate channels into separate chunks to support more efficient reading. |
| 220 | std::map<uint16_t, ChunkStatus> current_chunks_; |
James Kuszmaul | b3fba25 | 2022-04-06 15:13:31 -0700 | [diff] [blame] | 221 | // ChunkIndex's for all fully written Chunks. |
| 222 | std::vector<ChunkIndex> chunk_indices_; |
James Kuszmaul | e4aa01d | 2022-06-28 14:09:02 -0700 | [diff] [blame] | 223 | |
| 224 | // Metadata associated with the fake "configuration" channel that we create in |
| 225 | // order to ensure that foxglove extensions/users have access to the full |
| 226 | // configuration. |
| 227 | uint16_t configuration_id_ = 0; |
| 228 | FlatbufferDetachedBuffer<Channel> configuration_channel_; |
| 229 | FlatbufferDetachedBuffer<Configuration> configuration_; |
James Kuszmaul | bed2af0 | 2023-01-28 15:57:24 -0800 | [diff] [blame] | 230 | bool wrote_configuration_ = false; |
James Kuszmaul | 5ab990d | 2022-11-07 16:35:49 -0800 | [diff] [blame] | 231 | |
| 232 | // Memory buffer to use for compressing data. |
| 233 | std::vector<uint8_t> compression_buffer_; |
James Kuszmaul | 4ed5fb1 | 2022-03-22 15:20:04 -0700 | [diff] [blame] | 234 | }; |
| 235 | } // namespace aos |
| 236 | #endif // AOS_UTIL_MCAP_LOGGER_H_ |