blob: c67abdac3363bbe9a9184384ab93ee3b188e5847 [file] [log] [blame]
James Kuszmaul4ed5fb12022-03-22 15:20:04 -07001#ifndef AOS_UTIL_MCAP_LOGGER_H_
2#define AOS_UTIL_MCAP_LOGGER_H_
3
#include <stddef.h>
#include <stdint.h>

#include <fstream>
#include <iosfwd>
#include <map>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "nlohmann/json_fwd.hpp"

#include "aos/configuration_generated.h"
#include "aos/events/context.h"
#include "aos/events/event_loop.h"
#include "aos/fast_string_builder.h"
#include "aos/flatbuffer_utils.h"
#include "aos/flatbuffers.h"
#include "aos/time/time.h"
James Kuszmaul4ed5fb12022-03-22 15:20:04 -070025
26namespace aos {
27
// Produces a JSON Schema (https://json-schema.org/) for a given flatbuffer
// type. If recursion_level is set, will include a $schema attribute indicating
// the schema definition being used (this is used to allow for recursion).
//
// Note that this is pretty bare-bones, so, e.g., we don't distinguish between
// structs and tables when generating the JSON schema, so we don't bother to
// mark struct fields as required.
//
// JsonSchemaRecursion is the selector passed as recursion_level. The numeric
// values below are the ones the compiler would assign implicitly; they are
// only spelled out for readability.
enum class JsonSchemaRecursion {
  // Default for callers of JsonSchemaForFlatbuffer().
  kTopLevel = 0,
  // NOTE(review): presumably used for schemas of nested types generated while
  // recursing -- confirm against the implementation.
  kNested = 1,
};
39nlohmann::json JsonSchemaForFlatbuffer(
40 const FlatbufferType &type,
41 JsonSchemaRecursion recursion_level = JsonSchemaRecursion::kTopLevel);
42
James Kuszmaul1e418f62023-02-26 14:40:20 -080043// Returns the shortest possible alias for the specified channel on the
44// specified node/application.
45std::string ShortenedChannelName(const aos::Configuration *config,
46 const aos::Channel *channel,
47 std::string_view application_name,
48 const aos::Node *node);
49
James Kuszmaul4ed5fb12022-03-22 15:20:04 -070050// Generates an MCAP file, per the specification at
51// https://github.com/foxglove/mcap/tree/main/docs/specification
James Kuszmaulb3fba252022-04-06 15:13:31 -070052// This currently generates an uncompressed logfile with full message indexing
53// available, to be able to support Foxglove fully.
James Kuszmaul4ed5fb12022-03-22 15:20:04 -070054class McapLogger {
55 public:
James Kuszmaulc31d7362022-05-27 14:20:04 -070056 // Whether to serialize the messages into the MCAP file as JSON or
57 // flatbuffers.
58 enum class Serialization {
59 kJson,
60 kFlatbuffer,
61 };
James Kuszmaul9f607c62022-10-27 17:01:55 -070062 // Whether to attempt to shorten channel names.
63 enum class CanonicalChannelNames {
64 // Just use the full, unambiguous, channel names.
65 kCanonical,
66 // Use GetChannelAliases() to determine the shortest possible name for the
67 // channel for the current node, and use that in the MCAP file. This makes
68 // it so that the channels in the resulting file are more likely to match
69 // the channel names that are used in "real" applications.
70 kShortened,
71 };
James Kuszmaul5ab990d2022-11-07 16:35:49 -080072 // Chunk compression to use in the MCAP file.
73 enum class Compression {
74 kNone,
75 kLz4,
76 };
James Kuszmaulc31d7362022-05-27 14:20:04 -070077 McapLogger(EventLoop *event_loop, const std::string &output_path,
James Kuszmaul5ab990d2022-11-07 16:35:49 -080078 Serialization serialization,
79 CanonicalChannelNames canonical_channels, Compression compression);
James Kuszmaul4ed5fb12022-03-22 15:20:04 -070080 ~McapLogger();
81
82 private:
83 enum class OpCode {
84 kHeader = 0x01,
85 kFooter = 0x02,
86 kSchema = 0x03,
87 kChannel = 0x04,
88 kMessage = 0x05,
James Kuszmaulb3fba252022-04-06 15:13:31 -070089 kChunk = 0x06,
90 kMessageIndex = 0x07,
91 kChunkIndex = 0x08,
92 kAttachment = 0x09,
93 kAttachmentIndex = 0x0A,
94 kStatistics = 0x0B,
95 kMetadata = 0x0C,
96 kMetadataIndex = 0x0D,
97 kSummaryOffset = 0x0E,
James Kuszmaul4ed5fb12022-03-22 15:20:04 -070098 kDataEnd = 0x0F,
99 };
James Kuszmaulb3fba252022-04-06 15:13:31 -0700100 // Stores information associated with a SummaryOffset entry (an offset to the
101 // start of a section within Summary section, which allows readers to quickly
102 // find all the indices/channel definitions/etc. for a given log).
103 struct SummaryOffset {
104 OpCode op_code;
105 // Offset from the start of the file.
106 uint64_t offset;
107 // Total length of the section, in bytes.
108 uint64_t size;
109 };
110 // Information needed to build a ChunkIndex entry.
111 struct ChunkIndex {
112 // Earliest and latest message times within the Chunk being referenced.
113 aos::monotonic_clock::time_point start_time;
114 aos::monotonic_clock::time_point end_time;
115 // Offset from the start of the file to the start of the relevant Chunk.
116 uint64_t offset;
117 // Total size of the Chunk, in bytes.
118 uint64_t chunk_size;
James Kuszmaul5ab990d2022-11-07 16:35:49 -0800119 // Total uncompressed size of the records portion of the Chunk, in bytes.
James Kuszmaulb3fba252022-04-06 15:13:31 -0700120 uint64_t records_size;
James Kuszmaul5ab990d2022-11-07 16:35:49 -0800121 // Total size of the records portion of the Chunk, when compressed
122 uint64_t records_size_compressed;
James Kuszmaulb3fba252022-04-06 15:13:31 -0700123 // Mapping of channel IDs to the MessageIndex entry for that channel within
124 // the referenced Chunk. The MessageIndex is referenced by an offset from
125 // the start of the file.
126 std::map<uint16_t, uint64_t> message_index_offsets;
127 // Total size, in bytes, of all the MessageIndex entries for this Chunk
128 // together (note that they are required to be contiguous).
129 uint64_t message_index_size;
James Kuszmaul5ab990d2022-11-07 16:35:49 -0800130 // Compression used in this Chunk.
131 Compression compression;
James Kuszmaulb3fba252022-04-06 15:13:31 -0700132 };
James Kuszmaul5ab990d2022-11-07 16:35:49 -0800133 // Maintains the state of a single Chunk. In order to maximize read
134 // performance, we currently maintain separate chunks for each channel so
135 // that, in order to read a given channel, only data associated with that
136 // channel nead be read.
James Kuszmaul36a25f42022-10-28 10:18:00 -0700137 struct ChunkStatus {
138 // Buffer containing serialized message data for the currently-being-built
139 // chunk.
140 std::stringstream data;
141 // Earliest message observed in this chunk.
142 std::optional<aos::monotonic_clock::time_point> earliest_message;
143 // Latest message observed in this chunk.
144 std::optional<aos::monotonic_clock::time_point> latest_message;
145 // MessageIndex's for each message. The std::map is indexed by channel ID.
146 // The vector is then a series of pairs of (timestamp, offset from start of
147 // data).
148 // Note that currently this will only ever have one entry, for the channel
149 // that this chunk corresponds to. However, the standard provides for there
150 // being more than one channel per chunk and so we still have some code that
151 // supports that.
152 std::map<uint16_t, std::vector<std::pair<uint64_t, uint64_t>>>
153 message_indices;
154 };
James Kuszmaulb3fba252022-04-06 15:13:31 -0700155 enum class RegisterHandlers { kYes, kNo };
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700156 // Helpers to write each type of relevant record.
157 void WriteMagic();
158 void WriteHeader();
James Kuszmaulb3fba252022-04-06 15:13:31 -0700159 void WriteFooter(uint64_t summary_offset, uint64_t summary_offset_offset);
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700160 void WriteDataEnd();
161 void WriteSchema(const uint16_t id, const aos::Channel *channel);
162 void WriteChannel(const uint16_t id, const uint16_t schema_id,
James Kuszmaule4aa01d2022-06-28 14:09:02 -0700163 const aos::Channel *channel,
164 std::string_view override_name = "");
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700165 void WriteMessage(uint16_t channel_id, const Channel *channel,
James Kuszmaul36a25f42022-10-28 10:18:00 -0700166 const Context &context, ChunkStatus *chunk);
167 void WriteChunk(ChunkStatus *chunk);
James Kuszmaulb3fba252022-04-06 15:13:31 -0700168
James Kuszmaulbed2af02023-01-28 15:57:24 -0800169 // Writes out the special configuration channel. This gets called right before
170 // the first actual message is written so that we can have a reasonable
171 // monotonic clock time.
172 void WriteConfigurationMessage();
173
James Kuszmaulb3fba252022-04-06 15:13:31 -0700174 // The helpers for writing records which appear in the Summary section will
175 // return SummaryOffset's so that they can be referenced in the SummaryOffset
176 // section.
177 SummaryOffset WriteChunkIndices();
178 SummaryOffset WriteStatistics();
179 std::vector<SummaryOffset> WriteSchemasAndChannels(
180 RegisterHandlers register_handlers);
181 void WriteSummaryOffset(const SummaryOffset &offset);
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700182
183 // Writes an MCAP record to the output file.
James Kuszmaulb3fba252022-04-06 15:13:31 -0700184 void WriteRecord(OpCode op, std::string_view record, std::ostream *ostream);
185 void WriteRecord(OpCode op, std::string_view record) {
186 WriteRecord(op, record, &output_);
187 }
188 // Adds an MCAP-spec string/byte-array/map/array of pairs/fixed-size integer
189 // to a buffer.
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700190 static void AppendString(FastStringBuilder *builder, std::string_view string);
James Kuszmaulb3fba252022-04-06 15:13:31 -0700191 static void AppendBytes(FastStringBuilder *builder, std::string_view bytes);
192 static void AppendChannelMap(FastStringBuilder *builder,
193 const std::map<uint16_t, uint64_t> &map);
194 static void AppendMessageIndices(
195 FastStringBuilder *builder,
196 const std::vector<std::pair<uint64_t, uint64_t>> &messages);
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700197 static void AppendInt16(FastStringBuilder *builder, uint16_t val);
198 static void AppendInt32(FastStringBuilder *builder, uint32_t val);
199 static void AppendInt64(FastStringBuilder *builder, uint64_t val);
200
James Kuszmaulb3fba252022-04-06 15:13:31 -0700201 aos::EventLoop *event_loop_;
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700202 std::ofstream output_;
James Kuszmaulc31d7362022-05-27 14:20:04 -0700203 const Serialization serialization_;
James Kuszmaul9f607c62022-10-27 17:01:55 -0700204 const CanonicalChannelNames canonical_channels_;
James Kuszmaul5ab990d2022-11-07 16:35:49 -0800205 const Compression compression_;
James Kuszmaulc31d7362022-05-27 14:20:04 -0700206 size_t total_message_bytes_ = 0;
207 std::map<const Channel *, size_t> total_channel_bytes_;
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700208 FastStringBuilder string_builder_;
James Kuszmaulb3fba252022-04-06 15:13:31 -0700209
210 // Earliest message observed in this logfile.
211 std::optional<aos::monotonic_clock::time_point> earliest_message_;
James Kuszmaul36a25f42022-10-28 10:18:00 -0700212 // Latest message observed in this logfile.
James Kuszmaul5ab990d2022-11-07 16:35:49 -0800213 aos::monotonic_clock::time_point latest_message_ =
214 aos::monotonic_clock::min_time;
James Kuszmaulb3fba252022-04-06 15:13:31 -0700215 // Count of all messages on each channel, indexed by channel ID.
216 std::map<uint16_t, uint64_t> message_counts_;
James Kuszmaulc31d7362022-05-27 14:20:04 -0700217 std::map<uint16_t, std::unique_ptr<RawFetcher>> fetchers_;
James Kuszmaul36a25f42022-10-28 10:18:00 -0700218 // All currently-being-built chunks. Indexed by channel ID. This is used to
219 // segregate channels into separate chunks to support more efficient reading.
220 std::map<uint16_t, ChunkStatus> current_chunks_;
James Kuszmaulb3fba252022-04-06 15:13:31 -0700221 // ChunkIndex's for all fully written Chunks.
222 std::vector<ChunkIndex> chunk_indices_;
James Kuszmaule4aa01d2022-06-28 14:09:02 -0700223
224 // Metadata associated with the fake "configuration" channel that we create in
225 // order to ensure that foxglove extensions/users have access to the full
226 // configuration.
227 uint16_t configuration_id_ = 0;
228 FlatbufferDetachedBuffer<Channel> configuration_channel_;
229 FlatbufferDetachedBuffer<Configuration> configuration_;
James Kuszmaulbed2af02023-01-28 15:57:24 -0800230 bool wrote_configuration_ = false;
James Kuszmaul5ab990d2022-11-07 16:35:49 -0800231
232 // Memory buffer to use for compressing data.
233 std::vector<uint8_t> compression_buffer_;
James Kuszmaul4ed5fb12022-03-22 15:20:04 -0700234};
235} // namespace aos
236#endif // AOS_UTIL_MCAP_LOGGER_H_