| include "aos/configuration.fbs"; |
| |
| namespace aos.logger; |
| |
| // A log file is a sequence of size prefixed flatbuffers. |
| // The first flatbuffer will be the LogFileHeader, followed by an arbitrary |
| // number of MessageHeaders. |
| // |
| // The log file starts at the time demarcated in the header on the monotonic |
| // clock. There will be any number of messages per channel logged before the |
| // start time. These messages are logged so that fetchers can retrieve the |
| // state of the system at the start of the logfile for things like capturing |
| // parameters. In replay, they should be made available to fetchers, but not |
| // trigger watchers. |
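//
// As a minimal sketch of walking that layout (not the real AOS log reader),
// assuming an uncompressed file, a little-endian host, the standard
// flatbuffers 4-byte size prefix, and a flatc-generated header name:
//
//   #include <cstdint>
//   #include <cstring>
//   #include <fstream>
//   #include <vector>
//
//   #include "aos/events/logging/logger_generated.h"  // assumed flatc output
//   #include "flatbuffers/flatbuffers.h"
//
//   // Reads one size-prefixed flatbuffer (4-byte length, then payload),
//   // keeping the prefix in the buffer so GetSizePrefixedRoot can use it.
//   bool ReadSizePrefixed(std::ifstream &in, std::vector<uint8_t> *out) {
//     uint32_t size;
//     if (!in.read(reinterpret_cast<char *>(&size), sizeof(size))) return false;
//     out->resize(sizeof(size) + size);
//     std::memcpy(out->data(), &size, sizeof(size));
//     return static_cast<bool>(
//         in.read(reinterpret_cast<char *>(out->data() + sizeof(size)), size));
//   }
//
//   void WalkLog(const char *path) {
//     std::ifstream in(path, std::ios::binary);
//     std::vector<uint8_t> buffer;
//     // The first flatbuffer is the LogFileHeader.
//     if (!ReadSizePrefixed(in, &buffer)) return;
//     const auto *header =
//         flatbuffers::GetSizePrefixedRoot<aos::logger::LogFileHeader>(
//             buffer.data());
//     (void)header;
//     // Every following flatbuffer is a MessageHeader.
//     while (ReadSizePrefixed(in, &buffer)) {
//       const auto *message =
//           flatbuffers::GetSizePrefixedRoot<aos::logger::MessageHeader>(
//               buffer.data());
//       (void)message;
//     }
//   }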

table LogFileHeader {
  // Time this log file started on the monotonic clock in nanoseconds.
  // If this isn't known (the log file is being recorded from another node
  // where we don't know the time offset), both timestamps will be min_time.
  // This log file may contain data from before the start times (e.g.,
  // fetched message data), but should guarantee that all data within the
  // logfile *after* start_time is present until the end of the file (note
  // that there may be incomplete data at the very end of a log if it is
  // truncated poorly).
  // These timestamps are from the perspective of `node`.
  monotonic_start_time:int64 = -9223372036854775808 (id: 0);
  // Time this log file started on the realtime clock in nanoseconds.
  // Will only be populated if logger_node == node.
  realtime_start_time:int64 = -9223372036854775808 (id: 1);
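
  // The -9223372036854775808 default above is the minimum int64, i.e. the
  // monotonic_clock::min_time sentinel. A minimal sketch of checking whether
  // the start time is actually known, assuming the flatc-generated accessors
  // for this schema:
  //
  //   #include <cstdint>
  //   #include <limits>
  //
  //   #include "aos/events/logging/logger_generated.h"  // assumed flatc output
  //
  //   bool HasKnownStartTime(const aos::logger::LogFileHeader &header) {
  //     return header.monotonic_start_time() !=
  //            std::numeric_limits<int64_t>::min();
  //   }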

  // Messages are not written to disk in order. They will be out of order by
  // at most this duration (in nanoseconds). If the log reader buffers until
  // it finds messages this much newer than its simulation time, it will never
  // find a message out of order.
  max_out_of_order_duration:long (id: 2);
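
  // A minimal sketch of that buffering rule, assuming messages are streamed
  // off disk into a min-heap ordered by send time (all names here are
  // hypothetical, not part of this schema):
  //
  //   #include <cstdint>
  //   #include <queue>
  //   #include <vector>
  //
  //   struct BufferedMessage {
  //     int64_t monotonic_sent_time;
  //     // ... payload ...
  //   };
  //   struct Later {
  //     bool operator()(const BufferedMessage &a, const BufferedMessage &b) const {
  //       return a.monotonic_sent_time > b.monotonic_sent_time;
  //     }
  //   };
  //   using MessageHeap =
  //       std::priority_queue<BufferedMessage, std::vector<BufferedMessage>, Later>;
  //
  //   // The oldest buffered message may be emitted once the newest message
  //   // read from disk is at least max_out_of_order_duration newer than it.
  //   bool SafeToEmitOldest(const MessageHeap &heap, int64_t newest_read_time,
  //                         int64_t max_out_of_order_duration) {
  //     return !heap.empty() &&
  //            newest_read_time - heap.top().monotonic_sent_time >=
  //                max_out_of_order_duration;
  //   }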

  // The configuration of the channels. It is valid to have a log file with
  // just this filled out. That is a config-only file, which will be pointed
  // to by files using configuration_sha256 and optionally configuration_path.
  configuration:aos.Configuration (id: 3);
  // sha256 of the configuration used. If this is set, configuration will not
  // be set.
  configuration_sha256:string (id: 16);
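
  // A minimal sketch of resolving the configuration when reading a header,
  // assuming the reader can look up config-only files by their sha256 (the
  // FindConfigBySha256 helper is hypothetical):
  //
  //   #include "aos/events/logging/logger_generated.h"  // assumed flatc output
  //
  //   const aos::Configuration *ResolveConfiguration(
  //       const aos::logger::LogFileHeader &header) {
  //     if (header.configuration() != nullptr) {
  //       return header.configuration();
  //     }
  //     if (header.configuration_sha256() == nullptr) {
  //       return nullptr;
  //     }
  //     // Otherwise the configuration lives in a separate config-only file
  //     // whose own header has `configuration` populated.
  //     return FindConfigBySha256(header.configuration_sha256()->str());
  //   }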

  // Name of the device which this log file is for.
  name:string (id: 4);

  // The current node, if known and running in a multi-node configuration.
  node:Node (id: 5);

  // All UUIDs are uuid4.

  // A log is made up of a bunch of log files and parts. These build up a
  // tree. Every .bfbs file has a LogFileHeader at the start.
  //
  // /-- basename_pi1_data.part0.bfbs, basename_pi1_data.part1.bfbs, etc.
  // ---- basename_timestamps/pi1/aos/remote_timestamps/pi2/aos.logger.MessageHeader.part0.bfbs, etc.
  // \-- basename_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part0.bfbs, etc.

  // All log files and parts from a single logging event will have
  // the same uuid. This should be all the files generated on a single node.
  // Used to correlate files recorded together.
  log_event_uuid:string (id: 6);

  // All log parts generated by a single Logger instance will have the same
  // value here.
  logger_instance_uuid: string (id: 10);

  // All log parts generated on a single node while it is booted will have the
  // same value here. It also matches the boot UUID used by systemd.
  logger_node_boot_uuid: string (id: 11);

  // The boot UUID of the source node, if known. Empty if we didn't have one
  // at the time.
  source_node_boot_uuid: string (id: 13);

  // Timestamps at which this logfile started, on the logger's clocks. This is
  // mostly useful when trying to deduce the order of node reboots. These
  // timestamps don't change on reboot, so they can't be used reliably.
  logger_monotonic_start_time:int64 = -9223372036854775808 (id: 14);
  logger_realtime_start_time:int64 = -9223372036854775808 (id: 15);

  // All log events across all nodes produced by a single high-level start
  // event will have the same value here.
  log_start_uuid: string (id: 12);

  // Part files which go together all have headers. When creating a log file
  // with multiple parts, the logger should stop writing to part n-1 as soon
  // as it starts writing to part n, and write messages as though there were
  // just one big file. Therefore, part files won't be self-standing, since
  // they won't have data fetched at the beginning.

  // If data is logged before the time offset can be established to the other
  // node, the start time will be monotonic_clock::min_time, and a new part
  // file will be created when the start time is known.

  // All the parts which go together have the same uuid.
  parts_uuid:string (id: 7);
  // parts_index indicates which part this is in the sequence. The index
  // starts at 0.
  parts_index:int32 (id: 8);
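
  // A minimal sketch of reassembling multi-part logs from a pile of part
  // headers: group by parts_uuid, then order by parts_index (the PartFile
  // struct is hypothetical):
  //
  //   #include <algorithm>
  //   #include <cstdint>
  //   #include <map>
  //   #include <string>
  //   #include <vector>
  //
  //   struct PartFile {
  //     std::string path;
  //     std::string parts_uuid;
  //     int32_t parts_index;
  //   };
  //
  //   // Returns each group of parts sorted so part 0 comes first.
  //   std::map<std::string, std::vector<PartFile>> GroupParts(
  //       std::vector<PartFile> parts) {
  //     std::map<std::string, std::vector<PartFile>> result;
  //     for (PartFile &part : parts) {
  //       const std::string uuid = part.parts_uuid;
  //       result[uuid].push_back(std::move(part));
  //     }
  //     for (auto &[uuid, group] : result) {
  //       std::sort(group.begin(), group.end(),
  //                 [](const PartFile &a, const PartFile &b) {
  //                   return a.parts_index < b.parts_index;
  //                 });
  //     }
  //     return result;
  //   }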

  // The node the data was logged on, if known and running in a multi-node
  // configuration.
  logger_node:Node (id: 9);

  // The boot UUIDs for all the nodes we know them for, or "" for the ones we
  // don't know.
  boot_uuids:[string] (id: 17);

  // Timestamps at which the header of this part file was written, on the
  // logger node's clocks.
  logger_part_monotonic_start_time:int64 = -9223372036854775808 (id: 18);
  logger_part_realtime_start_time:int64 = -9223372036854775808 (id: 19);

  // These timestamps provide summary information about the oldest messages we
  // know which crossed the network. The goal is to enable log file sorting
  // to determine the order of all boots by observing corresponding times
  // across the network and using those to build ordering constraints so we
  // can sort a DAG.
  //
  // There are 5 main cases. Say we have 2 channels: /r, which is reliable,
  // and /u, which isn't, both sent from the same remote node. The examples
  // below are listed as the remote node sending the message, and then the
  // local node receiving and logging the message.
  //
  // case 0: /r -> boot 0 received on boot 0.
  //         /u -> boot 0 received on boot 0.
  //         We log for a bit, then the remote reboots.
  //         /r -> boot 1 received on boot 0.
  //         /u -> boot 1 received on boot 0.
  //
  // case 1: /r -> boot 0 received on boot 0.
  //         /u -> boot 1 received on boot 0.
  //         We start logging after both messages arrive.
  //
  // case 2: /r -> boot 0 received on boot 0.
  //         /u -> boot 0 received on boot 0.
  //         We log for a bit, then reboot. More messages show up when we reconnect.
  //         /r -> boot 0 received on boot 1.
  //         /u -> boot 0 received on boot 1.
  //
  // case 3: /u -> boot 0 received on boot 0.
  //         /r -> boot 1 received on boot 0.
  //         /u -> boot 1 received on boot 0.
  //         We start logging after all three messages arrive.
  //
  // case 4: /u -> boot 0 received on boot 0.
  //         /r -> boot 1 received on boot 0.
  //
  // In case 0, we have all the messages showing up and a reboot of the remote.
  //
  // In case 1: we only have a reliable timestamp from boot 0, but that
  // reliable timestamp makes it clear that /r was before /u, so boot 0 was
  // before boot 1.
  //
  // In case 2: we have the same reliable timestamp, so that tells us nothing.
  // The unreliable timestamps though tell a different story. /u will be after
  // /r, since any messages on /u generated before the reboot won't get
  // delivered. So, we get an ordering constraint saying that any sent /u's
  // on the second boot were after /u on the first boot.
  //
  // In case 3: we only got the reliable message on the second boot for some
  // reason. Reliable messages aren't 100% reliable. In this case, the
  // reliable timestamps are actually a distraction and are misleading. We
  // want to use the unreliable timestamps here.
  //
  // In case 4: we have utter madness...
  //
  // We expect the nominal case to be case 0, or the first half of case 0 if
  // there are no reboots.
  //
  // We believe that any other cases are covered by the same mechanism.
  // TODO(austin/brian): Shore up this and capture the cases that are 100%
  // ambiguous and we can't sort.
  //
  // For all channels sent from a specific node, these vectors hold the
  // timestamp of the oldest known message from that node, and the
  // corresponding monotonic timestamp for when that was received on our node.
  //
  // The local node is the node that this log file is from the perspective of
  // (the `node` field above, id: 5).
  corrupted_oldest_remote_monotonic_timestamps:[int64] (id: 20, deprecated);
  corrupted_oldest_local_monotonic_timestamps:[int64] (id: 21, deprecated);
  oldest_remote_monotonic_timestamps:[int64] (id: 24);
  oldest_local_monotonic_timestamps:[int64] (id: 25);

  // For all channels *excluding* the reliable channels (ttl == 0), record the
  // same quantity.
  corrupted_oldest_remote_unreliable_monotonic_timestamps:[int64] (id: 22, deprecated);
  corrupted_oldest_local_unreliable_monotonic_timestamps:[int64] (id: 23, deprecated);
  oldest_remote_unreliable_monotonic_timestamps:[int64] (id: 26);
  oldest_local_unreliable_monotonic_timestamps:[int64] (id: 27);

  // For all channels *excluding* the unreliable channels (ttl != 0), record the
  // same quantity.
  oldest_remote_reliable_monotonic_timestamps:[int64] (id: 28);
  oldest_local_reliable_monotonic_timestamps:[int64] (id: 29);
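
  // A heavily simplified sketch of one kind of constraint these vectors can
  // produce (the real sorting logic handles all of the cases above; the
  // PartSummary struct is hypothetical). Given two part files logged on the
  // *same* local boot whose data came from different boots of node i, the
  // unreliable timestamps say the source boot we heard from at the earlier
  // local time came first:
  //
  //   #include <cstdint>
  //   #include <string>
  //
  //   struct PartSummary {
  //     std::string source_boot_uuid;  // boot of node i this part heard from
  //     int64_t oldest_local_unreliable_monotonic_timestamp;  // for node i
  //   };
  //
  //   // Returns true if a's source boot is constrained to be before b's.
  //   bool SourceBootBefore(const PartSummary &a, const PartSummary &b) {
  //     return a.source_boot_uuid != b.source_boot_uuid &&
  //            a.oldest_local_unreliable_monotonic_timestamp <
  //                b.oldest_local_unreliable_monotonic_timestamp;
  //   }
  //
  // Constraints like this, collected across nodes and part files, form the
  // edges of the DAG that gets topologically sorted into a boot order.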

  // For all the remote timestamps which come back to the logger, record the
  // same quantity. The "local" time here is on the logger node's boot, and
  // "remote" is the node which sent the timestamps.
  oldest_logger_remote_unreliable_monotonic_timestamps:[int64] (id: 30);
  oldest_logger_local_unreliable_monotonic_timestamps:[int64] (id: 31);

  // Logger build version. This is normally the git sha1 that the logger was
  // built from.
  logger_sha1:string (id: 32);
  // Logger textual version. This is normally the release name stamped into
  // the binary.
  logger_version:string (id: 33);
}

// Table holding a message.
table MessageHeader {
  // Index into the channel data structure in the log file header. This
  // provides the data type.
  channel_index:uint (id: 0);
  // Time this message was sent on the monotonic clock in nanoseconds on this
  // node.
  monotonic_sent_time:long (id: 1);
  // Time this message was sent on the realtime clock in nanoseconds on this
  // node.
  realtime_sent_time:long (id: 2);
  // Index into the IPC queue of this message. This starts at 0 and always
  // increments monotonically if no messages were ever lost. It will wrap at
  // a multiple of the queue size.
  queue_index:uint (id: 3);
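
  // A minimal sketch of using queue_index to detect dropped messages on one
  // channel, ignoring the wrap at a multiple of the queue size (the
  // ChannelState struct is hypothetical):
  //
  //   #include <cstdint>
  //   #include <optional>
  //
  //   struct ChannelState {
  //     std::optional<uint32_t> last_queue_index;
  //   };
  //
  //   // Returns how many messages were skipped since the previous one seen.
  //   uint32_t CountDropped(ChannelState *state, uint32_t queue_index) {
  //     uint32_t dropped = 0;
  //     if (state->last_queue_index.has_value()) {
  //       dropped = queue_index - *state->last_queue_index - 1;
  //     }
  //     state->last_queue_index = queue_index;
  //     return dropped;
  //   }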

  // TODO(austin): Format? Compressed?

  // The nested flatbuffer.
  data:[ubyte] (id: 4);

  // Time this message was sent on the monotonic clock of the remote node in
  // nanoseconds.
  monotonic_remote_time:int64 = -9223372036854775808 (id: 5);
  // Time this message was sent on the realtime clock of the remote node in
  // nanoseconds.
  realtime_remote_time:int64 = -9223372036854775808 (id: 6);
  // Queue index of this message on the remote node.
  remote_queue_index:uint32 = 4294967295 (id: 7);

  // Time this timestamp was received on the monotonic clock of the logger node
  // in nanoseconds.
  monotonic_timestamp_time:int64 = -9223372036854775808 (id: 8);
}

root_type MessageHeader;
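
// A minimal sketch of appending one message in this format using the
// flatc-generated C++ API (the generated header name is an assumption; the
// real logger also handles buffering and part files):
//
//   #include <cstddef>
//   #include <cstdint>
//   #include <fstream>
//
//   #include "aos/events/logging/logger_generated.h"  // assumed flatc output
//   #include "flatbuffers/flatbuffers.h"
//
//   void AppendMessage(std::ofstream *out, uint32_t channel_index,
//                      int64_t monotonic_sent_time, int64_t realtime_sent_time,
//                      uint32_t queue_index, const uint8_t *data, size_t size) {
//     flatbuffers::FlatBufferBuilder fbb;
//     const auto data_offset = fbb.CreateVector(data, size);
//     aos::logger::MessageHeaderBuilder message_builder(fbb);
//     message_builder.add_channel_index(channel_index);
//     message_builder.add_monotonic_sent_time(monotonic_sent_time);
//     message_builder.add_realtime_sent_time(realtime_sent_time);
//     message_builder.add_queue_index(queue_index);
//     message_builder.add_data(data_offset);
//     // FinishSizePrefixed writes the 4-byte length prefix this format uses.
//     fbb.FinishSizePrefixed(message_builder.Finish());
//     out->write(reinterpret_cast<const char *>(fbb.GetBufferPointer()),
//                fbb.GetSize());
//   }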