// aos/events/logging/logger.fbs - RealtimeRoboticsGroup/test - Gitiles

 include "aos/configuration.fbs";

 namespace aos.logger;

 // A log file is a sequence of size prefixed flatbuffers.
 // The first flatbuffer will be the LogFileHeader, followed by an arbitrary
 // number of MessageHeaders.
 //
 // The log file starts at the time demarcated in the header on the monotonic
 // clock.  There will be any number of messages per channel logged before the
 // start time.  These messages are logged so that fetchers can retrieve the
 // state of the system at the start of the logfile for things like capturing
 // parameters.  In replay, they should be made available to fetchers, but not
 // trigger watchers.

 // Header flatbuffer stored at the start of every log file and part file.
 table LogFileHeader {
   // Monotonic-clock time, in nanoseconds, at which this log file started,
   // from the perspective of `node`.  When the start is unknown (the log is
   // being recorded from another node whose time offset we don't know), both
   // start timestamps are min_time.
   // Data from before the start times (e.g., fetched message data) may be in
   // this file, but all data *after* start_time should be present through the
   // end of the file (a poorly truncated log may still have incomplete data at
   // the very end).
   monotonic_start_time:int64 = -9223372036854775808 (id: 0);
   // Realtime-clock time, in nanoseconds, at which this log file started.
   // Only populated if logger_node == node.
   realtime_start_time:int64 = -9223372036854775808 (id: 1);

   // Messages are not written to disk in order.  This is the upper bound (in
   // nanoseconds) on how far out of order they can be.  A log reader which
   // buffers until it sees messages this much newer than its simulation time
   // will never encounter a message out of order.
   max_out_of_order_duration:int64 (id: 2);

   // The configuration of the channels.  A log file with only this field
   // filled out is valid: it is a config-only file, which other files point to
   // using configuration_sha256 and optionally configuration_path.
   configuration:aos.Configuration (id: 3);
   // sha256 of the configuration used.  When this is set, configuration will
   // not be set.
   configuration_sha256:string (id: 16);

   // Name of the device which this log file is for.
   name:string (id: 4);

   // The current node, if known and running in a multi-node configuration.
   node:Node (id: 5);

   // All UUIDs are uuid4.

   // A log file is made up of a bunch of log files and parts, which build up a
   // tree.  Every .bfbs file has a LogFileHeader at the start.
   //
   //  /-- basename_pi1_data.part0.bfbs, basename_pi1_data.part1.bfbs, etc.
   // ---- basename_timestamps/pi1/aos/remote_timestamps/pi2/aos.logger.MessageHeader.part0.bfbs, etc.
   //  \-- basename_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part0.bfbs, etc.

   // Shared by all log files and parts from a single logging event, which
   // should be all the files generated on a single node.  Used to correlate
   // files recorded together.
   log_event_uuid:string (id: 6);

   // Shared by all log parts generated by a single Logger instance.
   logger_instance_uuid:string (id: 10);

   // Shared by all log parts generated on a single node while it is booted.
   // It also matches the one used by systemd.
   logger_node_boot_uuid:string (id: 11);

   // Empty if we didn't have one at the time.
   source_node_boot_uuid:string (id: 13);

   // Timestamps that this logfile started at on the logger's clocks.  This is
   // mostly useful when trying to deduce the order of node reboots.  These
   // timestamps don't change on reboot, so they can't be used reliably.
   logger_monotonic_start_time:int64 = -9223372036854775808 (id: 14);
   logger_realtime_start_time:int64 = -9223372036854775808 (id: 15);

   // Shared by all log events, across all nodes, which were produced by a
   // single high-level start event.
   log_start_uuid:string (id: 12);

   // Part files which go together all have headers.  When creating a log file
   // with multiple parts, the logger should stop writing to part n-1 as soon
   // as it starts writing to part n, and write messages as though there was
   // just 1 big file.  Part files are therefore not self standing, since they
   // won't have data fetched at the beginning.

   // If data is logged before the time offset to the other node can be
   // established, the start time will be monotonic_clock::min_time, and a new
   // part file will be created once the start time is known.

   // Shared by all the parts which go together.
   parts_uuid:string (id: 7);
   // Position of this part within the sequence of parts.  The index should
   // start at 0.
   parts_index:int32 (id: 8);

   // The node the data was logged on, if known and running in a multi-node
   // configuration.
   logger_node:Node (id: 9);

   // The boot UUIDs for all nodes we know them for, or "" for the ones we
   // don't.
   boot_uuids:[string] (id: 17);

   // Timestamps at which the header of this part file was written, on the
   // logger node.
   logger_part_monotonic_start_time:int64 = -9223372036854775808 (id: 18);
   logger_part_realtime_start_time:int64 = -9223372036854775808 (id: 19);

   // The timestamps below summarize the oldest messages we know of which
   // crossed the network.  The goal is to let log file sorting determine the
   // order of all boots: corresponding times observed across the network give
   // constraints, and those constraints let us sort a DAG.
   //
   // There are 5 main cases.  Let's say we have 2 channels, both sent from the
   // same remote node: /r which is reliable, and /u which isn't.  Each example
   // line lists the remote node sending the message, and then the local node
   // receiving and logging it.
   //
   // case 0:  /r -> boot 0 received on boot 0.
   //          /u -> boot 0 received on boot 0.
   //  We log for a bit, then the remote reboots.
   //          /r -> boot 1 received on boot 0.
   //          /u -> boot 1 received on boot 0.
   //
   // case 1:  /r -> boot 0 received on boot 0.
   //          /u -> boot 1 received on boot 0.
   //  We start logging after both messages arrive.
   //
   // case 2:  /r -> boot 0 received on boot 0.
   //          /u -> boot 0 received on boot 0.
   //  We log for a bit, then reboot.  More messages show up when we reconnect.
   //          /r -> boot 0 received on boot 1.
   //          /u -> boot 0 received on boot 1.
   //
   // case 3:  /u -> boot 0 received on boot 0.
   //          /r -> boot 1 received on boot 0.
   //          /u -> boot 1 received on boot 0.
   //  We start logging after all three messages arrive.
   //
   // case 4:  /u -> boot 0 received on boot 0.
   //          /r -> boot 1 received on boot 0.
   //
   // In case 0, we have all the messages showing up and a reboot of the remote.
   //
   // In case 1, we only have a reliable timestamp from boot 0, but that
   // reliable timestamp makes it clear that /r was before /u, so boot 0 was
   // before boot 1.
   //
   // In case 2, we have the same reliable timestamp, so that tells us nothing.
   // The unreliable timestamps though tell a different story.  /u will be after
   // /r, since any messages on /u generated before the reboot won't get
   // delivered.  So, we get an ordering constraint saying that any sent /u's
   // on the second boot were after /u on the first boot.
   //
   // In case 3, we only got the reliable message on the second boot for some
   // reason.  Reliable messages aren't 100% reliable.  Here the reliable
   // timestamps are actually a distraction and are misleading; we want to use
   // the unreliable timestamps instead.
   //
   // In case 4, we have utter madness...
   //
   // We expect the nominal case to be case 0, or the first half of case 0 if
   // there are no reboots.
   //
   // We believe that any other cases are covered by the same mechanism.
   // TODO(austin/brian): Shore up this and capture the cases that are 100%
   // ambiguous and we can't sort.
   //
   // For all channels sent from a specific node, these vectors hold the
   // timestamp of the oldest known message from that node, and the
   // corresponding monotonic timestamp for when that was received on our node.
   //
   // The local node is the node that this log file is from the perspective of.
   // NOTE(review): the original comment said "(field 6)"; presumably that means
   // the `node` field (id: 5) -- TODO confirm.
   corrupted_oldest_remote_monotonic_timestamps:[int64] (id: 20, deprecated);
   corrupted_oldest_local_monotonic_timestamps:[int64] (id: 21, deprecated);
   oldest_remote_monotonic_timestamps:[int64] (id: 24);
   oldest_local_monotonic_timestamps:[int64] (id: 25);

   // The same quantity, recorded for all channels *excluding* the reliable
   // channels (ttl == 0).
   corrupted_oldest_remote_unreliable_monotonic_timestamps:[int64] (id: 22, deprecated);
   corrupted_oldest_local_unreliable_monotonic_timestamps:[int64] (id: 23, deprecated);
   oldest_remote_unreliable_monotonic_timestamps:[int64] (id: 26);
   oldest_local_unreliable_monotonic_timestamps:[int64] (id: 27);

   // The same quantity, recorded for all channels *excluding* the unreliable
   // channels (ttl != 0).
   oldest_remote_reliable_monotonic_timestamps:[int64] (id: 28);
   oldest_local_reliable_monotonic_timestamps:[int64] (id: 29);

   // The same quantity for all the remote timestamps which come back to the
   // logger.  Here the "local" time is the logger node boot, and "remote" is
   // the node which sent the timestamps.
   oldest_logger_remote_unreliable_monotonic_timestamps:[int64] (id: 30);
   oldest_logger_local_unreliable_monotonic_timestamps:[int64] (id: 31);

   // Logger build version.  Normally the git sha1 that the logger was built
   // from.
   logger_sha1:string (id: 32);
   // Logger textual version.  Normally the release name stamped into the
   // binary.
   logger_version:string (id: 33);
 }

 // Table holding a message.
 table MessageHeader {
   // Which channel this message belongs to, as an index into the channel
   // datastructure in the log file header.  This provides the data type.
   channel_index:uint32 (id: 0);
   // Monotonic-clock time, in nanoseconds, at which this message was sent on
   // this node.
   monotonic_sent_time:int64 (id: 1);
   // Realtime-clock time, in nanoseconds, at which this message was sent on
   // this node.
   realtime_sent_time:int64 (id: 2);
   // Index of this message in the ipc queue.  This should start with 0 and
   // always monotonically increment if no messages were ever lost.  It will
   // wrap at a multiple of the queue size.
   queue_index:uint32 (id: 3);

   // TODO(austin): Format?  Compressed?

   // The nested flatbuffer.
   data:[ubyte] (id: 4);

   // Monotonic-clock time of the remote node, in nanoseconds, at which this
   // message was sent.
   monotonic_remote_time:int64 = -9223372036854775808 (id: 5);
   // Realtime-clock time of the remote node, in nanoseconds, at which this
   // message was sent.
   realtime_remote_time:int64 = -9223372036854775808 (id: 6);
   // Queue index of this message on the remote node.
   remote_queue_index:uint32 = 4294967295 (id: 7);

   // Monotonic-clock time of the logger node, in nanoseconds, at which this
   // timestamp was received.
   monotonic_timestamp_time:int64 = -9223372036854775808 (id: 8);
 }

 root_type MessageHeader;
	include "aos/configuration.fbs";

	namespace aos.logger;

	// A log file is a sequence of size prefixed flatbuffers.
	// The first flatbuffer will be the LogFileHeader, followed by an arbitrary
	// number of MessageHeaders.
	//
	// The log file starts at the time demarcated in the header on the monotonic
	// clock. There will be any number of messages per channel logged before the
	// start time. These messages are logged so that fetchers can retrieve the
	// state of the system at the start of the logfile for things like capturing
	// parameters. In replay, they should be made available to fetchers, but not
	// trigger watchers.

	// Header flatbuffer stored at the start of every log file and part file.
	table LogFileHeader {
	  // Monotonic-clock time, in nanoseconds, at which this log file started,
	  // from the perspective of `node`.  When the start is unknown (the log is
	  // being recorded from another node whose time offset we don't know), both
	  // start timestamps are min_time.
	  // Data from before the start times (e.g., fetched message data) may be in
	  // this file, but all data after start_time should be present through the
	  // end of the file (a poorly truncated log may still have incomplete data at
	  // the very end).
	  monotonic_start_time:int64 = -9223372036854775808 (id: 0);
	  // Realtime-clock time, in nanoseconds, at which this log file started.
	  // Only populated if logger_node == node.
	  realtime_start_time:int64 = -9223372036854775808 (id: 1);

	  // Messages are not written to disk in order.  This is the upper bound (in
	  // nanoseconds) on how far out of order they can be.  A log reader which
	  // buffers until it sees messages this much newer than its simulation time
	  // will never encounter a message out of order.
	  max_out_of_order_duration:int64 (id: 2);

	  // The configuration of the channels.  A log file with only this field
	  // filled out is valid: it is a config-only file, which other files point to
	  // using configuration_sha256 and optionally configuration_path.
	  configuration:aos.Configuration (id: 3);
	  // sha256 of the configuration used.  When this is set, configuration will
	  // not be set.
	  configuration_sha256:string (id: 16);

	  // Name of the device which this log file is for.
	  name:string (id: 4);

	  // The current node, if known and running in a multi-node configuration.
	  node:Node (id: 5);

	  // All UUIDs are uuid4.

	  // A log file is made up of a bunch of log files and parts, which build up a
	  // tree.  Every .bfbs file has a LogFileHeader at the start.
	  //
	  //  /-- basename_pi1_data.part0.bfbs, basename_pi1_data.part1.bfbs, etc.
	  // ---- basename_timestamps/pi1/aos/remote_timestamps/pi2/aos.logger.MessageHeader.part0.bfbs, etc.
	  //  \-- basename_pi2_data/pi2/aos/aos.message_bridge.Timestamp.part0.bfbs, etc.

	  // Shared by all log files and parts from a single logging event, which
	  // should be all the files generated on a single node.  Used to correlate
	  // files recorded together.
	  log_event_uuid:string (id: 6);

	  // Shared by all log parts generated by a single Logger instance.
	  logger_instance_uuid:string (id: 10);

	  // Shared by all log parts generated on a single node while it is booted.
	  // It also matches the one used by systemd.
	  logger_node_boot_uuid:string (id: 11);

	  // Empty if we didn't have one at the time.
	  source_node_boot_uuid:string (id: 13);

	  // Timestamps that this logfile started at on the logger's clocks.  This is
	  // mostly useful when trying to deduce the order of node reboots.  These
	  // timestamps don't change on reboot, so they can't be used reliably.
	  logger_monotonic_start_time:int64 = -9223372036854775808 (id: 14);
	  logger_realtime_start_time:int64 = -9223372036854775808 (id: 15);

	  // Shared by all log events, across all nodes, which were produced by a
	  // single high-level start event.
	  log_start_uuid:string (id: 12);

	  // Part files which go together all have headers.  When creating a log file
	  // with multiple parts, the logger should stop writing to part n-1 as soon
	  // as it starts writing to part n, and write messages as though there was
	  // just 1 big file.  Part files are therefore not self standing, since they
	  // won't have data fetched at the beginning.

	  // If data is logged before the time offset to the other node can be
	  // established, the start time will be monotonic_clock::min_time, and a new
	  // part file will be created once the start time is known.

	  // Shared by all the parts which go together.
	  parts_uuid:string (id: 7);
	  // Position of this part within the sequence of parts.  The index should
	  // start at 0.
	  parts_index:int32 (id: 8);

	  // The node the data was logged on, if known and running in a multi-node
	  // configuration.
	  logger_node:Node (id: 9);

	  // The boot UUIDs for all nodes we know them for, or "" for the ones we
	  // don't.
	  boot_uuids:[string] (id: 17);

	  // Timestamps at which the header of this part file was written, on the
	  // logger node.
	  logger_part_monotonic_start_time:int64 = -9223372036854775808 (id: 18);
	  logger_part_realtime_start_time:int64 = -9223372036854775808 (id: 19);

	  // The timestamps below summarize the oldest messages we know of which
	  // crossed the network.  The goal is to let log file sorting determine the
	  // order of all boots: corresponding times observed across the network give
	  // constraints, and those constraints let us sort a DAG.
	  //
	  // There are 5 main cases.  Let's say we have 2 channels, both sent from the
	  // same remote node: /r which is reliable, and /u which isn't.  Each example
	  // line lists the remote node sending the message, and then the local node
	  // receiving and logging it.
	  //
	  // case 0:  /r -> boot 0 received on boot 0.
	  //          /u -> boot 0 received on boot 0.
	  //  We log for a bit, then the remote reboots.
	  //          /r -> boot 1 received on boot 0.
	  //          /u -> boot 1 received on boot 0.
	  //
	  // case 1:  /r -> boot 0 received on boot 0.
	  //          /u -> boot 1 received on boot 0.
	  //  We start logging after both messages arrive.
	  //
	  // case 2:  /r -> boot 0 received on boot 0.
	  //          /u -> boot 0 received on boot 0.
	  //  We log for a bit, then reboot.  More messages show up when we reconnect.
	  //          /r -> boot 0 received on boot 1.
	  //          /u -> boot 0 received on boot 1.
	  //
	  // case 3:  /u -> boot 0 received on boot 0.
	  //          /r -> boot 1 received on boot 0.
	  //          /u -> boot 1 received on boot 0.
	  //  We start logging after all three messages arrive.
	  //
	  // case 4:  /u -> boot 0 received on boot 0.
	  //          /r -> boot 1 received on boot 0.
	  //
	  // In case 0, we have all the messages showing up and a reboot of the remote.
	  //
	  // In case 1, we only have a reliable timestamp from boot 0, but that
	  // reliable timestamp makes it clear that /r was before /u, so boot 0 was
	  // before boot 1.
	  //
	  // In case 2, we have the same reliable timestamp, so that tells us nothing.
	  // The unreliable timestamps though tell a different story.  /u will be after
	  // /r, since any messages on /u generated before the reboot won't get
	  // delivered.  So, we get an ordering constraint saying that any sent /u's
	  // on the second boot were after /u on the first boot.
	  //
	  // In case 3, we only got the reliable message on the second boot for some
	  // reason.  Reliable messages aren't 100% reliable.  Here the reliable
	  // timestamps are actually a distraction and are misleading; we want to use
	  // the unreliable timestamps instead.
	  //
	  // In case 4, we have utter madness...
	  //
	  // We expect the nominal case to be case 0, or the first half of case 0 if
	  // there are no reboots.
	  //
	  // We believe that any other cases are covered by the same mechanism.
	  // TODO(austin/brian): Shore up this and capture the cases that are 100%
	  // ambiguous and we can't sort.
	  //
	  // For all channels sent from a specific node, these vectors hold the
	  // timestamp of the oldest known message from that node, and the
	  // corresponding monotonic timestamp for when that was received on our node.
	  //
	  // The local node is the node that this log file is from the perspective of.
	  // NOTE(review): the original comment said "(field 6)"; presumably that means
	  // the `node` field (id: 5) -- TODO confirm.
	  corrupted_oldest_remote_monotonic_timestamps:[int64] (id: 20, deprecated);
	  corrupted_oldest_local_monotonic_timestamps:[int64] (id: 21, deprecated);
	  oldest_remote_monotonic_timestamps:[int64] (id: 24);
	  oldest_local_monotonic_timestamps:[int64] (id: 25);

	  // The same quantity, recorded for all channels excluding the reliable
	  // channels (ttl == 0).
	  corrupted_oldest_remote_unreliable_monotonic_timestamps:[int64] (id: 22, deprecated);
	  corrupted_oldest_local_unreliable_monotonic_timestamps:[int64] (id: 23, deprecated);
	  oldest_remote_unreliable_monotonic_timestamps:[int64] (id: 26);
	  oldest_local_unreliable_monotonic_timestamps:[int64] (id: 27);

	  // The same quantity, recorded for all channels excluding the unreliable
	  // channels (ttl != 0).
	  oldest_remote_reliable_monotonic_timestamps:[int64] (id: 28);
	  oldest_local_reliable_monotonic_timestamps:[int64] (id: 29);

	  // The same quantity for all the remote timestamps which come back to the
	  // logger.  Here the "local" time is the logger node boot, and "remote" is
	  // the node which sent the timestamps.
	  oldest_logger_remote_unreliable_monotonic_timestamps:[int64] (id: 30);
	  oldest_logger_local_unreliable_monotonic_timestamps:[int64] (id: 31);

	  // Logger build version.  Normally the git sha1 that the logger was built
	  // from.
	  logger_sha1:string (id: 32);
	  // Logger textual version.  Normally the release name stamped into the
	  // binary.
	  logger_version:string (id: 33);
	}

	// Table holding a message.
	table MessageHeader {
	  // Which channel this message belongs to, as an index into the channel
	  // datastructure in the log file header.  This provides the data type.
	  channel_index:uint32 (id: 0);
	  // Monotonic-clock time, in nanoseconds, at which this message was sent on
	  // this node.
	  monotonic_sent_time:int64 (id: 1);
	  // Realtime-clock time, in nanoseconds, at which this message was sent on
	  // this node.
	  realtime_sent_time:int64 (id: 2);
	  // Index of this message in the ipc queue.  This should start with 0 and
	  // always monotonically increment if no messages were ever lost.  It will
	  // wrap at a multiple of the queue size.
	  queue_index:uint32 (id: 3);

	  // TODO(austin): Format?  Compressed?

	  // The nested flatbuffer.
	  data:[ubyte] (id: 4);

	  // Monotonic-clock time of the remote node, in nanoseconds, at which this
	  // message was sent.
	  monotonic_remote_time:int64 = -9223372036854775808 (id: 5);
	  // Realtime-clock time of the remote node, in nanoseconds, at which this
	  // message was sent.
	  realtime_remote_time:int64 = -9223372036854775808 (id: 6);
	  // Queue index of this message on the remote node.
	  remote_queue_index:uint32 = 4294967295 (id: 7);

	  // Monotonic-clock time of the logger node, in nanoseconds, at which this
	  // timestamp was received.
	  monotonic_timestamp_time:int64 = -9223372036854775808 (id: 8);
	}

	root_type MessageHeader;