Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 1 | #ifndef AOS_EVENTS_LOGGING_LOG_NAMER_H_ |
| 2 | #define AOS_EVENTS_LOGGING_LOG_NAMER_H_ |
| 3 | |
| 4 | #include <functional> |
| 5 | #include <map> |
| 6 | #include <memory> |
| 7 | #include <string_view> |
| 8 | #include <vector> |
| 9 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 10 | #include "absl/container/btree_map.h" |
Philipp Schrader | 790cb54 | 2023-07-05 21:06:52 -0700 | [diff] [blame] | 11 | #include "flatbuffers/flatbuffers.h" |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 12 | #include "glog/logging.h" |
Philipp Schrader | 790cb54 | 2023-07-05 21:06:52 -0700 | [diff] [blame] | 13 | |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 14 | #include "aos/events/logging/logfile_utils.h" |
| 15 | #include "aos/events/logging/logger_generated.h" |
Austin Schuh | 4385b14 | 2021-03-14 21:31:13 -0700 | [diff] [blame] | 16 | #include "aos/uuid.h" |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 17 | |
| 18 | namespace aos { |
| 19 | namespace logger { |
| 20 | |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 21 | class LogNamer; |
| 22 | |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 23 | // TODO(austin): Rename this back to DataWriter once all other callers are of |
| 24 | // the old DataWriter. |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 25 | // |
| 26 | // Class to manage writing data to log files. This lets us track which boot the |
| 27 | // written header has in it, and if the header has been written or not. |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 28 | // |
| 29 | // The design of this class is that instead of being notified when any of the |
| 30 | // header data changes, it polls and owns that decision. This makes it much |
| 31 | // harder to write corrupted data. If that becomes a performance problem, we |
| 32 | // can DCHECK and take it out of production binaries. |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 33 | class NewDataWriter { |
| 34 | public: |
| 35 | // Constructs a NewDataWriter. |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 36 | // log_namer is the log namer which holds the config and any other data we |
| 37 | // need for our header. |
| 38 | // node is the node whom's prespective we are logging from. |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 39 | // reopen is called whenever a file needs to be reopened. |
| 40 | // close is called to close that file and extract any statistics. |
Austin Schuh | f5f99f3 | 2022-02-07 20:05:37 -0800 | [diff] [blame] | 41 | NewDataWriter(LogNamer *log_namer, const Node *node, const Node *logger_node, |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 42 | std::function<void(NewDataWriter *)> reopen, |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 43 | std::function<void(NewDataWriter *)> close, |
| 44 | size_t max_message_size); |
| 45 | |
| 46 | void UpdateMaxMessageSize(size_t new_size) { |
| 47 | if (new_size > max_message_size_) { |
Alexei Strots | bc082d8 | 2023-05-03 08:43:42 -0700 | [diff] [blame] | 48 | CHECK(!header_written_) << ": Tried to update to " << new_size << ", was " |
| 49 | << max_message_size_ << " for " << name(); |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 50 | max_message_size_ = new_size; |
| 51 | } |
| 52 | } |
| 53 | size_t max_message_size() const { return max_message_size_; } |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 54 | |
| 55 | NewDataWriter(NewDataWriter &&other) = default; |
| 56 | aos::logger::NewDataWriter &operator=(NewDataWriter &&other) = default; |
| 57 | NewDataWriter(const NewDataWriter &) = delete; |
| 58 | void operator=(const NewDataWriter &) = delete; |
| 59 | |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 60 | ~NewDataWriter(); |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 61 | |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 62 | // Rotates the log file, delaying writing the new header until data arrives. |
| 63 | void Rotate(); |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 64 | |
Austin Schuh | f5f99f3 | 2022-02-07 20:05:37 -0800 | [diff] [blame] | 65 | // Updates all the metadata in the log file about the remote node which this |
| 66 | // message is from. |
Austin Schuh | 72211ae | 2021-08-05 14:02:30 -0700 | [diff] [blame] | 67 | void UpdateRemote(size_t remote_node_index, const UUID &remote_node_boot_uuid, |
| 68 | monotonic_clock::time_point monotonic_remote_time, |
| 69 | monotonic_clock::time_point monotonic_event_time, |
Austin Schuh | f5f99f3 | 2022-02-07 20:05:37 -0800 | [diff] [blame] | 70 | bool reliable, |
| 71 | monotonic_clock::time_point monotonic_timestamp_time = |
| 72 | monotonic_clock::min_time); |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 73 | // Coppies a message with the provided boot UUID. |
| 74 | void CopyMessage(DataEncoder::Copier *coppier, |
| 75 | const UUID &source_node_boot_uuid, |
| 76 | aos::monotonic_clock::time_point now); |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 77 | |
Austin Schuh | 5e14d84 | 2022-01-21 12:02:15 -0800 | [diff] [blame] | 78 | // Updates the current boot for the source node. This is useful when you want |
| 79 | // to queue a message that may trigger a reboot rotation, but then need to |
| 80 | // update the remote timestamps. |
| 81 | void UpdateBoot(const UUID &source_node_boot_uuid); |
| 82 | |
Alexei Strots | bc082d8 | 2023-05-03 08:43:42 -0700 | [diff] [blame] | 83 | // Returns the name of the writer. It may be a filename, but assume it is not. |
| 84 | std::string_view name() const { return writer ? writer->name() : "(closed)"; } |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 85 | |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 86 | void Close(); |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 87 | |
| 88 | std::unique_ptr<DetachedBufferWriter> writer = nullptr; |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 89 | |
| 90 | size_t node_index() const { return node_index_; } |
| 91 | const UUID &parts_uuid() const { return parts_uuid_; } |
| 92 | size_t parts_index() const { return parts_index_; } |
| 93 | const Node *node() const { return node_; } |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 94 | |
Austin Schuh | 72211ae | 2021-08-05 14:02:30 -0700 | [diff] [blame] | 95 | // Datastructure used to capture all the information about a remote node. |
| 96 | struct State { |
| 97 | // Boot UUID of the node. |
| 98 | UUID boot_uuid = UUID::Zero(); |
| 99 | // Timestamp on the remote monotonic clock of the oldest message sent to |
| 100 | // node_index_. |
| 101 | monotonic_clock::time_point oldest_remote_monotonic_timestamp = |
| 102 | monotonic_clock::max_time; |
| 103 | // Timestamp on the local monotonic clock of the message in |
| 104 | // oldest_remote_monotonic_timestamp. |
| 105 | monotonic_clock::time_point oldest_local_monotonic_timestamp = |
| 106 | monotonic_clock::max_time; |
| 107 | // Timestamp on the remote monotonic clock of the oldest message sent to |
| 108 | // node_index_, excluding messages forwarded with time_to_live() == 0. |
| 109 | monotonic_clock::time_point oldest_remote_unreliable_monotonic_timestamp = |
| 110 | monotonic_clock::max_time; |
| 111 | // Timestamp on the local monotonic clock of the message in |
| 112 | // oldest_local_unreliable_monotonic_timestamp. |
| 113 | monotonic_clock::time_point oldest_local_unreliable_monotonic_timestamp = |
| 114 | monotonic_clock::max_time; |
Austin Schuh | bfe6c57 | 2022-01-27 20:48:20 -0800 | [diff] [blame] | 115 | |
| 116 | // Timestamp on the remote monotonic clock of the oldest message sent to |
| 117 | // node_index_, only including messages forwarded with time_to_live() == 0. |
| 118 | monotonic_clock::time_point oldest_remote_reliable_monotonic_timestamp = |
| 119 | monotonic_clock::max_time; |
| 120 | // Timestamp on the local monotonic clock of the message in |
| 121 | // oldest_local_reliable_monotonic_timestamp. |
| 122 | monotonic_clock::time_point oldest_local_reliable_monotonic_timestamp = |
| 123 | monotonic_clock::max_time; |
Austin Schuh | f5f99f3 | 2022-02-07 20:05:37 -0800 | [diff] [blame] | 124 | |
| 125 | // Timestamp on the remote monotonic clock of the oldest message timestamp |
| 126 | // sent back to logger_node_index_. The remote here will be the node this |
| 127 | // part is from the perspective of, ie node_index_. |
| 128 | monotonic_clock::time_point |
| 129 | oldest_logger_remote_unreliable_monotonic_timestamp = |
| 130 | monotonic_clock::max_time; |
| 131 | // The time on the monotonic clock of the logger when this timestamp made it |
| 132 | // back to the logger (logger_node_index_). |
| 133 | monotonic_clock::time_point |
| 134 | oldest_logger_local_unreliable_monotonic_timestamp = |
| 135 | monotonic_clock::max_time; |
Austin Schuh | 72211ae | 2021-08-05 14:02:30 -0700 | [diff] [blame] | 136 | }; |
| 137 | |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 138 | private: |
Austin Schuh | e46492f | 2021-07-31 19:49:41 -0700 | [diff] [blame] | 139 | // Signals that a node has rebooted. |
Austin Schuh | 5e14d84 | 2022-01-21 12:02:15 -0800 | [diff] [blame] | 140 | void Reboot(const UUID &source_node_boot_uuid); |
Austin Schuh | e46492f | 2021-07-31 19:49:41 -0700 | [diff] [blame] | 141 | |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 142 | void QueueHeader( |
| 143 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> &&header); |
| 144 | |
Austin Schuh | e46492f | 2021-07-31 19:49:41 -0700 | [diff] [blame] | 145 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> MakeHeader(); |
| 146 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 147 | monotonic_clock::time_point monotonic_start_time_ = monotonic_clock::min_time; |
| 148 | |
Austin Schuh | 577610e | 2021-12-08 12:07:19 -0800 | [diff] [blame] | 149 | const Node *node_ = nullptr; |
| 150 | size_t node_index_ = 0; |
Austin Schuh | f5f99f3 | 2022-02-07 20:05:37 -0800 | [diff] [blame] | 151 | size_t logger_node_index_ = 0; |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 152 | LogNamer *log_namer_; |
| 153 | UUID parts_uuid_ = UUID::Random(); |
| 154 | size_t parts_index_ = 0; |
| 155 | |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 156 | std::function<void(NewDataWriter *)> reopen_; |
| 157 | std::function<void(NewDataWriter *)> close_; |
Austin Schuh | 572924a | 2021-07-30 22:32:12 -0700 | [diff] [blame] | 158 | bool header_written_ = false; |
Austin Schuh | e46492f | 2021-07-31 19:49:41 -0700 | [diff] [blame] | 159 | |
Austin Schuh | 72211ae | 2021-08-05 14:02:30 -0700 | [diff] [blame] | 160 | std::vector<State> state_; |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 161 | |
| 162 | size_t max_message_size_; |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 163 | }; |
| 164 | |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 165 | // Interface describing how to name, track, and add headers to log file parts. |
| 166 | class LogNamer { |
| 167 | public: |
| 168 | // Constructs a LogNamer with the primary node (ie the one the logger runs on) |
| 169 | // being node. |
Austin Schuh | 5b728b7 | 2021-06-16 14:57:15 -0700 | [diff] [blame] | 170 | LogNamer(const aos::Configuration *configuration, EventLoop *event_loop, |
| 171 | const aos::Node *node) |
Austin Schuh | a499cea | 2021-07-31 19:49:53 -0700 | [diff] [blame] | 172 | : event_loop_(event_loop), |
Austin Schuh | 5b728b7 | 2021-06-16 14:57:15 -0700 | [diff] [blame] | 173 | configuration_(configuration), |
| 174 | node_(node), |
Austin Schuh | a499cea | 2021-07-31 19:49:53 -0700 | [diff] [blame] | 175 | logger_node_index_(configuration::GetNodeIndex(configuration_, node_)) { |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 176 | nodes_.emplace_back(node_); |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 177 | } |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 178 | virtual ~LogNamer() = default; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 179 | |
Brian Silverman | 87ac040 | 2020-09-17 14:47:01 -0700 | [diff] [blame] | 180 | // Returns a writer for writing data from messages on this channel (on the |
| 181 | // primary node). |
| 182 | // |
| 183 | // The returned pointer will stay valid across rotations, but the object it |
| 184 | // points to will be assigned to. |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 185 | virtual NewDataWriter *MakeWriter(const Channel *channel) = 0; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 186 | |
Brian Silverman | 87ac040 | 2020-09-17 14:47:01 -0700 | [diff] [blame] | 187 | // Returns a writer for writing timestamps from messages on this channel (on |
| 188 | // the primary node). |
| 189 | // |
| 190 | // The returned pointer will stay valid across rotations, but the object it |
| 191 | // points to will be assigned to. |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 192 | virtual NewDataWriter *MakeTimestampWriter(const Channel *channel) = 0; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 193 | |
| 194 | // Returns a writer for writing timestamps delivered over the special |
| 195 | // /aos/remote_timestamps/* channels. node is the node that the timestamps |
Brian Silverman | 87ac040 | 2020-09-17 14:47:01 -0700 | [diff] [blame] | 196 | // are forwarded back from (to the primary node). |
| 197 | // |
| 198 | // The returned pointer will stay valid across rotations, but the object it |
| 199 | // points to will be assigned to. |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 200 | virtual NewDataWriter *MakeForwardedTimestampWriter(const Channel *channel, |
| 201 | const Node *node) = 0; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 202 | |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 203 | // Rotates all log files for the provided node. |
| 204 | virtual void Rotate(const Node *node) = 0; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 205 | |
| 206 | // Returns all the nodes that data is being written for. |
| 207 | const std::vector<const Node *> &nodes() const { return nodes_; } |
| 208 | |
Austin Schuh | 08dba8f | 2023-05-01 08:29:30 -0700 | [diff] [blame] | 209 | // Closes all existing log data writers. No more data may be written after |
| 210 | // this. |
| 211 | virtual WriteCode Close() = 0; |
| 212 | |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 213 | // Returns the node the logger is running on. |
| 214 | const Node *node() const { return node_; } |
Austin Schuh | e46492f | 2021-07-31 19:49:41 -0700 | [diff] [blame] | 215 | const UUID &logger_node_boot_uuid() const { return logger_node_boot_uuid_; } |
| 216 | size_t logger_node_index() const { return logger_node_index_; } |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 217 | |
Austin Schuh | 8c39996 | 2020-12-25 21:51:45 -0800 | [diff] [blame] | 218 | // Writes out the nested Configuration object to the config file location. |
| 219 | virtual void WriteConfiguration( |
| 220 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> *header, |
| 221 | std::string_view config_sha256) = 0; |
| 222 | |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 223 | void SetHeaderTemplate( |
| 224 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> header) { |
| 225 | header_ = std::move(header); |
Austin Schuh | e46492f | 2021-07-31 19:49:41 -0700 | [diff] [blame] | 226 | logger_node_boot_uuid_ = |
| 227 | UUID::FromString(header_.message().logger_node_boot_uuid()); |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 228 | } |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 229 | |
Austin Schuh | 60e7794 | 2022-05-16 17:48:24 -0700 | [diff] [blame] | 230 | void ClearStartTimes() { node_states_.clear(); } |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 231 | |
| 232 | void SetStartTimes(size_t node_index, const UUID &boot_uuid, |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 233 | monotonic_clock::time_point monotonic_start_time, |
| 234 | realtime_clock::time_point realtime_start_time, |
| 235 | monotonic_clock::time_point logger_monotonic_start_time, |
| 236 | realtime_clock::time_point logger_realtime_start_time) { |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 237 | VLOG(1) << "Setting node " << node_index << " to start time " |
| 238 | << monotonic_start_time << " rt " << realtime_start_time << " UUID " |
| 239 | << boot_uuid; |
| 240 | NodeState *node_state = GetNodeState(node_index, boot_uuid); |
| 241 | node_state->monotonic_start_time = monotonic_start_time; |
| 242 | node_state->realtime_start_time = realtime_start_time; |
| 243 | node_state->logger_monotonic_start_time = logger_monotonic_start_time; |
| 244 | node_state->logger_realtime_start_time = logger_realtime_start_time; |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 245 | } |
| 246 | |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 247 | monotonic_clock::time_point monotonic_start_time(size_t node_index, |
| 248 | const UUID &boot_uuid) { |
| 249 | DCHECK_NE(boot_uuid, UUID::Zero()); |
| 250 | |
| 251 | NodeState *node_state = GetNodeState(node_index, boot_uuid); |
| 252 | return node_state->monotonic_start_time; |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 253 | } |
| 254 | |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 255 | protected: |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 256 | // Structure with state per node about times and such. |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 257 | struct NodeState { |
| 258 | // Time when this node started logging. |
| 259 | monotonic_clock::time_point monotonic_start_time = |
| 260 | monotonic_clock::min_time; |
| 261 | realtime_clock::time_point realtime_start_time = realtime_clock::min_time; |
| 262 | |
| 263 | // Corresponding time on the logger node when it started logging. |
| 264 | monotonic_clock::time_point logger_monotonic_start_time = |
| 265 | monotonic_clock::min_time; |
| 266 | realtime_clock::time_point logger_realtime_start_time = |
| 267 | realtime_clock::min_time; |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 268 | }; |
Austin Schuh | 58646e2 | 2021-08-23 23:51:46 -0700 | [diff] [blame] | 269 | |
| 270 | // Creates a new header by copying fields out of the template and combining |
| 271 | // them with the arguments provided. |
| 272 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> MakeHeader( |
| 273 | size_t node_index, const std::vector<NewDataWriter::State> &state, |
| 274 | const UUID &parts_uuid, int parts_index); |
| 275 | |
| 276 | EventLoop *event_loop_; |
| 277 | const Configuration *const configuration_; |
| 278 | const Node *const node_; |
| 279 | const size_t logger_node_index_; |
| 280 | UUID logger_node_boot_uuid_; |
| 281 | std::vector<const Node *> nodes_; |
| 282 | |
| 283 | friend NewDataWriter; |
| 284 | |
| 285 | // Returns the start/stop time state structure for a node and boot. We can |
| 286 | // have data from multiple boots, and it makes sense to reuse the start/stop |
| 287 | // times if we get data from the same boot again. |
| 288 | NodeState *GetNodeState(size_t node_index, const UUID &boot_uuid); |
| 289 | |
| 290 | absl::btree_map<std::pair<size_t, UUID>, NodeState> node_states_; |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 291 | |
| 292 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> header_ = |
| 293 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader>::Empty(); |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 294 | }; |
| 295 | |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 296 | // Log namer which uses a config to name a bunch of files. |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 297 | class MultiNodeLogNamer : public LogNamer { |
| 298 | public: |
Alexei Strots | caf17d3 | 2023-04-03 22:31:11 -0700 | [diff] [blame] | 299 | MultiNodeLogNamer(std::unique_ptr<LogBackend> log_backend, |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 300 | EventLoop *event_loop); |
Alexei Strots | caf17d3 | 2023-04-03 22:31:11 -0700 | [diff] [blame] | 301 | MultiNodeLogNamer(std::unique_ptr<LogBackend> log_backend, |
Austin Schuh | 5b728b7 | 2021-06-16 14:57:15 -0700 | [diff] [blame] | 302 | const Configuration *configuration, EventLoop *event_loop, |
| 303 | const Node *node); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 304 | ~MultiNodeLogNamer() override; |
| 305 | |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 306 | // Sets the function for creating encoders. The argument is the max message |
| 307 | // size (including headers) that will be written into this encoder. |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 308 | // |
| 309 | // Defaults to just creating DummyEncoders. |
| 310 | void set_encoder_factory( |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 311 | std::function<std::unique_ptr<DataEncoder>(size_t)> encoder_factory) { |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 312 | encoder_factory_ = std::move(encoder_factory); |
| 313 | } |
| 314 | |
| 315 | // Sets an additional file extension. |
| 316 | // |
| 317 | // Defaults to nothing. |
| 318 | void set_extension(std::string_view extension) { extension_ = extension; } |
Brian Silverman | 1f34522 | 2020-09-24 21:14:48 -0700 | [diff] [blame] | 319 | |
Brian Silverman | a621f52 | 2020-09-30 16:52:43 -0700 | [diff] [blame] | 320 | // A list of all the filenames we've written. |
| 321 | // |
| 322 | // This only includes the part after base_name(). |
| 323 | const std::vector<std::string> &all_filenames() const { |
| 324 | return all_filenames_; |
| 325 | } |
| 326 | |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 327 | void Rotate(const Node *node) override; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 328 | |
Austin Schuh | 8c39996 | 2020-12-25 21:51:45 -0800 | [diff] [blame] | 329 | void WriteConfiguration( |
| 330 | aos::SizePrefixedFlatbufferDetachedBuffer<LogFileHeader> *header, |
| 331 | std::string_view config_sha256) override; |
| 332 | |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 333 | NewDataWriter *MakeWriter(const Channel *channel) override; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 334 | |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 335 | NewDataWriter *MakeForwardedTimestampWriter(const Channel *channel, |
Austin Schuh | 7334084 | 2021-07-30 22:32:06 -0700 | [diff] [blame] | 336 | const Node *node) override; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 337 | |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 338 | NewDataWriter *MakeTimestampWriter(const Channel *channel) override; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 339 | |
Brian Silverman | 0465fcf | 2020-09-24 00:29:18 -0700 | [diff] [blame] | 340 | // Indicates that at least one file ran out of space. Once this happens, we |
| 341 | // stop trying to open new files, to avoid writing any files with holes from |
| 342 | // previous parts. |
| 343 | // |
| 344 | // Besides this function, this object will silently stop logging data when |
| 345 | // this occurs. If you want to ensure log files are complete, you must call |
| 346 | // this method. |
Brian Silverman | a9f2ec9 | 2020-10-06 18:00:53 -0700 | [diff] [blame] | 347 | bool ran_out_of_space() const { |
| 348 | return accumulate_data_writers<bool>( |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 349 | ran_out_of_space_, [](bool x, const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 350 | CHECK_NOTNULL(data_writer.writer); |
Brian Silverman | a9f2ec9 | 2020-10-06 18:00:53 -0700 | [diff] [blame] | 351 | return x || |
| 352 | (data_writer.writer && data_writer.writer->ran_out_of_space()); |
| 353 | }); |
| 354 | } |
Brian Silverman | 0465fcf | 2020-09-24 00:29:18 -0700 | [diff] [blame] | 355 | |
Brian Silverman | 1f34522 | 2020-09-24 21:14:48 -0700 | [diff] [blame] | 356 | // Returns the maximum total_bytes() value for all existing |
| 357 | // DetachedBufferWriters. |
| 358 | // |
| 359 | // Returns 0 if no files are open. |
| 360 | size_t maximum_total_bytes() const { |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 361 | return accumulate_data_writers<size_t>( |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 362 | 0, [](size_t x, const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 363 | CHECK_NOTNULL(data_writer.writer); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 364 | return std::max(x, data_writer.writer->total_bytes()); |
| 365 | }); |
Brian Silverman | 1f34522 | 2020-09-24 21:14:48 -0700 | [diff] [blame] | 366 | } |
| 367 | |
Brian Silverman | 0465fcf | 2020-09-24 00:29:18 -0700 | [diff] [blame] | 368 | // Closes all existing log files. No more data may be written after this. |
| 369 | // |
| 370 | // This may set ran_out_of_space(). |
Austin Schuh | 08dba8f | 2023-05-01 08:29:30 -0700 | [diff] [blame] | 371 | WriteCode Close() override; |
Brian Silverman | 0465fcf | 2020-09-24 00:29:18 -0700 | [diff] [blame] | 372 | |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 373 | // Accessors for various statistics. See the identically-named methods in |
| 374 | // DetachedBufferWriter for documentation. These are aggregated across all |
| 375 | // past and present DetachedBufferWriters. |
| 376 | std::chrono::nanoseconds max_write_time() const { |
| 377 | return accumulate_data_writers( |
| 378 | max_write_time_, |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 379 | [](std::chrono::nanoseconds x, const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 380 | CHECK_NOTNULL(data_writer.writer); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 381 | return std::max( |
| 382 | x, data_writer.writer->WriteStatistics()->max_write_time()); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 383 | }); |
| 384 | } |
| 385 | int max_write_time_bytes() const { |
| 386 | return std::get<0>(accumulate_data_writers( |
| 387 | std::make_tuple(max_write_time_bytes_, max_write_time_), |
| 388 | [](std::tuple<int, std::chrono::nanoseconds> x, |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 389 | const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 390 | CHECK_NOTNULL(data_writer.writer); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 391 | if (data_writer.writer->WriteStatistics()->max_write_time() > |
| 392 | std::get<1>(x)) { |
| 393 | return std::make_tuple( |
| 394 | data_writer.writer->WriteStatistics()->max_write_time_bytes(), |
| 395 | data_writer.writer->WriteStatistics()->max_write_time()); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 396 | } |
| 397 | return x; |
| 398 | })); |
| 399 | } |
| 400 | int max_write_time_messages() const { |
| 401 | return std::get<0>(accumulate_data_writers( |
| 402 | std::make_tuple(max_write_time_messages_, max_write_time_), |
| 403 | [](std::tuple<int, std::chrono::nanoseconds> x, |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 404 | const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 405 | CHECK_NOTNULL(data_writer.writer); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 406 | if (data_writer.writer->WriteStatistics()->max_write_time() > |
| 407 | std::get<1>(x)) { |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 408 | return std::make_tuple( |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 409 | data_writer.writer->WriteStatistics() |
| 410 | ->max_write_time_messages(), |
| 411 | data_writer.writer->WriteStatistics()->max_write_time()); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 412 | } |
| 413 | return x; |
| 414 | })); |
| 415 | } |
| 416 | std::chrono::nanoseconds total_write_time() const { |
| 417 | return accumulate_data_writers( |
| 418 | total_write_time_, |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 419 | [](std::chrono::nanoseconds x, const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 420 | CHECK_NOTNULL(data_writer.writer); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 421 | return x + data_writer.writer->WriteStatistics()->total_write_time(); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 422 | }); |
| 423 | } |
| 424 | int total_write_count() const { |
| 425 | return accumulate_data_writers( |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 426 | total_write_count_, [](int x, const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 427 | CHECK_NOTNULL(data_writer.writer); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 428 | return x + data_writer.writer->WriteStatistics()->total_write_count(); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 429 | }); |
| 430 | } |
| 431 | int total_write_messages() const { |
| 432 | return accumulate_data_writers( |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 433 | total_write_messages_, [](int x, const NewDataWriter &data_writer) { |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 434 | return x + |
| 435 | data_writer.writer->WriteStatistics()->total_write_messages(); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 436 | }); |
| 437 | } |
| 438 | int total_write_bytes() const { |
| 439 | return accumulate_data_writers( |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 440 | total_write_bytes_, [](int x, const NewDataWriter &data_writer) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 441 | CHECK_NOTNULL(data_writer.writer); |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 442 | return x + data_writer.writer->WriteStatistics()->total_write_bytes(); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 443 | }); |
| 444 | } |
| 445 | |
| 446 | void ResetStatistics(); |
| 447 | |
Alexei Strots | caf17d3 | 2023-04-03 22:31:11 -0700 | [diff] [blame] | 448 | protected: |
// TODO (Alexei): consider moving ownership of log_namer to the concrete
// subclass and making log_backend_ a raw pointer.
| 451 | LogBackend *log_backend() { return log_backend_.get(); } |
| 452 | const LogBackend *log_backend() const { return log_backend_.get(); } |
| 453 | |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 454 | private: |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 455 | // Opens up a writer for timestamps forwarded back. |
| 456 | void OpenForwardedTimestampWriter(const Channel *channel, |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 457 | NewDataWriter *data_writer); |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 458 | |
| 459 | // Opens up a writer for remote data. |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 460 | void OpenWriter(const Channel *channel, NewDataWriter *data_writer); |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 461 | |
| 462 | // Opens the main data writer file for this node responsible for data_writer_. |
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 463 | void MakeDataWriter(); |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 464 | |
// NOTE(review): defined out of line; presumably creates a DetachedBufferWriter
// for `path` and stores it in *destination -- confirm against the definition.
Austin Schuh | 48d10d6 | 2022-10-16 22:19:23 -0700 | [diff] [blame] | 465 | void CreateBufferWriter(std::string_view path, size_t max_message_size, |
Brian Silverman | 0465fcf | 2020-09-24 00:29:18 -0700 | [diff] [blame] | 466 | std::unique_ptr<DetachedBufferWriter> *destination); |
| 467 | |
// NOTE(review): defined out of line; presumably closes the writer held in
// *writer_pointer -- confirm against the definition.
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 468 | void CloseWriter(std::unique_ptr<DetachedBufferWriter> *writer_pointer); |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 469 | |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 470 | // A version of std::accumulate which operates over all of our DataWriters. |
| 471 | template <typename T, typename BinaryOperation> |
| 472 | T accumulate_data_writers(T t, BinaryOperation op) const { |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 473 | for (const std::pair<const Channel *const, NewDataWriter> &data_writer : |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 474 | data_writers_) { |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 475 | if (data_writer.second.writer != nullptr) { |
| 476 | t = op(std::move(t), data_writer.second); |
| 477 | } |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 478 | } |
Maxwell Gumley | 8ad7778 | 2023-07-11 13:27:03 -0600 | [diff] [blame^] | 479 | if (data_writer_ != nullptr && data_writer_->writer != nullptr) { |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 480 | t = op(std::move(t), *data_writer_); |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 481 | } |
| 482 | return t; |
| 483 | } |
| 484 | |
// Owned backend; handed out as a raw pointer by log_backend().
Alexei Strots | caf17d3 | 2023-04-03 22:31:11 -0700 | [diff] [blame] | 485 | std::unique_ptr<LogBackend> log_backend_; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 486 | |
Brian Silverman | 0465fcf | 2020-09-24 00:29:18 -0700 | [diff] [blame] | 487 | bool ran_out_of_space_ = false; |
Brian Silverman | a621f52 | 2020-09-30 16:52:43 -0700 | [diff] [blame] | 488 | std::vector<std::string> all_filenames_; |
Brian Silverman | 0465fcf | 2020-09-24 00:29:18 -0700 | [diff] [blame] | 489 | |
// Produces a DataEncoder from a size_t argument.
// NOTE(review): the argument is presumably a maximum message size -- confirm.
Austin Schuh | 8bdfc49 | 2023-02-11 12:53:13 -0800 | [diff] [blame] | 490 | std::function<std::unique_ptr<DataEncoder>(size_t)> encoder_factory_; |
Brian Silverman | cb80582 | 2020-10-06 17:43:35 -0700 | [diff] [blame] | 491 | std::string extension_; |
| 492 | |
| 493 | // Storage for statistics from previously-rotated DetachedBufferWriters. |
| 494 | std::chrono::nanoseconds max_write_time_ = std::chrono::nanoseconds::zero(); |
| 495 | int max_write_time_bytes_ = -1; |
| 496 | int max_write_time_messages_ = -1; |
| 497 | std::chrono::nanoseconds total_write_time_ = std::chrono::nanoseconds::zero(); |
// The three totals below are the starting values that total_write_count(),
// total_write_messages() and total_write_bytes() pass to
// accumulate_data_writers().
| 498 | int total_write_count_ = 0; |
| 499 | int total_write_messages_ = 0; |
| 500 | int total_write_bytes_ = 0; |
| 501 | |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 502 | // File to write both delivery timestamps and local data to. |
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 503 | std::unique_ptr<NewDataWriter> data_writer_; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 504 | |
// Writers keyed by channel pointer; accumulate_data_writers() visits these
// before data_writer_.
Austin Schuh | b8bca73 | 2021-07-30 22:32:00 -0700 | [diff] [blame] | 505 | std::map<const Channel *, NewDataWriter> data_writers_; |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 506 | }; |
| 507 | |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 508 | // This is specialized log namer that deals with directory centric log events. |
| 509 | class MultiNodeFilesLogNamer : public MultiNodeLogNamer { |
| 510 | public: |
| 511 | MultiNodeFilesLogNamer(std::string_view base_name, EventLoop *event_loop) |
colleen | 61276dc | 2023-06-01 09:23:29 -0700 | [diff] [blame] | 512 | : MultiNodeLogNamer( |
| 513 | std::make_unique<RenamableFileBackend>(base_name, false), |
| 514 | event_loop) {} |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 515 | |
| 516 | MultiNodeFilesLogNamer(std::string_view base_name, |
| 517 | const Configuration *configuration, |
| 518 | EventLoop *event_loop, const Node *node) |
colleen | 61276dc | 2023-06-01 09:23:29 -0700 | [diff] [blame] | 519 | : MultiNodeLogNamer( |
| 520 | std::make_unique<RenamableFileBackend>(base_name, false), |
| 521 | configuration, event_loop, node) {} |
| 522 | |
| 523 | MultiNodeFilesLogNamer(EventLoop *event_loop, |
| 524 | std::unique_ptr<RenamableFileBackend> backend) |
| 525 | : MultiNodeLogNamer(std::move(backend), event_loop) {} |
| 526 | |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 527 | ~MultiNodeFilesLogNamer() override = default; |
Alexei Strots | caf17d3 | 2023-04-03 22:31:11 -0700 | [diff] [blame] | 528 | |
| 529 | std::string_view base_name() const { |
| 530 | return renamable_file_backend()->base_name(); |
| 531 | } |
| 532 | |
| 533 | // Rotate should be called at least once in between calls to set_base_name. |
| 534 | // Otherwise, temporary files will not be recoverable. |
| 535 | // Rotate is called by Logger::RenameLogBase, which is currently the only user |
| 536 | // of this method. |
| 537 | // Only renaming the folder is supported, not the file base name. |
| 538 | void set_base_name(std::string_view base_name) { |
| 539 | renamable_file_backend()->RenameLogBase(base_name); |
| 540 | } |
| 541 | |
| 542 | // When enabled, this will write files under names beginning |
| 543 | // with the .tmp suffix, and then rename them to the desired name after |
| 544 | // they are fully written. |
| 545 | // |
| 546 | // This is useful to enable incremental copying of the log files. |
| 547 | // |
| 548 | // Defaults to writing directly to the final filename. |
| 549 | void EnableTempFiles() { renamable_file_backend()->EnableTempFiles(); } |
| 550 | |
| 551 | private: |
| 552 | RenamableFileBackend *renamable_file_backend() { |
| 553 | return reinterpret_cast<RenamableFileBackend *>(log_backend()); |
| 554 | } |
| 555 | const RenamableFileBackend *renamable_file_backend() const { |
| 556 | return reinterpret_cast<const RenamableFileBackend *>(log_backend()); |
| 557 | } |
Alexei Strots | 0139549 | 2023-03-20 13:59:56 -0700 | [diff] [blame] | 558 | }; |
| 559 | |
Austin Schuh | cb5601b | 2020-09-10 15:29:59 -0700 | [diff] [blame] | 560 | } // namespace logger |
| 561 | } // namespace aos |
| 562 | |
| 563 | #endif // AOS_EVENTS_LOGGING_LOG_NAMER_H_ |