James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 1 | #include "aos/util/config_validator_lib.h" |
| 2 | |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame^] | 3 | #include <algorithm> |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 4 | #include <chrono> |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame^] | 5 | #include <cstdlib> |
| 6 | #include <initializer_list> |
| 7 | #include <map> |
| 8 | #include <memory> |
| 9 | #include <ostream> |
| 10 | #include <set> |
| 11 | #include <string> |
| 12 | #include <string_view> |
| 13 | #include <utility> |
| 14 | #include <vector> |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 15 | |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame^] | 16 | #include "flatbuffers/buffer.h" |
| 17 | #include "flatbuffers/detached_buffer.h" |
| 18 | #include "flatbuffers/string.h" |
| 19 | #include "flatbuffers/vector.h" |
| 20 | #include "gflags/gflags_declare.h" |
| 21 | #include "glog/logging.h" |
| 22 | #include "gtest/gtest.h" |
| 23 | |
| 24 | #include "aos/events/event_loop.h" |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 25 | #include "aos/events/logging/log_reader.h" |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame^] | 26 | #include "aos/events/logging/logfile_sorting.h" |
| 27 | #include "aos/events/logging/logfile_utils.h" |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 28 | #include "aos/events/simulated_event_loop.h" |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame^] | 29 | #include "aos/flatbuffers/builder.h" |
| 30 | #include "aos/flatbuffers/static_vector.h" |
| 31 | #include "aos/json_to_flatbuffer.h" |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 32 | #include "aos/network/remote_message_generated.h" |
| 33 | #include "aos/network/timestamp_channel.h" |
| 34 | #include "aos/testing/tmpdir.h" |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 35 | #include "aos/util/config_validator_config_static.h" |
Stephan Pleines | b117767 | 2024-05-27 17:48:32 -0700 | [diff] [blame^] | 36 | #include "aos/util/file.h" |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 37 | #include "aos/util/simulation_logger.h" |
| 38 | |
| 39 | DECLARE_bool(validate_timestamp_logger_nodes); |
| 40 | |
| 41 | namespace aos::util { |
| 42 | |
| 43 | namespace { |
| 44 | void RunSimulationAndExit(const aos::Configuration *config) { |
| 45 | aos::SimulatedEventLoopFactory factory(config); |
| 46 | |
| 47 | factory.RunFor(std::chrono::seconds(1)); |
| 48 | |
| 49 | std::exit(EXIT_SUCCESS); |
| 50 | } |
| 51 | |
| 52 | // Checks if either the node is in the specified list of node names or if the |
| 53 | // list is empty (in which case it is treated as matching all nodes). |
| 54 | bool NodeInList( |
| 55 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *list, |
| 56 | const aos::Node *node) { |
| 57 | if (list == nullptr || list->size() == 0) { |
| 58 | return true; |
| 59 | } |
| 60 | for (const flatbuffers::String *name : *list) { |
| 61 | if (name->string_view() == node->name()->string_view()) { |
| 62 | return true; |
| 63 | } |
| 64 | } |
| 65 | return false; |
| 66 | } |
| 67 | |
| 68 | } // namespace |
| 69 | |
| 70 | void ConfigIsValid(const aos::Configuration *config, |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 71 | const ConfigValidatorConfig *validation_config_raw) { |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 72 | ASSERT_TRUE(config->has_channels()) |
| 73 | << "An AOS config must have channels. If you have a valid use-case for " |
| 74 | "channels with no channels, please write a design proposal."; |
| 75 | |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 76 | aos::fbs::Builder<ConfigValidatorConfigStatic> validation_config; |
| 77 | CHECK(validation_config->FromFlatbuffer(validation_config_raw)); |
| 78 | |
| 79 | if (validation_config_raw->has_logging() && |
| 80 | validation_config_raw->logging()->validate_individual_node_loggers() && |
| 81 | configuration::MultiNode(config)) { |
| 82 | if (!validation_config->logging()->has_logger_sets()) { |
| 83 | validation_config->mutable_logging()->add_logger_sets(); |
| 84 | } |
| 85 | auto logger_sets = |
| 86 | validation_config->mutable_logging()->mutable_logger_sets(); |
| 87 | for (const aos::Node *node : configuration::GetNodes(config)) { |
| 88 | CHECK(logger_sets->reserve(logger_sets->size() + 1)); |
| 89 | auto logger_set = logger_sets->emplace_back(); |
| 90 | CHECK(logger_set->add_loggers()->FromFlatbuffer({node->name()->str()})); |
| 91 | CHECK(logger_set->add_replay_nodes()->FromFlatbuffer( |
| 92 | {node->name()->str()})); |
| 93 | } |
| 94 | } |
| 95 | |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 96 | // First, we do some sanity checks--these are likely to indicate a malformed |
| 97 | // config, and so catching them early with a clear error message is likely to |
| 98 | // help. |
| 99 | |
| 100 | // The set of all channels that are required by the channels that are |
| 101 | // configured--these are the remote timestamp channels that *must* be present, |
| 102 | // and ideally there are no other channels present. |
| 103 | std::set<const Channel *> required_timestamp_channels; |
| 104 | // The set of all channels that *look* like remote timestamp channels. This |
| 105 | // may include channels that are improperly configured and thus have typos & |
| 106 | // aren't actually going to do anything at runtime. |
| 107 | std::set<const Channel *> configured_timestamp_channels; |
| 108 | bool validation_failed = false; |
| 109 | for (size_t channel_index = 0; channel_index < config->channels()->size(); |
| 110 | ++channel_index) { |
| 111 | const aos::Channel *channel = config->channels()->Get(channel_index); |
| 112 | ASSERT_TRUE(channel->has_name()) << "All AOS channels must have a name."; |
| 113 | ASSERT_TRUE(channel->has_type()) << "All AOS channels must have a type."; |
| 114 | |
| 115 | const bool channel_looks_like_remote_message_channel = |
| 116 | channel->type()->string_view() == |
| 117 | message_bridge::RemoteMessage::GetFullyQualifiedName(); |
| 118 | |
| 119 | const bool check_for_not_logged_channels = |
| 120 | !validation_config->has_logging() || |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 121 | validation_config->AsFlatbuffer().logging()->all_channels_logged(); |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 122 | const bool channel_is_not_logged = |
| 123 | channel->logger() == aos::LoggerConfig::NOT_LOGGED; |
| 124 | if (check_for_not_logged_channels) { |
| 125 | if (channel_looks_like_remote_message_channel != channel_is_not_logged) { |
| 126 | LOG(WARNING) |
| 127 | << "Channel " << configuration::StrippedChannelToString(channel) |
| 128 | << " is " << EnumNameLoggerConfig(channel->logger()) << " but " |
| 129 | << (channel_looks_like_remote_message_channel ? "is" : "is not") |
| 130 | << " a remote timestamp channel. This is almost certainly wrong."; |
| 131 | validation_failed = true; |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | if (channel_looks_like_remote_message_channel) { |
| 136 | configured_timestamp_channels.insert(channel); |
| 137 | } else { |
| 138 | if (channel->has_destination_nodes()) { |
| 139 | // TODO(james): Technically the timestamp finder should receive a |
| 140 | // non-empty application name. However, there are no known users that |
| 141 | // care at this moment. |
| 142 | message_bridge::ChannelTimestampFinder timestamp_finder( |
| 143 | config, "", |
| 144 | configuration::GetNode(config, |
| 145 | channel->source_node()->string_view())); |
| 146 | for (const Connection *connection : *channel->destination_nodes()) { |
| 147 | switch (connection->timestamp_logger()) { |
| 148 | case LoggerConfig::NOT_LOGGED: |
| 149 | case LoggerConfig::LOCAL_LOGGER: |
| 150 | if (connection->has_timestamp_logger_nodes()) { |
| 151 | LOG(WARNING) |
| 152 | << "Connections that are " |
| 153 | << EnumNameLoggerConfig(connection->timestamp_logger()) |
| 154 | << " should not have remote timestamp logger nodes " |
| 155 | "populated. This is for the connection to " |
| 156 | << connection->name()->string_view() << " on " |
| 157 | << configuration::StrippedChannelToString(channel); |
| 158 | validation_failed = true; |
| 159 | } |
| 160 | break; |
| 161 | case LoggerConfig::REMOTE_LOGGER: |
| 162 | case LoggerConfig::LOCAL_AND_REMOTE_LOGGER: |
| 163 | if (!connection->has_timestamp_logger_nodes() || |
| 164 | connection->timestamp_logger_nodes()->size() != 1 || |
| 165 | connection->timestamp_logger_nodes()->Get(0)->string_view() != |
| 166 | channel->source_node()->string_view()) { |
| 167 | LOG(WARNING) |
| 168 | << "Connections that are " |
| 169 | << EnumNameLoggerConfig(connection->timestamp_logger()) |
| 170 | << " should have exactly 1 remote timestamp logger node " |
| 171 | "populated, and that node should be the source_node (" |
| 172 | << channel->source_node()->string_view() |
| 173 | << "). This is for the connection to " |
| 174 | << connection->name()->string_view() << " on " |
| 175 | << configuration::StrippedChannelToString(channel); |
| 176 | validation_failed = true; |
| 177 | } |
| 178 | // TODO(james): This will be overly noisy, as it ends up |
| 179 | // CHECK-failing. |
| 180 | required_timestamp_channels.insert(CHECK_NOTNULL( |
| 181 | timestamp_finder.ForChannel(channel, connection))); |
| 182 | break; |
| 183 | } |
| 184 | } |
| 185 | } |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | // Check that all of the things that look like timestamp channels are indeed |
| 190 | // required. |
| 191 | // Note: Because ForChannel() will die if a required channel is not present, |
| 192 | // we do not do a separate check that all the required channels exist. |
| 193 | for (const auto &channel : configured_timestamp_channels) { |
| 194 | if (required_timestamp_channels.count(channel) == 0) { |
| 195 | LOG(WARNING) << "Timestamp channel " |
| 196 | << configuration::StrippedChannelToString(channel) |
| 197 | << " was specified in the config but is not used."; |
| 198 | validation_failed = true; |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | if (validation_failed) { |
| 203 | FAIL() << "Remote timestamp linting failed."; |
| 204 | return; |
| 205 | } |
| 206 | |
| 207 | // Because the most common way for simulation to fail involves it dying, force |
| 208 | // it to fail in a slightly more controlled manner. |
| 209 | ASSERT_EXIT(RunSimulationAndExit(config), |
| 210 | ::testing::ExitedWithCode(EXIT_SUCCESS), ""); |
| 211 | |
| 212 | if (!validation_config->has_logging() || !configuration::MultiNode(config)) { |
| 213 | return; |
| 214 | } |
| 215 | |
| 216 | // We will run all the logger configs in two modes: |
| 217 | // 1) We don't send any data on any non-infrastructure channels; this confirms |
| 218 | // that the logs are readable in the absence of any user applications being |
| 219 | // present. |
| 220 | // 2) We confirm that we can generate a good logfile that actually has data |
| 221 | // on every channel (some checks in the LogReader may not get hit if there |
| 222 | // is no data on a given channel). |
| 223 | const std::string log_path = aos::testing::TestTmpDir() + "/logs/"; |
| 224 | for (const bool send_data_on_channels : {false, true}) { |
| 225 | SCOPED_TRACE(send_data_on_channels); |
Pallavi Madhukar | aaba67e | 2023-09-08 14:20:00 -0700 | [diff] [blame] | 226 | // Single nodes (multi-nodes with node count = 1) will not produce readable |
| 227 | // logs in the absense of data. |
| 228 | if (!send_data_on_channels && (configuration::NodesCount(config) == 1u)) { |
| 229 | continue; |
| 230 | } |
Pallavi Madhukar | 3076d5c | 2023-09-09 10:23:26 -0700 | [diff] [blame] | 231 | // Send timing report when we are sending data. |
| 232 | const bool do_skip_timing_report = !send_data_on_channels; |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 233 | for (const LoggerNodeSetValidationStatic &logger_set : |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 234 | *validation_config->logging()->logger_sets()) { |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 235 | SCOPED_TRACE(aos::FlatbufferToJson(&logger_set.AsFlatbuffer())); |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 236 | aos::SimulatedEventLoopFactory factory(config); |
| 237 | std::vector<std::unique_ptr<LoggerState>> loggers; |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 238 | if (logger_set.has_loggers() && logger_set.loggers()->size() > 0) { |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 239 | std::vector<std::string> logger_nodes; |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 240 | for (const auto &node : *logger_set.loggers()) { |
| 241 | logger_nodes.push_back(node.str()); |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 242 | } |
Pallavi Madhukar | 3076d5c | 2023-09-09 10:23:26 -0700 | [diff] [blame] | 243 | loggers = MakeLoggersForNodes(&factory, logger_nodes, log_path, |
| 244 | do_skip_timing_report); |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 245 | } else { |
Pallavi Madhukar | 3076d5c | 2023-09-09 10:23:26 -0700 | [diff] [blame] | 246 | loggers = |
| 247 | MakeLoggersForAllNodes(&factory, log_path, do_skip_timing_report); |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 248 | } |
| 249 | |
| 250 | std::vector<std::unique_ptr<EventLoop>> test_loops; |
| 251 | std::map<std::string, std::vector<std::unique_ptr<RawSender>>> |
| 252 | test_senders; |
| 253 | |
| 254 | if (send_data_on_channels) { |
| 255 | // Make a sender on every non-infrastructure channel on every node |
| 256 | // (including channels that may not be observable by the current logger |
| 257 | // set). |
| 258 | for (const aos::Node *node : configuration::GetNodes(config)) { |
| 259 | test_loops.emplace_back(factory.MakeEventLoop("", node)); |
| 260 | for (const aos::Channel *channel : *config->channels()) { |
| 261 | // TODO(james): Make a more sophisticated check for "infrastructure" |
| 262 | // channels than just looking for a "/aos" in the channel--we don't |
| 263 | // accidentally want to spam nonsense data onto any timestamp |
| 264 | // channels, though. |
| 265 | if (configuration::ChannelIsSendableOnNode(channel, node) && |
| 266 | channel->name()->str().find("/aos") == std::string::npos && |
| 267 | channel->logger() != LoggerConfig::NOT_LOGGED) { |
| 268 | test_senders[node->name()->str()].emplace_back( |
| 269 | test_loops.back()->MakeRawSender(channel)); |
| 270 | RawSender *sender = |
| 271 | test_senders[node->name()->str()].back().get(); |
| 272 | test_loops.back()->OnRun([sender, channel]() { |
| 273 | flatbuffers::DetachedBuffer buffer = |
| 274 | JsonToFlatbuffer("{}", channel->schema()); |
| 275 | sender->CheckOk(sender->Send(buffer.data(), buffer.size())); |
| 276 | }); |
| 277 | } |
| 278 | } |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | factory.RunFor(std::chrono::seconds(2)); |
| 283 | |
| 284 | // Get all of the loggers to close before trying to read the logfiles. |
| 285 | loggers.clear(); |
| 286 | |
| 287 | // Confirm that we can read the log, and that if we put data in it that we |
| 288 | // can find data on all the nodes that the user cares about. |
| 289 | logger::LogReader reader(logger::SortParts(logger::FindLogs(log_path))); |
| 290 | SimulatedEventLoopFactory replay_factory(reader.configuration()); |
| 291 | reader.RegisterWithoutStarting(&replay_factory); |
| 292 | |
| 293 | // Find every channel we deliberately sent data on, and if it is for a |
| 294 | // node that we care about, confirm that we get it during replay. |
| 295 | std::vector<std::unique_ptr<EventLoop>> replay_loops; |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 296 | for (const aos::Node *node : |
| 297 | configuration::GetNodes(replay_factory.configuration())) { |
| 298 | // If the user doesn't care about this node, don't check it. |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 299 | if (!NodeInList(logger_set.has_replay_nodes() |
| 300 | ? logger_set.replay_nodes()->AsFlatbufferVector() |
| 301 | : nullptr, |
| 302 | node)) { |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 303 | continue; |
| 304 | } |
| 305 | replay_loops.emplace_back(replay_factory.MakeEventLoop("", node)); |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 306 | } |
| 307 | |
| 308 | std::vector<std::pair<const aos::Node *, std::unique_ptr<RawFetcher>>> |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 309 | fetchers; |
| 310 | for (const auto &node_senders : test_senders) { |
| 311 | for (const auto &sender : node_senders.second) { |
| 312 | for (auto &loop : replay_loops) { |
| 313 | if (configuration::ChannelIsReadableOnNode(sender->channel(), |
| 314 | loop->node())) { |
| 315 | fetchers.push_back(std::make_pair( |
| 316 | loop->node(), |
| 317 | loop->MakeRawFetcher(configuration::GetChannel( |
| 318 | replay_factory.configuration(), sender->channel(), |
| 319 | loop->name(), loop->node())))); |
| 320 | } |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 321 | } |
| 322 | } |
| 323 | } |
| 324 | |
| 325 | replay_factory.Run(); |
| 326 | |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 327 | for (auto &pair : fetchers) { |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 328 | EXPECT_TRUE(pair.second->Fetch()) |
| 329 | << "Failed to log or replay any data on " |
| 330 | << configuration::StrippedChannelToString(pair.second->channel()) |
James Kuszmaul | 464012b | 2024-03-20 14:12:08 -0700 | [diff] [blame] | 331 | << " reading from " << logger::MaybeNodeName(pair.first) |
| 332 | << " with source node " |
| 333 | << (pair.second->channel()->has_source_node() |
| 334 | ? pair.second->channel()->source_node()->string_view() |
| 335 | : "") |
| 336 | << "."; |
James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 337 | } |
| 338 | |
| 339 | reader.Deregister(); |
| 340 | |
| 341 | // Clean up the logs. |
| 342 | UnlinkRecursive(log_path); |
| 343 | } |
| 344 | } |
| 345 | } |
| 346 | |
| 347 | } // namespace aos::util |