James Kuszmaul | 827bd21 | 2023-05-15 23:57:39 -0700 | [diff] [blame] | 1 | #include "aos/util/config_validator_lib.h" |
| 2 | |
| 3 | #include <chrono> |
| 4 | |
| 5 | #include "aos/events/logging/log_reader.h" |
| 6 | #include "aos/events/logging/log_writer.h" |
| 7 | #include "aos/events/simulated_event_loop.h" |
| 8 | #include "aos/network/remote_message_generated.h" |
| 9 | #include "aos/network/timestamp_channel.h" |
| 10 | #include "aos/testing/tmpdir.h" |
| 11 | #include "aos/util/simulation_logger.h" |
| 12 | |
| 13 | DECLARE_bool(validate_timestamp_logger_nodes); |
| 14 | |
| 15 | namespace aos::util { |
| 16 | |
| 17 | namespace { |
| 18 | void RunSimulationAndExit(const aos::Configuration *config) { |
| 19 | aos::SimulatedEventLoopFactory factory(config); |
| 20 | |
| 21 | factory.RunFor(std::chrono::seconds(1)); |
| 22 | |
| 23 | std::exit(EXIT_SUCCESS); |
| 24 | } |
| 25 | |
| 26 | // Checks if either the node is in the specified list of node names or if the |
| 27 | // list is empty (in which case it is treated as matching all nodes). |
| 28 | bool NodeInList( |
| 29 | const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *list, |
| 30 | const aos::Node *node) { |
| 31 | if (list == nullptr || list->size() == 0) { |
| 32 | return true; |
| 33 | } |
| 34 | for (const flatbuffers::String *name : *list) { |
| 35 | if (name->string_view() == node->name()->string_view()) { |
| 36 | return true; |
| 37 | } |
| 38 | } |
| 39 | return false; |
| 40 | } |
| 41 | |
| 42 | } // namespace |
| 43 | |
| 44 | void ConfigIsValid(const aos::Configuration *config, |
| 45 | const ConfigValidatorConfig *validation_config) { |
| 46 | ASSERT_TRUE(config->has_channels()) |
| 47 | << "An AOS config must have channels. If you have a valid use-case for " |
| 48 | "channels with no channels, please write a design proposal."; |
| 49 | |
| 50 | // First, we do some sanity checks--these are likely to indicate a malformed |
| 51 | // config, and so catching them early with a clear error message is likely to |
| 52 | // help. |
| 53 | |
| 54 | // The set of all channels that are required by the channels that are |
| 55 | // configured--these are the remote timestamp channels that *must* be present, |
| 56 | // and ideally there are no other channels present. |
| 57 | std::set<const Channel *> required_timestamp_channels; |
| 58 | // The set of all channels that *look* like remote timestamp channels. This |
| 59 | // may include channels that are improperly configured and thus have typos & |
| 60 | // aren't actually going to do anything at runtime. |
| 61 | std::set<const Channel *> configured_timestamp_channels; |
| 62 | bool validation_failed = false; |
| 63 | for (size_t channel_index = 0; channel_index < config->channels()->size(); |
| 64 | ++channel_index) { |
| 65 | const aos::Channel *channel = config->channels()->Get(channel_index); |
| 66 | ASSERT_TRUE(channel->has_name()) << "All AOS channels must have a name."; |
| 67 | ASSERT_TRUE(channel->has_type()) << "All AOS channels must have a type."; |
| 68 | |
| 69 | const bool channel_looks_like_remote_message_channel = |
| 70 | channel->type()->string_view() == |
| 71 | message_bridge::RemoteMessage::GetFullyQualifiedName(); |
| 72 | |
| 73 | const bool check_for_not_logged_channels = |
| 74 | !validation_config->has_logging() || |
| 75 | validation_config->logging()->all_channels_logged(); |
| 76 | const bool channel_is_not_logged = |
| 77 | channel->logger() == aos::LoggerConfig::NOT_LOGGED; |
| 78 | if (check_for_not_logged_channels) { |
| 79 | if (channel_looks_like_remote_message_channel != channel_is_not_logged) { |
| 80 | LOG(WARNING) |
| 81 | << "Channel " << configuration::StrippedChannelToString(channel) |
| 82 | << " is " << EnumNameLoggerConfig(channel->logger()) << " but " |
| 83 | << (channel_looks_like_remote_message_channel ? "is" : "is not") |
| 84 | << " a remote timestamp channel. This is almost certainly wrong."; |
| 85 | validation_failed = true; |
| 86 | } |
| 87 | } |
| 88 | |
| 89 | if (channel_looks_like_remote_message_channel) { |
| 90 | configured_timestamp_channels.insert(channel); |
| 91 | } else { |
| 92 | if (channel->has_destination_nodes()) { |
| 93 | // TODO(james): Technically the timestamp finder should receive a |
| 94 | // non-empty application name. However, there are no known users that |
| 95 | // care at this moment. |
| 96 | message_bridge::ChannelTimestampFinder timestamp_finder( |
| 97 | config, "", |
| 98 | configuration::GetNode(config, |
| 99 | channel->source_node()->string_view())); |
| 100 | for (const Connection *connection : *channel->destination_nodes()) { |
| 101 | switch (connection->timestamp_logger()) { |
| 102 | case LoggerConfig::NOT_LOGGED: |
| 103 | case LoggerConfig::LOCAL_LOGGER: |
| 104 | if (connection->has_timestamp_logger_nodes()) { |
| 105 | LOG(WARNING) |
| 106 | << "Connections that are " |
| 107 | << EnumNameLoggerConfig(connection->timestamp_logger()) |
| 108 | << " should not have remote timestamp logger nodes " |
| 109 | "populated. This is for the connection to " |
| 110 | << connection->name()->string_view() << " on " |
| 111 | << configuration::StrippedChannelToString(channel); |
| 112 | validation_failed = true; |
| 113 | } |
| 114 | break; |
| 115 | case LoggerConfig::REMOTE_LOGGER: |
| 116 | case LoggerConfig::LOCAL_AND_REMOTE_LOGGER: |
| 117 | if (!connection->has_timestamp_logger_nodes() || |
| 118 | connection->timestamp_logger_nodes()->size() != 1 || |
| 119 | connection->timestamp_logger_nodes()->Get(0)->string_view() != |
| 120 | channel->source_node()->string_view()) { |
| 121 | LOG(WARNING) |
| 122 | << "Connections that are " |
| 123 | << EnumNameLoggerConfig(connection->timestamp_logger()) |
| 124 | << " should have exactly 1 remote timestamp logger node " |
| 125 | "populated, and that node should be the source_node (" |
| 126 | << channel->source_node()->string_view() |
| 127 | << "). This is for the connection to " |
| 128 | << connection->name()->string_view() << " on " |
| 129 | << configuration::StrippedChannelToString(channel); |
| 130 | validation_failed = true; |
| 131 | } |
| 132 | // TODO(james): This will be overly noisy, as it ends up |
| 133 | // CHECK-failing. |
| 134 | required_timestamp_channels.insert(CHECK_NOTNULL( |
| 135 | timestamp_finder.ForChannel(channel, connection))); |
| 136 | break; |
| 137 | } |
| 138 | } |
| 139 | } |
| 140 | } |
| 141 | } |
| 142 | |
| 143 | // Check that all of the things that look like timestamp channels are indeed |
| 144 | // required. |
| 145 | // Note: Because ForChannel() will die if a required channel is not present, |
| 146 | // we do not do a separate check that all the required channels exist. |
| 147 | for (const auto &channel : configured_timestamp_channels) { |
| 148 | if (required_timestamp_channels.count(channel) == 0) { |
| 149 | LOG(WARNING) << "Timestamp channel " |
| 150 | << configuration::StrippedChannelToString(channel) |
| 151 | << " was specified in the config but is not used."; |
| 152 | validation_failed = true; |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | if (validation_failed) { |
| 157 | FAIL() << "Remote timestamp linting failed."; |
| 158 | return; |
| 159 | } |
| 160 | |
| 161 | // Because the most common way for simulation to fail involves it dying, force |
| 162 | // it to fail in a slightly more controlled manner. |
| 163 | ASSERT_EXIT(RunSimulationAndExit(config), |
| 164 | ::testing::ExitedWithCode(EXIT_SUCCESS), ""); |
| 165 | |
| 166 | if (!validation_config->has_logging() || !configuration::MultiNode(config)) { |
| 167 | return; |
| 168 | } |
| 169 | |
| 170 | // We will run all the logger configs in two modes: |
| 171 | // 1) We don't send any data on any non-infrastructure channels; this confirms |
| 172 | // that the logs are readable in the absence of any user applications being |
| 173 | // present. |
| 174 | // 2) We confirm that we can generate a good logfile that actually has data |
| 175 | // on every channel (some checks in the LogReader may not get hit if there |
| 176 | // is no data on a given channel). |
| 177 | const std::string log_path = aos::testing::TestTmpDir() + "/logs/"; |
| 178 | for (const bool send_data_on_channels : {false, true}) { |
| 179 | SCOPED_TRACE(send_data_on_channels); |
| 180 | for (const LoggerNodeSetValidation *logger_set : |
| 181 | *validation_config->logging()->logger_sets()) { |
| 182 | SCOPED_TRACE(aos::FlatbufferToJson(logger_set)); |
| 183 | aos::SimulatedEventLoopFactory factory(config); |
| 184 | std::vector<std::unique_ptr<LoggerState>> loggers; |
| 185 | if (logger_set->has_loggers() && logger_set->loggers()->size() > 0) { |
| 186 | std::vector<std::string> logger_nodes; |
| 187 | for (const auto &node : *logger_set->loggers()) { |
| 188 | logger_nodes.push_back(node->str()); |
| 189 | } |
| 190 | loggers = MakeLoggersForNodes(&factory, logger_nodes, log_path); |
| 191 | } else { |
| 192 | loggers = MakeLoggersForAllNodes(&factory, log_path); |
| 193 | } |
| 194 | |
| 195 | std::vector<std::unique_ptr<EventLoop>> test_loops; |
| 196 | std::map<std::string, std::vector<std::unique_ptr<RawSender>>> |
| 197 | test_senders; |
| 198 | |
| 199 | if (send_data_on_channels) { |
| 200 | // Make a sender on every non-infrastructure channel on every node |
| 201 | // (including channels that may not be observable by the current logger |
| 202 | // set). |
| 203 | for (const aos::Node *node : configuration::GetNodes(config)) { |
| 204 | test_loops.emplace_back(factory.MakeEventLoop("", node)); |
| 205 | for (const aos::Channel *channel : *config->channels()) { |
| 206 | // TODO(james): Make a more sophisticated check for "infrastructure" |
| 207 | // channels than just looking for a "/aos" in the channel--we don't |
| 208 | // accidentally want to spam nonsense data onto any timestamp |
| 209 | // channels, though. |
| 210 | if (configuration::ChannelIsSendableOnNode(channel, node) && |
| 211 | channel->name()->str().find("/aos") == std::string::npos && |
| 212 | channel->logger() != LoggerConfig::NOT_LOGGED) { |
| 213 | test_senders[node->name()->str()].emplace_back( |
| 214 | test_loops.back()->MakeRawSender(channel)); |
| 215 | RawSender *sender = |
| 216 | test_senders[node->name()->str()].back().get(); |
| 217 | test_loops.back()->OnRun([sender, channel]() { |
| 218 | flatbuffers::DetachedBuffer buffer = |
| 219 | JsonToFlatbuffer("{}", channel->schema()); |
| 220 | sender->CheckOk(sender->Send(buffer.data(), buffer.size())); |
| 221 | }); |
| 222 | } |
| 223 | } |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | factory.RunFor(std::chrono::seconds(2)); |
| 228 | |
| 229 | // Get all of the loggers to close before trying to read the logfiles. |
| 230 | loggers.clear(); |
| 231 | |
| 232 | // Confirm that we can read the log, and that if we put data in it that we |
| 233 | // can find data on all the nodes that the user cares about. |
| 234 | logger::LogReader reader(logger::SortParts(logger::FindLogs(log_path))); |
| 235 | SimulatedEventLoopFactory replay_factory(reader.configuration()); |
| 236 | reader.RegisterWithoutStarting(&replay_factory); |
| 237 | |
| 238 | // Find every channel we deliberately sent data on, and if it is for a |
| 239 | // node that we care about, confirm that we get it during replay. |
| 240 | std::vector<std::unique_ptr<EventLoop>> replay_loops; |
| 241 | std::vector<std::unique_ptr<RawFetcher>> fetchers; |
| 242 | for (const aos::Node *node : |
| 243 | configuration::GetNodes(replay_factory.configuration())) { |
| 244 | // If the user doesn't care about this node, don't check it. |
| 245 | if (!NodeInList(logger_set->replay_nodes(), node)) { |
| 246 | continue; |
| 247 | } |
| 248 | replay_loops.emplace_back(replay_factory.MakeEventLoop("", node)); |
| 249 | for (const auto &sender : test_senders[node->name()->str()]) { |
| 250 | const aos::Channel *channel = configuration::GetChannel( |
| 251 | replay_factory.configuration(), sender->channel(), "", node); |
| 252 | fetchers.emplace_back(replay_loops.back()->MakeRawFetcher(channel)); |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | std::vector<std::pair<const aos::Node *, std::unique_ptr<RawFetcher>>> |
| 257 | remote_fetchers; |
| 258 | for (const auto &fetcher : fetchers) { |
| 259 | for (auto &loop : replay_loops) { |
| 260 | const Connection *connection = |
| 261 | configuration::ConnectionToNode(fetcher->channel(), loop->node()); |
| 262 | if (connection != nullptr) { |
| 263 | remote_fetchers.push_back(std::make_pair( |
| 264 | loop->node(), loop->MakeRawFetcher(fetcher->channel()))); |
| 265 | } |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | replay_factory.Run(); |
| 270 | |
| 271 | for (auto &fetcher : fetchers) { |
| 272 | EXPECT_TRUE(fetcher->Fetch()) |
| 273 | << "Failed to log or replay any data on " |
| 274 | << configuration::StrippedChannelToString(fetcher->channel()); |
| 275 | } |
| 276 | |
| 277 | for (auto &pair : remote_fetchers) { |
| 278 | EXPECT_TRUE(pair.second->Fetch()) |
| 279 | << "Failed to log or replay any data on " |
| 280 | << configuration::StrippedChannelToString(pair.second->channel()) |
| 281 | << " from remote node " << logger::MaybeNodeName(pair.first) << "."; |
| 282 | } |
| 283 | |
| 284 | reader.Deregister(); |
| 285 | |
| 286 | // Clean up the logs. |
| 287 | UnlinkRecursive(log_path); |
| 288 | } |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | } // namespace aos::util |