blob: 2bd660a5cc2d22667fea9bc833551a2a3a0044ed [file] [log] [blame]
James Kuszmaul827bd212023-05-15 23:57:39 -07001#include "aos/util/config_validator_lib.h"
2
3#include <chrono>
4
5#include "aos/events/logging/log_reader.h"
6#include "aos/events/logging/log_writer.h"
7#include "aos/events/simulated_event_loop.h"
8#include "aos/network/remote_message_generated.h"
9#include "aos/network/timestamp_channel.h"
10#include "aos/testing/tmpdir.h"
11#include "aos/util/simulation_logger.h"
12
13DECLARE_bool(validate_timestamp_logger_nodes);
14
15namespace aos::util {
16
17namespace {
18void RunSimulationAndExit(const aos::Configuration *config) {
19 aos::SimulatedEventLoopFactory factory(config);
20
21 factory.RunFor(std::chrono::seconds(1));
22
23 std::exit(EXIT_SUCCESS);
24}
25
26// Checks if either the node is in the specified list of node names or if the
27// list is empty (in which case it is treated as matching all nodes).
28bool NodeInList(
29 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *list,
30 const aos::Node *node) {
31 if (list == nullptr || list->size() == 0) {
32 return true;
33 }
34 for (const flatbuffers::String *name : *list) {
35 if (name->string_view() == node->name()->string_view()) {
36 return true;
37 }
38 }
39 return false;
40}
41
42} // namespace
43
44void ConfigIsValid(const aos::Configuration *config,
45 const ConfigValidatorConfig *validation_config) {
46 ASSERT_TRUE(config->has_channels())
47 << "An AOS config must have channels. If you have a valid use-case for "
48 "channels with no channels, please write a design proposal.";
49
50 // First, we do some sanity checks--these are likely to indicate a malformed
51 // config, and so catching them early with a clear error message is likely to
52 // help.
53
54 // The set of all channels that are required by the channels that are
55 // configured--these are the remote timestamp channels that *must* be present,
56 // and ideally there are no other channels present.
57 std::set<const Channel *> required_timestamp_channels;
58 // The set of all channels that *look* like remote timestamp channels. This
59 // may include channels that are improperly configured and thus have typos &
60 // aren't actually going to do anything at runtime.
61 std::set<const Channel *> configured_timestamp_channels;
62 bool validation_failed = false;
63 for (size_t channel_index = 0; channel_index < config->channels()->size();
64 ++channel_index) {
65 const aos::Channel *channel = config->channels()->Get(channel_index);
66 ASSERT_TRUE(channel->has_name()) << "All AOS channels must have a name.";
67 ASSERT_TRUE(channel->has_type()) << "All AOS channels must have a type.";
68
69 const bool channel_looks_like_remote_message_channel =
70 channel->type()->string_view() ==
71 message_bridge::RemoteMessage::GetFullyQualifiedName();
72
73 const bool check_for_not_logged_channels =
74 !validation_config->has_logging() ||
75 validation_config->logging()->all_channels_logged();
76 const bool channel_is_not_logged =
77 channel->logger() == aos::LoggerConfig::NOT_LOGGED;
78 if (check_for_not_logged_channels) {
79 if (channel_looks_like_remote_message_channel != channel_is_not_logged) {
80 LOG(WARNING)
81 << "Channel " << configuration::StrippedChannelToString(channel)
82 << " is " << EnumNameLoggerConfig(channel->logger()) << " but "
83 << (channel_looks_like_remote_message_channel ? "is" : "is not")
84 << " a remote timestamp channel. This is almost certainly wrong.";
85 validation_failed = true;
86 }
87 }
88
89 if (channel_looks_like_remote_message_channel) {
90 configured_timestamp_channels.insert(channel);
91 } else {
92 if (channel->has_destination_nodes()) {
93 // TODO(james): Technically the timestamp finder should receive a
94 // non-empty application name. However, there are no known users that
95 // care at this moment.
96 message_bridge::ChannelTimestampFinder timestamp_finder(
97 config, "",
98 configuration::GetNode(config,
99 channel->source_node()->string_view()));
100 for (const Connection *connection : *channel->destination_nodes()) {
101 switch (connection->timestamp_logger()) {
102 case LoggerConfig::NOT_LOGGED:
103 case LoggerConfig::LOCAL_LOGGER:
104 if (connection->has_timestamp_logger_nodes()) {
105 LOG(WARNING)
106 << "Connections that are "
107 << EnumNameLoggerConfig(connection->timestamp_logger())
108 << " should not have remote timestamp logger nodes "
109 "populated. This is for the connection to "
110 << connection->name()->string_view() << " on "
111 << configuration::StrippedChannelToString(channel);
112 validation_failed = true;
113 }
114 break;
115 case LoggerConfig::REMOTE_LOGGER:
116 case LoggerConfig::LOCAL_AND_REMOTE_LOGGER:
117 if (!connection->has_timestamp_logger_nodes() ||
118 connection->timestamp_logger_nodes()->size() != 1 ||
119 connection->timestamp_logger_nodes()->Get(0)->string_view() !=
120 channel->source_node()->string_view()) {
121 LOG(WARNING)
122 << "Connections that are "
123 << EnumNameLoggerConfig(connection->timestamp_logger())
124 << " should have exactly 1 remote timestamp logger node "
125 "populated, and that node should be the source_node ("
126 << channel->source_node()->string_view()
127 << "). This is for the connection to "
128 << connection->name()->string_view() << " on "
129 << configuration::StrippedChannelToString(channel);
130 validation_failed = true;
131 }
132 // TODO(james): This will be overly noisy, as it ends up
133 // CHECK-failing.
134 required_timestamp_channels.insert(CHECK_NOTNULL(
135 timestamp_finder.ForChannel(channel, connection)));
136 break;
137 }
138 }
139 }
140 }
141 }
142
143 // Check that all of the things that look like timestamp channels are indeed
144 // required.
145 // Note: Because ForChannel() will die if a required channel is not present,
146 // we do not do a separate check that all the required channels exist.
147 for (const auto &channel : configured_timestamp_channels) {
148 if (required_timestamp_channels.count(channel) == 0) {
149 LOG(WARNING) << "Timestamp channel "
150 << configuration::StrippedChannelToString(channel)
151 << " was specified in the config but is not used.";
152 validation_failed = true;
153 }
154 }
155
156 if (validation_failed) {
157 FAIL() << "Remote timestamp linting failed.";
158 return;
159 }
160
161 // Because the most common way for simulation to fail involves it dying, force
162 // it to fail in a slightly more controlled manner.
163 ASSERT_EXIT(RunSimulationAndExit(config),
164 ::testing::ExitedWithCode(EXIT_SUCCESS), "");
165
166 if (!validation_config->has_logging() || !configuration::MultiNode(config)) {
167 return;
168 }
169
170 // We will run all the logger configs in two modes:
171 // 1) We don't send any data on any non-infrastructure channels; this confirms
172 // that the logs are readable in the absence of any user applications being
173 // present.
174 // 2) We confirm that we can generate a good logfile that actually has data
175 // on every channel (some checks in the LogReader may not get hit if there
176 // is no data on a given channel).
177 const std::string log_path = aos::testing::TestTmpDir() + "/logs/";
178 for (const bool send_data_on_channels : {false, true}) {
179 SCOPED_TRACE(send_data_on_channels);
Pallavi Madhukaraaba67e2023-09-08 14:20:00 -0700180 // Single nodes (multi-nodes with node count = 1) will not produce readable
181 // logs in the absense of data.
182 if (!send_data_on_channels && (configuration::NodesCount(config) == 1u)) {
183 continue;
184 }
James Kuszmaul827bd212023-05-15 23:57:39 -0700185 for (const LoggerNodeSetValidation *logger_set :
186 *validation_config->logging()->logger_sets()) {
187 SCOPED_TRACE(aos::FlatbufferToJson(logger_set));
188 aos::SimulatedEventLoopFactory factory(config);
189 std::vector<std::unique_ptr<LoggerState>> loggers;
190 if (logger_set->has_loggers() && logger_set->loggers()->size() > 0) {
191 std::vector<std::string> logger_nodes;
192 for (const auto &node : *logger_set->loggers()) {
193 logger_nodes.push_back(node->str());
194 }
195 loggers = MakeLoggersForNodes(&factory, logger_nodes, log_path);
196 } else {
197 loggers = MakeLoggersForAllNodes(&factory, log_path);
198 }
199
200 std::vector<std::unique_ptr<EventLoop>> test_loops;
201 std::map<std::string, std::vector<std::unique_ptr<RawSender>>>
202 test_senders;
203
204 if (send_data_on_channels) {
205 // Make a sender on every non-infrastructure channel on every node
206 // (including channels that may not be observable by the current logger
207 // set).
208 for (const aos::Node *node : configuration::GetNodes(config)) {
209 test_loops.emplace_back(factory.MakeEventLoop("", node));
210 for (const aos::Channel *channel : *config->channels()) {
211 // TODO(james): Make a more sophisticated check for "infrastructure"
212 // channels than just looking for a "/aos" in the channel--we don't
213 // accidentally want to spam nonsense data onto any timestamp
214 // channels, though.
215 if (configuration::ChannelIsSendableOnNode(channel, node) &&
216 channel->name()->str().find("/aos") == std::string::npos &&
217 channel->logger() != LoggerConfig::NOT_LOGGED) {
218 test_senders[node->name()->str()].emplace_back(
219 test_loops.back()->MakeRawSender(channel));
220 RawSender *sender =
221 test_senders[node->name()->str()].back().get();
222 test_loops.back()->OnRun([sender, channel]() {
223 flatbuffers::DetachedBuffer buffer =
224 JsonToFlatbuffer("{}", channel->schema());
225 sender->CheckOk(sender->Send(buffer.data(), buffer.size()));
226 });
227 }
228 }
229 }
230 }
231
232 factory.RunFor(std::chrono::seconds(2));
233
234 // Get all of the loggers to close before trying to read the logfiles.
235 loggers.clear();
236
237 // Confirm that we can read the log, and that if we put data in it that we
238 // can find data on all the nodes that the user cares about.
239 logger::LogReader reader(logger::SortParts(logger::FindLogs(log_path)));
240 SimulatedEventLoopFactory replay_factory(reader.configuration());
241 reader.RegisterWithoutStarting(&replay_factory);
242
243 // Find every channel we deliberately sent data on, and if it is for a
244 // node that we care about, confirm that we get it during replay.
245 std::vector<std::unique_ptr<EventLoop>> replay_loops;
246 std::vector<std::unique_ptr<RawFetcher>> fetchers;
247 for (const aos::Node *node :
248 configuration::GetNodes(replay_factory.configuration())) {
249 // If the user doesn't care about this node, don't check it.
250 if (!NodeInList(logger_set->replay_nodes(), node)) {
251 continue;
252 }
253 replay_loops.emplace_back(replay_factory.MakeEventLoop("", node));
254 for (const auto &sender : test_senders[node->name()->str()]) {
255 const aos::Channel *channel = configuration::GetChannel(
256 replay_factory.configuration(), sender->channel(), "", node);
257 fetchers.emplace_back(replay_loops.back()->MakeRawFetcher(channel));
258 }
259 }
260
261 std::vector<std::pair<const aos::Node *, std::unique_ptr<RawFetcher>>>
262 remote_fetchers;
263 for (const auto &fetcher : fetchers) {
264 for (auto &loop : replay_loops) {
265 const Connection *connection =
266 configuration::ConnectionToNode(fetcher->channel(), loop->node());
267 if (connection != nullptr) {
268 remote_fetchers.push_back(std::make_pair(
269 loop->node(), loop->MakeRawFetcher(fetcher->channel())));
270 }
271 }
272 }
273
274 replay_factory.Run();
275
276 for (auto &fetcher : fetchers) {
277 EXPECT_TRUE(fetcher->Fetch())
278 << "Failed to log or replay any data on "
279 << configuration::StrippedChannelToString(fetcher->channel());
280 }
281
282 for (auto &pair : remote_fetchers) {
283 EXPECT_TRUE(pair.second->Fetch())
284 << "Failed to log or replay any data on "
285 << configuration::StrippedChannelToString(pair.second->channel())
286 << " from remote node " << logger::MaybeNodeName(pair.first) << ".";
287 }
288
289 reader.Deregister();
290
291 // Clean up the logs.
292 UnlinkRecursive(log_path);
293 }
294 }
295}
296
297} // namespace aos::util