blob: ad85027b1b59fef987b7b3bad71922cbef4279a6 [file] [log] [blame]
James Kuszmaul827bd212023-05-15 23:57:39 -07001#include "aos/util/config_validator_lib.h"
2
3#include <chrono>
4
5#include "aos/events/logging/log_reader.h"
6#include "aos/events/logging/log_writer.h"
7#include "aos/events/simulated_event_loop.h"
8#include "aos/network/remote_message_generated.h"
9#include "aos/network/timestamp_channel.h"
10#include "aos/testing/tmpdir.h"
James Kuszmaul464012b2024-03-20 14:12:08 -070011#include "aos/util/config_validator_config_static.h"
James Kuszmaul827bd212023-05-15 23:57:39 -070012#include "aos/util/simulation_logger.h"
13
14DECLARE_bool(validate_timestamp_logger_nodes);
15
16namespace aos::util {
17
18namespace {
19void RunSimulationAndExit(const aos::Configuration *config) {
20 aos::SimulatedEventLoopFactory factory(config);
21
22 factory.RunFor(std::chrono::seconds(1));
23
24 std::exit(EXIT_SUCCESS);
25}
26
27// Checks if either the node is in the specified list of node names or if the
28// list is empty (in which case it is treated as matching all nodes).
29bool NodeInList(
30 const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *list,
31 const aos::Node *node) {
32 if (list == nullptr || list->size() == 0) {
33 return true;
34 }
35 for (const flatbuffers::String *name : *list) {
36 if (name->string_view() == node->name()->string_view()) {
37 return true;
38 }
39 }
40 return false;
41}
42
43} // namespace
44
45void ConfigIsValid(const aos::Configuration *config,
James Kuszmaul464012b2024-03-20 14:12:08 -070046 const ConfigValidatorConfig *validation_config_raw) {
James Kuszmaul827bd212023-05-15 23:57:39 -070047 ASSERT_TRUE(config->has_channels())
48 << "An AOS config must have channels. If you have a valid use-case for "
49 "channels with no channels, please write a design proposal.";
50
James Kuszmaul464012b2024-03-20 14:12:08 -070051 aos::fbs::Builder<ConfigValidatorConfigStatic> validation_config;
52 CHECK(validation_config->FromFlatbuffer(validation_config_raw));
53
54 if (validation_config_raw->has_logging() &&
55 validation_config_raw->logging()->validate_individual_node_loggers() &&
56 configuration::MultiNode(config)) {
57 if (!validation_config->logging()->has_logger_sets()) {
58 validation_config->mutable_logging()->add_logger_sets();
59 }
60 auto logger_sets =
61 validation_config->mutable_logging()->mutable_logger_sets();
62 for (const aos::Node *node : configuration::GetNodes(config)) {
63 CHECK(logger_sets->reserve(logger_sets->size() + 1));
64 auto logger_set = logger_sets->emplace_back();
65 CHECK(logger_set->add_loggers()->FromFlatbuffer({node->name()->str()}));
66 CHECK(logger_set->add_replay_nodes()->FromFlatbuffer(
67 {node->name()->str()}));
68 }
69 }
70
James Kuszmaul827bd212023-05-15 23:57:39 -070071 // First, we do some sanity checks--these are likely to indicate a malformed
72 // config, and so catching them early with a clear error message is likely to
73 // help.
74
75 // The set of all channels that are required by the channels that are
76 // configured--these are the remote timestamp channels that *must* be present,
77 // and ideally there are no other channels present.
78 std::set<const Channel *> required_timestamp_channels;
79 // The set of all channels that *look* like remote timestamp channels. This
80 // may include channels that are improperly configured and thus have typos &
81 // aren't actually going to do anything at runtime.
82 std::set<const Channel *> configured_timestamp_channels;
83 bool validation_failed = false;
84 for (size_t channel_index = 0; channel_index < config->channels()->size();
85 ++channel_index) {
86 const aos::Channel *channel = config->channels()->Get(channel_index);
87 ASSERT_TRUE(channel->has_name()) << "All AOS channels must have a name.";
88 ASSERT_TRUE(channel->has_type()) << "All AOS channels must have a type.";
89
90 const bool channel_looks_like_remote_message_channel =
91 channel->type()->string_view() ==
92 message_bridge::RemoteMessage::GetFullyQualifiedName();
93
94 const bool check_for_not_logged_channels =
95 !validation_config->has_logging() ||
James Kuszmaul464012b2024-03-20 14:12:08 -070096 validation_config->AsFlatbuffer().logging()->all_channels_logged();
James Kuszmaul827bd212023-05-15 23:57:39 -070097 const bool channel_is_not_logged =
98 channel->logger() == aos::LoggerConfig::NOT_LOGGED;
99 if (check_for_not_logged_channels) {
100 if (channel_looks_like_remote_message_channel != channel_is_not_logged) {
101 LOG(WARNING)
102 << "Channel " << configuration::StrippedChannelToString(channel)
103 << " is " << EnumNameLoggerConfig(channel->logger()) << " but "
104 << (channel_looks_like_remote_message_channel ? "is" : "is not")
105 << " a remote timestamp channel. This is almost certainly wrong.";
106 validation_failed = true;
107 }
108 }
109
110 if (channel_looks_like_remote_message_channel) {
111 configured_timestamp_channels.insert(channel);
112 } else {
113 if (channel->has_destination_nodes()) {
114 // TODO(james): Technically the timestamp finder should receive a
115 // non-empty application name. However, there are no known users that
116 // care at this moment.
117 message_bridge::ChannelTimestampFinder timestamp_finder(
118 config, "",
119 configuration::GetNode(config,
120 channel->source_node()->string_view()));
121 for (const Connection *connection : *channel->destination_nodes()) {
122 switch (connection->timestamp_logger()) {
123 case LoggerConfig::NOT_LOGGED:
124 case LoggerConfig::LOCAL_LOGGER:
125 if (connection->has_timestamp_logger_nodes()) {
126 LOG(WARNING)
127 << "Connections that are "
128 << EnumNameLoggerConfig(connection->timestamp_logger())
129 << " should not have remote timestamp logger nodes "
130 "populated. This is for the connection to "
131 << connection->name()->string_view() << " on "
132 << configuration::StrippedChannelToString(channel);
133 validation_failed = true;
134 }
135 break;
136 case LoggerConfig::REMOTE_LOGGER:
137 case LoggerConfig::LOCAL_AND_REMOTE_LOGGER:
138 if (!connection->has_timestamp_logger_nodes() ||
139 connection->timestamp_logger_nodes()->size() != 1 ||
140 connection->timestamp_logger_nodes()->Get(0)->string_view() !=
141 channel->source_node()->string_view()) {
142 LOG(WARNING)
143 << "Connections that are "
144 << EnumNameLoggerConfig(connection->timestamp_logger())
145 << " should have exactly 1 remote timestamp logger node "
146 "populated, and that node should be the source_node ("
147 << channel->source_node()->string_view()
148 << "). This is for the connection to "
149 << connection->name()->string_view() << " on "
150 << configuration::StrippedChannelToString(channel);
151 validation_failed = true;
152 }
153 // TODO(james): This will be overly noisy, as it ends up
154 // CHECK-failing.
155 required_timestamp_channels.insert(CHECK_NOTNULL(
156 timestamp_finder.ForChannel(channel, connection)));
157 break;
158 }
159 }
160 }
161 }
162 }
163
164 // Check that all of the things that look like timestamp channels are indeed
165 // required.
166 // Note: Because ForChannel() will die if a required channel is not present,
167 // we do not do a separate check that all the required channels exist.
168 for (const auto &channel : configured_timestamp_channels) {
169 if (required_timestamp_channels.count(channel) == 0) {
170 LOG(WARNING) << "Timestamp channel "
171 << configuration::StrippedChannelToString(channel)
172 << " was specified in the config but is not used.";
173 validation_failed = true;
174 }
175 }
176
177 if (validation_failed) {
178 FAIL() << "Remote timestamp linting failed.";
179 return;
180 }
181
182 // Because the most common way for simulation to fail involves it dying, force
183 // it to fail in a slightly more controlled manner.
184 ASSERT_EXIT(RunSimulationAndExit(config),
185 ::testing::ExitedWithCode(EXIT_SUCCESS), "");
186
187 if (!validation_config->has_logging() || !configuration::MultiNode(config)) {
188 return;
189 }
190
191 // We will run all the logger configs in two modes:
192 // 1) We don't send any data on any non-infrastructure channels; this confirms
193 // that the logs are readable in the absence of any user applications being
194 // present.
195 // 2) We confirm that we can generate a good logfile that actually has data
196 // on every channel (some checks in the LogReader may not get hit if there
197 // is no data on a given channel).
198 const std::string log_path = aos::testing::TestTmpDir() + "/logs/";
199 for (const bool send_data_on_channels : {false, true}) {
200 SCOPED_TRACE(send_data_on_channels);
Pallavi Madhukaraaba67e2023-09-08 14:20:00 -0700201 // Single nodes (multi-nodes with node count = 1) will not produce readable
202 // logs in the absense of data.
203 if (!send_data_on_channels && (configuration::NodesCount(config) == 1u)) {
204 continue;
205 }
Pallavi Madhukar3076d5c2023-09-09 10:23:26 -0700206 // Send timing report when we are sending data.
207 const bool do_skip_timing_report = !send_data_on_channels;
James Kuszmaul464012b2024-03-20 14:12:08 -0700208 for (const LoggerNodeSetValidationStatic &logger_set :
James Kuszmaul827bd212023-05-15 23:57:39 -0700209 *validation_config->logging()->logger_sets()) {
James Kuszmaul464012b2024-03-20 14:12:08 -0700210 SCOPED_TRACE(aos::FlatbufferToJson(&logger_set.AsFlatbuffer()));
James Kuszmaul827bd212023-05-15 23:57:39 -0700211 aos::SimulatedEventLoopFactory factory(config);
212 std::vector<std::unique_ptr<LoggerState>> loggers;
James Kuszmaul464012b2024-03-20 14:12:08 -0700213 if (logger_set.has_loggers() && logger_set.loggers()->size() > 0) {
James Kuszmaul827bd212023-05-15 23:57:39 -0700214 std::vector<std::string> logger_nodes;
James Kuszmaul464012b2024-03-20 14:12:08 -0700215 for (const auto &node : *logger_set.loggers()) {
216 logger_nodes.push_back(node.str());
James Kuszmaul827bd212023-05-15 23:57:39 -0700217 }
Pallavi Madhukar3076d5c2023-09-09 10:23:26 -0700218 loggers = MakeLoggersForNodes(&factory, logger_nodes, log_path,
219 do_skip_timing_report);
James Kuszmaul827bd212023-05-15 23:57:39 -0700220 } else {
Pallavi Madhukar3076d5c2023-09-09 10:23:26 -0700221 loggers =
222 MakeLoggersForAllNodes(&factory, log_path, do_skip_timing_report);
James Kuszmaul827bd212023-05-15 23:57:39 -0700223 }
224
225 std::vector<std::unique_ptr<EventLoop>> test_loops;
226 std::map<std::string, std::vector<std::unique_ptr<RawSender>>>
227 test_senders;
228
229 if (send_data_on_channels) {
230 // Make a sender on every non-infrastructure channel on every node
231 // (including channels that may not be observable by the current logger
232 // set).
233 for (const aos::Node *node : configuration::GetNodes(config)) {
234 test_loops.emplace_back(factory.MakeEventLoop("", node));
235 for (const aos::Channel *channel : *config->channels()) {
236 // TODO(james): Make a more sophisticated check for "infrastructure"
237 // channels than just looking for a "/aos" in the channel--we don't
238 // accidentally want to spam nonsense data onto any timestamp
239 // channels, though.
240 if (configuration::ChannelIsSendableOnNode(channel, node) &&
241 channel->name()->str().find("/aos") == std::string::npos &&
242 channel->logger() != LoggerConfig::NOT_LOGGED) {
243 test_senders[node->name()->str()].emplace_back(
244 test_loops.back()->MakeRawSender(channel));
245 RawSender *sender =
246 test_senders[node->name()->str()].back().get();
247 test_loops.back()->OnRun([sender, channel]() {
248 flatbuffers::DetachedBuffer buffer =
249 JsonToFlatbuffer("{}", channel->schema());
250 sender->CheckOk(sender->Send(buffer.data(), buffer.size()));
251 });
252 }
253 }
254 }
255 }
256
257 factory.RunFor(std::chrono::seconds(2));
258
259 // Get all of the loggers to close before trying to read the logfiles.
260 loggers.clear();
261
262 // Confirm that we can read the log, and that if we put data in it that we
263 // can find data on all the nodes that the user cares about.
264 logger::LogReader reader(logger::SortParts(logger::FindLogs(log_path)));
265 SimulatedEventLoopFactory replay_factory(reader.configuration());
266 reader.RegisterWithoutStarting(&replay_factory);
267
268 // Find every channel we deliberately sent data on, and if it is for a
269 // node that we care about, confirm that we get it during replay.
270 std::vector<std::unique_ptr<EventLoop>> replay_loops;
James Kuszmaul827bd212023-05-15 23:57:39 -0700271 for (const aos::Node *node :
272 configuration::GetNodes(replay_factory.configuration())) {
273 // If the user doesn't care about this node, don't check it.
James Kuszmaul464012b2024-03-20 14:12:08 -0700274 if (!NodeInList(logger_set.has_replay_nodes()
275 ? logger_set.replay_nodes()->AsFlatbufferVector()
276 : nullptr,
277 node)) {
James Kuszmaul827bd212023-05-15 23:57:39 -0700278 continue;
279 }
280 replay_loops.emplace_back(replay_factory.MakeEventLoop("", node));
James Kuszmaul827bd212023-05-15 23:57:39 -0700281 }
282
283 std::vector<std::pair<const aos::Node *, std::unique_ptr<RawFetcher>>>
James Kuszmaul464012b2024-03-20 14:12:08 -0700284 fetchers;
285 for (const auto &node_senders : test_senders) {
286 for (const auto &sender : node_senders.second) {
287 for (auto &loop : replay_loops) {
288 if (configuration::ChannelIsReadableOnNode(sender->channel(),
289 loop->node())) {
290 fetchers.push_back(std::make_pair(
291 loop->node(),
292 loop->MakeRawFetcher(configuration::GetChannel(
293 replay_factory.configuration(), sender->channel(),
294 loop->name(), loop->node()))));
295 }
James Kuszmaul827bd212023-05-15 23:57:39 -0700296 }
297 }
298 }
299
300 replay_factory.Run();
301
James Kuszmaul464012b2024-03-20 14:12:08 -0700302 for (auto &pair : fetchers) {
James Kuszmaul827bd212023-05-15 23:57:39 -0700303 EXPECT_TRUE(pair.second->Fetch())
304 << "Failed to log or replay any data on "
305 << configuration::StrippedChannelToString(pair.second->channel())
James Kuszmaul464012b2024-03-20 14:12:08 -0700306 << " reading from " << logger::MaybeNodeName(pair.first)
307 << " with source node "
308 << (pair.second->channel()->has_source_node()
309 ? pair.second->channel()->source_node()->string_view()
310 : "")
311 << ".";
James Kuszmaul827bd212023-05-15 23:57:39 -0700312 }
313
314 reader.Deregister();
315
316 // Clean up the logs.
317 UnlinkRecursive(log_path);
318 }
319 }
320}
321
322} // namespace aos::util