aos/network: reduce number of messsages on reconnect
Also incidentally send one abort per old association.
Change-Id: I2b7e98861367daaccd5f8ac79be7f1d1d23af89e
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/network/message_bridge_server_lib.cc b/aos/network/message_bridge_server_lib.cc
index f6c0c4d..b0973ac 100644
--- a/aos/network/message_bridge_server_lib.cc
+++ b/aos/network/message_bridge_server_lib.cc
@@ -192,26 +192,33 @@
return -1;
}
-int ChannelState::NodeConnected(
- const Node *node, sctp_assoc_t assoc_id, int stream, SctpServer *server,
- aos::monotonic_clock::time_point monotonic_now) {
+int ChannelState::NodeConnected(const Node *node, sctp_assoc_t assoc_id,
+ int stream, SctpServer *server,
+ aos::monotonic_clock::time_point monotonic_now,
+ std::vector<sctp_assoc_t> *reconnected) {
VLOG(1) << "Channel " << channel_->name()->string_view() << " "
<< channel_->type()->string_view() << " mapped to stream " << stream
<< " for node " << node->name()->string_view() << " assoc_id "
<< assoc_id;
for (ChannelState::Peer &peer : peers_) {
+ // The node name is the most reliable method of detecting the same peer
+ // because in a multihomed system, the same IP address may not always be
+ // used to connect.
if (peer.connection->name()->string_view() == node->name()->string_view()) {
// There's a peer already connected. Disconnect them and take over.
- if (peer.sac_assoc_id != 0) {
+ if (peer.sac_assoc_id != 0 &&
+ (std::find(reconnected->begin(), reconnected->end(),
+ peer.sac_assoc_id) == reconnected->end())) {
+ reconnected->push_back(peer.sac_assoc_id);
if (peer.sac_assoc_id == assoc_id) {
- LOG(WARNING) << "Node " << node->name()->string_view()
- << " reconnecting on " << assoc_id
- << "with the same ID, something got lost";
+ LOG_EVERY_T(WARNING, 0.025)
+ << "Node " << node->name()->string_view() << " reconnecting on "
+ << assoc_id << " with the same ID, something got lost";
} else {
- LOG(WARNING) << "Node " << node->name()->string_view() << " "
- << " already connected on " << peer.sac_assoc_id
- << "aborting old connection and switching to "
- << assoc_id;
+ LOG_EVERY_T(WARNING, 0.025)
+ << "Node " << node->name()->string_view() << " "
+ << " already connected on " << peer.sac_assoc_id
+ << " aborting old connection and switching to " << assoc_id;
server->Abort(peer.sac_assoc_id);
}
}
@@ -480,6 +487,14 @@
// Account for the control channel and delivery times channel.
size_t channel_index = kControlStreams();
int node_index = -1;
+ // TODO(sarah.newman): it would be better to do a refactor so that peers
+ // don't belong to channels. I am trying to do a quick hack to reduce the
+ // number of log messages without potentially losing information because the
+ // number of messages is overwhelming right now at first boot. This also
+ // should mean that we only send a single abort per association change,
+ // which is more correct behavior.
+ std::vector<sctp_assoc_t> reconnected;
+ reconnected.reserve(connect->channels_to_transfer()->size());
for (const Channel *channel : *connect->channels_to_transfer()) {
bool matched = false;
for (std::unique_ptr<ChannelState> &channel_state : channels_) {
@@ -489,7 +504,7 @@
if (channel_state->Matches(channel)) {
node_index = channel_state->NodeConnected(
connect->node(), message->header.rcvinfo.rcv_assoc_id,
- channel_index, &server_, monotonic_now);
+ channel_index, &server_, monotonic_now, &reconnected);
CHECK_NE(node_index, -1);
matched = true;
break;
diff --git a/aos/network/message_bridge_server_lib.h b/aos/network/message_bridge_server_lib.h
index e70d34a..6f2be08 100644
--- a/aos/network/message_bridge_server_lib.h
+++ b/aos/network/message_bridge_server_lib.h
@@ -64,10 +64,13 @@
// Needs to be called when a node (might have) disconnected.
// Returns the node index which [dis]connected, or -1 if it didn't match.
+ // reconnected is a vector of associations which have already connected.
+ // This will potentially grow to the number of associations as we find reconnects.
int NodeDisconnected(sctp_assoc_t assoc_id);
int NodeConnected(const Node *node, sctp_assoc_t assoc_id, int stream,
SctpServer *server,
- aos::monotonic_clock::time_point monotonic_now);
+ aos::monotonic_clock::time_point monotonic_now,
+ std::vector<sctp_assoc_t> *reconnected);
// Adds a new peer.
void AddPeer(const Connection *connection, int node_index,
diff --git a/aos/network/sctp_client.cc b/aos/network/sctp_client.cc
index fac1114..a2c0439 100644
--- a/aos/network/sctp_client.cc
+++ b/aos/network/sctp_client.cc
@@ -52,7 +52,8 @@
scheduler.assoc_value = SCTP_SS_PRIO;
if (setsockopt(fd(), IPPROTO_SCTP, SCTP_STREAM_SCHEDULER, &scheduler,
sizeof(scheduler)) != 0) {
- PLOG(WARNING) << "Failed to set scheduler";
+ LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
+ << " [" << errno << "]";
}
}
diff --git a/aos/network/sctp_server.cc b/aos/network/sctp_server.cc
index 15da7c0..2f6a041 100644
--- a/aos/network/sctp_server.cc
+++ b/aos/network/sctp_server.cc
@@ -76,7 +76,8 @@
scheduler.assoc_value = SCTP_SS_PRIO;
if (setsockopt(fd(), IPPROTO_SCTP, SCTP_STREAM_SCHEDULER, &scheduler,
sizeof(scheduler)) != 0) {
- PLOG(WARNING) << "Failed to set scheduler";
+ LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
+ << " [" << errno << "]";
}
}
@@ -89,7 +90,8 @@
sctp_priority.stream_value = priority;
if (setsockopt(fd(), IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE,
&sctp_priority, sizeof(sctp_priority)) != 0) {
- PLOG(WARNING) << "Failed to set scheduler";
+ LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
+ << " [" << errno << "]";
}
}