aos/network: reduce number of messsages on reconnect

Also incidentally send one abort per old association.

Change-Id: I2b7e98861367daaccd5f8ac79be7f1d1d23af89e
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/network/message_bridge_server_lib.cc b/aos/network/message_bridge_server_lib.cc
index f6c0c4d..b0973ac 100644
--- a/aos/network/message_bridge_server_lib.cc
+++ b/aos/network/message_bridge_server_lib.cc
@@ -192,26 +192,33 @@
   return -1;
 }
 
-int ChannelState::NodeConnected(
-    const Node *node, sctp_assoc_t assoc_id, int stream, SctpServer *server,
-    aos::monotonic_clock::time_point monotonic_now) {
+int ChannelState::NodeConnected(const Node *node, sctp_assoc_t assoc_id,
+                                int stream, SctpServer *server,
+                                aos::monotonic_clock::time_point monotonic_now,
+                                std::vector<sctp_assoc_t> *reconnected) {
   VLOG(1) << "Channel " << channel_->name()->string_view() << " "
           << channel_->type()->string_view() << " mapped to stream " << stream
           << " for node " << node->name()->string_view() << " assoc_id "
           << assoc_id;
   for (ChannelState::Peer &peer : peers_) {
+    // The node name is the most reliable method of detecting the same peer
+    // because in a multihomed system, the same IP address may not always be
+    // used to connect.
     if (peer.connection->name()->string_view() == node->name()->string_view()) {
       // There's a peer already connected.  Disconnect them and take over.
-      if (peer.sac_assoc_id != 0) {
+      if (peer.sac_assoc_id != 0 &&
+          (std::find(reconnected->begin(), reconnected->end(),
+                     peer.sac_assoc_id) == reconnected->end())) {
+        reconnected->push_back(peer.sac_assoc_id);
         if (peer.sac_assoc_id == assoc_id) {
-          LOG(WARNING) << "Node " << node->name()->string_view()
-                       << " reconnecting on " << assoc_id
-                       << "with the same ID, something got lost";
+          LOG_EVERY_T(WARNING, 0.025)
+              << "Node " << node->name()->string_view() << " reconnecting on "
+              << assoc_id << " with the same ID, something got lost";
         } else {
-          LOG(WARNING) << "Node " << node->name()->string_view() << " "
-                       << " already connected on " << peer.sac_assoc_id
-                       << "aborting old connection and switching to "
-                       << assoc_id;
+          LOG_EVERY_T(WARNING, 0.025)
+              << "Node " << node->name()->string_view() << " "
+              << " already connected on " << peer.sac_assoc_id
+              << " aborting old connection and switching to " << assoc_id;
           server->Abort(peer.sac_assoc_id);
         }
       }
@@ -480,6 +487,14 @@
     // Account for the control channel and delivery times channel.
     size_t channel_index = kControlStreams();
     int node_index = -1;
+    // TODO(sarah.newman): it would be better to do a refactor so that peers
+    // don't belong to channels. I am trying to do a quick hack to reduce the
+    // number of log messages without potentially losing information because the
+    // number of messages is overwhelming right now at first boot. This also
+    // should mean that we only send a single abort per association change,
+    // which is more correct behavior.
+    std::vector<sctp_assoc_t> reconnected;
+    reconnected.reserve(connect->channels_to_transfer()->size());
     for (const Channel *channel : *connect->channels_to_transfer()) {
       bool matched = false;
       for (std::unique_ptr<ChannelState> &channel_state : channels_) {
@@ -489,7 +504,7 @@
         if (channel_state->Matches(channel)) {
           node_index = channel_state->NodeConnected(
               connect->node(), message->header.rcvinfo.rcv_assoc_id,
-              channel_index, &server_, monotonic_now);
+              channel_index, &server_, monotonic_now, &reconnected);
           CHECK_NE(node_index, -1);
           matched = true;
           break;
diff --git a/aos/network/message_bridge_server_lib.h b/aos/network/message_bridge_server_lib.h
index e70d34a..6f2be08 100644
--- a/aos/network/message_bridge_server_lib.h
+++ b/aos/network/message_bridge_server_lib.h
@@ -64,10 +64,13 @@
 
   // Needs to be called when a node (might have) disconnected.
   // Returns the node index which [dis]connected, or -1 if it didn't match.
+  // reconnected is a vector of associations which have already connected.
+  // This will potentially grow to the number of associations as we find reconnects.
   int NodeDisconnected(sctp_assoc_t assoc_id);
   int NodeConnected(const Node *node, sctp_assoc_t assoc_id, int stream,
                     SctpServer *server,
-                    aos::monotonic_clock::time_point monotonic_now);
+                    aos::monotonic_clock::time_point monotonic_now,
+                    std::vector<sctp_assoc_t> *reconnected);
 
   // Adds a new peer.
   void AddPeer(const Connection *connection, int node_index,
diff --git a/aos/network/sctp_client.cc b/aos/network/sctp_client.cc
index fac1114..a2c0439 100644
--- a/aos/network/sctp_client.cc
+++ b/aos/network/sctp_client.cc
@@ -52,7 +52,8 @@
   scheduler.assoc_value = SCTP_SS_PRIO;
   if (setsockopt(fd(), IPPROTO_SCTP, SCTP_STREAM_SCHEDULER, &scheduler,
                  sizeof(scheduler)) != 0) {
-    PLOG(WARNING) << "Failed to set scheduler";
+    LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
+                            << " [" << errno << "]";
   }
 }
 
diff --git a/aos/network/sctp_server.cc b/aos/network/sctp_server.cc
index 15da7c0..2f6a041 100644
--- a/aos/network/sctp_server.cc
+++ b/aos/network/sctp_server.cc
@@ -76,7 +76,8 @@
   scheduler.assoc_value = SCTP_SS_PRIO;
   if (setsockopt(fd(), IPPROTO_SCTP, SCTP_STREAM_SCHEDULER, &scheduler,
                  sizeof(scheduler)) != 0) {
-    PLOG(WARNING) << "Failed to set scheduler";
+    LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
+                            << " [" << errno << "]";
   }
 }
 
@@ -89,7 +90,8 @@
   sctp_priority.stream_value = priority;
   if (setsockopt(fd(), IPPROTO_SCTP, SCTP_STREAM_SCHEDULER_VALUE,
                  &sctp_priority, sizeof(sctp_priority)) != 0) {
-    PLOG(WARNING) << "Failed to set scheduler";
+    LOG_FIRST_N(WARNING, 1) << "Failed to set scheduler: " << strerror(errno)
+                            << " [" << errno << "]";
   }
 }