Explode when time is unsortable with a better error

So far, this has been shown to be because when you solve for different
nodes, you get slightly different answers.  And those answers can be
un-orderable slightly.

Make it very clear if this is the contributing effect by re-solving for
each node with the bad solution, and show how the different nodes
produce different solutions.  Then, explode.  This is more actionable.

Change-Id: I2e72b49987a835f15197a4dd8a7176a94cde0bff
diff --git a/aos/network/multinode_timestamp_filter.cc b/aos/network/multinode_timestamp_filter.cc
index 84de0f7..c338bf3 100644
--- a/aos/network/multinode_timestamp_filter.cc
+++ b/aos/network/multinode_timestamp_filter.cc
@@ -177,7 +177,7 @@
     if (grad) {
       std::stringstream ss;
       ss << " grad ["
-         << absl::StrJoin(absl::Span<const double>(grad, filters_.size() - 1u),
+         << absl::StrJoin(absl::Span<const double>(grad, LiveNodesCount() - 1u),
                           ", ", MyFormatter())
          << "]";
       gradient = ss.str();
@@ -213,16 +213,14 @@
     std::string gradient = "0.0";
     for (const struct FilterPair &filter : filters_[i]) {
       if (i != solution_node_ && live(i)) {
-        gradients[NodeToSolutionIndex(i)].emplace_back(
-            filter.filter->DebugDCostDta(base_clock_[i], 0.0,
-                                         base_clock_[filter.b_index], 0.0, i,
-                                         filter.b_index));
+        gradients[i].emplace_back(filter.filter->DebugDCostDta(
+            base_clock_[i], 0.0, base_clock_[filter.b_index], 0.0, i,
+            filter.b_index));
       }
       if (filter.b_index != solution_node_ && live(filter.b_index)) {
-        gradients[NodeToSolutionIndex(filter.b_index)].emplace_back(
-            filter.filter->DebugDCostDtb(base_clock_[i], 0.0,
-                                         base_clock_[filter.b_index], 0.0, i,
-                                         filter.b_index));
+        gradients[filter.b_index].emplace_back(filter.filter->DebugDCostDtb(
+            base_clock_[i], 0.0, base_clock_[filter.b_index], 0.0, i,
+            filter.b_index));
       }
     }
   }
@@ -828,7 +826,7 @@
     TimestampProblem *problem,
     const std::vector<aos::monotonic_clock::time_point> &base_times) {
   // Ok, now solve for the minimum time on each channel.
-  std::vector<aos::monotonic_clock::time_point> times;
+  std::vector<aos::monotonic_clock::time_point> result_times;
   NoncausalTimestampFilter *next_filter = nullptr;
   size_t solution_index = 0;
   {
@@ -884,15 +882,15 @@
           VLOG(1) << "  " << solution[i];
         }
       }
-      if (times.empty()) {
-        times = std::move(solution);
+      if (result_times.empty()) {
+        result_times = std::move(solution);
         next_filter = next_node_filter;
         solution_index = node_a_index;
         ++node_a_index;
         continue;
       }
 
-      switch (CompareTimes(times, solution, false)) {
+      switch (CompareTimes(result_times, solution, false)) {
         // The old solution is before or at the new solution.  This means that
         // the old solution is a better result, so ignore this one.
         case TimeComparison::kBefore:
@@ -900,18 +898,20 @@
           break;
         case TimeComparison::kAfter:
           // The new solution is better!  Save it.
-          times = std::move(solution);
+          result_times = std::move(solution);
           next_filter = next_node_filter;
           solution_index = node_a_index;
           break;
-        case TimeComparison::kInvalid:
+        case TimeComparison::kInvalid: {
           // If times are close enough, drop the invalid time.
-          if (InvalidDistance(times, solution) < chrono::nanoseconds(500)) {
+          if (InvalidDistance(result_times, solution) <
+              chrono::nanoseconds(500)) {
             VLOG(1) << "Times can't be compared by "
-                    << InvalidDistance(times, solution).count() << "ns";
-            for (size_t i = 0; i < times.size(); ++i) {
-              VLOG(1) << "  " << times[i] << " vs " << solution[i] << " -> "
-                      << (times[i] - solution[i]).count() << "ns";
+                    << InvalidDistance(result_times, solution).count() << "ns";
+            for (size_t i = 0; i < result_times.size(); ++i) {
+              VLOG(1) << "  " << result_times[i] << " vs " << solution[i]
+                      << " -> " << (result_times[i] - solution[i]).count()
+                      << "ns";
             }
             VLOG(1) << "Ignoring because it is close enough.";
             next_node_filter->Consume();
@@ -920,26 +920,58 @@
           // Somehow the new solution is better *and* worse than the old
           // solution...  This is an internal failure because that means time
           // goes backwards on a node.
-          CHECK_EQ(times.size(), solution.size());
+          CHECK_EQ(result_times.size(), solution.size());
           LOG(INFO) << "Times can't be compared by "
-                    << InvalidDistance(times, solution).count() << "ns";
-          for (size_t i = 0; i < times.size(); ++i) {
-            LOG(INFO) << "  " << times[i] << " vs " << solution[i] << " -> "
-                      << (times[i] - solution[i]).count() << "ns";
+                    << InvalidDistance(result_times, solution).count() << "ns";
+          for (size_t i = 0; i < result_times.size(); ++i) {
+            LOG(INFO) << "  " << result_times[i] << " vs " << solution[i]
+                      << " -> " << (result_times[i] - solution[i]).count()
+                      << "ns";
           }
+          FLAGS_v = 1;
+
+          // Attempting solution for each node.
+          for (size_t a_index = 0; a_index < solution.size(); ++a_index) {
+            if (!problem->live(a_index)) {
+              continue;
+            }
+            for (size_t node_index = 0; node_index < solution.size();
+                 ++node_index) {
+              // Offset everything based on the elapsed time since the last
+              // solution on the node we are solving for.  The rate that time
+              // elapses should be ~1.
+              problem->set_base_clock(node_index, solution[node_index]);
+            }
+
+            problem->set_solution_node(a_index);
+            problem->Debug();
+            std::vector<monotonic_clock::time_point> resolve_solution =
+                problem->Solve();
+
+            if (VLOG_IS_ON(1)) {
+              VLOG(1) << "Candidate solution for resolved node " << a_index
+                      << " is";
+              for (size_t i = 0; i < resolve_solution.size(); ++i) {
+                VLOG(1) << "  " << resolve_solution[i] << " vs original "
+                        << solution[i] << " -> "
+                        << (resolve_solution[i] - solution[i]).count();
+              }
+            }
+          }
+
           LOG(FATAL) << "Please investigate.";
-          break;
+        } break;
       }
       ++node_a_index;
     }
   }
   if (VLOG_IS_ON(1)) {
     VLOG(1) << "Best solution is for node " << solution_index;
-    for (size_t i = 0; i < times.size(); ++i) {
-      VLOG(1) << "  " << times[i];
+    for (size_t i = 0; i < result_times.size(); ++i) {
+      VLOG(1) << "  " << result_times[i];
     }
   }
-  return std::make_tuple(next_filter, std::move(times));
+  return std::make_tuple(next_filter, std::move(result_times));
 }
 
 std::optional<std::tuple<distributed_clock::time_point,
@@ -950,9 +982,10 @@
   TimestampProblem problem = MakeProblem();
 
   // Ok, now solve for the minimum time on each channel.
-  std::vector<aos::monotonic_clock::time_point> times;
+  std::vector<aos::monotonic_clock::time_point> result_times;
   NoncausalTimestampFilter *next_filter = nullptr;
-  std::tie(next_filter, times) = NextSolution(&problem, last_monotonics_);
+  std::tie(next_filter, result_times) =
+      NextSolution(&problem, last_monotonics_);
 
   CHECK(!all_done_);
 
@@ -1001,19 +1034,19 @@
 
     VLOG(1) << "Resolving with updated base times for accuracy.";
     std::tie(resolved_next_filter, resolved_times) =
-        NextSolution(&problem, times);
+        NextSolution(&problem, result_times);
 
     first_solution_ = false;
     next_filter = resolved_next_filter;
 
     // Force any unknown nodes to track the distributed clock (which starts at 0
     // too).
-    for (monotonic_clock::time_point &time : times) {
+    for (monotonic_clock::time_point &time : result_times) {
       if (time == monotonic_clock::min_time) {
         time = monotonic_clock::epoch();
       }
     }
-    times = std::move(resolved_times);
+    result_times = std::move(resolved_times);
     next_filter->Consume();
   } else {
     next_filter->Consume();
@@ -1021,7 +1054,7 @@
     // want to consume it.  But, if this is the first time around, we want to
     // re-solve by recursing (once) to pickup the better base.
 
-    TimeComparison compare = CompareTimes(last_monotonics_, times, true);
+    TimeComparison compare = CompareTimes(last_monotonics_, result_times, true);
     switch (compare) {
       case TimeComparison::kBefore:
         break;
@@ -1031,9 +1064,9 @@
       case TimeComparison::kEq:
         return NextTimestamp();
       case TimeComparison::kInvalid:
-        CHECK_EQ(last_monotonics_.size(), times.size());
-        for (size_t i = 0; i < times.size(); ++i) {
-          LOG(INFO) << "  " << last_monotonics_[i] << " vs " << times[i];
+        CHECK_EQ(last_monotonics_.size(), result_times.size());
+        for (size_t i = 0; i < result_times.size(); ++i) {
+          LOG(INFO) << "  " << last_monotonics_[i] << " vs " << result_times[i];
         }
         LOG(FATAL) << "Found solutions which can't be ordered.";
         break;
@@ -1043,16 +1076,16 @@
   // Now, figure out what distributed should be.  It should move at the rate of
   // the max elapsed time so that conversions to and from it don't round to bad
   // values.
-  const chrono::nanoseconds dt = MaxElapsedTime(last_monotonics_, times);
+  const chrono::nanoseconds dt = MaxElapsedTime(last_monotonics_, result_times);
   last_distributed_ += dt;
-  for (size_t i = 0; i < times.size(); ++i) {
-    if (times[i] == monotonic_clock::min_time) {
+  for (size_t i = 0; i < result_times.size(); ++i) {
+    if (result_times[i] == monotonic_clock::min_time) {
       // Found an unknown node.  Move its time along by the amount the
       // distributed clock moved.
-      times[i] = last_monotonics_[i] + dt;
+      result_times[i] = last_monotonics_[i] + dt;
     }
   }
-  last_monotonics_ = std::move(times);
+  last_monotonics_ = std::move(result_times);
 
   // And freeze everything.
   {