Make line search in our timestamp solving code more correct
We had a mix of Newton's method and the equality-constrained primal-dual
solver strategy in the same algorithm. Move over fully to using the
primal-dual algorithm in
https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf, Algorithm 10.2.
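For reference, a minimal sketch (not the shipped code) of the residual that
algorithm drives to zero, assuming Eigen and names that loosely mirror
multinode_timestamp_filter.cc:

  #include <Eigen/Dense>

  // r(y, v) = [r_dual; r_primal] = [gradient + A^T v; A y - b].  The Newton
  // step solves [hessian, A^T; A, 0] [dy; dv] = -r(y, v), and line search
  // then shrinks the step until ||r|| decreases enough.
  Eigen::VectorXd Residual(const Eigen::VectorXd &gradient,
                           const Eigen::MatrixXd &A,
                           const Eigen::VectorXd &Axmb,
                           const Eigen::VectorXd &v) {
    Eigen::VectorXd r(gradient.rows() + Axmb.rows());
    r.head(gradient.rows()) = gradient + A.transpose() * v;  // dual residual
    r.tail(Axmb.rows()) = Axmb;                              // primal residual
    return r;
  }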
Line search was occasionally getting stuck at a local minimum. To fix
that, we add a limit to how small a step it will take. This ensures we
always make some progress, even if the step is small, and fixes both the
logs where we need line search and the logs where line search hurts
convergence.
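Roughly, the modified backtracking loop looks like the sketch below, where
the constants match the new values in multinode_timestamp_filter.h and
r_squared_at is a hypothetical callback standing in for evaluating
RSquaredUnconstrained at a candidate step size:

  #include <cmath>
  #include <functional>

  double LineSearch(const std::function<double(double)> &r_squared_at,
                    double r_orig_squared) {
    constexpr double kUnconstrainedAlpha = 0.5;
    constexpr double kBeta = 0.5;
    constexpr double kLineSearchStopThreshold = 0.4;
    double t = 1.0;
    while (true) {
      const double r_squared = r_squared_at(t);
      // Accept if the residual shrank enough (Boyd, Algorithm 10.2), or once
      // t has been backed off below the stop threshold, so we always make
      // some forward progress instead of shrinking the step forever.
      if (r_squared <=
              std::pow(1.0 - kUnconstrainedAlpha * t, 2.0) * r_orig_squared ||
          t < kLineSearchStopThreshold) {
        return t;
      }
      t *= kBeta;
    }
  }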
Change-Id: I464e3f65f79bd42c50f7bc52d00337d7b22bf515
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/network/multinode_timestamp_filter.cc b/aos/network/multinode_timestamp_filter.cc
index ddb7864..55b6b02 100644
--- a/aos/network/multinode_timestamp_filter.cc
+++ b/aos/network/multinode_timestamp_filter.cc
@@ -459,7 +459,8 @@
return result;
}
-Eigen::VectorXd NewtonSolver::Newton(const Problem::Derivatives &derivatives,
+Eigen::VectorXd NewtonSolver::Newton(const Eigen::Ref<const Eigen::VectorXd> y,
+ const Problem::Derivatives &derivatives,
size_t iteration) {
// https://www.cs.purdue.edu/homes/jhonorio/16spring-cs52000-equality.pdf
// https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf chapter 10 is good
@@ -479,6 +480,10 @@
Eigen::VectorXd b = Eigen::VectorXd::Zero(a.rows());
b.block(0, 0, derivatives.gradient.rows(), 1) = -derivatives.gradient;
if (derivatives.Axmb.rows()) {
+ const Eigen::Ref<const Eigen::VectorXd> v =
+ y.block(derivatives.hessian.rows(), 0, derivatives.A.rows(), 1);
+ b.block(0, 0, derivatives.gradient.rows(), 1) -=
+ derivatives.A.transpose() * v;
b.block(derivatives.gradient.rows(), 0, derivatives.Axmb.rows(), 1) =
-derivatives.Axmb;
}
@@ -517,9 +522,6 @@
// Note: all the prints are transposed so they fit on one line instead of
// having a ton of massive column vectors being printed all the time.
SOLVE_VLOG(my_solve_number_, 2)
- << " r_primal: " << std::setprecision(12) << std::fixed
- << std::setfill(' ') << derivatives.Axmb.transpose().format(kHeavyFormat);
- SOLVE_VLOG(my_solve_number_, 2)
<< " r_dual: " << std::setprecision(12) << std::fixed
<< std::setfill(' ')
<< derivatives.gradient.transpose().format(kHeavyFormat) << " + "
@@ -527,8 +529,10 @@
t * step.block(derivatives.gradient.rows(), 0, 1, 1))
.transpose() *
derivatives.A)
- .transpose()
.format(kHeavyFormat);
+ SOLVE_VLOG(my_solve_number_, 2)
+ << " r_primal: " << std::setprecision(12) << std::fixed
+ << std::setfill(' ') << derivatives.Axmb.transpose().format(kHeavyFormat);
return (derivatives.gradient +
derivatives.A.transpose() *
(y.block(derivatives.gradient.rows(), 0, 1, 1) +
@@ -554,12 +558,18 @@
my_solve_number_, y, false, false, absl::Span<const size_t>());
double r_orig_squared = RSquaredUnconstrained(derivatives, y, y, 0.0);
while (true) {
- const Eigen::VectorXd step = Newton(derivatives, iteration);
+ SOLVE_VLOG(my_solve_number_, 1) << "Starting iteration " << iteration;
+
+ const Eigen::VectorXd step = Newton(y, derivatives, iteration);
solution_node = derivatives.solution_node;
+ SOLVE_VLOG(my_solve_number_, 1)
+ << " y(" << iteration << ") is " << y.transpose().format(kHeavyFormat);
+ PrintDerivatives(derivatives, y, "", 3);
SOLVE_VLOG(my_solve_number_, 2)
- << "Step " << iteration << " -> " << std::setprecision(12) << std::fixed
- << std::setfill(' ') << step.transpose().format(kHeavyFormat);
+ << " initial step(" << iteration << ") -> " << std::setprecision(12)
+ << std::fixed << std::setfill(' ')
+ << step.transpose().format(kHeavyFormat);
// We now have a search direction. Line search and go.
double t = 1.0;
@@ -576,16 +586,30 @@
break;
}
+ SOLVE_VLOG(my_solve_number_, 2)
+ << " Not close enough, "
+ << step.block(0, 0, problem->states(), 1).lpNorm<Eigen::Infinity>()
+ << " >= 1e-4 || " << derivatives.Axmb.squaredNorm() << " >= 1e-8";
+
// Now, do line search.
while (true) {
- SOLVE_VLOG(my_solve_number_, 3) << " Trying t: " << t;
Problem::Derivatives new_derivatives =
problem->ComputeDerivatives(my_solve_number_, y + t * step, false,
false, absl::Span<const size_t>());
const double r_squared =
RSquaredUnconstrained(new_derivatives, y, step, t);
+ SOLVE_VLOG(my_solve_number_, 2)
+ << std::setprecision(12) << std::setfill(' ') << " |r| < |r+1| "
+ << r_orig_squared << " < " << r_squared << " for step size " << t;
// See Boyd, section 10.3.2, algorithm 10.2
- if (r_squared <= std::pow(1.0 - kAlpha * t, 2.0) * r_orig_squared) {
+ if (r_squared <=
+ std::pow(1.0 - kUnconstrainedAlpha * t, 2.0) * r_orig_squared ||
+ t < kLineSearchStopThreshold) {
+ // This is modified because we have seen cases where the way we handle
+ // equality constraints sometimes ends up increasing the cost slightly.
+ // We are better off taking a small step and trying again than shrinking
+ // our step forever.
+ //
// Save the derivatives and norm computed for the next iteration to save
// CPU.
derivatives = std::move(new_derivatives);
@@ -1350,7 +1374,8 @@
}
SOLVE_VLOG(solve_number, 2)
- << " live " << points_[j].time << " vs solution "
+ << std::setprecision(12) << std::fixed << " live "
+ << points_[j].time << " vs solution "
<< base_clock_[j].time +
std::chrono::nanoseconds(static_cast<int64_t>(
std::round(data(NodeToFullSolutionIndex(j)))))
diff --git a/aos/network/multinode_timestamp_filter.h b/aos/network/multinode_timestamp_filter.h
index 3d5a125..828c489 100644
--- a/aos/network/multinode_timestamp_filter.h
+++ b/aos/network/multinode_timestamp_filter.h
@@ -84,6 +84,10 @@
// Ratio to let the cost increase when line searching. A small increase is
// fine since we aren't perfectly convex.
static constexpr double kAlpha = -0.15;
+ // Ratio to require the cost to decrease when line searching.
+ static constexpr double kUnconstrainedAlpha = 0.5;
+ // Unconstrained min line search distance to guarantee forward progress.
+ static constexpr double kLineSearchStopThreshold = 0.4;
// Line search step parameter.
static constexpr double kBeta = 0.5;
static constexpr double kMu = 2.0;
@@ -160,7 +164,8 @@
// used for the equality constraint. The last term is the scalar on the
// equality constraint. This needs to be removed from the solution to get the
// actual newton step.
- Eigen::VectorXd Newton(const Problem::Derivatives &derivatives,
+ Eigen::VectorXd Newton(const Eigen::Ref<const Eigen::VectorXd> y,
+ const Problem::Derivatives &derivatives,
size_t iteration);
// The solve number for this instance of the problem.