Fix convergence issue when solving for time
In the unconstrained optimization problem, we were taking an undamped
Newton step. This resulted in the solver getting stuck in cycles on
some problems.
The theoretically correct fix is to add a line search, so add it.
Section 10.3 of https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf
documents the algorithm.
This makes a previously-failing log pass.
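
For reference, here is a minimal, self-contained sketch of the algorithm
being added: infeasible-start Newton with a backtracking line search on
the residual norm (Boyd, sections 10.3.1-10.3.2). The toy objective, the
kAlpha/kBeta values, and the tolerance below are illustrative only; they
are not the actual timestamp problem or its constants.

  // Minimize f(x) = sum_i exp(x_i) subject to A x = b, from an
  // infeasible starting point.
  #include <cmath>
  #include <iostream>

  #include <Eigen/Dense>

  constexpr double kAlpha = 0.25;  // Sufficient decrease, in (0, 0.5).
  constexpr double kBeta = 0.5;    // Backtracking shrink factor, in (0, 1).

  // Gradient of the toy objective. Its Hessian is diag(exp(x)), i.e. the
  // gradient placed on the diagonal.
  Eigen::VectorXd Gradient(const Eigen::VectorXd &x) {
    return x.array().exp();
  }

  // Squared norm of the primal-dual residual
  //   r(x, nu) = (grad f(x) + A^T nu, A x - b).
  double RSquared(const Eigen::VectorXd &x, const Eigen::VectorXd &nu,
                  const Eigen::MatrixXd &A, const Eigen::VectorXd &b) {
    return (Gradient(x) + A.transpose() * nu).squaredNorm() +
           (A * x - b).squaredNorm();
  }

  int main() {
    const Eigen::MatrixXd A = Eigen::MatrixXd::Ones(1, 2);
    const Eigen::VectorXd b = Eigen::VectorXd::Zero(1);
    Eigen::VectorXd x = Eigen::VectorXd::Constant(2, 2.0);  // A x != b here.
    Eigen::VectorXd nu = Eigen::VectorXd::Zero(1);

    for (int iteration = 0; iteration < 100; ++iteration) {
      const double r_squared = RSquared(x, nu, A, b);
      if (r_squared < 1e-18) break;

      // Newton step from the KKT system [H A^T; A 0] step = -r.
      Eigen::MatrixXd kkt = Eigen::MatrixXd::Zero(3, 3);
      kkt.topLeftCorner(2, 2).diagonal() = Gradient(x);
      kkt.topRightCorner(2, 1) = A.transpose();
      kkt.bottomLeftCorner(1, 2) = A;
      Eigen::VectorXd rhs(3);
      rhs.head(2) = -(Gradient(x) + A.transpose() * nu);
      rhs.tail(1) = -(A * x - b);
      const Eigen::VectorXd step = kkt.fullPivLu().solve(rhs);

      // Backtracking line search (algorithm 10.2): accept t once
      //   ||r(y + t * step)||^2 <= (1 - kAlpha * t)^2 * ||r(y)||^2.
      double t = 1.0;
      while (RSquared(x + t * step.head(2), nu + t * step.tail(1), A, b) >
             std::pow(1.0 - kAlpha * t, 2.0) * r_squared) {
        t *= kBeta;
      }
      x += t * step.head(2);
      nu += t * step.tail(1);
    }
    std::cout << x.transpose() << "\n";  // x converges to (0, 0), nu to -1.
    return 0;
  }

Each accepted step is guaranteed to shrink ||r||, which is exactly what
rules out the cycling the undamped step was hitting.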
Change-Id: I79a7bc8e81aafefdcde5984e8f61cb253f6b0e71
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/network/multinode_timestamp_filter.cc b/aos/network/multinode_timestamp_filter.cc
index 04a3631..a2e251d 100644
--- a/aos/network/multinode_timestamp_filter.cc
+++ b/aos/network/multinode_timestamp_filter.cc
@@ -508,6 +508,35 @@
return step;
}
+double NewtonSolver::RSquaredUnconstrained(
+ const Problem::Derivatives &derivatives,
+ Eigen::Ref<const Eigen::VectorXd> y, Eigen::Ref<const Eigen::VectorXd> step,
+ double t) {
+ // See https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf section 10.3.1
+ //
+ // Note: all the prints are transposed so they fit on one line instead of
+ // having a ton of massive column vectors being printed all the time.
+ SOLVE_VLOG(my_solve_number_, 2)
+ << " r_primal: " << std::setprecision(12) << std::fixed
+ << std::setfill(' ') << derivatives.Axmb.transpose().format(kHeavyFormat);
+ SOLVE_VLOG(my_solve_number_, 2)
+ << " r_dual: " << std::setprecision(12) << std::fixed
+ << std::setfill(' ')
+ << derivatives.gradient.transpose().format(kHeavyFormat) << " + "
+ << ((y.block(derivatives.gradient.rows(), 0, 1, 1) +
+ t * step.block(derivatives.gradient.rows(), 0, 1, 1))
+ .transpose() *
+ derivatives.A)
+ .transpose()
+ .format(kHeavyFormat);
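+ // ||r||^2 = ||r_dual||^2 + ||r_primal||^2, where r_dual is gradient + A^T nu
+ // (nu being the dual block of y + t * step, reconstructed here since it is
+ // not part of the derivatives) and r_primal is Axmb.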
+ return (derivatives.gradient +
+ derivatives.A.transpose() *
+ (y.block(derivatives.gradient.rows(), 0, 1, 1) +
+ t * step.block(derivatives.gradient.rows(), 0, 1, 1)))
+ .squaredNorm() +
+ derivatives.Axmb.squaredNorm();
+}
+
std::tuple<Eigen::VectorXd, size_t, size_t> NewtonSolver::SolveNewton(
Problem *problem, const size_t max_iterations) {
SOLVE_VLOG(my_solve_number_, 2)
@@ -515,13 +544,16 @@
problem->Prepare(my_solve_number_);
- Eigen::VectorXd data = Eigen::VectorXd::Zero(problem->states());
+ Eigen::VectorXd y = Eigen::VectorXd::Zero(problem->states() + 1);
size_t iteration = 0;
size_t solution_node;
+
+ // The derivatives and squared norm of r(y).
+ Problem::Derivatives derivatives = problem->ComputeDerivatives(
+ my_solve_number_, y, false, false, absl::Span<const size_t>());
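+ // With t = 0 the step argument is unused, so pass y as a placeholder.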
+ double r_orig_squared = RSquaredUnconstrained(derivatives, y, y, 0.0);
while (true) {
- const Problem::Derivatives derivatives = problem->ComputeDerivatives(
- my_solve_number_, data, false, false, absl::Span<const size_t>());
const Eigen::VectorXd step = Newton(derivatives, iteration);
solution_node = derivatives.solution_node;
@@ -530,10 +562,11 @@
<< std::setfill(' ') << step.transpose().format(kHeavyFormat);
// We now have a search direction. Line search and go.
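+ // Start with the full Newton step; the backtracking loop below shrinks t
+ // by kBeta until the residual norm has decreased enough to accept it.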
+ double t = 1.0;
- // We got there if the max step is small (this is strongly correlated to the
- // gradient since the Hessian is constant), and our solution node's time is
- // also close.
+ // We got there if the max step is small (this is strongly correlated to
+ // the gradient since the Hessian is constant), and our solution node's
+ // time is also close.
// Grad is < epsilon.
// Dual is < epsilon too.
@@ -543,7 +576,30 @@
break;
}
- data += step.block(0, 0, problem->states(), 1);
+ // Now, do line search.
+ while (true) {
+ SOLVE_VLOG(my_solve_number_, 3) << " Trying t: " << t;
+ Problem::Derivatives new_derivatives =
+ problem->ComputeDerivatives(my_solve_number_, y + t * step, false,
+ false, absl::Span<const size_t>());
+ const double r_squared =
+ RSquaredUnconstrained(new_derivatives, y, step, t);
+ // See Boyd, section 10.3.2, algorithm 10.2
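+ // Accept the step once ||r(y + t * step)||^2 <= (1 - kAlpha * t)^2 * ||r(y)||^2.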
+ if (r_squared <= std::pow(1.0 - kAlpha * t, 2.0) * r_orig_squared) {
+ // Save the derivatives and norm we just computed so the next iteration
+ // doesn't have to recompute them.
+ derivatives = std::move(new_derivatives);
+ r_orig_squared = r_squared;
+ SOLVE_VLOG(my_solve_number_, 3)
+ << " Line search terminated with a step of " << t;
+ break;
+ } else {
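+ // t can only equal 0.0 if repeated multiplication by kBeta underflowed it,
+ // meaning no step length along this direction reduced the residual enough.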
+ CHECK_NE(t, 0.0) << ": Failed on solve " << my_solve_number_;
+ }
+ t *= kBeta;
+ }
+
+ y += t * step;
++iteration;
@@ -555,8 +611,8 @@
// true for the first solution. Because we control the solver, as we
// determine that the double is getting too big, we can move that
// information to the int64 base clock. Threshold this to not be *too* big
- // since it makes it hard to debug as the data keeps jumping around.
- problem->Update(my_solve_number_, data);
+ // since it makes it hard to debug as y keeps jumping around.
+ problem->Update(my_solve_number_, y);
// And finally, don't let us iterate forever. If it isn't converging,
// report back.
@@ -571,7 +627,8 @@
SOLVE_VLOG(my_solve_number_, 1)
<< "Took " << iteration << " iterations to solve.";
- return std::make_tuple(std::move(data), solution_node, iteration);
+ return std::make_tuple(y.block(0, 0, problem->states(), 1), solution_node,
+ iteration);
}
Problem::Derivatives NewtonSolver::AddConstraintSlackVariable(
diff --git a/aos/network/multinode_timestamp_filter.h b/aos/network/multinode_timestamp_filter.h
index 959fe85..3d5a125 100644
--- a/aos/network/multinode_timestamp_filter.h
+++ b/aos/network/multinode_timestamp_filter.h
@@ -138,6 +138,14 @@
Eigen::VectorXd Rt(const Problem::Derivatives &derivatives,
Eigen::Ref<const Eigen::VectorXd> y, double t);
+ // Returns the squared norm of r for the unconstrained problem.
+ // (10.3.1 in Convex Optimization,
+ // https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf)
+ double RSquaredUnconstrained(const Problem::Derivatives &derivatives,
+ Eigen::Ref<const Eigen::VectorXd> y,
+ Eigen::Ref<const Eigen::VectorXd> step,
+ double t);
+
// Returns the constrained Newton step, t_inverse, and Rt.
std::tuple<Eigen::VectorXd, double, Eigen::VectorXd> ConstrainedNewton(
const Eigen::Ref<const Eigen::VectorXd> y,