Make line search in our timestamp solving code more correct
We had a mix of Newton's method and the equality-constrained primal-dual
solver strategy in the same algorithm. Move over fully to using the
primal-dual algorithm in
https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf, Algorithm 10.2.
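For reference, a minimal sketch (not the shipped code) of the residual that
algorithm drives to zero, assuming Eigen and names that loosely mirror
multinode_timestamp_filter.cc:

  #include <Eigen/Dense>

  // r(y, v) = [r_dual; r_primal] = [gradient + A^T v; A y - b].  The Newton
  // step solves [hessian, A^T; A, 0] [dy; dv] = -r(y, v), and line search
  // then shrinks the step until ||r|| decreases enough.
  Eigen::VectorXd Residual(const Eigen::VectorXd &gradient,
                           const Eigen::MatrixXd &A,
                           const Eigen::VectorXd &Axmb,
                           const Eigen::VectorXd &v) {
    Eigen::VectorXd r(gradient.rows() + Axmb.rows());
    r.head(gradient.rows()) = gradient + A.transpose() * v;  // dual residual
    r.tail(Axmb.rows()) = Axmb;                              // primal residual
    return r;
  }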
Line search was occasionally getting stuck at a local minimum. To fix
that, we add a limit to how small a step it will take. This ensures we
always make some progress, even if the step is small, and fixes both the
logs where we need line search and the logs where line search hurts
convergence.
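Roughly, the modified backtracking loop looks like the sketch below, where
the constants match the new values in multinode_timestamp_filter.h and
r_squared_at is a hypothetical callback standing in for evaluating
RSquaredUnconstrained at a candidate step size:

  #include <cmath>
  #include <functional>

  double LineSearch(const std::function<double(double)> &r_squared_at,
                    double r_orig_squared) {
    constexpr double kUnconstrainedAlpha = 0.5;
    constexpr double kBeta = 0.5;
    constexpr double kLineSearchStopThreshold = 0.4;
    double t = 1.0;
    while (true) {
      const double r_squared = r_squared_at(t);
      // Accept if the residual shrank enough (Boyd, Algorithm 10.2), or once
      // t has been backed off below the stop threshold, so we always make
      // some forward progress instead of shrinking the step forever.
      if (r_squared <=
              std::pow(1.0 - kUnconstrainedAlpha * t, 2.0) * r_orig_squared ||
          t < kLineSearchStopThreshold) {
        return t;
      }
      t *= kBeta;
    }
  }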
Change-Id: I464e3f65f79bd42c50f7bc52d00337d7b22bf515
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/network/multinode_timestamp_filter.cc b/aos/network/multinode_timestamp_filter.cc
index ddb7864..55b6b02 100644
--- a/aos/network/multinode_timestamp_filter.cc
+++ b/aos/network/multinode_timestamp_filter.cc
@@ -459,7 +459,8 @@
return result;
}
-Eigen::VectorXd NewtonSolver::Newton(const Problem::Derivatives &derivatives,
+Eigen::VectorXd NewtonSolver::Newton(const Eigen::Ref<const Eigen::VectorXd> y,
+ const Problem::Derivatives &derivatives,
size_t iteration) {
// https://www.cs.purdue.edu/homes/jhonorio/16spring-cs52000-equality.pdf
// https://web.stanford.edu/~boyd/cvxbook/bv_cvxbook.pdf chapter 10 is good
@@ -479,6 +480,10 @@
Eigen::VectorXd b = Eigen::VectorXd::Zero(a.rows());
b.block(0, 0, derivatives.gradient.rows(), 1) = -derivatives.gradient;
if (derivatives.Axmb.rows()) {
+ const Eigen::Ref<const Eigen::VectorXd> v =
+ y.block(derivatives.hessian.rows(), 0, derivatives.A.rows(), 1);
+ b.block(0, 0, derivatives.gradient.rows(), 1) -=
+ derivatives.A.transpose() * v;
b.block(derivatives.gradient.rows(), 0, derivatives.Axmb.rows(), 1) =
-derivatives.Axmb;
}
@@ -517,9 +522,6 @@
// Note: all the prints are transposed so they fit on one line instead of
// having a ton of massive column vectors being printed all the time.
SOLVE_VLOG(my_solve_number_, 2)
- << " r_primal: " << std::setprecision(12) << std::fixed
- << std::setfill(' ') << derivatives.Axmb.transpose().format(kHeavyFormat);
- SOLVE_VLOG(my_solve_number_, 2)
<< " r_dual: " << std::setprecision(12) << std::fixed
<< std::setfill(' ')
<< derivatives.gradient.transpose().format(kHeavyFormat) << " + "
@@ -527,8 +529,10 @@
t * step.block(derivatives.gradient.rows(), 0, 1, 1))
.transpose() *
derivatives.A)
- .transpose()
.format(kHeavyFormat);
+ SOLVE_VLOG(my_solve_number_, 2)
+ << " r_primal: " << std::setprecision(12) << std::fixed
+ << std::setfill(' ') << derivatives.Axmb.transpose().format(kHeavyFormat);
return (derivatives.gradient +
derivatives.A.transpose() *
(y.block(derivatives.gradient.rows(), 0, 1, 1) +
@@ -554,12 +558,18 @@
my_solve_number_, y, false, false, absl::Span<const size_t>());
double r_orig_squared = RSquaredUnconstrained(derivatives, y, y, 0.0);
while (true) {
- const Eigen::VectorXd step = Newton(derivatives, iteration);
+ SOLVE_VLOG(my_solve_number_, 1) << "Starting iteration " << iteration;
+
+ const Eigen::VectorXd step = Newton(y, derivatives, iteration);
solution_node = derivatives.solution_node;
+ SOLVE_VLOG(my_solve_number_, 1)
+ << " y(" << iteration << ") is " << y.transpose().format(kHeavyFormat);
+ PrintDerivatives(derivatives, y, "", 3);
SOLVE_VLOG(my_solve_number_, 2)
- << "Step " << iteration << " -> " << std::setprecision(12) << std::fixed
- << std::setfill(' ') << step.transpose().format(kHeavyFormat);
+ << " initial step(" << iteration << ") -> " << std::setprecision(12)
+ << std::fixed << std::setfill(' ')
+ << step.transpose().format(kHeavyFormat);
// We now have a search direction. Line search and go.
double t = 1.0;
@@ -576,16 +586,30 @@
break;
}
+ SOLVE_VLOG(my_solve_number_, 2)
+ << " Not close enough, "
+ << step.block(0, 0, problem->states(), 1).lpNorm<Eigen::Infinity>()
+ << " >= 1e-4 || " << derivatives.Axmb.squaredNorm() << " >= 1e-8";
+
// Now, do line search.
while (true) {
- SOLVE_VLOG(my_solve_number_, 3) << " Trying t: " << t;
Problem::Derivatives new_derivatives =
problem->ComputeDerivatives(my_solve_number_, y + t * step, false,
false, absl::Span<const size_t>());
const double r_squared =
RSquaredUnconstrained(new_derivatives, y, step, t);
+ SOLVE_VLOG(my_solve_number_, 2)
+ << std::setprecision(12) << std::setfill(' ') << " |r| < |r+1| "
+ << r_orig_squared << " < " << r_squared << " for step size " << t;
// See Boyd, section 10.3.2, algorithm 10.2
- if (r_squared <= std::pow(1.0 - kAlpha * t, 2.0) * r_orig_squared) {
+ if (r_squared <=
+ std::pow(1.0 - kUnconstrainedAlpha * t, 2.0) * r_orig_squared ||
+ t < kLineSearchStopThreshold) {
+ // This is modified because we have seen cases where the way we handle
+ // equality constraints sometimes ends up increasing the cost slightly.
+ // We are better off taking a small step and trying again than shrinking
+ // our step forever.
+ //
// Save the derivatives and norm computed for the next iteration to save
// CPU.
derivatives = std::move(new_derivatives);
@@ -1350,7 +1374,8 @@
}
SOLVE_VLOG(solve_number, 2)
- << " live " << points_[j].time << " vs solution "
+ << std::setprecision(12) << std::fixed << " live "
+ << points_[j].time << " vs solution "
<< base_clock_[j].time +
std::chrono::nanoseconds(static_cast<int64_t>(
std::round(data(NodeToFullSolutionIndex(j)))))
diff --git a/aos/network/multinode_timestamp_filter.h b/aos/network/multinode_timestamp_filter.h
index 3d5a125..828c489 100644
--- a/aos/network/multinode_timestamp_filter.h
+++ b/aos/network/multinode_timestamp_filter.h
@@ -84,6 +84,10 @@
// Ratio to let the cost increase when line searching. A small increase is
// fine since we aren't perfectly convex.
static constexpr double kAlpha = -0.15;
+ // Ratio to require the cost to decrease when line searching.
+ static constexpr double kUnconstrainedAlpha = 0.5;
+ // Unconstrained min line search distance to guarantee forward progress.
+ static constexpr double kLineSearchStopThreshold = 0.4;
// Line search step parameter.
static constexpr double kBeta = 0.5;
static constexpr double kMu = 2.0;
@@ -160,7 +164,8 @@
// used for the equality constraint. The last term is the scalar on the
// equality constraint. This needs to be removed from the solution to get the
// actual newton step.
- Eigen::VectorXd Newton(const Problem::Derivatives &derivatives,
+ Eigen::VectorXd Newton(const Eigen::Ref<const Eigen::VectorXd> y,
+ const Problem::Derivatives &derivatives,
size_t iteration);
// The solve number for this instance of the problem.