Compute the gradient and hessian at the same time
We are already iterating over everything. It's simpler to save out both
results at once and return them in a structure.
We want to add constraints so we get a constrained optimization
problem. Since the goal is essentially to keep the error > 0, we can
use an active set method based off each computed error. That is going
to be most efficient to do if we compute which are the active
constraints as we solve. Hence refactoring to provide a single function
to do it all at once.
Change-Id: Iadfe672743db8c8856299ecab05c1844f859d94b
Signed-off-by: Austin Schuh <austin.schuh@bluerivertech.com>
diff --git a/aos/network/multinode_timestamp_filter.cc b/aos/network/multinode_timestamp_filter.cc
index 98ad185..b87d5b9 100644
--- a/aos/network/multinode_timestamp_filter.cc
+++ b/aos/network/multinode_timestamp_filter.cc
@@ -127,9 +127,12 @@
return success;
}
-Eigen::VectorXd TimestampProblem::Gradient(
- const Eigen::Ref<Eigen::VectorXd> time_offsets) {
- Eigen::VectorXd grad = Eigen::VectorXd::Zero(live_nodes_);
+TimestampProblem::Derivitives TimestampProblem::ComputeDerivitives(
+ const Eigen::Ref<Eigen::VectorXd> time_offsets) {
+ Derivitives result;
+ result.gradient = Eigen::VectorXd::Zero(live_nodes_);
+ result.hessian = Eigen::MatrixXd::Zero(live_nodes_, live_nodes_);
+
for (size_t i = 0; i < clock_offset_filter_for_node_.size(); ++i) {
for (struct FilterPair &filter : clock_offset_filter_for_node_[i]) {
// Especially when reboots are involved, it isn't guarenteed that there
@@ -172,52 +175,31 @@
const double error = 2.0 * (offset_error.second - kMinNetworkDelay);
filter.pointer = offset_error.first;
- grad(a_solution_index) += -error;
- grad(b_solution_index) += error;
- }
- }
- return grad;
-}
-
-Eigen::MatrixXd TimestampProblem::Hessian(
- const Eigen::Ref<Eigen::VectorXd> /*time_offsets*/) const {
- Eigen::MatrixXd hessian = Eigen::MatrixXd::Zero(live_nodes_, live_nodes_);
-
- for (size_t i = 0; i < clock_offset_filter_for_node_.size(); ++i) {
- for (const struct FilterPair &filter : clock_offset_filter_for_node_[i]) {
- // If the gradient is 0, the hessian should also be 0.
- if (filter.filter->timestamps_empty(base_clock_[i].boot,
- base_clock_[filter.b_index].boot)) {
- continue;
- }
+ result.gradient(a_solution_index) += -error;
+ result.gradient(b_solution_index) += error;
// Reminder, our cost function has the following form.
// ((tb - (1 + ma) ta - ba)^2
// We are ignoring the slope when taking the derivative and applying the
// chain rule to keep the gradient smooth. This means that the Hessian is
// 2 for d^2 cost/dta^2 and d^2 cost/dtb^2
- const size_t a_solution_index = NodeToFullSolutionIndex(i);
- const size_t b_solution_index = NodeToFullSolutionIndex(filter.b_index);
- hessian(a_solution_index, a_solution_index) += 2;
- hessian(b_solution_index, a_solution_index) += -2;
- hessian(a_solution_index, b_solution_index) =
- hessian(b_solution_index, a_solution_index);
- hessian(b_solution_index, b_solution_index) += 2;
+ result.hessian(a_solution_index, a_solution_index) += 2;
+ result.hessian(b_solution_index, a_solution_index) += -2;
+ result.hessian(a_solution_index, b_solution_index) =
+ result.hessian(b_solution_index, a_solution_index);
+ result.hessian(b_solution_index, b_solution_index) += 2;
}
}
- return hessian;
+ return result;
}
std::tuple<Eigen::VectorXd, size_t> TimestampProblem::Newton(
const Eigen::Ref<Eigen::VectorXd> time_offsets,
const std::vector<logger::BootTimestamp> &points) {
CHECK_GT(live_nodes_, 0u) << ": No live nodes to solve for.";
- // TODO(austin): Each of the DCost functions does a binary search of the
- // timestamps list. By the time we have computed the gradient and Hessian,
- // we've done 5 binary searches for the same information.
- const Eigen::VectorXd grad = Gradient(time_offsets);
- const Eigen::MatrixXd hessian = Hessian(time_offsets);
+ const Derivitives derivitives = ComputeDerivitives(time_offsets);
+
const Eigen::MatrixXd constraint_jacobian =
Eigen::MatrixXd::Ones(1, live_nodes_) / static_cast<double>(live_nodes_);
// https://www.cs.purdue.edu/homes/jhonorio/16spring-cs52000-equality.pdf
@@ -283,13 +265,13 @@
Eigen::MatrixXd a;
a.resize(live_nodes_ + 1, live_nodes_ + 1);
- a.block(0, 0, live_nodes_, live_nodes_) = hessian;
+ a.block(0, 0, live_nodes_, live_nodes_) = derivitives.hessian;
a.block(0, live_nodes_, live_nodes_, 1) = constraint_jacobian.transpose();
a.block(live_nodes_, 0, 1, live_nodes_) = constraint_jacobian;
a(live_nodes_, live_nodes_) = 0.0;
Eigen::VectorXd b = Eigen::VectorXd::Zero(live_nodes_ + 1);
- b.block(0, 0, live_nodes_, 1) = -grad;
+ b.block(0, 0, live_nodes_, 1) = -derivitives.gradient;
// Now, we want to set b(live_nodes_) to be -time_offset for the earliest
// clock.
@@ -350,7 +332,8 @@
Eigen::MatrixXd::Ones(1, live_nodes_) /
static_cast<double>(live_nodes_);
Eigen::VectorXd adjusted_grad =
- Gradient(data) + step(live_nodes_) * constraint_jacobian.transpose();
+ ComputeDerivitives(data).gradient +
+ step(live_nodes_) * constraint_jacobian.transpose();
VLOG(2) << "Adjusted grad " << solution_number << " -> "
<< std::setprecision(12) << std::fixed << std::setfill(' ')
diff --git a/aos/network/multinode_timestamp_filter.h b/aos/network/multinode_timestamp_filter.h
index ea52b23..eff6179 100644
--- a/aos/network/multinode_timestamp_filter.h
+++ b/aos/network/multinode_timestamp_filter.h
@@ -101,10 +101,16 @@
return solution_node;
}
- // Returns the Hessian of the cost function at time_offsets.
- Eigen::MatrixXd Hessian(const Eigen::Ref<Eigen::VectorXd> time_offsets) const;
- // Returns the gradient of the cost function at time_offsets.
- Eigen::VectorXd Gradient(const Eigen::Ref<Eigen::VectorXd> time_offsets);
+ // The derivitives and other work products needed for constrained newtons
+ // method.
+ struct Derivitives {
+ Eigen::VectorXd gradient;
+ Eigen::MatrixXd hessian;
+ };
+
+ // Returns the gradient and Hessian of the cost function at time_offsets.
+ Derivitives ComputeDerivitives(
+ const Eigen::Ref<Eigen::VectorXd> time_offsets);
// Returns the newton step of the timestamp problem, and the node which was
// used for the equality constraint. The last term is the scalar on the