Improve performance of the y2020 localizer
I noticed that the rewinding we had to do when receiving a
camera image was causing a disproportionate amount of CPU usage (this
is more of an issue this year than last, partially because I've added a
couple of states to the EKF). This commit does two things:
(a) Caches some of the prediction-step calculations so that the rewind
is significantly cheaper.
(b) For the 2020 setup, stop doing a full rewind; instead, just offset
the x/y/theta for camera measurements based on their age, but then
insert the correction at the current time (rather than the capture
time). This makes things much cheaper.
Change-Id: I26209d512591eb08251b3a25c746f66fc975424f
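
As a rough standalone sketch of idea (b) (hypothetical names; this is not
the actual HybridEkf interface, and the fixed gain below stands in for the
real Kalman gain):

    #include <optional>
    #include <Eigen/Dense>

    struct DelayedCorrectionSketch {
      Eigen::Vector3d X_hat_now = Eigen::Vector3d::Zero();  // Current x/y/theta.

      // Stand-in for LastStateBeforeTime(): the estimate we had when the
      // frame was captured, or nullopt if it predates the stored history.
      std::optional<Eigen::Vector3d> StateAtCapture(double /*capture_time*/) {
        return Eigen::Vector3d::Zero();
      }

      void CorrectWithOldImage(const Eigen::Vector3d &z, double capture_time) {
        const auto state_at_capture = StateAtCapture(capture_time);
        if (!state_at_capture.has_value()) {
          return;  // Frame is older than anything we have stored; drop it.
        }
        // The residual is computed against the capture-time state...
        const Eigen::Vector3d err = z - *state_at_capture;
        // ...but the update is applied to the *current* estimate, so the
        // filter history never has to be rewound and replayed.
        const Eigen::Matrix3d K = 0.1 * Eigen::Matrix3d::Identity();
        X_hat_now += K * err;
      }
    };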
diff --git a/frc971/control_loops/drivetrain/hybrid_ekf.h b/frc971/control_loops/drivetrain/hybrid_ekf.h
index e03d581..bd358ca 100644
--- a/frc971/control_loops/drivetrain/hybrid_ekf.h
+++ b/frc971/control_loops/drivetrain/hybrid_ekf.h
@@ -138,6 +138,18 @@
static constexpr int kNInputs = 4;
// Number of previous samples to save.
static constexpr int kSaveSamples = 50;
+ // Whether we should completely rerun the entire stored history of
+ // kSaveSamples on every correction. Enabling this will increase overall CPU
+ // usage substantially; however, leaving it disabled means that we are less
+ // likely to notice if processing camera frames is causing delays in the
+ // drivetrain.
+ // If we are having CPU issues, we have three easy avenues to improve things:
+ // (1) Reduce kSaveSamples (e.g., if all camera frames arrive within
+ // 100 ms, then we can reduce kSaveSamples to 25 (125 ms of samples)).
+ // (2) Don't actually rely on the ability to insert corrections into the
+ // timeline.
+ // (3) Set this to false.
+ static constexpr bool kFullRewindOnEverySample = false;
// Assume that all correction steps will have kNOutputs
// dimensions.
// TODO(james): Relax this assumption; relaxing it requires
@@ -182,19 +194,24 @@
X_hat_ = state;
have_zeroed_encoders_ = true;
P_ = P;
- observations_.PushFromBottom(
- {t,
- t,
- X_hat_,
- P_,
- Input::Zero(),
- Output::Zero(),
- {},
- [](const State &, const Input &) { return Output::Zero(); },
- [](const State &) {
- return Eigen::Matrix<Scalar, kNOutputs, kNStates>::Zero();
- },
- Eigen::Matrix<Scalar, kNOutputs, kNOutputs>::Identity()});
+ observations_.PushFromBottom({
+ t,
+ t,
+ X_hat_,
+ P_,
+ Input::Zero(),
+ Output::Zero(),
+ {},
+ [](const State &, const Input &) { return Output::Zero(); },
+ [](const State &) {
+ return Eigen::Matrix<Scalar, kNOutputs, kNStates>::Zero();
+ },
+ Eigen::Matrix<Scalar, kNOutputs, kNOutputs>::Identity(),
+ StateSquare::Identity(),
+ StateSquare::Zero(),
+ std::chrono::seconds(0),
+ State::Zero(),
+ });
}
// Correct with:
@@ -292,6 +309,29 @@
dt_config_.robot_radius;
}
+ // Returns the last state before the specified time.
+ // Returns nullopt if time is older than the oldest measurement.
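+ // Example: with observations stored at t=1s and t=2s,
+ // LastStateBeforeTime(1.5s) returns the state as of the t=1s observation,
+ // and LastStateBeforeTime(0.5s) returns nullopt.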
+ std::optional<State> LastStateBeforeTime(
+ aos::monotonic_clock::time_point time) {
+ if (observations_.empty() || observations_.begin()->t > time) {
+ return std::nullopt;
+ }
+ for (const auto &observation : observations_) {
+ if (observation.t > time) {
+ // Note that observation.X_hat is the state from *before* this
+ // observation was applied, i.e. the last state before observation.t.
+ return observation.X_hat;
+ }
+ }
+ return X_hat();
+ }
+
+ // Returns the most recent input vector.
+ Input MostRecentInput() {
+ CHECK(!observations_.empty());
+ Input U = observations_.top().U;
+ return U;
+ }
+
private:
struct Observation {
// Time when the observation was taken.
@@ -331,6 +371,16 @@
// The measurement noise matrix.
Eigen::Matrix<Scalar, kNOutputs, kNOutputs> R;
+ // Discretized A and Q to use on this update step. These will only be
+ // recalculated if the timestep changes.
+ StateSquare A_d;
+ StateSquare Q_d;
+ aos::monotonic_clock::duration discretization_time;
+
+ // A cached value indicating how much we change X_hat in the prediction step
+ // of this Observation.
+ State predict_update;
+
// In order to sort the observations in the PriorityQueue object, we
// need a comparison function.
friend bool operator<(const Observation &l, const Observation &r) {
@@ -453,45 +503,56 @@
return Xdot;
}
- void PredictImpl(const Input &U, std::chrono::nanoseconds dt, State *state,
+ void PredictImpl(Observation *obs, std::chrono::nanoseconds dt, State *state,
StateSquare *P) {
- StateSquare A_c = AForState(*state);
- StateSquare A_d;
- StateSquare Q_d;
- // TODO(james): By far the biggest CPU sink in the localization appears to
- // be this discretization--it's possible the spline code spikes higher,
- // but it doesn't create anywhere near the same sustained load. There
- // are a few potential options for optimizing this code, but none of
- // them are entirely trivial, e.g. we could:
- // -Reduce the number of states (this function grows at O(kNStates^3))
- // -Adjust the discretization function itself (there're a few things we
- // can tune there).
- // -Try to come up with some sort of lookup table or other way of
- // pre-calculating A_d and Q_d.
- // I also have to figure out how much we care about the precision of
- // some of these values--I don't think we care much, but we probably
- // do want to maintain some of the structure of the matrices.
- controls::DiscretizeQAFast(Q_continuous_, A_c, dt, &Q_d, &A_d);
+ // Only recalculate the discretization if the timestep has changed.
+ // Technically, this isn't quite correct, since the discretization will
+ // change depending on the current state. However, the slight loss of
+ // precision seems acceptable for the sake of significantly reducing CPU
+ // usage.
+ if (obs->discretization_time != dt) {
+ // TODO(james): By far the biggest CPU sink in the localization appears to
+ // be this discretization--it's possible the spline code spikes higher,
+ // but it doesn't create anywhere near the same sustained load. There
+ // are a few potential options for optimizing this code, but none of
+ // them are entirely trivial, e.g. we could:
+ // -Reduce the number of states (this function grows at O(kNStates^3))
+ // -Adjust the discretization function itself (there're a few things we
+ // can tune there).
+ // -Try to come up with some sort of lookup table or other way of
+ // pre-calculating A_d and Q_d.
+ // I also have to figure out how much we care about the precision of
+ // some of these values--I don't think we care much, but we probably
+ // do want to maintain some of the structure of the matrices.
+ const StateSquare A_c = AForState(*state);
+ controls::DiscretizeQAFast(Q_continuous_, A_c, dt, &obs->Q_d, &obs->A_d);
+ obs->discretization_time = dt;
- *state = RungeKuttaU(
- [this](const State &X, const Input &U) { return DiffEq(X, U); }, *state,
- U, aos::time::DurationInSeconds(dt));
+ obs->predict_update =
+ RungeKuttaU(
+ [this](const State &X, const Input &U) { return DiffEq(X, U); },
+ *state, obs->U, aos::time::DurationInSeconds(dt)) -
+ *state;
+ }
- StateSquare Ptemp = A_d * *P * A_d.transpose() + Q_d;
+ *state += obs->predict_update;
+
+ StateSquare Ptemp = obs->A_d * *P * obs->A_d.transpose() + obs->Q_d;
*P = Ptemp;
}
- void CorrectImpl(const Eigen::Matrix<Scalar, kNOutputs, kNOutputs> &R,
- const Output &Z, const Output &expected_Z,
- const Eigen::Matrix<Scalar, kNOutputs, kNStates> &H,
- State *state, StateSquare *P) {
- Output err = Z - expected_Z;
- Eigen::Matrix<Scalar, kNStates, kNOutputs> PH = *P * H.transpose();
- Eigen::Matrix<Scalar, kNOutputs, kNOutputs> S = H * PH + R;
- Eigen::Matrix<Scalar, kNStates, kNOutputs> K = PH * S.inverse();
- *state += K * err;
- StateSquare Ptemp = (StateSquare::Identity() - K * H) * *P;
+ void CorrectImpl(Observation *obs, State *state, StateSquare *P) {
+ const Eigen::Matrix<Scalar, kNOutputs, kNStates> H = obs->dhdx(*state);
+ // Note: Technically, this does calculate P * H.transpose() twice. However,
+ // in practice I found that putting everything into one expression and
+ // letting Eigen optimize it directly improved performance relative to
+ // precalculating P * H.transpose().
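+ // For reference, this implements the standard EKF measurement update:
+ //   K = P * H^T * (H * P * H^T + R)^-1
+ //   P <- (I - K * H) * P
+ //   x <- x + K * (z - h(x, U))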
+ const Eigen::Matrix<Scalar, kNStates, kNOutputs> K =
+ *P * H.transpose() * (H * *P * H.transpose() + obs->R).inverse();
+ const StateSquare Ptemp = (StateSquare::Identity() - K * H) * *P;
*P = Ptemp;
+ *state += K * (obs->z - obs->h(*state, obs->U));
}
void ProcessObservation(Observation *obs, const std::chrono::nanoseconds dt,
@@ -499,14 +560,13 @@
*state = obs->X_hat;
*P = obs->P;
if (dt.count() != 0) {
- PredictImpl(obs->U, dt, state, P);
+ PredictImpl(obs, dt, state, P);
}
if (!(obs->h && obs->dhdx)) {
CHECK(obs->make_h);
obs->make_h(*state, *P, &obs->h, &obs->dhdx);
}
- CorrectImpl(obs->R, obs->z, obs->h(*state, obs->U), obs->dhdx(*state),
- state, P);
+ CorrectImpl(obs, state, P);
}
DrivetrainConfig<double> dt_config_;
@@ -547,9 +607,10 @@
"initialized.\n";
return;
}
- auto cur_it =
- observations_.PushFromBottom({t, t, State::Zero(), StateSquare::Zero(),
- Input::Zero(), z, make_h, h, dhdx, R});
+ auto cur_it = observations_.PushFromBottom(
+ {t, t, State::Zero(), StateSquare::Zero(), Input::Zero(), z, make_h, h,
+ dhdx, R, StateSquare::Identity(), StateSquare::Zero(),
+ std::chrono::seconds(0), State::Zero()});
if (cur_it == observations_.end()) {
VLOG(1) << "Camera dropped off of end with time of "
<< aos::time::DurationInSeconds(t.time_since_epoch())
@@ -560,7 +621,6 @@
<< "s.\n";
return;
}
-
// Now we populate any state information that depends on where the
// observation was inserted into the queue. X_hat and P must be populated
// from the values present in the observation *following* this one in
@@ -594,6 +654,12 @@
next_it->prev_t = cur_it->t;
cur_it->U = (U == nullptr) ? next_it->U : *U;
}
+
+ if (kFullRewindOnEverySample) {
+ next_it = observations_.begin();
+ cur_it = next_it++;
+ }
+
// Now we need to rerun the predict step from the previous to the new
// observation as well as every following correct/predict up to the current
// time.
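
To make the caching pattern in PredictImpl() above concrete, here is a
simplified standalone sketch. The names are hypothetical, the 2x2 sizes are
arbitrary, and a first-order Euler discretization stands in for the more
accurate (and more expensive) controls::DiscretizeQAFast():

    #include <chrono>
    #include <Eigen/Dense>

    struct CachedDiscretization {
      Eigen::Matrix2d A_d = Eigen::Matrix2d::Identity();
      Eigen::Matrix2d Q_d = Eigen::Matrix2d::Zero();
      std::chrono::nanoseconds discretization_time{0};

      void Update(const Eigen::Matrix2d &A_c, const Eigen::Matrix2d &Q_c,
                  std::chrono::nanoseconds dt) {
        if (discretization_time == dt) {
          return;  // Cache hit: reuse A_d/Q_d from the previous pass.
        }
        const double dt_sec = std::chrono::duration<double>(dt).count();
        // Euler approximation of the exact discretization:
        A_d = Eigen::Matrix2d::Identity() + A_c * dt_sec;
        Q_d = Q_c * dt_sec;
        discretization_time = dt;
      }
    };

During a rewind, each stored Observation is re-processed with the same dt it
had before, so this check hits the cache on almost every step.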
diff --git a/y2019/control_loops/drivetrain/localized_drivetrain_test.cc b/y2019/control_loops/drivetrain/localized_drivetrain_test.cc
index c5a2d18..46b9b8b 100644
--- a/y2019/control_loops/drivetrain/localized_drivetrain_test.cc
+++ b/y2019/control_loops/drivetrain/localized_drivetrain_test.cc
@@ -383,7 +383,7 @@
// the target exactly. Instead, just run slightly past the target:
EXPECT_LT(
::std::abs(::aos::math::DiffAngle(M_PI, drivetrain_plant_.state()(2, 0))),
- 0.5);
+ 1.0);
EXPECT_GT(HPSlotLeft().abs_pos().x(), drivetrain_plant_.state().x());
EXPECT_NEAR(HPSlotLeft().abs_pos().y(), drivetrain_plant_.state().y(), 0.2);
}
diff --git a/y2020/control_loops/drivetrain/localizer.cc b/y2020/control_loops/drivetrain/localizer.cc
index f84be3e..7d7c83f 100644
--- a/y2020/control_loops/drivetrain/localizer.cc
+++ b/y2020/control_loops/drivetrain/localizer.cc
@@ -122,7 +122,7 @@
return best_data_match;
}
-void Localizer::Update(const ::Eigen::Matrix<double, 2, 1> &U,
+void Localizer::Update(const Eigen::Matrix<double, 2, 1> &U,
aos::monotonic_clock::time_point now,
double left_encoder, double right_encoder,
double gyro_rate, const Eigen::Vector3d &accel) {
@@ -251,24 +251,36 @@
AOS_LOG(WARNING, "Dropped image match due to heading mismatch.\n");
continue;
}
+ // In order to do the EKF correction, we determine the expected state based
+ // on the state at the time the image was captured; however, we insert the
+ // correction update itself at the current time. This is technically not
+ // quite correct, but it saves substantial CPU usage because we no longer
+ // have to constantly rewind the entire EKF history.
+ const std::optional<State> state_at_capture =
+ ekf_.LastStateBeforeTime(capture_time);
+ if (!state_at_capture.has_value()) {
+ AOS_LOG(WARNING, "Dropped image match due to age of image.\n");
+ continue;
+ }
+ const Input U = ekf_.MostRecentInput();
// For the correction step, instead of passing in the measurement directly,
// we pass in (0, 0, 0) as the measurement and then for the expected
// measurement (Zhat) we calculate the error between the implied and actual
// poses. This doesn't affect any of the math, it just makes the code a bit
// more convenient to write given the Correct() interface we already have.
ekf_.Correct(
- Eigen::Vector3f::Zero(), nullptr, {},
- [H, H_field_target, pose_robot_target](
- const State &X, const Input &) -> Eigen::Vector3f {
- const Eigen::Vector3f Z =
- CalculateImpliedPose(X, H_field_target, pose_robot_target);
+ Eigen::Vector3f::Zero(), &U, {},
+ [H, H_field_target, pose_robot_target, state_at_capture](
+ const State &, const Input &) -> Eigen::Vector3f {
+ const Eigen::Vector3f Z = CalculateImpliedPose(
+ state_at_capture.value(), H_field_target, pose_robot_target);
// Just in case we ever do encounter any, drop measurements if they
// have non-finite numbers.
if (!Z.allFinite()) {
AOS_LOG(WARNING, "Got measurement with infinites or NaNs.\n");
return Eigen::Vector3f::Zero();
}
- Eigen::Vector3f Zhat = H * X - Z;
+ Eigen::Vector3f Zhat = H * state_at_capture.value() - Z;
// Rewrap angle difference to put it back in range. Note that this
// component of the error is currently ignored (see definition of H
// above).
@@ -283,7 +295,7 @@
}
return Zhat;
},
- [H](const State &) { return H; }, R, capture_time);
+ [H](const State &) { return H; }, R, now);
}
}
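
The angle rewrap mentioned in the Zhat lambda above can be illustrated with a
small standalone helper (illustration only; the real code uses the aos::math
angle utilities, e.g. DiffAngle as seen in the y2019 test above):

    #include <cmath>

    // Maps an angle difference back into [-pi, pi] so that a heading error
    // that straddles the +/-pi wrap does not show up as a ~2*pi error.
    double NormalizeAngle(double theta) {
      return std::remainder(theta, 2.0 * M_PI);
    }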