Optimize discretization of A/Q for EKF Because we are recomputing the discretization of A/Q for every single step in the EKF, it had been taking up ~70-80% of the total computation time of the entire EKF. This knocks it down to something like 50% of the total EKF computation time (for ~3-4x speed improvements overall). Because the discretizations of A/Q are dependent on both dt and on the heading of the robot, they are not trivially cacheable. Future improvements would be: -Only recalculate discretization when the timestep changes and assume that the heading estimate will only change by minute amounts and so we don't need to redescritize unless dt has changed. -Figure out how to cache 7x7 components of each that should stay constant for constant dt. -Determine whether any convenient relationships hold with respect to dt/heading that would let us make a reasonably small interpolation table. Change-Id: I8838d4837f380a2411ab9da99d542cdbe9999e41

commit: b2a2f35cc905b3379f3edbac50279417ee4b5bad [log] [tgz]
author: James Kuszmaul <jabukuszmaul@gmail.com> Sat Mar 02 16:59:34 2019 -0800
committer: James Kuszmaul <jabukuszmaul@gmail.com> Wed Mar 06 07:20:02 2019 -0800
tree: 49febd07281fe05e64d3c105b081171bd402078f
parent: e2c71eadabfb34a726e5637f45378a3fbb3ea28b [diff]
diff --git a/frc971/control_loops/c2d.h b/frc971/control_loops/c2d.h
index 1bf99a2..b6f4198 100644
--- a/frc971/control_loops/c2d.h
+++ b/frc971/control_loops/c2d.h

@@ -64,8 +64,60 @@
   Eigen::Matrix<Scalar, num_states, num_states> phi22 =
       phi.block(num_states, num_states, num_states, num_states);
 
-  *Q_d = phi22.transpose() * phi12;
-  *Q_d = (*Q_d + Q_d->transpose()) / static_cast<Scalar>(2.0);
+  Qtemp = phi22.transpose() * phi12;
+  *Q_d = (Qtemp + Qtemp.transpose()) / static_cast<Scalar>(2.0);
+}
+
+// Does a faster approximation for the discretizing A/Q, for if solving a 2Nx2N
+// matrix exponential is too expensive.
+// Basic reasoning/method:
+// The original algorithm does a matrix exponential on a 2N x 2N matrix (the
+// block matrix made of of A and Q). This is extremely expensive for larg-ish
+// matrices. This function takes advantage of the structure of the matrix
+// we are taking the exponential and notes that we care about two things:
+// 1) The exponential of A*t, which is only NxN and so is relatively cheap.
+// 2) The upper-right quarter of the 2Nx2N matrix, which we can approximate
+//    using a taylor series to several terms and still be substantially cheaper
+//    than taking the big exponential.
+template <typename Scalar, int num_states>
+void DiscretizeQAFast(
+    const Eigen::Matrix<Scalar, num_states, num_states> &Q_continuous,
+    const Eigen::Matrix<Scalar, num_states, num_states> &A_continuous,
+    ::std::chrono::nanoseconds dt,
+    Eigen::Matrix<Scalar, num_states, num_states> *Q_d,
+    Eigen::Matrix<Scalar, num_states, num_states> *A_d) {
+  Eigen::Matrix<Scalar, num_states, num_states> Qtemp =
+      (Q_continuous + Q_continuous.transpose()) / static_cast<Scalar>(2.0);
+  Scalar dt_d =
+      ::std::chrono::duration_cast<::std::chrono::duration<Scalar>>(dt).count();
+  Eigen::Matrix<Scalar, num_states, num_states> last_term = Qtemp;
+  double last_coeff = dt_d;
+  const Eigen::Matrix<Scalar, num_states, num_states> At =
+      A_continuous.transpose();
+  Eigen::Matrix<Scalar, num_states, num_states> Atn = At;
+  Eigen::Matrix<Scalar, num_states, num_states> phi12 = last_term * last_coeff;
+  Eigen::Matrix<Scalar, num_states, num_states> phi22 =
+      At.Identity() + Atn * last_coeff;
+  // TODO(james): Tune this once we have the robot up; ii < 6 is enough to get
+  // beyond any real numerical issues. 5 should be fine, but just happened to
+  // kick a test over to failing. 4 would probably work.
+  for (int ii = 2; ii < 6; ++ii) {
+    Eigen::Matrix<Scalar, num_states, num_states> next_term =
+        -A_continuous * last_term + Qtemp * Atn;
+    last_coeff *= dt_d / static_cast<Scalar>(ii);
+    phi12 += next_term * last_coeff;
+
+    last_term = next_term;
+
+    Atn *= At;
+    phi22 += last_coeff * Atn;
+  }
+  Eigen::Matrix<Scalar, num_states, num_states> phi22t =
+    phi22.transpose();
+
+  Qtemp = phi22t * phi12;
+  *Q_d = (Qtemp + Qtemp.transpose()) / static_cast<Scalar>(2.0);
+  *A_d = phi22t;
 }
 
 }  // namespace controls

diff --git a/frc971/control_loops/c2d_test.cc b/frc971/control_loops/c2d_test.cc
index dcdb048..a0c4deb 100644
--- a/frc971/control_loops/c2d_test.cc
+++ b/frc971/control_loops/c2d_test.cc

@@ -69,6 +69,21 @@
       << "\nQ_d_integrated:\n" << Q_d_integrated;
 }
 
+// Tests that the "fast" discretization produces nearly identical results.
+TEST_F(C2DTest, DiscretizeQAFast) {
+  Eigen::Matrix<double, 2, 2> Q_d;
+  Eigen::Matrix<double, 2, 2> Q_d_fast;
+  Eigen::Matrix<double, 2, 2> A_d;
+  Eigen::Matrix<double, 2, 2> A_d_fast;
+  Eigen::Matrix<double, 2, 1> B_d;
+  const auto dt = ::std::chrono::seconds(1);
+  DiscretizeQ(Q_continuous, A_continuous, dt, &Q_d);
+  C2D(A_continuous, B_continuous, dt, &A_d, &B_d);
+  DiscretizeQAFast(Q_continuous, A_continuous, dt, &Q_d_fast, &A_d_fast);
+  EXPECT_LT((Q_d - Q_d_fast).norm(), 1e-20);
+  EXPECT_LT((A_d - A_d_fast).norm(), 1e-20);
+}
+
 }  // namespace testing
 }  // namespace controls
 }  // namespace frc971

diff --git a/frc971/control_loops/drivetrain/hybrid_ekf.h b/frc971/control_loops/drivetrain/hybrid_ekf.h
index 01a49a9..82f409c 100644
--- a/frc971/control_loops/drivetrain/hybrid_ekf.h
+++ b/frc971/control_loops/drivetrain/hybrid_ekf.h

@@ -251,12 +251,8 @@
                    StateSquare *P) {
     StateSquare A_c = AForState(*state);
     StateSquare A_d;
-    Eigen::Matrix<Scalar, kNStates, 0> dummy_B;
-    controls::C2D<Scalar, kNStates, 0>(
-        A_c, Eigen::Matrix<Scalar, kNStates, 0>::Zero(), dt, &A_d, &dummy_B);
     StateSquare Q_d;
-    controls::DiscretizeQ(Q_continuous_, A_c, dt, &Q_d);
-    *P = A_d * *P * A_d.transpose() + Q_d;
+    controls::DiscretizeQAFast(Q_continuous_, A_c, dt, &Q_d, &A_d);
 
     *state = RungeKuttaU(
         [this](const State &X,
@@ -264,6 +260,9 @@
         *state, U,
         ::std::chrono::duration_cast<::std::chrono::duration<double>>(dt)
             .count());
+
+    StateSquare Ptemp = A_d * *P * A_d.transpose() + Q_d;
+    *P = Ptemp;
   }
 
   void CorrectImpl(const Eigen::Matrix<Scalar, kNOutputs, kNOutputs> &R,
@@ -274,8 +273,9 @@
     Eigen::Matrix<Scalar, kNStates, kNOutputs> PH = *P * H.transpose();
     Eigen::Matrix<Scalar, kNOutputs, kNOutputs> S = H * PH + R;
     Eigen::Matrix<Scalar, kNStates, kNOutputs> K = PH * S.inverse();
-    *state = *state + K * err;
-    *P = (StateSquare::Identity() - K * H) * *P;
+    *state += K * err;
+    StateSquare Ptemp = (StateSquare::Identity() - K * H) * *P;
+    *P = Ptemp;
   }
 
   void ProcessObservation(Observation *obs, const std::chrono::nanoseconds dt,
commit	b2a2f35cc905b3379f3edbac50279417ee4b5bad	[log] [tgz]
author	James Kuszmaul <jabukuszmaul@gmail.com>	Sat Mar 02 16:59:34 2019 -0800
committer	James Kuszmaul <jabukuszmaul@gmail.com>	Wed Mar 06 07:20:02 2019 -0800
tree	49febd07281fe05e64d3c105b081171bd402078f
parent	e2c71eadabfb34a726e5637f45378a3fbb3ea28b [diff]