| #ifndef Y2019_CONTROL_LOOPS_DRIVETRAIN_LOCALIZER_H_
| #define Y2019_CONTROL_LOOPS_DRIVETRAIN_LOCALIZER_H_
| |
| #include <cmath> |
| #include <memory> |
| |
| #include "frc971/control_loops/drivetrain/camera.h" |
| #include "frc971/control_loops/drivetrain/hybrid_ekf.h" |
| #include "frc971/control_loops/drivetrain/localization/utils.h" |
| #include "frc971/control_loops/pose.h" |
| |
| #if !defined(__clang__) && defined(__GNUC__) |
| // GCC mis-detects that, when zero_ is set to true, the member variables could
| // be used uninitialized. Rather than spend the CPU to initialize them in
| // addition to the memory for no good reason, tell GCC to stop doing that.
| // Clang appears to get it right.
| #pragma GCC diagnostic push |
| #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" |
| #endif |
| |
| namespace y2019::control_loops { |
| |
| template <int num_cameras, int num_targets, int num_obstacles, |
| int max_targets_per_frame, typename Scalar = double> |
| class TypedLocalizer |
| : public ::frc971::control_loops::drivetrain::HybridEkf<Scalar> { |
| public: |
| typedef frc971::control_loops::TypedCamera<num_targets, num_obstacles, Scalar> |
| Camera; |
| typedef typename Camera::TargetView TargetView; |
| typedef typename Camera::Pose Pose; |
| typedef typename frc971::control_loops::Target Target; |
| typedef ::frc971::control_loops::drivetrain::HybridEkf<Scalar> HybridEkf; |
| typedef typename HybridEkf::State State; |
| typedef typename HybridEkf::StateSquare StateSquare; |
| typedef typename HybridEkf::Input Input; |
| typedef typename HybridEkf::Output Output; |
| using HybridEkf::kNInputs; |
| using HybridEkf::kNOutputs; |
| using HybridEkf::kNStates; |
| |
|   // robot_pose should be the Pose object used by the cameras, such that when
|   // we update robot_pose, the cameras change the relative target positions
|   // that they report.
|   // Note that the parameters for the cameras should be set to allow slightly
|   // larger fields of view and slightly longer range than the true cameras so
|   // that we can identify potential matches for targets even when we have
|   // slight modelling errors.
| TypedLocalizer( |
| const ::frc971::control_loops::drivetrain::DrivetrainConfig<Scalar> |
| &dt_config, |
| Pose *robot_pose) |
| : ::frc971::control_loops::drivetrain::HybridEkf<Scalar>(dt_config), |
| robot_pose_(robot_pose), |
| h_queue_(this), |
| make_h_queue_(this) {} |
| |
|   // Performs a Kalman filter correction with a single camera frame, consisting
| // of up to max_targets_per_frame targets and taken at time t. |
| // camera is the Camera used to take the images. |
| void UpdateTargets( |
| const Camera &camera, |
| const ::aos::SizedArray<TargetView, max_targets_per_frame> &targets, |
| ::aos::monotonic_clock::time_point t) { |
| if (targets.empty()) { |
| return; |
| } |
| |
| if (!SanitizeTargets(targets)) { |
|       AOS_LOG(ERROR, "Throwing out targets due to insane values.\n");
| return; |
| } |
| |
| if (t > HybridEkf::latest_t()) { |
|       AOS_LOG(ERROR,
|               "target observations must not be newer than the most recent "
|               "encoder/gyro update.\n");
| return; |
| } |
| |
| Output z; |
| Eigen::Matrix<Scalar, kNOutputs, kNOutputs> R; |
| TargetViewToMatrices(targets[0], &z, &R); |
| |
|     // In order to perform the correction steps for the targets, we perform a
|     // separate Correct step for each target in the frame. This way, the
|     // first correction can figure out the mappings between targets in the
|     // image and targets on the field, and those mappings can then be re-used
|     // by all the remaining corrections. As such, we need to store the EKF
|     // functions that the remaining targets will need in arrays:
| ::aos::SizedArray<HFunction, max_targets_per_frame> h_functions; |
| ::aos::SizedArray<Eigen::Matrix<Scalar, kNOutputs, kNStates>, |
| max_targets_per_frame> |
| dhdx; |
| make_h_queue_.CorrectKnownHBuilder( |
| z, nullptr, |
| ExpectedObservationBuilder(this, camera, targets, &h_functions, &dhdx), |
| R, t); |
|     // Apply the cached h functions and Jacobians to the remaining targets:
| for (size_t ii = 1; ii < targets.size(); ++ii) { |
| TargetViewToMatrices(targets[ii], &z, &R); |
| h_queue_.CorrectKnownH( |
| z, nullptr, ExpectedObservationFunctor(h_functions[ii], dhdx[ii]), R, |
| t); |
| } |
| } |
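|
|   // Example usage (an illustrative sketch only; the template parameter
|   // values and the names GetDrivetrainConfig, camera, frame_views, and
|   // capture_time are hypothetical stand-ins for robot-specific plumbing):
|   //
|   //   Pose robot_pose(Eigen::Vector3d::Zero(), 0.0);
|   //   TypedLocalizer</*num_cameras=*/1, /*num_targets=*/32,
|   //                  /*num_obstacles=*/4, /*max_targets_per_frame=*/3>
|   //       localizer(GetDrivetrainConfig(), &robot_pose);
|   //   // ... feed encoder/gyro updates to the HybridEkf base class so that
|   //   // latest_t() is at least as new as the frame's capture time ...
|   //   localizer.UpdateTargets(camera, frame_views, capture_time);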
| |
| private: |
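|   // Wraps the expected-observation ("h") calculation for a single matched
|   // target. A default-constructed HFunction is a placeholder that always
|   // returns zero, which makes the corresponding correction step a no-op.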
| class HFunction { |
| public: |
| HFunction() : zero_(true) {} |
| HFunction(const Camera *camera, const TargetView &best_view, |
| const TargetView &target_view, TypedLocalizer *localizer) |
| : zero_(false), |
| camera_(camera), |
| best_view_(best_view), |
| target_view_(target_view), |
| localizer_(localizer) {} |
| Output operator()(const State &X, const Input &) { |
| if (zero_) { |
| return Output::Zero(); |
| } |
| |
| // This function actually handles determining what the Output should |
| // be at a given state, now that we have chosen the target that |
| // we want to match to. |
| *localizer_->robot_pose_->mutable_pos() << X(0, 0), X(1, 0), 0.0; |
| localizer_->robot_pose_->set_theta(X(2, 0)); |
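|       // With the robot pose set, express the matched target in the camera's
|       // frame. The model reports a target as:
|       //   heading:  bearing from the camera to the target center,
|       //   distance: planar (x-y) distance from the camera to the target,
|       //   skew:     the target's orientation relative to the ray from the
|       //             camera to the target.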
| const Pose relative_pose = |
| best_view_.target->pose().Rebase(&camera_->pose()); |
| const Scalar heading = relative_pose.heading(); |
| const Scalar distance = relative_pose.xy_norm(); |
| const Scalar skew = |
| ::aos::math::NormalizeAngle(relative_pose.rel_theta() - heading); |
| return Output(heading, distance, skew); |
| } |
| |
| private: |
| bool zero_ = false; |
| |
| const Camera *camera_; |
| TargetView best_view_; |
| TargetView target_view_; |
| TypedLocalizer *localizer_; |
| }; |
| |
| friend class HFunction; |
| |
| class ExpectedObservationFunctor |
| : public HybridEkf::ExpectedObservationFunctor { |
| public: |
| ExpectedObservationFunctor(const HFunction &h, |
| Eigen::Matrix<Scalar, kNOutputs, kNStates> dhdx) |
| : h_(h), dhdx_(dhdx) {} |
| |
| Output H(const State &state, const Input &input) final { |
| return h_(state, input); |
| } |
| |
|     Eigen::Matrix<Scalar, kNOutputs, kNStates> DHDX(const State &) final {
| return dhdx_; |
| } |
| |
| private: |
| HFunction h_; |
| Eigen::Matrix<Scalar, kNOutputs, kNStates> dhdx_; |
| }; |
| class ExpectedObservationBuilder |
| : public HybridEkf::ExpectedObservationBuilder { |
| public: |
| ExpectedObservationBuilder( |
| TypedLocalizer *localizer, const Camera &camera, |
| const ::aos::SizedArray<TargetView, max_targets_per_frame> |
| &target_views, |
| ::aos::SizedArray<HFunction, max_targets_per_frame> *h_functions, |
| ::aos::SizedArray<Eigen::Matrix<Scalar, kNOutputs, kNStates>, |
| max_targets_per_frame> *dhdx) |
| : localizer_(localizer), |
| camera_(camera), |
| target_views_(target_views), |
| h_functions_(h_functions), |
| dhdx_(dhdx) {} |
| |
|     ExpectedObservationFunctor *MakeExpectedObservations(
|         const State &state, const StateSquare &P) final {
| HFunction h; |
| Eigen::Matrix<Scalar, kNOutputs, kNStates> dhdx; |
| localizer_->MakeH(camera_, target_views_, h_functions_, dhdx_, state, P, |
| &h, &dhdx); |
| functor_.emplace(h, dhdx); |
| return &functor_.value(); |
| } |
| |
| private: |
| TypedLocalizer *localizer_; |
| const Camera &camera_; |
| const ::aos::SizedArray<TargetView, max_targets_per_frame> &target_views_; |
| ::aos::SizedArray<HFunction, max_targets_per_frame> *h_functions_; |
| ::aos::SizedArray<Eigen::Matrix<Scalar, kNOutputs, kNStates>, |
| max_targets_per_frame> *dhdx_; |
| std::optional<ExpectedObservationFunctor> functor_; |
| }; |
| |
| // The threshold to use for completely rejecting potentially bad target |
| // matches. |
| // TODO(james): Tune |
| static constexpr Scalar kRejectionScore = 1.0; |
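|   // Matches scoring above this threshold are given zeroed h functions in
|   // MakeH, turning their correction steps into no-ops.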
| |
|   // Checks that the targets coming in make some sense--mostly to prevent
|   // NaNs and the like from propagating.
| bool SanitizeTargets( |
| const ::aos::SizedArray<TargetView, max_targets_per_frame> &targets) { |
| for (const TargetView &view : targets) { |
| const typename TargetView::Reading reading = view.reading; |
| if (!(::std::isfinite(reading.heading) && |
| ::std::isfinite(reading.distance) && |
| ::std::isfinite(reading.skew) && ::std::isfinite(reading.height))) { |
| AOS_LOG(ERROR, "Got non-finite values in target.\n"); |
| return false; |
| } |
| if (reading.distance < 0) { |
| AOS_LOG(ERROR, "Got negative distance.\n"); |
| return false; |
| } |
| if (::std::abs(::aos::math::NormalizeAngle(reading.skew)) > M_PI_2) { |
| AOS_LOG(ERROR, "Got skew > pi / 2.\n"); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| // Computes the measurement (z) and noise covariance (R) matrices for a given |
| // TargetView. |
| void TargetViewToMatrices(const TargetView &view, Output *z, |
| Eigen::Matrix<Scalar, kNOutputs, kNOutputs> *R) { |
| *z << view.reading.heading, view.reading.distance, |
| ::aos::math::NormalizeAngle(view.reading.skew); |
|     // TODO(james): R should account as well for our confidence in the target
|     // matching. However, handling that properly requires thinking a lot more
|     // about the probabilities.
| R->setZero(); |
| R->diagonal() << ::std::pow(view.noise.heading, 2), |
| ::std::pow(view.noise.distance, 2), ::std::pow(view.noise.skew, 2); |
| } |
| |
| // This is the function that will be called once the Ekf has inserted the |
| // measurement into the right spot in the measurement queue and needs the |
| // output functions to actually perform the corrections. |
| // Specifically, this will take the estimate of the state at that time and |
| // figure out how the targets seen by the camera best map onto the actual |
| // targets on the field. |
| // It then fills in the h and dhdx functions that are called by the Ekf. |
| void MakeH( |
| const Camera &camera, |
| const ::aos::SizedArray<TargetView, max_targets_per_frame> &target_views, |
| ::aos::SizedArray<HFunction, max_targets_per_frame> *h_functions, |
| ::aos::SizedArray<Eigen::Matrix<Scalar, kNOutputs, kNStates>, |
| max_targets_per_frame> *dhdx, |
| const State &X_hat, const StateSquare &P, HFunction *h, |
| Eigen::Matrix<Scalar, kNOutputs, kNStates> *current_dhdx) { |
| // Because we need to match camera targets ("views") to actual field |
| // targets, and because we want to take advantage of the correlations |
| // between the targets (i.e., if we see two targets in the image, they |
| // probably correspond to different on-field targets), the matching problem |
| // here is somewhat non-trivial. Some of the methods we use only work |
| // because we are dealing with very small N (e.g., handling the correlations |
|     // between multiple views has combinatorial complexity, but since N = 3,
| // it's not an issue). |
| // |
| // High-level steps: |
| // 1) Set the base robot pose for the cameras to the Pose implied by X_hat. |
| // 2) Fetch all the expected target views from the camera. |
| // 3) Determine the "magnitude" of the Kalman correction from each potential |
| // view/target pair. |
| // 4) Match based on the combination of targets with the smallest |
| // corrections. |
| // 5) Calculate h and dhdx for each pair of targets. |
| // |
| // For the "magnitude" of the correction, we do not directly use the |
| // standard Kalman correction formula. Instead, we calculate the correction |
| // we would get from each component of the measurement and take the L2 norm |
| // of those. This prevents situations where a target matches very poorly but |
| // produces an overall correction of near-zero. |
| // TODO(james): I do not know if this is strictly the correct method to |
| // minimize likely error, but should be reasonable. |
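|     //
|     // Concretely (a sketch of the scoring math; the loop below is
|     // authoritative), with the standard Kalman gain
|     //   K = P H^T (H P H^T + R)^{-1},
|     // we score a candidate view/target pair as
|     //   scorer(k) = || K.col(k) * err(k) ||^2  for each output component k,
|     //   score     = || scorer ||^2 + height penalty,
|     // rather than scoring || K * err ||, so that opposing per-component
|     // corrections cannot cancel out and hide a bad match.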
| // |
| // For the matching, we do the following (see MatchFrames): |
| // 1. Compute the best max_targets_per_frame matches for each view. |
| // 2. Exhaust every possible combination of view/target pairs and |
| // choose the best one. |
|     // When we don't think the camera should be able to see as many targets
|     // as we actually got in the frame, we do permit doubling/tripling/etc.
|     // up on potential targets once we've exhausted all the targets we think
|     // we can see.
| |
| // Set the current robot pose so that the cameras know where they are |
| // (all the cameras have robot_pose_ as their base): |
| *robot_pose_->mutable_pos() << X_hat(0, 0), X_hat(1, 0), 0.0; |
| robot_pose_->set_theta(X_hat(2, 0)); |
| |
| // Compute the things we *think* the camera should be seeing. |
| // Note: Because we will not try to match to any targets that are not |
| // returned by this function, we generally want the modelled camera to have |
| // a slightly larger field of view than the real camera, and be able to see |
| // slightly smaller targets. |
| const ::aos::SizedArray<TargetView, num_targets> camera_views = |
| camera.target_views(); |
| |
|     // Each entry scores one pairing of an observed target view (row) with a
|     // modelled camera target view (column). Within each row, only the first
|     // camera.target_views().size() columns are populated; of the rows, only
|     // the first target_views.size() are populated.
|     // Higher scores imply a worse match. Zero implies a perfect match.
| Eigen::Matrix<Scalar, max_targets_per_frame, num_targets> scores; |
| scores.setConstant(::std::numeric_limits<Scalar>::infinity()); |
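|     // Entries that never get populated stay at infinity, so they lose to
|     // any real match.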
| // Each row contains the indices of the best matches per view, where |
| // index 0 is the best, 1 the second best, and 2 the third, etc. |
| // -1 indicates an unfilled field. |
| Eigen::Matrix<int, max_targets_per_frame, max_targets_per_frame> |
| best_matches; |
| best_matches.setConstant(-1); |
|     // The H matrices for each potential matching, indexed the same way as
|     // the scores matrix ([view][camera target]: the outer dimension is
|     // max_targets_per_frame and the inner is num_targets).
|     ::std::array<::std::array<Eigen::Matrix<Scalar, kNOutputs, kNStates>,
|                               num_targets>,
|                  max_targets_per_frame>
|         all_H_matrices;
| |
| // Iterate through and fill out the scores for each potential pairing: |
| for (size_t ii = 0; ii < target_views.size(); ++ii) { |
| const TargetView &target_view = target_views[ii]; |
| Output z; |
| Eigen::Matrix<Scalar, kNOutputs, kNOutputs> R; |
| TargetViewToMatrices(target_view, &z, &R); |
| |
| for (size_t jj = 0; jj < camera_views.size(); ++jj) { |
|         // Compute the Kalman update for this step:
| const TargetView &view = camera_views[jj]; |
| const Eigen::Matrix<Scalar, kNOutputs, kNStates> H = |
| HMatrix(*view.target, camera.pose()); |
| const Eigen::Matrix<Scalar, kNStates, kNOutputs> PH = P * H.transpose(); |
| const Eigen::Matrix<Scalar, kNOutputs, kNOutputs> S = H * PH + R; |
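|         // S is the innovation covariance for this hypothetical measurement.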
| // Note: The inverse here should be very cheap so long as kNOutputs = 3. |
| const Eigen::Matrix<Scalar, kNStates, kNOutputs> K = PH * S.inverse(); |
| const Output err = z - Output(view.reading.heading, |
| view.reading.distance, view.reading.skew); |
|         // In order to compute the actual score, we want to consider each
|         // component of the error separately, as well as considering the
|         // impacts on each of the states separately. As such, we calculate
|         // what the separate updates from each error component would be, and
|         // sum the impacts on the states.
| Output scorer; |
| for (size_t kk = 0; kk < kNOutputs; ++kk) { |
| // TODO(james): squaredNorm or norm or L1-norm? Do we care about the |
| // square root? Do we prefer a quadratic or linear response? |
| scorer(kk, 0) = (K.col(kk) * err(kk, 0)).squaredNorm(); |
| } |
| // Compute the overall score--note that we add in a term for the height, |
| // scaled by a manual fudge-factor. The height is not accounted for |
| // in the Kalman update because we are not trying to estimate the height |
| // of the robot directly. |
| Scalar score = |
| scorer.squaredNorm() + |
| ::std::pow((view.reading.height - target_view.reading.height) / |
| target_view.noise.height / 20.0, |
| 2); |
| scores(ii, jj) = score; |
| all_H_matrices[ii][jj] = H; |
| |
|         // Insertion-sort this candidate into view ii's row of best_matches
|         // (best match first); once the row is full, the worst candidate
|         // falls off the end.
| int insert_target = jj; |
| for (size_t kk = 0; kk < max_targets_per_frame; ++kk) { |
| int idx = best_matches(ii, kk); |
| // Note that -1 indicates an unfilled value. |
| if (idx == -1 || scores(ii, idx) > scores(ii, insert_target)) { |
| best_matches(ii, kk) = insert_target; |
| insert_target = idx; |
| if (idx == -1) { |
| break; |
| } |
| } |
| } |
| } |
| } |
| |
| if (camera_views.size() == 0) { |
| AOS_LOG(DEBUG, "Unable to identify potential target matches.\n"); |
| // If we can't get a match, provide H = zero, which will make this |
| // correction step a nop. |
| *h = HFunction(); |
| *current_dhdx = Eigen::Matrix<Scalar, kNOutputs, kNStates>::Zero(); |
| for (size_t ii = 0; ii < target_views.size(); ++ii) { |
| h_functions->push_back(*h); |
| dhdx->push_back(*current_dhdx); |
| } |
| } else { |
| // Go through and brute force the issue of what the best combination of |
| // target matches are. The worst case for this algorithm will be |
| // max_targets_per_frame!, which is awful for any N > ~4, but since |
| // max_targets_per_frame = 3, I'm not really worried. |
| ::std::array<int, max_targets_per_frame> best_frames = |
| MatchFrames(scores, best_matches, target_views.size()); |
| for (size_t ii = 0; ii < target_views.size(); ++ii) { |
| size_t view_idx = best_frames[ii]; |
| if (view_idx >= camera_views.size()) { |
| AOS_LOG(ERROR, "Somehow, the view scorer failed.\n"); |
| h_functions->emplace_back(); |
| dhdx->push_back(Eigen::Matrix<Scalar, kNOutputs, kNStates>::Zero()); |
| continue; |
| } |
| const Eigen::Matrix<Scalar, kNOutputs, kNStates> best_H = |
| all_H_matrices[ii][view_idx]; |
| const TargetView best_view = camera_views[view_idx]; |
| const TargetView target_view = target_views[ii]; |
| const Scalar match_score = scores(ii, view_idx); |
| if (match_score > kRejectionScore) { |
| AOS_LOG(DEBUG, |
| "Rejecting target at (%f, %f, %f, %f) due to high score.\n", |
| target_view.reading.heading, target_view.reading.distance, |
| target_view.reading.skew, target_view.reading.height); |
| h_functions->emplace_back(); |
| dhdx->push_back(Eigen::Matrix<Scalar, kNOutputs, kNStates>::Zero()); |
| } else { |
| h_functions->emplace_back(&camera, best_view, target_view, this); |
| |
| // TODO(james): Experiment to better understand whether we want to |
| // recalculate H or not. |
| dhdx->push_back(best_H); |
| } |
| } |
| *h = h_functions->at(0); |
| *current_dhdx = dhdx->at(0); |
| } |
| } |
| |
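|   // Returns the measurement Jacobian dh/dX for observing the given target
|   // from camera_pose as a (heading, distance, skew) tuple.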
| Eigen::Matrix<Scalar, kNOutputs, kNStates> HMatrix(const Target &target, |
| const Pose &camera_pose) { |
| return frc971::control_loops::drivetrain:: |
| HMatrixForCameraHeadingDistanceSkew(target.pose(), camera_pose); |
| } |
| |
| // A helper function for the fuller version of MatchFrames; this just |
| // removes some of the arguments that are only needed during the recursion. |
| // n_views is the number of targets actually seen in the camera image (i.e., |
| // the number of rows in scores/best_matches that are actually populated). |
| ::std::array<int, max_targets_per_frame> MatchFrames( |
| const Eigen::Matrix<Scalar, max_targets_per_frame, num_targets> &scores, |
| const Eigen::Matrix<int, max_targets_per_frame, max_targets_per_frame> |
| &best_matches, |
| int n_views) { |
| ::std::array<int, max_targets_per_frame> best_set; |
| best_set.fill(-1); |
| Scalar best_score; |
| // We start out without having "used" any views/targets: |
| ::aos::SizedArray<bool, max_targets_per_frame> used_views; |
| for (int ii = 0; ii < n_views; ++ii) { |
| used_views.push_back(false); |
| } |
| MatchFrames(scores, best_matches, used_views, {{false}}, &best_set, |
| &best_score); |
| return best_set; |
| } |
| |
| // Recursively iterates over every plausible combination of targets/views |
| // that there is and determines the lowest-scoring combination. |
| // used_views and used_targets indicate which rows/columns of the |
| // scores/best_matches matrices should be ignored. When used_views is all |
| // true, that means that we are done recursing. |
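|   // For example, if views 0 and 1 both have target A as their best match,
|   // the recursion tries both {view 0 -> A, view 1 -> its next-best} and
|   // {view 1 -> A, view 0 -> its next-best}, and keeps whichever assignment
|   // has the lower total score.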
| void MatchFrames( |
| const Eigen::Matrix<Scalar, max_targets_per_frame, num_targets> &scores, |
| const Eigen::Matrix<int, max_targets_per_frame, max_targets_per_frame> |
| &best_matches, |
| const ::aos::SizedArray<bool, max_targets_per_frame> &used_views, |
| const ::std::array<bool, num_targets> &used_targets, |
| ::std::array<int, max_targets_per_frame> *best_set, Scalar *best_score) { |
| *best_score = ::std::numeric_limits<Scalar>::infinity(); |
|     // Iterate by letting each view in the camera frame (that isn't in
|     // used_views) choose its best match that isn't already taken. We then
|     // set the appropriate flags in used_views and used_targets and call
|     // MatchFrames recursively to let all the other views sort themselves
|     // out.
| for (size_t ii = 0; ii < used_views.size(); ++ii) { |
| if (used_views[ii]) { |
| continue; |
| } |
| int best_match = -1; |
| for (size_t jj = 0; jj < max_targets_per_frame; ++jj) { |
| if (best_matches(ii, jj) == -1) { |
| // If we run out of potential targets from the camera, then there |
| // are more targets in the frame than we think there should be. |
| // In this case, we are going to be doubling/tripling/etc. up |
| // anyhow. So we just give everyone their top choice: |
| // TODO(james): If we ever are dealing with larger numbers of |
| // targets per frame, do something to minimize doubling-up. |
| best_match = best_matches(ii, 0); |
| break; |
| } |
| best_match = best_matches(ii, jj); |
| if (!used_targets[best_match]) { |
| break; |
| } |
| } |
| // If we reach here and best_match = -1, that means that no potential |
| // targets were generated by the camera, and we should never have gotten |
| // here. |
| AOS_CHECK(best_match != -1); |
| ::aos::SizedArray<bool, max_targets_per_frame> sub_views = used_views; |
| sub_views[ii] = true; |
| ::std::array<bool, num_targets> sub_targets = used_targets; |
| sub_targets[best_match] = true; |
| ::std::array<int, max_targets_per_frame> sub_best_set; |
| Scalar score; |
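|       // Recurse with this pairing committed to find the best assignment of
|       // the remaining views, then add in the cost of this pairing itself.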
| MatchFrames(scores, best_matches, sub_views, sub_targets, &sub_best_set, |
| &score); |
| score += scores(ii, best_match); |
| sub_best_set[ii] = best_match; |
| if (score < *best_score) { |
| *best_score = score; |
| *best_set = sub_best_set; |
| } |
| } |
|     // best_score will be infinite if we did not find a result because every
|     // view was already set in used_views; this is the base case of the
|     // recursion, so we set best_score to zero:
| if (!::std::isfinite(*best_score)) { |
| *best_score = 0.0; |
| } |
| } |
| |
| // The pose that is used by the cameras to determine the location of the robot |
| // and thus the expected view of the targets. |
| Pose *robot_pose_; |
| |
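|   // Pre-allocated storage that keeps the correction functors/builders alive
|   // and referenced while the EKF replays its measurement queue (assumed
|   // behavior of ExpectedObservationAllocator).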
| typename HybridEkf::template ExpectedObservationAllocator< |
| ExpectedObservationFunctor> |
| h_queue_; |
| typename HybridEkf::template ExpectedObservationAllocator< |
| ExpectedObservationBuilder> |
| make_h_queue_; |
| |
| friend class ExpectedObservationBuilder; |
| }; |
| |
| #if !defined(__clang__) && defined(__GNUC__) |
| #pragma GCC diagnostic pop |
| #endif |
| |
| } // namespace y2019::control_loops |
| |
| #endif  // Y2019_CONTROL_LOOPS_DRIVETRAIN_LOCALIZER_H_