Penalize vision solver based on projected size

This makes the distance estimate much more accurate. Also, use the
standard deviation of the reprojection residuals, in pixels, to compute
the confidence.
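
A worked check of the new deviation-to-confidence mapping (using the
constants introduced in target_estimator.cc):

  confidence = 1.045 / (1 + 0.04452 * exp(0.4021 * std_dev))

    std_dev =  0 px  ->  confidence ~= 1.00
    std_dev =  2 px  ->  confidence ~= 0.95
    std_dev = 10 px  ->  confidence ~= 0.30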

Signed-off-by: Milind Upadhyay <milind.upadhyay@gmail.com>
Change-Id: I53f16100065a2bb75b31e78af057326163265d8e
diff --git a/y2022/vision/BUILD b/y2022/vision/BUILD
index 0b96539..a4e44f3 100644
--- a/y2022/vision/BUILD
+++ b/y2022/vision/BUILD
@@ -178,6 +178,7 @@
     target_compatible_with = ["@platforms//os:linux"],
     visibility = ["//y2022:__subpackages__"],
     deps = [
+        ":blob_detector_lib",
         ":calibration_fbs",
         ":target_estimate_fbs",
         "//aos/logging",
diff --git a/y2022/vision/blob_detector.cc b/y2022/vision/blob_detector.cc
index 68832fd..6949dd0 100644
--- a/y2022/vision/blob_detector.cc
+++ b/y2022/vision/blob_detector.cc
@@ -7,6 +7,7 @@
 #include "aos/network/team_number.h"
 #include "aos/time/time.h"
 #include "opencv2/features2d.hpp"
 #include "opencv2/imgproc.hpp"
 #include "y2022/vision/geometry.h"
 
@@ -67,21 +68,34 @@
 
 std::vector<BlobDetector::BlobStats> BlobDetector::ComputeStats(
     const std::vector<std::vector<cv::Point>> &blobs) {
   std::vector<BlobDetector::BlobStats> blob_stats;
   for (auto blob : blobs) {
-    auto blob_size = cv::boundingRect(blob).size();
+    // OpenCV doesn't guarantee an ordering of the rotated rect's height and
+    // width. The rotated size is only used after blobs have been filtered, so
+    // it is ok to define width as the larger side
+    const cv::Size rotated_rect_size_unordered = cv::minAreaRect(blob).size;
+    const cv::Size rotated_rect_size = {
+        std::max(rotated_rect_size_unordered.width,
+                 rotated_rect_size_unordered.height),
+        std::min(rotated_rect_size_unordered.width,
+                 rotated_rect_size_unordered.height)};
+    const cv::Size bounding_box_size = cv::boundingRect(blob).size();
+
     cv::Moments moments = cv::moments(blob);
 
     const auto centroid =
         cv::Point(moments.m10 / moments.m00, moments.m01 / moments.m00);
     const double aspect_ratio =
-        static_cast<double>(blob_size.width) / blob_size.height;
+        static_cast<double>(bounding_box_size.width) / bounding_box_size.height;
     const double area = moments.m00;
     const size_t num_points = blob.size();
 
     blob_stats.emplace_back(
-        BlobStats{centroid, aspect_ratio, area, num_points});
+        BlobStats{centroid, rotated_rect_size, aspect_ratio, area, num_points});
   }
+
   return blob_stats;
 }
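
Note: cv::minAreaRect() assigns the RotatedRect's width and height based on
its angle convention rather than by magnitude, so either side may come back
larger. A minimal standalone sketch of the normalization used above (assumes
only OpenCV):

#include <algorithm>
#include <vector>

#include "opencv2/imgproc.hpp"

// Rotated-rect size of a blob, with width defined as the longer side to
// match the convention used in ComputeStats().
cv::Size OrderedRotatedSize(const std::vector<cv::Point> &blob) {
  const cv::Size2f size = cv::minAreaRect(blob).size;
  return {static_cast<int>(std::max(size.width, size.height)),
          static_cast<int>(std::min(size.width, size.height))};
}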
 
@@ -118,8 +132,8 @@
   constexpr double kMinBlobAngle = 50.0 * kDegToRad;
   constexpr double kMaxBlobAngle = M_PI - kMinBlobAngle;
   std::vector<std::vector<cv::Point>> blob_circle;
+  std::vector<BlobStats> blob_circle_stats;
   Circle circle;
-  std::vector<cv::Point2d> centroids;
 
   // If we see more than this number of blobs after filtering based on
   // color/size, the circle fit may detect noise so just return no blobs.
@@ -142,11 +156,11 @@
 
       std::vector<std::vector<cv::Point>> current_blobs{
           filtered_blobs[j], filtered_blobs[k], filtered_blobs[l]};
-      std::vector<cv::Point2d> current_centroids{filtered_stats[j].centroid,
-                                                 filtered_stats[k].centroid,
-                                                 filtered_stats[l].centroid};
+      std::vector<BlobStats> current_stats{filtered_stats[j], filtered_stats[k],
+                                           filtered_stats[l]};
       const std::optional<Circle> current_circle =
-          Circle::Fit(current_centroids);
+          Circle::Fit({current_stats[0].centroid, current_stats[1].centroid,
+                       current_stats[2].centroid});
 
       // Make sure that a circle could be created from the points
       if (!current_circle) {
@@ -155,11 +169,11 @@
 
       // Only try to fit points to this circle if all of these are between
       // certain angles.
-      if (current_circle->InAngleRange(current_centroids[0], kMinBlobAngle,
+      if (current_circle->InAngleRange(current_stats[0].centroid, kMinBlobAngle,
                                        kMaxBlobAngle) &&
-          current_circle->InAngleRange(current_centroids[1], kMinBlobAngle,
+          current_circle->InAngleRange(current_stats[1].centroid, kMinBlobAngle,
                                        kMaxBlobAngle) &&
-          current_circle->InAngleRange(current_centroids[2], kMinBlobAngle,
+          current_circle->InAngleRange(current_stats[2].centroid, kMinBlobAngle,
                                        kMaxBlobAngle)) {
         for (size_t m = 0; m < filtered_blobs.size(); m++) {
           // Add this blob to the list if it is close to the circle, is on the
@@ -170,44 +184,31 @@
               (current_circle->DistanceTo(filtered_stats[m].centroid) <
                kCircleDistanceThreshold)) {
             current_blobs.emplace_back(filtered_blobs[m]);
-            current_centroids.emplace_back(filtered_stats[m].centroid);
+            current_stats.emplace_back(filtered_stats[m]);
           }
         }
 
         if (current_blobs.size() > blob_circle.size()) {
           blob_circle = current_blobs;
+          blob_circle_stats = current_stats;
           circle = *current_circle;
-          centroids = current_centroids;
         }
       }
     }
   }
 
   cv::Point avg_centroid(-1, -1);
-  if (centroids.size() > 0) {
-    for (auto centroid : centroids) {
-      avg_centroid.x += centroid.x;
-      avg_centroid.y += centroid.y;
+  if (blob_circle.size() > 0) {
+    for (const auto &stats : blob_circle_stats) {
+      avg_centroid.x += stats.centroid.x;
+      avg_centroid.y += stats.centroid.y;
     }
-    avg_centroid.x /= centroids.size();
-    avg_centroid.y /= centroids.size();
-
-    for (auto centroid : centroids) {
-      blob_result->filtered_centroids.emplace_back(
-          static_cast<int>(centroid.x), static_cast<int>(centroid.y));
-    }
-
-    // Sort the filtered centroids to make them go from left to right
-    std::sort(blob_result->filtered_centroids.begin(),
-              blob_result->filtered_centroids.end(),
-              [&circle](cv::Point p, cv::Point q) {
-                // If the angle is greater, it is more left and should be
-                // considered "less" for sorting
-                return circle.AngleOf(p) > circle.AngleOf(q);
-              });
+    avg_centroid.x /= blob_circle_stats.size();
+    avg_centroid.y /= blob_circle_stats.size();
   }
 
   blob_result->filtered_blobs = blob_circle;
+  blob_result->filtered_stats = blob_circle_stats;
   blob_result->centroid = avg_centroid;
 }
 
@@ -229,8 +230,8 @@
     cv::circle(view_image, stats.centroid, kCircleRadius,
                cv::Scalar(0, 215, 255), cv::FILLED);
   }
-  for (auto centroid : blob_result.filtered_centroids) {
-    cv::circle(view_image, centroid, kCircleRadius, cv::Scalar(0, 255, 0),
+  for (auto stats : blob_result.filtered_stats) {
+    cv::circle(view_image, stats.centroid, kCircleRadius, cv::Scalar(0, 255, 0),
                cv::FILLED);
   }
 
@@ -247,7 +248,7 @@
   blob_result->blob_stats = ComputeStats(blob_result->unfiltered_blobs);
   FilterBlobs(blob_result);
   auto end = aos::monotonic_clock::now();
-  VLOG(2) << "Blob detection elapsed time: "
+  VLOG(1) << "Blob detection elapsed time: "
           << std::chrono::duration<double, std::milli>(end - start).count()
           << " ms";
 }
diff --git a/y2022/vision/blob_detector.h b/y2022/vision/blob_detector.h
index d0a2b85..a60316a 100644
--- a/y2022/vision/blob_detector.h
+++ b/y2022/vision/blob_detector.h
@@ -11,6 +11,9 @@
  public:
   struct BlobStats {
     cv::Point centroid;
+    // Size of the rotated rect fitted around the blob
+    cv::Size size;
+    // Aspect ratio of the non-rotated bounding box
     double aspect_ratio;
     double area;
     size_t num_points;
@@ -20,8 +23,7 @@
     cv::Mat binarized_image;
     std::vector<std::vector<cv::Point>> filtered_blobs, unfiltered_blobs;
     std::vector<BlobStats> blob_stats;
-    // In sorted order from left to right on the circle
-    std::vector<cv::Point> filtered_centroids;
+    std::vector<BlobStats> filtered_stats;
     cv::Point centroid;
   };
 
diff --git a/y2022/vision/camera_reader.cc b/y2022/vision/camera_reader.cc
index 637ad0a..2be2ef1 100644
--- a/y2022/vision/camera_reader.cc
+++ b/y2022/vision/camera_reader.cc
@@ -98,8 +98,8 @@
         CvBlobsToFbs(blob_result.unfiltered_blobs, &builder);
     const auto blob_stats_offset =
         BlobStatsToFbs(blob_result.blob_stats, &builder);
-    const auto filtered_centroids_offset =
-        CvPointsToFbs(blob_result.filtered_centroids, &builder);
+    const auto filtered_stats_offset =
+        BlobStatsToFbs(blob_result.filtered_stats, &builder);
     const Point centroid_fbs =
         Point{blob_result.centroid.x, blob_result.centroid.y};
 
@@ -107,12 +107,12 @@
     blob_result_builder.add_filtered_blobs(filtered_blobs_offset);
     blob_result_builder.add_unfiltered_blobs(unfiltered_blobs_offset);
     blob_result_builder.add_blob_stats(blob_stats_offset);
-    blob_result_builder.add_filtered_centroids(filtered_centroids_offset);
+    blob_result_builder.add_filtered_stats(filtered_stats_offset);
     blob_result_builder.add_centroid(&centroid_fbs);
     blob_result_offset = blob_result_builder.Finish();
   }
 
-  target_estimator_.Solve(blob_result.filtered_centroids, std::nullopt);
+  target_estimator_.Solve(blob_result.filtered_stats, std::nullopt);
 
   const auto camera_calibration_offset =
       aos::RecursiveCopyFlatBuffer(camera_calibration_, builder.fbb());
diff --git a/y2022/vision/target_estimate.fbs b/y2022/vision/target_estimate.fbs
index 42a012d..3ba10f03 100644
--- a/y2022/vision/target_estimate.fbs
+++ b/y2022/vision/target_estimate.fbs
@@ -7,6 +7,11 @@
   y:int (id: 1);
 }
 
+struct Size {
+  width:int (id: 0);
+  height:int (id: 1);
+}
+
 table Blob  {
   points:[Point] (id: 0);
 }
@@ -14,6 +19,7 @@
 // Statistics for each blob used for filtering
 table BlobStatsFbs {
   centroid:Point (id: 0);
+  size:Size (id: 4);
   aspect_ratio:double (id: 1);
   area:double (id: 2);
   num_points:uint64 (id: 3);
@@ -27,8 +33,8 @@
   unfiltered_blobs:[Blob] (id: 1);
   // Stats on the blobs
   blob_stats:[BlobStatsFbs] (id: 2);
-  // Centroids of filtered blobs
-  filtered_centroids:[Point] (id: 4);
+  // Stats of filtered blobs
+  filtered_stats:[BlobStatsFbs] (id: 4);
   // Average centroid of the filtered blobs
   centroid:Point (id: 3);
 }
@@ -52,7 +58,8 @@
   angle_to_camera:double (id: 3);
   // Rotation of the camera in the hub's reference frame
   rotation_camera_hub:Rotation (id: 4);
-  // Confidence in the estimate from 0 to 1, based on the final solver cost.
+  // Confidence in the estimate from 0 to 1,
+  // based on the final deviation between projected points and actual blobs.
   // Good estimates currently have confidences of around 0.9 or greater.
   confidence:double (id: 7);
 
diff --git a/y2022/vision/target_estimator.cc b/y2022/vision/target_estimator.cc
index 6773631..c78a1c6 100644
--- a/y2022/vision/target_estimator.cc
+++ b/y2022/vision/target_estimator.cc
@@ -4,13 +4,14 @@
 #include "aos/time/time.h"
 #include "ceres/ceres.h"
 #include "frc971/control_loops/quaternion_utils.h"
 #include "opencv2/core/core.hpp"
 #include "opencv2/core/eigen.hpp"
 #include "opencv2/features2d.hpp"
 #include "opencv2/highgui/highgui.hpp"
 #include "opencv2/imgproc.hpp"
+#include "y2022/vision/geometry.h"
 
-DEFINE_bool(freeze_roll, true, "If true, don't solve for roll");
+DEFINE_bool(freeze_roll, false, "If true, don't solve for roll");
 DEFINE_bool(freeze_pitch, false, "If true, don't solve for pitch");
 DEFINE_bool(freeze_yaw, false, "If true, don't solve for yaw");
 DEFINE_bool(freeze_camera_height, true,
@@ -25,9 +26,19 @@
 
 namespace y2022::vision {
 
-std::vector<cv::Point3d> TargetEstimator::ComputeTapePoints() {
+namespace {
+
+constexpr size_t kNumPiecesOfTape = 16;
+// Width and height of a piece of reflective tape
+constexpr double kTapePieceWidth = 0.13;
+constexpr double kTapePieceHeight = 0.05;
+// Height of the center of the tape (m)
+constexpr double kTapeCenterHeight = 2.58 + (kTapePieceHeight / 2);
+// Horizontal distance from tape to center of hub (m)
+constexpr double kUpperHubRadius = 1.22 / 2;
+
+std::vector<cv::Point3d> ComputeTapePoints() {
   std::vector<cv::Point3d> tape_points;
-  tape_points.reserve(kNumPiecesOfTape);
 
   constexpr size_t kNumVisiblePiecesOfTape = 5;
   for (size_t i = 0; i < kNumVisiblePiecesOfTape; i++) {
@@ -38,17 +49,54 @@
     // The polar angle is a multiple of the angle between tape centers
     double theta = theta_index * ((2.0 * M_PI) / kNumPiecesOfTape);
     tape_points.emplace_back(kUpperHubRadius * std::cos(theta),
-                             kUpperHubRadius * std::sin(theta), kTapeHeight);
+                             kUpperHubRadius * std::sin(theta),
+                             kTapeCenterHeight);
   }
 
   return tape_points;
 }
 
+std::array<cv::Point3d, 4> ComputeMiddleTapePiecePoints() {
+  std::array<cv::Point3d, 4> tape_piece_points;
+
+  // Angle that each piece of tape occupies on the hub
+  constexpr double kTapePieceAngle =
+      (kTapePieceWidth / (2.0 * M_PI * kUpperHubRadius)) * (2.0 * M_PI);
+
+  constexpr double kThetaTapeLeft = -kTapePieceAngle / 2.0;
+  constexpr double kThetaTapeRight = kTapePieceAngle / 2.0;
+
+  constexpr double kTapeTopHeight =
+      kTapeCenterHeight + (kTapePieceHeight / 2.0);
+  constexpr double kTapeBottomHeight =
+      kTapeCenterHeight - (kTapePieceHeight / 2.0);
+
+  tape_piece_points[0] = {kUpperHubRadius * std::cos(kThetaTapeLeft),
+                          kUpperHubRadius * std::sin(kThetaTapeLeft),
+                          kTapeTopHeight};
+  tape_piece_points[1] = {kUpperHubRadius * std::cos(kThetaTapeRight),
+                          kUpperHubRadius * std::sin(kThetaTapeRight),
+                          kTapeTopHeight};
+
+  tape_piece_points[2] = {kUpperHubRadius * std::cos(kThetaTapeRight),
+                          kUpperHubRadius * std::sin(kThetaTapeRight),
+                          kTapeBottomHeight};
+  tape_piece_points[3] = {kUpperHubRadius * std::cos(kThetaTapeLeft),
+                          kUpperHubRadius * std::sin(kThetaTapeLeft),
+                          kTapeBottomHeight};
+
+  return tape_piece_points;
+}
+
+}  // namespace
+
 const std::vector<cv::Point3d> TargetEstimator::kTapePoints =
     ComputeTapePoints();
+const std::array<cv::Point3d, 4> TargetEstimator::kMiddleTapePiecePoints =
+    ComputeMiddleTapePiecePoints();
 
 TargetEstimator::TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics)
-    : centroids_(),
+    : blob_stats_(),
       image_(std::nullopt),
       roll_(0.0),
       pitch_(0.0),
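
As a worked check on the tape geometry above: kTapePieceAngle reduces to arc
length over radius,

  kTapePieceAngle = kTapePieceWidth / kUpperHubRadius = 0.13 / 0.61
                  ~= 0.213 rad ~= 12.2 deg,

while tape centers are spaced 2*pi / kNumPiecesOfTape ~= 0.393 rad (22.5 deg)
apart, leaving roughly 10.3 deg of hub arc between adjacent pieces.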
@@ -65,8 +113,7 @@
 void SetBoundsOrFreeze(double *param, bool freeze, double min, double max,
                        ceres::Problem *problem) {
   if (freeze) {
-    problem->SetParameterization(
-        param, new ceres::SubsetParameterization(1, std::vector<int>{0}));
+    problem->SetParameterBlockConstant(param);
   } else {
     problem->SetParameterLowerBound(param, 0, min);
     problem->SetParameterUpperBound(param, 0, max);
@@ -74,20 +121,56 @@
 }
 }  // namespace
 
-void TargetEstimator::Solve(const std::vector<cv::Point> &centroids,
-                            std::optional<cv::Mat> image) {
+void TargetEstimator::Solve(
+    const std::vector<BlobDetector::BlobStats> &blob_stats,
+    std::optional<cv::Mat> image) {
   auto start = aos::monotonic_clock::now();
 
-  centroids_ = centroids;
+  blob_stats_ = blob_stats;
   image_ = image;
 
+  // Do nothing if no blobs were detected
+  if (blob_stats_.size() == 0) {
+    confidence_ = 0.0;
+    return;
+  }
+
+  CHECK_GE(blob_stats_.size(), 3) << "Expected at least 3 blobs";
+
+  const auto circle =
+      Circle::Fit({blob_stats_[0].centroid, blob_stats_[1].centroid,
+                   blob_stats_[2].centroid});
+  CHECK(circle.has_value());
+
+  // Find the middle blob, which is the one with the angle closest to the
+  // average
+  double theta_avg = 0.0;
+  for (const auto &stats : blob_stats_) {
+    theta_avg += circle->AngleOf(stats.centroid);
+  }
+  theta_avg /= blob_stats_.size();
+
+  double min_diff = std::numeric_limits<double>::infinity();
+  for (auto it = blob_stats_.begin(); it < blob_stats_.end(); it++) {
+    const double diff = std::abs(circle->AngleOf(it->centroid) - theta_avg);
+    if (diff < min_diff) {
+      min_diff = diff;
+      middle_blob_index_ = it - blob_stats_.begin();
+    }
+  }
+
   ceres::Problem problem;
 
+  // x and y differences between projected tape points and blob centroids, as
+  // well as the squared width and height differences between the projected
+  // middle tape piece and the detected middle blob
+  const size_t num_residuals = (blob_stats_.size() * 2) + 2;
+
   // Set up the only cost function (also known as residual). This uses
   // auto-differentiation to obtain the derivative (jacobian).
   ceres::CostFunction *cost_function =
       new ceres::AutoDiffCostFunction<TargetEstimator, ceres::DYNAMIC, 1, 1, 1,
-                                      1, 1, 1>(this, centroids_.size() * 2,
+                                      1, 1, 1>(this, num_residuals,
                                                ceres::DO_NOT_TAKE_OWNERSHIP);
 
   // TODO(milind): add loss function when we get more noisy data
@@ -136,28 +219,33 @@
   ceres::Solve(options, &problem, &summary);
 
   auto end = aos::monotonic_clock::now();
-  LOG(INFO) << "Target estimation elapsed time: "
-            << std::chrono::duration<double, std::milli>(end - start).count()
-            << " ms";
+  VLOG(1) << "Target estimation elapsed time: "
+          << std::chrono::duration<double, std::milli>(end - start).count()
+          << " ms";
 
-  // Use a sigmoid to convert the final cost into a confidence for the
-  // localizer. Fit a sigmoid to the points of (0, 1) and two other reasonable
-  // residual-confidence combinations using
-  // https://www.desmos.com/calculator/jj7p8zk0w2
-  constexpr double kSigmoidCapacity = 1.206;
-  // Stretch the sigmoid out correctly.
-  // Currently, good estimates have final costs of around 1 to 2 pixels.
-  constexpr double kSigmoidScalar = 0.2061;
-  constexpr double kSigmoidGrowthRate = -0.1342;
-  if (centroids_.size() > 0) {
-    confidence_ = kSigmoidCapacity /
-                  (1.0 + kSigmoidScalar * std::exp(-kSigmoidGrowthRate *
-                                                   summary.final_cost));
-  } else {
-    // If we detected no blobs, the confidence should be zero and not depending
-    // on the final cost, which would be 0.
-    confidence_ = 0.0;
+  // For computing the confidence, find the standard deviation in pixels of
+  // the centroid residuals. The last two residuals are squared sizes, not
+  // pixel offsets, so they are excluded.
+  std::vector<double> residual(num_residuals);
+  (*this)(&roll_, &pitch_, &yaw_, &distance_, &angle_to_camera_,
+          &camera_height_, residual.data());
+  double std_dev = 0.0;
+  for (auto it = residual.begin(); it < residual.end() - 2; it++) {
+    std_dev += std::pow(*it, 2);
   }
+  std_dev /= num_residuals - 2;
+  std_dev = std::sqrt(std_dev);
+
+  // Use a sigmoid to convert the deviation into a confidence for the
+  // localizer. Fit a sigmoid to the points of (0, 1) and two other
+  // reasonable deviation-confidence combinations using
+  // https://www.desmos.com/calculator/try0pgx1qw
+  constexpr double kSigmoidCapacity = 1.045;
+  // Stretch the sigmoid out correctly.
+  // Currently, good estimates have deviations of around 2 pixels.
+  constexpr double kSigmoidScalar = 0.04452;
+  constexpr double kSigmoidGrowthRate = -0.4021;
+  confidence_ =
+      kSigmoidCapacity /
+      (1.0 + kSigmoidScalar * std::exp(-kSigmoidGrowthRate * std_dev));
 
   if (FLAGS_solver_output) {
     LOG(INFO) << summary.FullReport();
@@ -169,6 +257,7 @@
     LOG(INFO) << "angle to camera (polar): " << angle_to_camera_;
     LOG(INFO) << "distance (polar): " << distance_;
     LOG(INFO) << "camera height: " << camera_height_;
+    LOG(INFO) << "standard deviation (px): " << std_dev;
     LOG(INFO) << "confidence: " << confidence_;
   }
 }
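
Note that the deviation computed above is the root-mean-square of the 2N
centroid residuals,

  std_dev = sqrt((1 / (2*N)) * sum(residual[i]^2 for i < 2*N)),

with the two size residuals excluded since they are in squared pixels rather
than pixels.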
@@ -206,32 +295,26 @@
                         *distance * ceres::sin(*theta), *camera_height);
 
   Affine3s H_camera_hub = Eigen::Translation<S, 3>(T_camera_hub) * R_camera_hub;
+  Affine3s H_hub_camera = H_camera_hub.inverse();
 
   std::vector<cv::Point_<S>> tape_points_proj;
   for (cv::Point3d tape_point_hub : kTapePoints) {
-    Vector3s tape_point_hub_eigen(S(tape_point_hub.x), S(tape_point_hub.y),
-                                  S(tape_point_hub.z));
-
-    // With X, Y, Z being world axes and x, y, z being camera axes,
-    // x = Y, y = Z, z = -X
-    static const Eigen::Matrix3d kCameraAxisConversion =
-        (Eigen::Matrix3d() << 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0)
-            .finished();
-    // Project the 3d tape point onto the image using the transformation and
-    // intrinsics
-    Vector3s tape_point_proj =
-        intrinsics_ * (kCameraAxisConversion *
-                       (H_camera_hub.inverse() * tape_point_hub_eigen));
-
-    // Normalize the projected point
-    tape_points_proj.emplace_back(tape_point_proj.x() / tape_point_proj.z(),
-                                  tape_point_proj.y() / tape_point_proj.z());
-    VLOG(1) << "Projected tape point: "
+    tape_points_proj.emplace_back(ProjectToImage(tape_point_hub, H_hub_camera));
+    VLOG(2) << "Projected tape point: "
             << ScalarPointToDouble(
                    tape_points_proj[tape_points_proj.size() - 1]);
   }
 
-  for (size_t i = 0; i < centroids_.size(); i++) {
+  // Project the four corners of the middle piece of tape onto the image
+  std::array<cv::Point_<S>, 4> middle_tape_piece_points_proj;
+  for (auto tape_piece_it = kMiddleTapePiecePoints.begin();
+       tape_piece_it < kMiddleTapePiecePoints.end(); tape_piece_it++) {
+    middle_tape_piece_points_proj[tape_piece_it -
+                                  kMiddleTapePiecePoints.begin()] =
+        ProjectToImage(*tape_piece_it, H_hub_camera);
+  }
+
+  for (size_t i = 0; i < blob_stats_.size(); i++) {
     const auto distance = DistanceFromTape(i, tape_points_proj);
     // Set the residual to the (x, y) distance of the centroid from the
     // nearest projected piece of tape
@@ -239,6 +322,31 @@
     residual[(i * 2) + 1] = distance.y;
   }
 
+  // Penalize based on the difference between the size of the projected middle
+  // piece of tape and that of the detected middle blob. Compare squared sizes
+  // to avoid taking a square root, whose gradient ceres' automatic
+  // differentiation doesn't handle well near zero
+  const S middle_tape_piece_width_squared =
+      ceres::pow(middle_tape_piece_points_proj[2].x -
+                     middle_tape_piece_points_proj[3].x,
+                 2) +
+      ceres::pow(middle_tape_piece_points_proj[2].y -
+                     middle_tape_piece_points_proj[3].y,
+                 2);
+  const S middle_tape_piece_height_squared =
+      ceres::pow(middle_tape_piece_points_proj[1].x -
+                     middle_tape_piece_points_proj[2].x,
+                 2) +
+      ceres::pow(middle_tape_piece_points_proj[1].y -
+                     middle_tape_piece_points_proj[2].y,
+                 2);
+
+  residual[blob_stats_.size() * 2] =
+      middle_tape_piece_width_squared -
+      std::pow(blob_stats_[middle_blob_index_].size.width, 2);
+  residual[(blob_stats_.size() * 2) + 1] =
+      middle_tape_piece_height_squared -
+      std::pow(blob_stats_[middle_blob_index_].size.height, 2);
+
   if (image_.has_value()) {
     // Draw the current stage of the solving
     cv::Mat image = image_->clone();
@@ -258,6 +366,30 @@
   return true;
 }
 
+template <typename S>
+cv::Point_<S> TargetEstimator::ProjectToImage(
+    cv::Point3d tape_point_hub,
+    const Eigen::Transform<S, 3, Eigen::Affine> &H_hub_camera) const {
+  using Vector3s = Eigen::Matrix<S, 3, 1>;
+
+  // With X, Y, Z being world axes and x, y, z being camera axes,
+  // x = Y, y = Z, z = -X
+  static const Eigen::Matrix3d kCameraAxisConversion =
+      (Eigen::Matrix3d() << 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0)
+          .finished();
+  const Vector3s tape_point_hub_eigen =
+      Vector3s(S(tape_point_hub.x), S(tape_point_hub.y), S(tape_point_hub.z));
+  // Project the 3d tape point onto the image using the transformation and
+  // intrinsics
+  const Vector3s tape_point_proj =
+      intrinsics_ *
+      (kCameraAxisConversion * (H_hub_camera * tape_point_hub_eigen));
+
+  // Normalize the projected point
+  return {tape_point_proj.x() / tape_point_proj.z(),
+          tape_point_proj.y() / tape_point_proj.z()};
+}
+
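
The projection above is the standard pinhole model: for a camera-frame point
(x, y, z), focal lengths (fx, fy), and principal point (cx, cy), the
intrinsics multiply and the normalization divides out z, giving

  u = fx * (x / z) + cx,  v = fy * (y / z) + cy.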
 namespace {
 template <typename S>
 cv::Point_<S> Distance(cv::Point p, cv::Point_<S> q) {
@@ -273,35 +405,20 @@
 
 template <typename S>
 cv::Point_<S> TargetEstimator::DistanceFromTape(
-    size_t centroid_index,
-    const std::vector<cv::Point_<S>> &tape_points) const {
-  // Figure out the middle index in the tape points
-  size_t middle_index = centroids_.size() / 2;
-  if (centroids_.size() % 2 == 0) {
-    // There are two possible middles in this case. Figure out which one fits
-    // the current better
-    const auto tape_middle = tape_points[tape_points.size() / 2];
-    const auto middle_distance_1 =
-        Distance(centroids_[(centroids_.size() / 2) - 1], tape_middle);
-    const auto middle_distance_2 =
-        Distance(centroids_[centroids_.size() / 2], tape_middle);
-    if (Less(middle_distance_1, middle_distance_2)) {
-      middle_index--;
-    }
-  }
-
+    size_t blob_index, const std::vector<cv::Point_<S>> &tape_points) const {
   auto distance = cv::Point_<S>(std::numeric_limits<S>::infinity(),
                                 std::numeric_limits<S>::infinity());
-  if (centroid_index == middle_index) {
-    // Fix the middle centroid so the solver can't go too far off
-    distance =
-        Distance(centroids_[middle_index], tape_points[tape_points.size() / 2]);
+  if (blob_index == middle_blob_index_) {
+    // Fix the middle blob so the solver can't go too far off
+    distance = Distance(blob_stats_[middle_blob_index_].centroid,
+                        tape_points[tape_points.size() / 2]);
   } else {
-    // Give the other centroids some freedom in case some are split into pieces
-    for (auto tape_point : tape_points) {
+    // Give the other blobs some freedom in case some are split into pieces
+    for (auto it = tape_points.begin(); it < tape_points.end(); it++) {
       const auto current_distance =
-          Distance(centroids_[centroid_index], tape_point);
-      if (Less(current_distance, distance)) {
+          Distance(blob_stats_[blob_index].centroid, *it);
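+      // Don't match against the middle tape point, since the middle blob is
+      // already pinned to it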
+      if ((it != tape_points.begin() + (tape_points.size() / 2)) &&
+          Less(current_distance, distance)) {
         distance = current_distance;
       }
     }
diff --git a/y2022/vision/target_estimator.h b/y2022/vision/target_estimator.h
index 5d338c4..b509a2e 100644
--- a/y2022/vision/target_estimator.h
+++ b/y2022/vision/target_estimator.h
@@ -7,22 +7,21 @@
 #include "Eigen/Geometry"
 #include "opencv2/core/types.hpp"
 #include "opencv2/imgproc.hpp"
+#include "y2022/vision/blob_detector.h"
 #include "y2022/vision/target_estimate_generated.h"
 
 namespace y2022::vision {
 
-// Class to estimate the polar coordinates and rotation from the camera to the
+// Class to estimate the distance and rotation of the camera from the
 // target.
 class TargetEstimator {
  public:
   TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics);
 
   // Runs the solver to estimate the target
-  // centroids must be in sorted order from left to right on the circle.
-  // TODO(milind): seems safer to sort them here.
   // If image != std::nullopt, the solver's progress will be displayed
   // graphically.
-  void Solve(const std::vector<cv::Point> &centroids,
+  void Solve(const std::vector<BlobDetector::BlobStats> &blob_stats,
              std::optional<cv::Mat> image);
 
   // Cost function for the solver.
@@ -54,22 +53,24 @@
   void DrawEstimate(cv::Mat view_image) const;
 
  private:
-  // Height of the center of the tape (m)
-  static constexpr double kTapeHeight = 2.58 + (0.05 / 2);
-  // Horizontal distance from tape to center of hub (m)
-  static constexpr double kUpperHubRadius = 1.22 / 2;
-  static constexpr size_t kNumPiecesOfTape = 16;
-
   // 3d points of the visible pieces of tape in the hub's frame
   static const std::vector<cv::Point3d> kTapePoints;
-  static std::vector<cv::Point3d> ComputeTapePoints();
+  // 3d outer points of the middle piece of tape in the hub's frame,
+  // clockwise around the rectangle
+  static const std::array<cv::Point3d, 4> kMiddleTapePiecePoints;
+
+  template <typename S>
+  cv::Point_<S> ProjectToImage(
+      cv::Point3d tape_point_hub,
+      const Eigen::Transform<S, 3, Eigen::Affine> &H_hub_camera) const;
 
   template <typename S>
   cv::Point_<S> DistanceFromTape(
       size_t centroid_index,
       const std::vector<cv::Point_<S>> &tape_points) const;
 
-  std::vector<cv::Point> centroids_;
+  std::vector<BlobDetector::BlobStats> blob_stats_;
+  size_t middle_blob_index_;
   std::optional<cv::Mat> image_;
 
   Eigen::Matrix3d intrinsics_;
diff --git a/y2022/vision/viewer.cc b/y2022/vision/viewer.cc
index e20f433..feff592 100644
--- a/y2022/vision/viewer.cc
+++ b/y2022/vision/viewer.cc
@@ -58,8 +58,9 @@
   std::vector<BlobDetector::BlobStats> blob_stats;
   for (const auto stats_fbs : blob_stats_fbs) {
     cv::Point centroid{stats_fbs->centroid()->x(), stats_fbs->centroid()->y()};
+    cv::Size size{stats_fbs->size()->width(), stats_fbs->size()->height()};
     blob_stats.emplace_back(BlobDetector::BlobStats{
-        centroid, stats_fbs->aspect_ratio(), stats_fbs->area(),
+        centroid, size, stats_fbs->aspect_ratio(), stats_fbs->area(),
         static_cast<size_t>(stats_fbs->num_points())});
   }
   return blob_stats;
@@ -82,7 +83,7 @@
         FbsToCvBlobs(*target_est->blob_result()->filtered_blobs()),
         FbsToCvBlobs(*target_est->blob_result()->unfiltered_blobs()),
         FbsToBlobStats(*target_est->blob_result()->blob_stats()),
-        FbsToCvPoints(*target_est->blob_result()->filtered_centroids()),
+        FbsToBlobStats(*target_est->blob_result()->filtered_stats()),
         cv::Point{target_est->blob_result()->centroid()->x(),
                   target_est->blob_result()->centroid()->y()}};
     // Only keep last 10 matches
@@ -204,7 +205,7 @@
               << ")";
 
     if (blob_result.filtered_blobs.size() > 0) {
-      estimator.Solve(blob_result.filtered_centroids,
+      estimator.Solve(blob_result.filtered_stats,
                       FLAGS_display_estimation ? std::make_optional(ret_image)
                                                : std::nullopt);
       estimator.DrawEstimate(ret_image);