Use a solver for hub estimation

Uses Ceres to estimate the rotation and distance from the camera to the
hub.
Ready for review
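
The solve projects the known 3d tape points into the image for a guessed
camera pose and minimizes the pixel error against the detected blob
centroids. A minimal sketch of the formulation (illustrative only:
TapeResidual is a made-up name, and roll, pitch, and the polar angle to
the camera are frozen for brevity):

  #include "Eigen/Geometry"
  #include "ceres/ceres.h"

  // One residual: the pixel error between a projected tape point and its
  // detected centroid.
  struct TapeResidual {
    Eigen::Vector3d tape_point_hub;  // 3d tape point in the hub frame
    double centroid_x, centroid_y;   // detected blob centroid (pixels)
    Eigen::Matrix3d intrinsics;      // pinhole camera matrix

    template <typename S>
    bool operator()(const S *const yaw, const S *const distance,
                    const S *const camera_height, S *residual) const {
      using Vector3s = Eigen::Matrix<S, 3, 1>;
      // Camera pose in the hub frame (polar angle frozen at 0 here)
      Eigen::Quaternion<S> R(Eigen::AngleAxis<S>(*yaw, Vector3s::UnitZ()));
      Eigen::Transform<S, 3, Eigen::Affine> H_camera_hub =
          Eigen::Translation<S, 3>(
              Vector3s(*distance, S(0), *camera_height)) *
          R;
      // With X, Y, Z being world axes and x, y, z camera axes,
      // x = Y, y = Z, z = -X
      Eigen::Matrix3d axes;
      axes << 0, 1, 0, 0, 0, 1, -1, 0, 0;
      const Vector3s p =
          intrinsics.cast<S>() *
          (axes.cast<S>() *
           (H_camera_hub.inverse() * tape_point_hub.cast<S>()));
      residual[0] = p.x() / p.z() - S(centroid_x);
      residual[1] = p.y() / p.z() - S(centroid_y);
      return true;
    }
  };

  // One residual block per matched centroid, sharing the parameters:
  // problem.AddResidualBlock(
  //     new ceres::AutoDiffCostFunction<TapeResidual, 2, 1, 1, 1>(
  //         new TapeResidual{tape_point, cx, cy, intrinsics}),
  //     nullptr, &yaw, &distance, &camera_height);

The real cost function in target_estimator.cc instead solves all six
parameters in one dynamically-sized residual block and matches each
centroid to its nearest projected tape point.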

Signed-off-by: Milind Upadhyay <milind.upadhyay@gmail.com>
Change-Id: Idd4fb3334146a0587c713887626cb0abe43cdd4e
diff --git a/y2022/vision/BUILD b/y2022/vision/BUILD
index 4f1c7ff..599b4ed 100644
--- a/y2022/vision/BUILD
+++ b/y2022/vision/BUILD
@@ -153,7 +153,13 @@
     deps = [
         ":calibration_fbs",
         ":target_estimate_fbs",
+        "//aos/logging",
+        "//aos/time",
+        "//frc971/control_loops:quaternion_utils",
         "//third_party:opencv",
+        "@com_google_absl//absl/strings:str_format",
+        "@com_google_ceres_solver//:ceres",
+        "@org_tuxfamily_eigen//:eigen",
     ],
 )
 
diff --git a/y2022/vision/blob_detector.cc b/y2022/vision/blob_detector.cc
index 96d7ffd..2c68a27 100644
--- a/y2022/vision/blob_detector.cc
+++ b/y2022/vision/blob_detector.cc
@@ -18,7 +18,7 @@
             "If true, change thresholds to handle outdoor illumination");
 DEFINE_uint64(outdoors_red_delta, 100,
               "Difference between green pixels vs. red, when outdoors");
-DEFINE_uint64(outdoors_blue_delta, 15,
+DEFINE_uint64(outdoors_blue_delta, 1,
               "Difference between green pixels vs. blue, when outdoors");
 
 namespace y2022 {
@@ -30,7 +30,7 @@
 
   if (FLAGS_use_outdoors) {
     red_delta = FLAGS_outdoors_red_delta;
-    red_delta = FLAGS_outdoors_blue_delta;
+    blue_delta = FLAGS_outdoors_blue_delta;
   }
 
   cv::Mat binarized_image(cv::Size(bgr_image.cols, bgr_image.rows), CV_8UC1);
@@ -65,14 +65,11 @@
 }
 
 std::vector<BlobDetector::BlobStats> BlobDetector::ComputeStats(
-    std::vector<std::vector<cv::Point>> blobs) {
+    const std::vector<std::vector<cv::Point>> &blobs) {
   std::vector<BlobDetector::BlobStats> blob_stats;
   for (auto blob : blobs) {
-    // Make the blob convex before finding bounding box
-    std::vector<cv::Point> convex_blob;
-    cv::convexHull(blob, convex_blob);
-    auto blob_size = cv::boundingRect(convex_blob).size();
-    cv::Moments moments = cv::moments(convex_blob);
+    auto blob_size = cv::boundingRect(blob).size();
+    cv::Moments moments = cv::moments(blob);
 
     const auto centroid =
         cv::Point(moments.m10 / moments.m00, moments.m01 / moments.m00);
@@ -163,11 +160,16 @@
     return std::abs(cv::norm(p_prime) - radius);
   }
 
-  bool InAngleRange(cv::Point2d p, double theta_min, double theta_max) const {
+  double AngleOf(cv::Point2d p) const {
     auto p_prime = TranslateToOrigin(p);
     // Flip the y because y values go downwards.
     p_prime.y *= -1;
-    const double theta = std::atan2(p_prime.y, p_prime.x);
+    return std::atan2(p_prime.y, p_prime.x);
+  }
+
+  // TODO(milind): handle wrapping around 2pi
+  bool InAngleRange(cv::Point2d p, double theta_min, double theta_max) const {
+    const double theta = AngleOf(p);
     return (theta >= theta_min && theta <= theta_max);
   }
 
@@ -181,25 +183,14 @@
 
 }  // namespace
 
-std::pair<std::vector<std::vector<cv::Point>>, cv::Point>
-BlobDetector::FilterBlobs(std::vector<std::vector<cv::Point>> blobs,
-                          std::vector<BlobDetector::BlobStats> blob_stats) {
+void BlobDetector::FilterBlobs(BlobResult *blob_result) {
   std::vector<std::vector<cv::Point>> filtered_blobs;
   std::vector<BlobStats> filtered_stats;
 
-  auto blob_it = blobs.begin();
-  auto stats_it = blob_stats.begin();
-  while (blob_it < blobs.end() && stats_it < blob_stats.end()) {
-    // To estimate the maximum y, we can figure out the y value of the blobs
-    // when the camera is the farthest from the target, at the field corner.
-    // We can solve for the pitch of the blob:
-    // blob_pitch = atan((height_tape - height_camera) / depth) + camera_pitch
-    // The triangle with the height of the tape above the camera and the
-    // camera depth is similar to the one with the focal length in y pixels
-    // and the y coordinate offset from the center of the image. Therefore
-    // y_offset = focal_length_y * tan(blob_pitch), and y = -(y_offset -
-    // offset_y)
-    constexpr int kMaxY = 400;
+  auto blob_it = blob_result->unfiltered_blobs.begin();
+  auto stats_it = blob_result->blob_stats.begin();
+  while (blob_it < blob_result->unfiltered_blobs.end() &&
+         stats_it < blob_result->blob_stats.end()) {
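+    // The pieces of vision tape are 5 in. wide and 2 in. tall.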
     constexpr double kTapeAspectRatio = 5.0 / 2.0;
     constexpr double kAspectRatioThreshold = 1.6;
     constexpr double kMinArea = 10;
@@ -207,8 +198,7 @@
 
-    // Remove all blobs that are at the bottom of the image, have a different
-    // aspect ratio than the tape, or have too little area or points.
+    // Remove all blobs that have a different aspect ratio than the tape, or
+    // have too little area or points.
-    if ((stats_it->centroid.y <= kMaxY) &&
-        (std::abs(kTapeAspectRatio - stats_it->aspect_ratio) <
+    if ((std::abs(1.0 - kTapeAspectRatio / stats_it->aspect_ratio) <
          kAspectRatioThreshold) &&
         (stats_it->area >= kMinArea) &&
         (stats_it->num_points >= kMinNumPoints)) {
@@ -220,11 +210,13 @@
   }
 
   // Threshold for mean distance from a blob centroid to a circle.
-  constexpr double kCircleDistanceThreshold = 5.0;
+  constexpr double kCircleDistanceThreshold = 10.0;
   // We should only expect to see blobs between these angles on a circle.
-  constexpr double kMinBlobAngle = M_PI / 3;
+  constexpr double kDegToRad = M_PI / 180.0;
+  constexpr double kMinBlobAngle = 50.0 * kDegToRad;
   constexpr double kMaxBlobAngle = M_PI - kMinBlobAngle;
   std::vector<std::vector<cv::Point>> blob_circle;
+  Circle circle;
   std::vector<cv::Point2d> centroids;
 
   // If we see more than this number of blobs after filtering based on
@@ -251,28 +243,29 @@
       std::vector<cv::Point2d> current_centroids{filtered_stats[j].centroid,
                                                  filtered_stats[k].centroid,
                                                  filtered_stats[l].centroid};
-      const std::optional<Circle> circle = Circle::Fit(current_centroids);
+      const std::optional<Circle> current_circle =
+          Circle::Fit(current_centroids);
 
       // Make sure that a circle could be created from the points
-      if (!circle) {
+      if (!current_circle) {
         continue;
       }
 
       // Only try to fit points to this circle if all of these are between
       // certain angles.
-      if (circle->InAngleRange(current_centroids[0], kMinBlobAngle,
-                               kMaxBlobAngle) &&
-          circle->InAngleRange(current_centroids[1], kMinBlobAngle,
-                               kMaxBlobAngle) &&
-          circle->InAngleRange(current_centroids[2], kMinBlobAngle,
-                               kMaxBlobAngle)) {
+      if (current_circle->InAngleRange(current_centroids[0], kMinBlobAngle,
+                                       kMaxBlobAngle) &&
+          current_circle->InAngleRange(current_centroids[1], kMinBlobAngle,
+                                       kMaxBlobAngle) &&
+          current_circle->InAngleRange(current_centroids[2], kMinBlobAngle,
+                                       kMaxBlobAngle)) {
         for (size_t m = 0; m < filtered_blobs.size(); m++) {
           // Add this blob to the list if it is close to the circle, is on the
          // top half, and isn't one of the other blobs
-          if ((m != i) && (m != j) && (m != k) &&
-              circle->InAngleRange(filtered_stats[m].centroid, kMinBlobAngle,
-                                   kMaxBlobAngle) &&
-              (circle->DistanceTo(filtered_stats[m].centroid) <
+          if ((m != j) && (m != k) && (m != l) &&
+              current_circle->InAngleRange(filtered_stats[m].centroid,
+                                           kMinBlobAngle, kMaxBlobAngle) &&
+              (current_circle->DistanceTo(filtered_stats[m].centroid) <
                kCircleDistanceThreshold)) {
             current_blobs.emplace_back(filtered_blobs[m]);
             current_centroids.emplace_back(filtered_stats[m].centroid);
@@ -281,6 +274,7 @@
 
         if (current_blobs.size() > blob_circle.size()) {
           blob_circle = current_blobs;
+          circle = *current_circle;
           centroids = current_centroids;
         }
       }
@@ -295,34 +289,52 @@
     }
     avg_centroid.x /= centroids.size();
     avg_centroid.y /= centroids.size();
+
+    for (auto centroid : centroids) {
+      blob_result->filtered_centroids.emplace_back(
+          static_cast<int>(centroid.x), static_cast<int>(centroid.y));
+    }
+
+    // Sort the filtered centroids to make them go from left to right
+    std::sort(blob_result->filtered_centroids.begin(),
+              blob_result->filtered_centroids.end(),
+              [&circle](cv::Point p, cv::Point q) {
+                // If the angle is greater, it is more left and should be
+                // considered "less" for sorting
+                return circle.AngleOf(p) > circle.AngleOf(q);
+              });
   }
 
-  return {blob_circle, avg_centroid};
+  blob_result->filtered_blobs = blob_circle;
+  blob_result->centroid = avg_centroid;
 }
 
-void BlobDetector::DrawBlobs(
-    cv::Mat view_image,
-    const std::vector<std::vector<cv::Point>> &unfiltered_blobs,
-    const std::vector<std::vector<cv::Point>> &filtered_blobs,
-    const std::vector<BlobStats> &blob_stats, cv::Point centroid) {
+void BlobDetector::DrawBlobs(const BlobResult &blob_result,
+                             cv::Mat view_image) {
   CHECK_GT(view_image.cols, 0);
-  if (unfiltered_blobs.size() > 0) {
+  if (blob_result.unfiltered_blobs.size() > 0) {
     // Draw blobs unfilled, with red color border
-    cv::drawContours(view_image, unfiltered_blobs, -1, cv::Scalar(0, 0, 255),
-                     0);
+    cv::drawContours(view_image, blob_result.unfiltered_blobs, -1,
+                     cv::Scalar(0, 0, 255), 0);
   }
 
-  cv::drawContours(view_image, filtered_blobs, -1, cv::Scalar(0, 255, 0),
-                   cv::FILLED);
+  cv::drawContours(view_image, blob_result.filtered_blobs, -1,
+                   cv::Scalar(0, 100, 0), cv::FILLED);
 
+  static constexpr double kCircleRadius = 2.0;
   // Draw blob centroids
-  for (auto stats : blob_stats) {
-    cv::circle(view_image, stats.centroid, 2, cv::Scalar(255, 0, 0),
+  for (auto stats : blob_result.blob_stats) {
+    cv::circle(view_image, stats.centroid, kCircleRadius,
+               cv::Scalar(0, 215, 255), cv::FILLED);
+  }
+  for (auto centroid : blob_result.filtered_centroids) {
+    cv::circle(view_image, centroid, kCircleRadius, cv::Scalar(0, 255, 0),
                cv::FILLED);
   }
 
   // Draw average centroid
-  cv::circle(view_image, centroid, 3, cv::Scalar(255, 255, 0), cv::FILLED);
+  cv::circle(view_image, blob_result.centroid, kCircleRadius,
+             cv::Scalar(255, 255, 0), cv::FILLED);
 }
 
 void BlobDetector::ExtractBlobs(cv::Mat bgr_image,
@@ -331,10 +343,7 @@
   blob_result->binarized_image = ThresholdImage(bgr_image);
   blob_result->unfiltered_blobs = FindBlobs(blob_result->binarized_image);
   blob_result->blob_stats = ComputeStats(blob_result->unfiltered_blobs);
-  auto filtered_pair =
-      FilterBlobs(blob_result->unfiltered_blobs, blob_result->blob_stats);
-  blob_result->filtered_blobs = filtered_pair.first;
-  blob_result->centroid = filtered_pair.second;
+  FilterBlobs(blob_result);
   auto end = aos::monotonic_clock::now();
   VLOG(2) << "Blob detection elapsed time: "
           << std::chrono::duration<double, std::milli>(end - start).count()
diff --git a/y2022/vision/blob_detector.h b/y2022/vision/blob_detector.h
index d263d32..d0a2b85 100644
--- a/y2022/vision/blob_detector.h
+++ b/y2022/vision/blob_detector.h
@@ -20,6 +20,8 @@
     cv::Mat binarized_image;
     std::vector<std::vector<cv::Point>> filtered_blobs, unfiltered_blobs;
     std::vector<BlobStats> blob_stats;
+    // In sorted order from left to right on the circle
+    std::vector<cv::Point> filtered_centroids;
     cv::Point centroid;
   };
 
@@ -35,22 +37,16 @@
 
   // Extract stats for each blob
   static std::vector<BlobStats> ComputeStats(
-      std::vector<std::vector<cv::Point>> blobs);
+      const std::vector<std::vector<cv::Point>> &blobs);
 
   // Filter blobs to get rid of noise, too small/large items, and blobs that
-  // aren't in a circle. Returns a pair of filtered blobs and the average
-  // of their centroids.
-  static std::pair<std::vector<std::vector<cv::Point>>, cv::Point> FilterBlobs(
-      std::vector<std::vector<cv::Point>> blobs,
-      std::vector<BlobStats> blob_stats);
+  // aren't in a circle. Finds the filtered blobs, their centroids, and the
+  // average centroid.
+  static void FilterBlobs(BlobResult *blob_result);
 
   // Draw Blobs on image
   // Optionally draw all blobs and filtered blobs
-  static void DrawBlobs(
-      cv::Mat view_image,
-      const std::vector<std::vector<cv::Point>> &filtered_blobs,
-      const std::vector<std::vector<cv::Point>> &unfiltered_blobs,
-      const std::vector<BlobStats> &blob_stats, cv::Point centroid);
+  static void DrawBlobs(const BlobResult &blob_result, cv::Mat view_image);
 
   static void ExtractBlobs(cv::Mat bgr_image, BlobResult *blob_result);
 };
diff --git a/y2022/vision/camera_reader.cc b/y2022/vision/camera_reader.cc
index 000d7f2..1d1f13e 100644
--- a/y2022/vision/camera_reader.cc
+++ b/y2022/vision/camera_reader.cc
@@ -13,7 +13,6 @@
 #include "opencv2/imgproc.hpp"
 #include "y2022/vision/blob_detector.h"
 #include "y2022/vision/calibration_generated.h"
-#include "y2022/vision/target_estimator.h"
 
 DEFINE_string(image_png, "", "A set of PNG images");
 
@@ -41,32 +40,38 @@
 }
 
 namespace {
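+// Converts a vector of cv::Point to Point structs for the flatbuffer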
+flatbuffers::Offset<flatbuffers::Vector<const Point *>> CvPointsToFbs(
+    const std::vector<cv::Point> &points,
+    aos::Sender<TargetEstimate>::Builder *builder) {
+  std::vector<Point> points_fbs;
+  for (auto p : points) {
+    points_fbs.push_back(Point{p.x, p.y});
+  }
+  return builder->fbb()->CreateVectorOfStructs(points_fbs);
+}
+
-// Converts a vector of cv::Point to PointT for the flatbuffer
+// Converts a vector of blobs to Blob tables for the flatbuffer
 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<Blob>>>
 CvBlobsToFbs(const std::vector<std::vector<cv::Point>> &blobs,
-             aos::Sender<TargetEstimate>::Builder &builder) {
+             aos::Sender<TargetEstimate>::Builder *builder) {
   std::vector<flatbuffers::Offset<Blob>> blobs_fbs;
   for (auto &blob : blobs) {
-    std::vector<Point> points_fbs;
-    for (auto p : blob) {
-      points_fbs.push_back(Point{p.x, p.y});
-    }
-    auto points_offset = builder.fbb()->CreateVectorOfStructs(points_fbs);
-    auto blob_builder = builder.MakeBuilder<Blob>();
+    const auto points_offset = CvPointsToFbs(blob, builder);
+    auto blob_builder = builder->MakeBuilder<Blob>();
     blob_builder.add_points(points_offset);
     blobs_fbs.emplace_back(blob_builder.Finish());
   }
-  return builder.fbb()->CreateVector(blobs_fbs.data(), blobs_fbs.size());
+  return builder->fbb()->CreateVector(blobs_fbs.data(), blobs_fbs.size());
 }
 
 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<BlobStatsFbs>>>
 BlobStatsToFbs(const std::vector<BlobDetector::BlobStats> blob_stats,
-               aos::Sender<TargetEstimate>::Builder &builder) {
+               aos::Sender<TargetEstimate>::Builder *builder) {
   std::vector<flatbuffers::Offset<BlobStatsFbs>> stats_fbs;
   for (auto &stats : blob_stats) {
-    // Make BlobStatsFbs builder then fill each field using the BlobStats
-    // struct, then you finish it and add it to stats_fbs.
+    // Make a BlobStatsFbs builder, fill each field using the BlobStats
+    // struct, then finish it and add it to stats_fbs.
-    auto stats_builder = builder.MakeBuilder<BlobStatsFbs>();
+    auto stats_builder = builder->MakeBuilder<BlobStatsFbs>();
     Point centroid_fbs = Point{stats.centroid.x, stats.centroid.y};
     stats_builder.add_centroid(&centroid_fbs);
     stats_builder.add_aspect_ratio(stats.aspect_ratio);
@@ -76,7 +81,7 @@
     auto current_stats = stats_builder.Finish();
     stats_fbs.emplace_back(current_stats);
   }
-  return builder.fbb()->CreateVector(stats_fbs.data(), stats_fbs.size());
+  return builder->fbb()->CreateVector(stats_fbs.data(), stats_fbs.size());
 }
 }  // namespace
 
@@ -88,11 +93,13 @@
   flatbuffers::Offset<BlobResultFbs> blob_result_offset;
   {
     const auto filtered_blobs_offset =
-        CvBlobsToFbs(blob_result.filtered_blobs, builder);
+        CvBlobsToFbs(blob_result.filtered_blobs, &builder);
     const auto unfiltered_blobs_offset =
-        CvBlobsToFbs(blob_result.unfiltered_blobs, builder);
+        CvBlobsToFbs(blob_result.unfiltered_blobs, &builder);
     const auto blob_stats_offset =
-        BlobStatsToFbs(blob_result.blob_stats, builder);
+        BlobStatsToFbs(blob_result.blob_stats, &builder);
+    const auto filtered_centroids_offset =
+        CvPointsToFbs(blob_result.filtered_centroids, &builder);
     const Point centroid_fbs =
         Point{blob_result.centroid.x, blob_result.centroid.y};
 
@@ -100,17 +107,28 @@
     blob_result_builder.add_filtered_blobs(filtered_blobs_offset);
     blob_result_builder.add_unfiltered_blobs(unfiltered_blobs_offset);
     blob_result_builder.add_blob_stats(blob_stats_offset);
+    blob_result_builder.add_filtered_centroids(filtered_centroids_offset);
     blob_result_builder.add_centroid(&centroid_fbs);
     blob_result_offset = blob_result_builder.Finish();
   }
 
+  target_estimator_.Solve(blob_result.filtered_centroids, std::nullopt);
+
   const auto camera_calibration_offset =
       aos::RecursiveCopyFlatBuffer(camera_calibration_, builder.fbb());
 
+  const auto rotation =
+      Rotation{target_estimator_.roll(), target_estimator_.pitch(),
+               target_estimator_.yaw()};
+
   auto target_estimate_builder = builder.MakeBuilder<TargetEstimate>();
-  TargetEstimator::EstimateTargetLocation(
-      blob_result.centroid, CameraIntrinsics(), CameraExtrinsics(),
-      &target_estimate_builder);
+
+  target_estimate_builder.add_distance(target_estimator_.distance());
+  target_estimate_builder.add_angle_to_target(
+      target_estimator_.angle_to_target());
+  target_estimate_builder.add_angle_to_camera(
+      target_estimator_.angle_to_camera());
+  target_estimate_builder.add_rotation_camera_hub(&rotation);
   target_estimate_builder.add_blob_result(blob_result_offset);
   target_estimate_builder.add_camera_calibration(camera_calibration_offset);
   target_estimate_builder.add_image_monotonic_timestamp_ns(
diff --git a/y2022/vision/camera_reader.h b/y2022/vision/camera_reader.h
index 949247f..3b1eca6 100644
--- a/y2022/vision/camera_reader.h
+++ b/y2022/vision/camera_reader.h
@@ -17,6 +17,7 @@
 #include "y2022/vision/calibration_generated.h"
 #include "y2022/vision/gpio.h"
 #include "y2022/vision/target_estimate_generated.h"
+#include "y2022/vision/target_estimator.h"
 
 namespace y2022 {
 namespace vision {
@@ -36,6 +37,7 @@
         camera_calibration_(FindCameraCalibration()),
         reader_(reader),
         image_sender_(event_loop->MakeSender<CameraImage>("/camera")),
+        target_estimator_(CameraIntrinsics(), CameraExtrinsics()),
         target_estimate_sender_(
             event_loop->MakeSender<TargetEstimate>("/camera")),
         read_image_timer_(event_loop->AddTimer([this]() { ReadImage(); })),
@@ -94,6 +96,7 @@
   const calibration::CameraCalibration *const camera_calibration_;
   V4L2Reader *const reader_;
   aos::Sender<CameraImage> image_sender_;
+  TargetEstimator target_estimator_;
   aos::Sender<TargetEstimate> target_estimate_sender_;
 
   // We schedule this immediately to read an image. Having it on a timer
diff --git a/y2022/vision/target_estimate.fbs b/y2022/vision/target_estimate.fbs
index 7dbc69f..f7dc93a 100644
--- a/y2022/vision/target_estimate.fbs
+++ b/y2022/vision/target_estimate.fbs
@@ -27,28 +27,40 @@
   unfiltered_blobs:[Blob] (id: 1);
   // Stats on the blobs
   blob_stats:[BlobStatsFbs] (id: 2);
+  // Centroids of filtered blobs
+  filtered_centroids:[Point] (id: 4);
   // Average centroid of the filtered blobs
   centroid:Point (id: 3);
 }
 
+// Euler angles rotation
+struct Rotation {
+  roll:double (id: 0);
+  pitch:double (id: 1);
+  yaw:double (id: 2);
+}
+
 // Contains the information the EKF wants from blobs from a single image.
 table TargetEstimate {
   // Horizontal distance from the camera to the center of the upper hub
   distance:double (id: 0);
   // Angle from the camera to the target (horizontal angle in rad).
-  // Positive means right of center, negative means left.
+  // Positive means left of center, negative means right.
   angle_to_target:double (id: 1);
+  // Polar angle from target to camera (not rotation).
+  // Currently frozen at 0
+  angle_to_camera:double (id: 3);
+  // Rotation of the camera in the hub's reference frame
+  rotation_camera_hub:Rotation (id: 4);
 
   blob_result:BlobResultFbs (id: 2);
 
-  // Contains the duration between the epoch and the nearest point
-  // in time from when it was called.
+  // Duration between the monotonic clock epoch and when the image was
+  // captured.
-  image_monotonic_timestamp_ns:int64 (id: 3);
+  image_monotonic_timestamp_ns:int64 (id: 5);
 
   // Information about the camera which took this image.
-  camera_calibration:frc971.vision.calibration.CameraCalibration (id: 4);
-
-  // TODO(milind): add confidence
+  camera_calibration:frc971.vision.calibration.CameraCalibration (id: 6);
 }
 
 root_type TargetEstimate;
diff --git a/y2022/vision/target_estimator.cc b/y2022/vision/target_estimator.cc
index 1ac3d55..3ebec95 100644
--- a/y2022/vision/target_estimator.cc
+++ b/y2022/vision/target_estimator.cc
@@ -1,31 +1,339 @@
 #include "y2022/vision/target_estimator.h"
 
+#include "absl/strings/str_format.h"
+#include "aos/time/time.h"
+#include "ceres/ceres.h"
+#include "frc971/control_loops/quaternion_utils.h"
+#include "opencv2/core/core.hpp"
+#include "opencv2/core/eigen.hpp"
+#include "opencv2/features2d.hpp"
+#include "opencv2/highgui/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+
+DEFINE_bool(freeze_roll, true, "If true, don't solve for roll");
+DEFINE_bool(freeze_pitch, false, "If true, don't solve for pitch");
+DEFINE_bool(freeze_yaw, false, "If true, don't solve for yaw");
+DEFINE_bool(freeze_camera_height, true,
+            "If true, don't solve for camera height");
+DEFINE_bool(freeze_angle_to_camera, true,
+            "If true, don't solve for polar angle to camera");
+
+DEFINE_uint64(max_num_iterations, 200,
+              "Maximum number of iterations for the ceres solver");
+DEFINE_bool(solver_output, false,
+            "If true, log the solver progress and results");
+
 namespace y2022::vision {
 
-void TargetEstimator::EstimateTargetLocation(cv::Point2i centroid,
-                                             const cv::Mat &intrinsics,
-                                             const cv::Mat &extrinsics,
-                                             TargetEstimate::Builder *builder) {
-  const cv::Point2d focal_length(intrinsics.at<double>(0, 0),
-                                 intrinsics.at<double>(1, 1));
-  const cv::Point2d offset(intrinsics.at<double>(0, 2),
-                           intrinsics.at<double>(1, 2));
+std::vector<cv::Point3d> TargetEstimator::ComputeTapePoints() {
+  std::vector<cv::Point3d> tape_points;
+  tape_points.reserve(kNumPiecesOfTape);
 
-  // Blob pitch in camera reference frame
-  const double blob_pitch =
-      std::atan(static_cast<double>(-(centroid.y - offset.y)) /
-                static_cast<double>(focal_length.y));
-  const double camera_height = extrinsics.at<double>(2, 3);
-  // Depth from camera to blob
-  const double depth = (kTapeHeight - camera_height) / std::tan(blob_pitch);
+  constexpr size_t kNumVisiblePiecesOfTape = 5;
+  for (size_t i = 0; i < kNumVisiblePiecesOfTape; i++) {
+    // The center piece of tape is at 0 rad, so the angle indices are offset
+    // by the number of pieces of tape on each side of it
+    const double theta_index =
+        static_cast<double>(i) - ((kNumVisiblePiecesOfTape - 1) / 2);
+    // The polar angle is a multiple of the angle between tape centers
+    double theta = theta_index * ((2.0 * M_PI) / kNumPiecesOfTape);
+    tape_points.emplace_back(kUpperHubRadius * std::cos(theta),
+                             kUpperHubRadius * std::sin(theta), kTapeHeight);
+  }
 
-  double angle_to_target =
-      std::atan2(static_cast<double>(centroid.x - offset.x),
-                 static_cast<double>(focal_length.x));
-  double distance = (depth / std::cos(angle_to_target)) + kUpperHubRadius;
+  return tape_points;
+}
 
-  builder->add_angle_to_target(angle_to_target);
-  builder->add_distance(distance);
+const std::vector<cv::Point3d> TargetEstimator::kTapePoints =
+    ComputeTapePoints();
+
+TargetEstimator::TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics)
+    : centroids_(),
+      image_(std::nullopt),
+      roll_(0.0),
+      pitch_(0.0),
+      yaw_(M_PI),
+      distance_(3.0),
+      angle_to_camera_(0.0),
+      // TODO(milind): add IMU height
+      camera_height_(extrinsics.at<double>(2, 3)) {
+  cv::cv2eigen(intrinsics, intrinsics_);
+  cv::cv2eigen(extrinsics, extrinsics_);
+}
+
+namespace {
+void SetBoundsOrFreeze(double *param, bool freeze, double min, double max,
+                       ceres::Problem *problem) {
+  if (freeze) {
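+    // Marking the only coordinate of this 1-d parameter block constant holds
+    // it at its initial value throughout the solve.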
+    problem->SetParameterization(
+        param, new ceres::SubsetParameterization(1, std::vector<int>{0}));
+  } else {
+    problem->SetParameterLowerBound(param, 0, min);
+    problem->SetParameterUpperBound(param, 0, max);
+  }
+}
+}  // namespace
+
+void TargetEstimator::Solve(const std::vector<cv::Point> &centroids,
+                            std::optional<cv::Mat> image) {
+  auto start = aos::monotonic_clock::now();
+
+  centroids_ = centroids;
+  image_ = image;
+
+  ceres::Problem problem;
+
+  // Set up the only cost function (also known as residual). This uses
+  // auto-differentiation to obtain the derivative (jacobian).
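+  // The six 1s are the sizes of the parameter blocks (roll, pitch, yaw,
+  // distance, angle to camera, and camera height); the residual count is
+  // dynamic because it depends on how many centroids were detected.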
+  ceres::CostFunction *cost_function =
+      new ceres::AutoDiffCostFunction<TargetEstimator, ceres::DYNAMIC, 1, 1, 1,
+                                      1, 1, 1>(this, centroids_.size() * 2,
+                                               ceres::DO_NOT_TAKE_OWNERSHIP);
+
+  // TODO(milind): add loss function when we get more noisy data
+  problem.AddResidualBlock(cost_function, nullptr, &roll_, &pitch_, &yaw_,
+                           &distance_, &angle_to_camera_, &camera_height_);
+
+  // TODO(milind): seed values at localizer output, and constrain to be close to
+  // that.
+
+  // Constrain the rotation, otherwise there can be multiple solutions.
+  // There shouldn't be too much roll or pitch
+  constexpr double kMaxRoll = 0.1;
+  SetBoundsOrFreeze(&roll_, FLAGS_freeze_roll, -kMaxRoll, kMaxRoll, &problem);
+
+  constexpr double kPitch = -35.0 * M_PI / 180.0;
+  constexpr double kMaxPitchDelta = 0.15;
+  SetBoundsOrFreeze(&pitch_, FLAGS_freeze_pitch, kPitch - kMaxPitchDelta,
+                    kPitch + kMaxPitchDelta, &problem);
+  // Constrain the yaw to where the target would be visible
+  constexpr double kMaxYawDelta = M_PI / 4.0;
+  SetBoundsOrFreeze(&yaw_, FLAGS_freeze_yaw, M_PI - kMaxYawDelta,
+                    M_PI + kMaxYawDelta, &problem);
+
+  constexpr double kMaxHeightDelta = 0.1;
+  SetBoundsOrFreeze(&camera_height_, FLAGS_freeze_camera_height,
+                    camera_height_ - kMaxHeightDelta,
+                    camera_height_ + kMaxHeightDelta, &problem);
+
+  // Distances shouldn't be too close to the target or too far
+  constexpr double kMinDistance = 1.0;
+  constexpr double kMaxDistance = 10.0;
+  SetBoundsOrFreeze(&distance_, false, kMinDistance, kMaxDistance, &problem);
+
+  // Keep the angle between +/- half of the angle between pieces of tape
+  constexpr double kMaxAngle = ((2.0 * M_PI) / kNumPiecesOfTape) / 2.0;
+  SetBoundsOrFreeze(&angle_to_camera_, FLAGS_freeze_angle_to_camera, -kMaxAngle,
+                    kMaxAngle, &problem);
+
+  ceres::Solver::Options options;
+  options.minimizer_progress_to_stdout = FLAGS_solver_output;
+  options.gradient_tolerance = 1e-12;
+  options.function_tolerance = 1e-16;
+  options.parameter_tolerance = 1e-12;
+  options.max_num_iterations = FLAGS_max_num_iterations;
+  ceres::Solver::Summary summary;
+  ceres::Solve(options, &problem, &summary);
+
+  auto end = aos::monotonic_clock::now();
+  LOG(INFO) << "Target estimation elapsed time: "
+            << std::chrono::duration<double, std::milli>(end - start).count()
+            << " ms";
+
+  if (FLAGS_solver_output) {
+    LOG(INFO) << summary.FullReport();
+
+    LOG(INFO) << "roll: " << roll_;
+    LOG(INFO) << "pitch: " << pitch_;
+    LOG(INFO) << "yaw: " << yaw_;
+    LOG(INFO) << "angle to target (based on yaw): " << angle_to_target();
+    LOG(INFO) << "angle to camera (polar): " << angle_to_camera_;
+    LOG(INFO) << "distance (polar): " << distance_;
+    LOG(INFO) << "camera height: " << camera_height_;
+  }
+}
+
+namespace {
+// Hacks to extract a double from a scalar, which is either a ceres jet or a
+// double. Only used for debugging and displaying.
+template <typename S>
+double ScalarToDouble(S s) {
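+  // Relies on ceres jets storing their scalar part first.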
+  const double *ptr = reinterpret_cast<double *>(&s);
+  return *ptr;
+}
+
+template <typename S>
+cv::Point2d ScalarPointToDouble(cv::Point_<S> p) {
+  return cv::Point2d(ScalarToDouble(p.x), ScalarToDouble(p.y));
+}
+}  // namespace
+
+template <typename S>
+bool TargetEstimator::operator()(const S *const roll, const S *const pitch,
+                                 const S *const yaw, const S *const distance,
+                                 const S *const theta,
+                                 const S *const camera_height,
+                                 S *residual) const {
+  using Vector3s = Eigen::Matrix<S, 3, 1>;
+  using Affine3s = Eigen::Transform<S, 3, Eigen::Affine>;
+
+  Eigen::AngleAxis<S> roll_angle(*roll, Vector3s::UnitX());
+  Eigen::AngleAxis<S> pitch_angle(*pitch, Vector3s::UnitY());
+  Eigen::AngleAxis<S> yaw_angle(*yaw, Vector3s::UnitZ());
+  // Construct the rotation and translation of the camera in the hub's frame
+  Eigen::Quaternion<S> R_camera_hub = yaw_angle * pitch_angle * roll_angle;
+  Vector3s T_camera_hub(*distance * ceres::cos(*theta),
+                        *distance * ceres::sin(*theta), *camera_height);
+
+  Affine3s H_camera_hub = Eigen::Translation<S, 3>(T_camera_hub) * R_camera_hub;
+
+  std::vector<cv::Point_<S>> tape_points_proj;
+  for (cv::Point3d tape_point_hub : kTapePoints) {
+    Vector3s tape_point_hub_eigen(S(tape_point_hub.x), S(tape_point_hub.y),
+                                  S(tape_point_hub.z));
+
+    // With X, Y, Z being world axes and x, y, z being camera axes,
+    // x = Y, y = Z, z = -X
+    static const Eigen::Matrix3d kCameraAxisConversion =
+        (Eigen::Matrix3d() << 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0)
+            .finished();
+    // Project the 3d tape point onto the image using the transformation and
+    // intrinsics
+    Vector3s tape_point_proj =
+        intrinsics_ * (kCameraAxisConversion *
+                       (H_camera_hub.inverse() * tape_point_hub_eigen));
+
+    // Normalize the projected point
+    tape_points_proj.emplace_back(tape_point_proj.x() / tape_point_proj.z(),
+                                  tape_point_proj.y() / tape_point_proj.z());
+    VLOG(1) << "Projected tape point: "
+            << ScalarPointToDouble(
+                   tape_points_proj[tape_points_proj.size() - 1]);
+  }
+
+  for (size_t i = 0; i < centroids_.size(); i++) {
+    const auto distance = DistanceFromTape(i, tape_points_proj);
+    // Set the residual to the (x, y) distance of the centroid from the
+    // nearest projected piece of tape
+    residual[i * 2] = distance.x;
+    residual[(i * 2) + 1] = distance.y;
+  }
+
+  if (image_.has_value()) {
+    // Draw the current stage of the solving
+    cv::Mat image = image_->clone();
+    for (size_t i = 0; i < tape_points_proj.size() - 1; i++) {
+      cv::line(image, ScalarPointToDouble(tape_points_proj[i]),
+               ScalarPointToDouble(tape_points_proj[i + 1]),
+               cv::Scalar(255, 255, 255));
+      cv::circle(image, ScalarPointToDouble(tape_points_proj[i]), 2,
+                 cv::Scalar(255, 20, 147), cv::FILLED);
+      cv::circle(image, ScalarPointToDouble(tape_points_proj[i + 1]), 2,
+                 cv::Scalar(255, 20, 147), cv::FILLED);
+    }
+    cv::imshow("image", image);
+    cv::waitKey(10);
+  }
+
+  return true;
+}
+
+namespace {
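+// Componentwise difference between detected point p and projected point q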
+template <typename S>
+cv::Point_<S> Distance(cv::Point p, cv::Point_<S> q) {
+  return cv::Point_<S>(S(p.x) - q.x, S(p.y) - q.y);
+}
+
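+// Returns whether distance_1 has a smaller squared magnitude than distance_2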
+template <typename S>
+bool Less(cv::Point_<S> distance_1, cv::Point_<S> distance_2) {
+  return (ceres::pow(distance_1.x, 2) + ceres::pow(distance_1.y, 2) <
+          ceres::pow(distance_2.x, 2) + ceres::pow(distance_2.y, 2));
+}
+}  // namespace
+
+template <typename S>
+cv::Point_<S> TargetEstimator::DistanceFromTape(
+    size_t centroid_index,
+    const std::vector<cv::Point_<S>> &tape_points) const {
+  // Figure out the index of the middle centroid
+  size_t middle_index = centroids_.size() / 2;
+  if (centroids_.size() % 2 == 0) {
+    // There are two possible middles in this case. Figure out which one fits
+    // the middle tape point better
+    const auto tape_middle = tape_points[tape_points.size() / 2];
+    const auto middle_distance_1 =
+        Distance(centroids_[(centroids_.size() / 2) - 1], tape_middle);
+    const auto middle_distance_2 =
+        Distance(centroids_[centroids_.size() / 2], tape_middle);
+    if (Less(middle_distance_1, middle_distance_2)) {
+      middle_index--;
+    }
+  }
+
+  auto distance = cv::Point_<S>(std::numeric_limits<S>::infinity(),
+                                std::numeric_limits<S>::infinity());
+  if (centroid_index == middle_index) {
+    // Fix the middle centroid so the solver can't go too far off
+    distance =
+        Distance(centroids_[middle_index], tape_points[tape_points.size() / 2]);
+  } else {
+    // Give the other centroids some freedom in case some are split into pieces
+    for (auto tape_point : tape_points) {
+      const auto current_distance =
+          Distance(centroids_[centroid_index], tape_point);
+      if (Less(current_distance, distance)) {
+        distance = current_distance;
+      }
+    }
+  }
+
+  return distance;
+}
+
+namespace {
+void DrawEstimateValues(double distance, double angle_to_target,
+                        double angle_to_camera, double roll, double pitch,
+                        double yaw, cv::Mat view_image) {
+  constexpr int kTextX = 10;
+  int text_y = 330;
+  constexpr int kTextSpacing = 35;
+
+  const auto kTextColor = cv::Scalar(0, 255, 255);
+  constexpr double kFontScale = 1.0;
+
+  cv::putText(view_image, absl::StrFormat("Distance: %.3f", distance),
+              cv::Point(kTextX, text_y += kTextSpacing),
+              cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
+  cv::putText(view_image,
+              absl::StrFormat("Angle to target: %.3f", angle_to_target),
+              cv::Point(kTextX, text_y += kTextSpacing),
+              cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
+  cv::putText(view_image,
+              absl::StrFormat("Angle to camera: %.3f", angle_to_camera),
+              cv::Point(kTextX, text_y += kTextSpacing),
+              cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
+
+  cv::putText(
+      view_image,
+      absl::StrFormat("Roll: %.3f, pitch: %.3f, yaw: %.3f", roll, pitch, yaw),
+      cv::Point(kTextX, text_y += kTextSpacing), cv::FONT_HERSHEY_DUPLEX,
+      kFontScale, kTextColor, 2);
+}
+}  // namespace
+
+void TargetEstimator::DrawEstimate(const TargetEstimate &target_estimate,
+                                   cv::Mat view_image) {
+  DrawEstimateValues(target_estimate.distance(),
+                     target_estimate.angle_to_target(),
+                     target_estimate.angle_to_camera(),
+                     target_estimate.rotation_camera_hub()->roll(),
+                     target_estimate.rotation_camera_hub()->pitch(),
+                     target_estimate.rotation_camera_hub()->yaw(), view_image);
+}
+
+void TargetEstimator::DrawEstimate(cv::Mat view_image) const {
+  DrawEstimateValues(distance_, angle_to_target(), angle_to_camera_, roll_,
+                     pitch_, yaw_, view_image);
 }
 
 }  // namespace y2022::vision
diff --git a/y2022/vision/target_estimator.h b/y2022/vision/target_estimator.h
index 9db4121..4303b24 100644
--- a/y2022/vision/target_estimator.h
+++ b/y2022/vision/target_estimator.h
@@ -1,28 +1,86 @@
-#ifndef Y2022_VISION_POSE_ESTIMATOR_H_
-#define Y2022_VISION_POSE_ESTIMATOR_H_
+#ifndef Y2022_VISION_TARGET_ESTIMATOR_H_
+#define Y2022_VISION_TARGET_ESTIMATOR_H_
 
+#include <optional>
+
+#include "Eigen/Dense"
+#include "Eigen/Geometry"
+#include "opencv2/core/types.hpp"
 #include "opencv2/imgproc.hpp"
 #include "y2022/vision/target_estimate_generated.h"
 
 namespace y2022::vision {
-
+// Class to estimate the polar coordinates and rotation from the camera to the
+// target.
 class TargetEstimator {
  public:
-  // Computes the location of the target.
-  // blob_point is the mean (x, y) of blob pixels.
-  // Adds angle_to_target and distance to the given builder.
-  static void EstimateTargetLocation(cv::Point2i centroid,
-                                     const cv::Mat &intrinsics,
-                                     const cv::Mat &extrinsics,
-                                     TargetEstimate::Builder *builder);
+  TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics);
+
+  // Runs the solver to estimate the target
+  // centroids must be in sorted order from left to right on the circle.
+  // TODO(milind): seems safer to sort them here.
+  // If image != std::nullopt, the solver's progress will be displayed
+  // graphically.
+  void Solve(const std::vector<cv::Point> &centroids,
+             std::optional<cv::Mat> image);
+
+  // Cost function for the solver.
+  // Takes in the rotation of the camera in the hub's frame, the horizontal
+  // polar coordinates of the camera in the hub's frame, and the height of the
+  // camera (can change if the robot is shaking).
+  // Hub frame is relative to the center of the bottom of the hub.
+  // Projects the pieces of tape using these values and compares them to the
+  // detected blob centroids to compute the cost.
+  template <typename S>
+  bool operator()(const S *const roll, const S *const pitch, const S *const yaw,
+                  const S *const distance, const S *const theta,
+                  const S *const camera_height, S *residual) const;
+
+  inline double roll() const { return roll_; }
+  inline double pitch() const { return pitch_; }
+  inline double yaw() const { return yaw_; }
+
+  inline double distance() const { return distance_; }
+  inline double angle_to_camera() const { return angle_to_camera_; }
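+  // The camera points straight at the target when yaw is pi, so the angle to
+  // the target is the offset from pi.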
+  inline double angle_to_target() const { return M_PI - yaw_; }
+  inline double camera_height() const { return camera_height_; }
+
+  // Draws the distance, angle, and rotation on the given image
+  static void DrawEstimate(const TargetEstimate &target_estimate,
+                           cv::Mat view_image);
+  void DrawEstimate(cv::Mat view_image) const;
 
  private:
   // Height of the center of the tape (m)
   static constexpr double kTapeHeight = 2.58 + (0.05 / 2);
   // Horizontal distance from tape to center of hub (m)
   static constexpr double kUpperHubRadius = 1.22 / 2;
+  static constexpr size_t kNumPiecesOfTape = 16;
+
+  // 3d points of the visible pieces of tape in the hub's frame
+  static const std::vector<cv::Point3d> kTapePoints;
+  static std::vector<cv::Point3d> ComputeTapePoints();
+
+  template <typename S>
+  cv::Point_<S> DistanceFromTape(
+      size_t centroid_index,
+      const std::vector<cv::Point_<S>> &tape_points) const;
+
+  std::vector<cv::Point> centroids_;
+  std::optional<cv::Mat> image_;
+
+  Eigen::Matrix3d intrinsics_;
+  Eigen::Matrix4d extrinsics_;
+
+  double roll_;
+  double pitch_;
+  double yaw_;
+
+  double distance_;
+  double angle_to_camera_;
+  double camera_height_;
 };
 
 }  // namespace y2022::vision
 
-#endif  // Y2022_VISION_POSE_ESTIMATOR_H_
+#endif  // Y2022_VISION_TARGET_ESTIMATOR_H_
diff --git a/y2022/vision/viewer.cc b/y2022/vision/viewer.cc
index ef503e2..e20f433 100644
--- a/y2022/vision/viewer.cc
+++ b/y2022/vision/viewer.cc
@@ -11,13 +11,18 @@
 #include "frc971/vision/vision_generated.h"
 #include "y2022/vision/blob_detector.h"
 #include "y2022/vision/target_estimate_generated.h"
+#include "y2022/vision/target_estimator.h"
 
 DEFINE_string(capture, "",
               "If set, capture a single image and save it to this filename.");
 DEFINE_string(channel, "/camera", "Channel name for the image.");
 DEFINE_string(config, "aos_config.json", "Path to the config file to use.");
 DEFINE_string(png_dir, "", "Path to a set of images to display.");
+DEFINE_uint64(skip, 0,
+              "Number of images to skip if doing local reading (png_dir set).");
 DEFINE_bool(show_features, true, "Show the blobs.");
+DEFINE_bool(display_estimation, false,
+            "If true, display the target estimation graphically");
 
 namespace y2022 {
 namespace vision {
@@ -29,15 +34,20 @@
 aos::Fetcher<frc971::vision::CameraImage> image_fetcher;
 aos::Fetcher<y2022::vision::TargetEstimate> target_estimate_fetcher;
 
+std::vector<cv::Point> FbsToCvPoints(
+    const flatbuffers::Vector<const Point *> &points_fbs) {
+  std::vector<cv::Point> points;
+  for (const Point *point : points_fbs) {
+    points.emplace_back(point->x(), point->y());
+  }
+  return points;
+}
+
 std::vector<std::vector<cv::Point>> FbsToCvBlobs(
     const flatbuffers::Vector<flatbuffers::Offset<Blob>> &blobs_fbs) {
   std::vector<std::vector<cv::Point>> blobs;
   for (const auto blob : blobs_fbs) {
-    std::vector<cv::Point> points;
-    for (const Point *point : *blob->points()) {
-      points.emplace_back(cv::Point{point->x(), point->y()});
-    }
-    blobs.emplace_back(points);
+    blobs.emplace_back(FbsToCvPoints(*blob->points()));
   }
   return blobs;
 }
@@ -67,12 +77,14 @@
       VLOG(2) << "Got blobs for timestamp " << target_est << "\n";
     }
     // Store the TargetEstimate data so we can match timestamp with image
-    target_est_map[target_timestamp] = BlobDetector::BlobResult(
-        {cv::Mat(), FbsToCvBlobs(*target_est->blob_result()->filtered_blobs()),
-         FbsToCvBlobs(*target_est->blob_result()->unfiltered_blobs()),
-         FbsToBlobStats(*target_est->blob_result()->blob_stats()),
-         cv::Point{target_est->blob_result()->centroid()->x(),
-                   target_est->blob_result()->centroid()->y()}});
+    target_est_map[target_timestamp] = BlobDetector::BlobResult{
+        cv::Mat(),
+        FbsToCvBlobs(*target_est->blob_result()->filtered_blobs()),
+        FbsToCvBlobs(*target_est->blob_result()->unfiltered_blobs()),
+        FbsToBlobStats(*target_est->blob_result()->blob_stats()),
+        FbsToCvPoints(*target_est->blob_result()->filtered_centroids()),
+        cv::Point{target_est->blob_result()->centroid()->x(),
+                  target_est->blob_result()->centroid()->y()}};
     // Only keep last 10 matches
     while (target_est_map.size() > 10u) {
       target_est_map.erase(target_est_map.begin());
@@ -108,10 +120,7 @@
 
     cv::Mat ret_image =
         cv::Mat::zeros(cv::Size(image->cols(), image->rows()), CV_8UC3);
-    BlobDetector::DrawBlobs(ret_image, target_est_it->second.filtered_blobs,
-                            target_est_it->second.unfiltered_blobs,
-                            target_est_it->second.blob_stats,
-                            target_est_it->second.centroid);
+    BlobDetector::DrawBlobs(target_est_it->second, ret_image);
     cv::imshow("blobs", ret_image);
   }
 
@@ -155,6 +164,62 @@
 
   event_loop.Run();
 }
+
+// TODO(milind): delete this when viewer can accumulate local images and results
+void ViewerLocal() {
+  std::vector<cv::String> file_list;
+  cv::glob(FLAGS_png_dir + "/*.png", file_list, false);
+
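+  // Rough camera calibration values, hardcoded for local testing.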
+  cv::Mat intrinsics = cv::Mat::zeros(cv::Size(3, 3), CV_64F);
+  intrinsics.at<double>(0, 0) = 391.63916;
+  intrinsics.at<double>(0, 1) = 0.0;
+  intrinsics.at<double>(0, 2) = 312.691162;
+  intrinsics.at<double>(1, 0) = 0.0;
+  intrinsics.at<double>(1, 1) = 391.535889;
+  intrinsics.at<double>(1, 2) = 267.138672;
+  intrinsics.at<double>(2, 0) = 0.0;
+  intrinsics.at<double>(2, 1) = 0.0;
+  intrinsics.at<double>(2, 2) = 1.0;
+  cv::Mat extrinsics = cv::Mat::zeros(cv::Size(4, 4), CV_64F);
+  extrinsics.at<double>(2, 3) = 0.9398;
+
+  TargetEstimator estimator(intrinsics, extrinsics);
+
+  for (auto it = file_list.begin() + FLAGS_skip; it != file_list.end(); it++) {
+    LOG(INFO) << "Reading file " << *it;
+    cv::Mat image_mat = cv::imread(it->c_str());
+    BlobDetector::BlobResult blob_result;
+    blob_result.binarized_image =
+        cv::Mat::zeros(cv::Size(image_mat.cols, image_mat.rows), CV_8UC1);
+    BlobDetector::ExtractBlobs(image_mat, &blob_result);
+
+    cv::Mat ret_image =
+        cv::Mat::zeros(cv::Size(image_mat.cols, image_mat.rows), CV_8UC3);
+    BlobDetector::DrawBlobs(blob_result, ret_image);
+
+    LOG(INFO) << "# blobs: " << blob_result.filtered_blobs.size()
+              << " (# removed: "
+              << blob_result.unfiltered_blobs.size() -
+                     blob_result.filtered_blobs.size()
+              << ")";
+
+    if (blob_result.filtered_blobs.size() > 0) {
+      estimator.Solve(blob_result.filtered_centroids,
+                      FLAGS_display_estimation ? std::make_optional(ret_image)
+                                               : std::nullopt);
+      estimator.DrawEstimate(ret_image);
+    }
+
+    cv::imshow("image", image_mat);
+    cv::imshow("mask", blob_result.binarized_image);
+    cv::imshow("blobs", ret_image);
+
+    int keystroke = cv::waitKey(0);
+    if ((keystroke & 0xFF) == static_cast<int>('q')) {
+      return;
+    }
+  }
+}
 }  // namespace
 }  // namespace vision
 }  // namespace y2022
@@ -162,5 +227,8 @@
 // Quick and lightweight viewer for images
 int main(int argc, char **argv) {
   aos::InitGoogle(&argc, &argv);
-  y2022::vision::ViewerMain();
+  if (FLAGS_png_dir != "")
+    y2022::vision::ViewerLocal();
+  else
+    y2022::vision::ViewerMain();
 }