Add AprilTag support to charuco_lib and improve multi-camera cal code

May remove AprilTag support from charuco_lib later, in favor of the
target_mapper flow.  For now, this allows us to do multi-camera
calibration using AprilTags instead of ChArUco diamonds.

Also cleaned up calibrate_multi_cameras so the code flows a bit better.
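
As a rough sketch, a calibration run with the new visualization might be
invoked something like this (the log path and the target_type value are
illustrative, not exact):

  calibrate_multi_cameras /path/to/logfile --target_type=apriltag \
      --visualize --alt_view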

Change-Id: If6f0536f57f8ee28bdad96ce3527dba71cd9be65
Signed-off-by: Jim Ostrowski <yimmy13@gmail.com>
diff --git a/y2023/vision/README.md b/y2023/vision/README.md
index 55d217f..c81581f 100644
--- a/y2023/vision/README.md
+++ b/y2023/vision/README.md
@@ -22,4 +22,4 @@
   * I've sometimes found it necessary to add the "--skip_missing_forwarding_entries" flag-- as guided by the logging messages
 * From the output, copy the calculated ("Updated") extrinsic into the
   corresponding calibration file for the right robot and camera in the
-  calib_files directory
\ No newline at end of file
+  calib_files directory
diff --git a/y2023/vision/calibrate_multi_cameras.cc b/y2023/vision/calibrate_multi_cameras.cc
index aaff033..30f12ee 100644
--- a/y2023/vision/calibrate_multi_cameras.cc
+++ b/y2023/vision/calibrate_multi_cameras.cc
@@ -25,6 +25,8 @@
 #include "y2023/vision/aprilrobotics.h"
 #include "y2023/vision/vision_util.h"
 
+DEFINE_bool(alt_view, false,
+            "If true, show visualization from field level, rather than above");
 DEFINE_string(config, "",
               "If set, override the log's config file with this one.");
 DEFINE_string(constants_path, "y2023/constants/constants.json",
@@ -52,8 +54,6 @@
 // TODO<Jim>: Not currently using estimate from pi1->pi4-- should do full
 // estimation, and probably also include camera->imu extrinsics from all
 // cameras, not just pi1
-// TODO<Jim>: Should add ability to do this with apriltags, since they're on the
-// field
 
 namespace y2023 {
 namespace vision {
@@ -62,6 +62,7 @@
 using frc971::vision::PoseUtils;
 using frc971::vision::TargetMap;
 using frc971::vision::TargetMapper;
+using frc971::vision::VisualizeRobot;
 namespace calibration = frc971::vision::calibration;
 
 static constexpr double kImagePeriodMs =
@@ -88,6 +89,7 @@
 std::vector<std::pair<TimestampedPiDetection, TimestampedPiDetection>>
     detection_list;
 std::vector<TimestampedPiDetection> two_board_extrinsics_list;
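+// Global visualization tool for drawing the estimated camera and board poses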
+VisualizeRobot vis_robot_;
 
 // TODO<jim>: Implement variance calcs
 Eigen::Affine3d ComputeAveragePose(
@@ -171,84 +173,166 @@
                                  charuco_ids, charuco_corners, valid,
                                  rvecs_eigen, tvecs_eigen);
 
+  // This is used to transform points for visualization
+  // Assumes targets are aligned with x->right, y->up, z->out of board
+  Eigen::Affine3d H_world_board;
+  H_world_board = Eigen::Translation3d::Identity() *
+                  Eigen::AngleAxisd(M_PI / 2.0, Eigen::Vector3d::UnitX());
+  if (FLAGS_alt_view) {
+    // Don't rotate -- this is like viewing from the side
+    H_world_board = Eigen::Translation3d(0.0, 0.0, 3.0);
+  }
+
+  bool draw_vis = false;
   if (valid) {
+    CHECK_LE(tvecs_eigen.size(), 2u)
+        << "Can't handle more than two tags in field of view";
     if (tvecs_eigen.size() == 2) {
+      draw_vis = true;
       VLOG(2) << "Saw two boards in same view from " << pi_name;
       // Handle when we see two boards at once
-      std::vector<TimestampedPiDetection> detections;
-      for (uint i = 0; i < tvecs_eigen.size(); i++) {
-        Eigen::Quaternion<double> rotation(
-            frc971::controls::ToQuaternionFromRotationVector(rvecs_eigen[i]));
-        Eigen::Translation3d translation(tvecs_eigen[i]);
-        Eigen::Affine3d H_camera_board = translation * rotation;
-        TimestampedPiDetection new_observation{
-            .time = distributed_eof,
-            .H_camera_target = H_camera_board,
-            .pi_name = pi_name,
-            .board_id = charuco_ids[i][0]};
-        detections.emplace_back(new_observation);
+      // We'll store them referenced to the lower id board
+      int from_index = 0;
+      int to_index = 1;
+      if (charuco_ids[from_index][0] > charuco_ids[to_index][0]) {
+        std::swap(from_index, to_index);
       }
+
+      // Create "from" (A) and "to" (B) transforms
+      Eigen::Quaternion<double> rotationA(
+          frc971::controls::ToQuaternionFromRotationVector(
+              rvecs_eigen[from_index]));
+      Eigen::Translation3d translationA(tvecs_eigen[from_index]);
+      Eigen::Affine3d H_camera_boardA = translationA * rotationA;
+
+      Eigen::Quaternion<double> rotationB(
+          frc971::controls::ToQuaternionFromRotationVector(
+              rvecs_eigen[to_index]));
+      Eigen::Translation3d translationB(tvecs_eigen[to_index]);
+      Eigen::Affine3d H_camera_boardB = translationB * rotationB;
+
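+      // Compute the transform from boardA to boardB:
+      //   H_boardA_boardB = H_camera_boardA^-1 * H_camera_boardB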
       Eigen::Affine3d H_boardA_boardB =
-          detections[0].H_camera_target.inverse() *
-          detections[1].H_camera_target;
+          H_camera_boardA.inverse() * H_camera_boardB;
 
-      TimestampedPiDetection board_extrinsic{.time = distributed_eof,
-                                             .H_camera_target = H_boardA_boardB,
-                                             .pi_name = pi_name,
-                                             .board_id = charuco_ids[0][0]};
+      TimestampedPiDetection boardA_boardB{
+          .time = distributed_eof,
+          .H_camera_target = H_boardA_boardB,
+          .pi_name = pi_name,
+          .board_id = charuco_ids[from_index][0]};
 
-      // Make sure we've got them in the right order (sorted by id)
-      if (detections[0].board_id < detections[1].board_id) {
-        two_board_extrinsics_list.push_back(board_extrinsic);
-      } else {
-        // Flip them around
-        board_extrinsic.H_camera_target =
-            board_extrinsic.H_camera_target.inverse();
-        board_extrinsic.board_id = charuco_ids[1][0];
-        two_board_extrinsics_list.push_back(board_extrinsic);
+      VLOG(1) << "Map from board " << from_index << " to " << to_index
+              << " is\n"
+              << H_boardA_boardB.matrix();
+      // Store this observation of the transform between two boards
+      two_board_extrinsics_list.push_back(boardA_boardB);
+
+      if (FLAGS_visualize) {
+        vis_robot_.DrawFrameAxes(
+            H_world_board,
+            std::string("board ") + std::to_string(charuco_ids[from_index][0]),
+            cv::Scalar(0, 255, 0));
+        vis_robot_.DrawFrameAxes(
+            H_world_board * boardA_boardB.H_camera_target,
+            std::string("board ") + std::to_string(charuco_ids[to_index][0]),
+            cv::Scalar(255, 0, 0));
+        VLOG(2) << "Detection map from camera " << pi_name << " to board "
+                << charuco_ids[from_index][0] << " is\n"
+                << H_camera_boardA.matrix() << "\n and inverse is\n"
+                << H_camera_boardA.inverse().matrix()
+                << "\n and with world to board rotation is\n"
+                << (H_world_board * H_camera_boardA.inverse()).matrix();
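+        // Draw the camera in the world frame:
+        //   H_world_camera = H_world_boardA * H_boardA_camera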
+        vis_robot_.DrawRobotOutline(H_world_board * H_camera_boardA.inverse(),
+                                    pi_name, cv::Scalar(0, 0, 255));
       }
+
     } else {
       VLOG(1) << "Saw single board in camera " << pi_name;
       Eigen::Quaternion<double> rotation(
           frc971::controls::ToQuaternionFromRotationVector(rvecs_eigen[0]));
       Eigen::Translation3d translation(tvecs_eigen[0]);
-      Eigen::Affine3d H_camera_board = translation * rotation;
-      TimestampedPiDetection new_observation{.time = distributed_eof,
-                                             .H_camera_target = H_camera_board,
-                                             .pi_name = pi_name,
-                                             .board_id = charuco_ids[0][0]};
+      Eigen::Affine3d H_camera2_board2 = translation * rotation;
+      TimestampedPiDetection new_observation{
+          .time = distributed_eof,
+          .H_camera_target = H_camera2_board2,
+          .pi_name = pi_name,
+          .board_id = charuco_ids[0][0]};
 
       VLOG(2) << "Checking versus last result from " << last_observation.pi_name
               << " at time " << last_observation.time << " with delta time = "
               << std::abs((distributed_eof - last_observation.time).count());
-      // Only take two observations if they're near enough to each other
-      // in time.  This should be within +/- kImagePeriodMs of each other (e.g.,
-      // +/-16.666ms for 30 Hz capture).  This should make sure
-      // we're always getting the closest images, and not miss too many possible
-      // pairs, between cameras
+      // Now, check if this new observation can be paired with a
+      // previous single board observation.
+
+      // Only take two observations if they're near enough to each
+      // other in time, i.e., within +/- kImagePeriodMs / 2 (e.g.,
+      // +/-16.67ms for 30 Hz capture).  This should make sure we're
+      // always getting the closest images, and not miss too many
+      // possible pairs between cameras
+
       // TODO<Jim>: Should also check that (rotational) velocity of the robot is
       // small
       if (std::abs((distributed_eof - last_observation.time).count()) <
           static_cast<int>(kImagePeriodMs / 2.0 * 1000000)) {
-        Eigen::Affine3d H_camera1_board = last_observation.H_camera_target;
-        Eigen::Affine3d H_camera1_camera2 =
-            H_camera1_board * H_camera_board.inverse();
-        VLOG(1) << "Storing observation between " << last_observation.pi_name
-                << ", target " << last_observation.board_id << " and "
-                << new_observation.pi_name << ", target "
-                << new_observation.board_id << " is "
-                << H_camera1_camera2.matrix();
-        // Sort by pi numbering
+        // Sort by pi numbering, since this is how we will handle them
+        std::pair<TimestampedPiDetection, TimestampedPiDetection> new_pair;
         if (last_observation.pi_name < new_observation.pi_name) {
-          detection_list.push_back(
-              std::pair(last_observation, new_observation));
+          new_pair = std::pair(last_observation, new_observation);
         } else if (last_observation.pi_name > new_observation.pi_name) {
-          detection_list.push_back(
-              std::pair(new_observation, last_observation));
+          new_pair = std::pair(new_observation, last_observation);
         } else {
           LOG(WARNING) << "Got 2 observations in a row from same pi. Probably "
                           "not too much of an issue???";
         }
+        detection_list.push_back(new_pair);
+
+        // This bit is just for visualization and checking purposes-- use the
+        // last two-board observation to figure out the current estimate between
+        // the two cameras
+        if (FLAGS_visualize && two_board_extrinsics_list.size() > 0) {
+          draw_vis = true;
+          TimestampedPiDetection &last_two_board_ext =
+              two_board_extrinsics_list[two_board_extrinsics_list.size() - 1];
+          Eigen::Affine3d &H_boardA_boardB = last_two_board_ext.H_camera_target;
+          int boardA_boardB_id = last_two_board_ext.board_id;
+
+          TimestampedPiDetection camera1_boardA = new_pair.first;
+          TimestampedPiDetection camera2_boardB = new_pair.second;
+          // If camera1_boardA doesn't point to the first target, then swap
+          // these two
+          if (camera1_boardA.board_id != boardA_boardB_id) {
+            camera1_boardA = new_pair.second;
+            camera2_boardB = new_pair.first;
+          }
+          VLOG(1) << "Camera " << camera1_boardA.pi_name << " seeing board "
+                  << camera1_boardA.board_id << " and camera "
+                  << camera2_boardB.pi_name << " seeing board "
+                  << camera2_boardB.board_id;
+
+          vis_robot_.DrawRobotOutline(
+              H_world_board * camera1_boardA.H_camera_target.inverse(),
+              camera1_boardA.pi_name, cv::Scalar(0, 0, 255));
+          vis_robot_.DrawRobotOutline(
+              H_world_board * H_boardA_boardB *
+                  camera2_boardB.H_camera_target.inverse(),
+              camera2_boardB.pi_name, cv::Scalar(128, 128, 0));
+          vis_robot_.DrawFrameAxes(
+              H_world_board,
+              std::string("Board ") +
+                  std::to_string(last_two_board_ext.board_id),
+              cv::Scalar(0, 255, 0));
+          vis_robot_.DrawFrameAxes(H_world_board * H_boardA_boardB, "Board B",
+                                   cv::Scalar(255, 0, 0));
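+          // Chain camera1->boardA, boardA->boardB, and boardB->camera2
+          // to estimate the camera1->camera2 transform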
+          Eigen::Affine3d H_camera1_camera2 =
+              camera1_boardA.H_camera_target * H_boardA_boardB *
+              camera2_boardB.H_camera_target.inverse();
+
+          VLOG(1) << "Storing observation between " << new_pair.first.pi_name
+                  << ", target " << new_pair.first.board_id << " and "
+                  << new_pair.second.pi_name << ", target "
+                  << new_pair.second.board_id
+                  << " and camera-to-camera offset:\n"
+                  << H_camera1_camera2.matrix();
+        }
       } else {
         VLOG(2) << "Storing observation for " << pi_name << " at time "
                 << distributed_eof;
@@ -256,17 +340,28 @@
       }
     }
   }
+
   if (FLAGS_visualize) {
     std::string image_name = pi_name + " Image";
     cv::Mat rgb_small;
     cv::resize(rgb_image, rgb_small, cv::Size(), 0.5, 0.5);
     cv::imshow(image_name, rgb_small);
     cv::waitKey(FLAGS_wait_key);
+
+    if (draw_vis) {
+      cv::imshow("View", vis_robot_.image_);
+      cv::waitKey(1);
+      vis_robot_.ClearImage();
+    }
   }
 }
 
 void ExtrinsicsMain(int argc, char *argv[]) {
-  std::vector<DataAdapter::TimestampedDetection> timestamped_target_detections;
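+  // Set up the visualization canvas and a default viewpoint for
+  // rendering camera and board poses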
+  vis_robot_ = VisualizeRobot(cv::Size(1000, 1000));
+  vis_robot_.ClearImage();
+  const double kFocalLength = 1000.0;
+  const int kImageWidth = 1000;
+  vis_robot_.SetDefaultViewpoint(kImageWidth, kFocalLength);
 
   std::optional<aos::FlatbufferDetachedBuffer<aos::Configuration>> config =
       (FLAGS_config.empty()
@@ -293,7 +388,7 @@
   SendSimulationConstants(reader.event_loop_factory(), FLAGS_team_number,
                           FLAGS_constants_path);
 
-  LOG(INFO) << "Using target type " << FLAGS_target_type;
+  VLOG(1) << "Using target type " << FLAGS_target_type;
   std::vector<std::string> node_list;
   node_list.push_back("pi1");
   node_list.push_back("pi2");
@@ -316,9 +411,6 @@
 
     frc971::constants::ConstantsFetcher<y2023::Constants> constants_fetcher(
         detection_event_loops.back().get());
-    aos::FlatbufferDetachedBuffer<calibration::CameraCalibration> intrinsics =
-        aos::RecursiveCopyFlatBuffer(y2023::vision::FindCameraCalibration(
-            constants_fetcher.constants(), node));
 
     const calibration::CameraCalibration *calibration =
         FindCameraCalibration(constants_fetcher.constants(), node);
@@ -335,8 +427,8 @@
     const auto ext_H_robot_pi = Eigen::Affine3d(extrinsics_matrix);
     default_extrinsics.emplace_back(ext_H_robot_pi);
 
-    LOG(INFO) << "Got extrinsics for " << node << " as\n"
-              << default_extrinsics.back().matrix();
+    VLOG(1) << "Got extrinsics for " << node << " as\n"
+            << default_extrinsics.back().matrix();
 
     frc971::vision::ImageCallback *image_callback =
         new frc971::vision::ImageCallback(
@@ -356,83 +448,121 @@
     image_callbacks.emplace_back(image_callback);
   }
 
-  const auto ext_H_robot_piA = default_extrinsics[0];
-  const auto ext_H_robot_piB = default_extrinsics[1];
-
   reader.event_loop_factory()->Run();
 
+  // Do a quick check to see what the averaged two-board pose is for each
+  // pi individually
+  CHECK_GT(two_board_extrinsics_list.size(), 0u)
+      << "Must have at least one view of both boards";
+  int base_target_id = two_board_extrinsics_list[0].board_id;
+  VLOG(1) << "Base id for two_board_extrinsics_list is " << base_target_id;
   for (auto node : node_list) {
     std::vector<TimestampedPiDetection> pose_list;
     for (auto ext : two_board_extrinsics_list) {
+      CHECK_EQ(base_target_id, ext.board_id)
+          << " All boards should have same reference id";
       if (ext.pi_name == node) {
         pose_list.push_back(ext);
       }
     }
-    Eigen::Affine3d avg_pose = ComputeAveragePose(pose_list);
+    Eigen::Affine3d avg_pose_from_pi = ComputeAveragePose(pose_list);
     VLOG(1) << "Estimate from " << node << " with " << pose_list.size()
             << " observations is:\n"
-            << avg_pose.matrix();
+            << avg_pose_from_pi.matrix();
   }
-  Eigen::Affine3d avg_pose = ComputeAveragePose(two_board_extrinsics_list);
+  Eigen::Affine3d H_boardA_boardB_avg =
+      ComputeAveragePose(two_board_extrinsics_list);
+  // TODO: Should probably do some outlier rejection
   LOG(INFO) << "Estimate of two board pose using all nodes with "
             << two_board_extrinsics_list.size() << " observations is:\n"
-            << avg_pose.matrix() << "\n";
+            << H_boardA_boardB_avg.matrix() << "\n";
 
+  // Next, compute the relative camera poses
   LOG(INFO) << "Got " << detection_list.size() << " extrinsic observations";
-  Eigen::Affine3d H_target0_target1 = avg_pose;
-  int base_target_id = detection_list[0].first.board_id;
-  std::vector<Eigen::Affine3d> H_cameraA_cameraB_list;
+  std::vector<Eigen::Affine3d> H_camera1_camera2_list;
   std::vector<Eigen::Affine3d> updated_extrinsics;
   // Use the first node's extrinsics as our base, and fix from there
   updated_extrinsics.push_back(default_extrinsics[0]);
   LOG(INFO) << "Default extrinsic for node " << node_list[0] << " is "
             << default_extrinsics[0].matrix();
   for (uint i = 0; i < node_list.size() - 1; i++) {
-    H_cameraA_cameraB_list.clear();
-    for (auto [poseA, poseB] : detection_list) {
-      if ((poseA.pi_name == node_list[i]) &&
-          (poseB.pi_name == node_list[i + 1])) {
-        Eigen::Affine3d H_cameraA_target0 = poseA.H_camera_target;
-        // If this pose isn't referenced to target0, correct that
-        if (poseA.board_id != base_target_id) {
-          H_cameraA_target0 = H_cameraA_target0 * H_target0_target1.inverse();
+    H_camera1_camera2_list.clear();
+    // Go through the list, and find successive pairs of cameras
+    for (auto [pose1, pose2] : detection_list) {
+      if ((pose1.pi_name == node_list[i]) &&
+          (pose2.pi_name == node_list[i + 1])) {
+        Eigen::Affine3d H_camera1_boardA = pose1.H_camera_target;
+        // If pose1 isn't referenced to base_target_id, correct that
+        if (pose1.board_id != base_target_id) {
+          // pose1.H_camera_target references boardB, so map back to boardA
+          H_camera1_boardA =
+              pose1.H_camera_target * H_boardA_boardB_avg.inverse();
         }
 
-        Eigen::Affine3d H_cameraB_target0 = poseB.H_camera_target;
-        if (poseB.board_id != base_target_id) {
-          H_cameraB_target0 = H_cameraB_target0 * H_target0_target1.inverse();
+        // Now, get the camera2->boardA map (note that it's boardA, the same
+        // board as camera1 references, so we can compute the camera1->camera2
+        // difference based on their common view of boardA)
+        Eigen::Affine3d H_camera2_boardA = pose2.H_camera_target;
+        // If pose2 isn't referenced to boardA (base_target_id), correct that
+        if (pose2.board_id != base_target_id) {
+          // pose2.H_camera_target references boardB, so map back to boardA
+          H_camera2_boardA =
+              pose2.H_camera_target * H_boardA_boardB_avg.inverse();
         }
-        Eigen::Affine3d H_cameraA_cameraB =
-            H_cameraA_target0 * H_cameraB_target0.inverse();
-        H_cameraA_cameraB_list.push_back(H_cameraA_cameraB);
+
+        // Compute camera1->camera2 map
+        Eigen::Affine3d H_camera1_camera2 =
+            H_camera1_boardA * H_camera2_boardA.inverse();
+        H_camera1_camera2_list.push_back(H_camera1_camera2);
+        VLOG(1) << "Map from camera " << pose1.pi_name << " and tag "
+                << pose1.board_id << " with observation: \n"
+                << pose1.H_camera_target.matrix() << "\n to camera "
+                << pose2.pi_name << " and tag " << pose2.board_id
+                << " with observation: \n"
+                << pose2.H_camera_target.matrix() << "\ngot map as\n"
+                << H_camera1_camera2.matrix();
+
+        Eigen::Affine3d H_world_board;
+        H_world_board = Eigen::Translation3d::Identity() *
+                        Eigen::AngleAxisd(M_PI / 2.0, Eigen::Vector3d::UnitX());
+        if (FLAGS_alt_view) {
+          H_world_board = Eigen::Translation3d(0.0, 0.0, 3.0);
+        }
+
+        VLOG(2) << "Camera1 " << pose1.pi_name << " in world frame is \n"
+                << (H_world_board * H_camera1_boardA.inverse()).matrix();
+        VLOG(2) << "Camera2 " << pose2.pi_name << " in world frame is \n"
+                << (H_world_board * H_camera2_boardA.inverse()).matrix();
       }
     }
-    CHECK(H_cameraA_cameraB_list.size() > 0)
+    // TODO<Jim>: If we don't get any matches, we could just use default
+    // extrinsics
+    CHECK(H_camera1_camera2_list.size() > 0)
         << "Failed with zero poses for node " << node_list[i];
-    if (H_cameraA_cameraB_list.size() > 0) {
-      Eigen::Affine3d avg_H_cameraA_cameraB =
-          ComputeAveragePose(H_cameraA_cameraB_list);
+    if (H_camera1_camera2_list.size() > 0) {
+      Eigen::Affine3d H_camera1_camera2_avg =
+          ComputeAveragePose(H_camera1_camera2_list);
       LOG(INFO) << "From " << node_list[i] << " to " << node_list[i + 1]
-                << " found " << H_cameraA_cameraB_list.size()
+                << " found " << H_camera1_camera2_list.size()
                 << " observations, and the average pose is:\n"
-                << avg_H_cameraA_cameraB.matrix();
-      Eigen::Affine3d default_H_cameraA_camera_B =
+                << H_camera1_camera2_avg.matrix();
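+      // For comparison, compose the default extrinsics through the robot
+      // frame: H_camera1_camera2 = H_camera1_robot * H_robot_camera2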
+      Eigen::Affine3d H_camera1_camera2_default =
           default_extrinsics[i].inverse() * default_extrinsics[i + 1];
       LOG(INFO) << "Compare this to that from default values:\n"
-                << default_H_cameraA_camera_B.matrix();
+                << H_camera1_camera2_default.matrix();
+      Eigen::Affine3d H_camera1_camera2_diff =
+          H_camera1_camera2_avg * H_camera1_camera2_default.inverse();
+      LOG(INFO)
+          << "Difference between averaged and default delta poses "
+             "has |T| = "
+          << H_camera1_camera2_diff.translation().norm() << "m and |R| = "
+          << Eigen::AngleAxisd(H_camera1_camera2_diff.rotation()).angle()
+          << " radians ("
+          << Eigen::AngleAxisd(H_camera1_camera2_diff.rotation()).angle() *
+                 180.0 / M_PI
+          << " degrees)";
       // Next extrinsic is just previous one * avg_delta_pose
       Eigen::Affine3d next_extrinsic =
-          updated_extrinsics.back() * avg_H_cameraA_cameraB;
-      LOG(INFO)
-          << "Difference between averaged and default delta poses has |T| = "
-          << (avg_H_cameraA_cameraB * default_H_cameraA_camera_B.inverse())
-                 .translation()
-                 .norm()
-          << " and |R| = "
-          << Eigen::AngleAxisd(
-                 (avg_H_cameraA_cameraB * default_H_cameraA_camera_B.inverse())
-                     .rotation())
-                 .angle();
+          updated_extrinsics.back() * H_camera1_camera2_avg;
       updated_extrinsics.push_back(next_extrinsic);
       LOG(INFO) << "Default Extrinsic for " << node_list[i + 1] << " is \n"
                 << default_extrinsics[i + 1].matrix();