Fill out tables for camera data some more

This should be all the information needed by all the runtime code.

Also reshuffle a few tables to be more memory-dense. This changes the
binary format, but we don't have any logfiles, so whatever.

Change-Id: I85158ad8c28c349f2d1c4981fb6565b2b4804d78
diff --git a/y2020/vision/camera_reader.cc b/y2020/vision/camera_reader.cc
index de4dfb7..b8e84c0 100644
--- a/y2020/vision/camera_reader.cc
+++ b/y2020/vision/camera_reader.cc
@@ -57,6 +57,21 @@
                const std::vector<cv::KeyPoint> &keypoints,
                const cv::Mat &descriptors);
 
+  // Returns the 3D location for the specified training feature.
+  cv::Point3f Training3dPoint(int training_image_index, int feature_index) {
+    const sift::KeypointFieldLocation *const location =
+        training_data_->images()
+            ->Get(training_image_index)
+            ->features()
+            ->Get(feature_index)
+            ->field_location();
+    return cv::Point3f(location->x(), location->y(), location->z());
+  }
+
+  int number_training_images() const {
+    return training_data_->images()->size();
+  }
+
   aos::EventLoop *const event_loop_;
   const sift::TrainingData *const training_data_;
   V4L2Reader *const reader_;
@@ -144,29 +159,27 @@
     const std::vector<std::vector<cv::DMatch>> &matches) {
   // First, we need to pull out all the matches for each image. Might as well
   // build up the Match tables at the same time.
-  std::vector<std::vector<flatbuffers::Offset<sift::Match>>> per_image_matches;
+  std::vector<std::vector<sift::Match>> per_image_matches(
+      number_training_images());
   for (const std::vector<cv::DMatch> &image_matches : matches) {
     for (const cv::DMatch &image_match : image_matches) {
-      sift::Match::Builder match_builder(*fbb);
-      match_builder.add_query_feature(image_match.queryIdx);
-      match_builder.add_train_feature(image_match.trainIdx);
-      if (per_image_matches.size() <= static_cast<size_t>(image_match.imgIdx)) {
-        per_image_matches.resize(image_match.imgIdx + 1);
-      }
-      per_image_matches[image_match.imgIdx].emplace_back(
-          match_builder.Finish());
+      CHECK_LT(image_match.imgIdx, number_training_images());
+      per_image_matches[image_match.imgIdx].emplace_back();
+      sift::Match *const match = &per_image_matches[image_match.imgIdx].back();
+      match->mutate_query_feature(image_match.queryIdx);
+      match->mutate_train_feature(image_match.trainIdx);
+      match->mutate_distance(image_match.distance);
     }
   }
 
   // Then, we need to build up each ImageMatch table.
   std::vector<flatbuffers::Offset<sift::ImageMatch>> image_match_tables;
   for (size_t i = 0; i < per_image_matches.size(); ++i) {
-    const std::vector<flatbuffers::Offset<sift::Match>> &this_image_matches =
-        per_image_matches[i];
+    const std::vector<sift::Match> &this_image_matches = per_image_matches[i];
     if (this_image_matches.empty()) {
       continue;
     }
-    const auto vector_offset = fbb->CreateVector(this_image_matches);
+    const auto vector_offset = fbb->CreateVectorOfStructs(this_image_matches);
     sift::ImageMatch::Builder image_builder(*fbb);
     image_builder.add_train_image(i);
     image_builder.add_matches(vector_offset);
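
As a rough sketch of how a consumer reads these back out: because Match is now
a struct, the matches vector stores its elements inline and iterating it yields
const Match pointers directly, with no per-match table indirection. The accessor
names below assume the standard flatc-generated C++ API for this schema, and the
function itself is just for illustration.

  #include "glog/logging.h"

  #include "y2020/vision/sift/sift_generated.h"

  void PrintMatches(const frc971::vision::sift::ImageMatchResult &result) {
    if (result.image_matches() == nullptr) {
      return;
    }
    for (const frc971::vision::sift::ImageMatch *image_match :
         *result.image_matches()) {
      // Each element of matches() is an inline sift::Match struct.
      for (const frc971::vision::sift::Match *match : *image_match->matches()) {
        LOG(INFO) << "training image " << image_match->train_image() << ": "
                  << match->query_feature() << " -> " << match->train_feature()
                  << " (distance " << match->distance() << ")";
      }
    }
  }
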
diff --git a/y2020/vision/sift/sift.fbs b/y2020/vision/sift/sift.fbs
index 5a1384e..77d0dc6 100644
--- a/y2020/vision/sift/sift.fbs
+++ b/y2020/vision/sift/sift.fbs
@@ -1,5 +1,12 @@
 namespace frc971.vision.sift;
 
+// Represents the location of a keypoint in field coordinates.
+struct KeypointFieldLocation {
+  x:float;
+  y:float;
+  z:float;
+}
+
 // Represents a single feature extracted from an image.
 table Feature {
   // Contains the descriptor data.
@@ -28,14 +35,20 @@
 
   // Which octave this keypoint is from.
   octave:int;
+
+  // Where this feature's keypoint is on the field. This will only be filled out
+  // for training features, not ones extracted from query images.
+  field_location:KeypointFieldLocation;
 }
 
 // Represents a single match between a training image and a query image.
-table Match {
+struct Match {
   // The index of the feature for the query image.
   query_feature:int;
   // The index of the feature for the training image.
   train_feature:int;
+  // How "good" the match is.
+  distance:float;
 }
 
 // Represents all the matches between a single training image and a query
@@ -51,11 +64,49 @@
   data:[double];
 }
 
-// Contains the information the EKF wants from an image.
+// Calibration information for a given camera on a given robot.
+table CameraCalibration {
+  // The name of the camera node which this calibration data applies to.
+  node_name:string;
+  // The team number of the robot which this calibration data applies to.
+  team_number:int;
+
+  // Intrinsics for the camera.
+  //
+  // This is the standard OpenCV intrinsics matrix in row major order (3x3).
+  intrinsics:[float];
+
+  // Fixed extrinsics for the camera. This transforms from camera coordinates to
+  // robot coordinates. For example: multiplying (0, 0, 0, 1) by this results in
+  // the position of the camera aperture in robot coordinates.
+  fixed_extrinsics:TransformationMatrix;
+
+  // Extrinsics for a camera on a turret. This will only be filled out for
+  // applicable cameras. For turret-mounted cameras, fixed_extrinsics defines
+  // a position for the center of rotation of the turret, and this field defines
+  // a position for the camera on the turret.
+  //
+  // The split between these two transformations is underdetermined: for a
+  // given turret position, nothing distinguishes which part of the final
+  // extrinsics comes from each of them.
+  //
+  // To get the final extrinsics for a camera using this transformation,
+  // multiply (in order):
+  //   fixed_extrinsics
+  //   rotation around the Z axis by the turret angle
+  //   turret_extrinsics
+  turret_extrinsics:TransformationMatrix;
+}
+
+// Contains the information the EKF wants from an image matched against a single
+// training image.
 //
 // This is represented as a transformation to a target in field coordinates.
 table CameraPose {
   // Transformation matrix from the target to the camera's origin.
+  // (0, 0, 0) is the aperture of the camera (we pretend it's an ideal pinhole
+  // camera). Positive Z points out of the camera. Positive X and Y are
+  // right-handed, but which way they point depends on the camera extrinsics.
   camera_to_target:TransformationMatrix;
 
   // Field coordinates of the target, represented as a transformation matrix
@@ -74,6 +125,7 @@
 
 table ImageMatchResult {
   // The matches from this image to each of the training images which matched.
+  // Each member is against the same captured image.
   image_matches:[ImageMatch];
   // The transformations for this image for each of the training images which
   // matched.
@@ -85,6 +137,9 @@
 
   // Timestamp when the frame was captured.
   image_monotonic_timestamp_ns:long;
+
+  // Information about the camera which took this image.
+  camera_calibration:CameraCalibration;
 }
 
 root_type ImageMatchResult;
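
To make the turret_extrinsics ordering above concrete, here is a small sketch of
the composition using Eigen (already used elsewhere in this codebase). The
Matrix4d inputs are assumed to be plain copies of the TransformationMatrix data,
and the helper name is made up for illustration; the multiplication order is the
one documented on CameraCalibration.

  #include "Eigen/Dense"

  // fixed_extrinsics, then a rotation about Z by the turret angle, then
  // turret_extrinsics, gives the camera-to-robot transformation for a
  // turret-mounted camera.
  Eigen::Matrix4d TurretCameraExtrinsics(
      const Eigen::Matrix4d &fixed_extrinsics,
      const Eigen::Matrix4d &turret_extrinsics, double turret_angle) {
    Eigen::Matrix4d turret_rotation = Eigen::Matrix4d::Identity();
    turret_rotation.block<3, 3>(0, 0) =
        Eigen::AngleAxisd(turret_angle, Eigen::Vector3d::UnitZ())
            .toRotationMatrix();
    return fixed_extrinsics * turret_rotation * turret_extrinsics;
  }
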
diff --git a/y2020/vision/sift/sift_training.fbs b/y2020/vision/sift/sift_training.fbs
index 2af0233..5e82e9f 100644
--- a/y2020/vision/sift/sift_training.fbs
+++ b/y2020/vision/sift/sift_training.fbs
@@ -18,6 +18,9 @@
 // Represents the information used to match incoming images against.
 table TrainingData {
   images:[TrainingImage];
+
+  // Calibration information for all the cameras we know about.
+  camera_calibrations:[CameraCalibration];
 }
 
 root_type TrainingData;
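
Finally, a sketch of how runtime code might pick its entry out of
camera_calibrations. The lookup keys (node_name and team_number) are exactly the
new fields above, but the helper itself is hypothetical and the header path just
assumes the usual _generated.h naming.

  #include <string_view>

  #include "y2020/vision/sift/sift_training_generated.h"

  const frc971::vision::sift::CameraCalibration *FindCameraCalibration(
      const frc971::vision::sift::TrainingData &training_data,
      std::string_view node_name, int team_number) {
    if (training_data.camera_calibrations() == nullptr) {
      return nullptr;
    }
    for (const frc971::vision::sift::CameraCalibration *candidate :
         *training_data.camera_calibrations()) {
      // Skip calibrations for other robots or other camera nodes.
      if (candidate->team_number() != team_number) {
        continue;
      }
      if (candidate->node_name() == nullptr ||
          candidate->node_name()->string_view() != node_name) {
        continue;
      }
      return candidate;
    }
    return nullptr;
  }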