Fill out tables for camera data some more
This should be all the information needed by all the runtime code.
Also reshuffle a few tables to be more memory-dense. This changes the
binary format, but we don't have any logfiles yet, so that's fine.
Change-Id: I85158ad8c28c349f2d1c4981fb6565b2b4804d78
diff --git a/y2020/vision/camera_reader.cc b/y2020/vision/camera_reader.cc
index de4dfb7..b8e84c0 100644
--- a/y2020/vision/camera_reader.cc
+++ b/y2020/vision/camera_reader.cc
@@ -57,6 +57,21 @@
const std::vector<cv::KeyPoint> &keypoints,
const cv::Mat &descriptors);
+ // Returns the 3D location for the specified training feature.
+ cv::Point3f Training3dPoint(int training_image_index, int feature_index) {
+ const sift::KeypointFieldLocation *const location =
+ training_data_->images()
+ ->Get(training_image_index)
+ ->features()
+ ->Get(feature_index)
+ ->field_location();
+ return cv::Point3f(location->x(), location->y(), location->z());
+ }
+
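+  // Returns the number of training images in the training data.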
+ int number_training_images() const {
+ return training_data_->images()->size();
+ }
+
aos::EventLoop *const event_loop_;
const sift::TrainingData *const training_data_;
V4L2Reader *const reader_;
@@ -144,29 +159,27 @@
const std::vector<std::vector<cv::DMatch>> &matches) {
// First, we need to pull out all the matches for each image. Might as well
// build up the Match tables at the same time.
- std::vector<std::vector<flatbuffers::Offset<sift::Match>>> per_image_matches;
+ std::vector<std::vector<sift::Match>> per_image_matches(
+ number_training_images());
for (const std::vector<cv::DMatch> &image_matches : matches) {
for (const cv::DMatch &image_match : image_matches) {
- sift::Match::Builder match_builder(*fbb);
- match_builder.add_query_feature(image_match.queryIdx);
- match_builder.add_train_feature(image_match.trainIdx);
- if (per_image_matches.size() <= static_cast<size_t>(image_match.imgIdx)) {
- per_image_matches.resize(image_match.imgIdx + 1);
- }
- per_image_matches[image_match.imgIdx].emplace_back(
- match_builder.Finish());
+ CHECK_LT(image_match.imgIdx, number_training_images());
+ per_image_matches[image_match.imgIdx].emplace_back();
+ sift::Match *const match = &per_image_matches[image_match.imgIdx].back();
+ match->mutate_query_feature(image_match.queryIdx);
+ match->mutate_train_feature(image_match.trainIdx);
+ match->mutate_distance(image_match.distance);
}
}
// Then, we need to build up each ImageMatch table.
std::vector<flatbuffers::Offset<sift::ImageMatch>> image_match_tables;
for (size_t i = 0; i < per_image_matches.size(); ++i) {
- const std::vector<flatbuffers::Offset<sift::Match>> &this_image_matches =
- per_image_matches[i];
+ const std::vector<sift::Match> &this_image_matches = per_image_matches[i];
if (this_image_matches.empty()) {
continue;
}
- const auto vector_offset = fbb->CreateVector(this_image_matches);
+ const auto vector_offset = fbb->CreateVectorOfStructs(this_image_matches);
sift::ImageMatch::Builder image_builder(*fbb);
image_builder.add_train_image(i);
image_builder.add_matches(vector_offset);
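
As a rough sketch of how a consumer might walk these flattened Match structs
back out of an ImageMatchResult (the function name and the generated header
path here are illustrative, not part of this change):

    #include <iostream>

    #include "y2020/vision/sift/sift_generated.h"

    namespace sift = frc971::vision::sift;

    // Print every per-training-image match in a received ImageMatchResult.
    // Match is a struct now, so its fields come straight off the vector of
    // structs instead of through table accessors.
    void PrintMatches(const sift::ImageMatchResult &result) {
      if (result.image_matches() == nullptr) {
        return;
      }
      for (const sift::ImageMatch *image_match : *result.image_matches()) {
        if (image_match->matches() == nullptr) {
          continue;
        }
        for (const sift::Match *match : *image_match->matches()) {
          std::cout << "train image " << image_match->train_image()
                    << ": query feature " << match->query_feature()
                    << " <-> train feature " << match->train_feature()
                    << " (distance " << match->distance() << ")\n";
        }
      }
    }
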
diff --git a/y2020/vision/sift/sift.fbs b/y2020/vision/sift/sift.fbs
index 5a1384e..77d0dc6 100644
--- a/y2020/vision/sift/sift.fbs
+++ b/y2020/vision/sift/sift.fbs
@@ -1,5 +1,12 @@
namespace frc971.vision.sift;
+// Represents the location of a keypoint in field coordinates.
+struct KeypointFieldLocation {
+ x:float;
+ y:float;
+ z:float;
+}
+
// Represents a single feature extracted from an image.
table Feature {
// Contains the descriptor data.
@@ -28,14 +35,20 @@
// Which octave this keypoint is from.
octave:int;
+
+ // Where this feature's keypoint is on the field. This will only be filled out
+ // for training features, not ones extracted from query images.
+ field_location:KeypointFieldLocation;
}
// Represents a single match between a training image and a query image.
-table Match {
+struct Match {
// The index of the feature for the query image.
query_feature:int;
// The index of the feature for the training image.
train_feature:int;
+ // How "good" the match is.
+ distance:float;
}
// Represents all the matches between a single training image and a query
@@ -51,11 +64,49 @@
data:[double];
}
-// Contains the information the EKF wants from an image.
+// Calibration information for a given camera on a given robot.
+table CameraCalibration {
+ // The name of the camera node which this calibration data applies to.
+ node_name:string;
+ // The team number of the robot which this calibration data applies to.
+ team_number:int;
+
+ // Intrinsics for the camera.
+ //
+ // This is the standard OpenCV intrinsics matrix in row major order (3x3).
+ intrinsics:[float];
+
+ // Fixed extrinsics for the camera. This transforms from camera coordinates to
+ // robot coordinates. For example: multiplying (0, 0, 0, 1) by this results in
+ // the position of the camera aperture in robot coordinates.
+ fixed_extrinsics:TransformationMatrix;
+
+ // Extrinsics for a camera on a turret. This will only be filled out for
+ // applicable cameras. For turret-mounted cameras, fixed_extrinsics defines
+ // a position for the center of rotation of the turret, and this field defines
+ // a position for the camera on the turret.
+ //
+ // How the final extrinsics are split between these two transformations is
+ // underdefined: for any given turret position, nothing distinguishes one
+ // choice of split from another.
+ //
+ // To get the final extrinsics for a camera using this transformation,
+ // multiply (in order):
+ // fixed_extrinsics
+ // rotation around the Z axis by the turret angle
+ // turret_extrinsics
+ turret_extrinsics:TransformationMatrix;
+}
+
+// Contains the information the EKF wants from an image matched against a single
+// training image.
//
// This is represented as a transformation to a target in field coordinates.
table CameraPose {
// Transformation matrix from the target to the camera's origin.
+ // (0, 0, 0) is the aperture of the camera (we pretend it's an ideal pinhole
+ // camera). Positive Z points out of the camera. Positive X and Y are
+ // right-handed, but which way they face depends on the camera extrinsics.
camera_to_target:TransformationMatrix;
// Field coordinates of the target, represented as a transformation matrix
@@ -74,6 +125,7 @@
table ImageMatchResult {
// The matches from this image to each of the training images which matched.
+ // Every element is a match against the same captured image.
image_matches:[ImageMatch];
// The transformations for this image for each of the training images which
// matched.
@@ -85,6 +137,9 @@
// Timestamp when the frame was captured.
image_monotonic_timestamp_ns:long;
+
+ // Information about the camera which took this image.
+ camera_calibration:CameraCalibration;
}
root_type ImageMatchResult;
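
The turret_extrinsics comment above spells out the multiplication order, so
here is a hedged sketch of what that composition might look like for a
consumer using Eigen and OpenCV. The helper names (ToEigen, CameraToRobot,
IntrinsicsMatrix) are illustrative, and this assumes TransformationMatrix.data
is stored row-major like the intrinsics:

    #include <cassert>

    #include <Eigen/Dense>
    #include <opencv2/core.hpp>

    #include "y2020/vision/sift/sift_generated.h"

    namespace sift = frc971::vision::sift;

    // Copy a TransformationMatrix into an Eigen 4x4 matrix (assuming
    // row-major storage, like the intrinsics).
    Eigen::Matrix4d ToEigen(const sift::TransformationMatrix &matrix) {
      assert(matrix.data()->size() == 16u);
      Eigen::Matrix4d result;
      for (int row = 0; row < 4; ++row) {
        for (int col = 0; col < 4; ++col) {
          result(row, col) = matrix.data()->Get(row * 4 + col);
        }
      }
      return result;
    }

    // Camera-to-robot transformation. For a turret-mounted camera this is
    // fixed_extrinsics * Rz(turret_angle) * turret_extrinsics, per the
    // turret_extrinsics comment.
    Eigen::Matrix4d CameraToRobot(const sift::CameraCalibration &calibration,
                                  double turret_angle_radians) {
      Eigen::Matrix4d result = ToEigen(*calibration.fixed_extrinsics());
      if (calibration.turret_extrinsics() != nullptr) {
        Eigen::Matrix4d rotation = Eigen::Matrix4d::Identity();
        rotation.block<3, 3>(0, 0) =
            Eigen::AngleAxisd(turret_angle_radians, Eigen::Vector3d::UnitZ())
                .toRotationMatrix();
        result = result * rotation * ToEigen(*calibration.turret_extrinsics());
      }
      return result;
    }

    // The intrinsics are a row-major 3x3 OpenCV camera matrix.
    cv::Mat IntrinsicsMatrix(const sift::CameraCalibration &calibration) {
      assert(calibration.intrinsics()->size() == 9u);
      cv::Mat result(3, 3, CV_32F);
      for (int i = 0; i < 9; ++i) {
        result.at<float>(i / 3, i % 3) = calibration.intrinsics()->Get(i);
      }
      return result;
    }

Multiplying the result of CameraToRobot by (0, 0, 0, 1) then gives the
position of the camera aperture in robot coordinates, matching the
fixed_extrinsics comment.
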
diff --git a/y2020/vision/sift/sift_training.fbs b/y2020/vision/sift/sift_training.fbs
index 2af0233..5e82e9f 100644
--- a/y2020/vision/sift/sift_training.fbs
+++ b/y2020/vision/sift/sift_training.fbs
@@ -18,6 +18,9 @@
// Represents the information used to match incoming images against.
table TrainingData {
images:[TrainingImage];
+
+ // Calibration information for all the cameras we know about.
+ camera_calibrations:[CameraCalibration];
}
root_type TrainingData;
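
One way the runtime might use the new camera_calibrations vector, as a
sketch: look up the entry matching this camera's node name and team number at
startup. The helper name and the null handling are assumptions, not part of
this change:

    #include <string_view>

    #include "y2020/vision/sift/sift_training_generated.h"

    namespace sift = frc971::vision::sift;

    // Find the calibration for the camera running on `node_name` for
    // `team_number`, or nullptr if the training data doesn't have one.
    const sift::CameraCalibration *FindCameraCalibration(
        const sift::TrainingData &training_data, std::string_view node_name,
        int team_number) {
      if (training_data.camera_calibrations() == nullptr) {
        return nullptr;
      }
      for (const sift::CameraCalibration *candidate :
           *training_data.camera_calibrations()) {
        if (candidate->node_name() == nullptr ||
            candidate->node_name()->string_view() != node_name) {
          continue;
        }
        if (candidate->team_number() != team_number) {
          continue;
        }
        return candidate;
      }
      return nullptr;
    }

camera_reader could then copy the selected table into each ImageMatchResult's
camera_calibration field so downstream consumers get the calibration alongside
the matches.
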