Properly support matching multiple targets in camera_reader
Previously, if one keypoint closely matched two targets, we would only
assign it to one of them. Separate out the matching so that each of our
training targets gets its own matcher.
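
Roughly, the per-target scheme works as in the sketch below: one
FlannBasedMatcher per training image, with Lowe's ratio test run
independently against each one. (This is an illustrative standalone
example with made-up descriptor data and names, not the camera_reader
code itself.)

    #include <opencv2/core.hpp>
    #include <opencv2/features2d.hpp>

    #include <vector>

    int main() {
      // Hypothetical stand-in data: two training targets and one set of
      // query descriptors, 128-wide CV_32F rows as SIFT produces.
      std::vector<cv::Mat> training_descriptors(2);
      for (cv::Mat &descriptors : training_descriptors) {
        descriptors = cv::Mat(10, 128, CV_32F);
        cv::randu(descriptors, 0.0, 1.0);
      }
      cv::Mat query_descriptors(5, 128, CV_32F);
      cv::randu(query_descriptors, 0.0, 1.0);

      // One matcher per training target, mirroring the matchers_ vector
      // in the patch below.
      std::vector<cv::FlannBasedMatcher> matchers(training_descriptors.size());
      for (size_t i = 0; i < matchers.size(); ++i) {
        matchers[i].add(training_descriptors[i]);
        matchers[i].train();
      }

      for (size_t target = 0; target < matchers.size(); ++target) {
        std::vector<std::vector<cv::DMatch>> matches;
        matchers[target].knnMatch(query_descriptors, matches, /*k=*/2);
        for (const std::vector<cv::DMatch> &match : matches) {
          if (match.size() < 2) continue;
          // The ratio test now runs per target, so a keypoint that matches
          // two different targets equally well is no longer rejected or
          // assigned to just one of them.
          if (match[0].distance < 0.7 * match[1].distance) {
            // Keep match[0] as a good match for this target.
          }
        }
      }
      return 0;
    }

Because each matcher only holds one training image, imgIdx from knnMatch
is always 0; the loop index is what identifies the target.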
Change-Id: I73edbe8261bb5cbd3dbe1a260d5565a734ca118c
Signed-off-by: James Kuszmaul <jabukuszmaul@gmail.com>
diff --git a/y2020/vision/camera_reader.cc b/y2020/vision/camera_reader.cc
index 66c76dd..26f65b7 100644
--- a/y2020/vision/camera_reader.cc
+++ b/y2020/vision/camera_reader.cc
@@ -32,12 +32,12 @@
public:
CameraReader(aos::EventLoop *event_loop,
const sift::TrainingData *training_data, V4L2Reader *reader,
- cv::FlannBasedMatcher *matcher)
+ const cv::Ptr<cv::flann::IndexParams> &index_params,
+ const cv::Ptr<cv::flann::SearchParams> &search_params)
: event_loop_(event_loop),
training_data_(training_data),
camera_calibration_(FindCameraCalibration()),
reader_(reader),
- matcher_(matcher),
image_sender_(event_loop->MakeSender<CameraImage>("/camera")),
result_sender_(
event_loop->MakeSender<sift::ImageMatchResult>("/camera")),
@@ -46,10 +46,16 @@
read_image_timer_(event_loop->AddTimer([this]() { ReadImage(); })),
prev_R_camera_field_vec_(cv::Mat::zeros(3, 1, CV_32F)),
prev_T_camera_field_(cv::Mat::zeros(3, 1, CV_32F)) {
+
+ for (int ii = 0; ii < number_training_images(); ++ii) {
+ matchers_.push_back(cv::FlannBasedMatcher(index_params, search_params));
+ }
+
CopyTrainingFeatures();
- // Technically we don't need to do this, but doing it now avoids the first
- // match attempt being slow.
- matcher_->train();
+
+ for (auto &matcher : matchers_) {
+ matcher.train();
+ }
event_loop->OnRun(
[this]() { read_image_timer_->Setup(event_loop_->monotonic_now()); });
@@ -155,7 +161,7 @@
const sift::TrainingData *const training_data_;
const sift::CameraCalibration *const camera_calibration_;
V4L2Reader *const reader_;
- cv::FlannBasedMatcher *const matcher_;
+ std::vector<cv::FlannBasedMatcher> matchers_;
aos::Sender<CameraImage> image_sender_;
aos::Sender<sift::ImageMatchResult> result_sender_;
aos::Sender<sift::ImageMatchResult> detailed_result_sender_;
@@ -189,6 +195,7 @@
}
void CameraReader::CopyTrainingFeatures() {
+ int training_image_index = 0;
for (const sift::TrainingImage *training_image : *training_data_->images()) {
cv::Mat features(training_image->features()->size(), 128, CV_32F);
for (size_t i = 0; i < training_image->features()->size(); ++i) {
@@ -207,7 +214,8 @@
const auto out_mat = features(cv::Range(i, i + 1), cv::Range(0, 128));
in_mat.convertTo(out_mat, CV_32F);
}
- matcher_->add(features);
+ matchers_[training_image_index].add(features);
+ ++training_image_index;
}
}
@@ -304,14 +312,8 @@
sift_->detectAndCompute(image_mat, cv::noArray(), keypoints, descriptors);
}
- // Then, match those features against our training data.
- std::vector<std::vector<cv::DMatch>> matches;
- if (!FLAGS_skip_sift) {
- matcher_->knnMatch(/* queryDescriptors */ descriptors, matches, /* k */ 2);
- }
-
struct PerImageMatches {
- std::vector<const std::vector<cv::DMatch> *> matches;
+ std::vector<std::vector<cv::DMatch>> matches;
std::vector<cv::Point3f> training_points_3d;
std::vector<cv::Point2f> query_points;
std::vector<cv::Point2f> training_points;
@@ -319,32 +321,43 @@
};
std::vector<PerImageMatches> per_image_matches(number_training_images());
- // Pull out the good matches which we want for each image.
- // Discard the bad matches per Lowe's ratio test.
- // (Lowe originally proposed 0.7 ratio, but 0.75 was later proposed as a
- // better option. We'll go with the more conservative (fewer, better matches)
- // for now).
- for (const std::vector<cv::DMatch> &match : matches) {
- CHECK_EQ(2u, match.size());
- CHECK_LE(match[0].distance, match[1].distance);
- CHECK_LT(match[0].imgIdx, number_training_images());
- CHECK_LT(match[1].imgIdx, number_training_images());
- CHECK_EQ(match[0].queryIdx, match[1].queryIdx);
- if (!(match[0].distance < 0.7 * match[1].distance)) {
- continue;
+ for (int image_idx = 0; image_idx < number_training_images(); ++image_idx) {
+ // Then, match those features against our training data.
+ std::vector<std::vector<cv::DMatch>> matches;
+ if (!FLAGS_skip_sift) {
+ matchers_[image_idx].knnMatch(/* queryDescriptors */ descriptors, matches,
+ /* k */ 2);
}
- const int training_image = match[0].imgIdx;
- CHECK_LT(training_image, static_cast<int>(per_image_matches.size()));
- PerImageMatches *const per_image = &per_image_matches[training_image];
- per_image->matches.push_back(&match);
- per_image->training_points.push_back(
- Training2dPoint(training_image, match[0].trainIdx));
- per_image->training_points_3d.push_back(
- Training3dPoint(training_image, match[0].trainIdx));
+ // Pull out the good matches which we want for each image.
+ // Discard the bad matches per Lowe's ratio test.
+ // (Lowe originally proposed 0.7 ratio, but 0.75 was later proposed as a
+ // better option. We'll go with the more conservative (fewer, better
+ // matches) for now).
+ for (const std::vector<cv::DMatch> &match : matches) {
+ CHECK_EQ(2u, match.size());
+ CHECK_LE(match[0].distance, match[1].distance);
+ CHECK_EQ(match[0].imgIdx, 0);
+ CHECK_EQ(match[1].imgIdx, 0);
+ CHECK_EQ(match[0].queryIdx, match[1].queryIdx);
+ if (!(match[0].distance < 0.7 * match[1].distance)) {
+ continue;
+ }
- const cv::KeyPoint &keypoint = keypoints[match[0].queryIdx];
- per_image->query_points.push_back(keypoint.pt);
+ const int training_image = image_idx;
+ CHECK_LT(training_image, static_cast<int>(per_image_matches.size()));
+ PerImageMatches *const per_image = &per_image_matches[training_image];
+ per_image->matches.push_back(match);
+ per_image->matches.back()[0].imgIdx = image_idx;
+ per_image->matches.back()[1].imgIdx = image_idx;
+ per_image->training_points.push_back(
+ Training2dPoint(training_image, match[0].trainIdx));
+ per_image->training_points_3d.push_back(
+ Training3dPoint(training_image, match[0].trainIdx));
+
+ const cv::KeyPoint &keypoint = keypoints[match[0].queryIdx];
+ per_image->query_points.push_back(keypoint.pt);
+ }
}
// The minimum number of matches in a training image for us to use it.
@@ -395,8 +408,7 @@
}
// Add this to our collection of all matches that passed our criteria
- all_good_matches.push_back(
- static_cast<std::vector<cv::DMatch>>(*per_image.matches[j]));
+ all_good_matches.push_back(per_image.matches[j]);
// Fill out the data for matches per image that made it past
// homography check, for later use
@@ -418,8 +430,7 @@
// Collect training target location, so we can map it to matched image
cv::Point2f target_point;
float target_radius;
- TargetLocation((*(per_image_good_match.matches[0]))[0].imgIdx, target_point,
- target_radius);
+ TargetLocation(i, target_point, target_radius);
// Store target_point in vector for use by perspectiveTransform
std::vector<cv::Point2f> src_target_pt;
@@ -661,7 +672,7 @@
V4L2Reader v4l2_reader(&event_loop, "/dev/video0");
CameraReader camera_reader(&event_loop, &training_data.message(),
- &v4l2_reader, &matcher);
+ &v4l2_reader, index_params, search_params);
event_loop.Run();
}