Do SIFT and send out the results
This runs SIFT feature extraction on each camera frame, matches the
features against the training data with a FLANN-based matcher, and sends
the matches and features out as an ImageMatchResult. We don't yet have
the math for calculating poses based on these results.

Change-Id: I6494dbf1d3a7c13db902cf00b7c362a2a956691b
diff --git a/y2020/vision/camera_reader.cc b/y2020/vision/camera_reader.cc
index e5bcb64..de4dfb7 100644
--- a/y2020/vision/camera_reader.cc
+++ b/y2020/vision/camera_reader.cc
@@ -1,34 +1,235 @@
+#include <opencv2/features2d.hpp>
+#include <opencv2/imgproc.hpp>
+
#include "aos/events/shm_event_loop.h"
#include "aos/init.h"
+#include "y2020/vision/sift/demo_sift.h"
+#include "y2020/vision/sift/sift971.h"
+#include "y2020/vision/sift/sift_generated.h"
+#include "y2020/vision/sift/sift_training_generated.h"
#include "y2020/vision/v4l2_reader.h"
+#include "y2020/vision/vision_generated.h"
namespace frc971 {
namespace vision {
namespace {
+class CameraReader {
+ public:
+ CameraReader(aos::EventLoop *event_loop,
+ const sift::TrainingData *training_data, V4L2Reader *reader,
+ cv::FlannBasedMatcher *matcher)
+ : event_loop_(event_loop),
+ training_data_(training_data),
+ reader_(reader),
+ matcher_(matcher),
+ image_sender_(event_loop->MakeSender<CameraImage>("/camera")),
+ result_sender_(
+ event_loop->MakeSender<sift::ImageMatchResult>("/camera")),
+ read_image_timer_(event_loop->AddTimer([this]() {
+ ReadImage();
+ read_image_timer_->Setup(event_loop_->monotonic_now());
+ })) {
+ CopyTrainingFeatures();
+ // Technically we don't need to do this, but doing it now avoids the first
+ // match attempt being slow.
+ matcher_->train();
+
+ event_loop->OnRun(
+ [this]() { read_image_timer_->Setup(event_loop_->monotonic_now()); });
+ }
+
+ private:
+ // Copies the information from training_data_ into matcher_.
+ void CopyTrainingFeatures();
+ // Processes an image (including sending the results).
+ void ProcessImage(const CameraImage &image);
+ // Reads an image, and then performs all of our processing on it.
+ void ReadImage();
+
+ flatbuffers::Offset<
+ flatbuffers::Vector<flatbuffers::Offset<sift::ImageMatch>>>
+ PackImageMatches(flatbuffers::FlatBufferBuilder *fbb,
+ const std::vector<std::vector<cv::DMatch>> &matches);
+ flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<sift::Feature>>>
+ PackFeatures(flatbuffers::FlatBufferBuilder *fbb,
+ const std::vector<cv::KeyPoint> &keypoints,
+ const cv::Mat &descriptors);
+
+ aos::EventLoop *const event_loop_;
+ const sift::TrainingData *const training_data_;
+ V4L2Reader *const reader_;
+ cv::FlannBasedMatcher *const matcher_;
+ aos::Sender<CameraImage> image_sender_;
+ aos::Sender<sift::ImageMatchResult> result_sender_;
+ // We schedule this immediately to read an image. Having it on a timer means
+ // other things can run on the event loop in between.
+ aos::TimerHandler *const read_image_timer_;
+
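+  // Our SIFT implementation, which extracts keypoints and their descriptors
+  // from each image.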
+ const std::unique_ptr<frc971::vision::SIFT971_Impl> sift_{
+ new frc971::vision::SIFT971_Impl()};
+};
+
+void CameraReader::CopyTrainingFeatures() {
+ for (const sift::TrainingImage *training_image : *training_data_->images()) {
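+    // Repack this image's descriptors into one contiguous N x 128 float
+    // matrix, one row per feature, which is the layout the matcher expects.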
+ cv::Mat features(training_image->features()->size(), 128, CV_32F);
+ for (size_t i = 0; i < training_image->features()->size(); ++i) {
+ const sift::Feature *feature_table = training_image->features()->Get(i);
+ const flatbuffers::Vector<float> *const descriptor =
+ feature_table->descriptor();
+ CHECK_EQ(descriptor->size(), 128u) << ": Unsupported feature size";
+ cv::Mat(1, descriptor->size(), CV_32F,
+ const_cast<void *>(static_cast<const void *>(descriptor->data())))
+ .copyTo(features(cv::Range(i, i + 1), cv::Range(0, 128)));
+ }
+ matcher_->add(features);
+ }
+}
+
+void CameraReader::ProcessImage(const CameraImage &image) {
+ // First, we need to extract the brightness information. This can't really be
+ // fused into the beginning of the SIFT algorithm because the algorithm needs
+ // to look at the base image directly. It also only takes 2ms on our images.
+ // This is converting from YUYV to a grayscale image.
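+  // In YUYV, every other byte is a Y (luminance) sample, so copying the even
+  // bytes gives us the 8-bit grayscale image directly.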
+  cv::Mat image_mat(image.rows(), image.cols(), CV_8U);
+ CHECK(image_mat.isContinuous());
+ const int number_pixels = image.rows() * image.cols();
+ for (int i = 0; i < number_pixels; ++i) {
+ reinterpret_cast<uint8_t *>(image_mat.data)[i] =
+ image.data()->data()[i * 2];
+ }
+
+ // Next, grab the features from the image.
+ std::vector<cv::KeyPoint> keypoints;
+ cv::Mat descriptors;
+ sift_->detectAndCompute(image_mat, cv::noArray(), keypoints, descriptors);
+
+ // Then, match those features against our training data.
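+  // Asking for the 2 nearest neighbors per query feature leaves room for a
+  // ratio test downstream to reject ambiguous matches.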
+ std::vector<std::vector<cv::DMatch>> matches;
+ matcher_->knnMatch(/* queryDescriptors */ descriptors, matches, /* k */ 2);
+
+ // Now, pack the results up and send them out.
+ auto builder = result_sender_.MakeBuilder();
+
+ const auto image_matches_offset = PackImageMatches(builder.fbb(), matches);
+ // TODO(Brian): PackCameraPoses (and put it in the result)
+ const auto features_offset =
+ PackFeatures(builder.fbb(), keypoints, descriptors);
+
+ sift::ImageMatchResult::Builder result_builder(*builder.fbb());
+ result_builder.add_image_matches(image_matches_offset);
+ result_builder.add_features(features_offset);
+ result_builder.add_image_monotonic_timestamp_ns(
+ image.monotonic_timestamp_ns());
+ builder.Send(result_builder.Finish());
+}
+
+void CameraReader::ReadImage() {
+ if (!reader_->ReadLatestImage()) {
+ LOG(INFO) << "No image, sleeping";
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+ return;
+ }
+
+ ProcessImage(reader_->LatestImage());
+
+ reader_->SendLatestImage();
+}
+
+flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<sift::ImageMatch>>>
+CameraReader::PackImageMatches(
+ flatbuffers::FlatBufferBuilder *fbb,
+ const std::vector<std::vector<cv::DMatch>> &matches) {
+ // First, we need to pull out all the matches for each image. Might as well
+ // build up the Match tables at the same time.
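+  // knnMatch matches against all of the training images we added to the
+  // matcher, tagging each result with imgIdx, so we group by that here.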
+ std::vector<std::vector<flatbuffers::Offset<sift::Match>>> per_image_matches;
+ for (const std::vector<cv::DMatch> &image_matches : matches) {
+ for (const cv::DMatch &image_match : image_matches) {
+ sift::Match::Builder match_builder(*fbb);
+ match_builder.add_query_feature(image_match.queryIdx);
+ match_builder.add_train_feature(image_match.trainIdx);
+ if (per_image_matches.size() <= static_cast<size_t>(image_match.imgIdx)) {
+ per_image_matches.resize(image_match.imgIdx + 1);
+ }
+ per_image_matches[image_match.imgIdx].emplace_back(
+ match_builder.Finish());
+ }
+ }
+
+ // Then, we need to build up each ImageMatch table.
+ std::vector<flatbuffers::Offset<sift::ImageMatch>> image_match_tables;
+ for (size_t i = 0; i < per_image_matches.size(); ++i) {
+ const std::vector<flatbuffers::Offset<sift::Match>> &this_image_matches =
+ per_image_matches[i];
+ if (this_image_matches.empty()) {
+ continue;
+ }
+ const auto vector_offset = fbb->CreateVector(this_image_matches);
+ sift::ImageMatch::Builder image_builder(*fbb);
+ image_builder.add_train_image(i);
+ image_builder.add_matches(vector_offset);
+ image_match_tables.emplace_back(image_builder.Finish());
+ }
+
+ return fbb->CreateVector(image_match_tables);
+}
+
+flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<sift::Feature>>>
+CameraReader::PackFeatures(flatbuffers::FlatBufferBuilder *fbb,
+ const std::vector<cv::KeyPoint> &keypoints,
+ const cv::Mat &descriptors) {
+ const int number_features = keypoints.size();
+ CHECK_EQ(descriptors.rows, number_features);
+ std::vector<flatbuffers::Offset<sift::Feature>> features_vector(
+ number_features);
+ for (int i = 0; i < number_features; ++i) {
+ const auto submat = descriptors(cv::Range(i, i + 1), cv::Range(0, 128));
+ CHECK(submat.isContinuous());
+ const auto descriptor_offset =
+ fbb->CreateVector(reinterpret_cast<float *>(submat.data), 128);
+ sift::Feature::Builder feature_builder(*fbb);
+ feature_builder.add_descriptor(descriptor_offset);
+ feature_builder.add_x(keypoints[i].pt.x);
+ feature_builder.add_y(keypoints[i].pt.y);
+ feature_builder.add_size(keypoints[i].size);
+ feature_builder.add_angle(keypoints[i].angle);
+ feature_builder.add_response(keypoints[i].response);
+ feature_builder.add_octave(keypoints[i].octave);
+ CHECK_EQ(-1, keypoints[i].class_id)
+ << ": Not sure what to do with a class id";
+ features_vector[i] = feature_builder.Finish();
+ }
+ return fbb->CreateVector(features_vector);
+}
+
void CameraReaderMain() {
aos::FlatbufferDetachedBuffer<aos::Configuration> config =
aos::configuration::ReadConfig("config.json");
+ const auto training_data_bfbs = DemoSiftData();
+ const sift::TrainingData *const training_data =
+ flatbuffers::GetRoot<sift::TrainingData>(training_data_bfbs.data());
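+  // Verify the flatbuffer before trusting it; walking a corrupt training
+  // blob would be undefined behavior.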
+ {
+ flatbuffers::Verifier verifier(
+ reinterpret_cast<const uint8_t *>(training_data_bfbs.data()),
+ training_data_bfbs.size());
+ CHECK(training_data->Verify(verifier));
+ }
+
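+  // 5 randomized kd-trees with 50 leaf checks per query is a common
+  // speed/accuracy tradeoff for matching 128-dimensional SIFT descriptors.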
+ const auto index_params = cv::makePtr<cv::flann::IndexParams>();
+ index_params->setAlgorithm(cvflann::FLANN_INDEX_KDTREE);
+ index_params->setInt("trees", 5);
+ const auto search_params =
+ cv::makePtr<cv::flann::SearchParams>(/* checks */ 50);
+ cv::FlannBasedMatcher matcher(index_params, search_params);
+
aos::ShmEventLoop event_loop(&config.message());
V4L2Reader v4l2_reader(&event_loop, "/dev/video0");
+  CameraReader camera_reader(&event_loop, training_data, &v4l2_reader,
+                             &matcher);
- while (true) {
- const auto image = v4l2_reader.ReadLatestImage();
- if (image.empty()) {
- LOG(INFO) << "No image, sleeping";
- std::this_thread::sleep_for(std::chrono::milliseconds(10));
- continue;
- }
-
- // Now, process image.
- // TODO(Brian): Actually process it, rather than just logging its size...
- LOG(INFO) << image.size();
- std::this_thread::sleep_for(std::chrono::milliseconds(70));
-
- v4l2_reader.SendLatestImage();
- }
+ event_loop.Run();
}
} // namespace