#include <opencv2/features2d.hpp>
#include <opencv2/imgproc.hpp>

#include <chrono>
#include <thread>

#include "aos/events/shm_event_loop.h"
#include "aos/init.h"
#include "glog/logging.h"

#include "y2020/vision/sift/demo_sift.h"
#include "y2020/vision/sift/sift971.h"
#include "y2020/vision/sift/sift_generated.h"
#include "y2020/vision/sift/sift_training_generated.h"
#include "y2020/vision/v4l2_reader.h"
#include "y2020/vision/vision_generated.h"

namespace frc971 {
namespace vision {
namespace {

class CameraReader {
 public:
  CameraReader(aos::EventLoop *event_loop,
               const sift::TrainingData *training_data, V4L2Reader *reader,
               cv::FlannBasedMatcher *matcher)
      : event_loop_(event_loop),
        training_data_(training_data),
        reader_(reader),
        matcher_(matcher),
        image_sender_(event_loop->MakeSender<CameraImage>("/camera")),
        result_sender_(
            event_loop->MakeSender<sift::ImageMatchResult>("/camera")),
        read_image_timer_(event_loop->AddTimer([this]() {
          ReadImage();
          read_image_timer_->Setup(event_loop_->monotonic_now());
        })) {
    CopyTrainingFeatures();
    // Technically we don't need to do this, but doing it now avoids the first
    // match attempt being slow.
    matcher_->train();

    event_loop->OnRun(
        [this]() { read_image_timer_->Setup(event_loop_->monotonic_now()); });
  }

 private:
  // Copies the information from training_data_ into matcher_.
  void CopyTrainingFeatures();
  // Processes an image (including sending the results).
  void ProcessImage(const CameraImage &image);
  // Reads an image, and then performs all of our processing on it.
  void ReadImage();

  flatbuffers::Offset<
      flatbuffers::Vector<flatbuffers::Offset<sift::ImageMatch>>>
  PackImageMatches(flatbuffers::FlatBufferBuilder *fbb,
                   const std::vector<std::vector<cv::DMatch>> &matches);
  flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<sift::Feature>>>
  PackFeatures(flatbuffers::FlatBufferBuilder *fbb,
               const std::vector<cv::KeyPoint> &keypoints,
               const cv::Mat &descriptors);

  aos::EventLoop *const event_loop_;
  const sift::TrainingData *const training_data_;
  V4L2Reader *const reader_;
  cv::FlannBasedMatcher *const matcher_;
  aos::Sender<CameraImage> image_sender_;
  aos::Sender<sift::ImageMatchResult> result_sender_;
  // We schedule this immediately to read an image. Having it on a timer means
  // other things can run on the event loop in between.
  aos::TimerHandler *const read_image_timer_;

  const std::unique_ptr<frc971::vision::SIFT971_Impl> sift_{
      new frc971::vision::SIFT971_Impl()};
};

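// Builds one cv::Mat of descriptors per training image (one 128-float SIFT
// descriptor per row) and adds each to the matcher as a separate training
// set, so match results can refer back to individual images via imgIdx.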
void CameraReader::CopyTrainingFeatures() {
  for (const sift::TrainingImage *training_image : *training_data_->images()) {
    cv::Mat features(training_image->features()->size(), 128, CV_32F);
    for (size_t i = 0; i < training_image->features()->size(); ++i) {
      const sift::Feature *feature_table = training_image->features()->Get(i);
      const flatbuffers::Vector<float> *const descriptor =
          feature_table->descriptor();
      CHECK_EQ(descriptor->size(), 128u) << ": Unsupported feature size";
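      // Wrap the flatbuffer's descriptor data in a non-owning cv::Mat header
      // (the const_cast is needed because cv::Mat's constructor only takes a
      // non-const pointer), then copy it into the corresponding row.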
      cv::Mat(1, descriptor->size(), CV_32F,
              const_cast<void *>(static_cast<const void *>(descriptor->data())))
          .copyTo(features(cv::Range(i, i + 1), cv::Range(0, 128)));
    }
    matcher_->add(features);
  }
}

void CameraReader::ProcessImage(const CameraImage &image) {
  // First, we need to extract the brightness information. This can't really be
  // fused into the beginning of the SIFT algorithm because the algorithm needs
  // to look at the base image directly. It also only takes 2ms on our images.
  // This is converting from YUYV to a grayscale image.
  cv::Mat image_mat(image.rows(), image.cols(), CV_8U);
  CHECK(image_mat.isContinuous());
  const int number_pixels = image.rows() * image.cols();
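  // YUYV interleaves one luma byte with one chroma byte, so the grayscale
  // value for pixel i is byte 2 * i of the raw image data.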
  for (int i = 0; i < number_pixels; ++i) {
    reinterpret_cast<uint8_t *>(image_mat.data)[i] =
        image.data()->data()[i * 2];
  }

  // Next, grab the features from the image.
  std::vector<cv::KeyPoint> keypoints;
  cv::Mat descriptors;
  sift_->detectAndCompute(image_mat, cv::noArray(), keypoints, descriptors);

  // Then, match those features against our training data.
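  // knnMatch with k = 2 returns the two best training matches per query
  // feature, which lets downstream consumers apply a Lowe-style ratio test to
  // reject ambiguous matches.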
  std::vector<std::vector<cv::DMatch>> matches;
  matcher_->knnMatch(/* queryDescriptors */ descriptors, matches, /* k */ 2);

  // Now, pack the results up and send them out.
  auto builder = result_sender_.MakeBuilder();

  const auto image_matches_offset = PackImageMatches(builder.fbb(), matches);
  // TODO(Brian): PackCameraPoses (and put it in the result)
  const auto features_offset =
      PackFeatures(builder.fbb(), keypoints, descriptors);

  sift::ImageMatchResult::Builder result_builder(*builder.fbb());
  result_builder.add_image_matches(image_matches_offset);
  result_builder.add_features(features_offset);
  result_builder.add_image_monotonic_timestamp_ns(
      image.monotonic_timestamp_ns());
  builder.Send(result_builder.Finish());
}

void CameraReader::ReadImage() {
  if (!reader_->ReadLatestImage()) {
    LOG(INFO) << "No image, sleeping";
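    // Back off briefly instead of spinning while the camera has no new frame.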
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    return;
  }

  ProcessImage(reader_->LatestImage());

  reader_->SendLatestImage();
}

flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<sift::ImageMatch>>>
CameraReader::PackImageMatches(
    flatbuffers::FlatBufferBuilder *fbb,
    const std::vector<std::vector<cv::DMatch>> &matches) {
  // First, we need to pull out all the matches for each image. Might as well
  // build up the Match tables at the same time.
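  // The outer vector of `matches` is indexed by query feature; each
  // cv::DMatch's imgIdx says which training image it matched against, which
  // is what we group by here.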
  std::vector<std::vector<flatbuffers::Offset<sift::Match>>> per_image_matches;
  for (const std::vector<cv::DMatch> &image_matches : matches) {
    for (const cv::DMatch &image_match : image_matches) {
      sift::Match::Builder match_builder(*fbb);
      match_builder.add_query_feature(image_match.queryIdx);
      match_builder.add_train_feature(image_match.trainIdx);
      if (per_image_matches.size() <=
          static_cast<size_t>(image_match.imgIdx)) {
        per_image_matches.resize(image_match.imgIdx + 1);
      }
      per_image_matches[image_match.imgIdx].emplace_back(
          match_builder.Finish());
    }
  }

  // Then, we need to build up each ImageMatch table.
  std::vector<flatbuffers::Offset<sift::ImageMatch>> image_match_tables;
  for (size_t i = 0; i < per_image_matches.size(); ++i) {
    const std::vector<flatbuffers::Offset<sift::Match>> &this_image_matches =
        per_image_matches[i];
    if (this_image_matches.empty()) {
      continue;
    }
    const auto vector_offset = fbb->CreateVector(this_image_matches);
    sift::ImageMatch::Builder image_builder(*fbb);
    image_builder.add_train_image(i);
    image_builder.add_matches(vector_offset);
    image_match_tables.emplace_back(image_builder.Finish());
  }

  return fbb->CreateVector(image_match_tables);
}

flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<sift::Feature>>>
CameraReader::PackFeatures(flatbuffers::FlatBufferBuilder *fbb,
                           const std::vector<cv::KeyPoint> &keypoints,
                           const cv::Mat &descriptors) {
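  // Each row of `descriptors` is the 128-element SIFT descriptor for the
  // keypoint at the same index, so the two must stay in lockstep.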
  const int number_features = keypoints.size();
  CHECK_EQ(descriptors.rows, number_features);
  std::vector<flatbuffers::Offset<sift::Feature>> features_vector(
      number_features);
  for (int i = 0; i < number_features; ++i) {
    const auto submat = descriptors(cv::Range(i, i + 1), cv::Range(0, 128));
    CHECK(submat.isContinuous());
    const auto descriptor_offset =
        fbb->CreateVector(reinterpret_cast<float *>(submat.data), 128);
    sift::Feature::Builder feature_builder(*fbb);
    feature_builder.add_descriptor(descriptor_offset);
    feature_builder.add_x(keypoints[i].pt.x);
    feature_builder.add_y(keypoints[i].pt.y);
    feature_builder.add_size(keypoints[i].size);
    feature_builder.add_angle(keypoints[i].angle);
    feature_builder.add_response(keypoints[i].response);
    feature_builder.add_octave(keypoints[i].octave);
    CHECK_EQ(-1, keypoints[i].class_id)
        << ": Not sure what to do with a class id";
    features_vector[i] = feature_builder.Finish();
  }
  return fbb->CreateVector(features_vector);
}

void CameraReaderMain() {
  aos::FlatbufferDetachedBuffer<aos::Configuration> config =
      aos::configuration::ReadConfig("config.json");

  const auto training_data_bfbs = DemoSiftData();
  const sift::TrainingData *const training_data =
      flatbuffers::GetRoot<sift::TrainingData>(training_data_bfbs.data());
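  // GetRoot does no validation, so verify the embedded flatbuffer before
  // trusting any of its contents.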
  {
    flatbuffers::Verifier verifier(
        reinterpret_cast<const uint8_t *>(training_data_bfbs.data()),
        training_data_bfbs.size());
    CHECK(training_data->Verify(verifier));
  }

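  // Approximate nearest-neighbor search over the training descriptors: a
  // KD-tree forest with 5 trees, checking up to 50 leaves per query. These
  // are the values commonly used in OpenCV's FLANN examples for SIFT-style
  // descriptors; more trees/checks trade speed for accuracy.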
  const auto index_params = cv::makePtr<cv::flann::IndexParams>();
  index_params->setAlgorithm(cvflann::FLANN_INDEX_KDTREE);
  index_params->setInt("trees", 5);
  const auto search_params =
      cv::makePtr<cv::flann::SearchParams>(/* checks */ 50);
  cv::FlannBasedMatcher matcher(index_params, search_params);

  aos::ShmEventLoop event_loop(&config.message());
  V4L2Reader v4l2_reader(&event_loop, "/dev/video0");
  CameraReader camera_reader(&event_loop, training_data, &v4l2_reader,
                             &matcher);

  event_loop.Run();
}

}  // namespace
}  // namespace vision
}  // namespace frc971

int main(int argc, char **argv) {
  aos::InitGoogle(&argc, &argv);
  frc971::vision::CameraReaderMain();
}