Estimate target location from blobs
Signed-off-by: milind-u <milind.upadhyay@gmail.com>
Change-Id: Ib9bfe0dc494b1581236d1f3f57bc65c6687981e5
diff --git a/y2022/vision/BUILD b/y2022/vision/BUILD
index d65814a..e893e54 100644
--- a/y2022/vision/BUILD
+++ b/y2022/vision/BUILD
@@ -1,3 +1,5 @@
+load("@com_github_google_flatbuffers//:build_defs.bzl", "flatbuffer_cc_library")
+
cc_binary(
name = "camera_reader",
srcs = [
@@ -26,6 +28,8 @@
target_compatible_with = ["@platforms//os:linux"],
visibility = ["//y2022:__subpackages__"],
deps = [
+ ":blob_detector_lib",
+ ":target_estimator_lib",
"//aos:flatbuffer_merge",
"//aos/events:event_loop",
"//aos/network:team_number",
@@ -54,6 +58,30 @@
],
)
+
+cc_library(
+ name = "target_estimator_lib",
+ srcs = [
+ "target_estimator.cc",
+ ],
+ hdrs = [
+ "target_estimator.h",
+ ],
+ target_compatible_with = ["@platforms//os:linux"],
+ visibility = ["//y2022:__subpackages__"],
+ deps = [
+ ":target_estimate_fbs",
+ "//third_party:opencv",
+ ],
+)
+
+flatbuffer_cc_library(
+ name = "target_estimate_fbs",
+ srcs = ["target_estimate.fbs"],
+ gen_reflections = 1,
+ target_compatible_with = ["@platforms//os:linux"],
+ visibility = ["//y2022:__subpackages__"],
+)
+
cc_binary(
name = "viewer",
srcs = [
@@ -66,6 +94,7 @@
visibility = ["//y2022:__subpackages__"],
deps = [
":blob_detector_lib",
+ ":target_estimator_lib",
"//aos:init",
"//aos/events:shm_event_loop",
"//frc971/vision:vision_fbs",
diff --git a/y2022/vision/blob_detector.cc b/y2022/vision/blob_detector.cc
index 737363d..b76ffc5 100644
--- a/y2022/vision/blob_detector.cc
+++ b/y2022/vision/blob_detector.cc
@@ -1,6 +1,11 @@
#include "y2022/vision/blob_detector.h"
+#include <cmath>
+#include <string>
+
#include "aos/network/team_number.h"
+#include "opencv2/features2d.hpp"
+#include "opencv2/imgproc.hpp"
DEFINE_uint64(green_delta, 50,
"Required difference between green pixels vs. red and blue");
@@ -20,7 +25,8 @@
uint8_t red = pixel.val[2];
      // Simple filter that looks for green pixels sufficiently brighter than
// red and blue
- if ((green > blue + 30) && (green > red + 50)) {
+ if ((green > blue + FLAGS_green_delta) &&
+ (green > red + FLAGS_green_delta)) {
binarized_image.at<uint8_t>(row, col) = 255;
} else {
binarized_image.at<uint8_t>(row, col) = 0;
@@ -69,16 +75,28 @@
std::vector<std::vector<cv::Point>> blobs,
std::vector<BlobDetector::BlobStats> blob_stats) {
std::vector<std::vector<cv::Point>> filtered_blobs;
+
auto blob_it = blobs.begin();
auto stats_it = blob_stats.begin();
while (blob_it < blobs.end() && stats_it < blob_stats.end()) {
+ // To estimate the maximum y, we can figure out the y value of the blobs
+ // when the camera is the farthest from the target, at the field corner.
+ // We can solve for the pitch of the blob:
+ // blob_pitch = atan((height_tape - height_camera) / depth) + camera_pitch
+ // The triangle with the height of the tape above the camera and the camera
+ // depth is similar to the one with the focal length in y pixels and the y
+ // coordinate offset from the center of the image.
+ // Therefore y_offset = focal_length_y * tan(blob_pitch), and
+ // y = -(y_offset - offset_y)
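+    // As a rough illustration (with made-up values, not the real
+    // calibration): focal_length_y ~390 px, offset_y ~240 px, and a blob
+    // pitch of about -0.4 rad at the far corner give
+    // y = 240 - 390 * tan(-0.4) ~= 405, which is roughly where kMaxY = 400
+    // comes from.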
constexpr int kMaxY = 400;
constexpr double kTapeAspectRatio = 5.0 / 2.0;
constexpr double kAspectRatioThreshold = 1.5;
constexpr double kMinArea = 10;
- constexpr size_t kMinPoints = 2;
+ constexpr size_t kMinPoints = 6;
+
    // Remove all blobs that are at the bottom of the image, have a different
    // aspect ratio than the tape, or have too small an area or too few points.
+ // TODO(milind): modify to take into account that blobs will be on the side.
if ((stats_it->centroid.y <= kMaxY) &&
(std::abs(kTapeAspectRatio - stats_it->aspect_ratio) <
kAspectRatioThreshold) &&
@@ -88,6 +106,7 @@
blob_it++;
stats_it++;
}
+
return filtered_blobs;
}
@@ -99,12 +118,14 @@
CHECK_GT(view_image.cols, 0);
if (unfiltered_blobs.size() > 0) {
// Draw blobs unfilled, with red color border
- drawContours(view_image, unfiltered_blobs, -1, cv::Scalar(0, 0, 255), 0);
+ cv::drawContours(view_image, unfiltered_blobs, -1, cv::Scalar(0, 0, 255),
+ 0);
}
- drawContours(view_image, filtered_blobs, -1, cv::Scalar(0, 255, 0),
- cv::FILLED);
+ cv::drawContours(view_image, filtered_blobs, -1, cv::Scalar(0, 255, 0),
+ cv::FILLED);
+ // Draw blob centroids
for (auto stats : blob_stats) {
cv::circle(view_image, stats.centroid, 2, cv::Scalar(255, 0, 0),
cv::FILLED);
diff --git a/y2022/vision/camera_reader.cc b/y2022/vision/camera_reader.cc
index 4fa6651..f466c8b 100644
--- a/y2022/vision/camera_reader.cc
+++ b/y2022/vision/camera_reader.cc
@@ -12,6 +12,8 @@
#include "y2020/vision/sift/sift_generated.h"
#include "y2020/vision/sift/sift_training_generated.h"
#include "y2020/vision/tools/python_code/sift_training_data.h"
+#include "y2022/vision/blob_detector.h"
+#include "y2022/vision/target_estimator.h"
namespace y2022 {
namespace vision {
@@ -49,7 +51,21 @@
// Now, send our two messages-- one large, with details for remote
// debugging(features), and one smaller
- // TODO: Send blobdetection and pose results
+ // TODO: Send debugging message as well
+  std::vector<std::vector<cv::Point>> filtered_blobs, unfiltered_blobs;
+ std::vector<BlobDetector::BlobStats> blob_stats;
+ cv::Mat binarized_image =
+ cv::Mat::zeros(cv::Size(image_mat.cols, image_mat.rows), CV_8UC1);
+ cv::Mat ret_image =
+ cv::Mat::zeros(cv::Size(image_mat.cols, image_mat.rows), CV_8UC3);
+ BlobDetector::ExtractBlobs(image_mat, binarized_image, ret_image,
+ filtered_blobs, unfiltered_blobs, blob_stats);
+  // TODO(milind): use actual centroid
+  // Don't index into blob_stats if no blobs were detected.
+  if (!blob_stats.empty()) {
+    TargetEstimateT target = TargetEstimator::EstimateTargetLocation(
+        blob_stats[0].centroid, CameraIntrinsics(), CameraExtrinsics());
+
+    auto builder = target_estimate_sender_.MakeBuilder();
+    builder.CheckOk(
+        builder.Send(TargetEstimate::Pack(*builder.fbb(), &target)));
+  }
}
void CameraReader::ReadImage() {
diff --git a/y2022/vision/camera_reader.h b/y2022/vision/camera_reader.h
index c302b15..6880248 100644
--- a/y2022/vision/camera_reader.h
+++ b/y2022/vision/camera_reader.h
@@ -15,6 +15,7 @@
#include "y2020/vision/sift/sift_generated.h"
#include "y2020/vision/sift/sift_training_generated.h"
#include "y2020/vision/tools/python_code/sift_training_data.h"
+#include "y2022/vision/target_estimate_generated.h"
namespace y2022 {
namespace vision {
@@ -32,6 +33,8 @@
camera_calibration_(FindCameraCalibration()),
reader_(reader),
image_sender_(event_loop->MakeSender<CameraImage>("/camera")),
+ target_estimate_sender_(
+ event_loop->MakeSender<TargetEstimate>("/camera")),
read_image_timer_(event_loop->AddTimer([this]() { ReadImage(); })) {
event_loop->OnRun(
[this]() { read_image_timer_->Setup(event_loop_->monotonic_now()); });
@@ -54,6 +57,16 @@
return result;
}
+ cv::Mat CameraExtrinsics() const {
+ const cv::Mat result(
+ 4, 4, CV_32F,
+ const_cast<void *>(static_cast<const void *>(
+ camera_calibration_->fixed_extrinsics()->data()->data())));
+ CHECK_EQ(result.total(),
+ camera_calibration_->fixed_extrinsics()->data()->size());
+ return result;
+ }
+
cv::Mat CameraDistCoeffs() const {
const cv::Mat result(5, 1, CV_32F,
const_cast<void *>(static_cast<const void *>(
@@ -67,9 +80,10 @@
const sift::CameraCalibration *const camera_calibration_;
V4L2Reader *const reader_;
aos::Sender<CameraImage> image_sender_;
+ aos::Sender<TargetEstimate> target_estimate_sender_;
- // We schedule this immediately to read an image. Having it on a timer means
- // other things can run on the event loop in between.
+ // We schedule this immediately to read an image. Having it on a timer
+ // means other things can run on the event loop in between.
aos::TimerHandler *const read_image_timer_;
};
diff --git a/y2022/vision/target_estimate.fbs b/y2022/vision/target_estimate.fbs
new file mode 100644
index 0000000..075c9c4
--- /dev/null
+++ b/y2022/vision/target_estimate.fbs
@@ -0,0 +1,12 @@
+namespace y2022.vision;
+
+// Contains the information the EKF needs from the blobs in a single image.
+table TargetEstimate {
+  // Horizontal distance from the camera to the center of the upper hub (m).
+ distance:double (id: 0);
+ // Angle from the camera to the target (horizontal angle in rad).
+ // Positive means right of center, negative means left.
+ angle_to_target:double (id: 1);
+
+ // TODO(milind): add confidence and blob stats
+}
diff --git a/y2022/vision/target_estimator.cc b/y2022/vision/target_estimator.cc
new file mode 100644
index 0000000..c51eb08
--- /dev/null
+++ b/y2022/vision/target_estimator.cc
@@ -0,0 +1,31 @@
+#include "y2022/vision/target_estimator.h"
+
+namespace y2022::vision {
+
+TargetEstimateT TargetEstimator::EstimateTargetLocation(
+ cv::Point2i blob_point, const cv::Mat &intrinsics,
+ const cv::Mat &extrinsics) {
+  // The intrinsics and extrinsics matrices are stored as 32-bit floats
+  // (CV_32F), so they have to be read with at<float>.
+  const cv::Point2d focal_length(intrinsics.at<float>(0, 0),
+                                 intrinsics.at<float>(1, 1));
+  const cv::Point2d offset(intrinsics.at<float>(0, 2),
+                           intrinsics.at<float>(1, 2));
+
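+  // Note: this assumes the camera is level; any camera pitch encoded in the
+  // extrinsics is not yet taken into account here.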
+ // Blob pitch in camera reference frame
+ const double blob_pitch =
+ std::atan(static_cast<double>(-(blob_point.y - offset.y)) /
+ static_cast<double>(focal_length.y));
+  const double camera_height = extrinsics.at<float>(2, 3);
+ // Depth from camera to blob
+ const double depth = (kTapeHeight - camera_height) / std::tan(blob_pitch);
+
+ TargetEstimateT target;
+ target.angle_to_target =
+ std::atan2(static_cast<double>(blob_point.x - offset.x),
+ static_cast<double>(focal_length.x));
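+  // depth is measured along the camera's optical axis, so dividing by
+  // cos(angle_to_target) gives the horizontal straight-line distance to the
+  // tape; adding the hub radius then gives the distance to the hub center.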
+ target.distance =
+ (depth / std::cos(target.angle_to_target)) + kUpperHubRadius;
+
+ return target;
+}
+
+} // namespace y2022::vision
diff --git a/y2022/vision/target_estimator.h b/y2022/vision/target_estimator.h
new file mode 100644
index 0000000..4988c3c
--- /dev/null
+++ b/y2022/vision/target_estimator.h
@@ -0,0 +1,26 @@
+#ifndef Y2022_VISION_TARGET_ESTIMATOR_H_
+#define Y2022_VISION_TARGET_ESTIMATOR_H_
+
+#include "opencv2/imgproc.hpp"
+#include "y2022/vision/target_estimate_generated.h"
+
+namespace y2022::vision {
+
+class TargetEstimator {
+ public:
+ // Computes the location of the target.
+ // blob_point is the mean (x, y) of blob pixels.
+ static TargetEstimateT EstimateTargetLocation(cv::Point2i blob_point,
+ const cv::Mat &intrinsics,
+ const cv::Mat &extrinsics);
+
+ private:
+ // Height of the center of the tape (m)
+ static constexpr double kTapeHeight = 2.58 + (0.05 / 2);
+ // Horizontal distance from tape to center of hub (m)
+ static constexpr double kUpperHubRadius = 1.22 / 2;
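+  // (the upper hub is about 1.22 m, i.e. 4 ft, in diameter)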
+};
+
+} // namespace y2022::vision
+
+#endif  // Y2022_VISION_TARGET_ESTIMATOR_H_
diff --git a/y2022/vision/viewer.cc b/y2022/vision/viewer.cc
index 6573308..8ecb36d 100644
--- a/y2022/vision/viewer.cc
+++ b/y2022/vision/viewer.cc
@@ -116,7 +116,6 @@
cv::Mat::zeros(cv::Size(rgb_image.cols, rgb_image.rows), CV_8UC1);
cv::Mat ret_image =
cv::Mat::zeros(cv::Size(rgb_image.cols, rgb_image.rows), CV_8UC3);
-
BlobDetector::ExtractBlobs(rgb_image, binarized_image, ret_image,
filtered_blobs, unfiltered_blobs, blob_stats);
diff --git a/y2022/y2022_pi_template.json b/y2022/y2022_pi_template.json
index 81b96d2..e80d6c3 100644
--- a/y2022/y2022_pi_template.json
+++ b/y2022/y2022_pi_template.json
@@ -99,6 +99,13 @@
"num_senders": 18
},
{
+ "name": "/pi{{ NUM }}/camera",
+ "type": "y2022.vision.TargetEstimate",
+ "source_node": "pi{{ NUM }}",
+ "frequency": 25,
+ "num_senders": 2
+  },
+ {
"name": "/logger/aos",
"type": "aos.starter.StarterRpc",
"source_node": "logger",