blob: 67736316af667dd6a058cdf3a20ad71b8adf97a4 [file] [log] [blame]
milind-u92195982022-01-22 20:29:31 -08001#include "y2022/vision/target_estimator.h"
2
Milind Upadhyayf61e1482022-02-11 20:42:55 -08003#include "absl/strings/str_format.h"
4#include "aos/time/time.h"
5#include "ceres/ceres.h"
6#include "frc971/control_loops/quaternion_utils.h"
7#include "opencv2/core/core.hpp"
8#include "opencv2/core/eigen.hpp"
9#include "opencv2/features2d.hpp"
10#include "opencv2/highgui/highgui.hpp"
11#include "opencv2/imgproc.hpp"
12
13DEFINE_bool(freeze_roll, true, "If true, don't solve for roll");
14DEFINE_bool(freeze_pitch, false, "If true, don't solve for pitch");
15DEFINE_bool(freeze_yaw, false, "If true, don't solve for yaw");
16DEFINE_bool(freeze_camera_height, true,
17 "If true, don't solve for camera height");
18DEFINE_bool(freeze_angle_to_camera, true,
19 "If true, don't solve for polar angle to camera");
20
21DEFINE_uint64(max_num_iterations, 200,
22 "Maximum number of iterations for the ceres solver");
23DEFINE_bool(solver_output, false,
24 "If true, log the solver progress and results");
25
milind-u92195982022-01-22 20:29:31 -080026namespace y2022::vision {
27
Milind Upadhyayf61e1482022-02-11 20:42:55 -080028std::vector<cv::Point3d> TargetEstimator::ComputeTapePoints() {
29 std::vector<cv::Point3d> tape_points;
30 tape_points.reserve(kNumPiecesOfTape);
milind-u92195982022-01-22 20:29:31 -080031
Milind Upadhyayf61e1482022-02-11 20:42:55 -080032 constexpr size_t kNumVisiblePiecesOfTape = 5;
33 for (size_t i = 0; i < kNumVisiblePiecesOfTape; i++) {
34 // The center piece of tape is at 0 rad, so the angle indices are offset
35 // by the number of pieces of tape on each side of it
36 const double theta_index =
37 static_cast<double>(i) - ((kNumVisiblePiecesOfTape - 1) / 2);
38 // The polar angle is a multiple of the angle between tape centers
39 double theta = theta_index * ((2.0 * M_PI) / kNumPiecesOfTape);
40 tape_points.emplace_back(kUpperHubRadius * std::cos(theta),
41 kUpperHubRadius * std::sin(theta), kTapeHeight);
42 }
milind-u92195982022-01-22 20:29:31 -080043
Milind Upadhyayf61e1482022-02-11 20:42:55 -080044 return tape_points;
45}
milind-u92195982022-01-22 20:29:31 -080046
Milind Upadhyayf61e1482022-02-11 20:42:55 -080047const std::vector<cv::Point3d> TargetEstimator::kTapePoints =
48 ComputeTapePoints();
49
50TargetEstimator::TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics)
51 : centroids_(),
52 image_(std::nullopt),
53 roll_(0.0),
54 pitch_(0.0),
55 yaw_(M_PI),
56 distance_(3.0),
57 angle_to_camera_(0.0),
58 // TODO(milind): add IMU height
59 camera_height_(extrinsics.at<double>(2, 3)) {
60 cv::cv2eigen(intrinsics, intrinsics_);
61 cv::cv2eigen(extrinsics, extrinsics_);
62}
63
64namespace {
65void SetBoundsOrFreeze(double *param, bool freeze, double min, double max,
66 ceres::Problem *problem) {
67 if (freeze) {
68 problem->SetParameterization(
69 param, new ceres::SubsetParameterization(1, std::vector<int>{0}));
70 } else {
71 problem->SetParameterLowerBound(param, 0, min);
72 problem->SetParameterUpperBound(param, 0, max);
73 }
74}
75} // namespace
76
77void TargetEstimator::Solve(const std::vector<cv::Point> &centroids,
78 std::optional<cv::Mat> image) {
79 auto start = aos::monotonic_clock::now();
80
81 centroids_ = centroids;
82 image_ = image;
83
84 ceres::Problem problem;
85
86 // Set up the only cost function (also known as residual). This uses
87 // auto-differentiation to obtain the derivative (jacobian).
88 ceres::CostFunction *cost_function =
89 new ceres::AutoDiffCostFunction<TargetEstimator, ceres::DYNAMIC, 1, 1, 1,
90 1, 1, 1>(this, centroids_.size() * 2,
91 ceres::DO_NOT_TAKE_OWNERSHIP);
92
93 // TODO(milind): add loss function when we get more noisy data
94 problem.AddResidualBlock(cost_function, nullptr, &roll_, &pitch_, &yaw_,
95 &distance_, &angle_to_camera_, &camera_height_);
96
97 // TODO(milind): seed values at localizer output, and constrain to be close to
98 // that.
99
100 // Constrain the rotation, otherwise there can be multiple solutions.
101 // There shouldn't be too much roll or pitch
102 constexpr double kMaxRoll = 0.1;
103 SetBoundsOrFreeze(&roll_, FLAGS_freeze_roll, -kMaxRoll, kMaxRoll, &problem);
104
105 constexpr double kPitch = -35.0 * M_PI / 180.0;
106 constexpr double kMaxPitchDelta = 0.15;
107 SetBoundsOrFreeze(&pitch_, FLAGS_freeze_pitch, kPitch - kMaxPitchDelta,
108 kPitch + kMaxPitchDelta, &problem);
109 // Constrain the yaw to where the target would be visible
110 constexpr double kMaxYawDelta = M_PI / 4.0;
111 SetBoundsOrFreeze(&yaw_, FLAGS_freeze_yaw, M_PI - kMaxYawDelta,
112 M_PI + kMaxYawDelta, &problem);
113
114 constexpr double kMaxHeightDelta = 0.1;
115 SetBoundsOrFreeze(&camera_height_, FLAGS_freeze_camera_height,
116 camera_height_ - kMaxHeightDelta,
117 camera_height_ + kMaxHeightDelta, &problem);
118
119 // Distances shouldn't be too close to the target or too far
120 constexpr double kMinDistance = 1.0;
121 constexpr double kMaxDistance = 10.0;
122 SetBoundsOrFreeze(&distance_, false, kMinDistance, kMaxDistance, &problem);
123
124 // Keep the angle between +/- half of the angle between piece of tape
125 constexpr double kMaxAngle = ((2.0 * M_PI) / kNumPiecesOfTape) / 2.0;
126 SetBoundsOrFreeze(&angle_to_camera_, FLAGS_freeze_angle_to_camera, -kMaxAngle,
127 kMaxAngle, &problem);
128
129 ceres::Solver::Options options;
130 options.minimizer_progress_to_stdout = FLAGS_solver_output;
131 options.gradient_tolerance = 1e-12;
132 options.function_tolerance = 1e-16;
133 options.parameter_tolerance = 1e-12;
134 options.max_num_iterations = FLAGS_max_num_iterations;
135 ceres::Solver::Summary summary;
136 ceres::Solve(options, &problem, &summary);
137
138 auto end = aos::monotonic_clock::now();
139 LOG(INFO) << "Target estimation elapsed time: "
140 << std::chrono::duration<double, std::milli>(end - start).count()
141 << " ms";
142
Milind Upadhyay14279de2022-02-26 16:07:53 -0800143 // Use a sigmoid to convert the final cost into a confidence for the
144 // localizer. Fit a sigmoid to the points of (0, 1) and two other reasonable
145 // residual-confidence combinations using
146 // https://www.desmos.com/calculator/jj7p8zk0w2
147 constexpr double kSigmoidCapacity = 1.206;
148 // Stretch the sigmoid out correctly.
149 // Currently, good estimates have final costs of around 1 to 2 pixels.
150 constexpr double kSigmoidScalar = 0.2061;
151 constexpr double kSigmoidGrowthRate = -0.1342;
152 if (centroids_.size() > 0) {
153 confidence_ = kSigmoidCapacity /
154 (1.0 + kSigmoidScalar * std::exp(-kSigmoidGrowthRate *
155 summary.final_cost));
156 } else {
157 // If we detected no blobs, the confidence should be zero and not depending
158 // on the final cost, which would be 0.
159 confidence_ = 0.0;
160 }
161
Milind Upadhyayf61e1482022-02-11 20:42:55 -0800162 if (FLAGS_solver_output) {
163 LOG(INFO) << summary.FullReport();
164
165 LOG(INFO) << "roll: " << roll_;
166 LOG(INFO) << "pitch: " << pitch_;
167 LOG(INFO) << "yaw: " << yaw_;
168 LOG(INFO) << "angle to target (based on yaw): " << angle_to_target();
169 LOG(INFO) << "angle to camera (polar): " << angle_to_camera_;
170 LOG(INFO) << "distance (polar): " << distance_;
171 LOG(INFO) << "camera height: " << camera_height_;
Milind Upadhyay14279de2022-02-26 16:07:53 -0800172 LOG(INFO) << "confidence: " << confidence_;
Milind Upadhyayf61e1482022-02-11 20:42:55 -0800173 }
174}
175
176namespace {
177// Hacks to extract a double from a scalar, which is either a ceres jet or a
178// double. Only used for debugging and displaying.
179template <typename S>
180double ScalarToDouble(S s) {
181 const double *ptr = reinterpret_cast<double *>(&s);
182 return *ptr;
183}
184
185template <typename S>
186cv::Point2d ScalarPointToDouble(cv::Point_<S> p) {
187 return cv::Point2d(ScalarToDouble(p.x), ScalarToDouble(p.y));
188}
189} // namespace
190
191template <typename S>
192bool TargetEstimator::operator()(const S *const roll, const S *const pitch,
193 const S *const yaw, const S *const distance,
194 const S *const theta,
195 const S *const camera_height,
196 S *residual) const {
197 using Vector3s = Eigen::Matrix<S, 3, 1>;
198 using Affine3s = Eigen::Transform<S, 3, Eigen::Affine>;
199
200 Eigen::AngleAxis<S> roll_angle(*roll, Vector3s::UnitX());
201 Eigen::AngleAxis<S> pitch_angle(*pitch, Vector3s::UnitY());
202 Eigen::AngleAxis<S> yaw_angle(*yaw, Vector3s::UnitZ());
203 // Construct the rotation and translation of the camera in the hub's frame
204 Eigen::Quaternion<S> R_camera_hub = yaw_angle * pitch_angle * roll_angle;
205 Vector3s T_camera_hub(*distance * ceres::cos(*theta),
206 *distance * ceres::sin(*theta), *camera_height);
207
208 Affine3s H_camera_hub = Eigen::Translation<S, 3>(T_camera_hub) * R_camera_hub;
209
210 std::vector<cv::Point_<S>> tape_points_proj;
211 for (cv::Point3d tape_point_hub : kTapePoints) {
212 Vector3s tape_point_hub_eigen(S(tape_point_hub.x), S(tape_point_hub.y),
213 S(tape_point_hub.z));
214
215 // With X, Y, Z being world axes and x, y, z being camera axes,
216 // x = Y, y = Z, z = -X
217 static const Eigen::Matrix3d kCameraAxisConversion =
218 (Eigen::Matrix3d() << 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0)
219 .finished();
220 // Project the 3d tape point onto the image using the transformation and
221 // intrinsics
222 Vector3s tape_point_proj =
223 intrinsics_ * (kCameraAxisConversion *
224 (H_camera_hub.inverse() * tape_point_hub_eigen));
225
226 // Normalize the projected point
227 tape_points_proj.emplace_back(tape_point_proj.x() / tape_point_proj.z(),
228 tape_point_proj.y() / tape_point_proj.z());
229 VLOG(1) << "Projected tape point: "
230 << ScalarPointToDouble(
231 tape_points_proj[tape_points_proj.size() - 1]);
232 }
233
234 for (size_t i = 0; i < centroids_.size(); i++) {
235 const auto distance = DistanceFromTape(i, tape_points_proj);
236 // Set the residual to the (x, y) distance of the centroid from the
237 // nearest projected piece of tape
238 residual[i * 2] = distance.x;
239 residual[(i * 2) + 1] = distance.y;
240 }
241
242 if (image_.has_value()) {
243 // Draw the current stage of the solving
244 cv::Mat image = image_->clone();
245 for (size_t i = 0; i < tape_points_proj.size() - 1; i++) {
246 cv::line(image, ScalarPointToDouble(tape_points_proj[i]),
247 ScalarPointToDouble(tape_points_proj[i + 1]),
248 cv::Scalar(255, 255, 255));
249 cv::circle(image, ScalarPointToDouble(tape_points_proj[i]), 2,
250 cv::Scalar(255, 20, 147), cv::FILLED);
251 cv::circle(image, ScalarPointToDouble(tape_points_proj[i + 1]), 2,
252 cv::Scalar(255, 20, 147), cv::FILLED);
253 }
254 cv::imshow("image", image);
255 cv::waitKey(10);
256 }
257
258 return true;
259}
260
261namespace {
262template <typename S>
263cv::Point_<S> Distance(cv::Point p, cv::Point_<S> q) {
264 return cv::Point_<S>(S(p.x) - q.x, S(p.y) - q.y);
265}
266
267template <typename S>
268bool Less(cv::Point_<S> distance_1, cv::Point_<S> distance_2) {
269 return (ceres::pow(distance_1.x, 2) + ceres::pow(distance_1.y, 2) <
270 ceres::pow(distance_2.x, 2) + ceres::pow(distance_2.y, 2));
271}
272} // namespace
273
274template <typename S>
275cv::Point_<S> TargetEstimator::DistanceFromTape(
276 size_t centroid_index,
277 const std::vector<cv::Point_<S>> &tape_points) const {
278 // Figure out the middle index in the tape points
279 size_t middle_index = centroids_.size() / 2;
280 if (centroids_.size() % 2 == 0) {
281 // There are two possible middles in this case. Figure out which one fits
282 // the current better
283 const auto tape_middle = tape_points[tape_points.size() / 2];
284 const auto middle_distance_1 =
285 Distance(centroids_[(centroids_.size() / 2) - 1], tape_middle);
286 const auto middle_distance_2 =
287 Distance(centroids_[centroids_.size() / 2], tape_middle);
288 if (Less(middle_distance_1, middle_distance_2)) {
289 middle_index--;
290 }
291 }
292
293 auto distance = cv::Point_<S>(std::numeric_limits<S>::infinity(),
294 std::numeric_limits<S>::infinity());
295 if (centroid_index == middle_index) {
296 // Fix the middle centroid so the solver can't go too far off
297 distance =
298 Distance(centroids_[middle_index], tape_points[tape_points.size() / 2]);
299 } else {
300 // Give the other centroids some freedom in case some are split into pieces
301 for (auto tape_point : tape_points) {
302 const auto current_distance =
303 Distance(centroids_[centroid_index], tape_point);
304 if (Less(current_distance, distance)) {
305 distance = current_distance;
306 }
307 }
308 }
309
310 return distance;
311}
312
313namespace {
314void DrawEstimateValues(double distance, double angle_to_target,
315 double angle_to_camera, double roll, double pitch,
Milind Upadhyay14279de2022-02-26 16:07:53 -0800316 double yaw, double confidence, cv::Mat view_image) {
Milind Upadhyayf61e1482022-02-11 20:42:55 -0800317 constexpr int kTextX = 10;
Milind Upadhyay14279de2022-02-26 16:07:53 -0800318 int text_y = 250;
Milind Upadhyayf61e1482022-02-11 20:42:55 -0800319 constexpr int kTextSpacing = 35;
320
321 const auto kTextColor = cv::Scalar(0, 255, 255);
322 constexpr double kFontScale = 1.0;
323
324 cv::putText(view_image, absl::StrFormat("Distance: %.3f", distance),
325 cv::Point(kTextX, text_y += kTextSpacing),
326 cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
327 cv::putText(view_image,
328 absl::StrFormat("Angle to target: %.3f", angle_to_target),
329 cv::Point(kTextX, text_y += kTextSpacing),
330 cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
331 cv::putText(view_image,
332 absl::StrFormat("Angle to camera: %.3f", angle_to_camera),
333 cv::Point(kTextX, text_y += kTextSpacing),
334 cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
335
336 cv::putText(
337 view_image,
338 absl::StrFormat("Roll: %.3f, pitch: %.3f, yaw: %.3f", roll, pitch, yaw),
339 cv::Point(kTextX, text_y += kTextSpacing), cv::FONT_HERSHEY_DUPLEX,
340 kFontScale, kTextColor, 2);
Milind Upadhyay14279de2022-02-26 16:07:53 -0800341
342 cv::putText(view_image, absl::StrFormat("Confidence: %.3f", confidence),
343 cv::Point(kTextX, text_y += kTextSpacing),
344 cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
Milind Upadhyayf61e1482022-02-11 20:42:55 -0800345}
346} // namespace
347
348void TargetEstimator::DrawEstimate(const TargetEstimate &target_estimate,
349 cv::Mat view_image) {
350 DrawEstimateValues(target_estimate.distance(),
351 target_estimate.angle_to_target(),
352 target_estimate.angle_to_camera(),
353 target_estimate.rotation_camera_hub()->roll(),
354 target_estimate.rotation_camera_hub()->pitch(),
Milind Upadhyay14279de2022-02-26 16:07:53 -0800355 target_estimate.rotation_camera_hub()->yaw(),
356 target_estimate.confidence(), view_image);
Milind Upadhyayf61e1482022-02-11 20:42:55 -0800357}
358
359void TargetEstimator::DrawEstimate(cv::Mat view_image) const {
360 DrawEstimateValues(distance_, angle_to_target(), angle_to_camera_, roll_,
Milind Upadhyay14279de2022-02-26 16:07:53 -0800361 pitch_, yaw_, confidence_, view_image);
milind-u92195982022-01-22 20:29:31 -0800362}
363
364} // namespace y2022::vision