#include "y2022/vision/target_estimator.h"

#include "absl/strings/str_format.h"
#include "aos/time/time.h"
#include "ceres/ceres.h"
#include "frc971/control_loops/quaternion_utils.h"
#include "opencv2/core/core.hpp"
#include "opencv2/core/eigen.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc.hpp"

DEFINE_bool(freeze_roll, true, "If true, don't solve for roll");
DEFINE_bool(freeze_pitch, false, "If true, don't solve for pitch");
DEFINE_bool(freeze_yaw, false, "If true, don't solve for yaw");
DEFINE_bool(freeze_camera_height, true,
            "If true, don't solve for camera height");
DEFINE_bool(freeze_angle_to_camera, true,
            "If true, don't solve for polar angle to camera");

DEFINE_uint64(max_num_iterations, 200,
              "Maximum number of iterations for the ceres solver");
DEFINE_bool(solver_output, false,
            "If true, log the solver progress and results");

namespace y2022::vision {

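// Computes the 3D locations, in the hub frame, of the pieces of tape the
// camera can see at once: evenly spaced points on a circle of radius
// kUpperHubRadius at height kTapeHeight, centered about the piece of tape at
// 0 rad.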
std::vector<cv::Point3d> TargetEstimator::ComputeTapePoints() {
  std::vector<cv::Point3d> tape_points;
  tape_points.reserve(kNumPiecesOfTape);

  constexpr size_t kNumVisiblePiecesOfTape = 5;
  for (size_t i = 0; i < kNumVisiblePiecesOfTape; i++) {
    // The center piece of tape is at 0 rad, so the angle indices are offset
    // by the number of pieces of tape on each side of it
    const double theta_index =
        static_cast<double>(i) - ((kNumVisiblePiecesOfTape - 1) / 2);
    // The polar angle is a multiple of the angle between tape centers
    double theta = theta_index * ((2.0 * M_PI) / kNumPiecesOfTape);
    tape_points.emplace_back(kUpperHubRadius * std::cos(theta),
                             kUpperHubRadius * std::sin(theta), kTapeHeight);
  }

  return tape_points;
}

const std::vector<cv::Point3d> TargetEstimator::kTapePoints =
    ComputeTapePoints();

TargetEstimator::TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics)
    : centroids_(),
      image_(std::nullopt),
      roll_(0.0),
      pitch_(0.0),
      yaw_(M_PI),
      distance_(3.0),
      angle_to_camera_(0.0),
      // TODO(milind): add IMU height
      camera_height_(extrinsics.at<double>(2, 3)) {
  cv::cv2eigen(intrinsics, intrinsics_);
  cv::cv2eigen(extrinsics, extrinsics_);
}

namespace {
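// Either freezes a 1-dimensional parameter block (by marking its only
// coordinate constant with a SubsetParameterization) or bounds it to
// [min, max]. Freezing keeps the parameter at its seeded value during the
// solve.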
void SetBoundsOrFreeze(double *param, bool freeze, double min, double max,
                       ceres::Problem *problem) {
  if (freeze) {
    problem->SetParameterization(
        param, new ceres::SubsetParameterization(1, std::vector<int>{0}));
  } else {
    problem->SetParameterLowerBound(param, 0, min);
    problem->SetParameterUpperBound(param, 0, max);
  }
}
}  // namespace

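// Fits the camera pose (roll, pitch, yaw, polar distance and angle to the
// camera, and camera height) so that the projected tape points line up with
// the detected blob centroids, using ceres.
//
// Example usage (variable names here are illustrative, not from this file):
//   TargetEstimator estimator(intrinsics, extrinsics);
//   estimator.Solve(blob_centroids, std::nullopt);  // pass an image to debug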
void TargetEstimator::Solve(const std::vector<cv::Point> &centroids,
                            std::optional<cv::Mat> image) {
  auto start = aos::monotonic_clock::now();

  centroids_ = centroids;
  image_ = image;

  ceres::Problem problem;

  // Set up the only cost function (also known as residual). This uses
  // auto-differentiation to obtain the derivative (jacobian).
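  // The number of residuals is dynamic (two per centroid: the x and y error to
  // the matched tape point), and each of the six parameters (roll, pitch, yaw,
  // distance, angle to camera, camera height) is its own 1-dimensional
  // parameter block.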
  ceres::CostFunction *cost_function =
      new ceres::AutoDiffCostFunction<TargetEstimator, ceres::DYNAMIC, 1, 1, 1,
                                      1, 1, 1>(this, centroids_.size() * 2,
                                               ceres::DO_NOT_TAKE_OWNERSHIP);

  // TODO(milind): add loss function when we get more noisy data
  problem.AddResidualBlock(cost_function, nullptr, &roll_, &pitch_, &yaw_,
                           &distance_, &angle_to_camera_, &camera_height_);

  // TODO(milind): seed values at localizer output, and constrain to be close
  // to that.

  // Constrain the rotation, otherwise there can be multiple solutions.
  // There shouldn't be too much roll or pitch
  constexpr double kMaxRoll = 0.1;
  SetBoundsOrFreeze(&roll_, FLAGS_freeze_roll, -kMaxRoll, kMaxRoll, &problem);

  constexpr double kPitch = -35.0 * M_PI / 180.0;
  constexpr double kMaxPitchDelta = 0.15;
  SetBoundsOrFreeze(&pitch_, FLAGS_freeze_pitch, kPitch - kMaxPitchDelta,
                    kPitch + kMaxPitchDelta, &problem);
  // Constrain the yaw to where the target would be visible
  constexpr double kMaxYawDelta = M_PI / 4.0;
  SetBoundsOrFreeze(&yaw_, FLAGS_freeze_yaw, M_PI - kMaxYawDelta,
                    M_PI + kMaxYawDelta, &problem);

  constexpr double kMaxHeightDelta = 0.1;
  SetBoundsOrFreeze(&camera_height_, FLAGS_freeze_camera_height,
                    camera_height_ - kMaxHeightDelta,
                    camera_height_ + kMaxHeightDelta, &problem);

  // The distance estimate shouldn't be too close to or too far from the target
  constexpr double kMinDistance = 1.0;
  constexpr double kMaxDistance = 10.0;
  SetBoundsOrFreeze(&distance_, false, kMinDistance, kMaxDistance, &problem);

  // Keep the angle between +/- half of the angle between pieces of tape
  constexpr double kMaxAngle = ((2.0 * M_PI) / kNumPiecesOfTape) / 2.0;
  SetBoundsOrFreeze(&angle_to_camera_, FLAGS_freeze_angle_to_camera, -kMaxAngle,
                    kMaxAngle, &problem);

  ceres::Solver::Options options;
  options.minimizer_progress_to_stdout = FLAGS_solver_output;
  options.gradient_tolerance = 1e-12;
  options.function_tolerance = 1e-16;
  options.parameter_tolerance = 1e-12;
  options.max_num_iterations = FLAGS_max_num_iterations;
  ceres::Solver::Summary summary;
  ceres::Solve(options, &problem, &summary);

  auto end = aos::monotonic_clock::now();
  LOG(INFO) << "Target estimation elapsed time: "
            << std::chrono::duration<double, std::milli>(end - start).count()
            << " ms";

  if (FLAGS_solver_output) {
    LOG(INFO) << summary.FullReport();

    LOG(INFO) << "roll: " << roll_;
    LOG(INFO) << "pitch: " << pitch_;
    LOG(INFO) << "yaw: " << yaw_;
    LOG(INFO) << "angle to target (based on yaw): " << angle_to_target();
    LOG(INFO) << "angle to camera (polar): " << angle_to_camera_;
    LOG(INFO) << "distance (polar): " << distance_;
    LOG(INFO) << "camera height: " << camera_height_;
  }
}

namespace {
// Hacks to extract a double from a scalar, which is either a ceres jet or a
// double. Only used for debugging and displaying.
template <typename S>
double ScalarToDouble(S s) {
  const double *ptr = reinterpret_cast<double *>(&s);
  return *ptr;
}

template <typename S>
cv::Point2d ScalarPointToDouble(cv::Point_<S> p) {
  return cv::Point2d(ScalarToDouble(p.x), ScalarToDouble(p.y));
}
}  // namespace

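// Cost functor evaluated by ceres. Builds the camera's pose in the hub frame
// from the current parameter estimates, projects each 3D tape point into the
// image, and writes the (x, y) error between every detected centroid and its
// matched projected tape point into residual. Templated on S so that ceres can
// pass in jets for automatic differentiation.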
template <typename S>
bool TargetEstimator::operator()(const S *const roll, const S *const pitch,
                                 const S *const yaw, const S *const distance,
                                 const S *const theta,
                                 const S *const camera_height,
                                 S *residual) const {
  using Vector3s = Eigen::Matrix<S, 3, 1>;
  using Affine3s = Eigen::Transform<S, 3, Eigen::Affine>;

  Eigen::AngleAxis<S> roll_angle(*roll, Vector3s::UnitX());
  Eigen::AngleAxis<S> pitch_angle(*pitch, Vector3s::UnitY());
  Eigen::AngleAxis<S> yaw_angle(*yaw, Vector3s::UnitZ());
  // Construct the rotation and translation of the camera in the hub's frame
  Eigen::Quaternion<S> R_camera_hub = yaw_angle * pitch_angle * roll_angle;
  Vector3s T_camera_hub(*distance * ceres::cos(*theta),
                        *distance * ceres::sin(*theta), *camera_height);

  Affine3s H_camera_hub = Eigen::Translation<S, 3>(T_camera_hub) * R_camera_hub;

  std::vector<cv::Point_<S>> tape_points_proj;
  for (cv::Point3d tape_point_hub : kTapePoints) {
    Vector3s tape_point_hub_eigen(S(tape_point_hub.x), S(tape_point_hub.y),
                                  S(tape_point_hub.z));

    // With X, Y, Z being world axes and x, y, z being camera axes,
    // x = Y, y = Z, z = -X
    static const Eigen::Matrix3d kCameraAxisConversion =
        (Eigen::Matrix3d() << 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0)
            .finished();
    // Project the 3d tape point onto the image using the transformation and
    // intrinsics
    Vector3s tape_point_proj =
        intrinsics_ * (kCameraAxisConversion *
                       (H_camera_hub.inverse() * tape_point_hub_eigen));

    // Normalize the projected point
    tape_points_proj.emplace_back(tape_point_proj.x() / tape_point_proj.z(),
                                  tape_point_proj.y() / tape_point_proj.z());
    VLOG(1) << "Projected tape point: "
            << ScalarPointToDouble(
                   tape_points_proj[tape_points_proj.size() - 1]);
  }

  for (size_t i = 0; i < centroids_.size(); i++) {
    const auto distance = DistanceFromTape(i, tape_points_proj);
    // Set the residual to the (x, y) distance of the centroid from the
    // nearest projected piece of tape
    residual[i * 2] = distance.x;
    residual[(i * 2) + 1] = distance.y;
  }

  if (image_.has_value()) {
    // Draw the current stage of the solving
    cv::Mat image = image_->clone();
    for (size_t i = 0; i < tape_points_proj.size() - 1; i++) {
      cv::line(image, ScalarPointToDouble(tape_points_proj[i]),
               ScalarPointToDouble(tape_points_proj[i + 1]),
               cv::Scalar(255, 255, 255));
      cv::circle(image, ScalarPointToDouble(tape_points_proj[i]), 2,
                 cv::Scalar(255, 20, 147), cv::FILLED);
      cv::circle(image, ScalarPointToDouble(tape_points_proj[i + 1]), 2,
                 cv::Scalar(255, 20, 147), cv::FILLED);
    }
    cv::imshow("image", image);
    cv::waitKey(10);
  }

  return true;
}

namespace {
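// Helpers for working with centroid-to-projection offsets: Distance() returns
// the (x, y) offset between a detected (integer-valued) centroid and a
// projected point, and Less() compares the squared magnitudes of two offsets.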
template <typename S>
cv::Point_<S> Distance(cv::Point p, cv::Point_<S> q) {
  return cv::Point_<S>(S(p.x) - q.x, S(p.y) - q.y);
}

template <typename S>
bool Less(cv::Point_<S> distance_1, cv::Point_<S> distance_2) {
  return (ceres::pow(distance_1.x, 2) + ceres::pow(distance_1.y, 2) <
          ceres::pow(distance_2.x, 2) + ceres::pow(distance_2.y, 2));
}
}  // namespace

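// Returns the (x, y) offset between the given centroid and the projected tape
// point it is matched with. The centroid corresponding to the middle tape
// point is pinned to that point; every other centroid is matched to whichever
// projected point is nearest, which gives the solver slack when one piece of
// tape shows up as multiple blobs.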
template <typename S>
cv::Point_<S> TargetEstimator::DistanceFromTape(
    size_t centroid_index,
    const std::vector<cv::Point_<S>> &tape_points) const {
  // Figure out which centroid should correspond to the middle tape point
  size_t middle_index = centroids_.size() / 2;
  if (centroids_.size() % 2 == 0) {
    // There are two possible middle centroids in this case. Pick the one
    // closer to the projected middle tape point.
    const auto tape_middle = tape_points[tape_points.size() / 2];
    const auto middle_distance_1 =
        Distance(centroids_[(centroids_.size() / 2) - 1], tape_middle);
    const auto middle_distance_2 =
        Distance(centroids_[centroids_.size() / 2], tape_middle);
    if (Less(middle_distance_1, middle_distance_2)) {
      middle_index--;
    }
  }

  auto distance = cv::Point_<S>(std::numeric_limits<S>::infinity(),
                                std::numeric_limits<S>::infinity());
  if (centroid_index == middle_index) {
    // Fix the middle centroid so the solver can't go too far off
    distance =
        Distance(centroids_[middle_index], tape_points[tape_points.size() / 2]);
  } else {
    // Give the other centroids some freedom in case some are split into pieces
    for (auto tape_point : tape_points) {
      const auto current_distance =
          Distance(centroids_[centroid_index], tape_point);
      if (Less(current_distance, distance)) {
        distance = current_distance;
      }
    }
  }

  return distance;
}

namespace {
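// Overlays the estimated distance, angles, and rotation as text on the given
// image for debugging.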
void DrawEstimateValues(double distance, double angle_to_target,
                        double angle_to_camera, double roll, double pitch,
                        double yaw, cv::Mat view_image) {
  constexpr int kTextX = 10;
  int text_y = 330;
  constexpr int kTextSpacing = 35;

  const auto kTextColor = cv::Scalar(0, 255, 255);
  constexpr double kFontScale = 1.0;

  cv::putText(view_image, absl::StrFormat("Distance: %.3f", distance),
              cv::Point(kTextX, text_y += kTextSpacing),
              cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
  cv::putText(view_image,
              absl::StrFormat("Angle to target: %.3f", angle_to_target),
              cv::Point(kTextX, text_y += kTextSpacing),
              cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
  cv::putText(view_image,
              absl::StrFormat("Angle to camera: %.3f", angle_to_camera),
              cv::Point(kTextX, text_y += kTextSpacing),
              cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);

  cv::putText(
      view_image,
      absl::StrFormat("Roll: %.3f, pitch: %.3f, yaw: %.3f", roll, pitch, yaw),
      cv::Point(kTextX, text_y += kTextSpacing), cv::FONT_HERSHEY_DUPLEX,
      kFontScale, kTextColor, 2);
}
}  // namespace

void TargetEstimator::DrawEstimate(const TargetEstimate &target_estimate,
                                   cv::Mat view_image) {
  DrawEstimateValues(target_estimate.distance(),
                     target_estimate.angle_to_target(),
                     target_estimate.angle_to_camera(),
                     target_estimate.rotation_camera_hub()->roll(),
                     target_estimate.rotation_camera_hub()->pitch(),
                     target_estimate.rotation_camera_hub()->yaw(), view_image);
}

void TargetEstimator::DrawEstimate(cv::Mat view_image) const {
  DrawEstimateValues(distance_, angle_to_target(), angle_to_camera_, roll_,
                     pitch_, yaw_, view_image);
}

}  // namespace y2022::vision