milind-u | 9219598 | 2022-01-22 20:29:31 -0800 | [diff] [blame] | 1 | #include "y2022/vision/target_estimator.h" |
| 2 | |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 3 | #include "absl/strings/str_format.h" |
| 4 | #include "aos/time/time.h" |
| 5 | #include "ceres/ceres.h" |
| 6 | #include "frc971/control_loops/quaternion_utils.h" |
| 7 | #include "opencv2/core/core.hpp" |
| 8 | #include "opencv2/core/eigen.hpp" |
| 9 | #include "opencv2/features2d.hpp" |
| 10 | #include "opencv2/highgui/highgui.hpp" |
| 11 | #include "opencv2/imgproc.hpp" |
| 12 | |
| 13 | DEFINE_bool(freeze_roll, true, "If true, don't solve for roll"); |
| 14 | DEFINE_bool(freeze_pitch, false, "If true, don't solve for pitch"); |
| 15 | DEFINE_bool(freeze_yaw, false, "If true, don't solve for yaw"); |
| 16 | DEFINE_bool(freeze_camera_height, true, |
| 17 | "If true, don't solve for camera height"); |
| 18 | DEFINE_bool(freeze_angle_to_camera, true, |
| 19 | "If true, don't solve for polar angle to camera"); |
| 20 | |
| 21 | DEFINE_uint64(max_num_iterations, 200, |
| 22 | "Maximum number of iterations for the ceres solver"); |
| 23 | DEFINE_bool(solver_output, false, |
| 24 | "If true, log the solver progress and results"); |
| 25 | |
milind-u | 9219598 | 2022-01-22 20:29:31 -0800 | [diff] [blame] | 26 | namespace y2022::vision { |
| 27 | |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 28 | std::vector<cv::Point3d> TargetEstimator::ComputeTapePoints() { |
| 29 | std::vector<cv::Point3d> tape_points; |
| 30 | tape_points.reserve(kNumPiecesOfTape); |
milind-u | 9219598 | 2022-01-22 20:29:31 -0800 | [diff] [blame] | 31 | |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 32 | constexpr size_t kNumVisiblePiecesOfTape = 5; |
| 33 | for (size_t i = 0; i < kNumVisiblePiecesOfTape; i++) { |
| 34 | // The center piece of tape is at 0 rad, so the angle indices are offset |
| 35 | // by the number of pieces of tape on each side of it |
| 36 | const double theta_index = |
| 37 | static_cast<double>(i) - ((kNumVisiblePiecesOfTape - 1) / 2); |
| 38 | // The polar angle is a multiple of the angle between tape centers |
| 39 | double theta = theta_index * ((2.0 * M_PI) / kNumPiecesOfTape); |
| 40 | tape_points.emplace_back(kUpperHubRadius * std::cos(theta), |
| 41 | kUpperHubRadius * std::sin(theta), kTapeHeight); |
| 42 | } |
milind-u | 9219598 | 2022-01-22 20:29:31 -0800 | [diff] [blame] | 43 | |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 44 | return tape_points; |
| 45 | } |
milind-u | 9219598 | 2022-01-22 20:29:31 -0800 | [diff] [blame] | 46 | |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 47 | const std::vector<cv::Point3d> TargetEstimator::kTapePoints = |
| 48 | ComputeTapePoints(); |
| 49 | |
| 50 | TargetEstimator::TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics) |
| 51 | : centroids_(), |
| 52 | image_(std::nullopt), |
| 53 | roll_(0.0), |
| 54 | pitch_(0.0), |
| 55 | yaw_(M_PI), |
| 56 | distance_(3.0), |
| 57 | angle_to_camera_(0.0), |
| 58 | // TODO(milind): add IMU height |
| 59 | camera_height_(extrinsics.at<double>(2, 3)) { |
| 60 | cv::cv2eigen(intrinsics, intrinsics_); |
| 61 | cv::cv2eigen(extrinsics, extrinsics_); |
| 62 | } |
| 63 | |
| 64 | namespace { |
| 65 | void SetBoundsOrFreeze(double *param, bool freeze, double min, double max, |
| 66 | ceres::Problem *problem) { |
| 67 | if (freeze) { |
| 68 | problem->SetParameterization( |
| 69 | param, new ceres::SubsetParameterization(1, std::vector<int>{0})); |
| 70 | } else { |
| 71 | problem->SetParameterLowerBound(param, 0, min); |
| 72 | problem->SetParameterUpperBound(param, 0, max); |
| 73 | } |
| 74 | } |
| 75 | } // namespace |
| 76 | |
| 77 | void TargetEstimator::Solve(const std::vector<cv::Point> ¢roids, |
| 78 | std::optional<cv::Mat> image) { |
| 79 | auto start = aos::monotonic_clock::now(); |
| 80 | |
| 81 | centroids_ = centroids; |
| 82 | image_ = image; |
| 83 | |
| 84 | ceres::Problem problem; |
| 85 | |
| 86 | // Set up the only cost function (also known as residual). This uses |
| 87 | // auto-differentiation to obtain the derivative (jacobian). |
| 88 | ceres::CostFunction *cost_function = |
| 89 | new ceres::AutoDiffCostFunction<TargetEstimator, ceres::DYNAMIC, 1, 1, 1, |
| 90 | 1, 1, 1>(this, centroids_.size() * 2, |
| 91 | ceres::DO_NOT_TAKE_OWNERSHIP); |
| 92 | |
| 93 | // TODO(milind): add loss function when we get more noisy data |
| 94 | problem.AddResidualBlock(cost_function, nullptr, &roll_, &pitch_, &yaw_, |
| 95 | &distance_, &angle_to_camera_, &camera_height_); |
| 96 | |
| 97 | // TODO(milind): seed values at localizer output, and constrain to be close to |
| 98 | // that. |
| 99 | |
| 100 | // Constrain the rotation, otherwise there can be multiple solutions. |
| 101 | // There shouldn't be too much roll or pitch |
| 102 | constexpr double kMaxRoll = 0.1; |
| 103 | SetBoundsOrFreeze(&roll_, FLAGS_freeze_roll, -kMaxRoll, kMaxRoll, &problem); |
| 104 | |
| 105 | constexpr double kPitch = -35.0 * M_PI / 180.0; |
| 106 | constexpr double kMaxPitchDelta = 0.15; |
| 107 | SetBoundsOrFreeze(&pitch_, FLAGS_freeze_pitch, kPitch - kMaxPitchDelta, |
| 108 | kPitch + kMaxPitchDelta, &problem); |
| 109 | // Constrain the yaw to where the target would be visible |
| 110 | constexpr double kMaxYawDelta = M_PI / 4.0; |
| 111 | SetBoundsOrFreeze(&yaw_, FLAGS_freeze_yaw, M_PI - kMaxYawDelta, |
| 112 | M_PI + kMaxYawDelta, &problem); |
| 113 | |
| 114 | constexpr double kMaxHeightDelta = 0.1; |
| 115 | SetBoundsOrFreeze(&camera_height_, FLAGS_freeze_camera_height, |
| 116 | camera_height_ - kMaxHeightDelta, |
| 117 | camera_height_ + kMaxHeightDelta, &problem); |
| 118 | |
| 119 | // Distances shouldn't be too close to the target or too far |
| 120 | constexpr double kMinDistance = 1.0; |
| 121 | constexpr double kMaxDistance = 10.0; |
| 122 | SetBoundsOrFreeze(&distance_, false, kMinDistance, kMaxDistance, &problem); |
| 123 | |
| 124 | // Keep the angle between +/- half of the angle between piece of tape |
| 125 | constexpr double kMaxAngle = ((2.0 * M_PI) / kNumPiecesOfTape) / 2.0; |
| 126 | SetBoundsOrFreeze(&angle_to_camera_, FLAGS_freeze_angle_to_camera, -kMaxAngle, |
| 127 | kMaxAngle, &problem); |
| 128 | |
| 129 | ceres::Solver::Options options; |
| 130 | options.minimizer_progress_to_stdout = FLAGS_solver_output; |
| 131 | options.gradient_tolerance = 1e-12; |
| 132 | options.function_tolerance = 1e-16; |
| 133 | options.parameter_tolerance = 1e-12; |
| 134 | options.max_num_iterations = FLAGS_max_num_iterations; |
| 135 | ceres::Solver::Summary summary; |
| 136 | ceres::Solve(options, &problem, &summary); |
| 137 | |
| 138 | auto end = aos::monotonic_clock::now(); |
| 139 | LOG(INFO) << "Target estimation elapsed time: " |
| 140 | << std::chrono::duration<double, std::milli>(end - start).count() |
| 141 | << " ms"; |
| 142 | |
Milind Upadhyay | 14279de | 2022-02-26 16:07:53 -0800 | [diff] [blame^] | 143 | // Use a sigmoid to convert the final cost into a confidence for the |
| 144 | // localizer. Fit a sigmoid to the points of (0, 1) and two other reasonable |
| 145 | // residual-confidence combinations using |
| 146 | // https://www.desmos.com/calculator/jj7p8zk0w2 |
| 147 | constexpr double kSigmoidCapacity = 1.206; |
| 148 | // Stretch the sigmoid out correctly. |
| 149 | // Currently, good estimates have final costs of around 1 to 2 pixels. |
| 150 | constexpr double kSigmoidScalar = 0.2061; |
| 151 | constexpr double kSigmoidGrowthRate = -0.1342; |
| 152 | if (centroids_.size() > 0) { |
| 153 | confidence_ = kSigmoidCapacity / |
| 154 | (1.0 + kSigmoidScalar * std::exp(-kSigmoidGrowthRate * |
| 155 | summary.final_cost)); |
| 156 | } else { |
| 157 | // If we detected no blobs, the confidence should be zero and not depending |
| 158 | // on the final cost, which would be 0. |
| 159 | confidence_ = 0.0; |
| 160 | } |
| 161 | |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 162 | if (FLAGS_solver_output) { |
| 163 | LOG(INFO) << summary.FullReport(); |
| 164 | |
| 165 | LOG(INFO) << "roll: " << roll_; |
| 166 | LOG(INFO) << "pitch: " << pitch_; |
| 167 | LOG(INFO) << "yaw: " << yaw_; |
| 168 | LOG(INFO) << "angle to target (based on yaw): " << angle_to_target(); |
| 169 | LOG(INFO) << "angle to camera (polar): " << angle_to_camera_; |
| 170 | LOG(INFO) << "distance (polar): " << distance_; |
| 171 | LOG(INFO) << "camera height: " << camera_height_; |
Milind Upadhyay | 14279de | 2022-02-26 16:07:53 -0800 | [diff] [blame^] | 172 | LOG(INFO) << "confidence: " << confidence_; |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 173 | } |
| 174 | } |
| 175 | |
| 176 | namespace { |
| 177 | // Hacks to extract a double from a scalar, which is either a ceres jet or a |
| 178 | // double. Only used for debugging and displaying. |
| 179 | template <typename S> |
| 180 | double ScalarToDouble(S s) { |
| 181 | const double *ptr = reinterpret_cast<double *>(&s); |
| 182 | return *ptr; |
| 183 | } |
| 184 | |
| 185 | template <typename S> |
| 186 | cv::Point2d ScalarPointToDouble(cv::Point_<S> p) { |
| 187 | return cv::Point2d(ScalarToDouble(p.x), ScalarToDouble(p.y)); |
| 188 | } |
| 189 | } // namespace |
| 190 | |
| 191 | template <typename S> |
| 192 | bool TargetEstimator::operator()(const S *const roll, const S *const pitch, |
| 193 | const S *const yaw, const S *const distance, |
| 194 | const S *const theta, |
| 195 | const S *const camera_height, |
| 196 | S *residual) const { |
| 197 | using Vector3s = Eigen::Matrix<S, 3, 1>; |
| 198 | using Affine3s = Eigen::Transform<S, 3, Eigen::Affine>; |
| 199 | |
| 200 | Eigen::AngleAxis<S> roll_angle(*roll, Vector3s::UnitX()); |
| 201 | Eigen::AngleAxis<S> pitch_angle(*pitch, Vector3s::UnitY()); |
| 202 | Eigen::AngleAxis<S> yaw_angle(*yaw, Vector3s::UnitZ()); |
| 203 | // Construct the rotation and translation of the camera in the hub's frame |
| 204 | Eigen::Quaternion<S> R_camera_hub = yaw_angle * pitch_angle * roll_angle; |
| 205 | Vector3s T_camera_hub(*distance * ceres::cos(*theta), |
| 206 | *distance * ceres::sin(*theta), *camera_height); |
| 207 | |
| 208 | Affine3s H_camera_hub = Eigen::Translation<S, 3>(T_camera_hub) * R_camera_hub; |
| 209 | |
| 210 | std::vector<cv::Point_<S>> tape_points_proj; |
| 211 | for (cv::Point3d tape_point_hub : kTapePoints) { |
| 212 | Vector3s tape_point_hub_eigen(S(tape_point_hub.x), S(tape_point_hub.y), |
| 213 | S(tape_point_hub.z)); |
| 214 | |
| 215 | // With X, Y, Z being world axes and x, y, z being camera axes, |
| 216 | // x = Y, y = Z, z = -X |
| 217 | static const Eigen::Matrix3d kCameraAxisConversion = |
| 218 | (Eigen::Matrix3d() << 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0) |
| 219 | .finished(); |
| 220 | // Project the 3d tape point onto the image using the transformation and |
| 221 | // intrinsics |
| 222 | Vector3s tape_point_proj = |
| 223 | intrinsics_ * (kCameraAxisConversion * |
| 224 | (H_camera_hub.inverse() * tape_point_hub_eigen)); |
| 225 | |
| 226 | // Normalize the projected point |
| 227 | tape_points_proj.emplace_back(tape_point_proj.x() / tape_point_proj.z(), |
| 228 | tape_point_proj.y() / tape_point_proj.z()); |
| 229 | VLOG(1) << "Projected tape point: " |
| 230 | << ScalarPointToDouble( |
| 231 | tape_points_proj[tape_points_proj.size() - 1]); |
| 232 | } |
| 233 | |
| 234 | for (size_t i = 0; i < centroids_.size(); i++) { |
| 235 | const auto distance = DistanceFromTape(i, tape_points_proj); |
| 236 | // Set the residual to the (x, y) distance of the centroid from the |
| 237 | // nearest projected piece of tape |
| 238 | residual[i * 2] = distance.x; |
| 239 | residual[(i * 2) + 1] = distance.y; |
| 240 | } |
| 241 | |
| 242 | if (image_.has_value()) { |
| 243 | // Draw the current stage of the solving |
| 244 | cv::Mat image = image_->clone(); |
| 245 | for (size_t i = 0; i < tape_points_proj.size() - 1; i++) { |
| 246 | cv::line(image, ScalarPointToDouble(tape_points_proj[i]), |
| 247 | ScalarPointToDouble(tape_points_proj[i + 1]), |
| 248 | cv::Scalar(255, 255, 255)); |
| 249 | cv::circle(image, ScalarPointToDouble(tape_points_proj[i]), 2, |
| 250 | cv::Scalar(255, 20, 147), cv::FILLED); |
| 251 | cv::circle(image, ScalarPointToDouble(tape_points_proj[i + 1]), 2, |
| 252 | cv::Scalar(255, 20, 147), cv::FILLED); |
| 253 | } |
| 254 | cv::imshow("image", image); |
| 255 | cv::waitKey(10); |
| 256 | } |
| 257 | |
| 258 | return true; |
| 259 | } |
| 260 | |
| 261 | namespace { |
| 262 | template <typename S> |
| 263 | cv::Point_<S> Distance(cv::Point p, cv::Point_<S> q) { |
| 264 | return cv::Point_<S>(S(p.x) - q.x, S(p.y) - q.y); |
| 265 | } |
| 266 | |
| 267 | template <typename S> |
| 268 | bool Less(cv::Point_<S> distance_1, cv::Point_<S> distance_2) { |
| 269 | return (ceres::pow(distance_1.x, 2) + ceres::pow(distance_1.y, 2) < |
| 270 | ceres::pow(distance_2.x, 2) + ceres::pow(distance_2.y, 2)); |
| 271 | } |
| 272 | } // namespace |
| 273 | |
| 274 | template <typename S> |
| 275 | cv::Point_<S> TargetEstimator::DistanceFromTape( |
| 276 | size_t centroid_index, |
| 277 | const std::vector<cv::Point_<S>> &tape_points) const { |
| 278 | // Figure out the middle index in the tape points |
| 279 | size_t middle_index = centroids_.size() / 2; |
| 280 | if (centroids_.size() % 2 == 0) { |
| 281 | // There are two possible middles in this case. Figure out which one fits |
| 282 | // the current better |
| 283 | const auto tape_middle = tape_points[tape_points.size() / 2]; |
| 284 | const auto middle_distance_1 = |
| 285 | Distance(centroids_[(centroids_.size() / 2) - 1], tape_middle); |
| 286 | const auto middle_distance_2 = |
| 287 | Distance(centroids_[centroids_.size() / 2], tape_middle); |
| 288 | if (Less(middle_distance_1, middle_distance_2)) { |
| 289 | middle_index--; |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | auto distance = cv::Point_<S>(std::numeric_limits<S>::infinity(), |
| 294 | std::numeric_limits<S>::infinity()); |
| 295 | if (centroid_index == middle_index) { |
| 296 | // Fix the middle centroid so the solver can't go too far off |
| 297 | distance = |
| 298 | Distance(centroids_[middle_index], tape_points[tape_points.size() / 2]); |
| 299 | } else { |
| 300 | // Give the other centroids some freedom in case some are split into pieces |
| 301 | for (auto tape_point : tape_points) { |
| 302 | const auto current_distance = |
| 303 | Distance(centroids_[centroid_index], tape_point); |
| 304 | if (Less(current_distance, distance)) { |
| 305 | distance = current_distance; |
| 306 | } |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | return distance; |
| 311 | } |
| 312 | |
| 313 | namespace { |
| 314 | void DrawEstimateValues(double distance, double angle_to_target, |
| 315 | double angle_to_camera, double roll, double pitch, |
Milind Upadhyay | 14279de | 2022-02-26 16:07:53 -0800 | [diff] [blame^] | 316 | double yaw, double confidence, cv::Mat view_image) { |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 317 | constexpr int kTextX = 10; |
Milind Upadhyay | 14279de | 2022-02-26 16:07:53 -0800 | [diff] [blame^] | 318 | int text_y = 250; |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 319 | constexpr int kTextSpacing = 35; |
| 320 | |
| 321 | const auto kTextColor = cv::Scalar(0, 255, 255); |
| 322 | constexpr double kFontScale = 1.0; |
| 323 | |
| 324 | cv::putText(view_image, absl::StrFormat("Distance: %.3f", distance), |
| 325 | cv::Point(kTextX, text_y += kTextSpacing), |
| 326 | cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2); |
| 327 | cv::putText(view_image, |
| 328 | absl::StrFormat("Angle to target: %.3f", angle_to_target), |
| 329 | cv::Point(kTextX, text_y += kTextSpacing), |
| 330 | cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2); |
| 331 | cv::putText(view_image, |
| 332 | absl::StrFormat("Angle to camera: %.3f", angle_to_camera), |
| 333 | cv::Point(kTextX, text_y += kTextSpacing), |
| 334 | cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2); |
| 335 | |
| 336 | cv::putText( |
| 337 | view_image, |
| 338 | absl::StrFormat("Roll: %.3f, pitch: %.3f, yaw: %.3f", roll, pitch, yaw), |
| 339 | cv::Point(kTextX, text_y += kTextSpacing), cv::FONT_HERSHEY_DUPLEX, |
| 340 | kFontScale, kTextColor, 2); |
Milind Upadhyay | 14279de | 2022-02-26 16:07:53 -0800 | [diff] [blame^] | 341 | |
| 342 | cv::putText(view_image, absl::StrFormat("Confidence: %.3f", confidence), |
| 343 | cv::Point(kTextX, text_y += kTextSpacing), |
| 344 | cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2); |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 345 | } |
| 346 | } // namespace |
| 347 | |
| 348 | void TargetEstimator::DrawEstimate(const TargetEstimate &target_estimate, |
| 349 | cv::Mat view_image) { |
| 350 | DrawEstimateValues(target_estimate.distance(), |
| 351 | target_estimate.angle_to_target(), |
| 352 | target_estimate.angle_to_camera(), |
| 353 | target_estimate.rotation_camera_hub()->roll(), |
| 354 | target_estimate.rotation_camera_hub()->pitch(), |
Milind Upadhyay | 14279de | 2022-02-26 16:07:53 -0800 | [diff] [blame^] | 355 | target_estimate.rotation_camera_hub()->yaw(), |
| 356 | target_estimate.confidence(), view_image); |
Milind Upadhyay | f61e148 | 2022-02-11 20:42:55 -0800 | [diff] [blame] | 357 | } |
| 358 | |
| 359 | void TargetEstimator::DrawEstimate(cv::Mat view_image) const { |
| 360 | DrawEstimateValues(distance_, angle_to_target(), angle_to_camera_, roll_, |
Milind Upadhyay | 14279de | 2022-02-26 16:07:53 -0800 | [diff] [blame^] | 361 | pitch_, yaw_, confidence_, view_image); |
milind-u | 9219598 | 2022-01-22 20:29:31 -0800 | [diff] [blame] | 362 | } |
| 363 | |
| 364 | } // namespace y2022::vision |