Blame - y2022/vision/target_estimator.cc - RealtimeRoboticsGroup/test

blob: 67736316af667dd6a058cdf3a20ad71b8adf97a4 [file] [log] [blame]

milind-u	9219598	2022-01-22 20:29:31 -0800	[diff] [blame]	1	#include "y2022/vision/target_estimator.h"
				2
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	3	#include "absl/strings/str_format.h"
				4	#include "aos/time/time.h"
				5	#include "ceres/ceres.h"
				6	#include "frc971/control_loops/quaternion_utils.h"
				7	#include "opencv2/core/core.hpp"
				8	#include "opencv2/core/eigen.hpp"
				9	#include "opencv2/features2d.hpp"
				10	#include "opencv2/highgui/highgui.hpp"
				11	#include "opencv2/imgproc.hpp"
				12
				13	DEFINE_bool(freeze_roll, true, "If true, don't solve for roll");
				14	DEFINE_bool(freeze_pitch, false, "If true, don't solve for pitch");
				15	DEFINE_bool(freeze_yaw, false, "If true, don't solve for yaw");
				16	DEFINE_bool(freeze_camera_height, true,
				17	"If true, don't solve for camera height");
				18	DEFINE_bool(freeze_angle_to_camera, true,
				19	"If true, don't solve for polar angle to camera");
				20
				21	DEFINE_uint64(max_num_iterations, 200,
				22	"Maximum number of iterations for the ceres solver");
				23	DEFINE_bool(solver_output, false,
				24	"If true, log the solver progress and results");
				25
milind-u	9219598	2022-01-22 20:29:31 -0800	[diff] [blame]	26	namespace y2022::vision {
				27
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	28	std::vector<cv::Point3d> TargetEstimator::ComputeTapePoints() {
				29	std::vector<cv::Point3d> tape_points;
				30	tape_points.reserve(kNumPiecesOfTape);
milind-u	9219598	2022-01-22 20:29:31 -0800	[diff] [blame]	31
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	32	constexpr size_t kNumVisiblePiecesOfTape = 5;
				33	for (size_t i = 0; i < kNumVisiblePiecesOfTape; i++) {
				34	// The center piece of tape is at 0 rad, so the angle indices are offset
				35	// by the number of pieces of tape on each side of it
				36	const double theta_index =
				37	static_cast<double>(i) - ((kNumVisiblePiecesOfTape - 1) / 2);
				38	// The polar angle is a multiple of the angle between tape centers
				39	double theta = theta_index * ((2.0 * M_PI) / kNumPiecesOfTape);
				40	tape_points.emplace_back(kUpperHubRadius * std::cos(theta),
				41	kUpperHubRadius * std::sin(theta), kTapeHeight);
				42	}
milind-u	9219598	2022-01-22 20:29:31 -0800	[diff] [blame]	43
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	44	return tape_points;
				45	}
milind-u	9219598	2022-01-22 20:29:31 -0800	[diff] [blame]	46
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	47	const std::vector<cv::Point3d> TargetEstimator::kTapePoints =
				48	ComputeTapePoints();
				49
				50	TargetEstimator::TargetEstimator(cv::Mat intrinsics, cv::Mat extrinsics)
				51	: centroids_(),
				52	image_(std::nullopt),
				53	roll_(0.0),
				54	pitch_(0.0),
				55	yaw_(M_PI),
				56	distance_(3.0),
				57	angle_to_camera_(0.0),
				58	// TODO(milind): add IMU height
				59	camera_height_(extrinsics.at<double>(2, 3)) {
				60	cv::cv2eigen(intrinsics, intrinsics_);
				61	cv::cv2eigen(extrinsics, extrinsics_);
				62	}
				63
				64	namespace {
				65	void SetBoundsOrFreeze(double *param, bool freeze, double min, double max,
				66	ceres::Problem *problem) {
				67	if (freeze) {
				68	problem->SetParameterization(
				69	param, new ceres::SubsetParameterization(1, std::vector<int>{0}));
				70	} else {
				71	problem->SetParameterLowerBound(param, 0, min);
				72	problem->SetParameterUpperBound(param, 0, max);
				73	}
				74	}
				75	} // namespace
				76
				77	void TargetEstimator::Solve(const std::vector<cv::Point> &centroids,
				78	std::optional<cv::Mat> image) {
				79	auto start = aos::monotonic_clock::now();
				80
				81	centroids_ = centroids;
				82	image_ = image;
				83
				84	ceres::Problem problem;
				85
				86	// Set up the only cost function (also known as residual). This uses
				87	// auto-differentiation to obtain the derivative (jacobian).
				88	ceres::CostFunction *cost_function =
				89	new ceres::AutoDiffCostFunction<TargetEstimator, ceres::DYNAMIC, 1, 1, 1,
				90	1, 1, 1>(this, centroids_.size() * 2,
				91	ceres::DO_NOT_TAKE_OWNERSHIP);
				92
				93	// TODO(milind): add loss function when we get more noisy data
				94	problem.AddResidualBlock(cost_function, nullptr, &roll_, &pitch_, &yaw_,
				95	&distance_, &angle_to_camera_, &camera_height_);
				96
				97	// TODO(milind): seed values at localizer output, and constrain to be close to
				98	// that.
				99
				100	// Constrain the rotation, otherwise there can be multiple solutions.
				101	// There shouldn't be too much roll or pitch
				102	constexpr double kMaxRoll = 0.1;
				103	SetBoundsOrFreeze(&roll_, FLAGS_freeze_roll, -kMaxRoll, kMaxRoll, &problem);
				104
				105	constexpr double kPitch = -35.0 * M_PI / 180.0;
				106	constexpr double kMaxPitchDelta = 0.15;
				107	SetBoundsOrFreeze(&pitch_, FLAGS_freeze_pitch, kPitch - kMaxPitchDelta,
				108	kPitch + kMaxPitchDelta, &problem);
				109	// Constrain the yaw to where the target would be visible
				110	constexpr double kMaxYawDelta = M_PI / 4.0;
				111	SetBoundsOrFreeze(&yaw_, FLAGS_freeze_yaw, M_PI - kMaxYawDelta,
				112	M_PI + kMaxYawDelta, &problem);
				113
				114	constexpr double kMaxHeightDelta = 0.1;
				115	SetBoundsOrFreeze(&camera_height_, FLAGS_freeze_camera_height,
				116	camera_height_ - kMaxHeightDelta,
				117	camera_height_ + kMaxHeightDelta, &problem);
				118
				119	// Distances shouldn't be too close to the target or too far
				120	constexpr double kMinDistance = 1.0;
				121	constexpr double kMaxDistance = 10.0;
				122	SetBoundsOrFreeze(&distance_, false, kMinDistance, kMaxDistance, &problem);
				123
				124	// Keep the angle between +/- half of the angle between piece of tape
				125	constexpr double kMaxAngle = ((2.0 * M_PI) / kNumPiecesOfTape) / 2.0;
				126	SetBoundsOrFreeze(&angle_to_camera_, FLAGS_freeze_angle_to_camera, -kMaxAngle,
				127	kMaxAngle, &problem);
				128
				129	ceres::Solver::Options options;
				130	options.minimizer_progress_to_stdout = FLAGS_solver_output;
				131	options.gradient_tolerance = 1e-12;
				132	options.function_tolerance = 1e-16;
				133	options.parameter_tolerance = 1e-12;
				134	options.max_num_iterations = FLAGS_max_num_iterations;
				135	ceres::Solver::Summary summary;
				136	ceres::Solve(options, &problem, &summary);
				137
				138	auto end = aos::monotonic_clock::now();
				139	LOG(INFO) << "Target estimation elapsed time: "
				140	<< std::chrono::duration<double, std::milli>(end - start).count()
				141	<< " ms";
				142
Milind Upadhyay	14279de	2022-02-26 16:07:53 -0800	[diff] [blame^]	143	// Use a sigmoid to convert the final cost into a confidence for the
				144	// localizer. Fit a sigmoid to the points of (0, 1) and two other reasonable
				145	// residual-confidence combinations using
				146	// https://www.desmos.com/calculator/jj7p8zk0w2
				147	constexpr double kSigmoidCapacity = 1.206;
				148	// Stretch the sigmoid out correctly.
				149	// Currently, good estimates have final costs of around 1 to 2 pixels.
				150	constexpr double kSigmoidScalar = 0.2061;
				151	constexpr double kSigmoidGrowthRate = -0.1342;
				152	if (centroids_.size() > 0) {
				153	confidence_ = kSigmoidCapacity /
				154	(1.0 + kSigmoidScalar * std::exp(-kSigmoidGrowthRate *
				155	summary.final_cost));
				156	} else {
				157	// If we detected no blobs, the confidence should be zero and not depending
				158	// on the final cost, which would be 0.
				159	confidence_ = 0.0;
				160	}
				161
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	162	if (FLAGS_solver_output) {
				163	LOG(INFO) << summary.FullReport();
				164
				165	LOG(INFO) << "roll: " << roll_;
				166	LOG(INFO) << "pitch: " << pitch_;
				167	LOG(INFO) << "yaw: " << yaw_;
				168	LOG(INFO) << "angle to target (based on yaw): " << angle_to_target();
				169	LOG(INFO) << "angle to camera (polar): " << angle_to_camera_;
				170	LOG(INFO) << "distance (polar): " << distance_;
				171	LOG(INFO) << "camera height: " << camera_height_;
Milind Upadhyay	14279de	2022-02-26 16:07:53 -0800	[diff] [blame^]	172	LOG(INFO) << "confidence: " << confidence_;
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	173	}
				174	}
				175
				176	namespace {
				177	// Hacks to extract a double from a scalar, which is either a ceres jet or a
				178	// double. Only used for debugging and displaying.
				179	template <typename S>
				180	double ScalarToDouble(S s) {
				181	const double ptr = reinterpret_cast<double >(&s);
				182	return *ptr;
				183	}
				184
				185	template <typename S>
				186	cv::Point2d ScalarPointToDouble(cv::Point_<S> p) {
				187	return cv::Point2d(ScalarToDouble(p.x), ScalarToDouble(p.y));
				188	}
				189	} // namespace
				190
				191	template <typename S>
				192	bool TargetEstimator::operator()(const S const roll, const S const pitch,
				193	const S const yaw, const S const distance,
				194	const S *const theta,
				195	const S *const camera_height,
				196	S *residual) const {
				197	using Vector3s = Eigen::Matrix<S, 3, 1>;
				198	using Affine3s = Eigen::Transform<S, 3, Eigen::Affine>;
				199
				200	Eigen::AngleAxis<S> roll_angle(*roll, Vector3s::UnitX());
				201	Eigen::AngleAxis<S> pitch_angle(*pitch, Vector3s::UnitY());
				202	Eigen::AngleAxis<S> yaw_angle(*yaw, Vector3s::UnitZ());
				203	// Construct the rotation and translation of the camera in the hub's frame
				204	Eigen::Quaternion<S> R_camera_hub = yaw_angle * pitch_angle * roll_angle;
				205	Vector3s T_camera_hub(distance ceres::cos(*theta),
				206	distance ceres::sin(theta), camera_height);
				207
				208	Affine3s H_camera_hub = Eigen::Translation<S, 3>(T_camera_hub) * R_camera_hub;
				209
				210	std::vector<cv::Point_<S>> tape_points_proj;
				211	for (cv::Point3d tape_point_hub : kTapePoints) {
				212	Vector3s tape_point_hub_eigen(S(tape_point_hub.x), S(tape_point_hub.y),
				213	S(tape_point_hub.z));
				214
				215	// With X, Y, Z being world axes and x, y, z being camera axes,
				216	// x = Y, y = Z, z = -X
				217	static const Eigen::Matrix3d kCameraAxisConversion =
				218	(Eigen::Matrix3d() << 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, -1.0, 0.0, 0.0)
				219	.finished();
				220	// Project the 3d tape point onto the image using the transformation and
				221	// intrinsics
				222	Vector3s tape_point_proj =
				223	intrinsics_ * (kCameraAxisConversion *
				224	(H_camera_hub.inverse() * tape_point_hub_eigen));
				225
				226	// Normalize the projected point
				227	tape_points_proj.emplace_back(tape_point_proj.x() / tape_point_proj.z(),
				228	tape_point_proj.y() / tape_point_proj.z());
				229	VLOG(1) << "Projected tape point: "
				230	<< ScalarPointToDouble(
				231	tape_points_proj[tape_points_proj.size() - 1]);
				232	}
				233
				234	for (size_t i = 0; i < centroids_.size(); i++) {
				235	const auto distance = DistanceFromTape(i, tape_points_proj);
				236	// Set the residual to the (x, y) distance of the centroid from the
				237	// nearest projected piece of tape
				238	residual[i * 2] = distance.x;
				239	residual[(i * 2) + 1] = distance.y;
				240	}
				241
				242	if (image_.has_value()) {
				243	// Draw the current stage of the solving
				244	cv::Mat image = image_->clone();
				245	for (size_t i = 0; i < tape_points_proj.size() - 1; i++) {
				246	cv::line(image, ScalarPointToDouble(tape_points_proj[i]),
				247	ScalarPointToDouble(tape_points_proj[i + 1]),
				248	cv::Scalar(255, 255, 255));
				249	cv::circle(image, ScalarPointToDouble(tape_points_proj[i]), 2,
				250	cv::Scalar(255, 20, 147), cv::FILLED);
				251	cv::circle(image, ScalarPointToDouble(tape_points_proj[i + 1]), 2,
				252	cv::Scalar(255, 20, 147), cv::FILLED);
				253	}
				254	cv::imshow("image", image);
				255	cv::waitKey(10);
				256	}
				257
				258	return true;
				259	}
				260
				261	namespace {
				262	template <typename S>
				263	cv::Point_<S> Distance(cv::Point p, cv::Point_<S> q) {
				264	return cv::Point_<S>(S(p.x) - q.x, S(p.y) - q.y);
				265	}
				266
				267	template <typename S>
				268	bool Less(cv::Point_<S> distance_1, cv::Point_<S> distance_2) {
				269	return (ceres::pow(distance_1.x, 2) + ceres::pow(distance_1.y, 2) <
				270	ceres::pow(distance_2.x, 2) + ceres::pow(distance_2.y, 2));
				271	}
				272	} // namespace
				273
				274	template <typename S>
				275	cv::Point_<S> TargetEstimator::DistanceFromTape(
				276	size_t centroid_index,
				277	const std::vector<cv::Point_<S>> &tape_points) const {
				278	// Figure out the middle index in the tape points
				279	size_t middle_index = centroids_.size() / 2;
				280	if (centroids_.size() % 2 == 0) {
				281	// There are two possible middles in this case. Figure out which one fits
				282	// the current better
				283	const auto tape_middle = tape_points[tape_points.size() / 2];
				284	const auto middle_distance_1 =
				285	Distance(centroids_[(centroids_.size() / 2) - 1], tape_middle);
				286	const auto middle_distance_2 =
				287	Distance(centroids_[centroids_.size() / 2], tape_middle);
				288	if (Less(middle_distance_1, middle_distance_2)) {
				289	middle_index--;
				290	}
				291	}
				292
				293	auto distance = cv::Point_<S>(std::numeric_limits<S>::infinity(),
				294	std::numeric_limits<S>::infinity());
				295	if (centroid_index == middle_index) {
				296	// Fix the middle centroid so the solver can't go too far off
				297	distance =
				298	Distance(centroids_[middle_index], tape_points[tape_points.size() / 2]);
				299	} else {
				300	// Give the other centroids some freedom in case some are split into pieces
				301	for (auto tape_point : tape_points) {
				302	const auto current_distance =
				303	Distance(centroids_[centroid_index], tape_point);
				304	if (Less(current_distance, distance)) {
				305	distance = current_distance;
				306	}
				307	}
				308	}
				309
				310	return distance;
				311	}
				312
				313	namespace {
				314	void DrawEstimateValues(double distance, double angle_to_target,
				315	double angle_to_camera, double roll, double pitch,
Milind Upadhyay	14279de	2022-02-26 16:07:53 -0800	[diff] [blame^]	316	double yaw, double confidence, cv::Mat view_image) {
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	317	constexpr int kTextX = 10;
Milind Upadhyay	14279de	2022-02-26 16:07:53 -0800	[diff] [blame^]	318	int text_y = 250;
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	319	constexpr int kTextSpacing = 35;
				320
				321	const auto kTextColor = cv::Scalar(0, 255, 255);
				322	constexpr double kFontScale = 1.0;
				323
				324	cv::putText(view_image, absl::StrFormat("Distance: %.3f", distance),
				325	cv::Point(kTextX, text_y += kTextSpacing),
				326	cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
				327	cv::putText(view_image,
				328	absl::StrFormat("Angle to target: %.3f", angle_to_target),
				329	cv::Point(kTextX, text_y += kTextSpacing),
				330	cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
				331	cv::putText(view_image,
				332	absl::StrFormat("Angle to camera: %.3f", angle_to_camera),
				333	cv::Point(kTextX, text_y += kTextSpacing),
				334	cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
				335
				336	cv::putText(
				337	view_image,
				338	absl::StrFormat("Roll: %.3f, pitch: %.3f, yaw: %.3f", roll, pitch, yaw),
				339	cv::Point(kTextX, text_y += kTextSpacing), cv::FONT_HERSHEY_DUPLEX,
				340	kFontScale, kTextColor, 2);
Milind Upadhyay	14279de	2022-02-26 16:07:53 -0800	[diff] [blame^]	341
				342	cv::putText(view_image, absl::StrFormat("Confidence: %.3f", confidence),
				343	cv::Point(kTextX, text_y += kTextSpacing),
				344	cv::FONT_HERSHEY_DUPLEX, kFontScale, kTextColor, 2);
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	345	}
				346	} // namespace
				347
				348	void TargetEstimator::DrawEstimate(const TargetEstimate &target_estimate,
				349	cv::Mat view_image) {
				350	DrawEstimateValues(target_estimate.distance(),
				351	target_estimate.angle_to_target(),
				352	target_estimate.angle_to_camera(),
				353	target_estimate.rotation_camera_hub()->roll(),
				354	target_estimate.rotation_camera_hub()->pitch(),
Milind Upadhyay	14279de	2022-02-26 16:07:53 -0800	[diff] [blame^]	355	target_estimate.rotation_camera_hub()->yaw(),
				356	target_estimate.confidence(), view_image);
Milind Upadhyay	f61e148	2022-02-11 20:42:55 -0800	[diff] [blame]	357	}
				358
				359	void TargetEstimator::DrawEstimate(cv::Mat view_image) const {
				360	DrawEstimateValues(distance_, angle_to_target(), angle_to_camera_, roll_,
Milind Upadhyay	14279de	2022-02-26 16:07:53 -0800	[diff] [blame^]	361	pitch_, yaw_, confidence_, view_image);
milind-u	9219598	2022-01-22 20:29:31 -0800	[diff] [blame]	362	}
				363
				364	} // namespace y2022::vision