Blame - y2023/vision/yolov5.cc - RealtimeRoboticsGroup/test

blob: 473437c5ef9da53a6a79104a2f96f21b0fa39ec4 [file] [log] [blame]

Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	1	#include "yolov5.h"
				2
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	3	#include <tensorflow/lite/c/common.h>
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	4	#include <tensorflow/lite/interpreter.h>
				5	#include <tensorflow/lite/kernels/register.h>
				6	#include <tensorflow/lite/model.h>
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	7	#include <tflite/public/edgetpu.h>
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	8	#include <tflite/public/edgetpu_c.h>
				9
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	10	#include <chrono>
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	11	#include <opencv2/dnn.hpp>
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	12	#include <string>
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	13
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	14	#include "absl/types/span.h"
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	15	#include "gflags/gflags.h"
				16	#include "glog/logging.h"
				17
				18	DEFINE_double(conf_threshold, 0.9,
				19	"Threshold value for confidence scores. Detections with a "
				20	"confidence score below this value will be ignored.");
				21
				22	DEFINE_double(
				23	nms_threshold, 0.5,
				24	"Threshold value for non-maximum suppression. Detections with an "
				25	"intersection-over-union value below this value will be removed.");
				26
				27	DEFINE_int32(nthreads, 6, "Number of threads to use during inference.");
				28
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	29	DEFINE_bool(visualize_detections, false, "Display inference output");
				30
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	31	namespace y2023 {
				32	namespace vision {
				33
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	34	class YOLOV5Impl : public YOLOV5 {
				35	public:
				36	// Takes a model path as string and and loads a pre-trained
				37	// YOLOv5 model from the specified path.
				38	void LoadModel(const std::string path);
				39
				40	// Takes an image and returns a Detection.
				41	std::vector<Detection> ProcessImage(cv::Mat image);
				42
				43	private:
				44	// Convert an OpenCV Mat object to a tensor input
				45	// that can be fed to the TensorFlow Lite model.
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	46	void ConvertCVMatToTensor(cv::Mat src, absl::Span<uint8_t> tensor);
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	47
				48	// Resizes, converts color space, and converts
				49	// image data type before inference.
				50	void Preprocess(cv::Mat image);
				51
				52	// Converts a TensorFlow Lite tensor to a 2D vector.
				53	std::vector<std::vector<float>> TensorToVector2D(TfLiteTensor *src_tensor,
				54	const int rows,
				55	const int columns);
				56
				57	// Performs non-maximum suppression to remove overlapping bounding boxes.
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	58	std::vector<Detection> NonMaximumSupression(
				59	const std::vector<std::vector<float>> &orig_preds, const int rows,
				60	const int columns, std::vector<Detection> *detections,
				61	std::vector<int> *indices);
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	62	// Models
				63	std::unique_ptr<tflite::FlatBufferModel> model_;
				64	std::unique_ptr<tflite::Interpreter> interpreter_;
				65	tflite::StderrReporter error_reporter_;
				66
				67	// Parameters of interpreter's input
				68	int input_;
				69	int in_height_;
				70	int in_width_;
				71	int in_channels_;
				72	int in_type_;
				73
				74	// Parameters of original image
				75	int img_height_;
				76	int img_width_;
				77
				78	// Input of the interpreter
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	79	absl::Span<uint8_t> input_8_;
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	80
				81	// Subtract this offset from class labels to get the actual label.
				82	static constexpr int kClassIdOffset = 5;
				83	};
				84
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	85	std::unique_ptr<YOLOV5> MakeYOLOV5() { return std::make_unique<YOLOV5Impl>(); }
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	86
				87	void YOLOV5Impl::LoadModel(const std::string path) {
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	88	VLOG(1) << "Load model: Start";
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	89
				90	tflite::ops::builtin::BuiltinOpResolver resolver;
				91
				92	model_ = tflite::FlatBufferModel::VerifyAndBuildFromFile(path.c_str());
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	93	CHECK(model_);
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	94	CHECK(model_->initialized());
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	95	VLOG(1) << "Load model: Build model from file success";
				96
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	97	CHECK_EQ(tflite::InterpreterBuilder(*model_, resolver)(&interpreter_),
				98	kTfLiteOk);
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	99	VLOG(1) << "Load model: Interpreter builder success";
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	100
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	101	size_t num_devices;
				102	std::unique_ptr<edgetpu_device, decltype(&edgetpu_free_devices)> devices(
				103	edgetpu_list_devices(&num_devices), &edgetpu_free_devices);
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	104
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	105	CHECK_EQ(num_devices, 1ul);
				106	const auto &device = devices.get()[0];
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	107	VLOG(1) << "Load model: Got Devices";
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	108
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	109	auto *delegate =
				110	edgetpu_create_delegate(device.type, device.path, nullptr, 0);
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	111
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	112	interpreter_->ModifyGraphWithDelegate(delegate);
				113
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	114	VLOG(1) << "Load model: Modify graph with delegate complete";
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	115
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	116	TfLiteStatus status = interpreter_->AllocateTensors();
				117	CHECK_EQ(status, kTfLiteOk);
				118	CHECK(interpreter_);
				119
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	120	VLOG(1) << "Load model: Allocate tensors success";
				121
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	122	input_ = interpreter_->inputs()[0];
				123	TfLiteIntArray *dims = interpreter_->tensor(input_)->dims;
				124	in_height_ = dims->data[1];
				125	in_width_ = dims->data[2];
				126	in_channels_ = dims->data[3];
				127	in_type_ = interpreter_->tensor(input_)->type;
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	128
				129	int tensor_size = 1;
				130	for (int i = 0; i < dims->size; i++) {
				131	tensor_size *= dims->data[i];
				132	}
				133	input_8_ =
				134	absl::Span(interpreter_->typed_tensor<uint8_t>(input_), tensor_size);
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	135
				136	interpreter_->SetNumThreads(FLAGS_nthreads);
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	137
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	138	VLOG(1) << "Load model: Done";
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	139	}
				140
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	141	void YOLOV5Impl::ConvertCVMatToTensor(cv::Mat src, absl::Span<uint8_t> tensor) {
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	142	CHECK(src.type() == CV_8UC3);
				143	int n = 0, nc = src.channels(), ne = src.elemSize();
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	144	VLOG(2) << "ConvertCVMatToTensor: Rows " << src.rows;
				145	VLOG(2) << "ConvertCVMatToTensor: Cols " << src.cols;
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	146	for (int y = 0; y < src.rows; ++y) {
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	147	auto *row_ptr = src.ptr<uint8_t>(y);
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	148	for (int x = 0; x < src.cols; ++x) {
				149	for (int c = 0; c < nc; ++c) {
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	150	tensor[n++] = (row_ptr + x ne + c);
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	151	}
				152	}
				153	}
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	154	}
				155
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	156	std::vector<std::vector<float>> YOLOV5Impl::TensorToVector2D(
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	157	TfLiteTensor *src_tensor, const int rows, const int columns) {
				158	auto scale = src_tensor->params.scale;
				159	auto zero_point = src_tensor->params.zero_point;
				160	std::vector<std::vector<float>> result_vec;
				161	for (int32_t i = 0; i < rows; i++) {
				162	std::vector<float> row_values;
				163	for (int32_t j = 0; j < columns; j++) {
				164	float val_float =
				165	((static_cast<int32_t>(src_tensor->data.uint8[i * columns + j])) -
				166	zero_point) *
				167	scale;
				168	row_values.push_back(val_float);
				169	}
				170	result_vec.push_back(row_values);
				171	}
				172	return result_vec;
				173	}
				174
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	175	std::vector<Detection> YOLOV5Impl::NonMaximumSupression(
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	176	const std::vector<std::vector<float>> &orig_preds, const int rows,
				177	const int columns, std::vector<Detection> *detections,
				178	std::vector<int> *indices)
				179
				180	{
				181	std::vector<float> scores;
				182	double confidence;
				183	cv::Point class_id;
				184
				185	for (int i = 0; i < rows; i++) {
				186	if (orig_preds[i][4] > FLAGS_conf_threshold) {
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	187	float x = orig_preds[i][0];
				188	float y = orig_preds[i][1];
				189	float w = orig_preds[i][2];
				190	float h = orig_preds[i][3];
				191	int left = static_cast<int>((x - 0.5 * w) * img_width_);
				192	int top = static_cast<int>((y - 0.5 * h) * img_height_);
				193	int width = static_cast<int>(w * img_width_);
				194	int height = static_cast<int>(h * img_height_);
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	195
				196	for (int j = 5; j < columns; j++) {
				197	scores.push_back(orig_preds[i][j] * orig_preds[i][4]);
				198	}
				199
				200	cv::minMaxLoc(scores, nullptr, &confidence, nullptr, &class_id);
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	201	scores.clear();
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	202	if (confidence > FLAGS_conf_threshold) {
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	203	Detection detection{cv::Rect(left, top, width, height), confidence,
				204	class_id.x};
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	205	detections->push_back(detection);
				206	}
				207	}
				208	}
				209
				210	std::vector<cv::Rect> boxes;
				211	std::vector<float> confidences;
				212
				213	for (const Detection &d : *detections) {
				214	boxes.push_back(d.box);
				215	confidences.push_back(d.confidence);
				216	}
				217
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	218	cv::dnn::NMSBoxes(boxes, confidences, FLAGS_conf_threshold,
				219	FLAGS_nms_threshold, *indices);
				220
				221	std::vector<Detection> filtered_detections;
				222	for (size_t i = 0; i < indices->size(); i++) {
				223	filtered_detections.push_back((detections)[(indices)[i]]);
				224	}
				225
				226	VLOG(1) << "NonMaximumSupression: " << detections->size() - indices->size()
				227	<< " detections filtered out";
				228
				229	return filtered_detections;
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	230	}
				231
Filip Kujawa	26a2366	2023-04-08 16:19:13 -0700	[diff] [blame]	232	std::vector<Detection> YOLOV5Impl::ProcessImage(cv::Mat frame) {
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	233	VLOG(1) << "\n";
				234
				235	auto start = std::chrono::high_resolution_clock::now();
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	236	img_height_ = frame.rows;
				237	img_width_ = frame.cols;
				238
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	239	cv::resize(frame, frame, cv::Size(in_height_, in_width_), cv::INTER_CUBIC);
				240	cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
				241	frame.convertTo(frame, CV_8U);
				242
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	243	ConvertCVMatToTensor(frame, input_8_);
				244
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	245	TfLiteStatus status = interpreter_->Invoke();
				246	CHECK_EQ(status, kTfLiteOk);
				247
				248	int output_tensor_index = interpreter_->outputs()[0];
				249	TfLiteIntArray *out_dims = interpreter_->tensor(output_tensor_index)->dims;
				250	int num_rows = out_dims->data[1];
				251	int num_columns = out_dims->data[2];
				252
				253	TfLiteTensor *src_tensor = interpreter_->tensor(interpreter_->outputs()[0]);
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	254
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	255	std::vector<std::vector<float>> orig_preds =
				256	TensorToVector2D(src_tensor, num_rows, num_columns);
				257
				258	std::vector<int> indices;
				259	std::vector<Detection> detections;
				260
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	261	std::vector<Detection> filtered_detections;
				262	filtered_detections = NonMaximumSupression(orig_preds, num_rows, num_columns,
				263	&detections, &indices);
				264	VLOG(1) << "---";
				265	for (size_t i = 0; i < filtered_detections.size(); i++) {
				266	VLOG(1) << "Detection #" << i << " \| Class ID #"
				267	<< filtered_detections[i].class_id << " @ "
				268	<< filtered_detections[i].confidence << " confidence";
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	269	}
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	270
				271	VLOG(1) << "---";
				272
				273	auto stop = std::chrono::high_resolution_clock::now();
				274
				275	VLOG(1) << "Inference time: "
				276	<< std::chrono::duration_cast<std::chrono::milliseconds>(stop - start)
				277	.count();
				278
				279	if (FLAGS_visualize_detections) {
				280	cv::resize(frame, frame, cv::Size(img_width_, img_height_), 0, 0, true);
				281	for (size_t i = 0; i < filtered_detections.size(); i++) {
				282	VLOG(1) << "Bounding Box \| X: " << filtered_detections[i].box.x
				283	<< " Y: " << filtered_detections[i].box.y
				284	<< " W: " << filtered_detections[i].box.width
				285	<< " H: " << filtered_detections[i].box.height;
				286	cv::rectangle(frame, filtered_detections[i].box, cv::Scalar(255, 0, 0),
				287	2);
				288	cv::putText(
				289	frame, std::to_string(filtered_detections[i].class_id),
				290	cv::Point(filtered_detections[i].box.x, filtered_detections[i].box.y),
				291	cv::FONT_HERSHEY_COMPLEX, 1.0, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);
				292	}
				293	cv::cvtColor(frame, frame, cv::COLOR_BGR2RGB);
				294	cv::imshow("yolo", frame);
				295	cv::waitKey(10);
Filip Kujawa	8c76e5d	2023-04-08 16:20:27 -0700	[diff] [blame]	296	}
Filip Kujawa	f3b8adb	2023-04-07 21:00:49 -0700	[diff] [blame]	297
				298	return filtered_detections;
Filip Kujawa	dc7d47c	2023-04-08 16:16:51 -0700	[diff] [blame]	299	};
				300
				301	} // namespace vision
				302	} // namespace y2023