1/**
2 * @file AKAZEFeatures.cpp
3 * @brief Main class for detecting and describing binary features in an
4 * accelerated nonlinear scale space
5 * @date Sep 15, 2013
6 * @author Pablo F. Alcantarilla, Jesus Nuevo
7 */
8
9#include "AKAZEFeatures.h"
10
11#include <cstdint>
12#include <cstring>
13#include <iostream>
14#include <opencv2/core.hpp>
15#include <opencv2/core/hal/hal.hpp>
16#include <opencv2/imgproc.hpp>
17
18#include "fed.h"
19#include "nldiffusion_functions.h"
20#include "utils.h"
21
22#ifdef AKAZE_USE_CPP11_THREADING
23#include <atomic>
24#include <functional> // std::ref
25#include <future>
26#include <thread>
27#endif
28
29// Taken from opencv2/internal.hpp: IEEE754 constants and macros
30#define CV_TOGGLE_FLT(x) ((x) ^ ((int)(x) < 0 ? 0x7fffffff : 0))
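// Illustrative note (not part of the original header comment): the macro maps
// the raw IEEE754 bit pattern of a float to a signed integer whose ordering
// matches the floating-point ordering, so floats can be compared with integer
// comparisons (as done in MLDB_Binary_Comparisons() below).  For example:
//
//   float a = -2.5f, b = -1.5f;       // a < b
//   int32_t ia, ib;
//   std::memcpy(&ia, &a, sizeof ia);  // raw patterns: ia > ib
//   std::memcpy(&ib, &b, sizeof ib);
//   // but CV_TOGGLE_FLT(ia) < CV_TOGGLE_FLT(ib), matching a < b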
31
32// Namespaces
33namespace cv {
34using namespace std;
35
36/// Internal Functions
37inline void Compute_Main_Orientation(cv::KeyPoint& kpt,
38 const TEvolutionV2& evolution_);
39static void generateDescriptorSubsampleV2(cv::Mat& sampleList,
40 cv::Mat& comparisons, int nbits,
41 int pattern_size, int nchannels);
42
43/* ************************************************************************* */
44/**
45 * @brief AKAZEFeatures constructor with input options
46 * @param options AKAZEFeatures configuration options
47 * @note This constructor allocates memory for the nonlinear scale space
48 */
49AKAZEFeaturesV2::AKAZEFeaturesV2(const AKAZEOptionsV2& options)
50 : options_(options) {
51 cout << "AKAZEFeaturesV2 constructor called" << endl;
52
53#ifdef AKAZE_USE_CPP11_THREADING
54 cout << "hardware_concurrency: " << thread::hardware_concurrency() << endl;
55#endif
56
57 reordering_ = true;
58
59 if (options_.descriptor_size > 0 &&
60 options_.descriptor >= AKAZE::DESCRIPTOR_MLDB_UPRIGHT) {
61 generateDescriptorSubsampleV2(
62 descriptorSamples_, descriptorBits_, options_.descriptor_size,
63 options_.descriptor_pattern_size, options_.descriptor_channels);
64 }
65
66 Allocate_Memory_Evolution();
67}
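/*
 * Usage sketch (illustrative only; assumes AKAZEOptionsV2 from the companion
 * config header with all remaining fields left at their defaults):
 *
 *   AKAZEOptionsV2 opts;
 *   opts.img_width  = image.cols;
 *   opts.img_height = image.rows;
 *   AKAZEFeaturesV2 akaze(opts);
 *   akaze.Create_Nonlinear_Scale_Space(image);  // 8U/16U/32F; BGR is converted
 *   std::vector<cv::KeyPoint> kpts;
 *   akaze.Feature_Detection(kpts);
 *   cv::Mat desc;
 *   akaze.Compute_Descriptors(kpts, desc);
 */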
68
69/* ************************************************************************* */
70/**
71 * @brief This method allocates the memory for the nonlinear diffusion evolution
72 */
73void AKAZEFeaturesV2::Allocate_Memory_Evolution(void) {
74 CV_Assert(options_.img_height > 2 &&
75 options_.img_width > 2); // The size of modgs_ must be positive
76
77 // Set maximum size of the area for the descriptor computation
78 float smax = 0.0;
79 if (options_.descriptor == AKAZE::DESCRIPTOR_MLDB_UPRIGHT ||
80 options_.descriptor == AKAZE::DESCRIPTOR_MLDB) {
81 smax = 10.0f * sqrtf(2.0f);
82 } else if (options_.descriptor == AKAZE::DESCRIPTOR_KAZE_UPRIGHT ||
83 options_.descriptor == AKAZE::DESCRIPTOR_KAZE) {
84 smax = 12.0f * sqrtf(2.0f);
85 }
86
87 // Allocate the dimension of the matrices for the evolution
88 int level_height = options_.img_height;
89 int level_width = options_.img_width;
90 int power = 1;
91
92 for (int i = 0; i < options_.omax; i++) {
93 for (int j = 0; j < options_.nsublevels; j++) {
94 TEvolutionV2 step;
95 step.Lt.create(level_height, level_width, CV_32FC1);
96 step.Ldet.create(level_height, level_width, CV_32FC1);
97 step.Lsmooth.create(level_height, level_width, CV_32FC1);
98 step.Lx.create(level_height, level_width, CV_32FC1);
99 step.Ly.create(level_height, level_width, CV_32FC1);
100 step.Lxx.create(level_height, level_width, CV_32FC1);
101 step.Lxy.create(level_height, level_width, CV_32FC1);
102 step.Lyy.create(level_height, level_width, CV_32FC1);
103 step.esigma =
104 options_.soffset * pow(2.f, (float)j / options_.nsublevels + i);
105 step.sigma_size =
106 fRoundV2(step.esigma * options_.derivative_factor /
107 power); // In fact sigma_size only depends on j
108 step.border = fRoundV2(smax * step.sigma_size) + 1;
109 step.etime = 0.5f * (step.esigma * step.esigma);
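      // Worked example, assuming the usual defaults soffset = 1.6 and
      // nsublevels = 4:
      //   i = 0, j = 0  ->  esigma = 1.6,    etime = 1.28
      //   i = 0, j = 1  ->  esigma ~= 1.903, etime ~= 1.81
      //   i = 1, j = 0  ->  esigma = 3.2,    etime = 5.12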
110 step.octave = i;
111 step.sublevel = j;
112 step.octave_ratio = (float)power;
113
114 // Descriptors cannot be computed for the points on the border
115 if (step.border * 2 + 1 >= level_width ||
116 step.border * 2 + 1 >= level_height)
117 goto out; // The image becomes too small
118
119 // Pre-calculate the derivative kernels
120 compute_scharr_derivative_kernelsV2(step.DxKx, step.DxKy, 1, 0,
121 step.sigma_size);
122 compute_scharr_derivative_kernelsV2(step.DyKx, step.DyKy, 0, 1,
123 step.sigma_size);
124
125 evolution_.push_back(step);
126 }
127
128 power <<= 1;
129 level_height >>= 1;
130 level_width >>= 1;
131
132 // The next octave becomes too small
133 if (level_width < 80 || level_height < 40) {
134 options_.omax = i + 1;
135 break;
136 }
137 }
138out:
139
140 // Allocate memory for workspaces
141 lx_.create(options_.img_height, options_.img_width, CV_32FC1);
142 ly_.create(options_.img_height, options_.img_width, CV_32FC1);
143 lflow_.create(options_.img_height, options_.img_width, CV_32FC1);
144 lstep_.create(options_.img_height, options_.img_width, CV_32FC1);
145 histgram_.create(1, options_.kcontrast_nbins, CV_32SC1);
146 modgs_.create(1, (options_.img_height - 2) * (options_.img_width - 2),
147 CV_32FC1); // excluding the border
148
149 kpts_aux_.resize(evolution_.size());
150 for (size_t i = 0; i < evolution_.size(); i++)
151 kpts_aux_[i].reserve(
152 1024); // reserve 1K points' space for each evolution step
153
154 // Allocate memory for the number of cycles and time steps
155 tsteps_.resize(evolution_.size() - 1);
156 for (size_t i = 1; i < evolution_.size(); i++) {
157 fed_tau_by_process_timeV2(evolution_[i].etime - evolution_[i - 1].etime, 1,
158 0.25f, reordering_, tsteps_[i - 1]);
159 }
160
161#ifdef AKAZE_USE_CPP11_THREADING
162 tasklist_.resize(2);
163 for (auto& list : tasklist_) list.resize(evolution_.size());
164
165 vector<atomic_int> atomic_vec(evolution_.size());
166 taskdeps_.swap(atomic_vec);
167#endif
168}
169
170/* ************************************************************************* */
171/**
172 * @brief This function wraps the parallel computation of Scharr derivatives.
 173 * @param Lsmooth Input image for which the Scharr derivatives are computed
 174 * @param Lx Output derivative image (horizontal)
 175 * @param Ly Output derivative image (vertical)
 176 * @note The image size and the thread count decide whether the work is parallelized.
177 */
178static inline void image_derivatives(const cv::Mat& Lsmooth, cv::Mat& Lx,
179 cv::Mat& Ly) {
180#ifdef AKAZE_USE_CPP11_THREADING
181
182 if (getNumThreads() > 1 && (Lsmooth.rows * Lsmooth.cols) > (1 << 15)) {
183 auto task = async(launch::async, image_derivatives_scharrV2, ref(Lsmooth),
184 ref(Lx), 1, 0);
185
186 image_derivatives_scharrV2(Lsmooth, Ly, 0, 1);
187 task.get();
188 return;
189 }
190
191 // Fall back to the serial path if Lsmooth is small or OpenCV parallelization
192 // is disabled
193#endif
194
195 image_derivatives_scharrV2(Lsmooth, Lx, 1, 0);
196 image_derivatives_scharrV2(Lsmooth, Ly, 0, 1);
197}
198
199/* ************************************************************************* */
200/**
 201 * @brief This method computes the first evolution step of the nonlinear scale
202 * space
203 * @param img Input image for which the nonlinear scale space needs to be
204 * created
205 * @return kcontrast factor
206 */
207float AKAZEFeaturesV2::Compute_Base_Evolution_Level(const cv::Mat& img) {
208 Mat Lsmooth(evolution_[0].Lt.rows, evolution_[0].Lt.cols, CV_32FC1,
209 lflow_.data /* like-a-union */);
210 Mat Lx(evolution_[0].Lt.rows, evolution_[0].Lt.cols, CV_32FC1, lx_.data);
211 Mat Ly(evolution_[0].Lt.rows, evolution_[0].Lt.cols, CV_32FC1, ly_.data);
212
213#ifdef AKAZE_USE_CPP11_THREADING
214
215 if (getNumThreads() > 2 && (img.rows * img.cols) > (1 << 16)) {
216 auto e0_Lsmooth = async(launch::async, gaussian_2D_convolutionV2, ref(img),
217 ref(evolution_[0].Lsmooth), 0, 0, options_.soffset);
218
219 gaussian_2D_convolutionV2(img, Lsmooth, 0, 0, 1.0f);
220 image_derivatives(Lsmooth, Lx, Ly);
221 kcontrast_ =
222 async(launch::async, compute_k_percentileV2, Lx, Ly,
223 options_.kcontrast_percentile, ref(modgs_), ref(histgram_));
224
225 e0_Lsmooth.get();
226 Compute_Determinant_Hessian_Response(0);
227
228 evolution_[0].Lsmooth.copyTo(evolution_[0].Lt);
229 return 1.0f;
230 }
231
232#endif
233
234 // Compute the determinant Hessian
235 gaussian_2D_convolutionV2(img, evolution_[0].Lsmooth, 0, 0, options_.soffset);
236 Compute_Determinant_Hessian_Response(0);
237
238 // Compute the kcontrast factor using local variables
239 gaussian_2D_convolutionV2(img, Lsmooth, 0, 0, 1.0f);
240 image_derivatives(Lsmooth, Lx, Ly);
241 float kcontrast = compute_k_percentileV2(
242 Lx, Ly, options_.kcontrast_percentile, modgs_, histgram_);
243
244 // Copy the smoothed original image to the first level of the evolution Lt
245 evolution_[0].Lsmooth.copyTo(evolution_[0].Lt);
246
247 return kcontrast;
248}
249
250/* ************************************************************************* */
251/**
252 * @brief This method creates the nonlinear scale space for a given image
253 * @param img Input image for which the nonlinear scale space needs to be
254 * created
255 * @return 0 if the nonlinear scale space was created successfully, -1 otherwise
256 */
257int AKAZEFeaturesV2::Create_Nonlinear_Scale_Space(const Mat& img) {
258 CV_Assert(evolution_.size() > 0);
259
260 // Setup the gray-scale image
261 const Mat* gray = &img;
262 if (img.channels() != 1) {
263 cvtColor(img, gray_, COLOR_BGR2GRAY);
264 gray = &gray_;
265 }
266
267 if (gray->type() == CV_8UC1) {
268 gray->convertTo(evolution_[0].Lt, CV_32F, 1 / 255.0);
269 gray = &evolution_[0].Lt;
270 } else if (gray->type() == CV_16UC1) {
271 gray->convertTo(evolution_[0].Lt, CV_32F, 1 / 65535.0);
272 gray = &evolution_[0].Lt;
273 }
274 CV_Assert(gray->type() == CV_32FC1);
275
276 // Handle the trivial case
277 if (evolution_.size() == 1) {
278 gaussian_2D_convolutionV2(*gray, evolution_[0].Lsmooth, 0, 0,
279 options_.soffset);
280 evolution_[0].Lsmooth.copyTo(evolution_[0].Lt);
281 Compute_Determinant_Hessian_Response_Single(0);
282 return 0;
283 }
284
285 // First compute Lsmooth, Hessian, and the kcontrast factor for the base
286 // evolution level
287 float kcontrast = Compute_Base_Evolution_Level(*gray);
288
289 // Prepare Mats to be used as local workspace
290 Mat Lx(evolution_[0].Lt.rows, evolution_[0].Lt.cols, CV_32FC1, lx_.data);
291 Mat Ly(evolution_[0].Lt.rows, evolution_[0].Lt.cols, CV_32FC1, ly_.data);
292 Mat Lflow(evolution_[0].Lt.rows, evolution_[0].Lt.cols, CV_32FC1,
293 lflow_.data);
294 Mat Lstep(evolution_[0].Lt.rows, evolution_[0].Lt.cols, CV_32FC1,
295 lstep_.data);
296
297 // Now generate the rest of evolution levels
298 for (size_t i = 1; i < evolution_.size(); i++) {
299 if (evolution_[i].octave > evolution_[i - 1].octave) {
300 halfsample_imageV2(evolution_[i - 1].Lt, evolution_[i].Lt);
301 kcontrast = kcontrast * 0.75f;
302
303 // Resize the workspace images to fit Lt
304 Lx = cv::Mat(evolution_[i].Lt.rows, evolution_[i].Lt.cols, CV_32FC1,
305 lx_.data);
306 Ly = cv::Mat(evolution_[i].Lt.rows, evolution_[i].Lt.cols, CV_32FC1,
307 ly_.data);
308 Lflow = cv::Mat(evolution_[i].Lt.rows, evolution_[i].Lt.cols, CV_32FC1,
309 lflow_.data);
310 Lstep = cv::Mat(evolution_[i].Lt.rows, evolution_[i].Lt.cols, CV_32FC1,
311 lstep_.data);
312 } else {
313 evolution_[i - 1].Lt.copyTo(evolution_[i].Lt);
314 }
315
316 gaussian_2D_convolutionV2(evolution_[i].Lt, evolution_[i].Lsmooth, 0, 0,
317 1.0f);
318
319#ifdef AKAZE_USE_CPP11_THREADING
320 if (kcontrast_.valid())
321 kcontrast *=
322 kcontrast_
323 .get(); /* Join the kcontrast task so Lx and Ly can be reused */
324#endif
325
326 // Compute the Gaussian derivatives Lx and Ly
327 image_derivatives(evolution_[i].Lsmooth, Lx, Ly);
328
329 // Compute the Hessian for feature detection
330 Compute_Determinant_Hessian_Response((int)i);
331
332 // Compute the conductivity equation Lflow
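    // For reference, the diffusivities selected below are the standard
    // KAZE/AKAZE conductivity functions of |grad Lsmooth| and the contrast
    // factor k = kcontrast:
    //   DIFF_PM_G1:        g = exp(-|grad L|^2 / k^2)
    //   DIFF_PM_G2:        g = 1 / (1 + |grad L|^2 / k^2)
    //   DIFF_WEICKERT:     g = 1 - exp(-3.315 / (|grad L| / k)^8)
    //   DIFF_CHARBONNIER:  g = 1 / sqrt(1 + |grad L|^2 / k^2)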
333 switch (options_.diffusivity) {
334 case KAZE::DIFF_PM_G1:
335 pm_g1V2(Lx, Ly, Lflow, kcontrast);
336 break;
337 case KAZE::DIFF_PM_G2:
338 pm_g2V2(Lx, Ly, Lflow, kcontrast);
339 break;
340 case KAZE::DIFF_WEICKERT:
341 weickert_diffusivityV2(Lx, Ly, Lflow, kcontrast);
342 break;
343 case KAZE::DIFF_CHARBONNIER:
344 charbonnier_diffusivityV2(Lx, Ly, Lflow, kcontrast);
345 break;
346 default:
347 CV_Error(options_.diffusivity, "Diffusivity is not supported");
348 break;
349 }
350
351 // Perform Fast Explicit Diffusion on Lt
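    // Each adjacent pair of levels is connected by one FED cycle: the time gap
    // (etime[i] - etime[i-1]) was split by fed_tau_by_process_timeV2() into the
    // variable step sizes stored in tsteps_[i-1], and every step below applies
    // Lt += 0.5 * tau_j * Lstep.  The extra 0.5 matches the conventional
    // averaging of the two neighbouring conductivities in the discretized
    // divergence that nld_step_scalarV2() accumulates into Lstep.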
352 const int total = Lstep.rows * Lstep.cols;
353 float* lt = evolution_[i].Lt.ptr<float>(0);
354 float* lstep = Lstep.ptr<float>(0);
355 std::vector<float>& tsteps = tsteps_[i - 1];
356
357 for (size_t j = 0; j < tsteps.size(); j++) {
358 nld_step_scalarV2(evolution_[i].Lt, Lflow, Lstep);
359
360 const float step_size = tsteps[j];
361 for (int k = 0; k < total; k++) lt[k] += lstep[k] * 0.5f * step_size;
362 }
363 }
364
365#ifdef AKAZE_USE_CPP11_THREADING
366
367 if (getNumThreads() > 1) {
368 // Wait all background tasks to finish
369 for (size_t i = 0; i < evolution_.size(); i++) {
370 tasklist_[0][i].get();
371 tasklist_[1][i].get();
372 }
373 }
374
375#endif
376
377 return 0;
378}
379
380/* ************************************************************************* */
381/**
382 * @brief This method selects interesting keypoints through the nonlinear scale
383 * space
384 * @param kpts Vector of detected keypoints
385 */
386void AKAZEFeaturesV2::Feature_Detection(std::vector<KeyPoint>& kpts) {
387 Find_Scale_Space_Extrema(kpts_aux_);
388 Do_Subpixel_Refinement(kpts_aux_, kpts);
389}
390
391/* ************************************************************************* */
392/**
393 * @brief This method computes the feature detector response for the nonlinear
394 * scale space
395 * @param level The evolution level to compute Hessian determinant
396 * @note We use the Hessian determinant as the feature detector response
397 */
398inline void AKAZEFeaturesV2::Compute_Determinant_Hessian_Response_Single(
399 const int level) {
400 TEvolutionV2& e = evolution_[level];
401
402 const int total = e.Lsmooth.cols * e.Lsmooth.rows;
403 float* lxx = e.Lxx.ptr<float>(0);
404 float* lxy = e.Lxy.ptr<float>(0);
405 float* lyy = e.Lyy.ptr<float>(0);
406 float* ldet = e.Ldet.ptr<float>(0);
407
408 // Firstly compute the multiscale derivatives
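  // DxKx/DxKy and DyKx/DyKy are the separable Scharr kernel pairs precomputed
  // per level in Allocate_Memory_Evolution() for this level's sigma_size, so
  // Lx, Ly and the second derivatives below are scale-adapted derivatives of
  // Lsmooth.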
409 sepFilter2D(e.Lsmooth, e.Lx, CV_32F, e.DxKx, e.DxKy);
410 sepFilter2D(e.Lx, e.Lxx, CV_32F, e.DxKx, e.DxKy);
411 sepFilter2D(e.Lx, e.Lxy, CV_32F, e.DyKx, e.DyKy);
412 sepFilter2D(e.Lsmooth, e.Ly, CV_32F, e.DyKx, e.DyKy);
413 sepFilter2D(e.Ly, e.Lyy, CV_32F, e.DyKx, e.DyKy);
414
415 // Compute Ldet by Lxx.mul(Lyy) - Lxy.mul(Lxy)
416 for (int j = 0; j < total; j++) ldet[j] = lxx[j] * lyy[j] - lxy[j] * lxy[j];
417}
418
419#ifdef AKAZE_USE_CPP11_THREADING
420
421/* ************************************************************************* */
422/**
423 * @brief This method computes the feature detector response for the nonlinear
424 * scale space
425 * @param level The evolution level to compute Hessian determinant
 426 * @note This is the parallelized version of
427 * Compute_Determinant_Hessian_Response_Single()
428 */
429void AKAZEFeaturesV2::Compute_Determinant_Hessian_Response(const int level) {
430 if (getNumThreads() == 1) {
431 Compute_Determinant_Hessian_Response_Single(level);
432 return;
433 }
434
435 TEvolutionV2& e = evolution_[level];
436 atomic_int& dep = taskdeps_[level];
437
438 const int total = e.Lsmooth.cols * e.Lsmooth.rows;
439 float* lxx = e.Lxx.ptr<float>(0);
440 float* lxy = e.Lxy.ptr<float>(0);
441 float* lyy = e.Lyy.ptr<float>(0);
442 float* ldet = e.Ldet.ptr<float>(0);
443
444 dep = 0;
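  // The two tasks below run concurrently: one produces Ly/Lyy, the other
  // Lx/Lxx.  Whichever increments `dep` to 2 (i.e. finishes last) also
  // computes Lxy, which needs Lx, and then Ldet, which needs Lxx, Lyy and Lxy;
  // that way all inputs are guaranteed ready without an extra join.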
445
446 tasklist_[0][level] = async(launch::async, [=, &e, &dep] {
447 sepFilter2D(e.Lsmooth, e.Ly, CV_32F, e.DyKx, e.DyKy);
448 sepFilter2D(e.Ly, e.Lyy, CV_32F, e.DyKx, e.DyKy);
449
450 if (dep.fetch_add(1, memory_order_relaxed) != 1)
451 return; // The other dependency is not ready
452
453 sepFilter2D(e.Lx, e.Lxy, CV_32F, e.DyKx, e.DyKy);
454 for (int j = 0; j < total; j++) ldet[j] = lxx[j] * lyy[j] - lxy[j] * lxy[j];
455 });
456
457 tasklist_[1][level] = async(launch::async, [=, &e, &dep] {
458 sepFilter2D(e.Lsmooth, e.Lx, CV_32F, e.DxKx, e.DxKy);
459 sepFilter2D(e.Lx, e.Lxx, CV_32F, e.DxKx, e.DxKy);
460
461 if (dep.fetch_add(1, memory_order_relaxed) != 1)
462 return; // The other dependency is not ready
463
464 sepFilter2D(e.Lx, e.Lxy, CV_32F, e.DyKx, e.DyKy);
465 for (int j = 0; j < total; j++) ldet[j] = lxx[j] * lyy[j] - lxy[j] * lxy[j];
466 });
467
 468  // tasklist_[0][level] and tasklist_[1][level] have to be waited on later
469}
470
471#else
472
473void AKAZEFeaturesV2::Compute_Determinant_Hessian_Response(const int level) {
474 Compute_Determinant_Hessian_Response_Single(level);
475}
476
477#endif
478
479/* ************************************************************************* */
480/**
481 * @brief This method searches v for a neighbor point of the point candidate p
482 * @param p The keypoint candidate to search a neighbor
483 * @param v The vector to store the points to be searched
484 * @param offset The starting location in the vector v to be searched at
485 * @param idx The index of the vector v if a neighbor is found
486 * @return true if a neighbor point is found; false otherwise
487 */
488inline bool find_neighbor_point(const KeyPoint& p, const vector<KeyPoint>& v,
489 const int offset, int& idx) {
490 const int sz = (int)v.size();
491
492 for (int i = offset; i < sz; i++) {
493 if (v[i].class_id == -1) // Skip a deleted point
494 continue;
495
496 float dx = p.pt.x - v[i].pt.x;
497 float dy = p.pt.y - v[i].pt.y;
498 if (dx * dx + dy * dy <= p.size * p.size) {
499 idx = i;
500 return true;
501 }
502 }
503
504 return false;
505}
506
507inline bool find_neighbor_point_inv(const KeyPoint& p,
508 const vector<KeyPoint>& v, const int offset,
509 int& idx) {
510 const int sz = (int)v.size();
511
512 for (int i = offset; i < sz; i++) {
513 if (v[i].class_id == -1) // Skip a deleted point
514 continue;
515
516 float dx = p.pt.x - v[i].pt.x;
517 float dy = p.pt.y - v[i].pt.y;
518 if (dx * dx + dy * dy <= v[i].size * v[i].size) {
519 idx = i;
520 return true;
521 }
522 }
523
524 return false;
525}
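/*
 * Note: find_neighbor_point() measures the neighbourhood with the candidate's
 * own size (p.size), while find_neighbor_point_inv() uses the stored point's
 * size (v[i].size); the latter is what the extrema search uses when pruning a
 * point against the next (upper) evolution level.
 */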
526
527/* ************************************************************************* */
528/**
529 * @brief This method finds extrema in the nonlinear scale space
530 * @param kpts_aux Output vectors of detected keypoints; one vector for each
531 * evolution level
532 */
533inline void AKAZEFeaturesV2::Find_Scale_Space_Extrema_Single(
534 std::vector<vector<KeyPoint>>& kpts_aux) {
535 // Clear the workspace to hold the keypoint candidates
536 for (size_t i = 0; i < kpts_aux_.size(); i++) kpts_aux_[i].clear();
537
538 for (int i = 0; i < (int)evolution_.size(); i++) {
539 const TEvolutionV2& step = evolution_[i];
540
541 const float* prev = step.Ldet.ptr<float>(step.border - 1);
542 const float* curr = step.Ldet.ptr<float>(step.border);
543 const float* next = step.Ldet.ptr<float>(step.border + 1);
544
545 for (int y = step.border; y < step.Ldet.rows - step.border; y++) {
546 for (int x = step.border; x < step.Ldet.cols - step.border; x++) {
547 const float value = curr[x];
548
549 // Filter the points with the detector threshold
550 if (value <= options_.dthreshold) continue;
551 if (value <= curr[x - 1] || value <= curr[x + 1]) continue;
552 if (value <= prev[x - 1] || value <= prev[x] || value <= prev[x + 1])
553 continue;
554 if (value <= next[x - 1] || value <= next[x] || value <= next[x + 1])
555 continue;
556
557 KeyPoint point(/* x */ static_cast<float>(x * step.octave_ratio),
558 /* y */ static_cast<float>(y * step.octave_ratio),
559 /* size */ step.esigma * options_.derivative_factor,
560 /* angle */ -1,
561 /* response */ value,
562 /* octave */ step.octave,
563 /* class_id */ i);
564
565 int idx = 0;
566
567 // Compare response with the same scale
568 if (find_neighbor_point(point, kpts_aux[i], 0, idx)) {
569 if (point.response > kpts_aux[i][idx].response)
570 kpts_aux[i][idx] = point; // Replace the old point
571 continue;
572 }
573
574 // Compare response with the lower scale
575 if (i > 0 && find_neighbor_point(point, kpts_aux[i - 1], 0, idx)) {
576 if (point.response > kpts_aux[i - 1][idx].response) {
577 kpts_aux[i - 1][idx].class_id = -1; // Mark it as deleted
578 kpts_aux[i].push_back(
579 point); // Insert the new point to the right layer
580 }
581 continue;
582 }
583
584 kpts_aux[i].push_back(point); // A good keypoint candidate is found
585 }
586 prev = curr;
587 curr = next;
588 next += step.Ldet.cols;
589 }
590 }
591
592 // Now filter points with the upper scale level
593 for (int i = 0; i < (int)kpts_aux.size() - 1; i++) {
594 for (int j = 0; j < (int)kpts_aux[i].size(); j++) {
595 KeyPoint& pt = kpts_aux[i][j];
596
597 if (pt.class_id == -1) // Skip a deleted point
598 continue;
599
600 int idx = 0;
601 while (find_neighbor_point_inv(pt, kpts_aux[i + 1], idx, idx)) {
602 if (pt.response > kpts_aux[i + 1][idx].response)
603 kpts_aux[i + 1][idx].class_id = -1;
604 ++idx;
605 }
606 }
607 }
608}
609
 610#ifdef AKAZE_USE_CPP11_THREADING
611
612/* ************************************************************************* */
613/**
614 * @brief This method finds extrema in the nonlinear scale space
615 * @param kpts_aux Output vectors of detected keypoints; one vector for each
616 * evolution level
 617 * @note This is the parallelized version of Find_Scale_Space_Extrema_Single()
618 */
619void AKAZEFeaturesV2::Find_Scale_Space_Extrema(
620 std::vector<vector<KeyPoint>>& kpts_aux) {
621 if (getNumThreads() == 1) {
622 Find_Scale_Space_Extrema_Single(kpts_aux);
623 return;
624 }
625
626 for (int i = 0; i < (int)evolution_.size(); i++) {
627 const TEvolutionV2& step = evolution_[i];
628 vector<cv::KeyPoint>& kpts = kpts_aux[i];
629
630 // Clear the workspace to hold the keypoint candidates
631 kpts_aux_[i].clear();
632
633 auto mode = (i > 0 ? launch::async : launch::deferred);
634 tasklist_[0][i] = async(
635 mode,
636 [&step, &kpts, i](const AKAZEOptionsV2& opt) {
637 const float* prev = step.Ldet.ptr<float>(step.border - 1);
638 const float* curr = step.Ldet.ptr<float>(step.border);
639 const float* next = step.Ldet.ptr<float>(step.border + 1);
640
641 for (int y = step.border; y < step.Ldet.rows - step.border; y++) {
642 for (int x = step.border; x < step.Ldet.cols - step.border; x++) {
643 const float value = curr[x];
644
645 // Filter the points with the detector threshold
646 if (value <= opt.dthreshold) continue;
647 if (value <= curr[x - 1] || value <= curr[x + 1]) continue;
648 if (value <= prev[x - 1] || value <= prev[x] ||
649 value <= prev[x + 1])
650 continue;
651 if (value <= next[x - 1] || value <= next[x] ||
652 value <= next[x + 1])
653 continue;
654
655 KeyPoint point(/* x */ static_cast<float>(x * step.octave_ratio),
656 /* y */ static_cast<float>(y * step.octave_ratio),
657 /* size */ step.esigma * opt.derivative_factor,
658 /* angle */ -1,
659 /* response */ value,
660 /* octave */ step.octave,
661 /* class_id */ i);
662
663 int idx = 0;
664
665 // Compare response with the same scale
666 if (find_neighbor_point(point, kpts, 0, idx)) {
667 if (point.response > kpts[idx].response)
668 kpts[idx] = point; // Replace the old point
669 continue;
670 }
671
672 kpts.push_back(point);
673 }
674
675 prev = curr;
676 curr = next;
677 next += step.Ldet.cols;
678 }
679 },
680 options_);
681 }
682
683 tasklist_[0][0].get();
684
685 // Filter points with the lower scale level
686 for (int i = 1; i < (int)kpts_aux.size(); i++) {
687 tasklist_[0][i].get();
688
689 for (int j = 0; j < (int)kpts_aux[i].size(); j++) {
690 KeyPoint& pt = kpts_aux[i][j];
691
692 int idx = 0;
693 while (find_neighbor_point(pt, kpts_aux[i - 1], idx, idx)) {
694 if (pt.response > kpts_aux[i - 1][idx].response)
695 kpts_aux[i - 1][idx].class_id = -1;
696 // else this pt may be pruned by the upper scale
697 ++idx;
698 }
699 }
700 }
701
702 // Now filter points with the upper scale level (the other direction)
703 for (int i = (int)kpts_aux.size() - 2; i >= 0; i--) {
704 for (int j = 0; j < (int)kpts_aux[i].size(); j++) {
705 KeyPoint& pt = kpts_aux[i][j];
706
707 if (pt.class_id == -1) // Skip a deleted point
708 continue;
709
710 int idx = 0;
711 while (find_neighbor_point_inv(pt, kpts_aux[i + 1], idx, idx)) {
712 if (pt.response > kpts_aux[i + 1][idx].response)
713 kpts_aux[i + 1][idx].class_id = -1;
714 ++idx;
715 }
716 }
717 }
718}
719
720#else
721
722void AKAZEFeaturesV2::Find_Scale_Space_Extrema(
723 std::vector<vector<KeyPoint>>& kpts_aux) {
724 Find_Scale_Space_Extrema_Single(kpts_aux);
725}
726
727#endif
728
729/* ************************************************************************* */
730/**
731 * @brief This method performs subpixel refinement of the detected keypoints
732 * @param kpts_aux Input vectors of detected keypoints, sorted by evolution
733 * levels
734 * @param kpts Output vector of the final refined keypoints
735 */
736void AKAZEFeaturesV2::Do_Subpixel_Refinement(
737 std::vector<std::vector<KeyPoint>>& kpts_aux, std::vector<KeyPoint>& kpts) {
738 // Clear the keypoint vector
739 kpts.clear();
740
741 for (int i = 0; i < (int)kpts_aux.size(); i++) {
742 const float* const ldet = evolution_[i].Ldet.ptr<float>(0);
743 const float ratio = evolution_[i].octave_ratio;
744 const int cols = evolution_[i].Ldet.cols;
745
746 for (int j = 0; j < (int)kpts_aux[i].size(); j++) {
747 KeyPoint& kp = kpts_aux[i][j];
748
749 if (kp.class_id == -1) continue; // Skip a deleted keypoint
750
751 int x = (int)(kp.pt.x / ratio);
752 int y = (int)(kp.pt.y / ratio);
753
754 // Compute the gradient
755 float Dx = 0.5f * (ldet[y * cols + x + 1] - ldet[y * cols + x - 1]);
756 float Dy = 0.5f * (ldet[(y + 1) * cols + x] - ldet[(y - 1) * cols + x]);
757
758 // Compute the Hessian
759 float Dxx = ldet[y * cols + x + 1] + ldet[y * cols + x - 1] -
760 2.0f * ldet[y * cols + x];
761 float Dyy = ldet[(y + 1) * cols + x] + ldet[(y - 1) * cols + x] -
762 2.0f * ldet[y * cols + x];
763 float Dxy =
764 0.25f * (ldet[(y + 1) * cols + x + 1] + ldet[(y - 1) * cols + x - 1] -
765 ldet[(y - 1) * cols + x + 1] - ldet[(y + 1) * cols + x - 1]);
766
767 // Solve the linear system
768 Matx22f A{Dxx, Dxy, Dxy, Dyy};
769 Vec2f b{-Dx, -Dy};
770 Vec2f dst{0.0f, 0.0f};
771 solve(A, b, dst, DECOMP_LU);
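      // This is one Newton step on the quadratic Taylor expansion of Ldet
      // around (x, y):  [Dxx Dxy; Dxy Dyy] * [dx; dy] = [-Dx; -Dy].
      // Offsets larger than one pixel mean the fit is unreliable, so such
      // points are discarded below.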
772
773 float dx = dst(0);
774 float dy = dst(1);
775
776 if (fabs(dx) > 1.0f || fabs(dy) > 1.0f)
777 continue; // Ignore the point that is not stable
778
779 // Refine the coordinates
780 kp.pt.x += dx * ratio;
781 kp.pt.y += dy * ratio;
782
783 kp.angle = 0.0;
784 kp.size *= 2.0f; // In OpenCV the size of a keypoint is the diameter
785
786 // Push the refined keypoint to the final storage
787 kpts.push_back(kp);
788 }
789 }
790}
791
792/* ************************************************************************* */
793
794class SURF_Descriptor_Upright_64_InvokerV2 : public ParallelLoopBody {
795 public:
796 SURF_Descriptor_Upright_64_InvokerV2(
797 std::vector<KeyPoint>& kpts, Mat& desc,
798 const std::vector<TEvolutionV2>& evolution)
799 : keypoints_(kpts), descriptors_(desc), evolution_(evolution) {}
800
801 void operator()(const Range& range) const {
802 for (int i = range.start; i < range.end; i++) {
803 Get_SURF_Descriptor_Upright_64(keypoints_[i], descriptors_.ptr<float>(i));
804 }
805 }
806
807 void Get_SURF_Descriptor_Upright_64(const KeyPoint& kpt, float* desc) const;
808
809 private:
810 std::vector<KeyPoint>& keypoints_;
811 Mat& descriptors_;
812 const std::vector<TEvolutionV2>& evolution_;
813};
814
815class SURF_Descriptor_64_InvokerV2 : public ParallelLoopBody {
816 public:
817 SURF_Descriptor_64_InvokerV2(std::vector<KeyPoint>& kpts, Mat& desc,
818 const std::vector<TEvolutionV2>& evolution)
819 : keypoints_(kpts), descriptors_(desc), evolution_(evolution) {}
820
821 void operator()(const Range& range) const {
822 for (int i = range.start; i < range.end; i++) {
823 KeyPoint& kp = keypoints_[i];
824 Compute_Main_Orientation(kp, evolution_[kp.class_id]);
825 Get_SURF_Descriptor_64(kp, descriptors_.ptr<float>(i));
826 }
827 }
828
829 void Get_SURF_Descriptor_64(const KeyPoint& kpt, float* desc) const;
830
831 private:
832 std::vector<KeyPoint>& keypoints_;
833 Mat& descriptors_;
834 const std::vector<TEvolutionV2>& evolution_;
835};
836
837class MSURF_Upright_Descriptor_64_InvokerV2 : public ParallelLoopBody {
838 public:
839 MSURF_Upright_Descriptor_64_InvokerV2(
840 std::vector<KeyPoint>& kpts, Mat& desc,
841 const std::vector<TEvolutionV2>& evolution)
842 : keypoints_(kpts), descriptors_(desc), evolution_(evolution) {}
843
844 void operator()(const Range& range) const {
845 for (int i = range.start; i < range.end; i++) {
846 Get_MSURF_Upright_Descriptor_64(keypoints_[i],
847 descriptors_.ptr<float>(i));
848 }
849 }
850
851 void Get_MSURF_Upright_Descriptor_64(const KeyPoint& kpt, float* desc) const;
852
853 private:
854 std::vector<KeyPoint>& keypoints_;
855 Mat& descriptors_;
856 const std::vector<TEvolutionV2>& evolution_;
857};
858
859class MSURF_Descriptor_64_InvokerV2 : public ParallelLoopBody {
860 public:
861 MSURF_Descriptor_64_InvokerV2(std::vector<KeyPoint>& kpts, Mat& desc,
862 const std::vector<TEvolutionV2>& evolution)
863 : keypoints_(kpts), descriptors_(desc), evolution_(evolution) {}
864
865 void operator()(const Range& range) const {
866 for (int i = range.start; i < range.end; i++) {
867 Compute_Main_Orientation(keypoints_[i],
868 evolution_[keypoints_[i].class_id]);
869 Get_MSURF_Descriptor_64(keypoints_[i], descriptors_.ptr<float>(i));
870 }
871 }
872
873 void Get_MSURF_Descriptor_64(const KeyPoint& kpt, float* desc) const;
874
875 private:
876 std::vector<KeyPoint>& keypoints_;
877 Mat& descriptors_;
878 const std::vector<TEvolutionV2>& evolution_;
879};
880
881class Upright_MLDB_Full_Descriptor_InvokerV2 : public ParallelLoopBody {
882 public:
883 Upright_MLDB_Full_Descriptor_InvokerV2(
884 std::vector<KeyPoint>& kpts, Mat& desc,
885 const std::vector<TEvolutionV2>& evolution, const AKAZEOptionsV2& options)
886 : keypoints_(kpts),
887 descriptors_(desc),
888 evolution_(evolution),
889 options_(options) {}
890
891 void operator()(const Range& range) const {
892 for (int i = range.start; i < range.end; i++) {
893 Get_Upright_MLDB_Full_Descriptor(keypoints_[i],
894 descriptors_.ptr<unsigned char>(i));
895 }
896 }
897
898 void Get_Upright_MLDB_Full_Descriptor(const KeyPoint& kpt,
899 unsigned char* desc) const;
900
901 private:
902 std::vector<KeyPoint>& keypoints_;
903 Mat& descriptors_;
904 const std::vector<TEvolutionV2>& evolution_;
905 const AKAZEOptionsV2& options_;
906};
907
908class Upright_MLDB_Descriptor_Subset_InvokerV2 : public ParallelLoopBody {
909 public:
910 Upright_MLDB_Descriptor_Subset_InvokerV2(
911 std::vector<KeyPoint>& kpts, Mat& desc,
912 const std::vector<TEvolutionV2>& evolution, const AKAZEOptionsV2& options,
913 const Mat& descriptorSamples, const Mat& descriptorBits)
914 : keypoints_(kpts),
915 descriptors_(desc),
916 evolution_(evolution),
917 options_(options),
918 descriptorSamples_(descriptorSamples),
919 descriptorBits_(descriptorBits) {}
920
921 void operator()(const Range& range) const {
922 for (int i = range.start; i < range.end; i++) {
923 Get_Upright_MLDB_Descriptor_Subset(keypoints_[i],
924 descriptors_.ptr<unsigned char>(i));
925 }
926 }
927
928 void Get_Upright_MLDB_Descriptor_Subset(const KeyPoint& kpt,
929 unsigned char* desc) const;
930
931 private:
932 std::vector<KeyPoint>& keypoints_;
933 Mat& descriptors_;
934 const std::vector<TEvolutionV2>& evolution_;
935 const AKAZEOptionsV2& options_;
936
937 const Mat& descriptorSamples_; // List of positions in the grids to sample
938 // LDB bits from.
939 const Mat& descriptorBits_;
940};
941
942class MLDB_Full_Descriptor_InvokerV2 : public ParallelLoopBody {
943 public:
944 MLDB_Full_Descriptor_InvokerV2(std::vector<KeyPoint>& kpts, Mat& desc,
945 const std::vector<TEvolutionV2>& evolution,
946 const AKAZEOptionsV2& options)
947 : keypoints_(kpts),
948 descriptors_(desc),
949 evolution_(evolution),
950 options_(options) {}
951
952 void operator()(const Range& range) const {
953 for (int i = range.start; i < range.end; i++) {
954 Compute_Main_Orientation(keypoints_[i],
955 evolution_[keypoints_[i].class_id]);
956 Get_MLDB_Full_Descriptor(keypoints_[i],
957 descriptors_.ptr<unsigned char>(i));
958 keypoints_[i].angle *= (float)(180.0 / CV_PI);
959 }
960 }
961
962 void Get_MLDB_Full_Descriptor(const KeyPoint& kpt, unsigned char* desc) const;
963 void MLDB_Fill_Values(float* values, int sample_step, int level, float xf,
964 float yf, float co, float si, float scale) const;
965 void MLDB_Binary_Comparisons(float* values, unsigned char* desc, int count,
966 int& dpos) const;
967
968 private:
969 std::vector<KeyPoint>& keypoints_;
970 Mat& descriptors_;
971 const std::vector<TEvolutionV2>& evolution_;
972 const AKAZEOptionsV2& options_;
973};
974
975class MLDB_Descriptor_Subset_InvokerV2 : public ParallelLoopBody {
976 public:
977 MLDB_Descriptor_Subset_InvokerV2(std::vector<KeyPoint>& kpts, Mat& desc,
978 const std::vector<TEvolutionV2>& evolution,
979 const AKAZEOptionsV2& options,
980 const Mat& descriptorSamples,
981 const Mat& descriptorBits)
982 : keypoints_(kpts),
983 descriptors_(desc),
984 evolution_(evolution),
985 options_(options),
986 descriptorSamples_(descriptorSamples),
987 descriptorBits_(descriptorBits) {}
988
989 void operator()(const Range& range) const {
990 for (int i = range.start; i < range.end; i++) {
991 Compute_Main_Orientation(keypoints_[i],
992 evolution_[keypoints_[i].class_id]);
993 Get_MLDB_Descriptor_Subset(keypoints_[i],
994 descriptors_.ptr<unsigned char>(i));
995 keypoints_[i].angle *= (float)(180.0 / CV_PI);
996 }
997 }
998
999 void Get_MLDB_Descriptor_Subset(const KeyPoint& kpt,
1000 unsigned char* desc) const;
1001
1002 private:
1003 std::vector<KeyPoint>& keypoints_;
1004 Mat& descriptors_;
1005 const std::vector<TEvolutionV2>& evolution_;
1006 const AKAZEOptionsV2& options_;
1007
1008 const Mat& descriptorSamples_; // List of positions in the grids to sample
1009 // LDB bits from.
1010 const Mat& descriptorBits_;
1011};
1012
1013/**
1014 * @brief This method computes the set of descriptors through the nonlinear
1015 * scale space
1016 * @param kpts Vector of detected keypoints
1017 * @param desc Matrix to store the descriptors
1018 */
1019void AKAZEFeaturesV2::Compute_Descriptors(std::vector<KeyPoint>& kpts,
1020 Mat& desc) {
1021 for (size_t i = 0; i < kpts.size(); i++) {
1022 CV_Assert(0 <= kpts[i].class_id &&
1023 kpts[i].class_id < static_cast<int>(evolution_.size()));
1024 }
1025
1026 // Allocate memory for the descriptor matrix
1027 if (options_.descriptor < AKAZE::DESCRIPTOR_MLDB_UPRIGHT) {
1028 desc.create((int)kpts.size(), 64, CV_32FC1);
1029 } else {
1030 // We use the full length binary descriptor -> 486 bits
1031 if (options_.descriptor_size == 0) {
1032 int t = (6 + 36 + 120) * options_.descriptor_channels;
1033 desc.create((int)kpts.size(), (int)ceil(t / 8.), CV_8UC1);
1034 } else {
1035 // We use the random bit selection length binary descriptor
1036 desc.create((int)kpts.size(), (int)ceil(options_.descriptor_size / 8.),
1037 CV_8UC1);
1038 }
1039 }
1040
1041 // Compute descriptors by blocks of 16 keypoints
1042 const double stride = kpts.size() / (double)(1 << 4);
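  // `stride` is passed to parallel_for_ as the nstripes hint: splitting a
  // range of kpts.size() items into kpts.size()/16 stripes yields roughly 16
  // keypoints of work per stripe.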
1043
1044 switch (options_.descriptor) {
1045 case AKAZE::DESCRIPTOR_KAZE_UPRIGHT: // Upright descriptors, not invariant
1046 // to rotation
1047 {
1048 parallel_for_(
1049 Range(0, (int)kpts.size()),
1050 MSURF_Upright_Descriptor_64_InvokerV2(kpts, desc, evolution_),
1051 stride);
1052 } break;
1053 case AKAZE::DESCRIPTOR_KAZE: {
1054 parallel_for_(Range(0, (int)kpts.size()),
1055 MSURF_Descriptor_64_InvokerV2(kpts, desc, evolution_),
1056 stride);
1057 } break;
1058 case AKAZE::DESCRIPTOR_MLDB_UPRIGHT: // Upright descriptors, not invariant
1059 // to rotation
1060 {
1061 if (options_.descriptor_size == 0)
1062 parallel_for_(Range(0, (int)kpts.size()),
1063 Upright_MLDB_Full_Descriptor_InvokerV2(
1064 kpts, desc, evolution_, options_),
1065 stride);
1066 else
1067 parallel_for_(Range(0, (int)kpts.size()),
1068 Upright_MLDB_Descriptor_Subset_InvokerV2(
1069 kpts, desc, evolution_, options_, descriptorSamples_,
1070 descriptorBits_),
1071 stride);
1072 } break;
1073 case AKAZE::DESCRIPTOR_MLDB: {
1074 if (options_.descriptor_size == 0)
1075 parallel_for_(
1076 Range(0, (int)kpts.size()),
1077 MLDB_Full_Descriptor_InvokerV2(kpts, desc, evolution_, options_),
1078 stride);
1079 else
1080 parallel_for_(Range(0, (int)kpts.size()),
1081 MLDB_Descriptor_Subset_InvokerV2(
1082 kpts, desc, evolution_, options_, descriptorSamples_,
1083 descriptorBits_),
1084 stride);
1085 } break;
1086 }
1087}
1088
1089/* ************************************************************************* */
1090/**
1091 * @brief This function samples the derivative responses Lx and Ly for the
 1092 * points within the radius of 6*scale from (x0, y0), then multiplies them by
 1093 * a 2D Gaussian weight
1094 * @param Lx Horizontal derivative
1095 * @param Ly Vertical derivative
1096 * @param x0 X-coordinate of the center point
1097 * @param y0 Y-coordinate of the center point
1098 * @param scale The sampling step
1099 * @param resX Output array of the weighted horizontal derivative responses
1100 * @param resY Output array of the weighted vertical derivative responses
1101 */
1102static inline void Sample_Derivative_Response_Radius6(
1103 const Mat& Lx, const Mat& Ly, const int x0, const int y0, const int scale,
1104 float* resX, float* resY) {
1105 /* *************************************************************************
1106 */
1107 /// Lookup table for 2d gaussian (sigma = 2.5) where (0,0) is top left and
1108 /// (6,6) is bottom right
1109 static const float gauss25[7][7] = {
1110 {0.02546481f, 0.02350698f, 0.01849125f, 0.01239505f, 0.00708017f,
1111 0.00344629f, 0.00142946f},
1112 {0.02350698f, 0.02169968f, 0.01706957f, 0.01144208f, 0.00653582f,
1113 0.00318132f, 0.00131956f},
1114 {0.01849125f, 0.01706957f, 0.01342740f, 0.00900066f, 0.00514126f,
1115 0.00250252f, 0.00103800f},
1116 {0.01239505f, 0.01144208f, 0.00900066f, 0.00603332f, 0.00344629f,
1117 0.00167749f, 0.00069579f},
1118 {0.00708017f, 0.00653582f, 0.00514126f, 0.00344629f, 0.00196855f,
1119 0.00095820f, 0.00039744f},
1120 {0.00344629f, 0.00318132f, 0.00250252f, 0.00167749f, 0.00095820f,
1121 0.00046640f, 0.00019346f},
1122 {0.00142946f, 0.00131956f, 0.00103800f, 0.00069579f, 0.00039744f,
1123 0.00019346f, 0.00008024f}};
1124 static const int id[] = {6, 5, 4, 3, 2, 1, 0, 1, 2, 3, 4, 5, 6};
1125 static const struct gtable {
1126 float weight[109];
1127 int8_t xidx[109];
1128 int8_t yidx[109];
1129
1130 explicit gtable(void) {
1131 // Generate the weight and indices by one-time initialization
1132 int k = 0;
1133 for (int i = -6; i <= 6; ++i) {
1134 for (int j = -6; j <= 6; ++j) {
1135 if (i * i + j * j < 36) {
1136 weight[k] = gauss25[id[i + 6]][id[j + 6]];
1137 yidx[k] = i;
1138 xidx[k] = j;
1139 ++k;
1140 }
1141 }
1142 }
1143 CV_DbgAssert(k == 109);
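      // 109 is the number of integer offsets (i, j) with |i|, |j| <= 6 and
      // i*i + j*j < 36, i.e. the lattice points strictly inside the radius-6
      // sampling disc around the keypoint.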
1144 }
1145 } g;
1146
1147 const float* lx = Lx.ptr<float>(0);
1148 const float* ly = Ly.ptr<float>(0);
1149 int cols = Lx.cols;
1150
1151 for (int i = 0; i < 109; i++) {
1152 int j = (y0 + g.yidx[i] * scale) * cols + (x0 + g.xidx[i] * scale);
1153
1154 resX[i] = g.weight[i] * lx[j];
1155 resY[i] = g.weight[i] * ly[j];
1156 }
1157}
1158
1159/* ************************************************************************* */
1160/**
1161 * @brief This function sorts a[] by quantized float values
1162 * @param a[] Input floating point array to sort
1163 * @param n The length of a[]
1164 * @param quantum The interval to convert a[i]'s float values to integers
1165 * @param max The upper bound of a[], meaning a[i] must be in [0, max]
1166 * @param idx[] Output array of the indices: a[idx[i]] forms a sorted array
1167 * @param cum[] Output array of the starting indices of quantized floats
 1168 * @note The values of a[] in [k*quantum, (k + 1)*quantum) are labeled by
 1169 * the integer k, which is calculated by floor(a[i]/quantum). After sorting,
 1170 * the values from a[idx[cum[k]]] to a[idx[cum[k+1]-1]] are all labeled by k.
 1171 * This sort is unstable in order to reduce memory accesses.
1172 */
1173static inline void quantized_counting_sort(const float a[], const int n,
1174 const float quantum, const float max,
1175 uint8_t idx[], uint8_t cum[]) {
1176 const int nkeys = (int)(max / quantum);
1177
1178 // The size of cum[] must be nkeys + 1
1179 memset(cum, 0, nkeys + 1);
1180
1181 // Count up the quantized values
1182 for (int i = 0; i < n; i++) cum[(int)(a[i] / quantum)]++;
1183
1184 // Compute the inclusive prefix sum i.e. the end indices; cum[nkeys] is the
1185 // total
1186 for (int i = 1; i <= nkeys; i++) cum[i] += cum[i - 1];
1187
1188 // Generate the sorted indices; cum[] becomes the exclusive prefix sum i.e.
1189 // the start indices of keys
1190 for (int i = 0; i < n; i++) idx[--cum[(int)(a[i] / quantum)]] = i;
1191}
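/*
 * Worked example (illustrative only): a = {0.3f, 1.7f, 0.9f}, n = 3,
 * quantum = 0.5f, max = 2.0f  ->  nkeys = 4 and the keys are {0, 3, 1}.
 *   after counting:          cum = {1, 1, 0, 1, 0}
 *   after the prefix sum:    cum = {1, 2, 2, 3, 3}
 *   after index generation:  idx = {0, 2, 1}, cum = {0, 1, 2, 2, 3}
 * so a[idx[0..2]] = {0.3, 0.9, 1.7} is sorted and cum[k] holds the first
 * position of key k.
 */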
1192
1193/* ************************************************************************* */
1194/**
1195 * @brief This function computes the main orientation for a given keypoint
1196 * @param kpt Input keypoint
1197 * @note The orientation is computed using a similar approach as described in
1198 * the original SURF method. See Bay et al., Speeded Up Robust Features, ECCV
1199 * 2006
1200 */
1201inline void Compute_Main_Orientation(KeyPoint& kpt, const TEvolutionV2& e) {
1202 // Get the information from the keypoint
1203 int scale = fRoundV2(0.5f * kpt.size / e.octave_ratio);
1204 int x0 = fRoundV2(kpt.pt.x / e.octave_ratio);
1205 int y0 = fRoundV2(kpt.pt.y / e.octave_ratio);
1206
1207 // Sample derivatives responses for the points within radius of 6*scale
1208 const int ang_size = 109;
1209 float resX[ang_size], resY[ang_size];
1210 Sample_Derivative_Response_Radius6(e.Lx, e.Ly, x0, y0, scale, resX, resY);
1211
1212 // Compute the angle of each gradient vector
1213 float Ang[ang_size];
1214 hal::fastAtan2(resY, resX, Ang, ang_size, false);
1215
1216 // Sort by the angles; angles are labeled by slices of 0.15 radian
1217 const int slices = 42;
1218 const float ang_step = (float)(2.0 * CV_PI / slices);
1219 uint8_t slice[slices + 1];
1220 uint8_t sorted_idx[ang_size];
1221 quantized_counting_sort(Ang, ang_size, ang_step, (float)(2.0 * CV_PI),
1222 sorted_idx, slice);
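  // After the sort, sorted_idx[slice[s] .. slice[s+1]-1] indexes the gradients
  // whose angle falls in [s*ang_step, (s+1)*ang_step), so summing resX/resY
  // over sorted_idx[slice[sn] .. slice[sn+win]-1] accumulates a pi/3 arc.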
1223
1224 // Find the main angle by sliding a window of 7-slice size(=PI/3) around the
1225 // keypoint
1226 const int win = 7;
1227
1228 float maxX = 0.0f, maxY = 0.0f;
1229 for (int i = slice[0]; i < slice[win]; i++) {
1230 maxX += resX[sorted_idx[i]];
1231 maxY += resY[sorted_idx[i]];
1232 }
1233 float maxNorm = maxX * maxX + maxY * maxY;
1234
1235 for (int sn = 1; sn <= slices - win; sn++) {
1236 if (slice[sn] == slice[sn - 1] && slice[sn + win] == slice[sn + win - 1])
1237 continue; // The contents of the window didn't change; don't repeat the
1238 // computation
1239
1240 float sumX = 0.0f, sumY = 0.0f;
1241 for (int i = slice[sn]; i < slice[sn + win]; i++) {
1242 sumX += resX[sorted_idx[i]];
1243 sumY += resY[sorted_idx[i]];
1244 }
1245
1246 float norm = sumX * sumX + sumY * sumY;
1247 if (norm > maxNorm)
1248 maxNorm = norm, maxX = sumX, maxY = sumY; // Found bigger one; update
1249 }
1250
1251 for (int sn = slices - win + 1; sn < slices; sn++) {
1252 int remain = sn + win - slices;
1253
1254 if (slice[sn] == slice[sn - 1] && slice[remain] == slice[remain - 1])
1255 continue;
1256
1257 float sumX = 0.0f, sumY = 0.0f;
1258 for (int i = slice[sn]; i < slice[slices]; i++) {
1259 sumX += resX[sorted_idx[i]];
1260 sumY += resY[sorted_idx[i]];
1261 }
1262 for (int i = slice[0]; i < slice[remain]; i++) {
1263 sumX += resX[sorted_idx[i]];
1264 sumY += resY[sorted_idx[i]];
1265 }
1266
1267 float norm = sumX * sumX + sumY * sumY;
1268 if (norm > maxNorm) maxNorm = norm, maxX = sumX, maxY = sumY;
1269 }
1270
1271 // Store the final result
1272 kpt.angle = getAngleV2(maxX, maxY);
1273}
1274
1275/* ************************************************************************* */
1276/**
1277 * @brief This method computes the upright descriptor (not rotation invariant)
1278 * of the provided keypoint
1279 * @param kpt Input keypoint
1280 * @param desc Descriptor vector
1281 * @note Rectangular grid of 24 s x 24 s. Descriptor Length 64. The descriptor
 1282 * is inspired by Agrawal et al., CenSurE: Center Surround Extremas for
1283 * Realtime Feature Detection and Matching, ECCV 2008
1284 */
1285void MSURF_Upright_Descriptor_64_InvokerV2::Get_MSURF_Upright_Descriptor_64(
1286 const KeyPoint& kpt, float* desc) const {
1287 float dx = 0.0, dy = 0.0, mdx = 0.0, mdy = 0.0, gauss_s1 = 0.0,
1288 gauss_s2 = 0.0;
1289 float rx = 0.0, ry = 0.0, len = 0.0, xf = 0.0, yf = 0.0, ys = 0.0, xs = 0.0;
1290 float sample_x = 0.0, sample_y = 0.0;
1291 int x1 = 0, y1 = 0, sample_step = 0, pattern_size = 0;
1292 int x2 = 0, y2 = 0, kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
1293 float fx = 0.0, fy = 0.0, ratio = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0,
1294 res4 = 0.0;
1295 int scale = 0, dsize = 0, level = 0;
1296
1297 // Subregion centers for the 4x4 gaussian weighting
1298 float cx = -0.5f, cy = 0.5f;
1299
1300 // Set the descriptor size and the sample and pattern sizes
1301 dsize = 64;
1302 sample_step = 5;
1303 pattern_size = 12;
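  // Layout: 4x4 subregions x 4 statistics (dx, dy, |dx|, |dy|) = 64 values;
  // each subregion aggregates a 9x9 grid of samples spaced `scale` pixels
  // apart over the 24s x 24s pattern.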
1304
1305 // Get the information from the keypoint
1306 level = kpt.class_id;
1307 ratio = evolution_[level].octave_ratio;
1308 scale = fRoundV2(0.5f * kpt.size / ratio);
1309 yf = kpt.pt.y / ratio;
1310 xf = kpt.pt.x / ratio;
1311
1312 i = -8;
1313
1314 // Calculate descriptor for this interest point
1315 // Area of size 24 s x 24 s
1316 while (i < pattern_size) {
1317 j = -8;
1318 i = i - 4;
1319
1320 cx += 1.0f;
1321 cy = -0.5f;
1322
1323 while (j < pattern_size) {
1324 dx = dy = mdx = mdy = 0.0;
1325 cy += 1.0f;
1326 j = j - 4;
1327
1328 ky = i + sample_step;
1329 kx = j + sample_step;
1330
1331 ys = yf + (ky * scale);
1332 xs = xf + (kx * scale);
1333
1334 for (int k = i; k < i + 9; k++) {
1335 for (int l = j; l < j + 9; l++) {
1336 sample_y = k * scale + yf;
1337 sample_x = l * scale + xf;
1338
1339 // Get the gaussian weighted x and y responses
1340 gauss_s1 = gaussianV2(xs - sample_x, ys - sample_y, 2.50f * scale);
1341
1342 y1 = (int)(sample_y - .5);
1343 x1 = (int)(sample_x - .5);
1344
1345 y2 = (int)(sample_y + .5);
1346 x2 = (int)(sample_x + .5);
1347
1348 fx = sample_x - x1;
1349 fy = sample_y - y1;
1350
1351 res1 = *(evolution_[level].Lx.ptr<float>(y1) + x1);
1352 res2 = *(evolution_[level].Lx.ptr<float>(y1) + x2);
1353 res3 = *(evolution_[level].Lx.ptr<float>(y2) + x1);
1354 res4 = *(evolution_[level].Lx.ptr<float>(y2) + x2);
1355 rx = (1.0f - fx) * (1.0f - fy) * res1 + fx * (1.0f - fy) * res2 +
1356 (1.0f - fx) * fy * res3 + fx * fy * res4;
1357
1358 res1 = *(evolution_[level].Ly.ptr<float>(y1) + x1);
1359 res2 = *(evolution_[level].Ly.ptr<float>(y1) + x2);
1360 res3 = *(evolution_[level].Ly.ptr<float>(y2) + x1);
1361 res4 = *(evolution_[level].Ly.ptr<float>(y2) + x2);
1362 ry = (1.0f - fx) * (1.0f - fy) * res1 + fx * (1.0f - fy) * res2 +
1363 (1.0f - fx) * fy * res3 + fx * fy * res4;
1364
1365 rx = gauss_s1 * rx;
1366 ry = gauss_s1 * ry;
1367
1368 // Sum the derivatives to the cumulative descriptor
1369 dx += rx;
1370 dy += ry;
1371 mdx += fabs(rx);
1372 mdy += fabs(ry);
1373 }
1374 }
1375
1376 // Add the values to the descriptor vector
1377 gauss_s2 = gaussianV2(cx - 2.0f, cy - 2.0f, 1.5f);
1378
1379 desc[dcount++] = dx * gauss_s2;
1380 desc[dcount++] = dy * gauss_s2;
1381 desc[dcount++] = mdx * gauss_s2;
1382 desc[dcount++] = mdy * gauss_s2;
1383
1384 len += (dx * dx + dy * dy + mdx * mdx + mdy * mdy) * gauss_s2 * gauss_s2;
1385
1386 j += 9;
1387 }
1388
1389 i += 9;
1390 }
1391
1392 // convert to unit vector
1393 len = sqrt(len);
1394
1395 for (i = 0; i < dsize; i++) {
1396 desc[i] /= len;
1397 }
1398}
1399
1400/* ************************************************************************* */
1401/**
1402 * @brief This method computes the descriptor of the provided keypoint given the
1403 * main orientation of the keypoint
1404 * @param kpt Input keypoint
1405 * @param desc Descriptor vector
1406 * @note Rectangular grid of 24 s x 24 s. Descriptor Length 64. The descriptor
 1407 * is inspired by Agrawal et al., CenSurE: Center Surround Extremas for
1408 * Realtime Feature Detection and Matching, ECCV 2008
1409 */
1410void MSURF_Descriptor_64_InvokerV2::Get_MSURF_Descriptor_64(const KeyPoint& kpt,
1411 float* desc) const {
1412 float dx = 0.0, dy = 0.0, mdx = 0.0, mdy = 0.0, gauss_s1 = 0.0,
1413 gauss_s2 = 0.0;
1414 float rx = 0.0, ry = 0.0, rrx = 0.0, rry = 0.0, len = 0.0, xf = 0.0, yf = 0.0,
1415 ys = 0.0, xs = 0.0;
1416 float sample_x = 0.0, sample_y = 0.0, co = 0.0, si = 0.0, angle = 0.0;
1417 float fx = 0.0, fy = 0.0, ratio = 0.0, res1 = 0.0, res2 = 0.0, res3 = 0.0,
1418 res4 = 0.0;
1419 int x1 = 0, y1 = 0, x2 = 0, y2 = 0, sample_step = 0, pattern_size = 0;
1420 int kx = 0, ky = 0, i = 0, j = 0, dcount = 0;
1421 int scale = 0, dsize = 0, level = 0;
1422
1423 // Subregion centers for the 4x4 gaussian weighting
1424 float cx = -0.5f, cy = 0.5f;
1425
1426 // Set the descriptor size and the sample and pattern sizes
1427 dsize = 64;
1428 sample_step = 5;
1429 pattern_size = 12;
1430
1431 // Get the information from the keypoint
1432 level = kpt.class_id;
1433 ratio = evolution_[level].octave_ratio;
1434 scale = fRoundV2(0.5f * kpt.size / ratio);
1435 angle = kpt.angle;
1436 yf = kpt.pt.y / ratio;
1437 xf = kpt.pt.x / ratio;
1438 co = cos(angle);
1439 si = sin(angle);
1440
1441 i = -8;
1442
1443 // Calculate descriptor for this interest point
1444 // Area of size 24 s x 24 s
1445 while (i < pattern_size) {
1446 j = -8;
1447 i = i - 4;
1448
1449 cx += 1.0f;
1450 cy = -0.5f;
1451
1452 while (j < pattern_size) {
1453 dx = dy = mdx = mdy = 0.0;
1454 cy += 1.0f;
1455 j = j - 4;
1456
1457 ky = i + sample_step;
1458 kx = j + sample_step;
1459
1460 xs = xf + (-kx * scale * si + ky * scale * co);
1461 ys = yf + (kx * scale * co + ky * scale * si);
1462
1463 for (int k = i; k < i + 9; ++k) {
1464 for (int l = j; l < j + 9; ++l) {
1465 // Get coords of sample point on the rotated axis
1466 sample_y = yf + (l * scale * co + k * scale * si);
1467 sample_x = xf + (-l * scale * si + k * scale * co);
1468
1469 // Get the gaussian weighted x and y responses
1470 gauss_s1 = gaussianV2(xs - sample_x, ys - sample_y, 2.5f * scale);
1471
1472 y1 = fRoundV2(sample_y - 0.5f);
1473 x1 = fRoundV2(sample_x - 0.5f);
1474
1475 y2 = fRoundV2(sample_y + 0.5f);
1476 x2 = fRoundV2(sample_x + 0.5f);
1477
1478 fx = sample_x - x1;
1479 fy = sample_y - y1;
1480
1481 res1 = *(evolution_[level].Lx.ptr<float>(y1) + x1);
1482 res2 = *(evolution_[level].Lx.ptr<float>(y1) + x2);
1483 res3 = *(evolution_[level].Lx.ptr<float>(y2) + x1);
1484 res4 = *(evolution_[level].Lx.ptr<float>(y2) + x2);
1485 rx = (1.0f - fx) * (1.0f - fy) * res1 + fx * (1.0f - fy) * res2 +
1486 (1.0f - fx) * fy * res3 + fx * fy * res4;
1487
1488 res1 = *(evolution_[level].Ly.ptr<float>(y1) + x1);
1489 res2 = *(evolution_[level].Ly.ptr<float>(y1) + x2);
1490 res3 = *(evolution_[level].Ly.ptr<float>(y2) + x1);
1491 res4 = *(evolution_[level].Ly.ptr<float>(y2) + x2);
1492 ry = (1.0f - fx) * (1.0f - fy) * res1 + fx * (1.0f - fy) * res2 +
1493 (1.0f - fx) * fy * res3 + fx * fy * res4;
1494
1495 // Get the x and y derivatives on the rotated axis
1496 rry = gauss_s1 * (rx * co + ry * si);
1497 rrx = gauss_s1 * (-rx * si + ry * co);
1498
1499 // Sum the derivatives to the cumulative descriptor
1500 dx += rrx;
1501 dy += rry;
1502 mdx += fabs(rrx);
1503 mdy += fabs(rry);
1504 }
1505 }
1506
1507 // Add the values to the descriptor vector
1508 gauss_s2 = gaussianV2(cx - 2.0f, cy - 2.0f, 1.5f);
1509 desc[dcount++] = dx * gauss_s2;
1510 desc[dcount++] = dy * gauss_s2;
1511 desc[dcount++] = mdx * gauss_s2;
1512 desc[dcount++] = mdy * gauss_s2;
1513
1514 len += (dx * dx + dy * dy + mdx * mdx + mdy * mdy) * gauss_s2 * gauss_s2;
1515
1516 j += 9;
1517 }
1518
1519 i += 9;
1520 }
1521
1522 // convert to unit vector
1523 len = sqrt(len);
1524
1525 for (i = 0; i < dsize; i++) {
1526 desc[i] /= len;
1527 }
1528}
1529
1530/* ************************************************************************* */
1531/**
 1532 * @brief This method computes the upright descriptor (not rotation invariant)
1533 * of the provided keypoint
1534 * @param kpt Input keypoint
1535 * @param desc Descriptor vector
1536 */
1537void Upright_MLDB_Full_Descriptor_InvokerV2::Get_Upright_MLDB_Full_Descriptor(
1538 const KeyPoint& kpt, unsigned char* desc) const {
1539 float di = 0.0, dx = 0.0, dy = 0.0;
1540 float ri = 0.0, rx = 0.0, ry = 0.0, xf = 0.0, yf = 0.0;
1541 float sample_x = 0.0, sample_y = 0.0, ratio = 0.0;
1542 int x1 = 0, y1 = 0, sample_step = 0, pattern_size = 0;
1543 int level = 0, nsamples = 0, scale = 0;
1544 int dcount1 = 0, dcount2 = 0;
1545
1546 CV_DbgAssert(options_.descriptor_channels <= 3);
1547
1548 // Matrices for the M-LDB descriptor: the dimensions are [grid size] by
1549 // [channel size]
1550 float values_1[4][3];
1551 float values_2[9][3];
1552 float values_3[16][3];
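  // The three grids below contribute C(4,2) + C(9,2) + C(16,2) =
  // 6 + 36 + 120 = 162 pairwise comparisons; with the 3 channels
  // (intensity, dx, dy) that gives the 486-bit full MLDB descriptor packed
  // 8 bits per byte into desc[].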
1553
1554 // Get the information from the keypoint
1555 level = kpt.class_id;
1556 ratio = evolution_[level].octave_ratio;
1557 scale = evolution_[level].sigma_size;
1558 yf = kpt.pt.y / ratio;
1559 xf = kpt.pt.x / ratio;
1560
1561 // First 2x2 grid
1562 pattern_size = options_.descriptor_pattern_size;
1563 sample_step = pattern_size;
1564
1565 for (int i = -pattern_size; i < pattern_size; i += sample_step) {
1566 for (int j = -pattern_size; j < pattern_size; j += sample_step) {
1567 di = dx = dy = 0.0;
1568 nsamples = 0;
1569
1570 for (int k = i; k < i + sample_step; k++) {
1571 for (int l = j; l < j + sample_step; l++) {
1572 // Get the coordinates of the sample point
1573 sample_y = yf + l * scale;
1574 sample_x = xf + k * scale;
1575
1576 y1 = fRoundV2(sample_y);
1577 x1 = fRoundV2(sample_x);
1578
1579 ri = *(evolution_[level].Lt.ptr<float>(y1) + x1);
1580 rx = *(evolution_[level].Lx.ptr<float>(y1) + x1);
1581 ry = *(evolution_[level].Ly.ptr<float>(y1) + x1);
1582
1583 di += ri;
1584 dx += rx;
1585 dy += ry;
1586 nsamples++;
1587 }
1588 }
1589
1590 di /= nsamples;
1591 dx /= nsamples;
1592 dy /= nsamples;
1593
1594 values_1[dcount2][0] = di;
1595 values_1[dcount2][1] = dx;
1596 values_1[dcount2][2] = dy;
1597 dcount2++;
1598 }
1599 }
1600
1601 // Do binary comparison first level
1602 for (int i = 0; i < 4; i++) {
1603 for (int j = i + 1; j < 4; j++) {
1604 if (values_1[i][0] > values_1[j][0]) {
1605 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1606 } else {
1607 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1608 }
1609 dcount1++;
1610
1611 if (values_1[i][1] > values_1[j][1]) {
1612 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1613 } else {
1614 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1615 }
1616 dcount1++;
1617
1618 if (values_1[i][2] > values_1[j][2]) {
1619 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1620 } else {
1621 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1622 }
1623 dcount1++;
1624 }
1625 }
1626
1627 // Second 3x3 grid
1628 sample_step = static_cast<int>(ceil(pattern_size * 2. / 3.));
1629 dcount2 = 0;
1630
1631 for (int i = -pattern_size; i < pattern_size; i += sample_step) {
1632 for (int j = -pattern_size; j < pattern_size; j += sample_step) {
1633 di = dx = dy = 0.0;
1634 nsamples = 0;
1635
1636 for (int k = i; k < i + sample_step; k++) {
1637 for (int l = j; l < j + sample_step; l++) {
1638 // Get the coordinates of the sample point
1639 sample_y = yf + l * scale;
1640 sample_x = xf + k * scale;
1641
1642 y1 = fRoundV2(sample_y);
1643 x1 = fRoundV2(sample_x);
1644
1645 ri = *(evolution_[level].Lt.ptr<float>(y1) + x1);
1646 rx = *(evolution_[level].Lx.ptr<float>(y1) + x1);
1647 ry = *(evolution_[level].Ly.ptr<float>(y1) + x1);
1648
1649 di += ri;
1650 dx += rx;
1651 dy += ry;
1652 nsamples++;
1653 }
1654 }
1655
1656 di /= nsamples;
1657 dx /= nsamples;
1658 dy /= nsamples;
1659
1660 values_2[dcount2][0] = di;
1661 values_2[dcount2][1] = dx;
1662 values_2[dcount2][2] = dy;
1663 dcount2++;
1664 }
1665 }
1666
1667 // Do binary comparison second level
1668 dcount2 = 0;
1669 for (int i = 0; i < 9; i++) {
1670 for (int j = i + 1; j < 9; j++) {
1671 if (values_2[i][0] > values_2[j][0]) {
1672 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1673 } else {
1674 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1675 }
1676 dcount1++;
1677
1678 if (values_2[i][1] > values_2[j][1]) {
1679 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1680 } else {
1681 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1682 }
1683 dcount1++;
1684
1685 if (values_2[i][2] > values_2[j][2]) {
1686 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1687 } else {
1688 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1689 }
1690 dcount1++;
1691 }
1692 }
1693
1694 // Third 4x4 grid
1695 sample_step = pattern_size / 2;
1696 dcount2 = 0;
1697
1698 for (int i = -pattern_size; i < pattern_size; i += sample_step) {
1699 for (int j = -pattern_size; j < pattern_size; j += sample_step) {
1700 di = dx = dy = 0.0;
1701 nsamples = 0;
1702
1703 for (int k = i; k < i + sample_step; k++) {
1704 for (int l = j; l < j + sample_step; l++) {
1705 // Get the coordinates of the sample point
1706 sample_y = yf + l * scale;
1707 sample_x = xf + k * scale;
1708
1709 y1 = fRoundV2(sample_y);
1710 x1 = fRoundV2(sample_x);
1711
1712 ri = *(evolution_[level].Lt.ptr<float>(y1) + x1);
1713 rx = *(evolution_[level].Lx.ptr<float>(y1) + x1);
1714 ry = *(evolution_[level].Ly.ptr<float>(y1) + x1);
1715
1716 di += ri;
1717 dx += rx;
1718 dy += ry;
1719 nsamples++;
1720 }
1721 }
1722
1723 di /= nsamples;
1724 dx /= nsamples;
1725 dy /= nsamples;
1726
1727 values_3[dcount2][0] = di;
1728 values_3[dcount2][1] = dx;
1729 values_3[dcount2][2] = dy;
1730 dcount2++;
1731 }
1732 }
1733
1734 // Binary comparisons for the third (4x4) grid: 120 pairs x 3 channels
1735 dcount2 = 0;
1736 for (int i = 0; i < 16; i++) {
1737 for (int j = i + 1; j < 16; j++) {
1738 if (values_3[i][0] > values_3[j][0]) {
1739 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1740 } else {
1741 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1742 }
1743 dcount1++;
1744
1745 if (values_3[i][1] > values_3[j][1]) {
1746 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1747 } else {
1748 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1749 }
1750 dcount1++;
1751
1752 if (values_3[i][2] > values_3[j][2]) {
1753 desc[dcount1 / 8] |= (1 << (dcount1 % 8));
1754 } else {
1755 desc[dcount1 / 8] &= ~(1 << (dcount1 % 8));
1756 }
1757 dcount1++;
1758 }
1759 }
1760}
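// A minimal illustrative sketch (not part of the build, added for clarity):
// the full 3-channel M-LDB descriptor above emits one bit per channel per
// sample pair, i.e. 3 * (6 + 36 + 120) = 486 bits, which is 61 bytes.
// The hypothetical helper below only restates that arithmetic.
#if 0
static constexpr int mldb_full_descriptor_bits(int nchannels) {
  // Pairwise comparisons on the 2x2, 3x3 and 4x4 grids: C(4,2)+C(9,2)+C(16,2)
  return nchannels * ((4 * 3) / 2 + (9 * 8) / 2 + (16 * 15) / 2);
}
static_assert(mldb_full_descriptor_bits(3) == 486,
              "the full 3-channel M-LDB descriptor is 486 bits (61 bytes)");
#endif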
1761
1762inline void MLDB_Full_Descriptor_InvokerV2::MLDB_Fill_Values(
1763 float* values, int sample_step, int level, float xf, float yf, float co,
1764 float si, float scale) const {
1765 int pattern_size = options_.descriptor_pattern_size;
1766 int chan = options_.descriptor_channels;
1767 int valpos = 0;
1768
1769 for (int i = -pattern_size; i < pattern_size; i += sample_step) {
1770 for (int j = -pattern_size; j < pattern_size; j += sample_step) {
1771 float di, dx, dy;
1772 di = dx = dy = 0.0;
1773 int nsamples = 0;
1774
1775 for (int k = i; k < i + sample_step; k++) {
1776 for (int l = j; l < j + sample_step; l++) {
1777 float sample_y = yf + (l * co * scale + k * si * scale);
1778 float sample_x = xf + (-l * si * scale + k * co * scale);
1779
1780 int y1 = fRoundV2(sample_y);
1781 int x1 = fRoundV2(sample_x);
1782
1783 float ri = *(evolution_[level].Lt.ptr<float>(y1) + x1);
1784 di += ri;
1785
1786 if (chan > 1) {
1787 float rx = *(evolution_[level].Lx.ptr<float>(y1) + x1);
1788 float ry = *(evolution_[level].Ly.ptr<float>(y1) + x1);
1789 if (chan == 2) {
1790 dx += sqrtf(rx * rx + ry * ry);
1791 } else {
1792 float rry = rx * co + ry * si;
1793 float rrx = -rx * si + ry * co;
1794 dx += rrx;
1795 dy += rry;
1796 }
1797 }
1798 nsamples++;
1799 }
1800 }
1801 di /= nsamples;
1802 dx /= nsamples;
1803 dy /= nsamples;
1804
1805 values[valpos] = di;
1806 if (chan > 1) {
1807 values[valpos + 1] = dx;
1808 }
1809 if (chan > 2) {
1810 values[valpos + 2] = dy;
1811 }
1812 valpos += chan;
1813 }
1814 }
1815}
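// Illustrative note (not part of the build): MLDB_Fill_Values writes the
// per-cell averages interleaved by channel, e.g. for 3 channels
//   values = { di_0, dx_0, dy_0, di_1, dx_1, dy_1, ... }
// so MLDB_Binary_Comparisons below can address them as values[chan * i + pos].
// The hypothetical accessor below just spells out that indexing.
#if 0
static inline float mldb_cell_value(const float* values, int chan, int cell,
                                    int channel) {
  return values[chan * cell + channel];  // channel-interleaved layout
}
#endif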
1816
1817void MLDB_Full_Descriptor_InvokerV2::MLDB_Binary_Comparisons(
1818 float* values, unsigned char* desc, int count, int& dpos) const {
1819 int chan = options_.descriptor_channels;
1820 int32_t* ivalues = (int32_t*)values;
1821 for (int i = 0; i < count * chan; i++) {
1822 ivalues[i] = CV_TOGGLE_FLT(ivalues[i]);
1823 }
1824
1825 for (int pos = 0; pos < chan; pos++) {
1826 for (int i = 0; i < count; i++) {
1827 int32_t ival = ivalues[chan * i + pos];
1828 for (int j = i + 1; j < count; j++) {
1829 if (ival > ivalues[chan * j + pos]) {
1830 desc[dpos >> 3] |= (1 << (dpos & 7));
1831 } else {
1832 desc[dpos >> 3] &= ~(1 << (dpos & 7));
1833 }
1834 dpos++;
1835 }
1836 }
1837 }
1838}
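// Illustrative sketch (not part of the build): CV_TOGGLE_FLT flips the
// magnitude bits of negative IEEE-754 floats so that signed 32-bit integer
// comparison reproduces the float ordering (negatives stay below positives,
// and more-negative values map to smaller integers). The hypothetical helpers
// below demonstrate the property on a few values; <cstring> and <cstdint>
// are already included at the top of this file.
#if 0
static int32_t toggle_flt_demo(float f) {
  int32_t i;
  std::memcpy(&i, &f, sizeof(i));        // reinterpret the float's bits
  return i ^ (i < 0 ? 0x7fffffff : 0);   // same transform as CV_TOGGLE_FLT
}
static bool toggle_flt_preserves_order() {
  return toggle_flt_demo(-2.0f) < toggle_flt_demo(-1.0f) &&
         toggle_flt_demo(-1.0f) < toggle_flt_demo(0.5f) &&
         toggle_flt_demo(0.5f) < toggle_flt_demo(2.0f);  // evaluates to true
}
#endif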
1839
1840/* ************************************************************************* */
1841/**
1842 * @brief This method computes the full M-LDB descriptor of the provided
1843 * keypoint, given the main orientation of the keypoint
1844 * @param kpt Input keypoint
1845 * @param desc Descriptor vector
1846 */
1847void MLDB_Full_Descriptor_InvokerV2::Get_MLDB_Full_Descriptor(
1848 const KeyPoint& kpt, unsigned char* desc) const {
1849 const int max_channels = 3;
1850 CV_Assert(options_.descriptor_channels <= max_channels);
1851 float values[16 * max_channels];
1852 const double size_mult[3] = {1, 2.0 / 3.0, 1.0 / 2.0};
1853
1854 float ratio = evolution_[kpt.class_id].octave_ratio;
1855 float scale = (float)(evolution_[kpt.class_id].sigma_size);
1856 float xf = kpt.pt.x / ratio;
1857 float yf = kpt.pt.y / ratio;
1858 float co = cos(kpt.angle);
1859 float si = sin(kpt.angle);
1860 int pattern_size = options_.descriptor_pattern_size;
1861
1862 int dpos = 0;
1863 for (int lvl = 0; lvl < 3; lvl++) {
1864 int val_count = (lvl + 2) * (lvl + 2);
1865 int sample_step = static_cast<int>(ceil(pattern_size * size_mult[lvl]));
1866 MLDB_Fill_Values(values, sample_step, kpt.class_id, xf, yf, co, si, scale);
1867 MLDB_Binary_Comparisons(values, desc, val_count, dpos);
1868 }
1869
1870 // Clear the uninitialized bits of the last byte
1871 int remain = dpos % 8;
1872 if (remain > 0) desc[dpos >> 3] &= (0xff >> (8 - remain));
1873}
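// Illustrative sketch (not part of the build): for each of the three passes
// above, the grid has (lvl + 2)^2 cells and the sample step shrinks with
// size_mult. Assuming the stock AKAZE default pattern_size of 10, that gives
// a 2x2 grid with step 10, a 3x3 grid with step 7 and a 4x4 grid with step 5.
#if 0
static void mldb_grid_layout_demo(int pattern_size) {
  const double size_mult[3] = {1.0, 2.0 / 3.0, 1.0 / 2.0};
  for (int lvl = 0; lvl < 3; lvl++) {
    int cells = (lvl + 2) * (lvl + 2);                              // 4, 9, 16
    int step = static_cast<int>(ceil(pattern_size * size_mult[lvl]));
    cout << "level " << lvl << ": " << cells << " cells, step " << step
         << endl;
  }
}
#endif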
1874
1875/* ************************************************************************* */
1876/**
1877 * @brief This function compares pairs of values specified by comps[] and sets
1878 * the i-th bit of desc if the i-th comparison is true
1879 * @param values Input array of values to compare
1880 * @param comps Input array of indices at which two values are compared
1881 * @param nbits The number of comparisons to perform, i.e. the number of bits
1882 * to write into desc
1883 * @param desc Descriptor vector
1884 */
1885template <typename Typ_ = uint64_t>
1886inline void compare_and_pack_descriptor(const float values[], const int* comps,
1887 const int nbits, unsigned char* desc) {
1888 const int nbits_in_bucket = sizeof(Typ_) << 3;
1889 const int(*idx)[2] = (const int(*)[2])comps;
1890 int written = 0;
1891
1892 Typ_ bucket = 0;
1893 for (int i = 0; i < nbits; i++) {
1894 bucket <<= 1;
1895 if (values[idx[i][0]] > values[idx[i][1]]) bucket |= 1;
1896
1897 if ((i & (nbits_in_bucket - 1)) == (nbits_in_bucket - 1))
1898 (reinterpret_cast<Typ_*>(desc))[written++] = bucket, bucket = 0;
1899 }
1900
1901 // Flush the remaining bits in bucket
1902 if (written * nbits_in_bucket < nbits) {
1903 written *= sizeof(Typ_); /* Convert the unit from bucket to byte */
1904
1905 int remain = (nbits + 7) / 8 - written;
1906 for (int i = 0; i < remain; i++)
1907 desc[written++] = (uint8_t)(bucket & 0xFF), bucket >>= 8;
1908 }
1909}
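// Illustrative sketch (not part of the build): with 64-bit buckets, a
// descriptor of nbits bits is written as nbits / 64 full buckets followed by
// a byte-wise flush of the remainder. For example, a 486-bit descriptor
// occupies 7 full buckets (56 bytes) plus 5 tail bytes, 61 bytes in total.
#if 0
static void pack_size_demo() {
  const int nbits = 486;                     // e.g. the full 3-channel size
  const int full_buckets = nbits / 64;       // 7 complete uint64_t buckets
  const int total_bytes = (nbits + 7) / 8;   // 61
  const int tail_bytes = total_bytes - full_buckets * 8;  // 5
  cout << full_buckets << " buckets + " << tail_bytes << " tail bytes = "
       << total_bytes << " bytes" << endl;
}
#endif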
1910
1911/* ************************************************************************* */
1912/**
1913 * @brief This method computes the M-LDB descriptor of the provided keypoint
1914 * given the main orientation of the keypoint. The descriptor is computed based
1915 * on a subset of the bits of the whole descriptor
1916 * @param kpt Input keypoint
1917 * @param desc Descriptor vector
1918 */
1919void MLDB_Descriptor_Subset_InvokerV2::Get_MLDB_Descriptor_Subset(
1920 const KeyPoint& kpt, unsigned char* desc) const {
1921 const TEvolutionV2& e = evolution_[kpt.class_id];
1922
1923 // Get the information from the keypoint
1924 const int scale = e.sigma_size;
1925 const float yf = kpt.pt.y / e.octave_ratio;
1926 const float xf = kpt.pt.x / e.octave_ratio;
1927 const float co = cos(kpt.angle);
1928 const float si = sin(kpt.angle);
1929
1930 // Matrices for the M-LDB descriptor: the size is [grid size] * [channel size]
1931 CV_DbgAssert(descriptorSamples_.rows <= (4 + 9 + 16));
1932 CV_DbgAssert(options_.descriptor_channels <= 3);
1933 float values[(4 + 9 + 16) * 3];
1934
1935 // coords[3] is { grid_width, x, y }
1936 const int* coords = descriptorSamples_.ptr<int>(0);
1937
1938 // Sample all the points in descriptorSamples_; only the comparisons selected in descriptorBits_ are done afterwards
1939 for (int i = 0; i < descriptorSamples_.rows; i++, coords += 3) {
1940 float di = 0.0f;
1941 float dx = 0.0f;
1942 float dy = 0.0f;
1943
1944 for (int x = coords[1]; x < coords[1] + coords[0]; x++) {
1945 for (int y = coords[2]; y < coords[2] + coords[0]; y++) {
1946 // Get the coordinates of the sample point
1947 int x1 = fRoundV2(xf + (x * scale * co - y * scale * si));
1948 int y1 = fRoundV2(yf + (x * scale * si + y * scale * co));
1949
1950 di += *(e.Lt.ptr<float>(y1) + x1);
1951
1952 if (options_.descriptor_channels > 1) {
1953 float rx = *(e.Lx.ptr<float>(y1) + x1);
1954 float ry = *(e.Ly.ptr<float>(y1) + x1);
1955
1956 if (options_.descriptor_channels == 2) {
1957 dx += sqrtf(rx * rx + ry * ry);
1958 } else if (options_.descriptor_channels == 3) {
1959 // Get the x and y derivatives on the rotated axis
1960 dx += rx * co + ry * si;
1961 dy += -rx * si + ry * co;
1962 }
1963 }
1964 }
1965 }
1966
1967 values[i * options_.descriptor_channels] = di;
1968
1969 if (options_.descriptor_channels == 2) {
1970 values[i * options_.descriptor_channels + 1] = dx;
1971 } else if (options_.descriptor_channels == 3) {
1972 values[i * options_.descriptor_channels + 1] = dx;
1973 values[i * options_.descriptor_channels + 2] = dy;
1974 }
1975 }
1976
1977 // Do the comparisons
1978 compare_and_pack_descriptor<uint64_t>(values, descriptorBits_.ptr<int>(0),
1979 descriptorBits_.rows, desc);
1980}
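// Illustrative sketch (not part of the build): the sampling coordinates used
// above are the grid offsets rotated by the keypoint angle and scaled by the
// integer sigma size, i.e. a standard 2-D rotation about the keypoint
// location. The hypothetical helper below restates that mapping.
#if 0
static inline void rotated_sample(float xf, float yf, float co, float si,
                                  int scale, int x, int y, int* x1, int* y1) {
  *x1 = fRoundV2(xf + (x * scale * co - y * scale * si));
  *y1 = fRoundV2(yf + (x * scale * si + y * scale * co));
}
#endif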
1981
1982/* ************************************************************************* */
1983/**
1984 * @brief This method computes the upright (not rotation invariant) M-LDB
1985 * descriptor of the provided keypoint; the main orientation is not used. The
1986 * descriptor is computed based on a subset of the bits of the whole
1987 * descriptor
1988 * @param kpt Input keypoint
1989 * @param desc Descriptor vector
1990 */
1991void Upright_MLDB_Descriptor_Subset_InvokerV2::
1992 Get_Upright_MLDB_Descriptor_Subset(const KeyPoint& kpt,
1993 unsigned char* desc) const {
1994 const TEvolutionV2& e = evolution_[kpt.class_id];
1995
1996 // Get the information from the keypoint
1997 const int scale = e.sigma_size;
1998 const float yf = kpt.pt.y / e.octave_ratio;
1999 const float xf = kpt.pt.x / e.octave_ratio;
2000
2001 // Matrices for the M-LDB descriptor: the size is [grid size] * [channel size]
2002 CV_DbgAssert(descriptorSamples_.rows <= (4 + 9 + 16));
2003 CV_DbgAssert(options_.descriptor_channels <= 3);
2004 float values[(4 + 9 + 16) * 3];
2005
2006 // coords[3] is { grid_width, x, y }
2007 const int* coords = descriptorSamples_.ptr<int>(0);
2008
2009 for (int i = 0; i < descriptorSamples_.rows; i++, coords += 3) {
2010 float di = 0.0f;
2011 float dx = 0.0f;
2012 float dy = 0.0f;
2013
2014 for (int x = coords[1]; x < coords[1] + coords[0]; x++) {
2015 for (int y = coords[2]; y < coords[2] + coords[0]; y++) {
2016 // Get the coordinates of the sample point
2017 int x1 = fRoundV2(xf + x * scale);
2018 int y1 = fRoundV2(yf + y * scale);
2019
2020 di += *(e.Lt.ptr<float>(y1) + x1);
2021
2022 if (options_.descriptor_channels > 1) {
2023 float rx = *(e.Lx.ptr<float>(y1) + x1);
2024 float ry = *(e.Ly.ptr<float>(y1) + x1);
2025
2026 if (options_.descriptor_channels == 2) {
2027 dx += sqrtf(rx * rx + ry * ry);
2028 } else if (options_.descriptor_channels == 3) {
2029 dx += rx;
2030 dy += ry;
2031 }
2032 }
2033 }
2034 }
2035
2036 values[i * options_.descriptor_channels] = di;
2037
2038 if (options_.descriptor_channels == 2) {
2039 values[i * options_.descriptor_channels + 1] = dx;
2040 } else if (options_.descriptor_channels == 3) {
2041 values[i * options_.descriptor_channels + 1] = dx;
2042 values[i * options_.descriptor_channels + 2] = dy;
2043 }
2044 }
2045
2046 // Do the comparisons
2047 compare_and_pack_descriptor<uint64_t>(values, descriptorBits_.ptr<int>(0),
2048 descriptorBits_.rows, desc);
2049}
2050
2051/* ************************************************************************* */
2052/**
2053 * @brief This function computes a (quasi-random) list of bits to be taken
2054 * from the full descriptor. To speed up the extraction, the function creates
2055 * a list of the samples that are involved in generating at least one bit
2056 * (sampleList) and a list of the comparisons between those samples
2057 * (comparisons)
2058 * @param sampleList The output list of samples to take, one { grid_width, x, y } row per sample
2059 * @param comparisons The matrix with the binary comparisons
2060 * @param nbits The number of bits of the descriptor
2061 * @param pattern_size The pattern size for the binary descriptor
2062 * @param nchannels Number of channels to consider in the descriptor (1-3)
2063 * @note The function always keeps the 18 bits (3 channels by 6 comparisons) of
2064 * the coarsest 2x2 grid, since it provides the most robust estimations
2065 */
2066static void generateDescriptorSubsampleV2(Mat& sampleList, Mat& comparisons,
2067 int nbits, int pattern_size,
2068 int nchannels) {
2069#if 0
2070 // Replaced by a literal so fullM can stay on the stack; computing it here would need C++11 constexpr
2071 int fullM_rows = 0;
2072 for (int i = 0; i < 3; i++) {
2073 int gz = (i + 2)*(i + 2);
2074 fullM_rows += gz*(gz - 1) / 2;
2075 }
2076#else
2077 const int fullM_rows = 162;
2078#endif
2079
2080 int ssz = fullM_rows * nchannels; // ssz is 486 when nchannels is 3
2081
2082 CV_Assert(nbits <=
2083 ssz); // Descriptor size can't be bigger than full descriptor
2084
2085 const int steps[3] = {pattern_size, (int)ceil(2.f * pattern_size / 3.f),
2086 pattern_size / 2};
2087
2088 // Since the full descriptor is usually under 10k elements, we draw the
2089 // selection directly from the full comparison matrix. Each pick selects one
2090 // comparison (contributing one bit per channel), and the two samples involved
2091 // in that comparison are added to the sampling list
2092
2093 int fullM_stack[fullM_rows *
2094 5]; // About 6.3KB workspace with 64-bit int on stack
2095 Mat_<int> fullM(fullM_rows, 5, fullM_stack);
2096
2097 for (int i = 0, c = 0; i < 3; i++) {
2098 int gdiv = i + 2; // grid divisions, per row
2099 int gsz = gdiv * gdiv;
2100 int psz = (int)ceil(2.f * pattern_size / (float)gdiv);
2101
2102 for (int j = 0; j < gsz; j++) {
2103 for (int k = j + 1; k < gsz; k++, c++) {
2104 fullM(c, 0) = steps[i];
2105 fullM(c, 1) = psz * (j % gdiv) - pattern_size;
2106 fullM(c, 2) = psz * (j / gdiv) - pattern_size;
2107 fullM(c, 3) = psz * (k % gdiv) - pattern_size;
2108 fullM(c, 4) = psz * (k / gdiv) - pattern_size;
2109 }
2110 }
2111 }
2112
2113 int comps_stack[486 * 2]; // About 7.6KB workspace with 64-bit int on stack
2114 Mat_<int> comps(486, 2, comps_stack);
2115 comps = 1000;  // Fill with an out-of-range placeholder; valid indices are written below
2116
2117 int samples_stack[(4 + 9 + 16) *
2118 3]; // 696 bytes workspace with 64-bit int on stack
2119 Mat_<int> samples((4 + 9 + 16), 3, samples_stack);
2120
2121 // Select some samples. A sample includes all channels
2122 int count = 0;
2123 int npicks = (int)ceil(nbits / (float)nchannels);
2124 samples = -1;
2125
2126 srand(1024);
2127 for (int i = 0; i < npicks; i++) {
2128 int k = rand() % (fullM_rows - i);
2129 if (i < 6) {
2130 // Force use of the coarsest (2x2) grid values and comparisons
2131 k = i;
2132 }
2133
2134 bool n = true;  // Whether the first sample of this pick is not yet in the list
2135
2136 for (int j = 0; j < count; j++) {
2137 if (samples(j, 0) == fullM(k, 0) && samples(j, 1) == fullM(k, 1) &&
2138 samples(j, 2) == fullM(k, 2)) {
2139 n = false;
2140 comps(i * nchannels, 0) = nchannels * j;
2141 comps(i * nchannels + 1, 0) = nchannels * j + 1;
2142 comps(i * nchannels + 2, 0) = nchannels * j + 2;
2143 break;
2144 }
2145 }
2146
2147 if (n) {
2148 samples(count, 0) = fullM(k, 0);
2149 samples(count, 1) = fullM(k, 1);
2150 samples(count, 2) = fullM(k, 2);
2151 comps(i * nchannels, 0) = nchannels * count;
2152 comps(i * nchannels + 1, 0) = nchannels * count + 1;
2153 comps(i * nchannels + 2, 0) = nchannels * count + 2;
2154 count++;
2155 }
2156
2157 n = true;  // Now check whether the second sample of this pick is new
2158 for (int j = 0; j < count; j++) {
2159 if (samples(j, 0) == fullM(k, 0) && samples(j, 1) == fullM(k, 3) &&
2160 samples(j, 2) == fullM(k, 4)) {
2161 n = false;
2162 comps(i * nchannels, 1) = nchannels * j;
2163 comps(i * nchannels + 1, 1) = nchannels * j + 1;
2164 comps(i * nchannels + 2, 1) = nchannels * j + 2;
2165 break;
2166 }
2167 }
2168
2169 if (n) {
2170 samples(count, 0) = fullM(k, 0);
2171 samples(count, 1) = fullM(k, 3);
2172 samples(count, 2) = fullM(k, 4);
2173 comps(i * nchannels, 1) = nchannels * count;
2174 comps(i * nchannels + 1, 1) = nchannels * count + 1;
2175 comps(i * nchannels + 2, 1) = nchannels * count + 2;
2176 count++;
2177 }
2178
2179 fullM.row(fullM.rows - i - 1).copyTo(fullM.row(k));  // Drop the used row (sampling without replacement)
2180 }
2181
2182 sampleList = samples.rowRange(0, count).clone();
2183 comparisons = comps.rowRange(0, nbits).clone();
2184}
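// Illustrative sketch (not part of the build): a hypothetical call showing
// the shapes produced by generateDescriptorSubsampleV2. For a 256-bit,
// 3-channel descriptor with pattern_size 10 (the stock AKAZE default),
// comparisons comes back as a 256x2 matrix of value indices and sampleList
// holds at most 4 + 9 + 16 = 29 distinct { grid_width, x, y } rows.
#if 0
static void subsample_demo() {
  Mat sampleList, comparisons;
  generateDescriptorSubsampleV2(sampleList, comparisons, /*nbits=*/256,
                                /*pattern_size=*/10, /*nchannels=*/3);
  cout << sampleList.rows << " samples, " << comparisons.rows << " bits"
       << endl;
}
#endif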
2185
2186} // namespace cv