blob: 6223f77ac5f12d3540cdf62bb62da452ef6650fe [file] [log] [blame]
Brian Silvermanf1196122020-01-16 00:41:54 -08001// clang-format off
2/*M///////////////////////////////////////////////////////////////////////////////////////
3//
4// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5//
6// By downloading, copying, installing or using the software you agree to this license.
7// If you do not agree to this license, do not download, install,
8// copy or use the software.
9//
10//
11// License Agreement
12// For Open Source Computer Vision Library
13//
14// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
15// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
16// Third party copyrights are property of their respective owners.
17//
18// Redistribution and use in source and binary forms, with or without modification,
19// are permitted provided that the following conditions are met:
20//
21// * Redistribution's of source code must retain the above copyright notice,
22// this list of conditions and the following disclaimer.
23//
24// * Redistribution's in binary form must reproduce the above copyright notice,
25// this list of conditions and the following disclaimer in the documentation
26// and/or other materials provided with the distribution.
27//
28// * The name of the copyright holders may not be used to endorse or promote products
29// derived from this software without specific prior written permission.
30//
31// This software is provided by the copyright holders and contributors "as is" and
32// any express or implied warranties, including, but not limited to, the implied
33// warranties of merchantability and fitness for a particular purpose are disclaimed.
34// In no event shall the Intel Corporation or contributors be liable for any direct,
35// indirect, incidental, special, exemplary, or consequential damages
36// (including, but not limited to, procurement of substitute goods or services;
37// loss of use, data, or profits; or business interruption) however caused
38// and on any theory of liability, whether in contract, strict liability,
39// or tort (including negligence or otherwise) arising in any way out of
40// the use of this software, even if advised of the possibility of such damage.
41//
42//M*/
43
44/**********************************************************************************************\
45 Implementation of SIFT is based on the code from http://blogs.oregonstate.edu/hess/code/sift/
46 Below is the original copyright.
47
48// Copyright (c) 2006-2010, Rob Hess <hess@eecs.oregonstate.edu>
49// All rights reserved.
50
51// The following patent has been issued for methods embodied in this
52// software: "Method and apparatus for identifying scale invariant features
53// in an image and use of same for locating an object in an image," David
54// G. Lowe, US Patent 6,711,293 (March 23, 2004). Provisional application
55// filed March 8, 1999. Asignee: The University of British Columbia. For
56// further details, contact David Lowe (lowe@cs.ubc.ca) or the
57// University-Industry Liaison Office of the University of British
58// Columbia.
59
60// Note that restrictions imposed by this patent (and possibly others)
61// exist independently of and may be in conflict with the freedoms granted
62// in this license, which refers to copyright of the program, not patents
63// for any methods that it implements. Both copyright and patent law must
64// be obeyed to legally use and redistribute this program and it is not the
65// purpose of this license to induce you to infringe any patents or other
66// property right claims or to contest validity of any such claims. If you
67// redistribute or use the program, then this license merely protects you
68// from committing copyright infringement. It does not protect you from
69// committing patent infringement. So, before you do anything with this
70// program, make sure that you have permission to do so not merely in terms
71// of copyright, but also in terms of patent law.
72
73// Please note that this license is not to be understood as a guarantee
74// either. If you use the program according to this license, but in
75// conflict with patent law, it does not mean that the licensor will refund
76// you for any losses that you incur if you are sued for your patent
77// infringement.
78
79// Redistribution and use in source and binary forms, with or without
80// modification, are permitted provided that the following conditions are
81// met:
82// * Redistributions of source code must retain the above copyright and
83// patent notices, this list of conditions and the following
84// disclaimer.
85// * Redistributions in binary form must reproduce the above copyright
86// notice, this list of conditions and the following disclaimer in
87// the documentation and/or other materials provided with the
88// distribution.
89// * Neither the name of Oregon State University nor the names of its
90// contributors may be used to endorse or promote products derived
91// from this software without specific prior written permission.
92
93// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
94// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
95// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
96// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
97// HOLDER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
98// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
99// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
100// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
101// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
102// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
103// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
104\**********************************************************************************************/
105// clang-format on
106
107#include "y2020/vision/sift/sift971.h"
108
109#include <iostream>
110#include <mutex>
111#include <stdarg.h>
112#include <opencv2/core/hal/hal.hpp>
113#include <opencv2/imgproc.hpp>
114
115using namespace cv;
116
117namespace frc971 {
118namespace vision {
119namespace {
120
121#define USE_AVX2 0
122
123/******************************* Defs and macros *****************************/
124
125// default width of descriptor histogram array
126static const int SIFT_DESCR_WIDTH = 4;
127
128// default number of bins per histogram in descriptor array
129static const int SIFT_DESCR_HIST_BINS = 8;
130
131// assumed gaussian blur for input image
132static const float SIFT_INIT_SIGMA = 0.5f;
133
134// width of border in which to ignore keypoints
135static const int SIFT_IMG_BORDER = 5;
136
137// maximum steps of keypoint interpolation before failure
138static const int SIFT_MAX_INTERP_STEPS = 5;
139
140// default number of bins in histogram for orientation assignment
141static const int SIFT_ORI_HIST_BINS = 36;
142
143// determines gaussian sigma for orientation assignment
144static const float SIFT_ORI_SIG_FCTR = 1.5f;
145
146// determines the radius of the region used in orientation assignment
147static const float SIFT_ORI_RADIUS = 3 * SIFT_ORI_SIG_FCTR;
148
149// orientation magnitude relative to max that results in new feature
150static const float SIFT_ORI_PEAK_RATIO = 0.8f;
151
152// determines the size of a single descriptor orientation histogram
153static const float SIFT_DESCR_SCL_FCTR = 3.f;
154
155// threshold on magnitude of elements of descriptor vector
156static const float SIFT_DESCR_MAG_THR = 0.2f;
157
158// factor used to convert floating-point descriptor to unsigned char
159static const float SIFT_INT_DESCR_FCTR = 512.f;
160
161#define DoG_TYPE_SHORT 0
162#if DoG_TYPE_SHORT
163// intermediate type used for DoG pyramids
164typedef short sift_wt;
165static const int SIFT_FIXPT_SCALE = 48;
166#else
167// intermediate type used for DoG pyramids
168typedef float sift_wt;
169static const int SIFT_FIXPT_SCALE = 1;
170#endif
171
172static inline void unpackOctave(const KeyPoint &kpt, int &octave, int &layer,
173 float &scale) {
174 octave = kpt.octave & 255;
175 layer = (kpt.octave >> 8) & 255;
176 octave = octave < 128 ? octave : (-128 | octave);
177 scale = octave >= 0 ? 1.f / (1 << octave) : (float)(1 << -octave);
178}
179
180static Mat createInitialImage(const Mat &img, bool doubleImageSize,
181 float sigma) {
182 Mat gray, gray_fpt;
183 if (img.channels() == 3 || img.channels() == 4) {
184 cvtColor(img, gray, COLOR_BGR2GRAY);
185 gray.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
186 } else
187 img.convertTo(gray_fpt, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
188
189 float sig_diff;
190
191 if (doubleImageSize) {
192 sig_diff = sqrtf(
193 std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f));
194 Mat dbl;
195#if DoG_TYPE_SHORT
196 resize(gray_fpt, dbl, Size(gray_fpt.cols * 2, gray_fpt.rows * 2), 0, 0,
197 INTER_LINEAR_EXACT);
198#else
199 resize(gray_fpt, dbl, Size(gray_fpt.cols * 2, gray_fpt.rows * 2), 0, 0,
200 INTER_LINEAR);
201#endif
202 GaussianBlur(dbl, dbl, Size(), sig_diff, sig_diff);
203 return dbl;
204 } else {
205 sig_diff = sqrtf(
206 std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f));
207 GaussianBlur(gray_fpt, gray_fpt, Size(), sig_diff, sig_diff);
208 return gray_fpt;
209 }
210}
211
212} // namespace
213
214void SIFT971_Impl::buildGaussianPyramid(const Mat &base, std::vector<Mat> &pyr,
215 int nOctaves) const {
216 std::vector<double> sig(nOctaveLayers + 3);
217 pyr.resize(nOctaves * (nOctaveLayers + 3));
218
219 // precompute Gaussian sigmas using the following formula:
220 // \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
221 sig[0] = sigma;
222 double k = std::pow(2., 1. / nOctaveLayers);
223 for (int i = 1; i < nOctaveLayers + 3; i++) {
224 double sig_prev = std::pow(k, (double)(i - 1)) * sigma;
225 double sig_total = sig_prev * k;
226 sig[i] = std::sqrt(sig_total * sig_total - sig_prev * sig_prev);
227 }
228
229 for (int o = 0; o < nOctaves; o++) {
230 for (int i = 0; i < nOctaveLayers + 3; i++) {
231 Mat &dst = pyr[o * (nOctaveLayers + 3) + i];
232 if (o == 0 && i == 0) dst = base;
233 // base of new octave is halved image from end of previous octave
234 else if (i == 0) {
235 const Mat &src = pyr[(o - 1) * (nOctaveLayers + 3) + nOctaveLayers];
236 resize(src, dst, Size(src.cols / 2, src.rows / 2), 0, 0, INTER_NEAREST);
237 } else {
238 const Mat &src = pyr[o * (nOctaveLayers + 3) + i - 1];
239 GaussianBlur(src, dst, Size(), sig[i], sig[i]);
240 }
241 }
242 }
243}
244
245namespace {
246
247class buildDoGPyramidComputer : public ParallelLoopBody {
248 public:
249 buildDoGPyramidComputer(int _nOctaveLayers, const std::vector<Mat> &_gpyr,
250 std::vector<Mat> &_dogpyr)
251 : nOctaveLayers(_nOctaveLayers), gpyr(_gpyr), dogpyr(_dogpyr) {}
252
253 void operator()(const cv::Range &range) const override {
254 const int begin = range.start;
255 const int end = range.end;
256
257 for (int a = begin; a < end; a++) {
258 const int o = a / (nOctaveLayers + 2);
259 const int i = a % (nOctaveLayers + 2);
260
261 const Mat &src1 = gpyr[o * (nOctaveLayers + 3) + i];
262 const Mat &src2 = gpyr[o * (nOctaveLayers + 3) + i + 1];
263 Mat &dst = dogpyr[o * (nOctaveLayers + 2) + i];
264 subtract(src2, src1, dst, noArray(), DataType<sift_wt>::type);
265 }
266 }
267
268 private:
269 int nOctaveLayers;
270 const std::vector<Mat> &gpyr;
271 std::vector<Mat> &dogpyr;
272};
273
274} // namespace
275
276void SIFT971_Impl::buildDoGPyramid(const std::vector<Mat> &gpyr,
277 std::vector<Mat> &dogpyr) const {
278 int nOctaves = (int)gpyr.size() / (nOctaveLayers + 3);
279 dogpyr.resize(nOctaves * (nOctaveLayers + 2));
280
281 parallel_for_(Range(0, nOctaves * (nOctaveLayers + 2)),
282 buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr));
283}
284
285namespace {
286
287// Computes a gradient orientation histogram at a specified pixel
288static float calcOrientationHist(const Mat &img, Point pt, int radius,
289 float sigma, float *hist, int n) {
290 int i, j, k, len = (radius * 2 + 1) * (radius * 2 + 1);
291
292 float expf_scale = -1.f / (2.f * sigma * sigma);
293 AutoBuffer<float> buf(len * 4 + n + 4);
294 float *X = buf, *Y = X + len, *Mag = X, *Ori = Y + len, *W = Ori + len;
295 float *temphist = W + len + 2;
296
297 for (i = 0; i < n; i++) temphist[i] = 0.f;
298
299 for (i = -radius, k = 0; i <= radius; i++) {
300 int y = pt.y + i;
301 if (y <= 0 || y >= img.rows - 1) continue;
302 for (j = -radius; j <= radius; j++) {
303 int x = pt.x + j;
304 if (x <= 0 || x >= img.cols - 1) continue;
305
306 float dx = (float)(img.at<sift_wt>(y, x + 1) - img.at<sift_wt>(y, x - 1));
307 float dy = (float)(img.at<sift_wt>(y - 1, x) - img.at<sift_wt>(y + 1, x));
308
309 X[k] = dx;
310 Y[k] = dy;
311 W[k] = (i * i + j * j) * expf_scale;
312 k++;
313 }
314 }
315
316 len = k;
317
318 // compute gradient values, orientations and the weights over the pixel
319 // neighborhood
320 cv::hal::exp32f(W, W, len);
321 cv::hal::fastAtan2(Y, X, Ori, len, true);
322 cv::hal::magnitude32f(X, Y, Mag, len);
323
324 k = 0;
325#if CV_AVX2
326 if (USE_AVX2) {
327 __m256 __nd360 = _mm256_set1_ps(n / 360.f);
328 __m256i __n = _mm256_set1_epi32(n);
329 int CV_DECL_ALIGNED(32) bin_buf[8];
330 float CV_DECL_ALIGNED(32) w_mul_mag_buf[8];
331 for (; k <= len - 8; k += 8) {
332 __m256i __bin =
333 _mm256_cvtps_epi32(_mm256_mul_ps(__nd360, _mm256_loadu_ps(&Ori[k])));
334
335 __bin = _mm256_sub_epi32(
336 __bin, _mm256_andnot_si256(_mm256_cmpgt_epi32(__n, __bin), __n));
337 __bin = _mm256_add_epi32(
338 __bin, _mm256_and_si256(
339 __n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __bin)));
340
341 __m256 __w_mul_mag =
342 _mm256_mul_ps(_mm256_loadu_ps(&W[k]), _mm256_loadu_ps(&Mag[k]));
343
344 _mm256_store_si256((__m256i *)bin_buf, __bin);
345 _mm256_store_ps(w_mul_mag_buf, __w_mul_mag);
346
347 temphist[bin_buf[0]] += w_mul_mag_buf[0];
348 temphist[bin_buf[1]] += w_mul_mag_buf[1];
349 temphist[bin_buf[2]] += w_mul_mag_buf[2];
350 temphist[bin_buf[3]] += w_mul_mag_buf[3];
351 temphist[bin_buf[4]] += w_mul_mag_buf[4];
352 temphist[bin_buf[5]] += w_mul_mag_buf[5];
353 temphist[bin_buf[6]] += w_mul_mag_buf[6];
354 temphist[bin_buf[7]] += w_mul_mag_buf[7];
355 }
356 }
357#endif
358 for (; k < len; k++) {
359 int bin = cvRound((n / 360.f) * Ori[k]);
360 if (bin >= n) bin -= n;
361 if (bin < 0) bin += n;
362 temphist[bin] += W[k] * Mag[k];
363 }
364
365 // smooth the histogram
366 temphist[-1] = temphist[n - 1];
367 temphist[-2] = temphist[n - 2];
368 temphist[n] = temphist[0];
369 temphist[n + 1] = temphist[1];
370
371 i = 0;
372#if CV_AVX2
373 if (USE_AVX2) {
374 __m256 __d_1_16 = _mm256_set1_ps(1.f / 16.f);
375 __m256 __d_4_16 = _mm256_set1_ps(4.f / 16.f);
376 __m256 __d_6_16 = _mm256_set1_ps(6.f / 16.f);
377 for (; i <= n - 8; i += 8) {
378#if CV_FMA3
379 __m256 __hist = _mm256_fmadd_ps(
380 _mm256_add_ps(_mm256_loadu_ps(&temphist[i - 2]),
381 _mm256_loadu_ps(&temphist[i + 2])),
382 __d_1_16,
383 _mm256_fmadd_ps(
384 _mm256_add_ps(_mm256_loadu_ps(&temphist[i - 1]),
385 _mm256_loadu_ps(&temphist[i + 1])),
386 __d_4_16,
387 _mm256_mul_ps(_mm256_loadu_ps(&temphist[i]), __d_6_16)));
388#else
389 __m256 __hist = _mm256_add_ps(
390 _mm256_mul_ps(_mm256_add_ps(_mm256_loadu_ps(&temphist[i - 2]),
391 _mm256_loadu_ps(&temphist[i + 2])),
392 __d_1_16),
393 _mm256_add_ps(
394 _mm256_mul_ps(_mm256_add_ps(_mm256_loadu_ps(&temphist[i - 1]),
395 _mm256_loadu_ps(&temphist[i + 1])),
396 __d_4_16),
397 _mm256_mul_ps(_mm256_loadu_ps(&temphist[i]), __d_6_16)));
398#endif
399 _mm256_storeu_ps(&hist[i], __hist);
400 }
401 }
402#endif
403 for (; i < n; i++) {
404 hist[i] = (temphist[i - 2] + temphist[i + 2]) * (1.f / 16.f) +
405 (temphist[i - 1] + temphist[i + 1]) * (4.f / 16.f) +
406 temphist[i] * (6.f / 16.f);
407 }
408
409 float maxval = hist[0];
410 for (i = 1; i < n; i++) maxval = std::max(maxval, hist[i]);
411
412 return maxval;
413}
414
415//
416// Interpolates a scale-space extremum's location and scale to subpixel
417// accuracy to form an image feature. Rejects features with low contrast.
418// Based on Section 4 of Lowe's paper.
419static bool adjustLocalExtrema(const std::vector<Mat> &dog_pyr, KeyPoint &kpt,
420 int octv, int &layer, int &r, int &c,
421 int nOctaveLayers, float contrastThreshold,
422 float edgeThreshold, float sigma) {
423 const float img_scale = 1.f / (255 * SIFT_FIXPT_SCALE);
424 const float deriv_scale = img_scale * 0.5f;
425 const float second_deriv_scale = img_scale;
426 const float cross_deriv_scale = img_scale * 0.25f;
427
428 float xi = 0, xr = 0, xc = 0, contr = 0;
429 int i = 0;
430
431 for (; i < SIFT_MAX_INTERP_STEPS; i++) {
432 int idx = octv * (nOctaveLayers + 2) + layer;
433 const Mat &img = dog_pyr[idx];
434 const Mat &prev = dog_pyr[idx - 1];
435 const Mat &next = dog_pyr[idx + 1];
436
437 Vec3f dD(
438 (img.at<sift_wt>(r, c + 1) - img.at<sift_wt>(r, c - 1)) * deriv_scale,
439 (img.at<sift_wt>(r + 1, c) - img.at<sift_wt>(r - 1, c)) * deriv_scale,
440 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c)) * deriv_scale);
441
442 float v2 = (float)img.at<sift_wt>(r, c) * 2;
443 float dxx = (img.at<sift_wt>(r, c + 1) + img.at<sift_wt>(r, c - 1) - v2) *
444 second_deriv_scale;
445 float dyy = (img.at<sift_wt>(r + 1, c) + img.at<sift_wt>(r - 1, c) - v2) *
446 second_deriv_scale;
447 float dss = (next.at<sift_wt>(r, c) + prev.at<sift_wt>(r, c) - v2) *
448 second_deriv_scale;
449 float dxy =
450 (img.at<sift_wt>(r + 1, c + 1) - img.at<sift_wt>(r + 1, c - 1) -
451 img.at<sift_wt>(r - 1, c + 1) + img.at<sift_wt>(r - 1, c - 1)) *
452 cross_deriv_scale;
453 float dxs = (next.at<sift_wt>(r, c + 1) - next.at<sift_wt>(r, c - 1) -
454 prev.at<sift_wt>(r, c + 1) + prev.at<sift_wt>(r, c - 1)) *
455 cross_deriv_scale;
456 float dys = (next.at<sift_wt>(r + 1, c) - next.at<sift_wt>(r - 1, c) -
457 prev.at<sift_wt>(r + 1, c) + prev.at<sift_wt>(r - 1, c)) *
458 cross_deriv_scale;
459
460 Matx33f H(dxx, dxy, dxs, dxy, dyy, dys, dxs, dys, dss);
461
462 Vec3f X = H.solve(dD, DECOMP_LU);
463
464 xi = -X[2];
465 xr = -X[1];
466 xc = -X[0];
467
468 if (std::abs(xi) < 0.5f && std::abs(xr) < 0.5f && std::abs(xc) < 0.5f)
469 break;
470
471 if (std::abs(xi) > (float)(INT_MAX / 3) ||
472 std::abs(xr) > (float)(INT_MAX / 3) ||
473 std::abs(xc) > (float)(INT_MAX / 3))
474 return false;
475
476 c += cvRound(xc);
477 r += cvRound(xr);
478 layer += cvRound(xi);
479
480 if (layer < 1 || layer > nOctaveLayers || c < SIFT_IMG_BORDER ||
481 c >= img.cols - SIFT_IMG_BORDER || r < SIFT_IMG_BORDER ||
482 r >= img.rows - SIFT_IMG_BORDER)
483 return false;
484 }
485
486 // ensure convergence of interpolation
487 if (i >= SIFT_MAX_INTERP_STEPS) return false;
488
489 {
490 int idx = octv * (nOctaveLayers + 2) + layer;
491 const Mat &img = dog_pyr[idx];
492 const Mat &prev = dog_pyr[idx - 1];
493 const Mat &next = dog_pyr[idx + 1];
494 Matx31f dD(
495 (img.at<sift_wt>(r, c + 1) - img.at<sift_wt>(r, c - 1)) * deriv_scale,
496 (img.at<sift_wt>(r + 1, c) - img.at<sift_wt>(r - 1, c)) * deriv_scale,
497 (next.at<sift_wt>(r, c) - prev.at<sift_wt>(r, c)) * deriv_scale);
498 float t = dD.dot(Matx31f(xc, xr, xi));
499
500 contr = img.at<sift_wt>(r, c) * img_scale + t * 0.5f;
501 if (std::abs(contr) * nOctaveLayers < contrastThreshold) return false;
502
503 // principal curvatures are computed using the trace and det of Hessian
504 float v2 = img.at<sift_wt>(r, c) * 2.f;
505 float dxx = (img.at<sift_wt>(r, c + 1) + img.at<sift_wt>(r, c - 1) - v2) *
506 second_deriv_scale;
507 float dyy = (img.at<sift_wt>(r + 1, c) + img.at<sift_wt>(r - 1, c) - v2) *
508 second_deriv_scale;
509 float dxy =
510 (img.at<sift_wt>(r + 1, c + 1) - img.at<sift_wt>(r + 1, c - 1) -
511 img.at<sift_wt>(r - 1, c + 1) + img.at<sift_wt>(r - 1, c - 1)) *
512 cross_deriv_scale;
513 float tr = dxx + dyy;
514 float det = dxx * dyy - dxy * dxy;
515
516 if (det <= 0 || tr * tr * edgeThreshold >=
517 (edgeThreshold + 1) * (edgeThreshold + 1) * det)
518 return false;
519 }
520
521 kpt.pt.x = (c + xc) * (1 << octv);
522 kpt.pt.y = (r + xr) * (1 << octv);
523 kpt.octave = octv + (layer << 8) + (cvRound((xi + 0.5) * 255) << 16);
524 kpt.size = sigma * powf(2.f, (layer + xi) / nOctaveLayers) * (1 << octv) * 2;
525 kpt.response = std::abs(contr);
526
527 return true;
528}
529
530template <typename T>
531class PerThreadAccumulator {
532 public:
533 void Add(std::vector<T> &&data) {
534 std::unique_lock locker(mutex_);
535 result_.emplace_back(data);
536 }
537
538 std::vector<std::vector<T>> move_result() { return std::move(result_); }
539
540 private:
541 // Should we do something more intelligent with per-thread std::vector that we
542 // merge at the end?
543 std::vector<std::vector<T>> result_;
544 std::mutex mutex_;
545};
546
547class findScaleSpaceExtremaComputer : public ParallelLoopBody {
548 public:
549 findScaleSpaceExtremaComputer(
550 int _o, int _i, int _threshold, int _idx, int _step, int _cols,
551 int _nOctaveLayers, double _contrastThreshold, double _edgeThreshold,
552 double _sigma, const std::vector<Mat> &_gauss_pyr,
553 const std::vector<Mat> &_dog_pyr,
554 PerThreadAccumulator<KeyPoint> &_tls_kpts_struct)
555
556 : o(_o),
557 i(_i),
558 threshold(_threshold),
559 idx(_idx),
560 step(_step),
561 cols(_cols),
562 nOctaveLayers(_nOctaveLayers),
563 contrastThreshold(_contrastThreshold),
564 edgeThreshold(_edgeThreshold),
565 sigma(_sigma),
566 gauss_pyr(_gauss_pyr),
567 dog_pyr(_dog_pyr),
568 tls_kpts_struct(_tls_kpts_struct) {}
569 void operator()(const cv::Range &range) const override {
570 const int begin = range.start;
571 const int end = range.end;
572
573 static const int n = SIFT_ORI_HIST_BINS;
574 float hist[n];
575
576 const Mat &img = dog_pyr[idx];
577 const Mat &prev = dog_pyr[idx - 1];
578 const Mat &next = dog_pyr[idx + 1];
579
580 std::vector<KeyPoint> tls_kpts;
581
582 KeyPoint kpt;
583 for (int r = begin; r < end; r++) {
584 const sift_wt *currptr = img.ptr<sift_wt>(r);
585 const sift_wt *prevptr = prev.ptr<sift_wt>(r);
586 const sift_wt *nextptr = next.ptr<sift_wt>(r);
587
588 for (int c = SIFT_IMG_BORDER; c < cols - SIFT_IMG_BORDER; c++) {
589 sift_wt val = currptr[c];
590
591 // find local extrema with pixel accuracy
592 if (std::abs(val) > threshold &&
593 ((val > 0 && val >= currptr[c - 1] && val >= currptr[c + 1] &&
594 val >= currptr[c - step - 1] && val >= currptr[c - step] &&
595 val >= currptr[c - step + 1] && val >= currptr[c + step - 1] &&
596 val >= currptr[c + step] && val >= currptr[c + step + 1] &&
597 val >= nextptr[c] && val >= nextptr[c - 1] &&
598 val >= nextptr[c + 1] && val >= nextptr[c - step - 1] &&
599 val >= nextptr[c - step] && val >= nextptr[c - step + 1] &&
600 val >= nextptr[c + step - 1] && val >= nextptr[c + step] &&
601 val >= nextptr[c + step + 1] && val >= prevptr[c] &&
602 val >= prevptr[c - 1] && val >= prevptr[c + 1] &&
603 val >= prevptr[c - step - 1] && val >= prevptr[c - step] &&
604 val >= prevptr[c - step + 1] && val >= prevptr[c + step - 1] &&
605 val >= prevptr[c + step] && val >= prevptr[c + step + 1]) ||
606 (val < 0 && val <= currptr[c - 1] && val <= currptr[c + 1] &&
607 val <= currptr[c - step - 1] && val <= currptr[c - step] &&
608 val <= currptr[c - step + 1] && val <= currptr[c + step - 1] &&
609 val <= currptr[c + step] && val <= currptr[c + step + 1] &&
610 val <= nextptr[c] && val <= nextptr[c - 1] &&
611 val <= nextptr[c + 1] && val <= nextptr[c - step - 1] &&
612 val <= nextptr[c - step] && val <= nextptr[c - step + 1] &&
613 val <= nextptr[c + step - 1] && val <= nextptr[c + step] &&
614 val <= nextptr[c + step + 1] && val <= prevptr[c] &&
615 val <= prevptr[c - 1] && val <= prevptr[c + 1] &&
616 val <= prevptr[c - step - 1] && val <= prevptr[c - step] &&
617 val <= prevptr[c - step + 1] && val <= prevptr[c + step - 1] &&
618 val <= prevptr[c + step] && val <= prevptr[c + step + 1]))) {
619 int r1 = r, c1 = c, layer = i;
620 if (!adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1, nOctaveLayers,
621 (float)contrastThreshold,
622 (float)edgeThreshold, (float)sigma))
623 continue;
624 float scl_octv = kpt.size * 0.5f / (1 << o);
625 float omax = calcOrientationHist(
626 gauss_pyr[o * (nOctaveLayers + 3) + layer], Point(c1, r1),
627 cvRound(SIFT_ORI_RADIUS * scl_octv), SIFT_ORI_SIG_FCTR * scl_octv,
628 hist, n);
629 float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO);
630 for (int j = 0; j < n; j++) {
631 int l = j > 0 ? j - 1 : n - 1;
632 int r2 = j < n - 1 ? j + 1 : 0;
633
634 if (hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr) {
635 float bin = j + 0.5f * (hist[l] - hist[r2]) /
636 (hist[l] - 2 * hist[j] + hist[r2]);
637 bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin;
638 kpt.angle = 360.f - (float)((360.f / n) * bin);
639 if (std::abs(kpt.angle - 360.f) < FLT_EPSILON) kpt.angle = 0.f;
640 { tls_kpts.push_back(kpt); }
641 }
642 }
643 }
644 }
645 }
646
647 tls_kpts_struct.Add(std::move(tls_kpts));
648 }
649
650 private:
651 int o, i;
652 int threshold;
653 int idx, step, cols;
654 int nOctaveLayers;
655 double contrastThreshold;
656 double edgeThreshold;
657 double sigma;
658 const std::vector<Mat> &gauss_pyr;
659 const std::vector<Mat> &dog_pyr;
660 PerThreadAccumulator<KeyPoint> &tls_kpts_struct;
661};
662
663} // namespace
664
665//
666// Detects features at extrema in DoG scale space. Bad features are discarded
667// based on contrast and ratio of principal curvatures.
668void SIFT971_Impl::findScaleSpaceExtrema(
669 const std::vector<Mat> &gauss_pyr, const std::vector<Mat> &dog_pyr,
670 std::vector<KeyPoint> &keypoints) const {
671 const int nOctaves = (int)gauss_pyr.size() / (nOctaveLayers + 3);
672 const int threshold =
673 cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE);
674
675 keypoints.clear();
676 PerThreadAccumulator<KeyPoint> tls_kpts_struct;
677
678 for (int o = 0; o < nOctaves; o++)
679 for (int i = 1; i <= nOctaveLayers; i++) {
680 const int idx = o * (nOctaveLayers + 2) + i;
681 const Mat &img = dog_pyr[idx];
682 const int step = (int)img.step1();
683 const int rows = img.rows, cols = img.cols;
684
685 parallel_for_(Range(SIFT_IMG_BORDER, rows - SIFT_IMG_BORDER),
686 findScaleSpaceExtremaComputer(
687 o, i, threshold, idx, step, cols, nOctaveLayers,
688 contrastThreshold, edgeThreshold, sigma, gauss_pyr,
689 dog_pyr, tls_kpts_struct));
690 }
691
692 const std::vector<std::vector<KeyPoint>> kpt_vecs =
693 tls_kpts_struct.move_result();
694 for (size_t i = 0; i < kpt_vecs.size(); ++i) {
695 keypoints.insert(keypoints.end(), kpt_vecs[i].begin(), kpt_vecs[i].end());
696 }
697}
698
699namespace {
700
701static void calcSIFTDescriptor(const Mat &img, Point2f ptf, float ori,
702 float scl, int d, int n, float *dst) {
703 Point pt(cvRound(ptf.x), cvRound(ptf.y));
704 float cos_t = cosf(ori * (float)(CV_PI / 180));
705 float sin_t = sinf(ori * (float)(CV_PI / 180));
706 float bins_per_rad = n / 360.f;
707 float exp_scale = -1.f / (d * d * 0.5f);
708 float hist_width = SIFT_DESCR_SCL_FCTR * scl;
709 int radius = cvRound(hist_width * 1.4142135623730951f * (d + 1) * 0.5f);
710 // Clip the radius to the diagonal of the image to avoid autobuffer too large
711 // exception
712 radius = std::min(radius, (int)sqrt(((double)img.cols) * img.cols +
713 ((double)img.rows) * img.rows));
714 cos_t /= hist_width;
715 sin_t /= hist_width;
716
717 int i, j, k, len = (radius * 2 + 1) * (radius * 2 + 1),
718 histlen = (d + 2) * (d + 2) * (n + 2);
719 int rows = img.rows, cols = img.cols;
720
721 AutoBuffer<float> buf(len * 6 + histlen);
722 float *X = buf, *Y = X + len, *Mag = Y, *Ori = Mag + len, *W = Ori + len;
723 float *RBin = W + len, *CBin = RBin + len, *hist = CBin + len;
724
725 for (i = 0; i < d + 2; i++) {
726 for (j = 0; j < d + 2; j++)
727 for (k = 0; k < n + 2; k++) hist[(i * (d + 2) + j) * (n + 2) + k] = 0.;
728 }
729
730 for (i = -radius, k = 0; i <= radius; i++)
731 for (j = -radius; j <= radius; j++) {
732 // Calculate sample's histogram array coords rotated relative to ori.
733 // Subtract 0.5 so samples that fall e.g. in the center of row 1 (i.e.
734 // r_rot = 1.5) have full weight placed in row 1 after interpolation.
735 float c_rot = j * cos_t - i * sin_t;
736 float r_rot = j * sin_t + i * cos_t;
737 float rbin = r_rot + d / 2 - 0.5f;
738 float cbin = c_rot + d / 2 - 0.5f;
739 int r = pt.y + i, c = pt.x + j;
740
741 if (rbin > -1 && rbin < d && cbin > -1 && cbin < d && r > 0 &&
742 r < rows - 1 && c > 0 && c < cols - 1) {
743 float dx =
744 (float)(img.at<sift_wt>(r, c + 1) - img.at<sift_wt>(r, c - 1));
745 float dy =
746 (float)(img.at<sift_wt>(r - 1, c) - img.at<sift_wt>(r + 1, c));
747 X[k] = dx;
748 Y[k] = dy;
749 RBin[k] = rbin;
750 CBin[k] = cbin;
751 W[k] = (c_rot * c_rot + r_rot * r_rot) * exp_scale;
752 k++;
753 }
754 }
755
756 len = k;
757 cv::hal::fastAtan2(Y, X, Ori, len, true);
758 cv::hal::magnitude32f(X, Y, Mag, len);
759 cv::hal::exp32f(W, W, len);
760
761 k = 0;
762#if CV_AVX2
763 if (USE_AVX2) {
764 int CV_DECL_ALIGNED(32) idx_buf[8];
765 float CV_DECL_ALIGNED(32) rco_buf[64];
766 const __m256 __ori = _mm256_set1_ps(ori);
767 const __m256 __bins_per_rad = _mm256_set1_ps(bins_per_rad);
768 const __m256i __n = _mm256_set1_epi32(n);
769 for (; k <= len - 8; k += 8) {
770 __m256 __rbin = _mm256_loadu_ps(&RBin[k]);
771 __m256 __cbin = _mm256_loadu_ps(&CBin[k]);
772 __m256 __obin = _mm256_mul_ps(
773 _mm256_sub_ps(_mm256_loadu_ps(&Ori[k]), __ori), __bins_per_rad);
774 __m256 __mag =
775 _mm256_mul_ps(_mm256_loadu_ps(&Mag[k]), _mm256_loadu_ps(&W[k]));
776
777 __m256 __r0 = _mm256_floor_ps(__rbin);
778 __rbin = _mm256_sub_ps(__rbin, __r0);
779 __m256 __c0 = _mm256_floor_ps(__cbin);
780 __cbin = _mm256_sub_ps(__cbin, __c0);
781 __m256 __o0 = _mm256_floor_ps(__obin);
782 __obin = _mm256_sub_ps(__obin, __o0);
783
784 __m256i __o0i = _mm256_cvtps_epi32(__o0);
785 __o0i = _mm256_add_epi32(
786 __o0i, _mm256_and_si256(
787 __n, _mm256_cmpgt_epi32(_mm256_setzero_si256(), __o0i)));
788 __o0i = _mm256_sub_epi32(
789 __o0i, _mm256_andnot_si256(_mm256_cmpgt_epi32(__n, __o0i), __n));
790
791 __m256 __v_r1 = _mm256_mul_ps(__mag, __rbin);
792 __m256 __v_r0 = _mm256_sub_ps(__mag, __v_r1);
793
794 __m256 __v_rc11 = _mm256_mul_ps(__v_r1, __cbin);
795 __m256 __v_rc10 = _mm256_sub_ps(__v_r1, __v_rc11);
796
797 __m256 __v_rc01 = _mm256_mul_ps(__v_r0, __cbin);
798 __m256 __v_rc00 = _mm256_sub_ps(__v_r0, __v_rc01);
799
800 __m256 __v_rco111 = _mm256_mul_ps(__v_rc11, __obin);
801 __m256 __v_rco110 = _mm256_sub_ps(__v_rc11, __v_rco111);
802
803 __m256 __v_rco101 = _mm256_mul_ps(__v_rc10, __obin);
804 __m256 __v_rco100 = _mm256_sub_ps(__v_rc10, __v_rco101);
805
806 __m256 __v_rco011 = _mm256_mul_ps(__v_rc01, __obin);
807 __m256 __v_rco010 = _mm256_sub_ps(__v_rc01, __v_rco011);
808
809 __m256 __v_rco001 = _mm256_mul_ps(__v_rc00, __obin);
810 __m256 __v_rco000 = _mm256_sub_ps(__v_rc00, __v_rco001);
811
812 __m256i __one = _mm256_set1_epi32(1);
813 __m256i __idx = _mm256_add_epi32(
814 _mm256_mullo_epi32(
815 _mm256_add_epi32(
816 _mm256_mullo_epi32(
817 _mm256_add_epi32(_mm256_cvtps_epi32(__r0), __one),
818 _mm256_set1_epi32(d + 2)),
819 _mm256_add_epi32(_mm256_cvtps_epi32(__c0), __one)),
820 _mm256_set1_epi32(n + 2)),
821 __o0i);
822
823 _mm256_store_si256((__m256i *)idx_buf, __idx);
824
825 _mm256_store_ps(&(rco_buf[0]), __v_rco000);
826 _mm256_store_ps(&(rco_buf[8]), __v_rco001);
827 _mm256_store_ps(&(rco_buf[16]), __v_rco010);
828 _mm256_store_ps(&(rco_buf[24]), __v_rco011);
829 _mm256_store_ps(&(rco_buf[32]), __v_rco100);
830 _mm256_store_ps(&(rco_buf[40]), __v_rco101);
831 _mm256_store_ps(&(rco_buf[48]), __v_rco110);
832 _mm256_store_ps(&(rco_buf[56]), __v_rco111);
833#define HIST_SUM_HELPER(id) \
834 hist[idx_buf[(id)]] += rco_buf[(id)]; \
835 hist[idx_buf[(id)] + 1] += rco_buf[8 + (id)]; \
836 hist[idx_buf[(id)] + (n + 2)] += rco_buf[16 + (id)]; \
837 hist[idx_buf[(id)] + (n + 3)] += rco_buf[24 + (id)]; \
838 hist[idx_buf[(id)] + (d + 2) * (n + 2)] += rco_buf[32 + (id)]; \
839 hist[idx_buf[(id)] + (d + 2) * (n + 2) + 1] += rco_buf[40 + (id)]; \
840 hist[idx_buf[(id)] + (d + 3) * (n + 2)] += rco_buf[48 + (id)]; \
841 hist[idx_buf[(id)] + (d + 3) * (n + 2) + 1] += rco_buf[56 + (id)];
842
843 HIST_SUM_HELPER(0);
844 HIST_SUM_HELPER(1);
845 HIST_SUM_HELPER(2);
846 HIST_SUM_HELPER(3);
847 HIST_SUM_HELPER(4);
848 HIST_SUM_HELPER(5);
849 HIST_SUM_HELPER(6);
850 HIST_SUM_HELPER(7);
851
852#undef HIST_SUM_HELPER
853 }
854 }
855#endif
856 for (; k < len; k++) {
857 float rbin = RBin[k], cbin = CBin[k];
858 float obin = (Ori[k] - ori) * bins_per_rad;
859 float mag = Mag[k] * W[k];
860
861 int r0 = cvFloor(rbin);
862 int c0 = cvFloor(cbin);
863 int o0 = cvFloor(obin);
864 rbin -= r0;
865 cbin -= c0;
866 obin -= o0;
867
868 if (o0 < 0) o0 += n;
869 if (o0 >= n) o0 -= n;
870
871 // histogram update using tri-linear interpolation
872 float v_r1 = mag * rbin, v_r0 = mag - v_r1;
873 float v_rc11 = v_r1 * cbin, v_rc10 = v_r1 - v_rc11;
874 float v_rc01 = v_r0 * cbin, v_rc00 = v_r0 - v_rc01;
875 float v_rco111 = v_rc11 * obin, v_rco110 = v_rc11 - v_rco111;
876 float v_rco101 = v_rc10 * obin, v_rco100 = v_rc10 - v_rco101;
877 float v_rco011 = v_rc01 * obin, v_rco010 = v_rc01 - v_rco011;
878 float v_rco001 = v_rc00 * obin, v_rco000 = v_rc00 - v_rco001;
879
880 int idx = ((r0 + 1) * (d + 2) + c0 + 1) * (n + 2) + o0;
881 hist[idx] += v_rco000;
882 hist[idx + 1] += v_rco001;
883 hist[idx + (n + 2)] += v_rco010;
884 hist[idx + (n + 3)] += v_rco011;
885 hist[idx + (d + 2) * (n + 2)] += v_rco100;
886 hist[idx + (d + 2) * (n + 2) + 1] += v_rco101;
887 hist[idx + (d + 3) * (n + 2)] += v_rco110;
888 hist[idx + (d + 3) * (n + 2) + 1] += v_rco111;
889 }
890
891 // finalize histogram, since the orientation histograms are circular
892 for (i = 0; i < d; i++)
893 for (j = 0; j < d; j++) {
894 int idx = ((i + 1) * (d + 2) + (j + 1)) * (n + 2);
895 hist[idx] += hist[idx + n];
896 hist[idx + 1] += hist[idx + n + 1];
897 for (k = 0; k < n; k++) dst[(i * d + j) * n + k] = hist[idx + k];
898 }
899 // copy histogram to the descriptor,
900 // apply hysteresis thresholding
901 // and scale the result, so that it can be easily converted
902 // to byte array
903 float nrm2 = 0;
904 len = d * d * n;
905 k = 0;
906#if CV_AVX2
907 if (USE_AVX2) {
908 float CV_DECL_ALIGNED(32) nrm2_buf[8];
909 __m256 __nrm2 = _mm256_setzero_ps();
910 __m256 __dst;
911 for (; k <= len - 8; k += 8) {
912 __dst = _mm256_loadu_ps(&dst[k]);
913#if CV_FMA3
914 __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
915#else
916 __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst));
917#endif
918 }
919 _mm256_store_ps(nrm2_buf, __nrm2);
920 nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] + nrm2_buf[4] +
921 nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7];
922 }
923#endif
924 for (; k < len; k++) nrm2 += dst[k] * dst[k];
925
926 float thr = std::sqrt(nrm2) * SIFT_DESCR_MAG_THR;
927
928 i = 0, nrm2 = 0;
929#if 0 // CV_AVX2
930 // This code cannot be enabled because it sums nrm2 in a different order,
931 // thus producing slightly different results
932 if( USE_AVX2 )
933 {
934 float CV_DECL_ALIGNED(32) nrm2_buf[8];
935 __m256 __dst;
936 __m256 __nrm2 = _mm256_setzero_ps();
937 __m256 __thr = _mm256_set1_ps(thr);
938 for( ; i <= len - 8; i += 8 )
939 {
940 __dst = _mm256_loadu_ps(&dst[i]);
941 __dst = _mm256_min_ps(__dst, __thr);
942 _mm256_storeu_ps(&dst[i], __dst);
943#if CV_FMA3
944 __nrm2 = _mm256_fmadd_ps(__dst, __dst, __nrm2);
945#else
946 __nrm2 = _mm256_add_ps(__nrm2, _mm256_mul_ps(__dst, __dst));
947#endif
948 }
949 _mm256_store_ps(nrm2_buf, __nrm2);
950 nrm2 = nrm2_buf[0] + nrm2_buf[1] + nrm2_buf[2] + nrm2_buf[3] +
951 nrm2_buf[4] + nrm2_buf[5] + nrm2_buf[6] + nrm2_buf[7];
952 }
953#endif
954 for (; i < len; i++) {
955 float val = std::min(dst[i], thr);
956 dst[i] = val;
957 nrm2 += val * val;
958 }
959 nrm2 = SIFT_INT_DESCR_FCTR / std::max(std::sqrt(nrm2), FLT_EPSILON);
960
961#if 1
962 k = 0;
963#if CV_AVX2
964 if (USE_AVX2) {
965 __m256 __dst;
966 __m256 __min = _mm256_setzero_ps();
967 __m256 __max = _mm256_set1_ps(255.0f); // max of uchar
968 __m256 __nrm2 = _mm256_set1_ps(nrm2);
969 for (k = 0; k <= len - 8; k += 8) {
970 __dst = _mm256_loadu_ps(&dst[k]);
971 __dst = _mm256_min_ps(
972 _mm256_max_ps(
973 _mm256_round_ps(_mm256_mul_ps(__dst, __nrm2),
974 _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC),
975 __min),
976 __max);
977 _mm256_storeu_ps(&dst[k], __dst);
978 }
979 }
980#endif
981 for (; k < len; k++) {
982 dst[k] = saturate_cast<uchar>(dst[k] * nrm2);
983 }
984#else
985 float nrm1 = 0;
986 for (k = 0; k < len; k++) {
987 dst[k] *= nrm2;
988 nrm1 += dst[k];
989 }
990 nrm1 = 1.f / std::max(nrm1, FLT_EPSILON);
991 for (k = 0; k < len; k++) {
992 dst[k] = std::sqrt(dst[k] * nrm1); // saturate_cast<uchar>(std::sqrt(dst[k]
993 // * nrm1)*SIFT_INT_DESCR_FCTR);
994 }
995#endif
996}
997
998class calcDescriptorsComputer : public ParallelLoopBody {
999 public:
1000 calcDescriptorsComputer(const std::vector<Mat> &_gpyr,
1001 const std::vector<KeyPoint> &_keypoints,
1002 Mat &_descriptors, int _nOctaveLayers,
1003 int _firstOctave)
1004 : gpyr(_gpyr),
1005 keypoints(_keypoints),
1006 descriptors(_descriptors),
1007 nOctaveLayers(_nOctaveLayers),
1008 firstOctave(_firstOctave) {}
1009
1010 void operator()(const cv::Range &range) const override {
1011 const int begin = range.start;
1012 const int end = range.end;
1013
1014 static const int d = SIFT_DESCR_WIDTH, n = SIFT_DESCR_HIST_BINS;
1015
1016 for (int i = begin; i < end; i++) {
1017 KeyPoint kpt = keypoints[i];
1018 int octave, layer;
1019 float scale;
1020 unpackOctave(kpt, octave, layer, scale);
1021 CV_Assert(octave >= firstOctave && layer <= nOctaveLayers + 2);
1022 float size = kpt.size * scale;
1023 Point2f ptf(kpt.pt.x * scale, kpt.pt.y * scale);
1024 const Mat &img =
1025 gpyr[(octave - firstOctave) * (nOctaveLayers + 3) + layer];
1026
1027 float angle = 360.f - kpt.angle;
1028 if (std::abs(angle - 360.f) < FLT_EPSILON) angle = 0.f;
1029 calcSIFTDescriptor(img, ptf, angle, size * 0.5f, d, n,
1030 descriptors.ptr<float>((int)i));
1031 }
1032 }
1033
1034 private:
1035 const std::vector<Mat> &gpyr;
1036 const std::vector<KeyPoint> &keypoints;
1037 Mat &descriptors;
1038 int nOctaveLayers;
1039 int firstOctave;
1040};
1041
1042static void calcDescriptors(const std::vector<Mat> &gpyr,
1043 const std::vector<KeyPoint> &keypoints,
1044 Mat &descriptors, int nOctaveLayers,
1045 int firstOctave) {
1046 parallel_for_(Range(0, static_cast<int>(keypoints.size())),
1047 calcDescriptorsComputer(gpyr, keypoints, descriptors,
1048 nOctaveLayers, firstOctave));
1049}
1050
1051} // namespace
1052
1053//////////////////////////////////////////////////////////////////////////////////////////
1054
1055SIFT971_Impl::SIFT971_Impl(int _nfeatures, int _nOctaveLayers,
1056 double _contrastThreshold, double _edgeThreshold,
1057 double _sigma)
1058 : nfeatures(_nfeatures),
1059 nOctaveLayers(_nOctaveLayers),
1060 contrastThreshold(_contrastThreshold),
1061 edgeThreshold(_edgeThreshold),
1062 sigma(_sigma) {}
1063
1064int SIFT971_Impl::descriptorSize() const {
1065 return SIFT_DESCR_WIDTH * SIFT_DESCR_WIDTH * SIFT_DESCR_HIST_BINS;
1066}
1067
1068int SIFT971_Impl::descriptorType() const { return CV_32F; }
1069
1070int SIFT971_Impl::defaultNorm() const { return NORM_L2; }
1071
1072void SIFT971_Impl::detectAndCompute(InputArray _image, InputArray _mask,
1073 std::vector<KeyPoint> &keypoints,
1074 OutputArray _descriptors,
1075 bool useProvidedKeypoints) {
1076 int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0;
1077 Mat image = _image.getMat(), mask = _mask.getMat();
1078
1079 if (image.empty() || image.depth() != CV_8U)
1080 CV_Error(Error::StsBadArg,
1081 "image is empty or has incorrect depth (!=CV_8U)");
1082
1083 if (!mask.empty() && mask.type() != CV_8UC1)
1084 CV_Error(Error::StsBadArg, "mask has incorrect type (!=CV_8UC1)");
1085
1086 if (useProvidedKeypoints) {
1087 firstOctave = 0;
1088 int maxOctave = INT_MIN;
1089 for (size_t i = 0; i < keypoints.size(); i++) {
1090 int octave, layer;
1091 float scale;
1092 unpackOctave(keypoints[i], octave, layer, scale);
1093 firstOctave = std::min(firstOctave, octave);
1094 maxOctave = std::max(maxOctave, octave);
1095 actualNLayers = std::max(actualNLayers, layer - 2);
1096 }
1097
1098 firstOctave = std::min(firstOctave, 0);
1099 CV_Assert(firstOctave >= -1 && actualNLayers <= nOctaveLayers);
1100 actualNOctaves = maxOctave - firstOctave + 1;
1101 }
1102
1103 Mat base = createInitialImage(image, firstOctave < 0, (float)sigma);
1104 std::vector<Mat> gpyr;
1105 int nOctaves =
1106 actualNOctaves > 0
1107 ? actualNOctaves
1108 : cvRound(std::log((double)std::min(base.cols, base.rows)) /
1109 std::log(2.) -
1110 2) -
1111 firstOctave;
1112
1113 // double t, tf = getTickFrequency();
1114 // t = (double)getTickCount();
1115 buildGaussianPyramid(base, gpyr, nOctaves);
1116
1117 // t = (double)getTickCount() - t;
1118 // printf("pyramid construction time: %g\n", t*1000./tf);
1119
1120 if (!useProvidedKeypoints) {
1121 std::vector<Mat> dogpyr;
1122 buildDoGPyramid(gpyr, dogpyr);
1123 // t = (double)getTickCount();
1124 findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
1125 // TODO(Brian): I think it can go faster by knowing they're sorted?
1126 // KeyPointsFilter::removeDuplicatedSorted( keypoints );
1127 KeyPointsFilter::removeDuplicated(keypoints);
1128
1129 if (nfeatures > 0) KeyPointsFilter::retainBest(keypoints, nfeatures);
1130 // t = (double)getTickCount() - t;
1131 // printf("keypoint detection time: %g\n", t*1000./tf);
1132
1133 if (firstOctave < 0)
1134 for (size_t i = 0; i < keypoints.size(); i++) {
1135 KeyPoint &kpt = keypoints[i];
1136 float scale = 1.f / (float)(1 << -firstOctave);
1137 kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255);
1138 kpt.pt *= scale;
1139 kpt.size *= scale;
1140 }
1141
1142 if (!mask.empty()) KeyPointsFilter::runByPixelsMask(keypoints, mask);
1143 } else {
1144 // filter keypoints by mask
1145 // KeyPointsFilter::runByPixelsMask( keypoints, mask );
1146 }
1147
1148 if (_descriptors.needed()) {
1149 // t = (double)getTickCount();
1150 int dsize = descriptorSize();
1151 _descriptors.create((int)keypoints.size(), dsize, CV_32F);
1152 Mat descriptors = _descriptors.getMat();
1153
1154 calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
1155 // t = (double)getTickCount() - t;
1156 // printf("descriptor extraction time: %g\n", t*1000./tf);
1157 }
1158}
1159
1160} // namespace vision
1161} // namespace frc971