blob: b1eb2208643178adac7b601e5e5874d471042491 [file] [log] [blame]
Austin Schuh70cc9552019-01-21 19:46:48 -08001// Copyright (c) 2013 libmv authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to
5// deal in the Software without restriction, including without limitation the
6// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7// sell copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19// IN THE SOFTWARE.
20//
21// Author: mierle@gmail.com (Keir Mierle)
22// sergey.vfx@gmail.com (Sergey Sharybin)
23//
24// This is an example application which contains bundle adjustment code used
25// in the Libmv library and Blender. It reads problems from files passed via
26// the command line and runs the bundle adjuster on the problem.
27//
28// File with problem a binary file, for which it is crucial to know in which
29// order bytes of float values are stored in. This information is provided
30// by a single character in the beginning of the file. There're two possible
31// values of this byte:
32// - V, which means values in the file are stored with big endian type
33// - v, which means values in the file are stored with little endian type
34//
35// The rest of the file contains data in the following order:
36// - Space in which markers' coordinates are stored in
37// - Camera intrinsics
38// - Number of cameras
39// - Cameras
40// - Number of 3D points
41// - 3D points
42// - Number of markers
43// - Markers
44//
45// Markers' space could either be normalized or image (pixels). This is defined
// by the single character in the file. P means markers in the file are in image
47// space, and N means markers are in normalized space.
48//
// Camera intrinsics are described by 8 float values.
// These values go in the following order:
51//
52// - Focal length, principal point X, principal point Y, k1, k2, k3, p1, p2
53//
54// Every camera is described by:
55//
56// - Image for which camera belongs to (single 4 bytes integer value).
57// - Column-major camera rotation matrix, 9 float values.
58// - Camera translation, 3-component vector of float values.
59//
60// Image number shall be greater or equal to zero. Order of cameras does not
61// matter and gaps are possible.
62//
// Every 3D point is described by:
64//
65// - Track number point belongs to (single 4 bytes integer value).
66// - 3D position vector, 3-component vector of float values.
67//
68// Track number shall be greater or equal to zero. Order of tracks does not
69// matter and gaps are possible.
70//
71// Finally every marker is described by:
72//
73// - Image marker belongs to single 4 bytes integer value).
74// - Track marker belongs to single 4 bytes integer value).
75// - 2D marker position vector, (two float values).
76//
// Marker's space is used as the default value for the refine_intrinsics command line
78// flag. This means if there's no refine_intrinsics flag passed via command
79// line, camera intrinsics will be refined if markers in the problem are
80// stored in image space and camera intrinsics will not be refined if markers
81// are in normalized space.
82//
83// Passing refine_intrinsics command line flag defines explicitly whether
84// refinement of intrinsics will happen. Currently, only none and all
85// intrinsics refinement is supported.
86//
87// There're existing problem files dumped from blender stored in folder
88// ../data/libmv-ba-problems.
89
Austin Schuh70cc9552019-01-21 19:46:48 -080090#include <fcntl.h>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080091
92#include <cstdio>
Austin Schuh70cc9552019-01-21 19:46:48 -080093#include <sstream>
94#include <string>
95#include <vector>
96
97#ifdef _MSC_VER
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080098#include <io.h>
99#define open _open
100#define close _close
Austin Schuh70cc9552019-01-21 19:46:48 -0800101typedef unsigned __int32 uint32_t;
102#else
103#include <stdint.h>
104#include <unistd.h>
105
106// O_BINARY is not defined on unix like platforms, as there is no
107// difference between binary and text files.
108#define O_BINARY 0
109
110#endif
111
112#include "ceres/ceres.h"
113#include "ceres/rotation.h"
114#include "gflags/gflags.h"
115#include "glog/logging.h"
116
117typedef Eigen::Matrix<double, 3, 3> Mat3;
118typedef Eigen::Matrix<double, 6, 1> Vec6;
119typedef Eigen::Vector3d Vec3;
120typedef Eigen::Vector4d Vec4;
121
122using std::vector;
123
124DEFINE_string(input, "", "Input File name");
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800125DEFINE_string(refine_intrinsics,
126 "",
127 "Camera intrinsics to be refined. Options are: none, radial.");
Austin Schuh70cc9552019-01-21 19:46:48 -0800128
129namespace {
130
131// A EuclideanCamera is the location and rotation of the camera
132// viewing an image.
133//
134// image identifies which image this camera represents.
135// R is a 3x3 matrix representing the rotation of the camera.
136// t is a translation vector representing its positions.
137struct EuclideanCamera {
138 EuclideanCamera() : image(-1) {}
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800139 EuclideanCamera(const EuclideanCamera& c) : image(c.image), R(c.R), t(c.t) {}
Austin Schuh70cc9552019-01-21 19:46:48 -0800140
141 int image;
142 Mat3 R;
143 Vec3 t;
144};
145
146// A Point is the 3D location of a track.
147//
148// track identifies which track this point corresponds to.
149// X represents the 3D position of the track.
150struct EuclideanPoint {
151 EuclideanPoint() : track(-1) {}
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800152 EuclideanPoint(const EuclideanPoint& p) : track(p.track), X(p.X) {}
Austin Schuh70cc9552019-01-21 19:46:48 -0800153 int track;
154 Vec3 X;
155};
156
// A Marker is the 2D location of a tracked point in an image.
//
// x and y give the position of the marker, in pixels from the top left
// corner, in the image identified by image. All markers corresponding to
// the same target form a track identified by a common track number.
//
// Members are brace-initialized so a default-constructed Marker holds no
// indeterminate values (the original left all four members uninitialized).
struct Marker {
  int image = -1;  // Image the marker belongs to; -1 until assigned.
  int track = -1;  // Track the marker belongs to; -1 until assigned.
  double x = 0.0, y = 0.0;  // Marker position (pixels or normalized units).
};
167
// Cameras intrinsics to be bundled.
//
// These are bit flags and may be OR-ed together to select which intrinsic
// parameters are refined during bundle adjustment.
//
// BUNDLE_RADIAL actually implies bundling of k1 and k2 coefficients only,
// no bundling of k3 is possible at this moment.
enum BundleIntrinsics {
  BUNDLE_NO_INTRINSICS = 0,
  BUNDLE_FOCAL_LENGTH = 1,
  BUNDLE_PRINCIPAL_POINT = 2,
  BUNDLE_RADIAL_K1 = 4,
  BUNDLE_RADIAL_K2 = 8,
  BUNDLE_RADIAL = 12,  // BUNDLE_RADIAL_K1 | BUNDLE_RADIAL_K2.
  BUNDLE_TANGENTIAL_P1 = 16,
  BUNDLE_TANGENTIAL_P2 = 32,
  BUNDLE_TANGENTIAL = 48,  // BUNDLE_TANGENTIAL_P1 | BUNDLE_TANGENTIAL_P2.
};
183
// Denotes which blocks to keep constant during bundling.
// For example it is useful to keep camera translations constant
// when bundling tripod motions.
enum BundleConstraints {
  BUNDLE_NO_CONSTRAINTS = 0,
  // Hold the translation half of each camera's R_t block constant.
  BUNDLE_NO_TRANSLATION = 1,
};
191
// The intrinsics need to get combined into a single parameter block; use these
// enums to index instead of numeric constants.
//
// The order matches the on-disk layout of the 8 intrinsic floats read by
// ReadProblemFromFile().
enum {
  OFFSET_FOCAL_LENGTH,
  OFFSET_PRINCIPAL_POINT_X,
  OFFSET_PRINCIPAL_POINT_Y,
  OFFSET_K1,
  OFFSET_K2,
  OFFSET_K3,
  OFFSET_P1,
  OFFSET_P2,
};
204
205// Returns a pointer to the camera corresponding to a image.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800206EuclideanCamera* CameraForImage(vector<EuclideanCamera>* all_cameras,
Austin Schuh70cc9552019-01-21 19:46:48 -0800207 const int image) {
208 if (image < 0 || image >= all_cameras->size()) {
209 return NULL;
210 }
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800211 EuclideanCamera* camera = &(*all_cameras)[image];
Austin Schuh70cc9552019-01-21 19:46:48 -0800212 if (camera->image == -1) {
213 return NULL;
214 }
215 return camera;
216}
217
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800218const EuclideanCamera* CameraForImage(
219 const vector<EuclideanCamera>& all_cameras, const int image) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800220 if (image < 0 || image >= all_cameras.size()) {
221 return NULL;
222 }
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800223 const EuclideanCamera* camera = &all_cameras[image];
Austin Schuh70cc9552019-01-21 19:46:48 -0800224 if (camera->image == -1) {
225 return NULL;
226 }
227 return camera;
228}
229
230// Returns maximal image number at which marker exists.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800231int MaxImage(const vector<Marker>& all_markers) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800232 if (all_markers.size() == 0) {
233 return -1;
234 }
235
236 int max_image = all_markers[0].image;
237 for (int i = 1; i < all_markers.size(); i++) {
238 max_image = std::max(max_image, all_markers[i].image);
239 }
240 return max_image;
241}
242
243// Returns a pointer to the point corresponding to a track.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800244EuclideanPoint* PointForTrack(vector<EuclideanPoint>* all_points,
Austin Schuh70cc9552019-01-21 19:46:48 -0800245 const int track) {
246 if (track < 0 || track >= all_points->size()) {
247 return NULL;
248 }
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800249 EuclideanPoint* point = &(*all_points)[track];
Austin Schuh70cc9552019-01-21 19:46:48 -0800250 if (point->track == -1) {
251 return NULL;
252 }
253 return point;
254}
255
// Reader of binary file which makes sure possibly needed endian
// conversion happens when loading values like floats and integers.
//
// File's endian type is reading from a first character of file, which
// could either be V for big endian or v for little endian. This
// means you need to design file format assuming first character
// denotes file endianness in this way.
class EndianAwareFileReader {
 public:
  EndianAwareFileReader(void) : file_descriptor_(-1) {
    // Get an endian type of the host machine.
    // Writing bytes {0,1,2,3} and reading them back as a uint32_t yields
    // kLittleEndian (0x03020100) or kBigEndian (0x00010203) depending on
    // the host's byte order.
    union {
      unsigned char bytes[4];
      uint32_t value;
    } endian_test = {{0, 1, 2, 3}};
    host_endian_type_ = endian_test.value;
    // Until OpenFile() reads the file's flag byte, assume the file matches
    // the host so Read() performs no swapping.
    file_endian_type_ = host_endian_type_;
  }

  ~EndianAwareFileReader(void) {
    // Close the descriptor if OpenFile() succeeded (RAII-style cleanup).
    if (file_descriptor_ > 0) {
      close(file_descriptor_);
    }
  }

  // Opens the file and consumes its first byte, which declares the file's
  // endianness: 'V' for big endian, 'v' for little endian.
  // Returns false if the file cannot be opened; aborts via LOG(FATAL) on
  // an unrecognized endianness flag.
  bool OpenFile(const std::string& file_name) {
    file_descriptor_ = open(file_name.c_str(), O_RDONLY | O_BINARY);
    if (file_descriptor_ < 0) {
      return false;
    }
    // Get an endian type of data in the file.
    unsigned char file_endian_type_flag = Read<unsigned char>();
    if (file_endian_type_flag == 'V') {
      file_endian_type_ = kBigEndian;
    } else if (file_endian_type_flag == 'v') {
      file_endian_type_ = kLittleEndian;
    } else {
      LOG(FATAL) << "Problem file is stored in unknown endian type.";
    }
    return true;
  }

  // Read value from the file, will switch endian if needed.
  template <typename T>
  T Read(void) const {
    T value;
    // NOTE(review): CHECK_GT only verifies at least one byte arrived, not
    // all sizeof(T) bytes -- a short read would go undetected.
    CHECK_GT(read(file_descriptor_, &value, sizeof(value)), 0);
    // Switch endian type if file contains data in different type
    // than the current machine.
    if (file_endian_type_ != host_endian_type_) {
      value = SwitchEndian<T>(value);
    }
    return value;
  }

 private:
  // Byte-order patterns produced by the constructor's union probe.
  static constexpr long int kLittleEndian = 0x03020100ul;
  static constexpr long int kBigEndian = 0x00010203ul;

  // Switch endian type between big to little.
  // Only 4-byte values (byte-reversed) and 1-byte values (returned as-is)
  // are supported; other sizes abort via LOG(FATAL).
  template <typename T>
  T SwitchEndian(const T value) const {
    if (sizeof(T) == 4) {
      unsigned int temp_value = static_cast<unsigned int>(value);
      // clang-format off
      return ((temp_value >> 24)) |
             ((temp_value << 8) & 0x00ff0000) |
             ((temp_value >> 8) & 0x0000ff00) |
             ((temp_value << 24));
      // clang-format on
    } else if (sizeof(T) == 1) {
      return value;
    } else {
      LOG(FATAL) << "Entered non-implemented part of endian "
                    "switching function.";
    }
  }

  int host_endian_type_;  // Byte-order pattern of the machine we run on.
  int file_endian_type_;  // Byte-order pattern declared by the open file.
  int file_descriptor_;   // POSIX fd; -1 when no file is open.
};
338
339// Read 3x3 column-major matrix from the file
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800340void ReadMatrix3x3(const EndianAwareFileReader& file_reader, Mat3* matrix) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800341 for (int i = 0; i < 9; i++) {
342 (*matrix)(i % 3, i / 3) = file_reader.Read<float>();
343 }
344}
345
346// Read 3-vector from file
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800347void ReadVector3(const EndianAwareFileReader& file_reader, Vec3* vector) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800348 for (int i = 0; i < 3; i++) {
349 (*vector)(i) = file_reader.Read<float>();
350 }
351}
352
// Reads a bundle adjustment problem from the file.
//
// file_name denotes from which file to read the problem.
// camera_intrinsics will contain initial camera intrinsics values.
//
// all_cameras is a vector of all reconstructed cameras to be optimized,
// vector element with number i will contain camera for image i.
//
// all_points is a vector of all reconstructed 3D points to be optimized,
// vector element with number i will contain point for track i.
//
// all_markers is a vector of all tracked markers existing in
// the problem. Only used for reprojection error calculation, stay
// unchanged during optimization.
//
// Returns false if any kind of error happened during
// reading.
//
// NOTE(review): an unknown marker-space flag aborts via LOG(FATAL) rather
// than returning false; only an open failure yields false.
bool ReadProblemFromFile(const std::string& file_name,
                         double camera_intrinsics[8],
                         vector<EuclideanCamera>* all_cameras,
                         vector<EuclideanPoint>* all_points,
                         bool* is_image_space,
                         vector<Marker>* all_markers) {
  EndianAwareFileReader file_reader;
  if (!file_reader.OpenFile(file_name)) {
    return false;
  }

  // Read markers' space flag: 'P' means markers are stored in image
  // (pixel) space, 'N' means normalized space.
  unsigned char is_image_space_flag = file_reader.Read<unsigned char>();
  if (is_image_space_flag == 'P') {
    *is_image_space = true;
  } else if (is_image_space_flag == 'N') {
    *is_image_space = false;
  } else {
    LOG(FATAL) << "Problem file contains markers stored in unknown space.";
  }

  // Read camera intrinsics: 8 floats in the order focal length,
  // principal point x/y, k1, k2, k3, p1, p2 (matches the OFFSET_* enum).
  for (int i = 0; i < 8; i++) {
    camera_intrinsics[i] = file_reader.Read<float>();
  }

  // Read all cameras.
  int number_of_cameras = file_reader.Read<int>();
  for (int i = 0; i < number_of_cameras; i++) {
    EuclideanCamera camera;

    camera.image = file_reader.Read<int>();
    ReadMatrix3x3(file_reader, &camera.R);
    ReadVector3(file_reader, &camera.t);

    // Cameras are indexed by image number; grow the vector so the slot
    // for this image exists. Gaps stay at image == -1 ("empty slot"),
    // which CameraForImage() treats as no camera.
    if (camera.image >= all_cameras->size()) {
      all_cameras->resize(camera.image + 1);
    }

    (*all_cameras)[camera.image].image = camera.image;
    (*all_cameras)[camera.image].R = camera.R;
    (*all_cameras)[camera.image].t = camera.t;
  }

  LOG(INFO) << "Read " << number_of_cameras << " cameras.";

  // Read all reconstructed 3D points, indexed by track number the same
  // way cameras are indexed by image number; gaps stay at track == -1.
  int number_of_points = file_reader.Read<int>();
  for (int i = 0; i < number_of_points; i++) {
    EuclideanPoint point;

    point.track = file_reader.Read<int>();
    ReadVector3(file_reader, &point.X);

    if (point.track >= all_points->size()) {
      all_points->resize(point.track + 1);
    }

    (*all_points)[point.track].track = point.track;
    (*all_points)[point.track].X = point.X;
  }

  LOG(INFO) << "Read " << number_of_points << " points.";

  // And finally read all markers: image, track, then the 2D position.
  int number_of_markers = file_reader.Read<int>();
  for (int i = 0; i < number_of_markers; i++) {
    Marker marker;

    marker.image = file_reader.Read<int>();
    marker.track = file_reader.Read<int>();
    marker.x = file_reader.Read<float>();
    marker.y = file_reader.Read<float>();

    all_markers->push_back(marker);
  }

  LOG(INFO) << "Read " << number_of_markers << " markers.";

  return true;
}
451
// Apply camera intrinsics to the normalized point to get image coordinates.
// This applies the radial lens distortion to a point which is in normalized
// camera coordinates (i.e. the principal point is at (0, 0)) to get image
// coordinates in pixels. Templated for use with autodifferentiation.
template <typename T>
inline void ApplyRadialDistortionCameraIntrinsics(const T& focal_length_x,
                                                  const T& focal_length_y,
                                                  const T& principal_point_x,
                                                  const T& principal_point_y,
                                                  const T& k1,
                                                  const T& k2,
                                                  const T& k3,
                                                  const T& p1,
                                                  const T& p2,
                                                  const T& normalized_x,
                                                  const T& normalized_y,
                                                  T* image_x,
                                                  T* image_y) {
  const T& x = normalized_x;
  const T& y = normalized_y;

  // Powers of the squared radius used by the polynomial radial model.
  const T r2 = x * x + y * y;
  const T r4 = r2 * r2;
  const T r6 = r4 * r2;

  // Radial scaling factor: 1 + k1*r^2 + k2*r^4 + k3*r^6.
  const T radial = 1.0 + k1 * r2 + k2 * r4 + k3 * r6;

  // Distorted normalized coordinates: radial term plus tangential terms.
  const T xd = x * radial + 2.0 * p1 * x * y + p2 * (r2 + 2.0 * x * x);
  const T yd = y * radial + 2.0 * p2 * x * y + p1 * (r2 + 2.0 * y * y);

  // Map through focal length and principal point into pixel coordinates.
  *image_x = focal_length_x * xd + principal_point_x;
  *image_y = focal_length_y * yd + principal_point_y;
}
485
486// Cost functor which computes reprojection error of 3D point X
487// on camera defined by angle-axis rotation and it's translation
488// (which are in the same block due to optimization reasons).
489//
490// This functor uses a radial distortion model.
491struct OpenCVReprojectionError {
492 OpenCVReprojectionError(const double observed_x, const double observed_y)
493 : observed_x(observed_x), observed_y(observed_y) {}
494
495 template <typename T>
496 bool operator()(const T* const intrinsics,
497 const T* const R_t, // Rotation denoted by angle axis
498 // followed with translation
499 const T* const X, // Point coordinates 3x1.
500 T* residuals) const {
501 // Unpack the intrinsics.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800502 const T& focal_length = intrinsics[OFFSET_FOCAL_LENGTH];
Austin Schuh70cc9552019-01-21 19:46:48 -0800503 const T& principal_point_x = intrinsics[OFFSET_PRINCIPAL_POINT_X];
504 const T& principal_point_y = intrinsics[OFFSET_PRINCIPAL_POINT_Y];
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800505 const T& k1 = intrinsics[OFFSET_K1];
506 const T& k2 = intrinsics[OFFSET_K2];
507 const T& k3 = intrinsics[OFFSET_K3];
508 const T& p1 = intrinsics[OFFSET_P1];
509 const T& p2 = intrinsics[OFFSET_P2];
Austin Schuh70cc9552019-01-21 19:46:48 -0800510
511 // Compute projective coordinates: x = RX + t.
512 T x[3];
513
514 ceres::AngleAxisRotatePoint(R_t, X, x);
515 x[0] += R_t[3];
516 x[1] += R_t[4];
517 x[2] += R_t[5];
518
519 // Compute normalized coordinates: x /= x[2].
520 T xn = x[0] / x[2];
521 T yn = x[1] / x[2];
522
523 T predicted_x, predicted_y;
524
525 // Apply distortion to the normalized points to get (xd, yd).
526 // TODO(keir): Do early bailouts for zero distortion; these are expensive
527 // jet operations.
528 ApplyRadialDistortionCameraIntrinsics(focal_length,
529 focal_length,
530 principal_point_x,
531 principal_point_y,
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800532 k1,
533 k2,
534 k3,
535 p1,
536 p2,
537 xn,
538 yn,
Austin Schuh70cc9552019-01-21 19:46:48 -0800539 &predicted_x,
540 &predicted_y);
541
542 // The error is the difference between the predicted and observed position.
543 residuals[0] = predicted_x - observed_x;
544 residuals[1] = predicted_y - observed_y;
545
546 return true;
547 }
548
549 const double observed_x;
550 const double observed_y;
551};
552
553// Print a message to the log which camera intrinsics are gonna to be optimized.
554void BundleIntrinsicsLogMessage(const int bundle_intrinsics) {
555 if (bundle_intrinsics == BUNDLE_NO_INTRINSICS) {
556 LOG(INFO) << "Bundling only camera positions.";
557 } else {
558 std::string bundling_message = "";
559
560#define APPEND_BUNDLING_INTRINSICS(name, flag) \
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800561 if (bundle_intrinsics & flag) { \
562 if (!bundling_message.empty()) { \
563 bundling_message += ", "; \
564 } \
565 bundling_message += name; \
566 } \
567 (void)0
Austin Schuh70cc9552019-01-21 19:46:48 -0800568
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800569 APPEND_BUNDLING_INTRINSICS("f", BUNDLE_FOCAL_LENGTH);
Austin Schuh70cc9552019-01-21 19:46:48 -0800570 APPEND_BUNDLING_INTRINSICS("px, py", BUNDLE_PRINCIPAL_POINT);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800571 APPEND_BUNDLING_INTRINSICS("k1", BUNDLE_RADIAL_K1);
572 APPEND_BUNDLING_INTRINSICS("k2", BUNDLE_RADIAL_K2);
573 APPEND_BUNDLING_INTRINSICS("p1", BUNDLE_TANGENTIAL_P1);
574 APPEND_BUNDLING_INTRINSICS("p2", BUNDLE_TANGENTIAL_P2);
Austin Schuh70cc9552019-01-21 19:46:48 -0800575
576 LOG(INFO) << "Bundling " << bundling_message << ".";
577 }
578}
579
580// Print a message to the log containing all the camera intriniscs values.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800581void PrintCameraIntrinsics(const char* text, const double* camera_intrinsics) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800582 std::ostringstream intrinsics_output;
583
584 intrinsics_output << "f=" << camera_intrinsics[OFFSET_FOCAL_LENGTH];
585
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800586 intrinsics_output << " cx=" << camera_intrinsics[OFFSET_PRINCIPAL_POINT_X]
587 << " cy=" << camera_intrinsics[OFFSET_PRINCIPAL_POINT_Y];
Austin Schuh70cc9552019-01-21 19:46:48 -0800588
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800589#define APPEND_DISTORTION_COEFFICIENT(name, offset) \
590 { \
591 if (camera_intrinsics[offset] != 0.0) { \
592 intrinsics_output << " " name "=" << camera_intrinsics[offset]; \
593 } \
594 } \
595 (void)0
Austin Schuh70cc9552019-01-21 19:46:48 -0800596
597 APPEND_DISTORTION_COEFFICIENT("k1", OFFSET_K1);
598 APPEND_DISTORTION_COEFFICIENT("k2", OFFSET_K2);
599 APPEND_DISTORTION_COEFFICIENT("k3", OFFSET_K3);
600 APPEND_DISTORTION_COEFFICIENT("p1", OFFSET_P1);
601 APPEND_DISTORTION_COEFFICIENT("p2", OFFSET_P2);
602
603#undef APPEND_DISTORTION_COEFFICIENT
604
605 LOG(INFO) << text << intrinsics_output.str();
606}
607
608// Get a vector of camera's rotations denoted by angle axis
609// conjuncted with translations into single block
610//
611// Element with index i matches to a rotation+translation for
612// camera at image i.
613vector<Vec6> PackCamerasRotationAndTranslation(
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800614 const vector<Marker>& all_markers,
615 const vector<EuclideanCamera>& all_cameras) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800616 vector<Vec6> all_cameras_R_t;
617 int max_image = MaxImage(all_markers);
618
619 all_cameras_R_t.resize(max_image + 1);
620
621 for (int i = 0; i <= max_image; i++) {
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800622 const EuclideanCamera* camera = CameraForImage(all_cameras, i);
Austin Schuh70cc9552019-01-21 19:46:48 -0800623
624 if (!camera) {
625 continue;
626 }
627
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800628 ceres::RotationMatrixToAngleAxis(&camera->R(0, 0), &all_cameras_R_t[i](0));
Austin Schuh70cc9552019-01-21 19:46:48 -0800629 all_cameras_R_t[i].tail<3>() = camera->t;
630 }
631
632 return all_cameras_R_t;
633}
634
635// Convert cameras rotations fro mangle axis back to rotation matrix.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800636void UnpackCamerasRotationAndTranslation(const vector<Marker>& all_markers,
637 const vector<Vec6>& all_cameras_R_t,
638 vector<EuclideanCamera>* all_cameras) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800639 int max_image = MaxImage(all_markers);
640
641 for (int i = 0; i <= max_image; i++) {
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800642 EuclideanCamera* camera = CameraForImage(all_cameras, i);
Austin Schuh70cc9552019-01-21 19:46:48 -0800643
644 if (!camera) {
645 continue;
646 }
647
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800648 ceres::AngleAxisToRotationMatrix(&all_cameras_R_t[i](0), &camera->R(0, 0));
Austin Schuh70cc9552019-01-21 19:46:48 -0800649 camera->t = all_cameras_R_t[i].tail<3>();
650 }
651}
652
// Runs Ceres bundle adjustment over the given cameras, points and
// intrinsics, using all markers as observations.
//
// bundle_intrinsics is an OR-ed combination of BundleIntrinsics flags
// selecting which intrinsics to refine; bundle_constraints is an OR-ed
// combination of BundleConstraints flags.
//
// camera_intrinsics, all_cameras and all_points are updated in place with
// the optimized values; all_markers is read-only.
void EuclideanBundleCommonIntrinsics(const vector<Marker>& all_markers,
                                     const int bundle_intrinsics,
                                     const int bundle_constraints,
                                     double* camera_intrinsics,
                                     vector<EuclideanCamera>* all_cameras,
                                     vector<EuclideanPoint>* all_points) {
  PrintCameraIntrinsics("Original intrinsics: ", camera_intrinsics);

  // Cost functions live in the local vectors below, so the problem must
  // not try to delete them.
  ceres::Problem::Options problem_options;
  problem_options.cost_function_ownership = ceres::DO_NOT_TAKE_OWNERSHIP;
  ceres::Problem problem(problem_options);

  // Convert cameras rotations to angle axis and merge with translation
  // into single parameter block for maximal minimization speed
  //
  // Block for minimization has got the following structure:
  //   <3 elements for angle-axis> <3 elements for translation>
  vector<Vec6> all_cameras_R_t =
      PackCamerasRotationAndTranslation(all_markers, *all_cameras);

  // Parameterization used to restrict camera motion for modal solvers.
  ceres::SubsetParameterization* constant_transform_parameterization = NULL;
  if (bundle_constraints & BUNDLE_NO_TRANSLATION) {
    std::vector<int> constant_translation;

    // First three elements are rotation, last three are translation.
    constant_translation.push_back(3);
    constant_translation.push_back(4);
    constant_translation.push_back(5);

    constant_transform_parameterization =
        new ceres::SubsetParameterization(6, constant_translation);
  }

  // Storage for cost functors and their autodiff wrappers. The problem
  // holds raw pointers into these vectors (&errors.back(),
  // &costFunctions.back()), so reserve() up front is essential: at most
  // one element per marker is appended, guaranteeing no reallocation and
  // therefore no dangling pointers.
  std::vector<OpenCVReprojectionError> errors;
  std::vector<ceres::AutoDiffCostFunction<OpenCVReprojectionError, 2, 8, 6, 3>>
      costFunctions;
  errors.reserve(all_markers.size());
  costFunctions.reserve(all_markers.size());

  int num_residuals = 0;
  bool have_locked_camera = false;
  for (int i = 0; i < all_markers.size(); ++i) {
    const Marker& marker = all_markers[i];
    EuclideanCamera* camera = CameraForImage(all_cameras, marker.image);
    EuclideanPoint* point = PointForTrack(all_points, marker.track);
    // Skip markers whose camera or point is missing from the problem.
    if (camera == NULL || point == NULL) {
      continue;
    }

    // Rotation of camera denoted in angle axis followed with
    // camera translation.
    double* current_camera_R_t = &all_cameras_R_t[camera->image](0);

    errors.emplace_back(marker.x, marker.y);
    costFunctions.emplace_back(&errors.back(), ceres::DO_NOT_TAKE_OWNERSHIP);

    // Residual: 2D reprojection error against parameter blocks
    // <intrinsics[8], R_t[6], X[3]>; NULL loss means plain squared error.
    problem.AddResidualBlock(&costFunctions.back(),
                             NULL,
                             camera_intrinsics,
                             current_camera_R_t,
                             &point->X(0));

    // We lock the first camera to better deal with scene orientation ambiguity.
    if (!have_locked_camera) {
      problem.SetParameterBlockConstant(current_camera_R_t);
      have_locked_camera = true;
    }

    if (bundle_constraints & BUNDLE_NO_TRANSLATION) {
      problem.SetParameterization(current_camera_R_t,
                                  constant_transform_parameterization);
    }

    num_residuals++;
  }
  LOG(INFO) << "Number of residuals: " << num_residuals;

  if (!num_residuals) {
    LOG(INFO) << "Skipping running minimizer with zero residuals";
    return;
  }

  BundleIntrinsicsLogMessage(bundle_intrinsics);

  if (bundle_intrinsics == BUNDLE_NO_INTRINSICS) {
    // No camera intrinsics are being refined,
    // set the whole parameter block as constant for best performance.
    problem.SetParameterBlockConstant(camera_intrinsics);
  } else {
    // Set the camera intrinsics that are not to be bundled as
    // constant using some macro trickery.

    std::vector<int> constant_intrinsics;
#define MAYBE_SET_CONSTANT(bundle_enum, offset) \
  if (!(bundle_intrinsics & bundle_enum)) {     \
    constant_intrinsics.push_back(offset);      \
  }
    MAYBE_SET_CONSTANT(BUNDLE_FOCAL_LENGTH, OFFSET_FOCAL_LENGTH);
    MAYBE_SET_CONSTANT(BUNDLE_PRINCIPAL_POINT, OFFSET_PRINCIPAL_POINT_X);
    MAYBE_SET_CONSTANT(BUNDLE_PRINCIPAL_POINT, OFFSET_PRINCIPAL_POINT_Y);
    MAYBE_SET_CONSTANT(BUNDLE_RADIAL_K1, OFFSET_K1);
    MAYBE_SET_CONSTANT(BUNDLE_RADIAL_K2, OFFSET_K2);
    MAYBE_SET_CONSTANT(BUNDLE_TANGENTIAL_P1, OFFSET_P1);
    MAYBE_SET_CONSTANT(BUNDLE_TANGENTIAL_P2, OFFSET_P2);
#undef MAYBE_SET_CONSTANT

    // Always set K3 constant, it's not used at the moment.
    constant_intrinsics.push_back(OFFSET_K3);

    ceres::SubsetParameterization* subset_parameterization =
        new ceres::SubsetParameterization(8, constant_intrinsics);

    problem.SetParameterization(camera_intrinsics, subset_parameterization);
  }

  // Configure the solver.
  ceres::Solver::Options options;
  options.use_nonmonotonic_steps = true;
  options.preconditioner_type = ceres::SCHUR_JACOBI;
  options.linear_solver_type = ceres::ITERATIVE_SCHUR;
  options.use_inner_iterations = true;
  options.max_num_iterations = 100;
  options.minimizer_progress_to_stdout = true;

  // Solve!
  ceres::Solver::Summary summary;
  ceres::Solve(options, &problem, &summary);

  std::cout << "Final report:\n" << summary.FullReport();

  // Copy rotations and translations back.
  UnpackCamerasRotationAndTranslation(
      all_markers, all_cameras_R_t, all_cameras);

  PrintCameraIntrinsics("Final intrinsics: ", camera_intrinsics);
}
790} // namespace
791
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800792int main(int argc, char** argv) {
793 GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);
Austin Schuh70cc9552019-01-21 19:46:48 -0800794 google::InitGoogleLogging(argv[0]);
795
796 if (FLAGS_input.empty()) {
797 LOG(ERROR) << "Usage: libmv_bundle_adjuster --input=blender_problem";
798 return EXIT_FAILURE;
799 }
800
801 double camera_intrinsics[8];
802 vector<EuclideanCamera> all_cameras;
803 vector<EuclideanPoint> all_points;
804 bool is_image_space;
805 vector<Marker> all_markers;
806
807 if (!ReadProblemFromFile(FLAGS_input,
808 camera_intrinsics,
809 &all_cameras,
810 &all_points,
811 &is_image_space,
812 &all_markers)) {
813 LOG(ERROR) << "Error reading problem file";
814 return EXIT_FAILURE;
815 }
816
817 // If there's no refine_intrinsics passed via command line
818 // (in this case FLAGS_refine_intrinsics will be an empty string)
819 // we use problem's settings to detect whether intrinsics
820 // shall be refined or not.
821 //
822 // Namely, if problem has got markers stored in image (pixel)
823 // space, we do full intrinsics refinement. If markers are
824 // stored in normalized space, and refine_intrinsics is not
825 // set, no refining will happen.
826 //
827 // Using command line argument refine_intrinsics will explicitly
828 // declare which intrinsics need to be refined and in this case
829 // refining flags does not depend on problem at all.
830 int bundle_intrinsics = BUNDLE_NO_INTRINSICS;
831 if (FLAGS_refine_intrinsics.empty()) {
832 if (is_image_space) {
833 bundle_intrinsics = BUNDLE_FOCAL_LENGTH | BUNDLE_RADIAL;
834 }
835 } else {
836 if (FLAGS_refine_intrinsics == "radial") {
837 bundle_intrinsics = BUNDLE_FOCAL_LENGTH | BUNDLE_RADIAL;
838 } else if (FLAGS_refine_intrinsics != "none") {
839 LOG(ERROR) << "Unsupported value for refine-intrinsics";
840 return EXIT_FAILURE;
841 }
842 }
843
844 // Run the bundler.
845 EuclideanBundleCommonIntrinsics(all_markers,
846 bundle_intrinsics,
847 BUNDLE_NO_CONSTRAINTS,
848 camera_intrinsics,
849 &all_cameras,
850 &all_points);
851
852 return EXIT_SUCCESS;
853}