// Ceres Solver - A fast non-linear least squares minimizer
| 2 | // Copyright 2015 Google Inc. All rights reserved. |
| 3 | // http://ceres-solver.org/ |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright notice, |
| 9 | // this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above copyright notice, |
| 11 | // this list of conditions and the following disclaimer in the documentation |
| 12 | // and/or other materials provided with the distribution. |
| 13 | // * Neither the name of Google Inc. nor the names of its contributors may be |
| 14 | // used to endorse or promote products derived from this software without |
| 15 | // specific prior written permission. |
| 16 | // |
| 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 21 | // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 22 | // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 23 | // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 24 | // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 25 | // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 26 | // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 27 | // POSSIBILITY OF SUCH DAMAGE. |
| 28 | // |
| 29 | // Author: sameeragarwal@google.com (Sameer Agarwal) |
| 30 | |
| 31 | #ifndef CERES_PUBLIC_COVARIANCE_H_ |
| 32 | #define CERES_PUBLIC_COVARIANCE_H_ |
| 33 | |
| 34 | #include <memory> |
| 35 | #include <utility> |
| 36 | #include <vector> |
| 37 | #include "ceres/internal/disable_warnings.h" |
| 38 | #include "ceres/internal/port.h" |
| 39 | #include "ceres/types.h" |
| 40 | |
| 41 | namespace ceres { |
| 42 | |
| 43 | class Problem; |
| 44 | |
| 45 | namespace internal { |
| 46 | class CovarianceImpl; |
| 47 | } // namespace internal |
| 48 | |
| 49 | // WARNING |
| 50 | // ======= |
| 51 | // It is very easy to use this class incorrectly without understanding |
| 52 | // the underlying mathematics. Please read and understand the |
| 53 | // documentation completely before attempting to use this class. |
| 54 | // |
| 55 | // |
| 56 | // This class allows the user to evaluate the covariance for a |
| 57 | // non-linear least squares problem and provides random access to its |
// blocks.
| 59 | // |
| 60 | // Background |
| 61 | // ========== |
| 62 | // One way to assess the quality of the solution returned by a |
| 63 | // non-linear least squares solver is to analyze the covariance of the |
| 64 | // solution. |
| 65 | // |
| 66 | // Let us consider the non-linear regression problem |
| 67 | // |
| 68 | // y = f(x) + N(0, I) |
| 69 | // |
| 70 | // i.e., the observation y is a random non-linear function of the |
| 71 | // independent variable x with mean f(x) and identity covariance. Then |
| 72 | // the maximum likelihood estimate of x given observations y is the |
| 73 | // solution to the non-linear least squares problem: |
| 74 | // |
| 75 | // x* = arg min_x |f(x)|^2 |
| 76 | // |
| 77 | // And the covariance of x* is given by |
| 78 | // |
| 79 | // C(x*) = inverse[J'(x*)J(x*)] |
| 80 | // |
| 81 | // Here J(x*) is the Jacobian of f at x*. The above formula assumes |
| 82 | // that J(x*) has full column rank. |
| 83 | // |
| 84 | // If J(x*) is rank deficient, then the covariance matrix C(x*) is |
| 85 | // also rank deficient and is given by |
| 86 | // |
| 87 | // C(x*) = pseudoinverse[J'(x*)J(x*)] |
| 88 | // |
| 89 | // Note that in the above, we assumed that the covariance |
| 90 | // matrix for y was identity. This is an important assumption. If this |
| 91 | // is not the case and we have |
| 92 | // |
| 93 | // y = f(x) + N(0, S) |
| 94 | // |
| 95 | // Where S is a positive semi-definite matrix denoting the covariance |
| 96 | // of y, then the maximum likelihood problem to be solved is |
| 97 | // |
| 98 | // x* = arg min_x f'(x) inverse[S] f(x) |
| 99 | // |
| 100 | // and the corresponding covariance estimate of x* is given by |
| 101 | // |
| 102 | // C(x*) = inverse[J'(x*) inverse[S] J(x*)] |
| 103 | // |
| 104 | // So, if it is the case that the observations being fitted to have a |
| 105 | // covariance matrix not equal to identity, then it is the user's |
| 106 | // responsibility that the corresponding cost functions are correctly |
| 107 | // scaled, e.g. in the above case the cost function for this problem |
| 108 | // should evaluate S^{-1/2} f(x) instead of just f(x), where S^{-1/2} |
| 109 | // is the inverse square root of the covariance matrix S. |
| 110 | // |
| 111 | // This class allows the user to evaluate the covariance for a |
| 112 | // non-linear least squares problem and provides random access to its |
| 113 | // blocks. The computation assumes that the CostFunctions compute |
| 114 | // residuals such that their covariance is identity. |
| 115 | // |
| 116 | // Since the computation of the covariance matrix requires computing |
| 117 | // the inverse of a potentially large matrix, this can involve a |
| 118 | // rather large amount of time and memory. However, it is usually the |
| 119 | // case that the user is only interested in a small part of the |
| 120 | // covariance matrix. Quite often just the block diagonal. This class |
| 121 | // allows the user to specify the parts of the covariance matrix that |
| 122 | // she is interested in and then uses this information to only compute |
| 123 | // and store those parts of the covariance matrix. |
| 124 | // |
| 125 | // Rank of the Jacobian |
| 126 | // -------------------- |
| 127 | // As we noted above, if the jacobian is rank deficient, then the |
| 128 | // inverse of J'J is not defined and instead a pseudo inverse needs to |
| 129 | // be computed. |
| 130 | // |
| 131 | // The rank deficiency in J can be structural -- columns which are |
| 132 | // always known to be zero or numerical -- depending on the exact |
| 133 | // values in the Jacobian. |
| 134 | // |
| 135 | // Structural rank deficiency occurs when the problem contains |
| 136 | // parameter blocks that are constant. This class correctly handles |
| 137 | // structural rank deficiency like that. |
| 138 | // |
| 139 | // Numerical rank deficiency, where the rank of the matrix cannot be |
| 140 | // predicted by its sparsity structure and requires looking at its |
| 141 | // numerical values is more complicated. Here again there are two |
| 142 | // cases. |
| 143 | // |
| 144 | // a. The rank deficiency arises from overparameterization. e.g., a |
| 145 | // four dimensional quaternion used to parameterize SO(3), which is |
| 146 | // a three dimensional manifold. In cases like this, the user should |
| 147 | // use an appropriate LocalParameterization. Not only will this lead |
| 148 | // to better numerical behaviour of the Solver, it will also expose |
| 149 | // the rank deficiency to the Covariance object so that it can |
| 150 | // handle it correctly. |
| 151 | // |
| 152 | // b. More general numerical rank deficiency in the Jacobian |
| 153 | // requires the computation of the so called Singular Value |
| 154 | // Decomposition (SVD) of J'J. We do not know how to do this for |
| 155 | // large sparse matrices efficiently. For small and moderate sized |
| 156 | // problems this is done using dense linear algebra. |
| 157 | // |
| 158 | // Gauge Invariance |
| 159 | // ---------------- |
| 160 | // In structure from motion (3D reconstruction) problems, the |
| 161 | // reconstruction is ambiguous up to a similarity transform. This is |
| 162 | // known as a Gauge Ambiguity. Handling Gauges correctly requires the |
| 163 | // use of SVD or custom inversion algorithms. For small problems the |
| 164 | // user can use the dense algorithm. For more details see |
| 165 | // |
| 166 | // Ken-ichi Kanatani, Daniel D. Morris: Gauges and gauge |
| 167 | // transformations for uncertainty description of geometric structure |
| 168 | // with indeterminacy. IEEE Transactions on Information Theory 47(5): |
| 169 | // 2017-2028 (2001) |
| 170 | // |
| 171 | // Example Usage |
| 172 | // ============= |
| 173 | // |
| 174 | // double x[3]; |
| 175 | // double y[2]; |
| 176 | // |
| 177 | // Problem problem; |
| 178 | // problem.AddParameterBlock(x, 3); |
| 179 | // problem.AddParameterBlock(y, 2); |
| 180 | // <Build Problem> |
| 181 | // <Solve Problem> |
| 182 | // |
| 183 | // Covariance::Options options; |
| 184 | // Covariance covariance(options); |
| 185 | // |
| 186 | // std::vector<std::pair<const double*, const double*>> covariance_blocks; |
// covariance_blocks.push_back(std::make_pair(x, x));
// covariance_blocks.push_back(std::make_pair(y, y));
// covariance_blocks.push_back(std::make_pair(x, y));
| 190 | // |
| 191 | // CHECK(covariance.Compute(covariance_blocks, &problem)); |
| 192 | // |
| 193 | // double covariance_xx[3 * 3]; |
| 194 | // double covariance_yy[2 * 2]; |
| 195 | // double covariance_xy[3 * 2]; |
// covariance.GetCovarianceBlock(x, x, covariance_xx);
// covariance.GetCovarianceBlock(y, y, covariance_yy);
// covariance.GetCovarianceBlock(x, y, covariance_xy);
| 199 | // |
class CERES_EXPORT Covariance {
 public:
  struct CERES_EXPORT Options {
    // Sparse linear algebra library to use when a sparse matrix
    // factorization is being used to compute the covariance matrix.
    //
    // Currently this only applies to SPARSE_QR.
    SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
#if !defined(CERES_NO_SUITESPARSE)
        SUITE_SPARSE;
#else
        // Eigen's QR factorization is always available.
        EIGEN_SPARSE;
#endif

    // Ceres supports two different algorithms for covariance
    // estimation, which represent different tradeoffs in speed,
    // accuracy and reliability.
    //
    // 1. DENSE_SVD uses Eigen's JacobiSVD to perform the
    //    computations. It computes the singular value decomposition
    //
    //      U * S * V' = J
    //
    //    and then uses it to compute the pseudo inverse of J'J as
    //
    //      pseudoinverse[J'J] = V * pseudoinverse[S^2] * V'
    //
    //    It is an accurate but slow method and should only be used
    //    for small to moderate sized problems. It can handle
    //    full-rank as well as rank deficient Jacobians.
    //
    // 2. SPARSE_QR uses the sparse QR factorization algorithm
    //    to compute the decomposition
    //
    //      Q * R = J
    //
    //    and since Q is orthonormal, J'J = R'R, so
    //
    //      [J'J]^-1 = [R'R]^-1
    //
    //    SPARSE_QR is not capable of computing the covariance if the
    //    Jacobian is rank deficient. Depending on the value of
    //    Covariance::Options::sparse_linear_algebra_library_type, either
    //    Eigen's Sparse QR factorization algorithm will be used or
    //    SuiteSparse's high performance SuiteSparseQR algorithm will be
    //    used.
    CovarianceAlgorithmType algorithm_type = SPARSE_QR;

    // If the Jacobian matrix is near singular, then inverting J'J
    // will result in unreliable results, e.g, if
    //
    //   J = [1.0 1.0         ]
    //       [1.0 1.0000001   ]
    //
    // which is essentially a rank deficient matrix, we have
    //
    //   inv(J'J) = [ 2.0471e+14  -2.0471e+14]
    //              [-2.0471e+14   2.0471e+14]
    //
    // This is not a useful result. Therefore, by default
    // Covariance::Compute will return false if a rank deficient
    // Jacobian is encountered. How rank deficiency is detected
    // depends on the algorithm being used.
    //
    // 1. DENSE_SVD
    //
    //      min_sigma / max_sigma < sqrt(min_reciprocal_condition_number)
    //
    //    where min_sigma and max_sigma are the minimum and maximum
    //    singular values of J respectively.
    //
    // 2. SPARSE_QR
    //
    //      rank(J) < num_col(J)
    //
    //    Here rank(J) is the estimate of the rank of J returned by the
    //    sparse QR factorization algorithm. It is a fairly reliable
    //    indication of rank deficiency.
    //
    double min_reciprocal_condition_number = 1e-14;

    // When using DENSE_SVD, the user has more control in dealing with
    // singular and near singular covariance matrices.
    //
    // As mentioned above, when the covariance matrix is near
    // singular, instead of computing the inverse of J'J, the
    // Moore-Penrose pseudoinverse of J'J should be computed.
    //
    // If J'J has the eigen decomposition (lambda_i, e_i), where
    // lambda_i is the i^th eigenvalue and e_i is the corresponding
    // eigenvector, then the inverse of J'J is
    //
    //   inverse[J'J] = sum_i e_i e_i' / lambda_i
    //
    // and computing the pseudo inverse involves dropping terms from
    // this sum that correspond to small eigenvalues.
    //
    // How terms are dropped is controlled by
    // min_reciprocal_condition_number and null_space_rank.
    //
    // If null_space_rank is non-negative, then the smallest
    // null_space_rank eigenvalue/eigenvectors are dropped
    // irrespective of the magnitude of lambda_i. If the ratio of the
    // smallest non-zero eigenvalue to the largest eigenvalue in the
    // truncated matrix is still below
    // min_reciprocal_condition_number, then the Covariance::Compute()
    // will fail and return false.
    //
    // Setting null_space_rank = -1 drops all terms for which
    //
    //   lambda_i / lambda_max < min_reciprocal_condition_number.
    //
    // This option has no effect on the SUITE_SPARSE_QR and
    // EIGEN_SPARSE_QR algorithms.
    int null_space_rank = 0;

    // Number of threads to use during covariance estimation.
    int num_threads = 1;

    // Even though the residual blocks in the problem may contain loss
    // functions, setting apply_loss_function to false will turn off
    // the application of the loss function to the output of the cost
    // function and in turn its effect on the covariance.
    //
    // TODO(sameeragarwal): Expand this based on Jim's experiments.
    bool apply_loss_function = true;
  };

  explicit Covariance(const Options& options);
  ~Covariance();

  // Compute a part of the covariance matrix.
  //
  // The vector covariance_blocks, indexes into the covariance matrix
  // block-wise using pairs of parameter blocks. This allows the
  // covariance estimation algorithm to only compute and store these
  // blocks.
  //
  // Since the covariance matrix is symmetric, if the user passes
  // (block1, block2), then GetCovarianceBlock can be called with
  // block1, block2 as well as block2, block1.
  //
  // covariance_blocks cannot contain duplicates. Bad things will
  // happen if they do.
  //
  // Note that the list of covariance_blocks is only used to determine
  // what parts of the covariance matrix are computed. The full
  // Jacobian is used to do the computation, i.e. they do not have an
  // impact on what part of the Jacobian is used for computation.
  //
  // The return value indicates the success or failure of the
  // covariance computation. Please see the documentation for
  // Covariance::Options for more on the conditions under which this
  // function returns false.
  bool Compute(
      const std::vector<std::pair<const double*,
                                  const double*>>& covariance_blocks,
      Problem* problem);

  // Compute a part of the covariance matrix.
  //
  // The vector parameter_blocks contains the parameter blocks that
  // are used for computing the covariance matrix. From this vector
  // all covariance pairs are generated. This allows the covariance
  // estimation algorithm to only compute and store these blocks.
  //
  // parameter_blocks cannot contain duplicates. Bad things will
  // happen if they do.
  //
  // Note that the list of parameter_blocks is only used to determine
  // what parts of the covariance matrix are computed. The full
  // Jacobian is used to do the computation, i.e. they do not have an
  // impact on what part of the Jacobian is used for computation.
  //
  // The return value indicates the success or failure of the
  // covariance computation. Please see the documentation for
  // Covariance::Options for more on the conditions under which this
  // function returns false.
  bool Compute(const std::vector<const double*>& parameter_blocks,
               Problem* problem);

  // Return the block of the cross-covariance matrix corresponding to
  // parameter_block1 and parameter_block2.
  //
  // Compute must be called before the first call to
  // GetCovarianceBlock and the pair <parameter_block1,
  // parameter_block2> OR the pair <parameter_block2,
  // parameter_block1> must have been present in the vector
  // covariance_blocks when Compute was called. Otherwise
  // GetCovarianceBlock will return false.
  //
  // covariance_block must point to a memory location that can store a
  // parameter_block1_size x parameter_block2_size matrix. The
  // returned covariance will be a row-major matrix.
  bool GetCovarianceBlock(const double* parameter_block1,
                          const double* parameter_block2,
                          double* covariance_block) const;

  // Return the block of the cross-covariance matrix corresponding to
  // parameter_block1 and parameter_block2.
  // Returns cross-covariance in the tangent space if a local
  // parameterization is associated with either parameter block;
  // else returns cross-covariance in the ambient space.
  //
  // Compute must be called before the first call to
  // GetCovarianceBlock and the pair <parameter_block1,
  // parameter_block2> OR the pair <parameter_block2,
  // parameter_block1> must have been present in the vector
  // covariance_blocks when Compute was called. Otherwise
  // GetCovarianceBlock will return false.
  //
  // covariance_block must point to a memory location that can store a
  // parameter_block1_local_size x parameter_block2_local_size matrix. The
  // returned covariance will be a row-major matrix.
  bool GetCovarianceBlockInTangentSpace(const double* parameter_block1,
                                        const double* parameter_block2,
                                        double* covariance_block) const;

  // Return the covariance matrix corresponding to all parameter_blocks.
  //
  // Compute must be called before calling GetCovarianceMatrix and all
  // parameter_blocks must have been present in the vector
  // parameter_blocks when Compute was called. Otherwise
  // GetCovarianceMatrix returns false.
  //
  // covariance_matrix must point to a memory location that can store
  // the size of the covariance matrix. The covariance matrix will be
  // a square matrix whose row and column count is equal to the sum of
  // the sizes of the individual parameter blocks. The covariance
  // matrix will be a row-major matrix.
  bool GetCovarianceMatrix(const std::vector<const double *> &parameter_blocks,
                           double *covariance_matrix);

  // Return the covariance matrix corresponding to parameter_blocks
  // in the tangent space if a local parameterization is associated
  // with one of the parameter blocks else returns the covariance
  // matrix in the ambient space.
  //
  // Compute must be called before calling GetCovarianceMatrix and all
  // parameter_blocks must have been present in the vector
  // parameter_blocks when Compute was called. Otherwise
  // GetCovarianceMatrix returns false.
  //
  // covariance_matrix must point to a memory location that can store
  // the size of the covariance matrix. The covariance matrix will be
  // a square matrix whose row and column count is equal to the sum of
  // the sizes of the tangent spaces of the individual parameter
  // blocks. The covariance matrix will be a row-major matrix.
  bool GetCovarianceMatrixInTangentSpace(
      const std::vector<const double*>& parameter_blocks,
      double* covariance_matrix);

 private:
  // Pointer-to-implementation; keeps internal headers out of the
  // public interface.
  std::unique_ptr<internal::CovarianceImpl> impl_;
};
| 453 | |
| 454 | } // namespace ceres |
| 455 | |
| 456 | #include "ceres/internal/reenable_warnings.h" |
| 457 | |
| 458 | #endif // CERES_PUBLIC_COVARIANCE_H_ |