// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
//
// The LossFunction interface is the way users describe how residuals
// are converted to cost terms for the overall problem cost function.
// For the exact manner in which loss functions are converted to the
// overall cost for a problem, see problem.h.
//
// For least squares problems where there are no outliers and a standard
// squared loss is expected, it is not necessary to create a loss
// function; instead, passing a nullptr to the problem when adding
// residuals implies a standard squared loss.
//
// For least squares problems where the minimization may encounter
// input terms that contain outliers, that is, completely bogus
// measurements, it is important to use a loss function that reduces
// their associated penalty.
//
// Consider a structure from motion problem. The unknowns are 3D
// points and camera parameters, and the measurements are image
// coordinates describing the expected reprojected position for a
// point in a camera. For example, we want to model the geometry of a
// street scene with fire hydrants and cars, observed by a moving
// camera with unknown parameters, and the only 3D points we care
// about are the pointy tippy-tops of the fire hydrants. Our magic
// image processing algorithm, which is responsible for producing the
// measurements that are input to Ceres, has found and matched all
// such tippy-tops in all image frames, except that in one of the
// frames it mistook a car's headlight for a hydrant. If we didn't do
// anything special (i.e. if we used a basic quadratic loss), the
// residual for the erroneous measurement would be extremely large
// due to the quadratic nature of squared loss. This results in the
// entire solution getting pulled away from the optimum to reduce
// the large error that would otherwise be attributed to the wrong
// measurement.
//
// Using a robust loss function, the cost for large residuals is
// reduced. In the example above, this leads to outlier terms getting
// down-weighted so they do not overly influence the final solution.
//
// What loss function is best?
//
// In general, there isn't a principled way to select a robust loss
// function. The authors suggest starting with a non-robust cost, then
// only experimenting with robust loss functions if standard squared
// loss doesn't work.
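//
// As a minimal sketch of both options (the cost functor name, its
// dimensions, and the parameter block names below are hypothetical,
// not part of this file):
//
//   ceres::Problem problem;
//   ceres::CostFunction* cost_function =
//       new ceres::AutoDiffCostFunction<ReprojectionError, 2, 9, 3>(
//           new ReprojectionError(observed_x, observed_y));
//
//   // Inlier-heavy term: nullptr selects the plain 1/2 ||residuals||^2.
//   problem.AddResidualBlock(cost_function, nullptr, camera, point);
//
//   // Outlier-prone term: Huber with scale a = 1.0 caps penalty growth.
//   problem.AddResidualBlock(
//       cost_function, new ceres::HuberLoss(1.0), camera, point);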

#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
#define CERES_PUBLIC_LOSS_FUNCTION_H_

#include <memory>

#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/export.h"
#include "ceres/types.h"
#include "glog/logging.h"

namespace ceres {

class CERES_EXPORT LossFunction {
 public:
  virtual ~LossFunction();

  // For a residual vector with squared 2-norm 'sq_norm', this method
  // is required to fill in the value and derivatives of the loss
  // function (rho in this example):
  //
  //   out[0] = rho(sq_norm),
  //   out[1] = rho'(sq_norm),
  //   out[2] = rho''(sq_norm),
  //
  // Here the convention is that the contribution of a term to the
  // cost function is given by 1/2 rho(s), where
  //
  //   s = ||residuals||^2.
  //
  // Calling the method with a negative value of 's' is an error and
  // the implementations are not required to handle that case.
  //
  // Most sane choices of rho() satisfy:
  //
  //   rho(0) = 0,
  //   rho'(0) = 1,
  //   rho'(s) < 1 in the outlier region,
  //   rho''(s) < 0 in the outlier region,
  //
  // so that they mimic the least squares cost for small residuals.
  virtual void Evaluate(double sq_norm, double out[3]) const = 0;
};
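
// To illustrate the Evaluate() contract, here is a sketch of a custom
// loss: a hypothetical, unscaled re-derivation of the Cauchy loss below,
// not part of the Ceres API. For rho(s) = log(1 + s):
//
//   rho'(s)  =  1 / (1 + s),
//   rho''(s) = -1 / (1 + s)^2.
//
//   // Needs <cmath> for std::log.
//   class MyCauchyLoss final : public ceres::LossFunction {
//    public:
//     void Evaluate(double s, double out[3]) const override {
//       const double inv = 1.0 / (1.0 + s);  // Finite since s >= 0.
//       out[0] = std::log(1.0 + s);          // rho(s)
//       out[1] = inv;                        // rho'(s)
//       out[2] = -inv * inv;                 // rho''(s)
//     }
//   };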

// Some common implementations follow below.
//
// Note: in the region of interest (i.e. s < 3) we have:
//   TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss

// This corresponds to no robustification.
//
//   rho(s) = s
//
// At s = 0: rho = [0, 1, 0].
//
// It is not normally necessary to use this, as passing nullptr for the
// loss function when building the problem accomplishes the same
// thing.
class CERES_EXPORT TrivialLoss final : public LossFunction {
 public:
  void Evaluate(double, double*) const override;
};

// Scaling
// -------
// Given one robustifier
//   s -> rho(s)
// one can change the length scale at which robustification takes
// place, by adding a scale factor 'a' as follows:
//
//   s -> a^2 rho(s / a^2).
//
// The first and second derivatives are:
//
//   s -> rho'(s / a^2),
//   s -> (1 / a^2) rho''(s / a^2),
//
// but the behaviour near s = 0 is the same as the original function,
// i.e.
//
//   rho(s) = s + higher order terms,
//   a^2 rho(s / a^2) = s + higher order terms.
//
// The scalar 'a' should be positive.
//
// The reason for the appearance of squaring is that 'a' is in the
// units of the residual vector norm whereas 's' is a squared
// norm. For applications it is more convenient to specify 'a' than
// its square. The commonly used robustifiers below are described in
// un-scaled format (a = 1) but their implementations work for any
// non-zero value of 'a'.
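//
// In code, the scaled evaluation can be sketched as follows (a
// hypothetical helper, not part of this file; the built-in classes
// below bake the same algebra directly into their Evaluate() methods):
//
//   // Fills out[] with the value/derivatives of a^2 rho(s / a^2).
//   void EvaluateScaled(const ceres::LossFunction& rho,
//                       double a, double s, double out[3]) {
//     const double a2 = a * a;
//     double base[3];
//     rho.Evaluate(s / a2, base);
//     out[0] = a2 * base[0];   // a^2 rho(s / a^2)
//     out[1] = base[1];        // rho'(s / a^2)
//     out[2] = base[2] / a2;   // (1 / a^2) rho''(s / a^2)
//   }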

// Huber.
//
//   rho(s) = s               for s <= 1,
//   rho(s) = 2 sqrt(s) - 1   for s >= 1.
//
// At s = 0: rho = [0, 1, 0].
//
// The scaling parameter 'a' corresponds to 'delta' on this page:
// http://en.wikipedia.org/wiki/Huber_Loss_Function
class CERES_EXPORT HuberLoss final : public LossFunction {
 public:
  explicit HuberLoss(double a) : a_(a), b_(a * a) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_;
  // b = a^2.
  const double b_;
};
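
// As a quick sanity check of the convention (a usage sketch, taking
// a = 1 so that the transition happens at s = a^2 = 1):
//
//   ceres::HuberLoss loss(1.0);
//   double out[3];
//   loss.Evaluate(0.25, out);  // s <= 1: out ~ {0.25, 1.0, 0.0}, quadratic region.
//   loss.Evaluate(4.0, out);   // s >  1: out[0] = 2 sqrt(4) - 1 = 3,
//                              //         out[1] = 1 / sqrt(4) = 0.5 (down-weighted).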

// Soft L1, similar to Huber but smooth.
//
//   rho(s) = 2 (sqrt(1 + s) - 1).
//
// At s = 0: rho = [0, 1, -1 / (2 * a^2)].
class CERES_EXPORT SoftLOneLoss final : public LossFunction {
 public:
  explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) {}
  void Evaluate(double, double*) const override;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};

// Inspired by the Cauchy distribution.
//
//   rho(s) = log(1 + s).
//
// At s = 0: rho = [0, 1, -1 / a^2].
class CERES_EXPORT CauchyLoss final : public LossFunction {
 public:
  explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) {}
  void Evaluate(double, double*) const override;

 private:
  // b = a^2.
  const double b_;
  // c = 1 / a^2.
  const double c_;
};

// Loss that is capped beyond a certain level using the arc-tangent function.
// The scaling parameter 'a' determines the level where falloff occurs.
// For costs much smaller than 'a', the loss function is linear and behaves like
// TrivialLoss, and for values much larger than 'a' the value asymptotically
// approaches the constant value of a * PI / 2.
//
//   rho(s) = a atan(s / a).
//
// At s = 0: rho = [0, 1, 0].
class CERES_EXPORT ArctanLoss final : public LossFunction {
 public:
  explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_;
  // b = 1 / a^2.
  const double b_;
};

// Loss function that maps to approximately zero cost in a range around the
// origin, and reverts to linear in error (quadratic in cost) beyond this range.
// The tolerance parameter 'a' sets the nominal point at which the
// transition occurs, and the transition size parameter 'b' sets the nominal
// distance over which most of the transition occurs. Both a and b must be
// greater than zero, and typically b will be set to a fraction of a.
// The slope rho'(s) varies smoothly from about 0 at s <= a - b to
// about 1 at s >= a + b.
//
// The term is computed as:
//
//   rho(s) = b log(1 + exp((s - a) / b)) - c0,
//
// where c0 is chosen so that rho(0) == 0:
//
//   c0 = b log(1 + exp(-a / b)).
//
// This has the following useful properties:
//
//   rho(s)   == 0  for s = 0
//   rho'(s)  ~= 0  for s << a - b
//   rho'(s)  ~= 1  for s >> a + b
//   rho''(s) >  0  for all s
//
// In addition, all derivatives are continuous, and the curvature is
// concentrated in the range a - b to a + b.
//
// At s = 0: rho = [0, ~0, ~0].
class CERES_EXPORT TolerantLoss final : public LossFunction {
 public:
  explicit TolerantLoss(double a, double b);
  void Evaluate(double, double*) const override;

 private:
  const double a_, b_, c_;
};
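
// A short usage sketch (the parameter values are assumptions for
// illustration): with a = 0.5 and b = 0.1, costs below s ~ a - b = 0.4
// are essentially free, and beyond s ~ a + b = 0.6 the loss grows
// linearly in s. By construction, rho(0) is exactly zero:
//
//   ceres::TolerantLoss loss(0.5, 0.1);
//   double out[3];
//   loss.Evaluate(0.0, out);  // out[0] == 0, out[1] ~= 0 (dead zone).
//   loss.Evaluate(1.0, out);  // out[1] ~= 1 (behaves like TrivialLoss).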

// This is the Tukey biweight loss function which aggressively
// attempts to suppress large errors.
//
// The term is computed as follows where the equations are scaled by a
// factor of 2 because the cost function is given by 1/2 rho(s):
//
//   rho(s) = a^2 / 3 * (1 - (1 - s / a^2)^3)   for s <= a^2,
//   rho(s) = a^2 / 3                           for s >  a^2.
//
// At s = 0: rho = [0, 1, -2 / a^2].
class CERES_EXPORT TukeyLoss final : public ceres::LossFunction {
 public:
  explicit TukeyLoss(double a) : a_squared_(a * a) {}
  void Evaluate(double, double*) const override;

 private:
  const double a_squared_;
};

// Composition of two loss functions. The error is the result of first
// evaluating g followed by f to yield the composition f(g(s)).
// The loss functions must not be nullptr.
class CERES_EXPORT ComposedLoss final : public LossFunction {
 public:
  explicit ComposedLoss(const LossFunction* f,
                        Ownership ownership_f,
                        const LossFunction* g,
                        Ownership ownership_g);
  ~ComposedLoss() override;
  void Evaluate(double, double*) const override;

 private:
  std::unique_ptr<const LossFunction> f_, g_;
  const Ownership ownership_f_, ownership_g_;
};
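
// For example, to cap a Huber loss with an arc-tangent envelope (a usage
// sketch; this particular combination and its scales are assumptions for
// illustration, not a recommendation):
//
//   ceres::LossFunction* composed = new ceres::ComposedLoss(
//       new ceres::ArctanLoss(10.0), ceres::TAKE_OWNERSHIP,  // f: outer.
//       new ceres::HuberLoss(1.0), ceres::TAKE_OWNERSHIP);   // g: inner.
//   // composed->Evaluate(s, out) yields f(g(s)) = 10 atan(huber(s) / 10).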

// The discussion above has to do with length scaling: it affects the space
// in which s is measured. Sometimes you want to simply scale the output
// value of the robustifier. For example, you might want to weight
// different error terms differently (e.g., weight pixel reprojection
// errors differently from terrain errors).
//
// If rho is the wrapped robustifier, then this simply outputs
//   s -> a * rho(s).
//
// The first and second derivatives are, not surprisingly,
//   s -> a * rho'(s),
//   s -> a * rho''(s).
//
// Since we treat a nullptr loss function as the identity loss
// function, rho = nullptr is a valid input and will result in the input
// being scaled by a. This provides a simple way of implementing a
// scaled ResidualBlock.
class CERES_EXPORT ScaledLoss final : public LossFunction {
 public:
  // Constructs a ScaledLoss wrapping another loss function. Takes
  // ownership of the wrapped loss function or not depending on the
  // ownership parameter.
  ScaledLoss(const LossFunction* rho, double a, Ownership ownership)
      : rho_(rho), a_(a), ownership_(ownership) {}
  ScaledLoss(const ScaledLoss&) = delete;
  void operator=(const ScaledLoss&) = delete;

  ~ScaledLoss() override {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }
  void Evaluate(double, double*) const override;

 private:
  std::unique_ptr<const LossFunction> rho_;
  const double a_;
  const Ownership ownership_;
};
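
// A usage sketch (the weight value and the cost function name are
// assumptions for illustration): down-weight a class of residual blocks
// by a constant factor while keeping Huber robustification:
//
//   // Each term contributes 1/2 * 0.25 * huber(s) to the total cost.
//   ceres::LossFunction* weighted = new ceres::ScaledLoss(
//       new ceres::HuberLoss(1.0), 0.25, ceres::TAKE_OWNERSHIP);
//   problem.AddResidualBlock(terrain_cost_function, weighted, parameters);
//
// Passing nullptr as the wrapped loss yields a plain weighted squared
// error, i.e. 1/2 * 0.25 * s per term.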

// Sometimes after the optimization problem has been constructed, we
// wish to mutate the scale of the loss function. For example, when
// performing estimation from data which has substantial outliers,
// convergence can be improved by starting out with a large scale,
// optimizing the problem and then reducing the scale. This can have
// better convergence behaviour than just using a loss function with a
// small scale.
//
// This class allows the user to implement a loss function
// whose scale can be mutated after an optimization problem has been
// constructed.
//
// Since we treat a nullptr loss function as the identity loss
// function, rho = nullptr is a valid input.
//
// Example usage:
//
//   Problem problem;
//
//   // Add parameter blocks.
//
//   CostFunction* cost_function =
//       new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
//           new UW_Camera_Mapper(feature_x, feature_y));
//
//   LossFunctionWrapper* loss_function = new LossFunctionWrapper(
//       new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   problem.AddResidualBlock(cost_function, loss_function, parameters);
//
//   Solver::Options options;
//   Solver::Summary summary;
//
//   Solve(options, &problem, &summary);
//
//   loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
//
//   Solve(options, &problem, &summary);
//
class CERES_EXPORT LossFunctionWrapper final : public LossFunction {
 public:
  LossFunctionWrapper(LossFunction* rho, Ownership ownership)
      : rho_(rho), ownership_(ownership) {}

  LossFunctionWrapper(const LossFunctionWrapper&) = delete;
  void operator=(const LossFunctionWrapper&) = delete;

  ~LossFunctionWrapper() override {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
  }

  void Evaluate(double sq_norm, double out[3]) const override {
    if (rho_.get() == nullptr) {
      out[0] = sq_norm;
      out[1] = 1.0;
      out[2] = 0.0;
    } else {
      rho_->Evaluate(sq_norm, out);
    }
  }

  void Reset(LossFunction* rho, Ownership ownership) {
    if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
      rho_.release();
    }
    rho_.reset(rho);
    ownership_ = ownership;
  }

 private:
  std::unique_ptr<const LossFunction> rho_;
  Ownership ownership_;
};

}  // namespace ceres

#include "ceres/internal/reenable_warnings.h"

#endif  // CERES_PUBLIC_LOSS_FUNCTION_H_