Blame - include/ceres/loss_function.h - RealtimeRoboticsGroup/test

blob: 7aabf7dfce1f53eccf3a2d2ddf1aa7a25a32c051 [file] [log] [blame]

Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	1	// Ceres Solver - A fast non-linear least squares minimizer
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	2	// Copyright 2019 Google Inc. All rights reserved.
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	3	// http://ceres-solver.org/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are met:
				7	//
				8	// * Redistributions of source code must retain the above copyright notice,
				9	// this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above copyright notice,
				11	// this list of conditions and the following disclaimer in the documentation
				12	// and/or other materials provided with the distribution.
				13	// * Neither the name of Google Inc. nor the names of its contributors may be
				14	// used to endorse or promote products derived from this software without
				15	// specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				20	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				21	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				22	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				23	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				24	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				25	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				26	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				27	// POSSIBILITY OF SUCH DAMAGE.
				28	//
				29	// Author: sameeragarwal@google.com (Sameer Agarwal)
				30	//
				31	// The LossFunction interface is the way users describe how residuals
				32	// are converted to cost terms for the overall problem cost function.
				33	// For the exact manner in which loss functions are converted to the
				34	// overall cost for a problem, see problem.h.
				35	//
				36	// For least squares problems where there are no outliers and standard
				37	// squared loss is expected, it is not necessary to create a loss
				38	// function; instead passing a NULL to the problem when adding
				39	// residuals implies a standard squared loss.
				40	//
				41	// For least squares problems where the minimization may encounter
				42	// input terms that contain outliers, that is, completely bogus
				43	// measurements, it is important to use a loss function that reduces
				44	// their associated penalty.
				45	//
				46	// Consider a structure from motion problem. The unknowns are 3D
				47	// points and camera parameters, and the measurements are image
				48	// coordinates describing the expected reprojected position for a
				49	// point in a camera. For example, we want to model the geometry of a
				50	// street scene with fire hydrants and cars, observed by a moving
				51	// camera with unknown parameters, and the only 3D points we care
				52	// about are the pointy tippy-tops of the fire hydrants. Our magic
				53	// image processing algorithm, which is responsible for producing the
				54	// measurements that are input to Ceres, has found and matched all
				55	// such tippy-tops in all image frames, except that in one of the
				56	// frames it mistook a car's headlight for a hydrant. If we didn't do
				57	// anything special (i.e. if we used a basic quadratic loss), the
				58	// residual for the erroneous measurement will result in extreme error
				59	// due to the quadratic nature of squared loss. This results in the
				60	// entire solution getting pulled away from the optimum to reduce
				61	// the large error that would otherwise be attributed to the wrong
				62	// measurement.
				63	//
				64	// Using a robust loss function, the cost for large residuals is
				65	// reduced. In the example above, this leads to outlier terms getting
				66	// downweighted so they do not overly influence the final solution.
				67	//
				68	// What cost function is best?
				69	//
				70	// In general, there isn't a principled way to select a robust loss
				71	// function. The authors suggest starting with a non-robust cost, then
				72	// only experimenting with robust loss functions if standard squared
				73	// loss doesn't work.
				74
				75	#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
				76	#define CERES_PUBLIC_LOSS_FUNCTION_H_
				77
				78	#include <memory>
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	79
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	80	#include "ceres/internal/disable_warnings.h"
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	81	#include "ceres/types.h"
				82	#include "glog/logging.h"
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	83
				84	namespace ceres {
				85
				86	class CERES_EXPORT LossFunction {
				87	public:
				88	virtual ~LossFunction() {}
				89
				90	// For a residual vector with squared 2-norm 'sq_norm', this method
				91	// is required to fill in the value and derivatives of the loss
				92	// function (rho in this example):
				93	//
				94	// out[0] = rho(sq_norm),
				95	// out[1] = rho'(sq_norm),
				96	// out[2] = rho''(sq_norm),
				97	//
				98	// Here the convention is that the contribution of a term to the
				99	// cost function is given by 1/2 rho(s), where
				100	//
				101	// s = ||residuals||^2.
				102	//
				103	// Calling the method with a negative value of 's' is an error and
				104	// the implementations are not required to handle that case.
				105	//
				106	// Most sane choices of rho() satisfy:
				107	//
				108	// rho(0) = 0,
				109	// rho'(0) = 1,
				110	// rho'(s) < 1 in outlier region,
				111	// rho''(s) < 0 in outlier region,
				112	//
				113	// so that they mimic the least squares cost for small residuals.
				114	virtual void Evaluate(double sq_norm, double out[3]) const = 0;
				115	};
				116
				117	// Some common implementations follow below.
				118	//
				119	// Note: in the region of interest (i.e. s < 3) we have:
				120	// TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
				121
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	122	// This corresponds to no robustification.
				123	//
				124	// rho(s) = s
				125	//
				126	// At s = 0: rho = [0, 1, 0].
				127	//
				128	// It is not normally necessary to use this, as passing NULL for the
				129	// loss function when building the problem accomplishes the same
				130	// thing.
				131	class CERES_EXPORT TrivialLoss : public LossFunction {
				132	public:
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	133	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	134	};
				135
				136	// Scaling
				137	// -------
				138	// Given one robustifier
				139	// s -> rho(s)
				140	// one can change the length scale at which robustification takes
				141	// place, by adding a scale factor 'a' as follows:
				142	//
				143	// s -> a^2 rho(s / a^2).
				144	//
				145	// The first and second derivatives are:
				146	//
				147	// s -> rho'(s / a^2),
				148	// s -> (1 / a^2) rho''(s / a^2),
				149	//
				150	// but the behaviour near s = 0 is the same as the original function,
				151	// i.e.
				152	//
				153	// rho(s) = s + higher order terms,
				154	// a^2 rho(s / a^2) = s + higher order terms.
				155	//
				156	// The scalar 'a' should be positive.
				157	//
				158	// The reason for the appearance of squaring is that 'a' is in the
				159	// units of the residual vector norm whereas 's' is a squared
				160	// norm. For applications it is more convenient to specify 'a' than
				161	// its square. The commonly used robustifiers below are described in
				162	// un-scaled format (a = 1) but their implementations work for any
				163	// non-zero value of 'a'.
				164
				165	// Huber.
				166	//
				167	// rho(s) = s for s <= 1,
				168	// rho(s) = 2 sqrt(s) - 1 for s >= 1.
				169	//
				170	// At s = 0: rho = [0, 1, 0].
				171	//
				172	// The scaling parameter 'a' corresponds to 'delta' on this page:
				173	// http://en.wikipedia.org/wiki/Huber_Loss_Function
				174	class CERES_EXPORT HuberLoss : public LossFunction {
				175	public:
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	176	explicit HuberLoss(double a) : a_(a), b_(a * a) {}
				177	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	178
				179	private:
				180	const double a_;
				181	// b = a^2.
				182	const double b_;
				183	};
				184
				185	// Soft L1, similar to Huber but smooth.
				186	//
				187	// rho(s) = 2 (sqrt(1 + s) - 1).
				188	//
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	189	// At s = 0: rho = [0, 1, -1 / (2 * a^2)].
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	190	class CERES_EXPORT SoftLOneLoss : public LossFunction {
				191	public:
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	192	explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) {}
				193	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	194
				195	private:
				196	// b = a^2.
				197	const double b_;
				198	// c = 1 / a^2.
				199	const double c_;
				200	};
				201
				202	// Inspired by the Cauchy distribution
				203	//
				204	// rho(s) = log(1 + s).
				205	//
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	206	// At s = 0: rho = [0, 1, -1 / a^2].
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	207	class CERES_EXPORT CauchyLoss : public LossFunction {
				208	public:
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	209	explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) {}
				210	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	211
				212	private:
				213	// b = a^2.
				214	const double b_;
				215	// c = 1 / a^2.
				216	const double c_;
				217	};
				218
				219	// Loss that is capped beyond a certain level using the arc-tangent function.
				220	// The scaling parameter 'a' determines the level where falloff occurs.
				221	// For costs much smaller than 'a', the loss function is linear and behaves like
				222	// TrivialLoss, and for values much larger than 'a' the value asymptotically
				223	// approaches the constant value of a * PI / 2.
				224	//
				225	// rho(s) = a atan(s / a).
				226	//
				227	// At s = 0: rho = [0, 1, 0].
				228	class CERES_EXPORT ArctanLoss : public LossFunction {
				229	public:
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	230	explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) {}
				231	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	232
				233	private:
				234	const double a_;
				235	// b = 1 / a^2.
				236	const double b_;
				237	};
				238
				239	// Loss function that maps to approximately zero cost in a range around the
				240	// origin, and reverts to linear in error (quadratic in cost) beyond this range.
				241	// The tolerance parameter 'a' sets the nominal point at which the
				242	// transition occurs, and the transition size parameter 'b' sets the nominal
				243	// distance over which most of the transition occurs. Both a and b must be
				244	// greater than zero, and typically b will be set to a fraction of a.
				245	// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
				246	// about 1 at s >= a + b.
				247	//
				248	// The term is computed as:
				249	//
				250	// rho(s) = b log(1 + exp((s - a) / b)) - c0.
				251	//
				252	// where c0 is chosen so that rho(0) == 0
				253	//
				254	// c0 = b log(1 + exp(-a / b))
				255	//
				256	// This has the following useful properties:
				257	//
				258	// rho(s) == 0 for s = 0
				259	// rho'(s) ~= 0 for s << a - b
				260	// rho'(s) ~= 1 for s >> a + b
				261	// rho''(s) > 0 for all s
				262	//
				263	// In addition, all derivatives are continuous, and the curvature is
				264	// concentrated in the range a - b to a + b.
				265	//
				266	// At s = 0: rho = [0, ~0, ~0].
				267	class CERES_EXPORT TolerantLoss : public LossFunction {
				268	public:
				269	explicit TolerantLoss(double a, double b);
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	270	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	271
				272	private:
				273	const double a_, b_, c_;
				274	};
				275
				276	// This is the Tukey biweight loss function which aggressively
				277	// attempts to suppress large errors.
				278	//
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	279	// The term is computed as follows where the equations are scaled by a
				280	// factor of 2 because the cost function is given by 1/2 rho(s):
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	281	//
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	282	// rho(s) = a^2 / 3 * (1 - (1 - s / a^2)^3 ) for s <= a^2,
				283	// rho(s) = a^2 / 3 for s > a^2.
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	284	//
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	285	// At s = 0: rho = [0, 1, -2 / a^2]
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	286	class CERES_EXPORT TukeyLoss : public ceres::LossFunction {
				287	public:
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	288	explicit TukeyLoss(double a) : a_squared_(a * a) {}
				289	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	290
				291	private:
				292	const double a_squared_;
				293	};
				294
				295	// Composition of two loss functions. The error is the result of first
				296	// evaluating g followed by f to yield the composition f(g(s)).
				297	// The loss functions must not be NULL.
				298	class CERES_EXPORT ComposedLoss : public LossFunction {
				299	public:
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	300	explicit ComposedLoss(const LossFunction* f,
				301	Ownership ownership_f,
				302	const LossFunction* g,
				303	Ownership ownership_g);
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	304	virtual ~ComposedLoss();
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	305	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	306
				307	private:
				308	std::unique_ptr<const LossFunction> f_, g_;
				309	const Ownership ownership_f_, ownership_g_;
				310	};
				311
				312	// The discussion above has to do with length scaling: it affects the space
				313	// in which s is measured. Sometimes you want to simply scale the output
				314	// value of the robustifier. For example, you might want to weight
				315	// different error terms differently (e.g., weight pixel reprojection
				316	// errors differently from terrain errors).
				317	//
				318	// If rho is the wrapped robustifier, then this simply outputs
				319	// s -> a * rho(s)
				320	//
				321	// The first and second derivatives are, not surprisingly
				322	// s -> a * rho'(s)
				323	// s -> a * rho''(s)
				324	//
				325	// Since we treat a NULL Loss function as the Identity loss
				326	// function, rho = NULL is a valid input and will result in the input
				327	// being scaled by a. This provides a simple way of implementing a
				328	// scaled ResidualBlock.
				329	class CERES_EXPORT ScaledLoss : public LossFunction {
				330	public:
				331	// Constructs a ScaledLoss wrapping another loss function. Takes
				332	// ownership of the wrapped loss function or not depending on the
				333	// ownership parameter.
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	334	ScaledLoss(const LossFunction* rho, double a, Ownership ownership)
				335	: rho_(rho), a_(a), ownership_(ownership) {}
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	336	ScaledLoss(const ScaledLoss&) = delete;
				337	void operator=(const ScaledLoss&) = delete;
				338
				339	virtual ~ScaledLoss() {
				340	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				341	rho_.release();
				342	}
				343	}
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	344	void Evaluate(double, double*) const override;
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	345
				346	private:
				347	std::unique_ptr<const LossFunction> rho_;
				348	const double a_;
				349	const Ownership ownership_;
				350	};
				351
				352	// Sometimes after the optimization problem has been constructed, we
				353	// wish to mutate the scale of the loss function. For example, when
				354	// performing estimation from data which has substantial outliers,
				355	// convergence can be improved by starting out with a large scale,
				356	// optimizing the problem and then reducing the scale. This can have
				357	// better convergence behaviour than just using a loss function with a
				358	// small scale.
				359	//
				360	// This templated class allows the user to implement a loss function
				361	// whose scale can be mutated after an optimization problem has been
				362	// constructed.
				363	//
				364	// Since we treat a NULL Loss function as the Identity loss
				365	// function, rho = NULL is a valid input.
				366	//
				367	// Example usage
				368	//
				369	// Problem problem;
				370	//
				371	// // Add parameter blocks
				372	//
				373	// CostFunction* cost_function =
				374	// new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
				375	// new UW_Camera_Mapper(feature_x, feature_y));
				376	//
				377	// LossFunctionWrapper* loss_function = new LossFunctionWrapper(new HuberLoss(1.0), TAKE_OWNERSHIP);
				378	//
				379	// problem.AddResidualBlock(cost_function, loss_function, parameters);
				380	//
				381	// Solver::Options options;
				382	// Solver::Summary summary;
				383	//
				384	// Solve(options, &problem, &summary);
				385	//
				386	// loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
				387	//
				388	// Solve(options, &problem, &summary);
				389	//
				390	class CERES_EXPORT LossFunctionWrapper : public LossFunction {
				391	public:
				392	LossFunctionWrapper(LossFunction* rho, Ownership ownership)
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	393	: rho_(rho), ownership_(ownership) {}
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	394
				395	LossFunctionWrapper(const LossFunctionWrapper&) = delete;
				396	void operator=(const LossFunctionWrapper&) = delete;
				397
				398	virtual ~LossFunctionWrapper() {
				399	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				400	rho_.release();
				401	}
				402	}
				403
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	404	void Evaluate(double sq_norm, double out[3]) const override {
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	405	if (rho_.get() == NULL) {
				406	out[0] = sq_norm;
				407	out[1] = 1.0;
				408	out[2] = 0.0;
Austin Schuh	1d1e6ea	2020-12-23 21:56:30 -0800	[diff] [blame^]	409	} else {
Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame]	410	rho_->Evaluate(sq_norm, out);
				411	}
				412	}
				413
				414	void Reset(LossFunction* rho, Ownership ownership) {
				415	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				416	rho_.release();
				417	}
				418	rho_.reset(rho);
				419	ownership_ = ownership;
				420	}
				421
				422	private:
				423	std::unique_ptr<const LossFunction> rho_;
				424	Ownership ownership_;
				425	};
				426
				427	} // namespace ceres
				428
				429	#include "ceres/internal/reenable_warnings.h"
				430
				431	#endif // CERES_PUBLIC_LOSS_FUNCTION_H_