Blame - include/ceres/loss_function.h - RealtimeRoboticsGroup/test

blob: 97a70b6ad4289f43eae786d9ee661cc6ae842b25 [file] [log] [blame]

Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame^]	1	// Ceres Solver - A fast non-linear least squares minimizer
				2	// Copyright 2015 Google Inc. All rights reserved.
				3	// http://ceres-solver.org/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are met:
				7	//
				8	// * Redistributions of source code must retain the above copyright notice,
				9	// this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above copyright notice,
				11	// this list of conditions and the following disclaimer in the documentation
				12	// and/or other materials provided with the distribution.
				13	// * Neither the name of Google Inc. nor the names of its contributors may be
				14	// used to endorse or promote products derived from this software without
				15	// specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				20	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				21	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				22	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				23	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				24	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				25	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				26	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				27	// POSSIBILITY OF SUCH DAMAGE.
				28	//
				29	// Author: sameeragarwal@google.com (Sameer Agarwal)
				30	//
				31	// The LossFunction interface is the way users describe how residuals
				32	// are converted to cost terms for the overall problem cost function.
				33	// For the exact manner in which loss functions are converted to the
				34	// overall cost for a problem, see problem.h.
				35	//
				36	// For least squares problem where there are no outliers and standard
				37	// squared loss is expected, it is not necessary to create a loss
				38	// function; instead passing a NULL to the problem when adding
				39	// residuals implies a standard squared loss.
				40	//
				41	// For least squares problems where the minimization may encounter
				42	// input terms that contain outliers, that is, completely bogus
				43	// measurements, it is important to use a loss function that reduces
				44	// their associated penalty.
				45	//
				46	// Consider a structure from motion problem. The unknowns are 3D
				47	// points and camera parameters, and the measurements are image
				48	// coordinates describing the expected reprojected position for a
				49	// point in a camera. For example, we want to model the geometry of a
				50	// street scene with fire hydrants and cars, observed by a moving
				51	// camera with unknown parameters, and the only 3D points we care
				52	// about are the pointy tippy-tops of the fire hydrants. Our magic
				53	// image processing algorithm, which is responsible for producing the
				54	// measurements that are input to Ceres, has found and matched all
				55	// such tippy-tops in all image frames, except that in one of the
				56	// frame it mistook a car's headlight for a hydrant. If we didn't do
				57	// anything special (i.e. if we used a basic quadratic loss), the
				58	// residual for the erroneous measurement will result in extreme error
				59	// due to the quadratic nature of squared loss. This results in the
				60	// entire solution getting pulled away from the optimum to reduce
				61	// the large error that would otherwise be attributed to the wrong
				62	// measurement.
				63	//
				64	// Using a robust loss function, the cost for large residuals is
				65	// reduced. In the example above, this leads to outlier terms getting
				66	// downweighted so they do not overly influence the final solution.
				67	//
				68	// What cost function is best?
				69	//
				70	// In general, there isn't a principled way to select a robust loss
				71	// function. The authors suggest starting with a non-robust cost, then
				72	// only experimenting with robust loss functions if standard squared
				73	// loss doesn't work.
				74
				75	#ifndef CERES_PUBLIC_LOSS_FUNCTION_H_
				76	#define CERES_PUBLIC_LOSS_FUNCTION_H_
				77
				78	#include <memory>
				79	#include "glog/logging.h"
				80	#include "ceres/types.h"
				81	#include "ceres/internal/disable_warnings.h"
				82
				83	namespace ceres {
				84
				85	class CERES_EXPORT LossFunction {
				86	public:
				87	virtual ~LossFunction() {}
				88
				89	// For a residual vector with squared 2-norm 'sq_norm', this method
				90	// is required to fill in the value and derivatives of the loss
				91	// function (rho in this example):
				92	//
				93	// out[0] = rho(sq_norm),
				94	// out[1] = rho'(sq_norm),
				95	// out[2] = rho''(sq_norm),
				96	//
				97	// Here the convention is that the contribution of a term to the
				98	// cost function is given by 1/2 rho(s), where
				99	//
				100	// s = \|\|residuals\|\|^2.
				101	//
				102	// Calling the method with a negative value of 's' is an error and
				103	// the implementations are not required to handle that case.
				104	//
				105	// Most sane choices of rho() satisfy:
				106	//
				107	// rho(0) = 0,
				108	// rho'(0) = 1,
				109	// rho'(s) < 1 in outlier region,
				110	// rho''(s) < 0 in outlier region,
				111	//
				112	// so that they mimic the least squares cost for small residuals.
				113	virtual void Evaluate(double sq_norm, double out[3]) const = 0;
				114	};
				115
				116	// Some common implementations follow below.
				117	//
				118	// Note: in the region of interest (i.e. s < 3) we have:
				119	// TrivialLoss >= HuberLoss >= SoftLOneLoss >= CauchyLoss
				120
				121
				122	// This corresponds to no robustification.
				123	//
				124	// rho(s) = s
				125	//
				126	// At s = 0: rho = [0, 1, 0].
				127	//
				128	// It is not normally necessary to use this, as passing NULL for the
				129	// loss function when building the problem accomplishes the same
				130	// thing.
				131	class CERES_EXPORT TrivialLoss : public LossFunction {
				132	public:
				133	virtual void Evaluate(double, double*) const;
				134	};
				135
				136	// Scaling
				137	// -------
				138	// Given one robustifier
				139	// s -> rho(s)
				140	// one can change the length scale at which robustification takes
				141	// place, by adding a scale factor 'a' as follows:
				142	//
				143	// s -> a^2 rho(s / a^2).
				144	//
				145	// The first and second derivatives are:
				146	//
				147	// s -> rho'(s / a^2),
				148	// s -> (1 / a^2) rho''(s / a^2),
				149	//
				150	// but the behaviour near s = 0 is the same as the original function,
				151	// i.e.
				152	//
				153	// rho(s) = s + higher order terms,
				154	// a^2 rho(s / a^2) = s + higher order terms.
				155	//
				156	// The scalar 'a' should be positive.
				157	//
				158	// The reason for the appearance of squaring is that 'a' is in the
				159	// units of the residual vector norm whereas 's' is a squared
				160	// norm. For applications it is more convenient to specify 'a' than
				161	// its square. The commonly used robustifiers below are described in
				162	// un-scaled format (a = 1) but their implementations work for any
				163	// non-zero value of 'a'.
				164
				165	// Huber.
				166	//
				167	// rho(s) = s for s <= 1,
				168	// rho(s) = 2 sqrt(s) - 1 for s >= 1.
				169	//
				170	// At s = 0: rho = [0, 1, 0].
				171	//
				172	// The scaling parameter 'a' corresponds to 'delta' on this page:
				173	// http://en.wikipedia.org/wiki/Huber_Loss_Function
				174	class CERES_EXPORT HuberLoss : public LossFunction {
				175	public:
				176	explicit HuberLoss(double a) : a_(a), b_(a * a) { }
				177	virtual void Evaluate(double, double*) const;
				178
				179	private:
				180	const double a_;
				181	// b = a^2.
				182	const double b_;
				183	};
				184
				185	// Soft L1, similar to Huber but smooth.
				186	//
				187	// rho(s) = 2 (sqrt(1 + s) - 1).
				188	//
				189	// At s = 0: rho = [0, 1, -1/2].
				190	class CERES_EXPORT SoftLOneLoss : public LossFunction {
				191	public:
				192	explicit SoftLOneLoss(double a) : b_(a * a), c_(1 / b_) { }
				193	virtual void Evaluate(double, double*) const;
				194
				195	private:
				196	// b = a^2.
				197	const double b_;
				198	// c = 1 / a^2.
				199	const double c_;
				200	};
				201
				202	// Inspired by the Cauchy distribution
				203	//
				204	// rho(s) = log(1 + s).
				205	//
				206	// At s = 0: rho = [0, 1, -1].
				207	class CERES_EXPORT CauchyLoss : public LossFunction {
				208	public:
				209	explicit CauchyLoss(double a) : b_(a * a), c_(1 / b_) { }
				210	virtual void Evaluate(double, double*) const;
				211
				212	private:
				213	// b = a^2.
				214	const double b_;
				215	// c = 1 / a^2.
				216	const double c_;
				217	};
				218
				219	// Loss that is capped beyond a certain level using the arc-tangent function.
				220	// The scaling parameter 'a' determines the level where falloff occurs.
				221	// For costs much smaller than 'a', the loss function is linear and behaves like
				222	// TrivialLoss, and for values much larger than 'a' the value asymptotically
				223	// approaches the constant value of a * PI / 2.
				224	//
				225	// rho(s) = a atan(s / a).
				226	//
				227	// At s = 0: rho = [0, 1, 0].
				228	class CERES_EXPORT ArctanLoss : public LossFunction {
				229	public:
				230	explicit ArctanLoss(double a) : a_(a), b_(1 / (a * a)) { }
				231	virtual void Evaluate(double, double*) const;
				232
				233	private:
				234	const double a_;
				235	// b = 1 / a^2.
				236	const double b_;
				237	};
				238
				239	// Loss function that maps to approximately zero cost in a range around the
				240	// origin, and reverts to linear in error (quadratic in cost) beyond this range.
				241	// The tolerance parameter 'a' sets the nominal point at which the
				242	// transition occurs, and the transition size parameter 'b' sets the nominal
				243	// distance over which most of the transition occurs. Both a and b must be
				244	// greater than zero, and typically b will be set to a fraction of a.
				245	// The slope rho'[s] varies smoothly from about 0 at s <= a - b to
				246	// about 1 at s >= a + b.
				247	//
				248	// The term is computed as:
				249	//
				250	// rho(s) = b log(1 + exp((s - a) / b)) - c0.
				251	//
				252	// where c0 is chosen so that rho(0) == 0
				253	//
				254	// c0 = b log(1 + exp(-a / b)
				255	//
				256	// This has the following useful properties:
				257	//
				258	// rho(s) == 0 for s = 0
				259	// rho'(s) ~= 0 for s << a - b
				260	// rho'(s) ~= 1 for s >> a + b
				261	// rho''(s) > 0 for all s
				262	//
				263	// In addition, all derivatives are continuous, and the curvature is
				264	// concentrated in the range a - b to a + b.
				265	//
				266	// At s = 0: rho = [0, ~0, ~0].
				267	class CERES_EXPORT TolerantLoss : public LossFunction {
				268	public:
				269	explicit TolerantLoss(double a, double b);
				270	virtual void Evaluate(double, double*) const;
				271
				272	private:
				273	const double a_, b_, c_;
				274	};
				275
				276	// This is the Tukey biweight loss function which aggressively
				277	// attempts to suppress large errors.
				278	//
				279	// The term is computed as:
				280	//
				281	// rho(s) = a^2 / 6 * (1 - (1 - s / a^2)^3 ) for s <= a^2,
				282	// rho(s) = a^2 / 6 for s > a^2.
				283	//
				284	// At s = 0: rho = [0, 0.5, -1 / a^2]
				285	class CERES_EXPORT TukeyLoss : public ceres::LossFunction {
				286	public:
				287	explicit TukeyLoss(double a) : a_squared_(a * a) { }
				288	virtual void Evaluate(double, double*) const;
				289
				290	private:
				291	const double a_squared_;
				292	};
				293
				294	// Composition of two loss functions. The error is the result of first
				295	// evaluating g followed by f to yield the composition f(g(s)).
				296	// The loss functions must not be NULL.
				297	class CERES_EXPORT ComposedLoss : public LossFunction {
				298	public:
				299	explicit ComposedLoss(const LossFunction* f, Ownership ownership_f,
				300	const LossFunction* g, Ownership ownership_g);
				301	virtual ~ComposedLoss();
				302	virtual void Evaluate(double, double*) const;
				303
				304	private:
				305	std::unique_ptr<const LossFunction> f_, g_;
				306	const Ownership ownership_f_, ownership_g_;
				307	};
				308
				309	// The discussion above has to do with length scaling: it affects the space
				310	// in which s is measured. Sometimes you want to simply scale the output
				311	// value of the robustifier. For example, you might want to weight
				312	// different error terms differently (e.g., weight pixel reprojection
				313	// errors differently from terrain errors).
				314	//
				315	// If rho is the wrapped robustifier, then this simply outputs
				316	// s -> a * rho(s)
				317	//
				318	// The first and second derivatives are, not surprisingly
				319	// s -> a * rho'(s)
				320	// s -> a * rho''(s)
				321	//
				322	// Since we treat the a NULL Loss function as the Identity loss
				323	// function, rho = NULL is a valid input and will result in the input
				324	// being scaled by a. This provides a simple way of implementing a
				325	// scaled ResidualBlock.
				326	class CERES_EXPORT ScaledLoss : public LossFunction {
				327	public:
				328	// Constructs a ScaledLoss wrapping another loss function. Takes
				329	// ownership of the wrapped loss function or not depending on the
				330	// ownership parameter.
				331	ScaledLoss(const LossFunction* rho, double a, Ownership ownership) :
				332	rho_(rho), a_(a), ownership_(ownership) { }
				333	ScaledLoss(const ScaledLoss&) = delete;
				334	void operator=(const ScaledLoss&) = delete;
				335
				336	virtual ~ScaledLoss() {
				337	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				338	rho_.release();
				339	}
				340	}
				341	virtual void Evaluate(double, double*) const;
				342
				343	private:
				344	std::unique_ptr<const LossFunction> rho_;
				345	const double a_;
				346	const Ownership ownership_;
				347	};
				348
				349	// Sometimes after the optimization problem has been constructed, we
				350	// wish to mutate the scale of the loss function. For example, when
				351	// performing estimation from data which has substantial outliers,
				352	// convergence can be improved by starting out with a large scale,
				353	// optimizing the problem and then reducing the scale. This can have
				354	// better convergence behaviour than just using a loss function with a
				355	// small scale.
				356	//
				357	// This templated class allows the user to implement a loss function
				358	// whose scale can be mutated after an optimization problem has been
				359	// constructed.
				360	//
				361	// Since we treat the a NULL Loss function as the Identity loss
				362	// function, rho = NULL is a valid input.
				363	//
				364	// Example usage
				365	//
				366	// Problem problem;
				367	//
				368	// // Add parameter blocks
				369	//
				370	// CostFunction* cost_function =
				371	// new AutoDiffCostFunction < UW_Camera_Mapper, 2, 9, 3>(
				372	// new UW_Camera_Mapper(feature_x, feature_y));
				373	//
				374	// LossFunctionWrapper* loss_function(new HuberLoss(1.0), TAKE_OWNERSHIP);
				375	//
				376	// problem.AddResidualBlock(cost_function, loss_function, parameters);
				377	//
				378	// Solver::Options options;
				379	// Solger::Summary summary;
				380	//
				381	// Solve(options, &problem, &summary)
				382	//
				383	// loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
				384	//
				385	// Solve(options, &problem, &summary)
				386	//
				387	class CERES_EXPORT LossFunctionWrapper : public LossFunction {
				388	public:
				389	LossFunctionWrapper(LossFunction* rho, Ownership ownership)
				390	: rho_(rho), ownership_(ownership) {
				391	}
				392
				393	LossFunctionWrapper(const LossFunctionWrapper&) = delete;
				394	void operator=(const LossFunctionWrapper&) = delete;
				395
				396	virtual ~LossFunctionWrapper() {
				397	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				398	rho_.release();
				399	}
				400	}
				401
				402	virtual void Evaluate(double sq_norm, double out[3]) const {
				403	if (rho_.get() == NULL) {
				404	out[0] = sq_norm;
				405	out[1] = 1.0;
				406	out[2] = 0.0;
				407	}
				408	else {
				409	rho_->Evaluate(sq_norm, out);
				410	}
				411	}
				412
				413	void Reset(LossFunction* rho, Ownership ownership) {
				414	if (ownership_ == DO_NOT_TAKE_OWNERSHIP) {
				415	rho_.release();
				416	}
				417	rho_.reset(rho);
				418	ownership_ = ownership;
				419	}
				420
				421	private:
				422	std::unique_ptr<const LossFunction> rho_;
				423	Ownership ownership_;
				424	};
				425
				426	} // namespace ceres
				427
				428	#include "ceres/internal/reenable_warnings.h"
				429
				430	#endif // CERES_PUBLIC_LOSS_FUNCTION_H_