Blame - include/ceres/dynamic_autodiff_cost_function.h - RealtimeRoboticsGroup/test

blob: 1bfb7a5a9c88795816c0abb6c77d2fae85f06ecb [file] [log] [blame]

Austin Schuh	70cc955	2019-01-21 19:46:48 -0800	[diff] [blame^]	1	// Ceres Solver - A fast non-linear least squares minimizer
				2	// Copyright 2015 Google Inc. All rights reserved.
				3	// http://ceres-solver.org/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are met:
				7	//
				8	// * Redistributions of source code must retain the above copyright notice,
				9	// this list of conditions and the following disclaimer.
				10	// * Redistributions in binary form must reproduce the above copyright notice,
				11	// this list of conditions and the following disclaimer in the documentation
				12	// and/or other materials provided with the distribution.
				13	// * Neither the name of Google Inc. nor the names of its contributors may be
				14	// used to endorse or promote products derived from this software without
				15	// specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
				18	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
				19	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
				20	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
				21	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
				22	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
				23	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
				24	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
				25	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				26	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
				27	// POSSIBILITY OF SUCH DAMAGE.
				28	//
				29	// Author: sameeragarwal@google.com (Sameer Agarwal)
				30	// mierle@gmail.com (Keir Mierle)
				31
				32	#ifndef CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
				33	#define CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_
				34
				35	#include <cmath>
				36	#include <numeric>
				37	#include <vector>
				38
				39	#include <memory>
				40	#include "ceres/dynamic_cost_function.h"
				41	#include "ceres/jet.h"
				42	#include "glog/logging.h"
				43
				44	namespace ceres {
				45
				46	// This autodiff implementation differs from the one found in
				47	// autodiff_cost_function.h by supporting autodiff on cost functions
				48	// with variable numbers of parameters with variable sizes. With the
				49	// other implementation, all the sizes (both the number of parameter
				50	// blocks and the size of each block) must be fixed at compile time.
				51	//
				52	// The functor API differs slightly from the API for fixed size
				53	// autodiff; the expected interface for the cost functors is:
				54	//
				55	// struct MyCostFunctor {
				56	// template<typename T>
				57	// bool operator()(T const* const* parameters, T* residuals) const {
				58	// // Use parameters[i] to access the i'th parameter block.
				59	// }
				60	// };
				61	//
				62	// Since the sizing of the parameters is done at runtime, you must
				63	// also specify the sizes after creating the dynamic autodiff cost
				64	// function. For example:
				65	//
				66	// DynamicAutoDiffCostFunction<MyCostFunctor, 3> cost_function(
				67	// new MyCostFunctor());
				68	// cost_function.AddParameterBlock(5);
				69	// cost_function.AddParameterBlock(10);
				70	// cost_function.SetNumResiduals(21);
				71	//
				72	// Under the hood, the implementation evaluates the cost function
				73	// multiple times, computing a small set of the derivatives (four by
				74	// default, controlled by the Stride template parameter) with each
				75	// pass. There is a tradeoff with the size of the passes; you may want
				76	// to experiment with the stride.
				77	template <typename CostFunctor, int Stride = 4>
				78	class DynamicAutoDiffCostFunction : public DynamicCostFunction {
				79	public:
				80	explicit DynamicAutoDiffCostFunction(CostFunctor* functor)
				81	: functor_(functor) {}
				82
				83	virtual ~DynamicAutoDiffCostFunction() {}
				84
				85	virtual bool Evaluate(double const* const* parameters,
				86	double* residuals,
				87	double** jacobians) const {
				88	CHECK_GT(num_residuals(), 0)
				89	<< "You must call DynamicAutoDiffCostFunction::SetNumResiduals() "
				90	<< "before DynamicAutoDiffCostFunction::Evaluate().";
				91
				92	if (jacobians == NULL) {
				93	return (*functor_)(parameters, residuals);
				94	}
				95
				96	// The difficulty with Jets, as implemented in Ceres, is that they were
				97	// originally designed for strictly compile-sized use. At this point, there
				98	// is a large body of code that assumes inside a cost functor it is
				99	// acceptable to do e.g. T(1.5) and get an appropriately sized jet back.
				100	//
				101	// Unfortunately, it is impossible to communicate the expected size of a
				102	// dynamically sized jet to the static instantiations that existing code
				103	// depends on.
				104	//
				105	// To work around this issue, the solution here is to evaluate the
				106	// jacobians in a series of passes, each one computing Stride *
				107	// num_residuals() derivatives. This is done with small, fixed-size jets.
				108	const int num_parameter_blocks =
				109	static_cast<int>(parameter_block_sizes().size());
				110	const int num_parameters = std::accumulate(parameter_block_sizes().begin(),
				111	parameter_block_sizes().end(),
				112	0);
				113
				114	// Allocate scratch space for the strided evaluation.
				115	std::vector<Jet<double, Stride>> input_jets(num_parameters);
				116	std::vector<Jet<double, Stride>> output_jets(num_residuals());
				117
				118	// Make the parameter pack that is sent to the functor (reused).
				119	std::vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks,
				120	static_cast<Jet<double, Stride>* >(NULL));
				121	int num_active_parameters = 0;
				122
				123	// To handle constant parameters between non-constant parameter blocks, the
				124	// start position --- a raw parameter index --- of each contiguous block of
				125	// non-constant parameters is recorded in start_derivative_section.
				126	std::vector<int> start_derivative_section;
				127	bool in_derivative_section = false;
				128	int parameter_cursor = 0;
				129
				130	// Discover the derivative sections and set the parameter values.
				131	for (int i = 0; i < num_parameter_blocks; ++i) {
				132	jet_parameters[i] = &input_jets[parameter_cursor];
				133
				134	const int parameter_block_size = parameter_block_sizes()[i];
				135	if (jacobians[i] != NULL) {
				136	if (!in_derivative_section) {
				137	start_derivative_section.push_back(parameter_cursor);
				138	in_derivative_section = true;
				139	}
				140
				141	num_active_parameters += parameter_block_size;
				142	} else {
				143	in_derivative_section = false;
				144	}
				145
				146	for (int j = 0; j < parameter_block_size; ++j, parameter_cursor++) {
				147	input_jets[parameter_cursor].a = parameters[i][j];
				148	}
				149	}
				150
				151	// When `num_active_parameters % Stride != 0` then it can be the case
				152	// that `active_parameter_count < Stride` while parameter_cursor is less
				153	// than the total number of parameters and with no remaining non-constant
				154	// parameter blocks. Pushing parameter_cursor (the total number of
				155	// parameters) as a final entry to start_derivative_section is required
				156	// because if a constant parameter block is encountered after the
				157	// last non-constant block then current_derivative_section is incremented
				158	// and would otherwise index an invalid position in
				159	// start_derivative_section. Setting the final element to the total number
				160	// of parameters means that this can only happen at most once in the loop
				161	// below.
				162	start_derivative_section.push_back(parameter_cursor);
				163
				164	// Evaluate all of the strides. Each stride is a chunk of the derivative to
				165	// evaluate, typically some size proportional to the size of the SIMD
				166	// registers of the CPU.
				167	int num_strides = static_cast<int>(ceil(num_active_parameters /
				168	static_cast<float>(Stride)));
				169
				170	int current_derivative_section = 0;
				171	int current_derivative_section_cursor = 0;
				172
				173	for (int pass = 0; pass < num_strides; ++pass) {
				174	// Set most of the jet components to zero, except for
				175	// non-constant #Stride parameters.
				176	const int initial_derivative_section = current_derivative_section;
				177	const int initial_derivative_section_cursor =
				178	current_derivative_section_cursor;
				179
				180	int active_parameter_count = 0;
				181	parameter_cursor = 0;
				182
				183	for (int i = 0; i < num_parameter_blocks; ++i) {
				184	for (int j = 0; j < parameter_block_sizes()[i];
				185	++j, parameter_cursor++) {
				186	input_jets[parameter_cursor].v.setZero();
				187	if (active_parameter_count < Stride &&
				188	parameter_cursor >= (
				189	start_derivative_section[current_derivative_section] +
				190	current_derivative_section_cursor)) {
				191	if (jacobians[i] != NULL) {
				192	input_jets[parameter_cursor].v[active_parameter_count] = 1.0;
				193	++active_parameter_count;
				194	++current_derivative_section_cursor;
				195	} else {
				196	++current_derivative_section;
				197	current_derivative_section_cursor = 0;
				198	}
				199	}
				200	}
				201	}
				202
				203	if (!(*functor_)(&jet_parameters[0], &output_jets[0])) {
				204	return false;
				205	}
				206
				207	// Copy the pieces of the jacobians into their final place.
				208	active_parameter_count = 0;
				209
				210	current_derivative_section = initial_derivative_section;
				211	current_derivative_section_cursor = initial_derivative_section_cursor;
				212
				213	for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) {
				214	for (int j = 0; j < parameter_block_sizes()[i];
				215	++j, parameter_cursor++) {
				216	if (active_parameter_count < Stride &&
				217	parameter_cursor >= (
				218	start_derivative_section[current_derivative_section] +
				219	current_derivative_section_cursor)) {
				220	if (jacobians[i] != NULL) {
				221	for (int k = 0; k < num_residuals(); ++k) {
				222	jacobians[i][k * parameter_block_sizes()[i] + j] =
				223	output_jets[k].v[active_parameter_count];
				224	}
				225	++active_parameter_count;
				226	++current_derivative_section_cursor;
				227	} else {
				228	++current_derivative_section;
				229	current_derivative_section_cursor = 0;
				230	}
				231	}
				232	}
				233	}
				234
				235	// Only copy the residuals over once (even though we compute them on
				236	// every loop).
				237	if (pass == num_strides - 1) {
				238	for (int k = 0; k < num_residuals(); ++k) {
				239	residuals[k] = output_jets[k].a;
				240	}
				241	}
				242	}
				243	return true;
				244	}
				245
				246	private:
				247	std::unique_ptr<CostFunctor> functor_;
				248	};
				249
				250	} // namespace ceres
				251
				252	#endif // CERES_PUBLIC_DYNAMIC_AUTODIFF_COST_FUNCTION_H_