Squashed 'third_party/ceres/' content from commit e51e9b4

Change-Id: I763587619d57e594d3fa158dc3a7fe0b89a1743b
git-subtree-dir: third_party/ceres
git-subtree-split: e51e9b46f6ca88ab8b2266d0e362771db6d98067
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
new file mode 100644
index 0000000..cfdd910
--- /dev/null
+++ b/docs/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Ceres Solver - A fast non-linear least squares minimizer
+# Copyright 2015 Google Inc. All rights reserved.
+# http://ceres-solver.org/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# * Redistributions of source code must retain the above copyright notice,
+#   this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright notice,
+#   this list of conditions and the following disclaimer in the documentation
+#   and/or other materials provided with the distribution.
+# * Neither the name of Google Inc. nor the names of its contributors may be
+#   used to endorse or promote products derived from this software without
+#   specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+
+add_subdirectory(source)
diff --git a/docs/source/CMakeLists.txt b/docs/source/CMakeLists.txt
new file mode 100644
index 0000000..70bf998
--- /dev/null
+++ b/docs/source/CMakeLists.txt
@@ -0,0 +1,19 @@
+find_package(Sphinx REQUIRED)
+
+# HTML output directory
+set(SPHINX_HTML_DIR "${Ceres_BINARY_DIR}/docs/html")
+
+# Install documentation
+install(DIRECTORY ${SPHINX_HTML_DIR}
+        DESTINATION "${CERES_DOCS_INSTALL_DIR}"
+        COMPONENT Doc
+        PATTERN "${SPHINX_HTML_DIR}/*")
+
+# Building using 'make_docs.py' python script
+add_custom_target(ceres_docs ALL
+                  python
+                  "${Ceres_SOURCE_DIR}/scripts/make_docs.py"
+                  "${Ceres_SOURCE_DIR}"
+                  "${Ceres_BINARY_DIR}/docs"
+                  "${SPHINX_EXECUTABLE}"
+                  COMMENT "Building HTML documentation with Sphinx")
diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html
new file mode 100644
index 0000000..61c8eb5
--- /dev/null
+++ b/docs/source/_templates/layout.html
@@ -0,0 +1,13 @@
+{% extends "!layout.html" %}
+
+{% block footer %}
+{{ super() }}
+<script>
+  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+  })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+  ga('create', 'UA-49769510-1', 'ceres-solver.org');
+  ga('send', 'pageview');
+</script>
+{% endblock %}
diff --git a/docs/source/analytical_derivatives.rst b/docs/source/analytical_derivatives.rst
new file mode 100644
index 0000000..2a3a404
--- /dev/null
+++ b/docs/source/analytical_derivatives.rst
@@ -0,0 +1,192 @@
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-analytical_derivatives:
+
+====================
+Analytic Derivatives
+====================
+
+Consider the problem of fitting the following curve (`Rat43
+<http://www.itl.nist.gov/div898/strd/nls/data/ratkowsky3.shtml>`_) to
+data:
+
+.. math::
+  y = \frac{b_1}{(1+e^{b_2-b_3x})^{1/b_4}}
+
+That is, given some data :math:`\{x_i, y_i\},\ \forall i=1,... ,n`,
+determine parameters :math:`b_1, b_2, b_3` and :math:`b_4` that best
+fit this data.
+
+This can be stated as the problem of finding the values of
+:math:`b_1, b_2, b_3` and :math:`b_4` that minimize the following
+objective function [#f1]_:
+
+.. math::
+   \begin{align}
+   E(b_1, b_2, b_3, b_4)
+   &= \sum_i f^2(b_1, b_2, b_3, b_4 ; x_i, y_i)\\
+   &= \sum_i \left(\frac{b_1}{(1+e^{b_2-b_3x_i})^{1/b_4}} - y_i\right)^2\\
+   \end{align}
+
+To solve this problem using Ceres Solver, we need to define a
+:class:`CostFunction` that computes the residual :math:`f` for a given
+:math:`x` and :math:`y` and its derivatives with respect to
+:math:`b_1, b_2, b_3` and :math:`b_4`.
+
+Using elementary differential calculus, we can see that:
+
+.. math::
+  \begin{align}
+  D_1 f(b_1, b_2, b_3, b_4; x,y) &= \frac{1}{(1+e^{b_2-b_3x})^{1/b_4}}\\
+  D_2 f(b_1, b_2, b_3, b_4; x,y) &=
+  \frac{-b_1e^{b_2-b_3x}}{b_4(1+e^{b_2-b_3x})^{1/b_4 + 1}} \\
+  D_3 f(b_1, b_2, b_3, b_4; x,y) &=
+  \frac{b_1xe^{b_2-b_3x}}{b_4(1+e^{b_2-b_3x})^{1/b_4 + 1}} \\
+  D_4 f(b_1, b_2, b_3, b_4; x,y) & = \frac{b_1  \log\left(1+e^{b_2-b_3x}\right) }{b_4^2(1+e^{b_2-b_3x})^{1/b_4}}
+  \end{align}
+
+With these derivatives in hand, we can now implement the
+:class:`CostFunction` as:
+
+.. code-block:: c++
+
+  class Rat43Analytic : public SizedCostFunction<1,4> {
+     public:
+       Rat43Analytic(const double x, const double y) : x_(x), y_(y) {}
+       virtual ~Rat43Analytic() {}
+       virtual bool Evaluate(double const* const* parameters,
+                             double* residuals,
+                             double** jacobians) const {
+         const double b1 = parameters[0][0];
+         const double b2 = parameters[0][1];
+         const double b3 = parameters[0][2];
+         const double b4 = parameters[0][3];
+
+         residuals[0] = b1 *  pow(1 + exp(b2 -  b3 * x_), -1.0 / b4) - y_;
+
+         if (!jacobians) return true;
+         double* jacobian = jacobians[0];
+         if (!jacobian) return true;
+
+         jacobian[0] = pow(1 + exp(b2 - b3 * x_), -1.0 / b4);
+         jacobian[1] = -b1 * exp(b2 - b3 * x_) *
+                       pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
+         jacobian[2] = x_ * b1 * exp(b2 - b3 * x_) *
+                       pow(1 + exp(b2 - b3 * x_), -1.0 / b4 - 1) / b4;
+         jacobian[3] = b1 * log(1 + exp(b2 - b3 * x_)) *
+                       pow(1 + exp(b2 - b3 * x_), -1.0 / b4) / (b4 * b4);
+         return true;
+       }
+
+      private:
+       const double x_;
+       const double y_;
+   };
+
+This is tedious code, hard to read, and contains a lot of
+redundancy. So in practice we will cache some sub-expressions to
+improve its efficiency, which gives us something like:
+
+.. code-block:: c++
+
+  class Rat43AnalyticOptimized : public SizedCostFunction<1,4> {
+     public:
+       Rat43AnalyticOptimized(const double x, const double y) : x_(x), y_(y) {}
+       virtual ~Rat43AnalyticOptimized() {}
+       virtual bool Evaluate(double const* const* parameters,
+                             double* residuals,
+                             double** jacobians) const {
+         const double b1 = parameters[0][0];
+         const double b2 = parameters[0][1];
+         const double b3 = parameters[0][2];
+         const double b4 = parameters[0][3];
+
+         const double t1 = exp(b2 -  b3 * x_);
+         const double t2 = 1 + t1;
+         const double t3 = pow(t2, -1.0 / b4);
+         residuals[0] = b1 * t3 - y_;
+
+         if (!jacobians) return true;
+         double* jacobian = jacobians[0];
+         if (!jacobian) return true;
+
+         const double t4 = pow(t2, -1.0 / b4 - 1);
+         jacobian[0] = t3;
+         jacobian[1] = -b1 * t1 * t4 / b4;
+         jacobian[2] = -x_ * jacobian[1];
+         jacobian[3] = b1 * log(t2) * t3 / (b4 * b4);
+         return true;
+       }
+
+     private:
+       const double x_;
+       const double y_;
+   };
+
+What is the difference in performance of these two implementations?
+
+==========================   =========
+CostFunction                 Time (ns)
+==========================   =========
+Rat43Analytic                      255
+Rat43AnalyticOptimized              92
+==========================   =========
+
+``Rat43AnalyticOptimized`` is :math:`2.8` times faster than
+``Rat43Analytic``.  This difference in run-time is not uncommon. To
+get the best performance out of analytically computed derivatives, one
+usually needs to optimize the code to account for common
+sub-expressions.
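+
+Once implemented, either cost function is used like any other
+:class:`CostFunction`. As a minimal sketch (where ``x_data``,
+``y_data``, ``kNumObservations`` and the initial values of ``b`` are
+hypothetical placeholders for your data and starting point):
+
+.. code-block:: c++
+
+  double b[4] = {100.0, 10.0, 1.0, 1.0};  // Initial guess for b1..b4.
+
+  ceres::Problem problem;
+  for (int i = 0; i < kNumObservations; ++i) {
+    problem.AddResidualBlock(
+        new Rat43AnalyticOptimized(x_data[i], y_data[i]),
+        nullptr /* squared loss */,
+        b);
+  }
+
+  ceres::Solver::Options options;
+  ceres::Solver::Summary summary;
+  ceres::Solve(options, &problem, &summary);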
+
+
+When should you use analytical derivatives?
+===========================================
+
+#. The expressions are simple, e.g. mostly linear.
+
+#. A computer algebra system like `Maple
+   <https://www.maplesoft.com/products/maple/>`_ , `Mathematica
+   <https://www.wolfram.com/mathematica/>`_, or `SymPy
+   <http://www.sympy.org/en/index.html>`_ can be used to symbolically
+   differentiate the objective function and generate the C++ to
+   evaluate them.
+
+#. Performance is of utmost concern and there is algebraic structure
+   in the terms that you can exploit to get better performance than
+   automatic differentiation.
+
+   That said, getting the best performance out of analytical
+   derivatives requires a non-trivial amount of work.  Before going
+   down this path, it is useful to measure the amount of time being
+   spent evaluating the Jacobian as a fraction of the total solve time
+   and remember `Amdahl's Law
+   <https://en.wikipedia.org/wiki/Amdahl's_law>`_ is your friend.
+
+#. There is no other way to compute the derivatives, e.g. you
+   wish to compute the derivative of the root of a polynomial:
+
+   .. math::
+     a_3(x,y)z^3 + a_2(x,y)z^2 + a_1(x,y)z + a_0(x,y) = 0
+
+
+   with respect to :math:`x` and :math:`y`. This requires the use of
+   the `Inverse Function Theorem
+   <https://en.wikipedia.org/wiki/Inverse_function_theorem>`_
+
+#. You love the chain rule and actually enjoy doing all the algebra by
+   hand.
+
+
+.. rubric:: Footnotes
+
+.. [#f1] The notion of best fit depends on the choice of the objective
+         function used to measure the quality of fit, which in turn
+         depends on the underlying noise process which generated the
+         observations. Minimizing the sum of squared differences is
+         the right thing to do when the noise is `Gaussian
+         <https://en.wikipedia.org/wiki/Normal_distribution>`_. In
+         that case the optimal value of the parameters is the `Maximum
+         Likelihood Estimate
+         <https://en.wikipedia.org/wiki/Maximum_likelihood_estimation>`_.
diff --git a/docs/source/automatic_derivatives.rst b/docs/source/automatic_derivatives.rst
new file mode 100644
index 0000000..0c48c80
--- /dev/null
+++ b/docs/source/automatic_derivatives.rst
@@ -0,0 +1,307 @@
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-automatic_derivatives:
+
+=====================
+Automatic Derivatives
+=====================
+
+We will now consider automatic differentiation. It is a technique that
+can compute exact derivatives, fast, while requiring about the same
+effort from the user as is needed to use numerical differentiation.
+
+Don't believe me? Well here goes. The following code fragment
+implements an automatically differentiated ``CostFunction`` for `Rat43
+<http://www.itl.nist.gov/div898/strd/nls/data/ratkowsky3.shtml>`_.
+
+.. code-block:: c++
+
+  struct Rat43CostFunctor {
+    Rat43CostFunctor(const double x, const double y) : x_(x), y_(y) {}
+
+    template <typename T>
+    bool operator()(const T* parameters, T* residuals) const {
+      const T b1 = parameters[0];
+      const T b2 = parameters[1];
+      const T b3 = parameters[2];
+      const T b4 = parameters[3];
+      residuals[0] = b1 * pow(1.0 + exp(b2 -  b3 * x_), -1.0 / b4) - y_;
+      return true;
+    }
+
+    private:
+      const double x_;
+      const double y_;
+  };
+
+
+  CostFunction* cost_function =
+        new AutoDiffCostFunction<Rat43CostFunctor, 1, 4>(
+          new Rat43CostFunctor(x, y));
+
+Notice that compared to numeric differentiation, the only difference
+when defining the functor for use with automatic differentiation is
+the signature of the ``operator()``.
+
+In the case of numeric differentiation it was
+
+.. code-block:: c++
+
+   bool operator()(const double* parameters, double* residuals) const;
+
+and for automatic differentiation it is a templated function of the
+form
+
+.. code-block:: c++
+
+   template <typename T> bool operator()(const T* parameters, T* residuals) const;
+
+
+So what does this small change buy us? The following table compares
+the time it takes to evaluate the residual and the Jacobian for
+`Rat43` using various methods.
+
+==========================   =========
+CostFunction                 Time (ns)
+==========================   =========
+Rat43Analytic                      255
+Rat43AnalyticOptimized              92
+Rat43NumericDiffForward            262
+Rat43NumericDiffCentral            517
+Rat43NumericDiffRidders           3760
+Rat43AutomaticDiff                 129
+==========================   =========
+
+We can get exact derivatives using automatic differentiation
+(``Rat43AutomaticDiff``) with about the same effort that is required
+to write the code for numeric differentiation but only :math:`40\%`
+slower than hand optimized analytical derivatives.
+
+So how does it work? For this we will have to learn about **Dual
+Numbers** and **Jets** .
+
+
+Dual Numbers & Jets
+===================
+
+.. NOTE::
+
+   Reading this and the next section on implementing Jets is not
+   necessary to use automatic differentiation in Ceres Solver. But
+   knowing the basics of how Jets work is useful when debugging and
+   reasoning about the performance of automatic differentiation.
+
+Dual numbers are an extension of the real numbers analogous to complex
+numbers: whereas complex numbers augment the reals by introducing an
+imaginary unit :math:`\iota` such that :math:`\iota^2 = -1`, dual
+numbers introduce an *infinitesimal* unit :math:`\epsilon` such that
+:math:`\epsilon^2 = 0` . A dual number :math:`a + v\epsilon` has two
+components, the *real* component :math:`a` and the *infinitesimal*
+component :math:`v`.
+
+Surprisingly, this simple change leads to a convenient method for
+computing exact derivatives without needing to manipulate complicated
+symbolic expressions.
+
+For example, consider the function
+
+.. math::
+
+   f(x) = x^2 ,
+
+Then,
+
+.. math::
+
+   \begin{align}
+   f(10 + \epsilon) &= (10 + \epsilon)^2\\
+            &= 100 + 20 \epsilon + \epsilon^2\\
+            &= 100 + 20 \epsilon
+   \end{align}
+
+Observe that the coefficient of :math:`\epsilon` is :math:`Df(10) =
+20`. Indeed this generalizes to functions which are not
+polynomial. Consider an arbitrary differentiable function
+:math:`f(x)`. Then we can evaluate :math:`f(x + \epsilon)` by
+considering the Taylor expansion of :math:`f` near :math:`x`, which
+gives us the infinite series
+
+.. math::
+   \begin{align}
+   f(x + \epsilon) &= f(x) + Df(x) \epsilon + D^2f(x)
+   \frac{\epsilon^2}{2} + D^3f(x) \frac{\epsilon^3}{6} + \cdots\\
+   f(x + \epsilon) &= f(x) + Df(x) \epsilon
+   \end{align}
+
+Here we are using the fact that :math:`\epsilon^2 = 0`.
+
+A `Jet <https://en.wikipedia.org/wiki/Jet_(mathematics)>`_ is an
+:math:`n`-dimensional dual number, where we augment the real numbers
+with :math:`n` infinitesimal units :math:`\epsilon_i,\ i=1,...,n` with
+the property that :math:`\forall i, j\ :\epsilon_i\epsilon_j = 0`. Then
+a Jet consists of a *real* part :math:`a` and an :math:`n`-dimensional
+*infinitesimal* part :math:`\mathbf{v}`, i.e.,
+
+.. math::
+   x = a + \sum_j v_{j} \epsilon_j
+
+The summation notation gets tedious, so we will also just write
+
+.. math::
+   x = a + \mathbf{v}.
+
+where the :math:`\epsilon_i`'s are implicit. Then, using the same
+Taylor series expansion used above, we can see that:
+
+.. math::
+
+  f(a + \mathbf{v}) = f(a) + Df(a) \mathbf{v}.
+
+Similarly for a multivariate function
+:math:`f:\mathbb{R}^{n}\rightarrow \mathbb{R}^m`, evaluated on
+:math:`x_i = a_i + \mathbf{v}_i,\ \forall i = 1,...,n`:
+
+.. math::
+   f(x_1,..., x_n) = f(a_1, ..., a_n) + \sum_i D_i f(a_1, ..., a_n) \mathbf{v}_i
+
+So if each :math:`\mathbf{v}_i = e_i` were the :math:`i^{\text{th}}`
+standard basis vector, then the above expression would simplify to
+
+.. math::
+   f(x_1,..., x_n) = f(a_1, ..., a_n) + \sum_i D_i f(a_1, ..., a_n) \epsilon_i
+
+and we can extract the coordinates of the Jacobian by inspecting the
+coefficients of :math:`\epsilon_i`.
+
+Implementing Jets
+-----------------
+
+In order for the above to work in practice, we need the ability to
+evaluate an arbitrary function :math:`f` not just on real numbers but
+also on dual numbers. However, one does not usually evaluate functions
+by evaluating their Taylor expansions.
+
+This is where C++ templates and operator overloading come into
+play. The following code fragment contains a simple implementation of
+a ``Jet`` and some operators/functions that operate on them.
+
+.. code-block:: c++
+
+   template<int N> struct Jet {
+     double a;                       // The scalar part.
+     Eigen::Matrix<double, 1, N> v;  // The infinitesimal part.
+
+     Jet() : a(0.0) { v.setZero(); }
+     Jet(double a, const Eigen::Matrix<double, 1, N>& v) : a(a), v(v) {}
+   };
+
+   template<int N> Jet<N> operator+(const Jet<N>& f, const Jet<N>& g) {
+     return Jet<N>(f.a + g.a, f.v + g.v);
+   }
+
+   template<int N> Jet<N> operator-(const Jet<N>& f, const Jet<N>& g) {
+     return Jet<N>(f.a - g.a, f.v - g.v);
+   }
+
+   template<int N> Jet<N> operator*(const Jet<N>& f, const Jet<N>& g) {
+     return Jet<N>(f.a * g.a, f.a * g.v + f.v * g.a);
+   }
+
+   template<int N> Jet<N> operator/(const Jet<N>& f, const Jet<N>& g) {
+     return Jet<N>(f.a / g.a, f.v / g.a - f.a * g.v / (g.a * g.a));
+   }
+
+   template <int N> Jet<N> exp(const Jet<N>& f) {
+     return Jet<N>(exp(f.a), exp(f.a) * f.v);
+   }
+
+   // This is a simple implementation for illustration purposes, the
+   // actual implementation of pow requires careful handling of a number
+   // of corner cases.
+   template <int N>  Jet<N> pow(const Jet<N>& f, const Jet<N>& g) {
+     return Jet<N>(pow(f.a, g.a),
+                   g.a * pow(f.a, g.a - 1.0) * f.v +
+                   pow(f.a, g.a) * log(f.a) * g.v);
+   }
+
+
+With these overloaded functions in hand, we can now call
+``Rat43CostFunctor`` with an array of Jets instead of doubles. Putting
+that together with appropriately initialized Jets allows us to compute
+the Jacobian as follows:
+
+.. code-block:: c++
+
+  class Rat43Automatic : public ceres::SizedCostFunction<1,4> {
+   public:
+    Rat43Automatic(const Rat43CostFunctor* functor) : functor_(functor) {}
+    virtual ~Rat43Automatic() {}
+    virtual bool Evaluate(double const* const* parameters,
+                          double* residuals,
+                          double** jacobians) const {
+      // Just evaluate the residuals if Jacobians are not required.
+      if (!jacobians) return (*functor_)(parameters[0], residuals);
+
+      // Initialize the Jets
+      ceres::Jet<4> jets[4];
+      for (int i = 0; i < 4; ++i) {
+        jets[i].a = parameters[0][i];
+        jets[i].v.setZero();
+        jets[i].v[i] = 1.0;
+      }
+
+      ceres::Jet<4> result;
+      (*functor_)(jets, &result);
+
+      // Copy the values out of the Jet.
+      residuals[0] = result.a;
+      for (int i = 0; i < 4; ++i) {
+        jacobians[0][i] = result.v[i];
+      }
+      return true;
+    }
+
+   private:
+    std::unique_ptr<const Rat43CostFunctor> functor_;
+  };
+
+Indeed, this is essentially how :class:`AutoDiffCostFunction` works.
+
+
+Pitfalls
+========
+
+Automatic differentiation frees the user from the burden of computing
+and reasoning about the symbolic expressions for the Jacobians, but
+this freedom comes at a cost. For example consider the following
+simple functor:
+
+.. code-block:: c++
+
+   struct Functor {
+     template <typename T> bool operator()(const T* x, T* residual) const {
+       residual[0] = 1.0 - sqrt(x[0] * x[0] + x[1] * x[1]);
+       return true;
+     }
+   };
+
+Looking at the code for the residual computation, one does not foresee
+any problems. However, if we look at the analytical expressions for
+the Jacobian:
+
+.. math::
+
+      y &= 1 - \sqrt{x_0^2 + x_1^2}\\
+   D_1y &= -\frac{x_0}{\sqrt{x_0^2 + x_1^2}},\
+   D_2y = -\frac{x_1}{\sqrt{x_0^2 + x_1^2}}
+
+we find that it is an indeterminate form at :math:`x_0 = 0, x_1 =
+0`.
+
+There is no single solution to this problem. In some cases one needs
+to reason explicitly about the points where indeterminacy may occur
+and use alternate expressions using `L'Hopital's rule
+<https://en.wikipedia.org/wiki/L'H%C3%B4pital's_rule>`_ (see for
+example some of the conversion routines in `rotation.h
+<https://github.com/ceres-solver/ceres-solver/blob/master/include/ceres/rotation.h>`_). In
+other cases, one may need to regularize the expressions to eliminate
+these points.
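+
+As an illustration of the first approach, here is a minimal sketch of
+a guarded version of the functor above (the functor name and the
+choice of derivative at the origin are ours, for illustration only):
+
+.. code-block:: c++
+
+   struct GuardedFunctor {
+     template <typename T> bool operator()(const T* x, T* residual) const {
+       const T squared_norm = x[0] * x[0] + x[1] * x[1];
+       if (squared_norm > T(0.0)) {
+         residual[0] = T(1.0) - sqrt(squared_norm);
+       } else {
+         // At x_0 = x_1 = 0 the residual is 1 and we (arbitrarily)
+         // define its derivative to be zero by returning a constant.
+         residual[0] = T(1.0);
+       }
+       return true;
+     }
+   };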
diff --git a/docs/source/bibliography.rst b/docs/source/bibliography.rst
new file mode 100644
index 0000000..5352c65
--- /dev/null
+++ b/docs/source/bibliography.rst
@@ -0,0 +1,132 @@
+.. _sec-bibliography:
+
+============
+Bibliography
+============
+
+.. [Agarwal] S. Agarwal, N. Snavely, S. M. Seitz and R. Szeliski,
+   **Bundle Adjustment in the Large**, *Proceedings of the European
+   Conference on Computer Vision*, pp. 29--42, 2010.
+
+.. [Bjorck] A. Bjorck, **Numerical Methods for Least Squares
+   Problems**, SIAM, 1996
+
+.. [Brown] D. C. Brown, **A solution to the general problem of
+   multiple station analytical stereo triangulation**,  Technical
+   Report 43, Patrick Airforce Base, Florida, 1958.
+
+.. [ByrdNocedal] R. H. Byrd, J. Nocedal, R. B. Schnabel,
+   **Representations of Quasi-Newton Matrices and their use in Limited
+   Memory Methods**, *Mathematical Programming* 63(4):129–156, 1994.
+
+.. [ByrdSchnabel] R.H. Byrd, R.B. Schnabel, and G.A. Shultz, **Approximate
+   solution of the trust region problem by minimization over
+   two dimensional subspaces**, *Mathematical programming*,
+   40(1):247–263, 1988.
+
+.. [Chen] Y. Chen, T. A. Davis, W. W. Hager, and
+   S. Rajamanickam, **Algorithm 887: CHOLMOD, Supernodal Sparse
+   Cholesky Factorization and Update/Downdate**, *TOMS*, 35(3), 2008.
+
+.. [Conn] A.R. Conn, N.I.M. Gould, and P.L. Toint, **Trust region
+   methods**, *Society for Industrial Mathematics*, 2000.
+
+.. [GolubPereyra] G.H. Golub and V. Pereyra, **The differentiation of
+   pseudo-inverses and nonlinear least squares problems whose
+   variables separate**, *SIAM Journal on numerical analysis*,
+   10(2):413–432, 1973.
+
+.. [HartleyZisserman] R.I. Hartley & A. Zisserman, **Multiview
+   Geometry in Computer Vision**, Cambridge University Press, 2004.
+
+.. [KanataniMorris] K. Kanatani and D. D. Morris, **Gauges and gauge
+   transformations for uncertainty description of geometric structure
+   with indeterminacy**, *IEEE Transactions on Information Theory*
+   47(5):2017-2028, 2001.
+
+.. [Keys] R. G. Keys, **Cubic convolution interpolation for digital
+   image processing**, *IEEE Trans. on Acoustics, Speech, and Signal
+   Processing*, 29(6), 1981.
+
+.. [KushalAgarwal] A. Kushal and S. Agarwal, **Visibility based
+   preconditioning for bundle adjustment**, *In Proceedings of the
+   IEEE Conference on Computer Vision and Pattern Recognition*, 2012.
+
+.. [Kanzow] C. Kanzow, N. Yamashita and M. Fukushima,
+   **Levenberg–Marquardt methods with strong local convergence
+   properties for solving nonlinear equations with convex
+   constraints**, *Journal of Computational and Applied Mathematics*,
+   177(2):375–397, 2005.
+
+.. [Levenberg] K. Levenberg, **A method for the solution of certain
+   nonlinear problems in least squares**, *Quart. Appl.  Math*,
+   2(2):164–168, 1944.
+
+.. [LiSaad] Na Li and Y. Saad, **MIQR: A multilevel incomplete qr
+   preconditioner for large sparse least squares problems**, *SIAM
+   Journal on Matrix Analysis and Applications*, 28(2):524–550, 2007.
+
+.. [Madsen] K. Madsen, H.B. Nielsen, and O. Tingleff, **Methods for
+   nonlinear least squares problems**, 2004.
+
+.. [Mandel] J. Mandel, **On block diagonal and Schur complement
+   preconditioning**, *Numer. Math.*, 58(1):79–93, 1990.
+
+.. [Marquardt] D.W. Marquardt, **An algorithm for least squares
+   estimation of nonlinear parameters**, *J. SIAM*, 11(2):431–441,
+   1963.
+
+.. [Mathew] T.P.A. Mathew, **Domain decomposition methods for the
+   numerical solution of partial differential equations**, Springer
+   Verlag, 2008.
+
+.. [NashSofer] S.G. Nash and A. Sofer, **Assessing a search direction
+   within a truncated newton method**, *Operations Research Letters*,
+   9(4):219–221, 1990.
+
+.. [Nocedal] J. Nocedal, **Updating Quasi-Newton Matrices with Limited
+   Storage**, *Mathematics of Computation*, 35(151): 773--782, 1980.
+
+.. [NocedalWright] J. Nocedal & S. Wright, **Numerical Optimization**,
+   Springer, 2004.
+
+.. [Oren] S. S. Oren, **Self-scaling Variable Metric (SSVM) Algorithms
+   Part II: Implementation and Experiments**, Management Science,
+   20(5), 863-874, 1974.
+
+.. [Press] W. H. Press, S. A. Teukolsky, W. T. Vetterling
+   & B. P. Flannery, **Numerical Recipes**, Cambridge University
+   Press, 2007.
+
+.. [Ridders] C. J. F. Ridders, **Accurate computation of F'(x) and
+   F'(x) F"(x)**, Advances in Engineering Software 4(2), 75-76, 1978.
+
+.. [RuheWedin] A. Ruhe and P.Å. Wedin, **Algorithms for separable
+   nonlinear least squares problems**, Siam Review, 22(3):318–337,
+   1980.
+
+.. [Saad] Y. Saad, **Iterative methods for sparse linear
+   systems**, SIAM, 2003.
+
+.. [Stigler] S. M. Stigler, **Gauss and the invention of least
+   squares**, *The Annals of Statistics*, 9(3):465-474, 1981.
+
+.. [TenenbaumDirector] J. Tenenbaum & B. Director, **How Gauss
+   Determined the Orbit of Ceres**.
+
+.. [TrefethenBau] L.N. Trefethen and D. Bau, **Numerical Linear
+   Algebra**, SIAM, 1997.
+
+.. [Triggs] B. Triggs, P. F. Mclauchlan, R. I. Hartley &
+   A. W. Fitzgibbon, **Bundle Adjustment: A Modern Synthesis**,
+   Proceedings of the International Workshop on Vision Algorithms:
+   Theory and Practice, pp. 298-372, 1999.
+
+.. [Wiberg] T. Wiberg, **Computation of principal components when data
+   are missing**, In Proc. *Second Symp. Computational Statistics*,
+   pages 229–236, 1976.
+
+.. [WrightHolt] S. J. Wright and J. N. Holt, **An Inexact
+   Levenberg Marquardt Method for Large Sparse Nonlinear Least
+   Squares**, *Journal of the Australian Mathematical Society Series
+   B*, 26(4):387–403, 1985.
diff --git a/docs/source/conf.py b/docs/source/conf.py
new file mode 100644
index 0000000..c266746
--- /dev/null
+++ b/docs/source/conf.py
@@ -0,0 +1,242 @@
+# -*- coding: utf-8 -*-
+#
+# Ceres Solver documentation build configuration file, created by
+# sphinx-quickstart on Sun Jan 20 20:34:07 2013.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.todo', 'sphinx.ext.mathjax', 'sphinx.ext.ifconfig']
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'Ceres Solver'
+copyright = u'2018 Google Inc'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '1.14'
+# The full version, including alpha/beta/rc tags.
+release = '1.14.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = []
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = ["_themes",]
+import sphinx_rtd_theme
+html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+html_title = "Ceres Solver"
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+#html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+html_domain_indices = True
+
+# If false, no index is generated.
+html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+html_show_sourcelink = False
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+html_show_sphinx = False
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'CeresSolverdoc'
+
+# -- Options for LaTeX output --------------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+  ('index', 'CeresSolver.tex', u'Ceres Solver',
+   u'Sameer Agarwal, Keir Mierle & Others', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    ('index', 'ceressolver', u'Ceres Solver',
+     [u'Sameer Agarwal, Keir Mierle & Others'], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output ------------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+  ('index', 'CeresSolver', u'Ceres Solver',
+   u'Sameer Agarwal, Keir Mierle & Others', 'CeresSolver', 'One line description of project.',
+   'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
new file mode 100644
index 0000000..3ef8629
--- /dev/null
+++ b/docs/source/contributing.rst
@@ -0,0 +1,132 @@
+.. _chapter-contributing:
+
+============
+Contributing
+============
+
+We welcome contributions to Ceres, whether they are new features, bug
+fixes or tests. The Ceres `mailing list
+<http://groups.google.com/group/ceres-solver>`_ is the best place
+for all development related discussions. Please consider joining
+it. If you have ideas on how you would like to contribute to Ceres, it
+is a good idea to let us know on the mailing list before you start
+development. We may have suggestions that will save effort when trying
+to merge your work into the main branch. If you are looking for ideas,
+please let us know about your interest and skills and we will be happy
+to make a suggestion or three.
+
+We follow Google's `C++ Style Guide
+<https://google.github.io/styleguide/cppguide.html>`_ and
+use `git <http://git-scm.com/>`_ for version control. We use
+`Gerrit <https://ceres-solver-review.googlesource.com/>`_ to collaborate and
+review changes to Ceres. Gerrit enables pre-commit reviews so that
+Ceres can maintain a linear history with clean, reviewed commits, and
+no merges.
+
+We now describe how to set up your development environment and submit
+a change list for review via Gerrit.
+
+Setting up your Environment
+===========================
+
+1. Download and configure ``git``.
+
+   * Mac ``brew install git``.
+   * Linux ``sudo apt-get install git``.
+   * Windows. Download `msysgit
+     <https://code.google.com/p/msysgit/>`_, which includes a minimal
+     `Cygwin <http://www.cygwin.com/>`_ install.
+
+2. Sign up for `Gerrit
+   <https://ceres-solver-review.googlesource.com/>`_. You will also need to
+   `sign the Contributor License Agreement (CLA)
+   <https://opensource.google.com/docs/cla/#sign>`_ with Google, which gives
+   Google a royalty-free unlimited license to use your contributions. You
+   retain copyright.
+
+3. Clone the Ceres Solver ``git`` repository from Gerrit.
+
+   .. code-block:: bash
+
+      git clone https://ceres-solver.googlesource.com/ceres-solver
+
+
+4. Build Ceres, following the instructions in
+   :ref:`chapter-installation`.
+
+   On Mac and Linux, the ``CMake`` build will download and enable
+   the Gerrit pre-commit hook automatically. This pre-submit hook
+   creates `Change-Id: ...` lines in your commits.
+
+   If this does not work OR you are on Windows, execute the
+   following in the root directory of the local ``git`` repository:
+
+   .. code-block:: bash
+
+      curl -o .git/hooks/commit-msg https://ceres-solver-review.googlesource.com/tools/hooks/commit-msg
+      chmod +x .git/hooks/commit-msg
+
+5. Configure your Gerrit password with a ``.gitcookies`` which allows pushing
+   to Gerrit without having to enter a very long random password every time:
+
+   * Sign into `http://ceres-solver-review.googlesource.com
+     <http://ceres-solver-review.googlesource.com>`_.
+
+   * Click ``Settings -> HTTP Credentials -> Obtain Password``.
+
+   * (maybe) Select an account for multi-login. This should be the
+     same as your Gerrit login.
+
+   * Click ``Allow access`` when the page requests access to your
+     ``git`` repositories.
+
+   * Follow the instructions from Gerrit to create a ``.gitcookies`` file on
+     your system, either in ``$HOME/.gitcookies`` (Mac and Linux) or
+     ``%USERPROFILE%\.gitcookies`` (Windows). Note that for Windows, please get
+     a recent `Git for Windows <https://git-scm.com/download/win>`_ install to
+     enable automatic lookup in the ``%USERPROFILE%\.gitcookies``.
+
+Submitting a change
+===================
+
+1. Make your changes against master or whatever branch you
+   like. Commit your changes as one patch. When you commit, the Gerrit
+   hook will add a `Change-Id:` line as the last line of the commit.
+
+   Make sure that your commit message is formatted in the `50/72 style
+   <http://tbaggery.com/2008/04/19/a-note-about-git-commit-messages.html>`_.
+
+2. Push your changes to the Ceres Gerrit instance:
+
+   .. code-block:: bash
+
+      git push origin HEAD:refs/for/master
+
+   When the push succeeds, the console will display a URL showing the
+   address of the review. Go to the URL and add at least one of the
+   maintainers (Sameer Agarwal, Keir Mierle, Alex Stewart or William
+   Rucklidge) as reviewers.
+
+3. Wait for a review.
+
+4. Once review comments come in, address them. Please reply to each
+   comment in Gerrit, which makes the re-review process easier. After
+   modifying the code in your ``git`` instance, *don't make a new
+   commit*. Instead, update the last commit using a command like the
+   following:
+
+   .. code-block:: bash
+
+      git commit --amend -a
+
+   This will update the last commit, so that it has both the original
+   patch and your updates as a single commit. You will have a chance
+   to edit the commit message as well. Push the new commit to Gerrit
+   as before.
+
+   Gerrit will use the ``Change-Id:`` to match the previous commit
+   with the new one. The review interface retains your original patch,
+   but also shows the new patch.
+
+   Publish your responses to the comments, and wait for a new round
+   of reviews.
diff --git a/docs/source/derivatives.rst b/docs/source/derivatives.rst
new file mode 100644
index 0000000..bff6a29
--- /dev/null
+++ b/docs/source/derivatives.rst
@@ -0,0 +1,60 @@
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-on_derivatives:
+
+==============
+On Derivatives
+==============
+
+Ceres Solver, like all gradient based optimization algorithms, depends
+on being able to evaluate the objective function and its derivatives
+at arbitrary points in its domain. Indeed, defining the objective
+function and its `Jacobian
+<https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant>`_ is
+the principal task that the user is required to perform when solving
+an optimization problem using Ceres Solver. The correct and efficient
+computation of the Jacobian is the key to good performance.
+
+Ceres Solver offers considerable flexibility in how the user can
+provide derivatives to the solver. She can use:
+
+#. :ref:`chapter-analytical_derivatives`: The user figures out the
+   derivatives herself, by hand or using a tool like `Maple
+   <https://www.maplesoft.com/products/maple/>`_ or `Mathematica
+   <https://www.wolfram.com/mathematica/>`_, and implements them in a
+   :class:`CostFunction`.
+#. :ref:`chapter-numerical_derivatives`: Ceres numerically computes
+   the derivative using finite differences.
+#. :ref:`chapter-automatic_derivatives`: Ceres automatically computes
+   the analytic derivative using C++ templates and operator
+   overloading.
+
+Which of these three approaches (alone or in combination) should be
+used depends on the situation and the tradeoffs the user is willing to
+make. Unfortunately, numerical optimization textbooks rarely discuss
+these issues in detail and the user is left to her own devices.
+
+The aim of this article is to fill this gap and describe each of these
+three approaches in the context of Ceres Solver with sufficient detail
+that the user can make an informed choice.
+
+For the impatient amongst you, here is some high level advice:
+
+#. Use :ref:`chapter-automatic_derivatives`.
+#. In some cases it may be worth using
+   :ref:`chapter-analytical_derivatives`.
+#. Avoid :ref:`chapter-numerical_derivatives`. Use it as a measure of
+   last resort, mostly to interface with external libraries.
+
+For the rest, read on.
+
+.. toctree::
+   :maxdepth: 1
+
+   spivak_notation
+   analytical_derivatives
+   numerical_derivatives
+   automatic_derivatives
+   interfacing_with_autodiff
diff --git a/docs/source/faqs.rst b/docs/source/faqs.rst
new file mode 100644
index 0000000..5a28f41
--- /dev/null
+++ b/docs/source/faqs.rst
@@ -0,0 +1,29 @@
+.. _chapter-tricks:
+
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+===================
+FAQS, Tips & Tricks
+===================
+
+Answers to frequently asked questions, tricks of the trade and general
+wisdom.
+
+.. toctree::
+   :maxdepth: 2
+
+   modeling_faqs
+   solving_faqs
+
+
+Further Reading
+===============
+
+For a short but informative introduction to the subject we recommend
+the booklet by [Madsen]_ . For a general introduction to non-linear
+optimization we recommend [NocedalWright]_. [Bjorck]_ remains the
+seminal reference on least squares problems. The book by [TrefethenBau]_ is
+our favorite text on introductory numerical linear algebra. [Triggs]_
+provides a thorough coverage of the bundle adjustment problem.
diff --git a/docs/source/features.rst b/docs/source/features.rst
new file mode 100644
index 0000000..e71bd39
--- /dev/null
+++ b/docs/source/features.rst
@@ -0,0 +1,86 @@
+====
+Why?
+====
+.. _chapter-features:
+
+* **Code Quality** - Ceres Solver has been used in production at
+  Google for more than four years now. It is clean, extensively tested
+  and well documented code that is actively developed and supported.
+
+* **Modeling API** - It is rarely the case that one starts with the
+  exact and complete formulation of the problem that one is trying to
+  solve. Ceres's modeling API has been designed so that the user can
+  easily build and modify the objective function, one term at a
+  time. And to do so without worrying about how the solver is going to
+  deal with the resulting changes in the sparsity/structure of the
+  underlying problem.
+
+  - **Derivatives** Supplying derivatives is perhaps the most tedious
+    and error prone part of using an optimization library.  Ceres
+    ships with `automatic`_ and `numeric`_ differentiation. So you
+    never have to compute derivatives by hand (unless you really want
+    to). Not only this, Ceres allows you to mix automatic, numeric and
+    analytical derivatives in any combination that you want.
+
+  - **Robust Loss Functions** Most non-linear least squares problems
+    involve data. If there is data, there will be outliers. Ceres
+    allows the user to *shape* their residuals using a
+    :class:`LossFunction` to reduce the influence of outliers.
+
+  - **Local Parameterization** In many cases, some parameters lie on a
+    manifold other than Euclidean space, e.g., rotation matrices. In
+    such cases, the user can specify the geometry of the local tangent
+    space by specifying a :class:`LocalParameterization` object.
+
+* **Solver Choice** Depending on the size, sparsity structure, time &
+  memory budgets, and solution quality requirements, different
+  optimization algorithms will suit different needs. To this end,
+  Ceres Solver comes with a variety of optimization algorithms:
+
+  - **Trust Region Solvers** - Ceres supports Levenberg-Marquardt,
+    Powell's Dogleg, and Subspace dogleg methods. The key
+    computational cost in all of these methods is the solution of a
+    linear system. To this end Ceres ships with a variety of linear
+    solvers - dense QR and dense Cholesky factorization (using
+    `Eigen`_ or `LAPACK`_) for dense problems, sparse Cholesky
+    factorization (`SuiteSparse`_, `CXSparse`_ or `Eigen`_) for large
+    sparse problems custom Schur complement based dense, sparse, and
+    iterative linear solvers for `bundle adjustment`_ problems.
+
+  - **Line Search Solvers** - When the problem size is so large that
+    storing and factoring the Jacobian is not feasible or a low
+    accuracy solution is required cheaply, Ceres offers a number of
+    line search based algorithms. This includes a number of variants
+    of Non-linear Conjugate Gradients, BFGS and LBFGS.
+
+* **Speed** - Ceres Solver has been extensively optimized, with C++
+  templating, hand written linear algebra routines and OpenMP or C++11 threads
+  based multithreading of the Jacobian evaluation and the linear solvers.
+
+* **Solution Quality** Ceres is the `best performing`_ solver on the NIST
+  problem set used by Mondragon and Borchers for benchmarking
+  non-linear least squares solvers.
+
+* **Covariance estimation** - Evaluate the sensitivity/uncertainty of
+  the solution by evaluating all or part of the covariance
+  matrix. Ceres is one of the few solvers that allows you to do
+  this analysis at scale.
+
+* **Community** Since its release as an open source software, Ceres
+  has developed an active developer community that contributes new
+  features, bug fixes and support.
+
+* **Portability** - Runs on *Linux*, *Windows*, *Mac OS X*, *Android*
+  and *iOS*.
+
+* **BSD Licensed** The BSD license offers the flexibility to ship your
+  application.
+
+.. _best performing: https://groups.google.com/forum/#!topic/ceres-solver/UcicgMPgbXw
+.. _bundle adjustment: http://en.wikipedia.org/wiki/Bundle_adjustment
+.. _SuiteSparse: http://www.cise.ufl.edu/research/sparse/SuiteSparse/
+.. _Eigen: http://eigen.tuxfamily.org/
+.. _LAPACK: http://www.netlib.org/lapack/
+.. _CXSparse: https://www.cise.ufl.edu/research/sparse/CXSparse/
+.. _automatic: http://en.wikipedia.org/wiki/Automatic_differentiation
+.. _numeric: http://en.wikipedia.org/wiki/Numerical_differentiation
diff --git a/docs/source/forward_central_error.png b/docs/source/forward_central_error.png
new file mode 100644
index 0000000..56d10b0
--- /dev/null
+++ b/docs/source/forward_central_error.png
Binary files differ
diff --git a/docs/source/forward_central_ridders_error.png b/docs/source/forward_central_ridders_error.png
new file mode 100644
index 0000000..ecea674
--- /dev/null
+++ b/docs/source/forward_central_ridders_error.png
Binary files differ
diff --git a/docs/source/gradient_solver.rst b/docs/source/gradient_solver.rst
new file mode 100644
index 0000000..1356e74
--- /dev/null
+++ b/docs/source/gradient_solver.rst
@@ -0,0 +1,517 @@
+.. highlight:: c++
+
+.. default-domain:: cpp
+
+.. _chapter-gradient_problem_solver:
+
+==================================
+General Unconstrained Minimization
+==================================
+
+Modeling
+========
+
+:class:`FirstOrderFunction`
+---------------------------
+
+.. class:: FirstOrderFunction
+
+  Instances of :class:`FirstOrderFunction` implement the evaluation of
+  a function and its gradient.
+
+  .. code-block:: c++
+
+   class FirstOrderFunction {
+     public:
+      virtual ~FirstOrderFunction() {}
+      virtual bool Evaluate(const double* const parameters,
+                            double* cost,
+                            double* gradient) const = 0;
+      virtual int NumParameters() const = 0;
+   };
+
+.. function:: bool FirstOrderFunction::Evaluate(const double* const parameters, double* cost, double* gradient) const
+
+   Evaluate the cost/value of the function. If ``gradient`` is not
+   ``NULL`` then evaluate the gradient too. If evaluation is
+   successful return ``true``, else return ``false``.
+
+   ``cost`` is guaranteed to never be ``NULL``; ``gradient`` can be ``NULL``.
+
+.. function:: int FirstOrderFunction::NumParameters() const
+
+   Number of parameters in the domain of the function.
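+
+As an illustration, here is a minimal sketch of a
+:class:`FirstOrderFunction` for the two dimensional Rosenbrock
+function :math:`f(x, y) = (1 - x)^2 + 100(y - x^2)^2` (a standard test
+problem, used here purely as an example):
+
+.. code-block:: c++
+
+   class Rosenbrock : public ceres::FirstOrderFunction {
+    public:
+     virtual ~Rosenbrock() {}
+     virtual bool Evaluate(const double* const parameters,
+                           double* cost,
+                           double* gradient) const {
+       const double x = parameters[0];
+       const double y = parameters[1];
+       cost[0] = (1.0 - x) * (1.0 - x) + 100.0 * (y - x * x) * (y - x * x);
+       if (gradient != NULL) {
+         gradient[0] = -2.0 * (1.0 - x) - 400.0 * x * (y - x * x);
+         gradient[1] = 200.0 * (y - x * x);
+       }
+       return true;
+     }
+     virtual int NumParameters() const { return 2; }
+   };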
+
+
+:class:`GradientProblem`
+------------------------
+
+.. class:: GradientProblem
+
+.. code-block:: c++
+
+  class GradientProblem {
+   public:
+    explicit GradientProblem(FirstOrderFunction* function);
+    GradientProblem(FirstOrderFunction* function,
+                    LocalParameterization* parameterization);
+    int NumParameters() const;
+    int NumLocalParameters() const;
+    bool Evaluate(const double* parameters, double* cost, double* gradient) const;
+    bool Plus(const double* x, const double* delta, double* x_plus_delta) const;
+  };
+
+Instances of :class:`GradientProblem` represent general non-linear
+optimization problems that must be solved using just the value of the
+objective function and its gradient. Unlike the :class:`Problem`
+class, which can only be used to model non-linear least squares
+problems, instances of :class:`GradientProblem` not restricted in the
+form of the objective function.
+
+Structurally :class:`GradientProblem` is a composition of a
+:class:`FirstOrderFunction` and optionally a
+:class:`LocalParameterization`.
+
+The :class:`FirstOrderFunction` is responsible for evaluating the cost
+and gradient of the objective function.
+
+The :class:`LocalParameterization` is responsible for going back and
+forth between the ambient space and the local tangent space. When a
+:class:`LocalParameterization` is not provided, then the tangent space
+is assumed to coincide with the ambient Euclidean space that the
+gradient vector lives in.
+
+The constructor takes ownership of the :class:`FirstOrderFunction` and
+:class:`LocalParameterization` objects passed to it.
+
+
+.. function:: void Solve(const GradientProblemSolver::Options& options, const GradientProblem& problem, double* parameters, GradientProblemSolver::Summary* summary)
+
+   Solve the given :class:`GradientProblem` using the values in
+   ``parameters`` as the initial guess of the solution.
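+
+   A minimal sketch of putting these pieces together to minimize the
+   Rosenbrock function defined above (the starting point is arbitrary)
+   might look like:
+
+   .. code-block:: c++
+
+      double parameters[2] = {-1.2, 1.0};
+
+      ceres::GradientProblem problem(new Rosenbrock());
+
+      ceres::GradientProblemSolver::Options options;
+      options.minimizer_progress_to_stdout = true;
+
+      ceres::GradientProblemSolver::Summary summary;
+      ceres::Solve(options, problem, parameters, &summary);
+
+      std::cout << summary.FullReport() << "\n";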
+
+
+Solving
+=======
+
+:class:`GradientProblemSolver::Options`
+---------------------------------------
+
+.. class:: GradientProblemSolver::Options
+
+   :class:`GradientProblemSolver::Options` controls the overall
+   behavior of the solver. We list the various settings and their
+   default values below.
+
+.. function:: bool GradientProblemSolver::Options::IsValid(string* error) const
+
+   Validate the values in the options struct and returns true on
+   success. If there is a problem, the method returns false with
+   ``error`` containing a textual description of the cause.
+
+.. member:: LineSearchDirectionType GradientProblemSolver::Options::line_search_direction_type
+
+   Default: ``LBFGS``
+
+   Choices are ``STEEPEST_DESCENT``, ``NONLINEAR_CONJUGATE_GRADIENT``,
+   ``BFGS`` and ``LBFGS``.
+
+.. member:: LineSearchType GradientProblemSolver::Options::line_search_type
+
+   Default: ``WOLFE``
+
+   Choices are ``ARMIJO`` and ``WOLFE`` (strong Wolfe conditions).
+   Note that in order for the assumptions underlying the ``BFGS`` and
+   ``LBFGS`` line search direction algorithms to be guaranteed to be
+   satisfied, the ``WOLFE`` line search should be used.
+
+.. member:: NonlinearConjugateGradientType GradientProblemSolver::Options::nonlinear_conjugate_gradient_type
+
+   Default: ``FLETCHER_REEVES``
+
+   Choices are ``FLETCHER_REEVES``, ``POLAK_RIBIERE`` and
+   ``HESTENES_STIEFEL``.
+
+.. member:: int GradientProblemSolver::Options::max_lbfgs_rank
+
+   Default: 20
+
+   The L-BFGS Hessian approximation is a low rank approximation to the
+   inverse of the Hessian matrix. The rank of the approximation
+   determines (linearly) the space and time complexity of using the
+   approximation. The higher the rank, the better the quality of the
+   approximation. The increase in quality is, however, bounded for a
+   number of reasons.
+
+     1. The method only uses secant information and not actual
+        derivatives.
+
+     2. The Hessian approximation is constrained to be positive
+        definite.
+
+   So increasing this rank to a large number will cost time and space
+   complexity without the corresponding increase in solution
+   quality. There are no hard and fast rules for choosing the maximum
+   rank. The best choice usually requires some problem specific
+   experimentation.
+
+.. member:: bool GradientProblemSolver::Options::use_approximate_eigenvalue_bfgs_scaling
+
+   Default: ``false``
+
+   As part of the ``BFGS`` update step / ``LBFGS`` right-multiply
+   step, the initial inverse Hessian approximation is taken to be the
+   Identity.  However, [Oren]_ showed that using instead :math:`I *
+   \gamma`, where :math:`\gamma` is a scalar chosen to approximate an
+   eigenvalue of the true inverse Hessian can result in improved
+   convergence in a wide variety of cases.  Setting
+   ``use_approximate_eigenvalue_bfgs_scaling`` to true enables this
+   scaling in ``BFGS`` (before first iteration) and ``LBFGS`` (at each
+   iteration).
+
+   Precisely, approximate eigenvalue scaling equates to
+
+   .. math:: \gamma = \frac{y_k' s_k}{y_k' y_k}
+
+   With:
+
+   .. math:: y_k = \nabla f_{k+1} - \nabla f_k
+   .. math:: s_k = x_{k+1} - x_k
+
+   Where :math:`f()` is the line search objective and :math:`x` the
+   vector of parameter values [NocedalWright]_.
+
+   It is important to note that approximate eigenvalue scaling does
+   **not** *always* improve convergence, and that it can in fact
+   *significantly* degrade performance for certain classes of problem,
+   which is why it is disabled by default.  In particular it can
+   degrade performance when the sensitivity of the problem to different
+   parameters varies significantly, as in this case a single scalar
+   factor fails to capture this variation and detrimentally downscales
+   parts of the Jacobian approximation which correspond to
+   low-sensitivity parameters. It can also reduce the robustness of the
+   solution to errors in the Jacobians.
+
+.. member:: LineSearchInterpolationType GradientProblemSolver::Options::line_search_interpolation_type
+
+   Default: ``CUBIC``
+
+   Degree of the polynomial used to approximate the objective
+   function. Valid values are ``BISECTION``, ``QUADRATIC`` and
+   ``CUBIC``.
+
+.. member:: double GradientProblemSolver::Options::min_line_search_step_size
+
+   The line search terminates if:
+
+   .. math:: \|\Delta x_k\|_\infty < \text{min_line_search_step_size}
+
+   where :math:`\|\cdot\|_\infty` refers to the max norm, and
+   :math:`\Delta x_k` is the step change in the parameter values at
+   the :math:`k`-th iteration.
+
+.. member:: double GradientProblemSolver::Options::line_search_sufficient_function_decrease
+
+   Default: ``1e-4``
+
+   Solving the line search problem exactly is computationally
+   prohibitive. Fortunately, line search based optimization algorithms
+   can still guarantee convergence if instead of an exact solution,
+   the line search algorithm returns a solution which decreases the
+   value of the objective function sufficiently. More precisely, we
+   are looking for a step size s.t.
+
+   .. math:: f(\text{step_size}) \le f(0) + \text{sufficient_decrease} * [f'(0) * \text{step_size}]
+
+   This condition is known as the Armijo condition.
+
+.. member:: double GradientProblemSolver::Options::max_line_search_step_contraction
+
+   Default: ``1e-3``
+
+   In each iteration of the line search,
+
+   .. math:: \text{new_step_size} \geq \text{max_line_search_step_contraction} * \text{step_size}
+
+   Note that by definition, for contraction:
+
+   .. math:: 0 < \text{max_step_contraction} < \text{min_step_contraction} < 1
+
+.. member:: double GradientProblemSolver::Options::min_line_search_step_contraction
+
+   Default: ``0.6``
+
+   In each iteration of the line search,
+
+   .. math:: \text{new_step_size} \leq \text{min_line_search_step_contraction} * \text{step_size}
+
+   Note that by definition, for contraction:
+
+   .. math:: 0 < \text{max_step_contraction} < \text{min_step_contraction} < 1
+
+.. member:: int GradientProblemSolver::Options::max_num_line_search_step_size_iterations
+
+   Default: ``20``
+
+   Maximum number of trial step size iterations during each line
+   search. If a step size satisfying the search conditions cannot be
+   found within this number of trials, the line search will stop.
+
+   As this is an 'artificial' constraint (one imposed by the user, not
+   the underlying math), if ``WOLFE`` line search is being used, *and*
+   points satisfying the Armijo sufficient (function) decrease
+   condition have been found during the current search (in :math:`\leq`
+   ``max_num_line_search_step_size_iterations``), then the step size
+   with the lowest function value which satisfies the Armijo condition
+   will be returned as the new valid step, even though it does *not*
+   satisfy the strong Wolfe conditions.  This behaviour protects
+   against early termination of the optimizer at a sub-optimal point.
+
+.. member:: int GradientProblemSolver::Options::max_num_line_search_direction_restarts
+
+   Default: ``5``
+
+   Maximum number of restarts of the line search direction algorithm
+   before terminating the optimization. Restarts of the line search
+   direction algorithm occur when the current algorithm fails to
+   produce a new descent direction. This typically indicates a
+   numerical failure, or a breakdown in the validity of the
+   approximations used.
+
+.. member:: double GradientProblemSolver::Options::line_search_sufficient_curvature_decrease
+
+   Default: ``0.9``
+
+   The strong Wolfe conditions consist of the Armijo sufficient
+   decrease condition, and an additional requirement that the
+   step size be chosen s.t. the *magnitude* ('strong' Wolfe
+   conditions) of the gradient along the search direction
+   decreases sufficiently. Precisely, this second condition
+   is that we seek a step size s.t.
+
+   .. math:: \|f'(\text{step_size})\| \leq \text{sufficient_curvature_decrease} * \|f'(0)\|
+
+   Where :math:`f()` is the line search objective and :math:`f'()` is the derivative
+   of :math:`f` with respect to the step size: :math:`\frac{d f}{d~\text{step size}}`.
+
+.. member:: double GradientProblemSolver::Options::max_line_search_step_expansion
+
+   Default: ``10.0``
+
+   During the bracketing phase of a Wolfe line search, the step size
+   is increased until either a point satisfying the Wolfe conditions
+   is found, or an upper bound for a bracket containing a point
+   satisfying the conditions is found.  Precisely, at each iteration
+   of the expansion:
+
+   .. math:: \text{new_step_size} \leq \text{max_step_expansion} * \text{step_size}
+
+   By definition for expansion
+
+   .. math:: \text{max_step_expansion} > 1.0
+
+.. member:: int GradientProblemSolver::Options::max_num_iterations
+
+   Default: ``50``
+
+   Maximum number of iterations for which the solver should run.
+
+.. member:: double GradientProblemSolver::Options::max_solver_time_in_seconds
+
+   Default: ``1e6``
+
+   Maximum amount of time for which the solver should run.
+
+.. member:: double GradientProblemSolver::Options::function_tolerance
+
+   Default: ``1e-6``
+
+   Solver terminates if
+
+   .. math:: \frac{|\Delta \text{cost}|}{\text{cost}} \leq \text{function_tolerance}
+
+   where, :math:`\Delta \text{cost}` is the change in objective
+   function value (up or down) in the current iteration of the line search.
+
+.. member:: double GradientProblemSolver::Options::gradient_tolerance
+
+   Default: ``1e-10``
+
+   Solver terminates if
+
+   .. math:: \|x - \Pi \boxplus(x, -g(x))\|_\infty \leq \text{gradient_tolerance}
+
+   where :math:`\|\cdot\|_\infty` refers to the max norm, :math:`\Pi`
+   is projection onto the bounds constraints and :math:`\boxplus` is
+   Plus operation for the overall local parameterization associated
+   with the parameter vector.
+
+.. member:: double GradientProblemSolver::Options::parameter_tolerance
+
+   Default: ``1e-8``
+
+   Solver terminates if
+
+   .. math:: \|\Delta x\| \leq (\|x\| + \text{parameter_tolerance}) * \text{parameter_tolerance}
+
+   where :math:`\Delta x` is the step computed by the linear solver in
+   the current iteration of the line search.
+
+.. member:: LoggingType GradientProblemSolver::Options::logging_type
+
+   Default: ``PER_MINIMIZER_ITERATION``
+
+.. member:: bool GradientProblemSolver::Options::minimizer_progress_to_stdout
+
+   Default: ``false``
+
+   By default the :class:`Minimizer` progress is logged to ``STDERR``
+   depending on the ``vlog`` level. If this flag is set to true, and
+   :member:`GradientProblemSolver::Options::logging_type` is not
+   ``SILENT``, the logging output is sent to ``STDOUT``.
+
+   The progress display looks like
+
+   .. code-block:: bash
+
+      0: f: 2.317806e+05 d: 0.00e+00 g: 3.19e-01 h: 0.00e+00 s: 0.00e+00 e:  0 it: 2.98e-02 tt: 8.50e-02
+      1: f: 2.312019e+05 d: 5.79e+02 g: 3.18e-01 h: 2.41e+01 s: 1.00e+00 e:  1 it: 4.54e-02 tt: 1.31e-01
+      2: f: 2.300462e+05 d: 1.16e+03 g: 3.17e-01 h: 4.90e+01 s: 2.54e-03 e:  1 it: 4.96e-02 tt: 1.81e-01
+
+   Here
+
+   #. ``f`` is the value of the objective function.
+   #. ``d`` is the change in the value of the objective function if
+      the step computed in this iteration is accepted.
+   #. ``g`` is the max norm of the gradient.
+   #. ``h`` is the change in the parameter vector.
+   #. ``s`` is the optimal step length computed by the line search.
+   #. ``it`` is the time taken by the current iteration.
+   #. ``tt`` is the total time taken by the minimizer.
+
+.. member:: vector<IterationCallback> GradientProblemSolver::Options::callbacks
+
+   Callbacks that are executed at the end of each iteration of the
+   :class:`Minimizer`. They are executed in the order that they are
+   specified in this vector. By default, parameter blocks are updated
+   only at the end of the optimization, i.e., when the
+   :class:`Minimizer` terminates. This behavior is controlled by
+   :member:`GradientProblemSolver::Options::update_state_every_iteration`. If
+   the user wishes to have access to the updated parameter blocks when
+   his/her callbacks are executed, then set
+   :member:`GradientProblemSolver::Options::update_state_every_iteration`
+   to true.
+
+   The solver does NOT take ownership of these pointers.
+
+
+.. member:: bool GradientProblemSolver::Options::update_state_every_iteration
+
+   Default: ``false``
+
+   Normally the parameter vector is only updated when the solver
+   terminates. Setting this to true updates it every iteration. This
+   setting is useful when building an interactive application using
+   Ceres and using an :class:`IterationCallback`.
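+
+   As a minimal sketch, a callback that logs the cost at every
+   iteration and asks the solver to continue could look like the
+   following (``LoggingCallback`` is an illustrative name, and a
+   ``GradientProblemSolver::Options`` object named ``options`` is
+   assumed to exist):
+
+   .. code-block:: c++
+
+      class LoggingCallback : public ceres::IterationCallback {
+       public:
+        virtual ceres::CallbackReturnType operator()(
+            const ceres::IterationSummary& summary) {
+          std::cout << "iteration: " << summary.iteration
+                    << " cost: " << summary.cost << "\n";
+          return ceres::SOLVER_CONTINUE;
+        }
+      };
+
+      LoggingCallback callback;
+      options.callbacks.push_back(&callback);
+      // Needed if the callback should see the updated parameter
+      // values at each iteration.
+      options.update_state_every_iteration = true;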
+
+:class:`GradientProblemSolver::Summary`
+---------------------------------------
+
+.. class:: GradientProblemSolver::Summary
+
+   Summary of the various stages of the solver after termination.
+
+.. function:: string GradientProblemSolver::Summary::BriefReport() const
+
+   A brief one line description of the state of the solver after
+   termination.
+
+.. function:: string GradientProblemSolver::Summary::FullReport() const
+
+   A full multiline description of the state of the solver after
+   termination.
+
+.. function:: bool GradientProblemSolver::Summary::IsSolutionUsable() const
+
+   Whether the solution returned by the optimization algorithm can be
+   relied on to be numerically sane. This will be the case if
+   `GradientProblemSolver::Summary::termination_type` is set to `CONVERGENCE`,
+   `USER_SUCCESS` or `NO_CONVERGENCE`, i.e., either the solver
+   converged by meeting one of the convergence tolerances or because
+   the user indicated that it had converged or it ran to the maximum
+   number of iterations or time.
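+
+   For example, a minimal sketch of checking the outcome of a solve
+   (``options``, ``problem`` and ``parameters`` are assumed to be set
+   up as in the tutorial):
+
+   .. code-block:: c++
+
+      ceres::GradientProblemSolver::Summary summary;
+      ceres::Solve(options, problem, parameters, &summary);
+      if (summary.IsSolutionUsable()) {
+        std::cout << summary.BriefReport() << "\n";
+      } else {
+        std::cout << "Solve failed: " << summary.message << "\n";
+      }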
+
+.. member:: TerminationType GradientProblemSolver::Summary::termination_type
+
+   The cause of the minimizer terminating.
+
+.. member:: string GradientProblemSolver::Summary::message
+
+   Reason why the solver terminated.
+
+.. member:: double GradientProblemSolver::Summary::initial_cost
+
+   Cost of the problem (value of the objective function) before the
+   optimization.
+
+.. member:: double GradientProblemSolver::Summary::final_cost
+
+   Cost of the problem (value of the objective function) after the
+   optimization.
+
+.. member:: vector<IterationSummary> GradientProblemSolver::Summary::iterations
+
+   :class:`IterationSummary` for each minimizer iteration in order.
+
+.. member:: int GradientProblemSolver::Summary::num_cost_evaluations
+
+   Number of times the cost (and not the gradient) was evaluated.
+
+.. member:: int GradientProblemSolver::Summary::num_gradient_evaluations
+
+   Number of times the gradient (and the cost) were evaluated.
+
+.. member:: double GradientProblemSolver::Summary::total_time_in_seconds
+
+   Time (in seconds) spent in the solver.
+
+.. member:: double GradientProblemSolver::Summary::cost_evaluation_time_in_seconds
+
+   Time (in seconds) spent evaluating the cost.
+
+.. member:: double GradientProblemSolver::Summary::gradient_evaluation_time_in_seconds
+
+   Time (in seconds) spent evaluating the gradient vector.
+
+.. member:: int GradientProblemSolver::Summary::num_parameters
+
+   Number of parameters in the problem.
+
+.. member:: int GradientProblemSolver::Summary::num_local_parameters
+
+   Dimension of the tangent space of the problem. This is different
+   from :member:`GradientProblemSolver::Summary::num_parameters` if a
+   :class:`LocalParameterization` object is used.
+
+.. member:: LineSearchDirectionType GradientProblemSolver::Summary::line_search_direction_type
+
+   Type of line search direction used.
+
+.. member:: LineSearchType GradientProblemSolver::Summary::line_search_type
+
+   Type of the line search algorithm used.
+
+.. member:: LineSearchInterpolationType GradientProblemSolver::Summary::line_search_interpolation_type
+
+   When performing line search, the degree of the polynomial used to
+   approximate the objective function.
+
+.. member:: NonlinearConjugateGradientType GradientProblemSolver::Summary::nonlinear_conjugate_gradient_type
+
+   If the line search direction is `NONLINEAR_CONJUGATE_GRADIENT`,
+   then this indicates the particular variant of non-linear conjugate
+   gradient used.
+
+.. member:: int GradientProblemSolver::Summary::max_lbfgs_rank
+
+   If the type of the line search direction is `LBFGS`, then this
+   indicates the rank of the Hessian approximation.
diff --git a/docs/source/gradient_tutorial.rst b/docs/source/gradient_tutorial.rst
new file mode 100644
index 0000000..0bbdee4
--- /dev/null
+++ b/docs/source/gradient_tutorial.rst
@@ -0,0 +1,138 @@
+.. highlight:: c++
+
+.. default-domain:: cpp
+
+.. _chapter-gradient_tutorial:
+
+==================================
+General Unconstrained Minimization
+==================================
+
+While much of Ceres Solver is devoted to solving non-linear least
+squares problems, internally it contains a solver that can solve
+general unconstrained optimization problems using just their objective
+function value and gradients. The ``GradientProblem`` and
+``GradientProblemSolver`` objects give the user access to this solver.
+
+So without much further ado, let us look at how one goes about using
+them.
+
+Rosenbrock's Function
+=====================
+
+We consider the minimization of the famous `Rosenbrock's function
+<http://en.wikipedia.org/wiki/Rosenbrock_function>`_ [#f1]_.
+
+We begin by defining an instance of the ``FirstOrderFunction``
+interface. This is the object that is responsible for computing the
+objective function value and the gradient (if required). This is the
+analog of the :class:`CostFunction` when defining non-linear least
+squares problems in Ceres.
+
+.. code::
+
+  class Rosenbrock : public ceres::FirstOrderFunction {
+   public:
+    virtual bool Evaluate(const double* parameters,
+                          double* cost,
+                          double* gradient) const {
+      const double x = parameters[0];
+      const double y = parameters[1];
+
+      // f(x, y) = (1 - x)^2 + 100 (y - x^2)^2
+      cost[0] = (1.0 - x) * (1.0 - x) + 100.0 * (y - x * x) * (y - x * x);
+      if (gradient != NULL) {
+        gradient[0] = -2.0 * (1.0 - x) - 200.0 * (y - x * x) * 2.0 * x;
+        gradient[1] = 200.0 * (y - x * x);
+      }
+      return true;
+    }
+
+    virtual int NumParameters() const { return 2; }
+  };
+
+
+Minimizing it then is a straightforward matter of constructing a
+:class:`GradientProblem` object and calling :func:`Solve` on it.
+
+.. code::
+
+    double parameters[2] = {-1.2, 1.0};
+
+    ceres::GradientProblem problem(new Rosenbrock());
+
+    ceres::GradientProblemSolver::Options options;
+    options.minimizer_progress_to_stdout = true;
+    ceres::GradientProblemSolver::Summary summary;
+    ceres::Solve(options, problem, parameters, &summary);
+
+    std::cout << summary.FullReport() << "\n";
+
+Executing this code solves the problem using the limited memory
+`BFGS
+<http://en.wikipedia.org/wiki/Broyden%E2%80%93Fletcher%E2%80%93Goldfarb%E2%80%93Shanno_algorithm>`_
+algorithm and produces the following output.
+
+.. code-block:: bash
+
+     0: f: 2.420000e+01 d: 0.00e+00 g: 2.16e+02 h: 0.00e+00 s: 0.00e+00 e:  0 it: 2.00e-05 tt: 2.00e-05
+     1: f: 4.280493e+00 d: 1.99e+01 g: 1.52e+01 h: 2.01e-01 s: 8.62e-04 e:  2 it: 7.32e-05 tt: 2.19e-04
+     2: f: 3.571154e+00 d: 7.09e-01 g: 1.35e+01 h: 3.78e-01 s: 1.34e-01 e:  3 it: 2.50e-05 tt: 2.68e-04
+     3: f: 3.440869e+00 d: 1.30e-01 g: 1.73e+01 h: 1.36e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 2.92e-04
+     4: f: 3.213597e+00 d: 2.27e-01 g: 1.55e+01 h: 1.06e-01 s: 4.59e-01 e:  1 it: 2.86e-06 tt: 3.14e-04
+     5: f: 2.839723e+00 d: 3.74e-01 g: 1.05e+01 h: 1.34e-01 s: 5.24e-01 e:  1 it: 2.86e-06 tt: 3.36e-04
+     6: f: 2.448490e+00 d: 3.91e-01 g: 1.29e+01 h: 3.04e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 3.58e-04
+     7: f: 1.943019e+00 d: 5.05e-01 g: 4.00e+00 h: 8.81e-02 s: 7.43e-01 e:  1 it: 4.05e-06 tt: 3.79e-04
+     8: f: 1.731469e+00 d: 2.12e-01 g: 7.36e+00 h: 1.71e-01 s: 4.60e-01 e:  2 it: 9.06e-06 tt: 4.06e-04
+     9: f: 1.503267e+00 d: 2.28e-01 g: 6.47e+00 h: 8.66e-02 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 4.33e-04
+    10: f: 1.228331e+00 d: 2.75e-01 g: 2.00e+00 h: 7.70e-02 s: 7.90e-01 e:  1 it: 3.81e-06 tt: 4.54e-04
+    11: f: 1.016523e+00 d: 2.12e-01 g: 5.15e+00 h: 1.39e-01 s: 3.76e-01 e:  2 it: 1.00e-05 tt: 4.82e-04
+    12: f: 9.145773e-01 d: 1.02e-01 g: 6.74e+00 h: 7.98e-02 s: 1.00e+00 e:  1 it: 3.10e-06 tt: 5.03e-04
+    13: f: 7.508302e-01 d: 1.64e-01 g: 3.88e+00 h: 5.76e-02 s: 4.93e-01 e:  1 it: 2.86e-06 tt: 5.25e-04
+    14: f: 5.832378e-01 d: 1.68e-01 g: 5.56e+00 h: 1.42e-01 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 5.47e-04
+    15: f: 3.969581e-01 d: 1.86e-01 g: 1.64e+00 h: 1.17e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 5.68e-04
+    16: f: 3.171557e-01 d: 7.98e-02 g: 3.84e+00 h: 1.18e-01 s: 3.97e-01 e:  2 it: 9.06e-06 tt: 5.94e-04
+    17: f: 2.641257e-01 d: 5.30e-02 g: 3.27e+00 h: 6.14e-02 s: 1.00e+00 e:  1 it: 3.10e-06 tt: 6.16e-04
+    18: f: 1.909730e-01 d: 7.32e-02 g: 5.29e-01 h: 8.55e-02 s: 6.82e-01 e:  1 it: 4.05e-06 tt: 6.42e-04
+    19: f: 1.472012e-01 d: 4.38e-02 g: 3.11e+00 h: 1.20e-01 s: 3.47e-01 e:  2 it: 1.00e-05 tt: 6.69e-04
+    20: f: 1.093558e-01 d: 3.78e-02 g: 2.97e+00 h: 8.43e-02 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 6.91e-04
+    21: f: 6.710346e-02 d: 4.23e-02 g: 1.42e+00 h: 9.64e-02 s: 8.85e-01 e:  1 it: 3.81e-06 tt: 7.12e-04
+    22: f: 3.993377e-02 d: 2.72e-02 g: 2.30e+00 h: 1.29e-01 s: 4.63e-01 e:  2 it: 9.06e-06 tt: 7.39e-04
+    23: f: 2.911794e-02 d: 1.08e-02 g: 2.55e+00 h: 6.55e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 7.62e-04
+    24: f: 1.457683e-02 d: 1.45e-02 g: 2.77e-01 h: 6.37e-02 s: 6.14e-01 e:  1 it: 3.81e-06 tt: 7.84e-04
+    25: f: 8.577515e-03 d: 6.00e-03 g: 2.86e+00 h: 1.40e-01 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.05e-04
+    26: f: 3.486574e-03 d: 5.09e-03 g: 1.76e-01 h: 1.23e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.27e-04
+    27: f: 1.257570e-03 d: 2.23e-03 g: 1.39e-01 h: 5.08e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.48e-04
+    28: f: 2.783568e-04 d: 9.79e-04 g: 6.20e-01 h: 6.47e-02 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 8.69e-04
+    29: f: 2.533399e-05 d: 2.53e-04 g: 1.68e-02 h: 1.98e-03 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 8.91e-04
+    30: f: 7.591572e-07 d: 2.46e-05 g: 5.40e-03 h: 9.27e-03 s: 1.00e+00 e:  1 it: 3.81e-06 tt: 9.12e-04
+    31: f: 1.902460e-09 d: 7.57e-07 g: 1.62e-03 h: 1.89e-03 s: 1.00e+00 e:  1 it: 2.86e-06 tt: 9.33e-04
+    32: f: 1.003030e-12 d: 1.90e-09 g: 3.50e-05 h: 3.52e-05 s: 1.00e+00 e:  1 it: 3.10e-06 tt: 9.54e-04
+    33: f: 4.835994e-17 d: 1.00e-12 g: 1.05e-07 h: 1.13e-06 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 9.81e-04
+    34: f: 1.885250e-22 d: 4.84e-17 g: 2.69e-10 h: 1.45e-08 s: 1.00e+00 e:  1 it: 4.05e-06 tt: 1.00e-03
+
+  Solver Summary (v 1.12.0-lapack-suitesparse-cxsparse-no_openmp)
+
+  Parameters                                  2
+  Line search direction              LBFGS (20)
+  Line search type                  CUBIC WOLFE
+
+
+  Cost:
+  Initial                          2.420000e+01
+  Final                            1.885250e-22
+  Change                           2.420000e+01
+
+  Minimizer iterations                       35
+
+  Time (in seconds):
+
+    Cost evaluation                       0.000
+    Gradient evaluation                   0.000
+  Total                                   0.003
+
+  Termination:                      CONVERGENCE (Gradient tolerance reached. Gradient max norm: 9.032775e-13 <= 1.000000e-10)
+
+.. rubric:: Footnotes
+
+.. [#f1] `examples/rosenbrock.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/rosenbrock.cc>`_
diff --git a/docs/source/index.rst b/docs/source/index.rst
new file mode 100644
index 0000000..d72368f
--- /dev/null
+++ b/docs/source/index.rst
@@ -0,0 +1,67 @@
+============
+Ceres Solver
+============
+
+Ceres Solver [#f1]_ is an open source C++ library for modeling and
+solving large, complicated optimization problems. It can be used to
+solve `Non-linear Least Squares`_ problems with bounds constraints and
+general unconstrained optimization problems. It is a mature, feature
+rich, and performant library that has been used in production at
+Google since 2010. For more, see :doc:`features`.
+
+`ceres-solver@googlegroups.com
+<https://groups.google.com/forum/?fromgroups#!forum/ceres-solver>`_ is
+the place for discussions and questions about Ceres Solver. We use the
+`GitHub Issue Tracker
+<https://github.com/ceres-solver/ceres-solver/issues>`_ to manage bug
+reports and feature requests.
+
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   features
+   installation
+   tutorial
+   derivatives
+   nnls_modeling
+   nnls_solving
+   nnls_covariance
+   gradient_solver
+   faqs
+   users
+   contributing
+   version_history
+   bibliography
+   license
+
+.. _Non-linear Least Squares: http://en.wikipedia.org/wiki/Non-linear_least_squares
+
+
+Cite Us
+=======
+
+If you use Ceres Solver for a publication, please cite it as::
+
+    @misc{ceres-solver,
+      author = "Sameer Agarwal and Keir Mierle and Others",
+      title = "Ceres Solver",
+      howpublished = "\url{http://ceres-solver.org}",
+    }
+
+
+.. rubric:: Footnotes
+
+.. [#f1] While there is some debate as to who invented the method of
+         Least Squares [Stigler]_, there is no questioning the fact
+         that it was `Carl Friedrich Gauss
+         <http://www-groups.dcs.st-and.ac.uk/~history/Biographies/Gauss.html>`_
+         who brought it to the attention of the world. Using just 22
+         observations of the newly discovered asteroid `Ceres
+         <http://en.wikipedia.org/wiki/Ceres_(dwarf_planet)>`_, Gauss
+         used the method of least squares to correctly predict when
+         and where the asteroid would emerge from behind the Sun
+         [TenenbaumDirector]_. We named our solver after Ceres to
+         celebrate this seminal event in the history of astronomy,
+         statistics and optimization.
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
new file mode 100644
index 0000000..b3dfb50
--- /dev/null
+++ b/docs/source/installation.rst
@@ -0,0 +1,1160 @@
+.. _chapter-installation:
+
+============
+Installation
+============
+
+Getting the source code
+=======================
+.. _section-source:
+
+You can start with the `latest stable release
+<http://ceres-solver.org/ceres-solver-1.14.0.tar.gz>`_. Or if you want
+the latest version, you can clone the git repository:
+
+.. code-block:: bash
+
+       git clone https://ceres-solver.googlesource.com/ceres-solver
+
+.. _section-dependencies:
+
+Dependencies
+============
+
+  .. NOTE ::
+
+    All versions of Ceres > 1.14 require a **fully C++11-compliant**
+    compiler.  In versions <= 1.14, C++11 was an optional requirement
+    controlled by the ``CXX11 [Default: OFF]`` build option.
+
+Ceres relies on a number of open source libraries, some of which are
+optional. For details on customizing the build process, see
+:ref:`section-customizing` .
+
+- `Eigen <http://eigen.tuxfamily.org/index.php?title=Main_Page>`_
+  3.2.2 or later **strongly** recommended, 3.1.0 or later **required**.
+
+  .. NOTE ::
+
+    Ceres can also use Eigen as a sparse linear algebra
+    library. Please see the documentation for ``EIGENSPARSE`` for
+    more details.
+
+- `CMake <http://www.cmake.org>`_ 3.5 or later.
+  **Required on all platforms except for legacy Android.**
+
+- `glog <https://github.com/google/glog>`_ 0.3.1 or
+  later. **Recommended**
+
+  ``glog`` is used extensively throughout Ceres for logging detailed
+  information about memory allocations and time consumed in various
+  parts of the solve, internal error conditions etc. The Ceres
+  developers use it extensively to observe and analyze Ceres's
+  performance. `glog <https://github.com/google/glog>`_ allows you to
+  control its behaviour from the command line. Starting with
+  ``-logtostderr`` you can add ``-v=N`` for increasing values of ``N``
+  to get more and more verbose and detailed information about Ceres
+  internals.
+
+  Ceres also ships with a minimal replacement of ``glog`` called
+  ``miniglog`` that can be enabled with the ``MINIGLOG`` build option.
+  ``miniglog`` is supplied for platforms which do not support the full
+  version of ``glog``.
+
+  In an attempt to reduce dependencies, it may be tempting to use
+  ``miniglog`` on platforms which already support ``glog``. While
+  there is nothing preventing the user from doing so, we strongly
+  recommend against it. ``miniglog`` has worse performance than
+  ``glog`` and is much harder to control and use.
+
+  .. NOTE ::
+
+     If you are compiling ``glog`` from source, please note that
+     currently, the unit tests for ``glog`` (which are enabled by
+     default) do not compile against a default build of ``gflags`` 2.1
+     as the gflags namespace changed from ``google::`` to
+     ``gflags::``.  A patch to fix this is available from `here
+     <https://code.google.com/p/google-glog/issues/detail?id=194>`_.
+
+- `gflags <https://github.com/gflags/gflags>`_. Needed to build
+  examples and tests.
+
+- `SuiteSparse
+  <http://faculty.cse.tamu.edu/davis/suitesparse.html>`_. Needed for
+  solving large sparse linear systems. **Optional; strongly recommended
+  for large scale bundle adjustment**
+
+- `CXSparse <http://faculty.cse.tamu.edu/davis/suitesparse.html>`_.
+  Similar to ``SuiteSparse`` but simpler and slower. CXSparse has
+  no dependencies on ``LAPACK`` and ``BLAS``. This makes for a simpler
+  build process and a smaller binary. **Optional**
+
+- `Apple's Accelerate sparse solvers <https://developer.apple.com/documentation/accelerate/sparse_solvers>`_.
+  As of Xcode 9.0, Apple's Accelerate framework includes support for
+  solving sparse linear systems across macOS, iOS et al. **Optional**
+
+- `BLAS <http://www.netlib.org/blas/>`_ and `LAPACK
+  <http://www.netlib.org/lapack/>`_ routines are needed by
+  ``SuiteSparse``, and optionally used by Ceres directly for some
+  operations.
+
+  On ``UNIX`` OSes other than Mac OS X we recommend `ATLAS
+  <http://math-atlas.sourceforge.net/>`_, which includes ``BLAS`` and
+  ``LAPACK`` routines. It is also possible to use `OpenBLAS
+  <https://github.com/xianyi/OpenBLAS>`_ . However, one needs to be
+  careful to `turn off the threading
+  <https://github.com/xianyi/OpenBLAS/wiki/faq#wiki-multi-threaded>`_
+  inside ``OpenBLAS`` as it conflicts with use of threads in Ceres.
+
+  Mac OS X ships with an optimized ``LAPACK`` and ``BLAS``
+  implementation as part of the ``Accelerate`` framework. The Ceres
+  build system will automatically detect and use it.
+
+  For Windows, things are much more complicated. `LAPACK For
+  Windows <http://icl.cs.utk.edu/lapack-for-windows/lapack/>`_
+  has detailed instructions.
+
+  **Optional but required for** ``SuiteSparse``.
+
+.. _section-linux:
+
+Linux
+=====
+
+We will use `Ubuntu <http://www.ubuntu.com>`_ as our example linux
+distribution.
+
+.. NOTE::
+
+ Up to at least Ubuntu 14.04, the SuiteSparse package in the official
+ package repository (built from SuiteSparse v3.4.0) **cannot** be used
+ to build Ceres as a *shared* library.  Thus if you want to build
+ Ceres as a shared library using SuiteSparse, you must perform a
+ source install of SuiteSparse or use an external PPA (see `bug report
+ here
+ <https://bugs.launchpad.net/ubuntu/+source/suitesparse/+bug/1333214>`_).
+ It is recommended that you use the current version of SuiteSparse
+ (4.2.1 at the time of writing).
+
+
+Start by installing all the dependencies.
+
+.. code-block:: bash
+
+     # CMake
+     sudo apt-get install cmake
+     # google-glog + gflags
+     sudo apt-get install libgoogle-glog-dev
+     # BLAS & LAPACK
+     sudo apt-get install libatlas-base-dev
+     # Eigen3
+     sudo apt-get install libeigen3-dev
+     # SuiteSparse and CXSparse (optional)
+     # - If you want to build Ceres as a *static* library (the default)
+     #   you can use the SuiteSparse package in the main Ubuntu package
+     #   repository:
+     sudo apt-get install libsuitesparse-dev
+     # - However, if you want to build Ceres as a *shared* library, you must
+     #   add the following PPA:
+     sudo add-apt-repository ppa:bzindovic/suitesparse-bugfix-1319687
+     sudo apt-get update
+     sudo apt-get install libsuitesparse-dev
+
+We are now ready to build, test, and install Ceres.
+
+.. code-block:: bash
+
+ tar zxf ceres-solver-1.14.0.tar.gz
+ mkdir ceres-bin
+ cd ceres-bin
+ cmake ../ceres-solver-1.14.0
+ make -j3
+ make test
+ # Optionally install Ceres, it can also be exported using CMake which
+ # allows Ceres to be used without requiring installation, see the documentation
+ # for the EXPORT_BUILD_DIR option for more information.
+ make install
+
+You can also try running the command line bundling application with one of the
+included problems, which comes from the University of Washington's BAL
+dataset [Agarwal]_.
+
+.. code-block:: bash
+
+ bin/simple_bundle_adjuster ../ceres-solver-1.14.0/data/problem-16-22106-pre.txt
+
+This runs Ceres for a maximum of 10 iterations using the
+``DENSE_SCHUR`` linear solver. The output should look something like
+this.
+
+.. code-block:: bash
+
+    iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+       0  4.185660e+06    0.00e+00    1.09e+08   0.00e+00   0.00e+00  1.00e+04       0    7.59e-02    3.37e-01
+       1  1.062590e+05    4.08e+06    8.99e+06   5.36e+02   9.82e-01  3.00e+04       1    1.65e-01    5.03e-01
+       2  4.992817e+04    5.63e+04    8.32e+06   3.19e+02   6.52e-01  3.09e+04       1    1.45e-01    6.48e-01
+       3  1.899774e+04    3.09e+04    1.60e+06   1.24e+02   9.77e-01  9.26e+04       1    1.43e-01    7.92e-01
+       4  1.808729e+04    9.10e+02    3.97e+05   6.39e+01   9.51e-01  2.78e+05       1    1.45e-01    9.36e-01
+       5  1.803399e+04    5.33e+01    1.48e+04   1.23e+01   9.99e-01  8.33e+05       1    1.45e-01    1.08e+00
+       6  1.803390e+04    9.02e-02    6.35e+01   8.00e-01   1.00e+00  2.50e+06       1    1.50e-01    1.23e+00
+
+    Ceres Solver v1.14.0 Solve Report
+    ----------------------------------
+                                         Original                  Reduced
+    Parameter blocks                        22122                    22122
+    Parameters                              66462                    66462
+    Residual blocks                         83718                    83718
+    Residual                               167436                   167436
+
+    Minimizer                        TRUST_REGION
+
+    Dense linear algebra library            EIGEN
+    Trust region strategy     LEVENBERG_MARQUARDT
+
+                                            Given                     Used
+    Linear solver                     DENSE_SCHUR              DENSE_SCHUR
+    Threads                                     1                        1
+    Linear solver threads                       1                        1
+    Linear solver ordering              AUTOMATIC                22106, 16
+
+    Cost:
+    Initial                          4.185660e+06
+    Final                            1.803390e+04
+    Change                           4.167626e+06
+
+    Minimizer iterations                        6
+    Successful steps                            6
+    Unsuccessful steps                          0
+
+    Time (in seconds):
+    Preprocessor                            0.261
+
+      Residual evaluation                   0.082
+      Jacobian evaluation                   0.412
+      Linear solver                         0.442
+    Minimizer                               1.051
+
+    Postprocessor                           0.002
+    Total                                   1.357
+
+    Termination:                      CONVERGENCE (Function tolerance reached. |cost_change|/cost: 1.769766e-09 <= 1.000000e-06)
+
+.. _section-osx:
+
+Mac OS X
+========
+.. NOTE::
+
+ Ceres will not compile using Xcode 4.5.x (Clang version 4.1) due to a
+ bug in that version of Clang.  If you are running Xcode 4.5.x, please
+ update to Xcode >= 4.6.x before attempting to build Ceres.
+
+
+On OS X, you can either use `MacPorts <https://www.macports.org/>`_ or
+`Homebrew <http://mxcl.github.com/homebrew/>`_ to install Ceres Solver.
+
+If using `MacPorts <https://www.macports.org/>`_, then
+
+.. code-block:: bash
+
+   sudo port install ceres-solver
+
+will install the latest version.
+
+If using `Homebrew <http://mxcl.github.com/homebrew/>`_ and assuming
+that you have the ``homebrew/science`` [#f1]_ tap enabled, then
+
+.. code-block:: bash
+
+      brew install ceres-solver
+
+will install the latest stable version along with all the required
+dependencies and
+
+.. code-block:: bash
+
+      brew install ceres-solver --HEAD
+
+will install the latest version in the git repo.
+
+You can also install each of the dependencies by hand using `Homebrew
+<http://mxcl.github.com/homebrew/>`_. There is no need to install
+``BLAS`` or ``LAPACK`` separately as OS X ships with optimized
+``BLAS`` and ``LAPACK`` routines as part of the `vecLib
+<https://developer.apple.com/library/mac/#documentation/Performance/Conceptual/vecLib/Reference/reference.html>`_
+framework.
+
+.. code-block:: bash
+
+      # CMake
+      brew install cmake
+      # google-glog and gflags
+      brew install glog
+      # Eigen3
+      brew install eigen
+      # SuiteSparse and CXSparse
+      brew install suite-sparse
+
+We are now ready to build, test, and install Ceres.
+
+.. code-block:: bash
+
+   tar zxf ceres-solver-1.14.0.tar.gz
+   mkdir ceres-bin
+   cd ceres-bin
+   cmake ../ceres-solver-1.14.0
+   make -j3
+   make test
+   # Optionally install Ceres, it can also be exported using CMake which
+   # allows Ceres to be used without requiring installation, see the
+   # documentation for the EXPORT_BUILD_DIR option for more information.
+   make install
+
+Building with OpenMP on OS X
+----------------------------
+
+Up to at least Xcode 8, OpenMP support was disabled in Apple's version of
+Clang.  However, you can install the latest version of the LLVM toolchain
+from Homebrew which does support OpenMP, and thus build Ceres with OpenMP
+support on OS X.  To do this, you must install llvm via Homebrew:
+
+.. code-block:: bash
+
+      # Install latest version of LLVM toolchain.
+      brew install llvm
+
+As the LLVM formula in Homebrew is keg-only, it will not be installed to
+``/usr/local`` to avoid conflicts with the standard Apple LLVM toolchain.
+To build Ceres with the Homebrew LLVM toolchain you should do the
+following:
+
+.. code-block:: bash
+
+   tar zxf ceres-solver-1.14.0.tar.gz
+   mkdir ceres-bin
+   cd ceres-bin
+   # Configure the local shell only (not persistent) to use the Homebrew LLVM
+   # toolchain in favour of the default Apple version.  This is taken
+   # verbatim from the instructions output by Homebrew when installing the
+   # llvm formula.
+   export LDFLAGS="-L/usr/local/opt/llvm/lib -Wl,-rpath,/usr/local/opt/llvm/lib"
+   export CPPFLAGS="-I/usr/local/opt/llvm/include"
+   export PATH="/usr/local/opt/llvm/bin:$PATH"
+   # Force CMake to use the Homebrew version of Clang.  OpenMP will be
+   # automatically enabled if it is detected that the compiler supports it.
+   cmake -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ ../ceres-solver-1.14.0
+   make -j3
+   make test
+   # Optionally install Ceres.  It can also be exported using CMake which
+   # allows Ceres to be used without requiring installation.  See the
+   # documentation for the EXPORT_BUILD_DIR option for more information.
+   make install
+
+Like the Linux build, you should now be able to run
+``bin/simple_bundle_adjuster``.
+
+
+.. rubric:: Footnotes
+
+.. [#f1] Ceres and many of its dependencies are in `homebrew/science
+   <https://github.com/Homebrew/homebrew-science>`_ tap. So, if you
+   don't have this tap enabled, then you will need to enable it as
+   follows before executing any of the commands in this section.
+
+   .. code-block:: bash
+
+      brew tap homebrew/science
+
+
+.. _section-windows:
+
+Windows
+=======
+
+.. NOTE::
+
+  If you find the following CMake difficult to set up, then you may
+  be interested in a `Microsoft Visual Studio wrapper
+  <https://github.com/tbennun/ceres-windows>`_ for Ceres Solver by Tal
+  Ben-Nun.
+
+On Windows, we support building with Visual Studio 2013 Release 4 or newer. Note
+that the Windows port is less featureful and less tested than the
+Linux or Mac OS X versions due to the lack of an officially supported
+way of building SuiteSparse and CXSparse.  There are however a number
+of unofficial ways of building these libraries. Building on Windows is
+also a bit more involved since there is no automated way to install
+dependencies.
+
+.. NOTE:: Using ``google-glog`` & ``miniglog`` with windows.h.
+
+ The windows.h header if used with GDI (Graphics Device Interface)
+ defines ``ERROR``, which conflicts with the definition of ``ERROR``
+ as a LogSeverity level in ``google-glog`` and ``miniglog``.  There
+ are at least two possible fixes to this problem:
+
+ #. Use ``google-glog`` and define ``GLOG_NO_ABBREVIATED_SEVERITIES``
+    when building Ceres and your own project, as documented `here
+    <http://google-glog.googlecode.com/svn/trunk/doc/glog.html>`__.
+    Note that this fix will not work for ``miniglog``, but use of
+    ``miniglog`` is strongly discouraged on any platform for which
+    ``google-glog`` is available (which includes Windows).
+ #. If you do not require GDI, then define ``NOGDI`` **before**
+    including windows.h.  This solution should work for both
+    ``google-glog`` and ``miniglog`` and is documented for
+    ``google-glog`` `here
+    <https://code.google.com/p/google-glog/issues/detail?id=33>`__.
+
+#. Make a toplevel directory for deps & build & src somewhere: ``ceres/``
+#. Get dependencies; unpack them as subdirectories in ``ceres/``
+   (``ceres/eigen``, ``ceres/glog``, etc)
+
+   #. ``Eigen`` 3.1 (needed on Windows; 3.0.x will not work). There is
+      no need to build anything; just unpack the source tarball.
+
+   #. ``google-glog`` Open up the Visual Studio solution and build it.
+   #. ``gflags`` Open up the Visual Studio solution and build it.
+
+   #. (Experimental) ``SuiteSparse`` Previously SuiteSparse was not
+      available on Windows; recently it has become possible to build
+      it on Windows using the `suitesparse-metis-for-windows
+      <https://github.com/jlblancoc/suitesparse-metis-for-windows>`_
+      project.  If you wish to use ``SuiteSparse``, follow their
+      instructions for obtaining and building it.
+
+   #. (Experimental) ``CXSparse`` Previously CXSparse was not
+      available on Windows; there are now several ports that enable it
+      to be, including: `[1] <https://github.com/PetterS/CXSparse>`_
+      and `[2] <https://github.com/TheFrenchLeaf/CXSparse>`_.  If you
+      wish to use ``CXSparse``, follow their instructions for
+      obtaining and building it.
+
+#. Unpack the Ceres tarball into ``ceres``. For the tarball, you
+   should get a directory inside ``ceres`` similar to
+   ``ceres-solver-1.3.0``. Alternately, checkout Ceres via ``git`` to
+   get ``ceres-solver.git`` inside ``ceres``.
+
+#. Install ``CMake``.
+
+#. Make a dir ``ceres/ceres-bin`` (for an out-of-tree build)
+
+#. Run ``CMake``; select the ``ceres-solver-X.Y.Z`` or
+   ``ceres-solver.git`` directory for the CMake file. Then select the
+   ``ceres-bin`` for the build dir.
+
+#. Try running ``Configure``. It won't work. It'll show a bunch of options.
+   You'll need to set:
+
+   #. ``EIGEN_INCLUDE_DIR_HINTS``
+   #. ``GLOG_INCLUDE_DIR_HINTS``
+   #. ``GLOG_LIBRARY_DIR_HINTS``
+   #. ``GFLAGS_INCLUDE_DIR_HINTS``
+   #. ``GFLAGS_LIBRARY_DIR_HINTS``
+   #. (Optional) ``SUITESPARSE_INCLUDE_DIR_HINTS``
+   #. (Optional) ``SUITESPARSE_LIBRARY_DIR_HINTS``
+   #. (Optional) ``CXSPARSE_INCLUDE_DIR_HINTS``
+   #. (Optional) ``CXSPARSE_LIBRARY_DIR_HINTS``
+
+   to the appropriate directories where you unpacked/built them. If
+   any of the variables are not visible in the ``CMake`` GUI, create a
+   new entry for them.  We recommend using the
+   ``<NAME>_(INCLUDE/LIBRARY)_DIR_HINTS`` variables rather than
+   setting the ``<NAME>_INCLUDE_DIR`` & ``<NAME>_LIBRARY`` variables
+   directly to keep all of the validity checking, and to avoid having
+   to specify the library files manually.
+
+#. You may have to tweak some more settings to generate a MSVC
+   project.  After each adjustment, try pressing Configure & Generate
+   until it generates successfully.
+
+#. Open the solution and build it in MSVC
+
+
+To run the tests, select the ``RUN_TESTS`` target and hit **Build
+RUN_TESTS** from the build menu.
+
+Like the Linux build, you should now be able to run
+``bin/simple_bundle_adjuster``.
+
+Notes:
+
+#. The default build is Debug; consider switching it to release mode.
+#. Currently ``system_test`` is not working properly.
+#. CMake puts the resulting test binaries in ``ceres-bin/examples/Debug``
+   by default.
+#. The solvers supported on Windows are ``DENSE_QR``, ``DENSE_SCHUR``,
+   ``CGNR``, and ``ITERATIVE_SCHUR``.
+#. We're looking for someone to work with upstream ``SuiteSparse`` to
+   port their build system to something sane like ``CMake``, and get a
+   fully supported Windows port.
+
+
+.. _section-android:
+
+Android
+=======
+
+.. NOTE::
+
+    You will need Android NDK r15 or higher to build Ceres solver.
+
+To build Ceres for Android, we need to force ``CMake`` to find
+the toolchains from the Android NDK instead of using the standard
+ones. For example, assuming you have specified ``$NDK_DIR``:
+
+.. code-block:: bash
+
+    cmake \
+    -DCMAKE_TOOLCHAIN_FILE=\
+        $NDK_DIR/build/cmake/android.toolchain.cmake \
+    -DEIGEN_INCLUDE_DIR=/path/to/eigen/header \
+    -DANDROID_ABI=armeabi-v7a \
+    -DANDROID_STL=c++_shared \
+    -DANDROID_NATIVE_API_LEVEL=android-24 \
+    -DBUILD_SHARED_LIBS=ON \
+    -DMINIGLOG=ON \
+    <PATH_TO_CERES_SOURCE>
+
+You can build for any Android STL or ABI, but the c++_shared STL
+and the armeabi-v7a or arm64-v8a ABI are recommended for 32bit
+and 64bit architectures, respectively. Several API levels may
+be supported, but it is recommended that you use the highest
+level that is suitable for your Android project.
+
+.. NOTE::
+
+    You must always use the same API level and STL library for
+    your Android project and the Ceres binaries.
+
+After building, you get a ``libceres.so`` library, which you can
+link in your Android build system by using a
+``PREBUILT_SHARED_LIBRARY`` target in your build script.
+
+If you are building any Ceres samples and would like to verify
+your library, you will need to place them in an executable public
+directory together with ``libceres.so`` on your Android device
+(e.g. in /data/local/tmp) and ensure that the STL library from
+your NDK is present in that same directory. You may then execute
+the sample by running for example:
+
+.. code-block:: bash
+
+    adb shell
+    cd /data/local/tmp
+    LD_LIBRARY_PATH=/data/local/tmp ./helloworld
+
+Note that any solvers or other shared dependencies you include in
+your project must also be present in your android build config and
+your test directory on Android.
+
+.. _section-ios:
+
+iOS
+===
+
+.. NOTE::
+
+   You need iOS version 7.0 or higher to build Ceres Solver.
+
+To build Ceres for iOS, we need to force ``CMake`` to find the
+toolchains from the iOS SDK instead of using the standard ones. For
+example:
+
+.. code-block:: bash
+
+   cmake \
+   -DCMAKE_TOOLCHAIN_FILE=../ceres-solver/cmake/iOS.cmake \
+   -DEIGEN_INCLUDE_DIR=/path/to/eigen/header \
+   -DIOS_PLATFORM=<PLATFORM> \
+   <PATH_TO_CERES_SOURCE>
+
+``PLATFORM`` can be: ``OS``, ``SIMULATOR`` or ``SIMULATOR64``. You can
+build for ``OS`` (``armv7``, ``armv7s``, ``arm64``), ``SIMULATOR``
+(``i386``) or ``SIMULATOR64`` (``x86_64``) separately and use ``lipo``
+to merge them into one static library.  See ``cmake/iOS.cmake`` for
+more options.
+
+.. NOTE::
+
+   iOS version 11.0+ requires a 64-bit architecture, so you cannot
+   build for armv7/armv7s with iOS 11.0+ (only arm64 is supported).
+
+After building, you will get a ``libceres.a`` library, which you will
+need to add to your Xcode project.
+
+The default CMake configuration builds a bare bones version of Ceres
+Solver that only depends on Eigen (``MINIGLOG`` is compiled into Ceres
+if it is used). This should be sufficient for solving small to
+moderate sized problems (no ``SPARSE_SCHUR`` or
+``SPARSE_NORMAL_CHOLESKY`` linear solvers and no ``CLUSTER_JACOBI``
+or ``CLUSTER_TRIDIAGONAL`` preconditioners).
+
+If you decide to use ``LAPACK`` and ``BLAS``, then you also need to
+add ``Accelerate.framework`` to your Xcode project's linking
+dependency.
+
+.. _section-customizing:
+
+Customizing the build
+=====================
+
+It is possible to reduce the libraries needed to build Ceres and
+customize the build process by setting the appropriate options in
+``CMake``.  These options can either be set in the ``CMake`` GUI, or
+via ``-D<OPTION>=<ON/OFF>`` when running ``CMake`` from the command
+line.  In general, you should only modify these options from their
+defaults if you know what you are doing.
+
+.. NOTE::
+
+ If you are setting variables via ``-D<VARIABLE>=<VALUE>`` when
+ calling ``CMake``, it is important to understand that this forcibly
+ **overwrites** the variable ``<VARIABLE>`` in the ``CMake`` cache at
+ the start of *every configure*.
+
+ This can lead to confusion if you are invoking the ``CMake`` `curses
+ <http://www.gnu.org/software/ncurses/ncurses.html>`_ terminal GUI
+ (via ``ccmake``, e.g. ``ccmake -D<VARIABLE>=<VALUE>
+ <PATH_TO_SRC>``).  In this case, even if you change the value of
+ ``<VARIABLE>`` in the ``CMake`` GUI, your changes will be
+ **overwritten** with the value passed via ``-D<VARIABLE>=<VALUE>``
+ (if one exists) at the start of each configure.
+
+ As such, it is generally easier not to pass values to ``CMake`` via
+ ``-D`` and instead interactively experiment with their values in the
+ ``CMake`` GUI.  If they are not present in the *Standard View*,
+ toggle to the *Advanced View* with ``<t>``.
+
+
+Modifying default compilation flags
+-----------------------------------
+
+The ``CMAKE_CXX_FLAGS`` variable can be used to define additional
+default compilation flags for all build types.  Any flags specified
+in ``CMAKE_CXX_FLAGS`` will be used in addition to the default
+flags used by Ceres for the current build type.
+
+For example, if you wished to build Ceres with `-march=native
+<https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html>`_ which is not
+enabled by default (even if ``CMAKE_BUILD_TYPE=Release``) you would invoke
+CMake with:
+
+.. code-block:: bash
+
+       cmake -DCMAKE_CXX_FLAGS="-march=native" <PATH_TO_CERES_SOURCE>
+
+.. NOTE ::
+
+    The use of ``-march=native`` will limit portability, as it will tune the
+    implementation to the specific CPU of the compiling machine (e.g. use of
+    AVX if available).  Run-time segfaults may occur if you then tried to
+    run the resulting binaries on a machine with a different processor, even
+    if it is from the same family (e.g. x86) if the specific options available
+    are different.  Note that the performance gains from the use of
+    ``-march=native`` are not guaranteed to be significant.
+
+.. _options-controlling-ceres-configuration:
+
+Options controlling Ceres configuration
+---------------------------------------
+
+#. ``LAPACK [Default: ON]``: If this option is enabled, and the ``BLAS`` and
+   ``LAPACK`` libraries are found, Ceres will enable **direct** use of
+   ``LAPACK`` routines (i.e. Ceres itself will call them).  If this option is
+   disabled, then Ceres will not require ``LAPACK`` or ``BLAS``.  It is
+   however still possible that Ceres may call ``LAPACK`` routines indirectly
+   via SuiteSparse if ``LAPACK=OFF`` and ``SUITESPARSE=ON``.  Finally
+   note that if ``LAPACK=ON`` and ``SUITESPARSE=ON``, the ``LAPACK`` and
+   ``BLAS`` libraries used by SuiteSparse and Ceres should be the same.
+
+#. ``SUITESPARSE [Default: ON]``: By default, Ceres will link to
+   ``SuiteSparse`` if it and all of its dependencies are present. Turn
+   this ``OFF`` to build Ceres without ``SuiteSparse``.
+
+   .. NOTE::
+
+      SuiteSparse is licensed under a mixture of GPL/LGPL/Commercial
+      terms.  Ceres requires some components that are only licensed under
+      GPL/Commercial terms.
+
+#. ``CXSPARSE [Default: ON]``: By default, Ceres will link to
+   ``CXSparse`` if all its dependencies are present. Turn this ``OFF``
+   to build Ceres without ``CXSparse``.
+
+   .. NOTE::
+
+      CXSparse is licensed under the LGPL.
+
+#. ``ACCELERATESPARSE [Default: ON]``: By default, Ceres will link to
+   Apple's Accelerate framework directly if a version of it is detected
+   which supports solving sparse linear systems.  Note that on Apple OSs
+   Accelerate usually also provides the BLAS/LAPACK implementations and
+   so would be linked against irrespective of the value of ``ACCELERATESPARSE``.
+
+#. ``EIGENSPARSE [Default: ON]``: By default, Ceres will use Eigen's
+   sparse Cholesky factorization. Turn this ``OFF`` to build Ceres
+   without it.
+
+   .. NOTE::
+
+      For good performance, use Eigen version 3.2.2 or later.
+
+   .. NOTE::
+
+      Unlike the rest of Eigen (>= 3.1.1 MPL2, < 3.1.1 LGPL), Eigen's sparse
+      Cholesky factorization is (still) licensed under the LGPL.
+
+#. ``GFLAGS [Default: ON]``: Turn this ``OFF`` to build Ceres without
+   ``gflags``. This will also prevent some of the example code from
+   building.
+
+#. ``MINIGLOG [Default: OFF]``: Ceres includes a stripped-down,
+   minimal implementation of ``glog`` which can optionally be used as
+   a substitute for ``glog``, thus removing ``glog`` as a required
+   dependency. Turn this ``ON`` to use this minimal ``glog``
+   implementation.
+
+#. ``SCHUR_SPECIALIZATIONS [Default: ON]``: If you are concerned about
+   binary size/compilation time over some small (10-20%) performance
+   gains in the ``SPARSE_SCHUR`` solver, you can disable some of the
+   template specializations by turning this ``OFF``.
+
+#. ``CERES_THREADING_MODEL [Default: CXX11_THREADS > OPENMP > NO_THREADS]``:
+   Multi-threading backend Ceres should be compiled with.  This will
+   automatically be set to only accept the available subset of threading
+   options in the CMake GUI.
+
+#. ``BUILD_SHARED_LIBS [Default: OFF]``: By default Ceres is built as
+   a static library, turn this ``ON`` to instead build Ceres as a
+   shared library.
+
+#. ``EXPORT_BUILD_DIR [Default: OFF]``: By default Ceres is configured
+   solely for installation, and so must be installed in order for
+   clients to use it.  Turn this ``ON`` to export Ceres' build
+   directory location into the `user's local CMake package registry
+   <http://www.cmake.org/cmake/help/v3.2/manual/cmake-packages.7.html#user-package-registry>`_
+   where it will be detected **without requiring installation** in a
+   client project using CMake when `find_package(Ceres)
+   <http://www.cmake.org/cmake/help/v3.2/command/find_package.html>`_
+   is invoked.
+
+#. ``BUILD_DOCUMENTATION [Default: OFF]``: Use this to enable building
+   the documentation, requires `Sphinx <http://sphinx-doc.org/>`_ and
+   the `sphinx-better-theme
+   <https://pypi.python.org/pypi/sphinx-better-theme>`_ package
+   available from the Python package index. In addition, ``make
+   ceres_docs`` can be used to build only the documentation.
+
+#. ``MSVC_USE_STATIC_CRT [Default: OFF]`` *Windows Only*: By default
+   Ceres will use the Visual Studio default, *shared* C run-time (CRT)
+   library.  Turn this ``ON`` to use the *static* C run-time library
+   instead.
+
+#. ``LIB_SUFFIX [Default: "64" on non-Debian/Arch based 64-bit Linux,
+   otherwise: ""]``: The suffix to append to the library install
+   directory, built from:
+   ``${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}``.
+
+   The filesystem hierarchy standard recommends that 64-bit systems
+   install native libraries to lib64 rather than lib.  Most Linux
+   distributions follow this convention, but Debian and Arch based
+   distros do not.  Note that the only generally sensible values for
+   ``LIB_SUFFIX`` are "" and "64".
+
+   Although by default Ceres will auto-detect non-Debian/Arch based
+   64-bit Linux distributions and default ``LIB_SUFFIX`` to "64", this
+   can always be overridden by manually specifying LIB_SUFFIX using:
+   ``-DLIB_SUFFIX=<VALUE>`` when invoking CMake.
+
+
+Options controlling Ceres dependency locations
+----------------------------------------------
+
+Ceres uses the ``CMake`` `find_package
+<http://www.cmake.org/cmake/help/v3.2/command/find_package.html>`_
+function to find all of its dependencies using
+``Find<DEPENDENCY_NAME>.cmake`` scripts which are either included in
+Ceres (for most dependencies) or are shipped as standard with
+``CMake`` (for ``LAPACK`` & ``BLAS``).  These scripts will search all
+of the "standard" install locations for various OSs for each
+dependency.  However, particularly for Windows, they may fail to find
+the library, in this case you will have to manually specify its
+installed location.  The ``Find<DEPENDENCY_NAME>.cmake`` scripts
+shipped with Ceres support two ways for you to do this:
+
+#. Set the *hints* variables specifying the *directories* to search in
+   preference to, but in addition to, the default search directories in
+   the ``Find<DEPENDENCY_NAME>.cmake`` script:
+
+   - ``<DEPENDENCY_NAME (CAPS)>_INCLUDE_DIR_HINTS``
+   - ``<DEPENDENCY_NAME (CAPS)>_LIBRARY_DIR_HINTS``
+
+   These variables should be set via ``-D<VAR>=<VALUE>``
+   ``CMake`` arguments as they are not visible in the GUI.
+
+#. Set the variables specifying the *explicit* include directory
+   and library file to use:
+
+   - ``<DEPENDENCY_NAME (CAPS)>_INCLUDE_DIR``
+   - ``<DEPENDENCY_NAME (CAPS)>_LIBRARY``
+
+   This bypasses *all* searching in the
+   ``Find<DEPENDENCY_NAME>.cmake`` script, but validation is still
+   performed.
+
+   These variables are available to set in the ``CMake`` GUI. They are
+   visible in the *Standard View* if the library has not been found
+   (but the current Ceres configuration requires it), but are always
+   visible in the *Advanced View*.  They can also be set directly via
+   ``-D<VAR>=<VALUE>`` arguments to ``CMake``.
+
+Building using custom BLAS & LAPACK installs
+----------------------------------------------
+
+If the standard find package scripts for ``BLAS`` & ``LAPACK`` which
+ship with ``CMake`` fail to find the desired libraries on your system,
+try setting ``CMAKE_LIBRARY_PATH`` to the path(s) to the directories
+containing the ``BLAS`` & ``LAPACK`` libraries when invoking ``CMake``
+to build Ceres via ``-D<VAR>=<VALUE>``.  This should result in the
+libraries being found for any common variant of each.
+
+Alternatively, you may also directly specify the ``BLAS_LIBRARIES`` and
+``LAPACK_LIBRARIES`` variables via ``-D<VAR>=<VALUE>`` when invoking CMake
+to configure Ceres.
+
+.. _section-using-ceres:
+
+Using Ceres with CMake
+======================
+
+In order to use Ceres in client code with CMake using `find_package()
+<http://www.cmake.org/cmake/help/v3.2/command/find_package.html>`_,
+either:
+
+#. Ceres must have been installed with ``make install``.  If the
+   install location is non-standard (i.e. is not in CMake's default
+   search paths) then it will not be detected by default, see:
+   :ref:`section-local-installations`.
+
+   Note that if you are using a non-standard install location you
+   should consider exporting Ceres instead, as this will not require
+   any extra information to be provided in client code for Ceres to
+   be detected.
+
+#. Or Ceres' build directory must have been exported by enabling the
+   ``EXPORT_BUILD_DIR`` option when Ceres was configured.
+
+
+As an example of how to use Ceres, to compile `examples/helloworld.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
+in a separate standalone project, the following CMakeLists.txt can be
+used:
+
+.. code-block:: cmake
+
+    cmake_minimum_required(VERSION 3.5)
+
+    project(helloworld)
+
+    find_package(Ceres REQUIRED)
+
+    # helloworld
+    add_executable(helloworld helloworld.cc)
+    target_link_libraries(helloworld ${CERES_LIBRARIES})
+
+Irrespective of whether Ceres was installed or exported, if multiple
+versions are detected, set ``Ceres_DIR`` to control which is used.
+If Ceres was installed ``Ceres_DIR`` should be the path to the
+directory containing the installed ``CeresConfig.cmake`` file
+(e.g. ``/usr/local/share/Ceres``).  If Ceres was exported, then
+``Ceres_DIR`` should be the path to the exported Ceres build
+directory.
+
+  .. NOTE ::
+
+     You do not need to call include_directories(${CERES_INCLUDE_DIRS})
+     as the exported Ceres CMake target already contains the definitions
+     of its public include directories which will be automatically
+     included by CMake when compiling a target that links against Ceres.
+
+Specify Ceres components
+-------------------------------------
+
+You can specify particular Ceres components that you require (in order
+for Ceres to be reported as found) when invoking
+``find_package(Ceres)``.  This allows you to specify, for example,
+that you require a version of Ceres built with SuiteSparse support.
+By definition, if you do not specify any components when calling
+``find_package(Ceres)`` (the default) any version of Ceres detected
+will be reported as found, irrespective of which components it was
+built with.
+
+The Ceres components which can be specified are:
+
+#. ``LAPACK``: Ceres built using LAPACK (``LAPACK=ON``).
+
+#. ``SuiteSparse``: Ceres built with SuiteSparse (``SUITESPARSE=ON``).
+
+#. ``CXSparse``: Ceres built with CXSparse (``CXSPARSE=ON``).
+
+#. ``AccelerateSparse``: Ceres built with Apple's Accelerate sparse solvers (``ACCELERATESPARSE=ON``).
+
+#. ``EigenSparse``: Ceres built with Eigen's sparse Cholesky factorization
+   (``EIGENSPARSE=ON``).
+
+#. ``SparseLinearAlgebraLibrary``: Ceres built with *at least one* sparse linear
+   algebra library.  This is equivalent to ``SuiteSparse`` **OR** ``CXSparse``
+   **OR** ``AccelerateSparse``  **OR** ``EigenSparse``.
+
+#. ``SchurSpecializations``: Ceres built with Schur specializations
+   (``SCHUR_SPECIALIZATIONS=ON``).
+
+#. ``OpenMP``: Ceres built with OpenMP (``CERES_THREADING_MODEL=OPENMP``).
+
+#. ``Multithreading``: Ceres built with *a* multithreading library.
+   This is equivalent to (``CERES_THREADING_MODEL != NO_THREADS``).
+
+#. ``C++11``: Ceres built with C++11.
+
+To specify one/multiple Ceres components use the ``COMPONENTS`` argument to
+`find_package()
+<http://www.cmake.org/cmake/help/v3.2/command/find_package.html>`_ like so:
+
+.. code-block:: cmake
+
+    # Find a version of Ceres compiled with SuiteSparse & EigenSparse support.
+    #
+    # NOTE: This will report Ceres as **not** found if the detected version of
+    #            Ceres was not compiled with both SuiteSparse & EigenSparse.
+    #            Remember, if you have multiple versions of Ceres installed, you
+    #            can use Ceres_DIR to specify which should be used.
+    find_package(Ceres REQUIRED COMPONENTS SuiteSparse EigenSparse)
+
+
+Specify Ceres version
+---------------------
+
+Additionally, when CMake has found Ceres it can optionally check the package
+version, if it has been specified in the `find_package()
+<http://www.cmake.org/cmake/help/v3.2/command/find_package.html>`_
+call.  For example:
+
+.. code-block:: cmake
+
+    find_package(Ceres 1.2.3 REQUIRED)
+
+.. _section-local-installations:
+
+Local installations
+-------------------
+
+If Ceres was installed in a non-standard path by specifying
+``-DCMAKE_INSTALL_PREFIX="/some/where/local"``, then the user should
+add the **PATHS** option to the ``find_package()`` command, e.g.,
+
+.. code-block:: cmake
+
+   find_package(Ceres REQUIRED PATHS "/some/where/local/")
+
+Note that this can be used to have multiple versions of Ceres
+installed.  However, particularly if you have only a single version of
+Ceres which you want to use but do not wish to install to a system
+location, you should consider exporting Ceres using the
+``EXPORT_BUILD_DIR`` option instead of a local install, as exported
+versions of Ceres will be automatically detected by CMake,
+irrespective of their location.
+
+Understanding the CMake Package System
+----------------------------------------
+
+Although a full tutorial on CMake is outside the scope of this guide,
+here we cover some of the most common CMake misunderstandings that
+crop up when using Ceres.  For more detailed CMake usage, the
+following references are very useful:
+
+- The `official CMake tutorial <http://www.cmake.org/cmake-tutorial/>`_
+
+   Provides a tour of the core features of CMake.
+
+- `ProjectConfig tutorial
+  <http://www.cmake.org/Wiki/CMake/Tutorials/How_to_create_a_ProjectConfig.cmake_file>`_
+  and the `cmake-packages documentation
+  <http://www.cmake.org/cmake/help/git-master/manual/cmake-packages.7.html>`_
+
+   Cover how to write a ``ProjectConfig.cmake`` file, discussed below,
+   for your own project when installing or exporting it using CMake.
+   They also cover how these processes, in conjunction with
+   ``find_package()``, are actually handled by CMake.  The
+   `ProjectConfig tutorial
+   <http://www.cmake.org/Wiki/CMake/Tutorials/How_to_create_a_ProjectConfig.cmake_file>`_
+   is the older style, currently used by Ceres for compatibility with
+   older versions of CMake.
+
+  .. NOTE :: **Targets in CMake.**
+
+    All libraries and executables built using CMake are represented as
+    *targets* created using `add_library()
+    <http://www.cmake.org/cmake/help/v3.2/command/add_library.html>`_
+    and `add_executable()
+    <http://www.cmake.org/cmake/help/v3.2/command/add_executable.html>`_.
+    Targets encapsulate the rules and dependencies (which can be other
+    targets) required to build or link against an object.  This allows
+    CMake to implicitly manage dependency chains.  Thus it is
+    sufficient to tell CMake that a library target: ``B`` depends on a
+    previously declared library target ``A``, and CMake will
+    understand that this means that ``B`` also depends on all of the
+    public dependencies of ``A``.
+
+When a project like Ceres is installed using CMake, or its build
+directory is exported into the local CMake package registry (see
+:ref:`section-install-vs-export`), in addition to the public headers
+and compiled libraries, a set of CMake-specific project configuration
+files are also installed to: ``<INSTALL_ROOT>/share/Ceres`` (if Ceres
+is installed), or created in the build directory (if Ceres' build
+directory is exported).  When `find_package
+<http://www.cmake.org/cmake/help/v3.2/command/find_package.html>`_ is
+invoked, CMake checks various standard install locations (including
+``/usr/local`` on Linux & UNIX systems), and the local CMake package
+registry for CMake configuration files for the project to be found
+(i.e. Ceres in the case of ``find_package(Ceres)``).  Specifically it
+looks for:
+
+- ``<PROJECT_NAME>Config.cmake`` (or
+  ``<lower_case_project_name>-config.cmake``)
+
+   Which is written by the developers of the project, and is
+   configured with the selected options and installed locations when
+   the project is built and defines the CMake variables:
+   ``<PROJECT_NAME>_INCLUDE_DIRS`` & ``<PROJECT_NAME>_LIBRARIES``
+   which are used by the caller to import the project.
+
+The ``<PROJECT_NAME>Config.cmake`` typically includes a second file
+installed to the same location:
+
+- ``<PROJECT_NAME>Targets.cmake``
+
+   Which is autogenerated by CMake as part of the install process and defines
+   **imported targets** for the project in the caller's CMake scope.
+
+An **imported target** contains the same information about a library
+as a CMake target that was declared locally in the current CMake
+project using ``add_library()``.  However, imported targets refer to
+objects that have already been built by a different CMake project.
+Principally, an imported target contains the location of the compiled
+object and all of its public dependencies required to link against it.
+Any locally declared target can depend on an imported target, and
+CMake will manage the dependency chain, just as if the imported target
+had been declared locally by the current project.
+
+Crucially, just like any locally declared CMake target, an imported target is
+identified by its **name** when adding it as a dependency to another target.
+
+Thus, if in a project using Ceres you had the following in your CMakeLists.txt:
+
+.. code-block:: cmake
+
+    find_package(Ceres REQUIRED)
+    message("CERES_LIBRARIES = ${CERES_LIBRARIES}")
+
+You would see the output: ``CERES_LIBRARIES = ceres``.  **However**,
+here ``ceres`` is an **imported target** created when
+``CeresTargets.cmake`` was read as part of ``find_package(Ceres
+REQUIRED)``.  It does **not** refer (directly) to the compiled Ceres
+library: ``libceres.a/so/dylib/lib``.  This distinction is important,
+as depending on the options selected when it was built, Ceres can have
+public link dependencies which are encapsulated in the imported target
+and automatically added to the link step when Ceres is added as a
+dependency of another target by CMake.  In this case, linking only
+against ``libceres.a/so/dylib/lib`` without these other public
+dependencies would result in a linker error.
+
+Note that this description applies both to projects that are
+**installed** using CMake, and to those whose **build directory is
+exported** using `export()
+<http://www.cmake.org/cmake/help/v3.2/command/export.html>`_ (instead
+of `install()
+<http://www.cmake.org/cmake/help/v3.2/command/install.html>`_).  Ceres
+supports both installation and export of its build directory if the
+``EXPORT_BUILD_DIR`` option is enabled, see
+:ref:`section-customizing`.
+
+.. _section-install-vs-export:
+
+Installing a project with CMake vs Exporting its build directory
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When a project is **installed**, the compiled libraries and headers
+are copied from the source & build directory to the install location,
+and it is these copied files that are used by any client code.  When a
+project's build directory is **exported**, instead of copying the
+compiled libraries and headers, CMake creates an entry for the project
+in the `user's local CMake package registry
+<http://www.cmake.org/cmake/help/v3.2/manual/cmake-packages.7.html#user-package-registry>`_,
+``<USER_HOME>/.cmake/packages`` on Linux & OS X, which contains the
+path to the project's build directory which will be checked by CMake
+during a call to ``find_package()``.  The effect is that any
+client code uses the compiled libraries and headers in the build
+directory directly, **thus not requiring the project to be installed
+in order to be used**.
+
+Installing / Exporting a project that uses Ceres
+--------------------------------------------------
+
+As described in `Understanding the CMake Package System`_, the content of
+the ``CERES_LIBRARIES`` variable is the **name** of an imported target which
+represents Ceres.  If you are installing / exporting your *own* project which
+*uses* Ceres, it is important to understand that:
+
+**Imported targets are not (re)exported when a project which imported them is
+exported**.
+
+Thus, when a project ``Foo`` which uses Ceres is exported, its list of
+dependencies as seen by another project ``Bar`` which imports ``Foo``
+via: ``find_package(Foo REQUIRED)`` will contain: ``ceres``.  However,
+the definition of ``ceres`` as an imported target is **not
+(re)exported** when Foo is exported.  Hence, without any additional
+steps, when processing ``Bar``, ``ceres`` will not be defined as an
+imported target.  Thus, when processing ``Bar``, CMake will assume
+that ``ceres`` refers directly to ``libceres.a/so/dylib/lib`` (the
+compiled Ceres library), if it is on the current list of
+search paths.  In that case, no CMake errors will occur, but ``Bar``
+will not link properly, as it does not have the required public link
+dependencies of Ceres, which are stored in the imported target
+definition.
+
+The solution to this is for ``Foo`` (i.e., the project that uses
+Ceres) to invoke ``find_package(Ceres)`` in ``FooConfig.cmake``, thus
+``ceres`` will be defined as an imported target when CMake processes
+``Bar``.  An example of the required modifications to
+``FooConfig.cmake`` is shown below:
+
+.. code-block:: cmake
+
+    # Importing Ceres in FooConfig.cmake using CMake 2.8.x style.
+    #
+    # When configure_file() is used to generate FooConfig.cmake from
+    # FooConfig.cmake.in, @Ceres_DIR@ will be replaced with the current
+    # value of Ceres_DIR being used by Foo.  This should be passed as a hint
+    # when invoking find_package(Ceres) to ensure that the same install of
+    # Ceres is used as was used to build Foo.
+    set(CERES_DIR_HINTS @Ceres_DIR@)
+
+    # Forward the QUIET / REQUIRED options.
+    if (Foo_FIND_QUIETLY)
+       find_package(Ceres QUIET HINTS ${CERES_DIR_HINTS})
+    elseif (Foo_FIND_REQUIRED)
+       find_package(Ceres REQUIRED HINTS ${CERES_DIR_HINTS})
+    else ()
+       find_package(Ceres HINTS ${CERES_DIR_HINTS})
+    endif()
+
+.. code-block:: cmake
+
+    # Importing Ceres in FooConfig.cmake using CMake 3.x style.
+    #
+    # In CMake v3.x, the find_dependency() macro exists to forward the REQUIRED
+    # / QUIET parameters to find_package() when searching for dependencies.
+    #
+    # Note that find_dependency() does not take a path hint, so if Ceres was
+    # installed in a non-standard location, that location must be added to
+    # CMake's search list before this call.
+    include(CMakeFindDependencyMacro)
+    find_dependency(Ceres)
diff --git a/docs/source/interfacing_with_autodiff.rst b/docs/source/interfacing_with_autodiff.rst
new file mode 100644
index 0000000..b79ed45
--- /dev/null
+++ b/docs/source/interfacing_with_autodiff.rst
@@ -0,0 +1,293 @@
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-interfacing_with_automatic_differentiation:
+
+Interfacing with Automatic Differentiation
+==========================================
+
+Automatic differentiation is straightforward to use in cases where an
+explicit expression for the cost function is available. But this is
+not always possible. Often one has to interface with external routines
+or data. In this chapter we will consider a number of different ways
+of doing so.
+
+To do this, we will consider the problem of finding parameters
+:math:`\theta` and :math:`t` that solve an optimization problem of the
+form:
+
+.. math::
+   \min & \quad \sum_i \left \|y_i - f\left (\|q_{i}\|^2\right) q_i
+   \right \|^2\\
+   \text{such that} & \quad q_i = R(\theta) x_i + t
+
+Here, :math:`R` is a two dimensional rotation matrix parameterized
+using the angle :math:`\theta` and :math:`t` is a two dimensional
+vector. :math:`f` is an external distortion function.
+
+We begin by considering the case where we have a templated function
+:code:`TemplatedComputeDistortion` that can compute the function
+:math:`f`. Then the implementation of the corresponding residual
+functor is straightforward and will look as follows:
+
+.. code-block:: c++
+   :emphasize-lines: 21
+
+   template <typename T> T TemplatedComputeDistortion(const T r2) {
+     const double k1 = 0.0082;
+     const double k2 = 0.000023;
+     return 1.0 + k1 * r2 + k2 * r2 * r2;
+   }
+
+   struct Affine2DWithDistortion {
+     Affine2DWithDistortion(const double x_in[2], const double y_in[2]) {
+       x[0] = x_in[0];
+       x[1] = x_in[1];
+       y[0] = y_in[0];
+       y[1] = y_in[1];
+     }
+
+     template <typename T>
+     bool operator()(const T* theta,
+                     const T* t,
+                     T* residuals) const {
+       const T q_0 =  cos(theta[0]) * x[0] - sin(theta[0]) * x[1] + t[0];
+       const T q_1 =  sin(theta[0]) * x[0] + cos(theta[0]) * x[1] + t[1];
+       const T f = TemplatedComputeDistortion(q_0 * q_0 + q_1 * q_1);
+       residuals[0] = y[0] - f * q_0;
+       residuals[1] = y[1] - f * q_1;
+       return true;
+     }
+
+     double x[2];
+     double y[2];
+   };
+
+So far so good, but let us now consider three ways of defining
+:math:`f` which are not directly amenable to being used with automatic
+differentiation:
+
+#. A non-templated function that evaluates its value.
+#. A function that evaluates its value and derivative.
+#. A function that is defined as a table of values to be interpolated.
+
+We will consider them in turn below.
+
+A function that returns its value
+----------------------------------
+
+Suppose we were given a function :code:`ComputeDistortionValue` with
+the following signature
+
+.. code-block:: c++
+
+   double ComputeDistortionValue(double r2);
+
+that computes the value of :math:`f`. The actual implementation of the
+function does not matter. Interfacing this function with
+:code:`Affine2DWithDistortion` is a three step process:
+
+1. Wrap :code:`ComputeDistortionValue` into a functor
+   :code:`ComputeDistortionValueFunctor`.
+2. Numerically differentiate :code:`ComputeDistortionValueFunctor`
+   using :class:`NumericDiffCostFunction` to create a
+   :class:`CostFunction`.
+3. Wrap the resulting :class:`CostFunction` object using
+   :class:`CostFunctionToFunctor`. The resulting object is a functor
+   with a templated :code:`operator()` method, which pipes the
+   Jacobian computed by :class:`NumericDiffCostFunction` into the
+   appropriate :code:`Jet` objects.
+
+An implementation of the above three steps looks as follows:
+
+.. code-block:: c++
+   :emphasize-lines: 15,16,17,18,19,20, 29
+
+   struct ComputeDistortionValueFunctor {
+     bool operator()(const double* r2, double* value) const {
+       *value = ComputeDistortionValue(r2[0]);
+       return true;
+     }
+   };
+
+   struct Affine2DWithDistortion {
+     Affine2DWithDistortion(const double x_in[2], const double y_in[2]) {
+       x[0] = x_in[0];
+       x[1] = x_in[1];
+       y[0] = y_in[0];
+       y[1] = y_in[1];
+
+       compute_distortion.reset(new ceres::CostFunctionToFunctor<1, 1>(
+            new ceres::NumericDiffCostFunction<ComputeDistortionValueFunctor,
+                                               ceres::CENTRAL,
+                                               1,
+                                               1>(
+               new ComputeDistortionValueFunctor)));
+     }
+
+     template <typename T>
+     bool operator()(const T* theta, const T* t, T* residuals) const {
+       const T q_0 = cos(theta[0]) * x[0] - sin(theta[0]) * x[1] + t[0];
+       const T q_1 = sin(theta[0]) * x[0] + cos(theta[0]) * x[1] + t[1];
+       const T r2 = q_0 * q_0 + q_1 * q_1;
+       T f;
+       (*compute_distortion)(&r2, &f);
+       residuals[0] = y[0] - f * q_0;
+       residuals[1] = y[1] - f * q_1;
+       return true;
+     }
+
+     double x[2];
+     double y[2];
+     std::unique_ptr<ceres::CostFunctionToFunctor<1, 1> > compute_distortion;
+   };
+
+
+A function that returns its value and derivative
+------------------------------------------------
+
+Now suppose we are given a function :code:`ComputeDistortionValueAndJacobian`
+that is able to compute its value and optionally its Jacobian on demand
+and has the following signature:
+
+.. code-block:: c++
+
+   void ComputeDistortionValueAndJacobian(double r2,
+                                          double* value,
+                                          double* jacobian);
+
+Again, the actual implementation of the function does not
+matter. Interfacing this function with :code:`Affine2DWithDistortion`
+is a two step process:
+
+1. Wrap :code:`ComputeDistortionValueAndJacobian` into a
+   :class:`CostFunction` object which we call
+   :code:`ComputeDistortionFunction`.
+2. Wrap the resulting :class:`ComputeDistortionFunction` object using
+   :class:`CostFunctionToFunctor`. The resulting object is a functor
+   with a templated :code:`operator()` method, which pipes the
+   Jacobian computed by :code:`ComputeDistortionFunction` into the
+   appropriate :code:`Jet` objects.
+
+The resulting code will look as follows:
+
+.. code-block:: c++
+   :emphasize-lines: 21,22, 33
+
+   class ComputeDistortionFunction : public ceres::SizedCostFunction<1, 1> {
+    public:
+     virtual bool Evaluate(double const* const* parameters,
+                           double* residuals,
+                           double** jacobians) const {
+       if (!jacobians) {
+         ComputeDistortionValueAndJacobian(parameters[0][0], residuals, NULL);
+       } else {
+         ComputeDistortionValueAndJacobian(parameters[0][0], residuals, jacobians[0]);
+       }
+       return true;
+     }
+   };
+
+   struct Affine2DWithDistortion {
+     Affine2DWithDistortion(const double x_in[2], const double y_in[2]) {
+       x[0] = x_in[0];
+       x[1] = x_in[1];
+       y[0] = y_in[0];
+       y[1] = y_in[1];
+       compute_distortion.reset(
+           new ceres::CostFunctionToFunctor<1, 1>(new ComputeDistortionFunction));
+     }
+
+     template <typename T>
+     bool operator()(const T* theta,
+                     const T* t,
+                     T* residuals) const {
+       const T q_0 =  cos(theta[0]) * x[0] - sin(theta[0]) * x[1] + t[0];
+       const T q_1 =  sin(theta[0]) * x[0] + cos(theta[0]) * x[1] + t[1];
+       const T r2 = q_0 * q_0 + q_1 * q_1;
+       T f;
+       (*compute_distortion)(&r2, &f);
+       residuals[0] = y[0] - f * q_0;
+       residuals[1] = y[1] - f * q_1;
+       return true;
+     }
+
+     double x[2];
+     double y[2];
+     std::unique_ptr<ceres::CostFunctionToFunctor<1, 1> > compute_distortion;
+   };
+
+
+A function that is defined as a table of values
+-----------------------------------------------
+
+The third and final case we will consider is where the function
+:math:`f` is defined as a table of values on the interval :math:`[0,
+100)`, with a value for each integer.
+
+.. code-block:: c++
+
+   vector<double> distortion_values;
+
+There are many ways of interpolating a table of values. Perhaps the
+simplest and most common method is linear interpolation. But it is not
+a great idea to use linear interpolation because the interpolating
+function is not differentiable at the sample points.
+
+A simple (well behaved) differentiable interpolation is the `Cubic
+Hermite Spline
+<http://en.wikipedia.org/wiki/Cubic_Hermite_spline>`_. Ceres Solver
+ships with routines to perform Cubic & Bi-Cubic interpolation that are
+automatic differentiation friendly.
+
+Using Cubic interpolation requires first constructing a
+:class:`Grid1D` object to wrap the table of values and then
+constructing a :class:`CubicInterpolator` object using it.
+
+The resulting code will look as follows:
+
+.. code-block:: c++
+   :emphasize-lines: 10,11,12,13, 24, 32,33
+
+   struct Affine2DWithDistortion {
+     Affine2DWithDistortion(const double x_in[2],
+                            const double y_in[2],
+                            const std::vector<double>& distortion_values) {
+       x[0] = x_in[0];
+       x[1] = x_in[1];
+       y[0] = y_in[0];
+       y[1] = y_in[1];
+
+       grid.reset(new ceres::Grid1D<double, 1>(
+           &distortion_values[0], 0, distortion_values.size()));
+       compute_distortion.reset(
+           new ceres::CubicInterpolator<ceres::Grid1D<double, 1> >(*grid));
+     }
+
+     template <typename T>
+     bool operator()(const T* theta,
+                     const T* t,
+                     T* residuals) const {
+       const T q_0 =  cos(theta[0]) * x[0] - sin(theta[0]) * x[1] + t[0];
+       const T q_1 =  sin(theta[0]) * x[0] + cos(theta[0]) * x[1] + t[1];
+       const T r2 = q_0 * q_0 + q_1 * q_1;
+       T f;
+       compute_distortion->Evaluate(r2, &f);
+       residuals[0] = y[0] - f * q_0;
+       residuals[1] = y[1] - f * q_1;
+       return true;
+     }
+
+     double x[2];
+     double y[2];
+     std::unique_ptr<ceres::Grid1D<double, 1> > grid;
+     std::unique_ptr<ceres::CubicInterpolator<ceres::Grid1D<double, 1> > > compute_distortion;
+   };
+
+In the above example we used :class:`Grid1D` and
+:class:`CubicInterpolator` to interpolate a one dimensional table of
+values. :class:`Grid2D` combined with :class:`CubicInterpolator` lets
+the user interpolate two dimensional tables of values. Note that
+neither :class:`Grid1D` nor :class:`Grid2D` is limited to scalar
+valued functions; they also work with vector valued functions.
diff --git a/docs/source/least_squares_fit.png b/docs/source/least_squares_fit.png
new file mode 100644
index 0000000..7dad673
--- /dev/null
+++ b/docs/source/least_squares_fit.png
Binary files differ
diff --git a/docs/source/license.rst b/docs/source/license.rst
new file mode 100644
index 0000000..a3c55c9
--- /dev/null
+++ b/docs/source/license.rst
@@ -0,0 +1,38 @@
+=======
+License
+=======
+
+.. NOTE::
+
+   This page refers only to the license for Ceres itself, independent of its
+   optional dependencies which are separately licensed and which can affect
+   the resulting license of Ceres if built with them enabled.  See
+   :ref:`options-controlling-ceres-configuration` for an overview of these
+   implications.
+
+Ceres Solver is licensed under the New BSD license, whose terms are as follows.
+
+Copyright 2016 Google Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1.    Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+2.    Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+3.    Neither the name of Google Inc.,  nor the names of its contributors may
+      be used to endorse or promote products derived from this software without
+      specific prior written permission.
+
+This software is provided by the copyright holders and contributors "AS IS" and
+any express or implied warranties, including, but not limited to, the implied
+warranties of merchantability and fitness for a particular purpose are
+disclaimed. In no event shall Google Inc. be liable for any direct, indirect,
+incidental, special, exemplary, or consequential damages (including, but not
+limited to, procurement of substitute goods or services; loss of use, data, or
+profits; or business interruption) however caused and on any theory of
+liability, whether in contract, strict liability, or tort (including negligence
+or otherwise) arising in any way out of the use of this software, even if
+advised of the possibility of such damage.
diff --git a/docs/source/loss.png b/docs/source/loss.png
new file mode 100644
index 0000000..9f98d00
--- /dev/null
+++ b/docs/source/loss.png
Binary files differ
diff --git a/docs/source/manhattan_olson_3500_result.png b/docs/source/manhattan_olson_3500_result.png
new file mode 100644
index 0000000..5631dfa
--- /dev/null
+++ b/docs/source/manhattan_olson_3500_result.png
Binary files differ
diff --git a/docs/source/modeling_faqs.rst b/docs/source/modeling_faqs.rst
new file mode 100644
index 0000000..a0c8f2f
--- /dev/null
+++ b/docs/source/modeling_faqs.rst
@@ -0,0 +1,134 @@
+.. _chapter-modeling_faqs:
+
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+========
+Modeling
+========
+
+#. Use analytical/automatic derivatives.
+
+   This is the single most important piece of advice we can give to
+   you. It is tempting to take the easy way out and use numeric
+   differentiation. This is a bad idea. Numeric differentiation is
+   slow, ill-behaved, hard to get right, and results in poor
+   convergence behaviour.
+
+   Ceres allows the user to define templated functors which will
+   be automatically differentiated. For most situations this is enough
+   and we recommend using this facility. In some cases the derivatives
+   are simple enough or the performance considerations are such that
+   the overhead of automatic differentiation is too much. In such
+   cases, analytic derivatives are recommended.
+
+   The use of numerical derivatives should be a measure of last
+   resort, where it is simply not possible to write a templated
+   implementation of the cost function.
+
+   In many cases it is not possible to do analytic or automatic
+   differentiation of the entire cost function, but it is generally
+   the case that it is possible to decompose the cost function into
+   parts that need to be numerically differentiated and parts that can
+   be automatically or analytically differentiated.
+
+   To this end, Ceres has extensive support for mixing analytic,
+   automatic and numeric differentiation. See
+   :class:`CostFunctionToFunctor`.
+
+#. When using Quaternions,  consider using :class:`QuaternionParameterization`.
+
+   `Quaternions <https://en.wikipedia.org/wiki/Quaternion>`_ are a
+   four dimensional parameterization of the space of three dimensional
+   rotations :math:`SO(3)`.  However, :math:`SO(3)` is a three
+   dimensional set, and so is the tangent space of a
+   Quaternion. Therefore, it is sometimes (not always) beneficial to
+   associate a local parameterization with parameter blocks
+   representing a Quaternion. Assuming that the order of entries in
+   your parameter block is :math:`w,x,y,z`, you can use
+   :class:`QuaternionParameterization`.
+
+   .. NOTE::
+
+     If you are using `Eigen's Quaternion
+     <http://eigen.tuxfamily.org/dox/classEigen_1_1Quaternion.html>`_
+     object, whose layout is :math:`x,y,z,w`, then you should use
+     :class:`EigenQuaternionParameterization`.
+
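+   As a concrete sketch (the variable names are illustrative and not
+   taken from the Ceres sources), associating
+   :class:`QuaternionParameterization` with a ``w, x, y, z`` quaternion
+   parameter block looks like this:
+
+   .. code-block:: c++
+
+      // q is stored as [w, x, y, z].
+      double q[4] = {1.0, 0.0, 0.0, 0.0};
+
+      ceres::Problem problem;
+      problem.AddParameterBlock(q, 4, new ceres::QuaternionParameterization);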
+
+#. How do I solve problems with general linear & non-linear
+   **inequality** constraints with Ceres Solver?
+
+   Currently, Ceres Solver only supports upper and lower bounds
+   constraints on the parameter blocks.
+
+   A crude way of dealing with inequality constraints is to have one or
+   more of your cost functions check if the inequalities you are
+   interested in are satisfied, and if not return false instead of
+   true. This will prevent the solver from ever stepping into an
+   infeasible region.
+
+   This requires that the starting point for the optimization be a
+   feasible point.  You also risk premature convergence using this
+   method.
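+
+   For illustration, a sketch (not taken from the Ceres sources) of a
+   functor that declares the evaluation infeasible whenever a
+   hypothetical constraint :math:`x > 0` is violated might look like:
+
+   .. code-block:: c++
+
+      struct ConstrainedResidual {
+        template <typename T>
+        bool operator()(const T* const x, T* residual) const {
+          // Declare the point infeasible; the solver treats the step as
+          // invalid and tries a smaller one.
+          if (x[0] <= T(0.0)) {
+            return false;
+          }
+          residual[0] = T(10.0) - x[0];
+          return true;
+        }
+      };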
+
+#. How do I solve problems with general linear & non-linear **equality**
+   constraints with Ceres Solver?
+
+   There is no built-in support in Ceres for solving problems with
+   equality constraints.  Currently, Ceres Solver only supports upper
+   and lower bounds constraints on the parameter blocks.
+
+   The trick described above for dealing with inequality
+   constraints will **not** work for equality constraints.
+
+#. How do I set one or more components of a parameter block constant?
+
+   Using :class:`SubsetParameterization`.
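+
+   For example, a minimal sketch (the variable names are illustrative)
+   that holds the second component of a three dimensional parameter
+   block constant:
+
+   .. code-block:: c++
+
+      double translation[3] = {1.0, 2.0, 3.0};
+
+      ceres::Problem problem;
+      problem.AddParameterBlock(translation, 3);
+
+      // Indices of the components to hold constant (here, index 1).
+      std::vector<int> constant_components;
+      constant_components.push_back(1);
+      problem.SetParameterization(
+          translation,
+          new ceres::SubsetParameterization(3, constant_components));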
+
+#. Putting `Inverse Function Theorem
+   <http://en.wikipedia.org/wiki/Inverse_function_theorem>`_ to use.
+
+   Every now and then we have to deal with functions which cannot be
+   evaluated analytically. Computing the Jacobian in such cases is
+   tricky. A particularly interesting case is where the inverse of the
+   function is easy to compute analytically. An example of such a
+   function is the coordinate transformation between the `ECEF
+   <http://en.wikipedia.org/wiki/ECEF>`_ and the `WGS84
+   <http://en.wikipedia.org/wiki/World_Geodetic_System>`_ where the
+   conversion from WGS84 to ECEF is analytic, but the conversion
+   back to WGS84 uses an iterative algorithm. So how do you compute the
+   derivative of the ECEF to WGS84 transformation?
+
+   One obvious approach would be to numerically
+   differentiate the conversion function. This is not a good idea. For
+   one, it will be slow, but it will also be numerically quite
+   bad.
+
+   It turns out that you can use the `Inverse Function Theorem
+   <http://en.wikipedia.org/wiki/Inverse_function_theorem>`_ in this
+   case to compute the derivatives more or less analytically.
+
+   The key result here is: if :math:`x = f^{-1}(y)` and :math:`Df(x)`
+   is the invertible Jacobian of :math:`f` at :math:`x`, then the
+   Jacobian :math:`Df^{-1}(y) = [Df(x)]^{-1}`, i.e., the Jacobian of
+   :math:`f^{-1}` is the inverse of the Jacobian of :math:`f`.
+
+   Algorithmically this means that given :math:`y`, compute :math:`x =
+   f^{-1}(y)` by whatever means you can. Evaluate the Jacobian of
+   :math:`f` at :math:`x`. If the Jacobian matrix is invertible, then
+   its inverse is the Jacobian of :math:`f^{-1}(y)` at  :math:`y`.
+
+   One can put this into practice with the following code fragment.
+
+   .. code-block:: c++
+
+      Eigen::Vector3d ecef; // Fill some values
+      // Iterative computation.
+      Eigen::Vector3d lla = ECEFToLLA(ecef);
+      // Analytic derivatives
+      Eigen::Matrix3d lla_to_ecef_jacobian = LLAToECEFJacobian(lla);
+      bool invertible;
+      Eigen::Matrix3d ecef_to_lla_jacobian;
+      lla_to_ecef_jacobian.computeInverseWithCheck(ecef_to_lla_jacobian, invertible);
diff --git a/docs/source/nnls_covariance.rst b/docs/source/nnls_covariance.rst
new file mode 100644
index 0000000..9c6cea8
--- /dev/null
+++ b/docs/source/nnls_covariance.rst
@@ -0,0 +1,376 @@
+
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-nnls_covariance:
+
+=====================
+Covariance Estimation
+=====================
+
+Introduction
+============
+
+One way to assess the quality of the solution returned by a non-linear
+least squares solver is to analyze the covariance of the solution.
+
+Let us consider the non-linear regression problem
+
+.. math::  y = f(x) + N(0, I)
+
+i.e., the observation :math:`y` is a random non-linear function of the
+independent variable :math:`x` with mean :math:`f(x)` and identity
+covariance. Then the maximum likelihood estimate of :math:`x` given
+observations :math:`y` is the solution to the non-linear least squares
+problem:
+
+.. math:: x^* = \arg \min_x \|f(x)\|^2
+
+And the covariance of :math:`x^*` is given by
+
+.. math:: C(x^*) = \left(J'(x^*)J(x^*)\right)^{-1}
+
+Here :math:`J(x^*)` is the Jacobian of :math:`f` at :math:`x^*`. The
+above formula assumes that :math:`J(x^*)` has full column rank.
+
+If :math:`J(x^*)` is rank deficient, then the covariance matrix :math:`C(x^*)`
+is also rank deficient and is given by the Moore-Penrose pseudo inverse.
+
+.. math:: C(x^*) =  \left(J'(x^*)J(x^*)\right)^{\dagger}
+
+Note that in the above, we assumed that the covariance matrix for
+:math:`y` was identity. This is an important assumption. If this is
+not the case and we have
+
+.. math:: y = f(x) + N(0, S)
+
+Where :math:`S` is a positive semi-definite matrix denoting the
+covariance of :math:`y`, then the maximum likelihood problem to be
+solved is
+
+.. math:: x^* = \arg \min_x f'(x) S^{-1} f(x)
+
+and the corresponding covariance estimate of :math:`x^*` is given by
+
+.. math:: C(x^*) = \left(J'(x^*) S^{-1} J(x^*)\right)^{-1}
+
+So, if it is the case that the observations being fitted to have a
+covariance matrix not equal to identity, then it is the user's
+responsibility to ensure that the corresponding cost functions are correctly
+scaled, e.g. in the above case the cost function for this problem
+should evaluate :math:`S^{-1/2} f(x)` instead of just :math:`f(x)`,
+where :math:`S^{-1/2}` is the inverse square root of the covariance
+matrix :math:`S`.
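+
+As an illustration, here is a minimal sketch (not part of the Ceres
+sources; all names are hypothetical) of a residual functor that whitens
+a two dimensional residual :math:`y - x` using an upper triangular
+factor :math:`U` with :math:`U^\top U = S^{-1}`, which plays the role of
+:math:`S^{-1/2}`:
+
+.. code-block:: c++
+
+   struct WhitenedResidual {
+     WhitenedResidual(const Eigen::Vector2d& y, const Eigen::Matrix2d& S)
+         : y_(y) {
+       // Factor S^{-1} = U' U once; U plays the role of S^{-1/2}.
+       const Eigen::Matrix2d S_inverse = S.inverse();
+       sqrt_information_ = S_inverse.llt().matrixU();
+     }
+
+     template <typename T>
+     bool operator()(const T* const x, T* residuals) const {
+       Eigen::Matrix<T, 2, 1> f;
+       f(0) = T(y_(0)) - x[0];
+       f(1) = T(y_(1)) - x[1];
+       // Scale the residual so that the scaled residuals have identity
+       // covariance.
+       const Eigen::Matrix<T, 2, 1> whitened =
+           sqrt_information_.template cast<T>() * f;
+       residuals[0] = whitened(0);
+       residuals[1] = whitened(1);
+       return true;
+     }
+
+     Eigen::Vector2d y_;
+     Eigen::Matrix2d sqrt_information_;
+   };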
+
+Gauge Invariance
+================
+
+In structure from motion (3D reconstruction) problems, the
+reconstruction is ambiguous up to a similarity transform. This is
+known as a *Gauge Ambiguity*. Handling Gauges correctly requires the
+use of SVD or custom inversion algorithms. For small problems the
+user can use the dense algorithm. For more details see the work of
+Kanatani & Morris [KanataniMorris]_.
+
+
+:class:`Covariance`
+===================
+
+:class:`Covariance` allows the user to evaluate the covariance for a
+non-linear least squares problem and provides random access to its
+blocks. The computation assumes that the cost functions compute
+residuals such that their covariance is identity.
+
+Since the computation of the covariance matrix requires computing the
+inverse of a potentially large matrix, this can involve a rather large
+amount of time and memory. However, it is usually the case that the
+user is only interested in a small part of the covariance
+matrix. Quite often just the block diagonal. :class:`Covariance`
+allows the user to specify the parts of the covariance matrix that she
+is interested in and then uses this information to only compute and
+store those parts of the covariance matrix.
+
+Rank of the Jacobian
+====================
+
+As we noted above, if the Jacobian is rank deficient, then the inverse
+of :math:`J'J` is not defined and instead a pseudo inverse needs to be
+computed.
+
+The rank deficiency in :math:`J` can be *structural*, i.e., columns
+which are always known to be zero, or *numerical*, i.e., dependent on
+the exact values in the Jacobian.
+
+Structural rank deficiency occurs when the problem contains parameter
+blocks that are constant. :class:`Covariance` correctly handles this
+kind of structural rank deficiency.
+
+Numerical rank deficiency, where the rank of the matrix cannot be
+predicted by its sparsity structure and requires looking at its
+numerical values, is more complicated. Here again there are two
+cases.
+
+  a. The rank deficiency arises from overparameterization. e.g., a
+     four dimensional quaternion used to parameterize :math:`SO(3)`,
+     which is a three dimensional manifold. In cases like this, the
+     user should use an appropriate
+     :class:`LocalParameterization`. Not only will this lead to better
+     numerical behaviour of the Solver, it will also expose the rank
+     deficiency to the :class:`Covariance` object so that it can
+     handle it correctly.
+
+  b. More general numerical rank deficiency in the Jacobian requires
+     the computation of the so called Singular Value Decomposition
+     (SVD) of :math:`J'J`. We do not know how to do this for large
+     sparse matrices efficiently. For small and moderate sized
+     problems this is done using dense linear algebra.
+
+
+:class:`Covariance::Options`
+==============================
+
+.. class:: Covariance::Options
+
+.. member:: int Covariance::Options::num_threads
+
+   Default: ``1``
+
+   Number of threads to be used for evaluating the Jacobian and
+   estimation of covariance.
+
+.. member:: SparseLinearAlgebraLibraryType Covariance::Options::sparse_linear_algebra_library_type
+
+   Default: ``SUITE_SPARSE`` if Ceres Solver is built with support for
+   `SuiteSparse <http://faculty.cse.tamu.edu/davis/suitesparse.html>`_,
+   and ``EIGEN_SPARSE`` otherwise. Note that ``EIGEN_SPARSE`` is
+   always available.
+
+.. member:: CovarianceAlgorithmType Covariance::Options::algorithm_type
+
+   Default: ``SPARSE_QR``
+
+   Ceres supports two different algorithms for covariance estimation,
+   which represent different tradeoffs in speed, accuracy and
+   reliability.
+
+   1. ``SPARSE_QR`` uses the sparse QR factorization algorithm to
+      compute the decomposition
+
+       .. math::
+
+          QR &= J\\
+          \left(J^\top J\right)^{-1} &= \left(R^\top R\right)^{-1}
+
+      The speed of this algorithm depends on the sparse linear algebra
+      library being used. ``Eigen``'s sparse QR factorization is a
+      moderately fast algorithm suitable for small to medium sized
+      matrices. For best performance we recommend using
+      ``SuiteSparseQR`` which is enabled by setting
+      :member:`Covariance::Options::sparse_linear_algebra_library_type`
+      to ``SUITE_SPARSE``.
+
+      ``SPARSE_QR`` cannot compute the covariance if the
+      Jacobian is rank deficient.
+
+
+   2. ``DENSE_SVD`` uses ``Eigen``'s ``JacobiSVD`` to perform the
+      computations. It computes the singular value decomposition
+
+      .. math::   U S V^\top = J
+
+      and then uses it to compute the pseudo inverse of J'J as
+
+      .. math::   (J'J)^{\dagger} = V  S^{\dagger}  V^\top
+
+      It is an accurate but slow method and should only be used for
+      small to moderate sized problems. It can handle full-rank as
+      well as rank deficient Jacobians.
+
+
+.. member:: double Covariance::Options::min_reciprocal_condition_number
+
+   Default: :math:`10^{-14}`
+
+   If the Jacobian matrix is near singular, then inverting :math:`J'J`
+   will result in unreliable results, e.g., if
+
+   .. math::
+
+     J = \begin{bmatrix}
+         1.0& 1.0 \\
+         1.0& 1.0000001
+         \end{bmatrix}
+
+   which is essentially a rank deficient matrix, we have
+
+   .. math::
+
+     (J'J)^{-1} = \begin{bmatrix}
+                  2.0471e+14 & -2.0471e+14 \\
+                  -2.0471e+14 & 2.0471e+14
+                  \end{bmatrix}
+
+
+   This is not a useful result. Therefore, by default
+   :func:`Covariance::Compute` will return ``false`` if a rank
+   deficient Jacobian is encountered. How rank deficiency is detected
+   depends on the algorithm being used.
+
+   1. ``DENSE_SVD``
+
+      .. math:: \frac{\sigma_{\text{min}}}{\sigma_{\text{max}}}  < \sqrt{\text{min_reciprocal_condition_number}}
+
+      where :math:`\sigma_{\text{min}}` and
+      :math:`\sigma_{\text{max}}` are the minimum and maximum
+      singular values of :math:`J` respectively.
+
+   2. ``SPARSE_QR``
+
+       .. math:: \operatorname{rank}(J) < \operatorname{num\_col}(J)
+
+       Here :math:`\operatorname{rank}(J)` is the estimate of the rank
+       of :math:`J` returned by the sparse QR factorization
+       algorithm. It is a fairly reliable indication of rank
+       deficiency.
+
+.. member:: int Covariance::Options::null_space_rank
+
+    When using ``DENSE_SVD``, the user has more control in dealing
+    with singular and near singular covariance matrices.
+
+    As mentioned above, when the covariance matrix is near singular,
+    instead of computing the inverse of :math:`J'J`, the Moore-Penrose
+    pseudoinverse of :math:`J'J` should be computed.
+
+    If :math:`J'J` has the eigen decomposition :math:`(\lambda_i,
+    e_i)`, where :math:`\lambda_i` is the :math:`i^\textrm{th}`
+    eigenvalue and :math:`e_i` is the corresponding eigenvector, then
+    the inverse of :math:`J'J` is
+
+    .. math:: (J'J)^{-1} = \sum_i \frac{1}{\lambda_i} e_i e_i'
+
+    and computing the pseudo inverse involves dropping terms from this
+    sum that correspond to small eigenvalues.
+
+    How terms are dropped is controlled by
+    `min_reciprocal_condition_number` and `null_space_rank`.
+
+    If `null_space_rank` is non-negative, then the smallest
+    `null_space_rank` eigenvalues/eigenvectors are dropped irrespective
+    of the magnitude of :math:`\lambda_i`. If the ratio of the
+    smallest non-zero eigenvalue to the largest eigenvalue in the
+    truncated matrix is still below `min_reciprocal_condition_number`,
+    then `Covariance::Compute()` will fail and return `false`.
+
+    Setting `null_space_rank = -1` drops all terms for which
+
+    .. math::  \frac{\lambda_i}{\lambda_{\textrm{max}}} < \textrm{min_reciprocal_condition_number}
+
+    This option has no effect on ``SPARSE_QR``.
+
+.. member:: bool Covariance::Options::apply_loss_function
+
+   Default: `true`
+
+   Even though the residual blocks in the problem may contain loss
+   functions, setting ``apply_loss_function`` to false will turn off
+   the application of the loss function to the output of the cost
+   function and in turn its effect on the covariance.
+
+.. class:: Covariance
+
+   :class:`Covariance::Options`, as the name implies, is used to control
+   the covariance estimation algorithm. Covariance estimation is a
+   complicated and numerically sensitive procedure. Please read the
+   entire documentation for :class:`Covariance::Options` before using
+   :class:`Covariance`.
+
+.. function:: bool Covariance::Compute(const vector<pair<const double*, const double*> >& covariance_blocks, Problem* problem)
+
+   Compute a part of the covariance matrix.
+
+   The vector ``covariance_blocks`` indexes into the covariance
+   matrix block-wise using pairs of parameter blocks. This allows the
+   covariance estimation algorithm to only compute and store these
+   blocks.
+
+   Since the covariance matrix is symmetric, if the user passes
+   ``<block1, block2>``, then ``GetCovarianceBlock`` can be called with
+   ``block1``, ``block2`` as well as ``block2``, ``block1``.
+
+   ``covariance_blocks`` cannot contain duplicates. Bad things will
+   happen if they do.
+
+   Note that the list of ``covariance_blocks`` is only used to
+   determine what parts of the covariance matrix are computed. The
+   full Jacobian is used to do the computation, i.e. they do not have
+   an impact on what part of the Jacobian is used for computation.
+
+   The return value indicates the success or failure of the covariance
+   computation. Please see the documentation for
+   :class:`Covariance::Options` for more on the conditions under which
+   this function returns ``false``.
+
+.. function:: bool GetCovarianceBlock(const double* parameter_block1, const double* parameter_block2, double* covariance_block) const
+
+   Return the block of the cross-covariance matrix corresponding to
+   ``parameter_block1`` and ``parameter_block2``.
+
+   Compute must be called before the first call to ``GetCovarianceBlock``
+   and the pair ``<parameter_block1, parameter_block2>`` OR the pair
+   ``<parameter_block2, parameter_block1>`` must have been present in the
+   vector covariance_blocks when ``Compute`` was called. Otherwise
+   ``GetCovarianceBlock`` will return false.
+
+   ``covariance_block`` must point to a memory location that can store
+   a ``parameter_block1_size x parameter_block2_size`` matrix. The
+   returned covariance will be a row-major matrix.
+
+.. function:: bool GetCovarianceBlockInTangentSpace(const double* parameter_block1, const double* parameter_block2, double* covariance_block) const
+
+   Return the block of the cross-covariance matrix corresponding to
+   ``parameter_block1`` and ``parameter_block2``.
+   Returns cross-covariance in the tangent space if a local
+   parameterization is associated with either parameter block;
+   else returns cross-covariance in the ambient space.
+
+   Compute must be called before the first call to ``GetCovarianceBlock``
+   and the pair ``<parameter_block1, parameter_block2>`` OR the pair
+   ``<parameter_block2, parameter_block1>`` must have been present in the
+   vector covariance_blocks when ``Compute`` was called. Otherwise
+   ``GetCovarianceBlock`` will return false.
+
+   ``covariance_block`` must point to a memory location that can store
+   a ``parameter_block1_local_size x parameter_block2_local_size`` matrix. The
+   returned covariance will be a row-major matrix.
+
+Example Usage
+=============
+
+.. code-block:: c++
+
+ double x[3];
+ double y[2];
+
+ Problem problem;
+ problem.AddParameterBlock(x, 3);
+ problem.AddParameterBlock(y, 2);
+ <Build Problem>
+ <Solve Problem>
+
+ Covariance::Options options;
+ Covariance covariance(options);
+
+ vector<pair<const double*, const double*> > covariance_blocks;
+ covariance_blocks.push_back(make_pair(x, x));
+ covariance_blocks.push_back(make_pair(y, y));
+ covariance_blocks.push_back(make_pair(x, y));
+
+ CHECK(covariance.Compute(covariance_blocks, &problem));
+
+ double covariance_xx[3 * 3];
+ double covariance_yy[2 * 2];
+ double covariance_xy[3 * 2];
+ covariance.GetCovarianceBlock(x, x, covariance_xx);
+ covariance.GetCovarianceBlock(y, y, covariance_yy);
+ covariance.GetCovarianceBlock(x, y, covariance_xy);
diff --git a/docs/source/nnls_modeling.rst b/docs/source/nnls_modeling.rst
new file mode 100644
index 0000000..860b689
--- /dev/null
+++ b/docs/source/nnls_modeling.rst
@@ -0,0 +1,2073 @@
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _`chapter-nnls_modeling`:
+
+=================================
+Modeling Non-linear Least Squares
+=================================
+
+Introduction
+============
+
+Ceres Solver consists of two distinct parts: a modeling API, which
+provides a rich set of tools to construct an optimization problem one
+term at a time, and a solver API that controls the minimization
+algorithm. This chapter is devoted to the task of modeling
+optimization problems using Ceres. :ref:`chapter-nnls_solving` discusses
+the various ways in which an optimization problem can be solved using
+Ceres.
+
+Ceres solves robustified bounds constrained non-linear least squares
+problems of the form:
+
+.. math:: :label: ceresproblem_modeling
+
+   \min_{\mathbf{x}} &\quad \frac{1}{2}\sum_{i}
+   \rho_i\left(\left\|f_i\left(x_{i_1},
+   ... ,x_{i_k}\right)\right\|^2\right)  \\
+   \text{s.t.} &\quad l_j \le x_j \le u_j
+
+In Ceres parlance, the expression
+:math:`\rho_i\left(\left\|f_i\left(x_{i_1},...,x_{i_k}\right)\right\|^2\right)`
+is known as a **residual block**, where :math:`f_i(\cdot)` is a
+:class:`CostFunction` that depends on the **parameter blocks**
+:math:`\left\{x_{i_1},... , x_{i_k}\right\}`.
+
+In most optimization problems small groups of scalars occur
+together. For example, the three components of a translation vector and
+the four components of the quaternion that define the pose of a
+camera. We refer to such a group of scalars as a **parameter block**. Of
+course a parameter block can be just a single scalar too.
+
+:math:`\rho_i` is a :class:`LossFunction`. A :class:`LossFunction` is
+a scalar valued function that is used to reduce the influence of
+outliers on the solution of non-linear least squares problems.
+
+:math:`l_j` and :math:`u_j` are lower and upper bounds on the
+parameter block :math:`x_j`.
+
+As a special case, when :math:`\rho_i(x) = x`, i.e., the identity
+function, and :math:`l_j = -\infty` and :math:`u_j = \infty` we get
+the more familiar unconstrained `non-linear least squares problem
+<http://en.wikipedia.org/wiki/Non-linear_least_squares>`_.
+
+.. math:: :label: ceresproblemunconstrained
+
+   \frac{1}{2}\sum_{i} \left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2.
+
+:class:`CostFunction`
+=====================
+
+For each term in the objective function, a :class:`CostFunction` is
+responsible for computing a vector of residuals and Jacobian
+matrices. Concretely, consider a function
+:math:`f\left(x_{1},...,x_{k}\right)` that depends on parameter blocks
+:math:`\left[x_{1}, ... , x_{k}\right]`.
+
+Then, given :math:`\left[x_{1}, ... , x_{k}\right]`,
+:class:`CostFunction` is responsible for computing the vector
+:math:`f\left(x_{1},...,x_{k}\right)` and the Jacobian matrices
+
+.. math:: J_i =  \frac{\partial}{\partial x_i} f(x_1, ..., x_k) \quad \forall i \in \{1, \ldots, k\}
+
+.. class:: CostFunction
+
+   .. code-block:: c++
+
+    class CostFunction {
+     public:
+      virtual bool Evaluate(double const* const* parameters,
+                            double* residuals,
+                            double** jacobians) const = 0;
+      const vector<int32>& parameter_block_sizes();
+      int num_residuals() const;
+
+     protected:
+      vector<int32>* mutable_parameter_block_sizes();
+      void set_num_residuals(int num_residuals);
+    };
+
+
+The signature of the :class:`CostFunction` (number and sizes of input
+parameter blocks and number of outputs) is stored in
+:member:`CostFunction::parameter_block_sizes_` and
+:member:`CostFunction::num_residuals_` respectively. User code
+inheriting from this class is expected to set these two members with
+the corresponding accessors. This information will be verified by the
+:class:`Problem` when added with :func:`Problem::AddResidualBlock`.
+
+.. function:: bool CostFunction::Evaluate(double const* const* parameters, double* residuals, double** jacobians)
+
+   Compute the residual vector and the Jacobian matrices.
+
+   ``parameters`` is an array of arrays of size
+   ``CostFunction::parameter_block_sizes_.size()`` and
+   ``parameters[i]`` is an array of size ``parameter_block_sizes_[i]``
+   that contains the :math:`i^{\text{th}}` parameter block that the
+   ``CostFunction`` depends on.
+
+   ``parameters`` is never ``NULL``.
+
+   ``residuals`` is an array of size ``num_residuals_``.
+
+   ``residuals`` is never ``NULL``.
+
+   ``jacobians`` is an array of arrays of size
+   ``CostFunction::parameter_block_sizes_.size()``.
+
+   If ``jacobians`` is ``NULL``, the user is only expected to compute
+   the residuals.
+
+   ``jacobians[i]`` is a row-major array of size ``num_residuals x
+   parameter_block_sizes_[i]``.
+
+   If ``jacobians[i]`` is **not** ``NULL``, the user is required to
+   compute the Jacobian of the residual vector with respect to
+   ``parameters[i]`` and store it in this array, i.e.
+
+   ``jacobians[i][r * parameter_block_sizes_[i] + c]`` =
+   :math:`\frac{\displaystyle \partial \text{residual}[r]}{\displaystyle \partial \text{parameters}[i][c]}`
+
+   If ``jacobians[i]`` is ``NULL``, then this computation can be
+   skipped. This is the case when the corresponding parameter block is
+   marked constant.
+
+   The return value indicates whether the computation of the residuals
+   and/or jacobians was successful or not. This can be used to
+   communicate numerical failures in Jacobian computations for
+   instance.
+
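+For illustration, a minimal sketch (not part of the Ceres sources) of a
+hand-written cost function for the residual :math:`f(x) = 10 - x`, where
+:math:`x` is a single parameter block of size one, could look as follows:
+
+.. code-block:: c++
+
+   class ExampleCostFunction : public ceres::CostFunction {
+    public:
+     ExampleCostFunction() {
+       // Declare the signature: one residual and one parameter block of
+       // size one.
+       set_num_residuals(1);
+       mutable_parameter_block_sizes()->push_back(1);
+     }
+
+     virtual bool Evaluate(double const* const* parameters,
+                           double* residuals,
+                           double** jacobians) const {
+       const double x = parameters[0][0];
+       residuals[0] = 10.0 - x;
+
+       // Fill in the Jacobian only if it was requested.
+       if (jacobians != NULL && jacobians[0] != NULL) {
+         jacobians[0][0] = -1.0;  // d(10 - x) / dx
+       }
+       return true;
+     }
+   };
+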
+:class:`SizedCostFunction`
+==========================
+
+.. class:: SizedCostFunction
+
+   If the size of the parameter blocks and the size of the residual
+   vector are known at compile time (this is the common case),
+   :class:`SizedCostFunction` can be used where these values can be
+   specified as template parameters and the user only needs to
+   implement :func:`CostFunction::Evaluate`.
+
+   .. code-block:: c++
+
+    template<int kNumResiduals,
+             int N0 = 0, int N1 = 0, int N2 = 0, int N3 = 0, int N4 = 0,
+             int N5 = 0, int N6 = 0, int N7 = 0, int N8 = 0, int N9 = 0>
+    class SizedCostFunction : public CostFunction {
+     public:
+      virtual bool Evaluate(double const* const* parameters,
+                            double* residuals,
+                            double** jacobians) const = 0;
+    };
+
+
+:class:`AutoDiffCostFunction`
+=============================
+
+.. class:: AutoDiffCostFunction
+
+   Defining a :class:`CostFunction` or a :class:`SizedCostFunction`
+   can be a tedious and error prone process, especially when computing
+   derivatives.  To this end Ceres provides `automatic differentiation
+   <http://en.wikipedia.org/wiki/Automatic_differentiation>`_.
+
+   .. code-block:: c++
+
+     template <typename CostFunctor,
+            int kNumResiduals,  // Number of residuals, or ceres::DYNAMIC.
+            int N0,       // Number of parameters in block 0.
+            int N1 = 0,   // Number of parameters in block 1.
+            int N2 = 0,   // Number of parameters in block 2.
+            int N3 = 0,   // Number of parameters in block 3.
+            int N4 = 0,   // Number of parameters in block 4.
+            int N5 = 0,   // Number of parameters in block 5.
+            int N6 = 0,   // Number of parameters in block 6.
+            int N7 = 0,   // Number of parameters in block 7.
+            int N8 = 0,   // Number of parameters in block 8.
+            int N9 = 0>   // Number of parameters in block 9.
+     class AutoDiffCostFunction : public
+     SizedCostFunction<kNumResiduals, N0, N1, N2, N3, N4, N5, N6, N7, N8, N9> {
+      public:
+       explicit AutoDiffCostFunction(CostFunctor* functor);
+       // Ignore the template parameter kNumResiduals and use
+       // num_residuals instead.
+       AutoDiffCostFunction(CostFunctor* functor, int num_residuals);
+     };
+
+   To get an auto differentiated cost function, you must define a
+   class with a templated ``operator()`` (a functor) that computes the
+   cost function in terms of the template parameter ``T``. The
+   autodiff framework substitutes appropriate ``Jet`` objects for
+   ``T`` in order to compute the derivative when necessary, but this
+   is hidden, and you should write the function as if ``T`` were a
+   scalar type (e.g. a double-precision floating point number).
+
+   The function must write the computed value in the last argument
+   (the only non-``const`` one) and return true to indicate success.
+
+   For example, consider a scalar error :math:`e = k - x^\top y`,
+   where both :math:`x` and :math:`y` are two-dimensional vector
+   parameters and :math:`k` is a constant. The form of this error,
+   which is the difference between a constant and an expression, is a
+   common pattern in least squares problems. For example, the value
+   :math:`x^\top y` might be the model expectation for a series of
+   measurements, where there is an instance of the cost function for
+   each measurement :math:`k`.
+
+   The actual cost added to the total problem is :math:`e^2`, or
+   :math:`(k - x^\top y)^2`; however, the squaring is implicitly done
+   by the optimization framework.
+
+   To write an auto-differentiable cost function for the above model,
+   first define the object
+
+   .. code-block:: c++
+
+    class MyScalarCostFunctor {
+     public:
+      MyScalarCostFunctor(double k): k_(k) {}
+
+      template <typename T>
+      bool operator()(const T* const x , const T* const y, T* e) const {
+        e[0] = k_ - x[0] * y[0] - x[1] * y[1];
+        return true;
+      }
+
+     private:
+      double k_;
+    };
+
+
+   Note that in the declaration of ``operator()`` the input parameters
+   ``x`` and ``y`` come first, and are passed as const pointers to arrays
+   of ``T``. If there were three input parameters, then the third input
+   parameter would come after ``y``. The output is always the last
+   parameter, and is also a pointer to an array. In the example above,
+   ``e`` is a scalar, so only ``e[0]`` is set.
+
+   Then given this class definition, the auto differentiated cost
+   function for it can be constructed as follows.
+
+   .. code-block:: c++
+
+    CostFunction* cost_function
+        = new AutoDiffCostFunction<MyScalarCostFunctor, 1, 2, 2>(
+            new MyScalarCostFunctor(1.0));              ^  ^  ^
+                                                        |  |  |
+                            Dimension of residual ------+  |  |
+                            Dimension of x ----------------+  |
+                            Dimension of y -------------------+
+
+
+   In this example, there is usually an instance for each measurement
+   of ``k``.
+
+   In the instantiation above, the template parameters following
+   ``MyScalarCostFunctor``, ``<1, 2, 2>``, describe the functor as
+   computing a 1-dimensional output from two arguments, both
+   2-dimensional.
+
+   :class:`AutoDiffCostFunction` also supports cost functions with a
+   runtime-determined number of residuals. For example:
+
+   .. code-block:: c++
+
+     CostFunction* cost_function
+         = new AutoDiffCostFunction<CostFunctorWithDynamicNumResiduals,
+                                                         DYNAMIC, 2, 2>(
+             new CostFunctorWithDynamicNumResiduals(1.0),   ^     ^  ^
+             runtime_number_of_residuals); <----+           |     |  |
+                                                |           |     |  |
+                                                |           |     |  |
+               Actual number of residuals ------+           |     |  |
+               Indicate dynamic number of residuals --------+     |  |
+               Dimension of x ------------------------------------+  |
+               Dimension of y ---------------------------------------+
+
+   The framework can currently accommodate cost functions of up to 10
+   independent variables, and there is no limit on the dimensionality
+   of each of them.
+
+   **WARNING** A common beginner's error when first using
+   :class:`AutoDiffCostFunction` is to get the sizing wrong. In particular,
+   there is a tendency to set the template parameters to (dimension of
+   residual, number of parameters) instead of passing a dimension
+   parameter for *every parameter block*. In the example above, that
+   would be ``<MyScalarCostFunctor, 1, 2>``, which is missing the 2
+   as the last template argument.
+
+
+:class:`DynamicAutoDiffCostFunction`
+====================================
+
+.. class:: DynamicAutoDiffCostFunction
+
+   :class:`AutoDiffCostFunction` requires that the number of parameter
+   blocks and their sizes be known at compile time. It also has an
+   upper limit of 10 parameter blocks. In a number of applications,
+   this is not enough, e.g., Bezier curve fitting, neural network
+   training, etc.
+
+     .. code-block:: c++
+
+      template <typename CostFunctor, int Stride = 4>
+      class DynamicAutoDiffCostFunction : public CostFunction {
+      };
+
+   In such cases :class:`DynamicAutoDiffCostFunction` can be
+   used. Like :class:`AutoDiffCostFunction` the user must define a
+   templated functor, but the signature of the functor differs
+   slightly. The expected interface for the cost functors is:
+
+     .. code-block:: c++
+
+       struct MyCostFunctor {
+         template<typename T>
+         bool operator()(T const* const* parameters, T* residuals) const {
+         }
+       };
+
+   Since the sizing of the parameters is done at runtime, you must
+   also specify the sizes after creating the dynamic autodiff cost
+   function. For example:
+
+     .. code-block:: c++
+
+       DynamicAutoDiffCostFunction<MyCostFunctor, 4>* cost_function =
+         new DynamicAutoDiffCostFunction<MyCostFunctor, 4>(
+           new MyCostFunctor());
+       cost_function->AddParameterBlock(5);
+       cost_function->AddParameterBlock(10);
+       cost_function->SetNumResiduals(21);
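+
+   A minimal sketch of a functor consistent with the sizes above (two
+   parameter blocks of sizes 5 and 10, and 21 residuals); the residual
+   expressions here are purely illustrative:
+
+     .. code-block:: c++
+
+       struct MyCostFunctor {
+         template <typename T>
+         bool operator()(T const* const* parameters, T* residuals) const {
+           const T* x = parameters[0];  // Block of size 5.
+           const T* y = parameters[1];  // Block of size 10.
+           // 21 residuals: 5 from x, 10 from y, and 6 products.
+           for (int i = 0; i < 5; ++i)  residuals[i] = x[i];
+           for (int i = 0; i < 10; ++i) residuals[5 + i] = y[i];
+           for (int i = 0; i < 6; ++i)  residuals[15 + i] = x[0] * y[i];
+           return true;
+         }
+       };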
+
+   Under the hood, the implementation evaluates the cost function
+   multiple times, computing a small set of the derivatives (four by
+   default, controlled by the ``Stride`` template parameter) with each
+   pass. There is a performance tradeoff with the size of the passes:
+   smaller sizes are more cache efficient but result in a larger number
+   of passes, and larger stride lengths can destroy cache-locality
+   while reducing the number of passes over the cost function. The
+   optimal value depends on the number and sizes of the various
+   parameter blocks.
+
+   As a rule of thumb, try using :class:`AutoDiffCostFunction` before
+   you use :class:`DynamicAutoDiffCostFunction`.
+
+:class:`NumericDiffCostFunction`
+================================
+
+.. class:: NumericDiffCostFunction
+
+  In some cases, it is not possible to define a templated cost functor,
+  for example when the evaluation of the residual involves a call to a
+  library function that you do not have control over.  In such a
+  situation, `numerical differentiation
+  <http://en.wikipedia.org/wiki/Numerical_differentiation>`_ can be
+  used.
+
+  .. NOTE ::
+
+    TODO(sameeragarwal): Add documentation for the constructor and for
+    NumericDiffOptions. Update DynamicNumericDiffOptions in a similar
+    manner.
+
+  .. code-block:: c++
+
+      template <typename CostFunctor,
+                NumericDiffMethodType method = CENTRAL,
+                int kNumResiduals = 0,  // Number of residuals, or ceres::DYNAMIC.
+                int N0 = 0,   // Number of parameters in block 0.
+                int N1 = 0,   // Number of parameters in block 1.
+                int N2 = 0,   // Number of parameters in block 2.
+                int N3 = 0,   // Number of parameters in block 3.
+                int N4 = 0,   // Number of parameters in block 4.
+                int N5 = 0,   // Number of parameters in block 5.
+                int N6 = 0,   // Number of parameters in block 6.
+                int N7 = 0,   // Number of parameters in block 7.
+                int N8 = 0,   // Number of parameters in block 8.
+                int N9 = 0>   // Number of parameters in block 9.
+      class NumericDiffCostFunction : public
+      SizedCostFunction<kNumResiduals, N0, N1, N2, N3, N4, N5, N6, N7, N8, N9> {
+      };
+
+  To get a numerically differentiated :class:`CostFunction`, you must
+  define a class with an ``operator()`` (a functor) that computes the
+  residuals. The functor must write the computed value in the last
+  argument (the only non-``const`` one) and return ``true`` to
+  indicate success.  Please see :class:`CostFunction` for details on
+  how the return value may be used to impose simple constraints on the
+  parameter block, e.g., an object of the form
+
+  .. code-block:: c++
+
+     struct ScalarFunctor {
+      public:
+       bool operator()(const double* const x1,
+                       const double* const x2,
+                       double* residuals) const;
+     };
+
+  For example, consider a scalar error :math:`e = k - x'y`, where both
+  :math:`x` and :math:`y` are two-dimensional column vector
+  parameters, the prime sign indicates transposition, and :math:`k` is
+  a constant. The form of this error, which is the difference between
+  a constant and an expression, is a common pattern in least squares
+  problems. For example, the value :math:`x'y` might be the model
+  expectation for a series of measurements, where there is an instance
+  of the cost function for each measurement :math:`k`.
+
+  To write a numerically-differentiable :class:`CostFunction` for the
+  above model, first define the object
+
+  .. code-block:: c++
+
+     class MyScalarCostFunctor {
+      public:
+       explicit MyScalarCostFunctor(double k) : k_(k) {}
+
+       bool operator()(const double* const x,
+                       const double* const y,
+                       double* residuals) const {
+         residuals[0] = k_ - x[0] * y[0] - x[1] * y[1];
+         return true;
+       }
+
+      private:
+       double k_;
+     };
+
+  Note that in the declaration of ``operator()`` the input parameters
+  ``x`` and ``y`` come first, and are passed as const pointers to
+  arrays of ``double`` s. If there were three input parameters, then
+  the third input parameter would come after ``y``. The output is
+  always the last parameter, and is also a pointer to an array. In the
+  example above, the residual is a scalar, so only ``residuals[0]`` is
+  set.
+
+  Then given this class definition, the numerically differentiated
+  :class:`CostFunction` with central differences used for computing
+  the derivative can be constructed as follows.
+
+  .. code-block:: c++
+
+    CostFunction* cost_function
+        = new NumericDiffCostFunction<MyScalarCostFunctor, CENTRAL, 1, 2, 2>(
+            new MyScalarCostFunctor(1.0));                    ^     ^  ^  ^
+                                                              |     |  |  |
+                                  Finite Differencing Scheme -+     |  |  |
+                                  Dimension of residual ------------+  |  |
+                                  Dimension of x ----------------------+  |
+                                  Dimension of y -------------------------+
+
+  In this example, there is usually an instance for each measurement
+  of ``k``.
+
+  In the instantiation above, the template parameters following
+  ``MyScalarCostFunctor`` and ``CENTRAL``, namely ``1, 2, 2``, describe
+  the functor as computing a 1-dimensional output from two arguments,
+  both 2-dimensional.
+
+  :class:`NumericDiffCostFunction` also supports cost functions with a
+  runtime-determined number of residuals. For example:
+
+   .. code-block:: c++
+
+     CostFunction* cost_function
+         = new NumericDiffCostFunction<CostFunctorWithDynamicNumResiduals,
+                                                            CENTRAL, DYNAMIC, 2, 2>(
+             new CostFunctorWithDynamicNumResiduals(1.0),               ^     ^  ^
+             TAKE_OWNERSHIP,                                            |     |  |
+             runtime_number_of_residuals); <----+                       |     |  |
+                                                |                       |     |  |
+                                                |                       |     |  |
+               Actual number of residuals ------+                       |     |  |
+               Indicate dynamic number of residuals --------------------+     |  |
+               Dimension of x ------------------------------------------------+  |
+               Dimension of y ---------------------------------------------------+
+
+
+  The framework can currently accommodate cost functions of up to 10
+  independent variables, and there is no limit on the dimensionality
+  of each of them.
+
+  There are three available numeric differentiation schemes in ceres-solver:
+
+  The ``FORWARD`` difference method, which approximates :math:`f'(x)`
+  by computing :math:`\frac{f(x+h)-f(x)}{h}`, computes the cost
+  function one additional time at :math:`x+h`. It is the fastest but
+  least accurate method.
+
+  The ``CENTRAL`` difference method is more accurate at the cost of
+  twice as many function evaluations as forward difference,
+  estimating :math:`f'(x)` by computing
+  :math:`\frac{f(x+h)-f(x-h)}{2h}`.
+
+  The ``RIDDERS`` difference method [Ridders]_ is an adaptive scheme
+  that estimates derivatives by performing multiple central
+  differences at varying scales. Specifically, the algorithm starts at
+  a certain :math:`h` and as the derivative is estimated, this step
+  size decreases.  To conserve function evaluations and estimate the
+  derivative error, the method performs Richardson extrapolations
+  between the tested step sizes.  The algorithm exhibits considerably
+  higher accuracy, but does so at the cost of additional evaluations of
+  the cost function.
+
+  Consider using ``CENTRAL`` differences to begin with. Based on the
+  results, either try forward difference to improve performance or
+  Ridders' method to improve accuracy.
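+
+  For example (a minimal sketch reusing ``MyScalarCostFunctor`` from
+  above), selecting Ridders' method only requires changing the
+  ``NumericDiffMethodType`` template argument:
+
+  .. code-block:: c++
+
+    CostFunction* cost_function
+        = new NumericDiffCostFunction<MyScalarCostFunctor, RIDDERS, 1, 2, 2>(
+            new MyScalarCostFunctor(1.0));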
+
+  **WARNING** A common beginner's error when first using
+  :class:`NumericDiffCostFunction` is to get the sizing wrong. In
+  particular, there is a tendency to set the template parameters to
+  (dimension of residual, number of parameters) instead of passing a
+  dimension parameter for *every parameter block*. In the example above,
+  that would be ``<MyScalarCostFunctor, 1, 2>``, which is missing the
+  last ``2`` argument. Please be careful when setting the size
+  parameters.
+
+
+Numeric Differentiation & LocalParameterization
+-----------------------------------------------
+
+   If your cost function depends on a parameter block that must lie on
+   a manifold and the functor cannot be evaluated for values of that
+   parameter block not on the manifold then you may have problems
+   numerically differentiating such functors.
+
+   This is because numeric differentiation in Ceres is performed by
+   perturbing the individual coordinates of the parameter blocks that
+   a cost functor depends on. In doing so, we assume that the
+   parameter blocks live in a Euclidean space and ignore the
+   structure of the manifold that they live on. As a result, some of
+   the perturbations may not lie on the manifold corresponding to the
+   parameter block.
+
+   For example, consider a four dimensional parameter block that is
+   interpreted as a unit quaternion. Perturbing the coordinates of
+   this parameter block will violate the unit norm property of the
+   parameter block.
+
+   Fixing this problem requires that :class:`NumericDiffCostFunction`
+   be aware of the :class:`LocalParameterization` associated with each
+   parameter block and only generate perturbations in the local
+   tangent space of each parameter block.
+
+   For now this is not considered to be a serious enough problem to
+   warrant changing the :class:`NumericDiffCostFunction` API. Further,
+   in most cases it is relatively straightforward to project a point
+   off the manifold back onto the manifold before using it in the
+   functor. For example, in the case of the quaternion, normalizing the
+   4-vector before using it does the trick.
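+
+   As a minimal sketch (the functor and the residual computation here
+   are hypothetical), the normalization can be done at the top of the
+   functor before the parameter block is used:
+
+   .. code-block:: c++
+
+     struct QuaternionFunctor {
+       bool operator()(const double* const q_raw, double* residuals) const {
+         // Project the perturbed 4-vector back onto the unit sphere
+         // before interpreting it as a quaternion.
+         double q[4] = {q_raw[0], q_raw[1], q_raw[2], q_raw[3]};
+         const double norm =
+             sqrt(q[0] * q[0] + q[1] * q[1] + q[2] * q[2] + q[3] * q[3]);
+         for (int i = 0; i < 4; ++i) q[i] /= norm;
+         // ... compute residuals using the normalized quaternion q ...
+         return true;
+       }
+     };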
+
+   **Alternate Interface**
+
+   For a variety of reasons, including compatibility with legacy code,
+   :class:`NumericDiffCostFunction` can also take
+   :class:`CostFunction` objects as input. The following describes
+   how.
+
+   To get a numerically differentiated cost function, define a
+   subclass of :class:`CostFunction` such that the
+   :func:`CostFunction::Evaluate` function ignores the ``jacobians``
+   parameter. The numeric differentiation wrapper will fill in the
+   jacobian parameter if necessary by repeatedly calling the
+   :func:`CostFunction::Evaluate` with small changes to the
+   appropriate parameters, and computing the slope. For performance,
+   the numeric differentiation wrapper class is templated on the
+   concrete cost function, even though it could be implemented only in
+   terms of the :class:`CostFunction` interface.
+
+   The numerically differentiated version of a cost function can be
+   constructed as follows:
+
+   .. code-block:: c++
+
+     CostFunction* cost_function
+         = new NumericDiffCostFunction<MyCostFunction, CENTRAL, 1, 4, 8>(
+             new MyCostFunction(...), TAKE_OWNERSHIP);
+
+   where ``MyCostFunction`` has 1 residual and 2 parameter blocks with
+   sizes 4 and 8 respectively. Look at the tests for a more detailed
+   example.
+
+:class:`DynamicNumericDiffCostFunction`
+=======================================
+
+.. class:: DynamicNumericDiffCostFunction
+
+   Like :class:`AutoDiffCostFunction`, :class:`NumericDiffCostFunction`
+   requires that the number of parameter blocks and their sizes be
+   known at compile time. It also has an upper limit of 10 parameter
+   blocks. In a number of applications, this is not enough.
+
+     .. code-block:: c++
+
+      template <typename CostFunctor, NumericDiffMethodType method = CENTRAL>
+      class DynamicNumericDiffCostFunction : public CostFunction {
+      };
+
+   In such cases when numeric differentiation is desired,
+   :class:`DynamicNumericDiffCostFunction` can be used.
+
+   Like :class:`NumericDiffCostFunction` the user must define a
+   functor, but the signature of the functor differs slightly. The
+   expected interface for the cost functors is:
+
+     .. code-block:: c++
+
+       struct MyCostFunctor {
+         bool operator()(double const* const* parameters, double* residuals) const {
+         }
+       };
+
+   Since the sizing of the parameters is done at runtime, you must
+   also specify the sizes after creating the dynamic numeric diff cost
+   function. For example:
+
+     .. code-block:: c++
+
+       DynamicNumericDiffCostFunction<MyCostFunctor>* cost_function =
+         new DynamicNumericDiffCostFunction<MyCostFunctor>(new MyCostFunctor);
+       cost_function->AddParameterBlock(5);
+       cost_function->AddParameterBlock(10);
+       cost_function->SetNumResiduals(21);
+
+   As a rule of thumb, try using :class:`NumericDiffCostFunction` before
+   you use :class:`DynamicNumericDiffCostFunction`.
+
+   **WARNING** The same caution about mixing local parameterizations
+   with numeric differentiation that applies to
+   :class:`NumericDiffCostFunction` applies here as well.
+
+:class:`CostFunctionToFunctor`
+==============================
+
+.. class:: CostFunctionToFunctor
+
+   :class:`CostFunctionToFunctor` is an adapter class that allows
+   users to use :class:`CostFunction` objects in templated functors
+   which are to be used for automatic differentiation. This allows
+   the user to seamlessly mix analytic, numeric and automatic
+   differentiation.
+
+   For example, let us assume that
+
+   .. code-block:: c++
+
+     class IntrinsicProjection : public SizedCostFunction<2, 5, 3> {
+       public:
+         IntrinsicProjection(const double* observation);
+         virtual bool Evaluate(double const* const* parameters,
+                               double* residuals,
+                               double** jacobians) const;
+     };
+
+   is a :class:`CostFunction` that implements the projection of a
+   point in its local coordinate system onto its image plane and
+   subtracts it from the observed point projection. It can compute its
+   residual and, either via analytic or numeric differentiation, its
+   Jacobians.
+
+   Now we would like to compose the action of this
+   :class:`CostFunction` with the action of camera extrinsics, i.e.,
+   rotation and translation. Say we have a templated function
+
+   .. code-block:: c++
+
+      template<typename T>
+      void RotateAndTranslatePoint(const T* rotation,
+                                   const T* translation,
+                                   const T* point,
+                                   T* result);
+
+
+   Then we can do the following,
+
+   .. code-block:: c++
+
+    struct CameraProjection {
+      CameraProjection(double* observation)
+      : intrinsic_projection_(new IntrinsicProjection(observation)) {
+      }
+
+      template <typename T>
+      bool operator()(const T* rotation,
+                      const T* translation,
+                      const T* intrinsics,
+                      const T* point,
+                      T* residual) const {
+        T transformed_point[3];
+        RotateAndTranslatePoint(rotation, translation, point, transformed_point);
+
+        // Note that we call intrinsic_projection_, just as if it were
+        // any other templated functor.
+        return intrinsic_projection_(intrinsics, transformed_point, residual);
+      }
+
+     private:
+      CostFunctionToFunctor<2,5,3> intrinsic_projection_;
+    };
+
+   Note that :class:`CostFunctionToFunctor` takes ownership of the
+   :class:`CostFunction` that was passed in to the constructor.
+
+   In the above example, we assumed that ``IntrinsicProjection`` is a
+   ``CostFunction`` capable of evaluating its value and its
+   derivatives. Suppose, if that were not the case and
+   ``IntrinsicProjection`` was defined as follows:
+
+   .. code-block:: c++
+
+    struct IntrinsicProjection {
+      IntrinsicProjection(const double* observation) {
+        observation_[0] = observation[0];
+        observation_[1] = observation[1];
+      }
+
+      bool operator()(const double* calibration,
+                      const double* point,
+                      double* residuals) const {
+        double projection[2];
+        ThirdPartyProjectionFunction(calibration, point, projection);
+        residuals[0] = observation_[0] - projection[0];
+        residuals[1] = observation_[1] - projection[1];
+        return true;
+      }
+     double observation_[2];
+    };
+
+
+   Here ``ThirdPartyProjectionFunction`` is some third party library
+   function that we have no control over. This function can compute
+   its value, but we would like to use numeric differentiation to
+   compute its derivatives. In this case we can use a combination of
+   ``NumericDiffCostFunction`` and ``CostFunctionToFunctor`` to get the
+   job done.
+
+   .. code-block:: c++
+
+    struct CameraProjection {
+      CameraProjection(double* observation)
+          : intrinsic_projection_(
+                new NumericDiffCostFunction<IntrinsicProjection, CENTRAL, 2, 5, 3>(
+                    new IntrinsicProjection(observation))) {
+      }
+
+      template <typename T>
+      bool operator()(const T* rotation,
+                      const T* translation,
+                      const T* intrinsics,
+                      const T* point,
+                      T* residuals) const {
+        T transformed_point[3];
+        RotateAndTranslatePoint(rotation, translation, point, transformed_point);
+        return intrinsic_projection_(intrinsics, transformed_point, residuals);
+      }
+
+     private:
+      CostFunctionToFunctor<2,5,3> intrinsic_projection_;
+    };
+
+:class:`DynamicCostFunctionToFunctor`
+=====================================
+
+.. class:: DynamicCostFunctionToFunctor
+
+   :class:`DynamicCostFunctionToFunctor` provides the same functionality as
+   :class:`CostFunctionToFunctor` for cases where the number and size of the
+   parameter vectors and residuals are not known at compile-time. The API
+   provided by :class:`DynamicCostFunctionToFunctor` matches what would be
+   expected by :class:`DynamicAutoDiffCostFunction`, i.e. it provides a
+   templated functor of this form:
+
+   .. code-block:: c++
+
+    template<typename T>
+    bool operator()(T const* const* parameters, T* residuals) const;
+
+   Similar to the example given for :class:`CostFunctionToFunctor`, let us
+   assume that
+
+   .. code-block:: c++
+
+     class IntrinsicProjection : public CostFunction {
+       public:
+         IntrinsicProjection(const double* observation);
+         virtual bool Evaluate(double const* const* parameters,
+                               double* residuals,
+                               double** jacobians) const;
+     };
+
+   is a :class:`CostFunction` that projects a point in its local coordinate
+   system onto its image plane and subtracts it from the observed point
+   projection.
+
+   Using this :class:`CostFunction` in a templated functor would then look like
+   this:
+
+   .. code-block:: c++
+
+    struct CameraProjection {
+      CameraProjection(double* observation)
+          : intrinsic_projection_(new IntrinsicProjection(observation)) {
+      }
+
+      template <typename T>
+      bool operator()(T const* const* parameters,
+                      T* residual) const {
+        const T* rotation = parameters[0];
+        const T* translation = parameters[1];
+        const T* intrinsics = parameters[2];
+        const T* point = parameters[3];
+
+        T transformed_point[3];
+        RotateAndTranslatePoint(rotation, translation, point, transformed_point);
+
+        const T* projection_parameters[2];
+        projection_parameters[0] = intrinsics;
+        projection_parameters[1] = transformed_point;
+        return intrinsic_projection_(projection_parameters, residual);
+      }
+
+     private:
+      DynamicCostFunctionToFunctor intrinsic_projection_;
+    };
+
+   Like :class:`CostFunctionToFunctor`, :class:`DynamicCostFunctionToFunctor`
+   takes ownership of the :class:`CostFunction` that was passed in to the
+   constructor.
+
+:class:`ConditionedCostFunction`
+================================
+
+.. class:: ConditionedCostFunction
+
+   This class allows you to apply different conditioning to the residual
+   values of a wrapped cost function. An example where this is useful is
+   when you have an existing cost function that produces N values, but you
+   want the total cost to be something other than just the sum of these
+   squared values - maybe you want to apply a different scaling to some
+   values, to change their contribution to the cost.
+
+   Usage:
+
+   .. code-block:: c++
+
+       //  my_cost_function produces N residuals
+       CostFunction* my_cost_function = ...
+       CHECK_EQ(N, my_cost_function->num_residuals());
+       vector<CostFunction*> conditioners;
+
+       //  Make N 1x1 cost functions (1 parameter, 1 residual)
+       CostFunction* f_1 = ...
+       conditioners.push_back(f_1);
+
+       CostFunction* f_N = ...
+       conditioners.push_back(f_N);
+       ConditionedCostFunction* ccf =
+         new ConditionedCostFunction(my_cost_function, conditioners);
+
+
+   Now ``ccf`` 's ``residual[i]`` (i=0..N-1) will be passed through the
+   :math:`i^{\text{th}}` conditioner.
+
+   .. code-block:: c++
+
+      ccf_residual[i] = f_i(my_cost_function_residual[i])
+
+   and the Jacobian will be affected appropriately.
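+
+   For example, a minimal sketch of a conditioner (the functor and the
+   weight are purely illustrative) that scales a residual by a
+   per-residual weight could itself be an automatically differentiated
+   cost function with one 1-dimensional parameter block and one
+   residual:
+
+   .. code-block:: c++
+
+       struct ScaleConditioner {
+         explicit ScaleConditioner(double w) : w(w) {}
+         template <typename T>
+         bool operator()(const T* const r, T* out) const {
+           out[0] = w * r[0];
+           return true;
+         }
+         double w;
+       };
+
+       CostFunction* f_i = new AutoDiffCostFunction<ScaleConditioner, 1, 1>(
+           new ScaleConditioner(weight_i));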
+
+
+:class:`GradientChecker`
+================================
+
+.. class:: GradientChecker
+
+    This class compares the Jacobians returned by a cost function against
+    derivatives estimated using finite differencing. It is meant as a tool for
+    unit testing, giving you more fine-grained control than the check_gradients
+    option in the solver options.
+
+    The condition enforced is that
+
+    .. math:: \forall{i,j}: \frac{J_{ij} - J'_{ij}}{\max_{ij}(J_{ij} - J'_{ij})} < r
+
+    where :math:`J_{ij}` is the jacobian as computed by the supplied cost
+    function (by the user) multiplied by the local parameterization Jacobian,
+    :math:`J'_{ij}` is the jacobian as computed by finite differences,
+    multiplied by the local parameterization Jacobian as well, and :math:`r`
+    is the relative precision.
+
+   Usage:
+
+   .. code-block:: c++
+
+       //  my_cost_function takes two parameter blocks. The first has a local
+       //  parameterization associated with it.
+       CostFunction* my_cost_function = ...
+       LocalParameterization* my_parameterization = ...
+       NumericDiffOptions numeric_diff_options;
+
+       std::vector<LocalParameterization*> local_parameterizations;
+       local_parameterizations.push_back(my_parameterization);
+       local_parameterizations.push_back(NULL);
+
+       std::vector<double> parameter1;
+       std::vector<double> parameter2;
+       // Fill parameter 1 & 2 with test data...
+
+       std::vector<double*> parameter_blocks;
+       parameter_blocks.push_back(parameter1.data());
+       parameter_blocks.push_back(parameter2.data());
+
+       GradientChecker gradient_checker(my_cost_function,
+           local_parameterizations, numeric_diff_options);
+       GradientChecker::ProbeResults results;
+       if (!gradient_checker.Probe(parameter_blocks.data(), 1e-9, &results)) {
+         LOG(ERROR) << "An error has occurred:\n" << results.error_log;
+       }
+
+
+:class:`NormalPrior`
+====================
+
+.. class:: NormalPrior
+
+   .. code-block:: c++
+
+     class NormalPrior: public CostFunction {
+      public:
+       // Check that the number of rows in the vector b is the same as the
+       // number of columns in the matrix A; crash otherwise.
+       NormalPrior(const Matrix& A, const Vector& b);
+
+       virtual bool Evaluate(double const* const* parameters,
+                             double* residuals,
+                             double** jacobians) const;
+      };
+
+   Implements a cost function of the form
+
+   .. math::  cost(x) = ||A(x - b)||^2
+
+   where the matrix :math:`A` and the vector :math:`b` are fixed and :math:`x`
+   is the variable. In case the user is interested in implementing a cost
+   function of the form
+
+   .. math::  cost(x) = (x - \mu)^T S^{-1} (x - \mu)
+
+   where :math:`\mu` is a vector and :math:`S` is a covariance matrix,
+   then :math:`A = S^{-1/2}`, i.e., the matrix :math:`A` is the square
+   root of the inverse of the covariance, also known as the stiffness
+   matrix. There are however no restrictions on the shape of
+   :math:`A`. It is free to be rectangular, which would be the case if
+   the covariance matrix :math:`S` is rank deficient.
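+
+   For example, a minimal sketch of a prior on a 3-vector ``x`` with
+   mean ``b`` and identity stiffness (:math:`A = S^{-1/2} = I`), using
+   the Eigen-based ``Matrix`` and ``Vector`` typedefs from the
+   constructor above; the variable names are illustrative:
+
+   .. code-block:: c++
+
+     Matrix A = Matrix::Identity(3, 3);
+     Vector b(3);
+     b << 1.0, 2.0, 3.0;
+     problem.AddResidualBlock(new NormalPrior(A, b), NULL, x);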
+
+
+
+.. _`section-loss_function`:
+
+:class:`LossFunction`
+=====================
+
+.. class:: LossFunction
+
+   For least squares problems where the minimization may encounter
+   input terms that contain outliers, that is, completely bogus
+   measurements, it is important to use a loss function that reduces
+   their influence.
+
+   Consider a structure from motion problem. The unknowns are 3D
+   points and camera parameters, and the measurements are image
+   coordinates describing the expected reprojected position for a
+   point in a camera. For example, we want to model the geometry of a
+   street scene with fire hydrants and cars, observed by a moving
+   camera with unknown parameters, and the only 3D points we care
+   about are the pointy tippy-tops of the fire hydrants. Our magic
+   image processing algorithm, which is responsible for producing the
+   measurements that are input to Ceres, has found and matched all
+   such tippy-tops in all image frames, except that in one of the
+   frames it mistook a car's headlight for a hydrant. If we didn't do
+   anything special, the residual for the erroneous measurement would
+   result in the entire solution getting pulled away from the optimum
+   to reduce the large error that would otherwise be attributed to the
+   wrong measurement.
+
+   Using a robust loss function, the cost for large residuals is
+   reduced. In the example above, this leads to outlier terms getting
+   down-weighted so they do not overly influence the final solution.
+
+   .. code-block:: c++
+
+    class LossFunction {
+     public:
+      virtual void Evaluate(double s, double out[3]) const = 0;
+    };
+
+
+   The key method is :func:`LossFunction::Evaluate`, which given a
+   non-negative scalar ``s``, computes
+
+   .. math:: out = \begin{bmatrix}\rho(s), & \rho'(s), & \rho''(s)\end{bmatrix}
+
+   Here the convention is that the contribution of a term to the cost
+   function is given by :math:`\frac{1}{2}\rho(s)`, where :math:`s
+   =\|f_i\|^2`. Calling the method with a negative value of :math:`s`
+   is an error and the implementations are not required to handle that
+   case.
+
+   Most sane choices of :math:`\rho` satisfy:
+
+   .. math::
+
+      \rho(0) &= 0\\
+      \rho'(0) &= 1\\
+      \rho'(s) &< 1 \text{ in the outlier region}\\
+      \rho''(s) &< 0 \text{ in the outlier region}
+
+   so that they mimic the squared cost for small residuals.
+
+   **Scaling**
+
+   Given one robustifier :math:`\rho(s)` one can change the length
+   scale at which robustification takes place, by adding a scale
+   factor :math:`a > 0` which gives us :math:`\rho(s,a) = a^2 \rho(s /
+   a^2)` and the first and second derivatives as :math:`\rho'(s /
+   a^2)` and :math:`(1 / a^2) \rho''(s / a^2)` respectively.
+
+
+   The reason for the appearance of squaring is that :math:`a` is in
+   the units of the residual vector norm whereas :math:`s` is a squared
+   norm. For applications it is more convenient to specify :math:`a` than
+   its square.
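+
+   For example (a minimal sketch; ``problem``, ``cost_function`` and
+   ``x`` are assumed to exist), most of the predefined loss functions
+   described below take this scale :math:`a` as a constructor argument,
+   so robustifying a residual block at a length scale of 0.5 looks
+   like:
+
+   .. code-block:: c++
+
+     problem.AddResidualBlock(cost_function, new HuberLoss(0.5), x);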
+
+Instances
+---------
+
+Ceres includes a number of predefined loss functions. For simplicity
+we described their unscaled versions. The figure below illustrates
+their shape graphically. More details can be found in
+``include/ceres/loss_function.h``.
+
+.. figure:: loss.png
+   :figwidth: 500px
+   :height: 400px
+   :align: center
+
+   Shape of the various common loss functions.
+
+.. class:: TrivialLoss
+
+      .. math:: \rho(s) = s
+
+.. class:: HuberLoss
+
+   .. math:: \rho(s) = \begin{cases} s & s \le 1\\ 2 \sqrt{s} - 1 & s > 1 \end{cases}
+
+.. class:: SoftLOneLoss
+
+   .. math:: \rho(s) = 2 (\sqrt{1+s} - 1)
+
+.. class:: CauchyLoss
+
+   .. math:: \rho(s) = \log(1 + s)
+
+.. class:: ArctanLoss
+
+   .. math:: \rho(s) = \arctan(s)
+
+.. class:: TolerantLoss
+
+   .. math:: \rho(s,a,b) = b \log(1 + e^{(s - a) / b}) - b \log(1 + e^{-a / b})
+
+.. class:: ComposedLoss
+
+   Given two loss functions ``f`` and ``g``, implements the loss
+   function ``h(s) = f(g(s))``.
+
+   .. code-block:: c++
+
+      class ComposedLoss : public LossFunction {
+       public:
+        explicit ComposedLoss(const LossFunction* f,
+                              Ownership ownership_f,
+                              const LossFunction* g,
+                              Ownership ownership_g);
+      };
+
+.. class:: ScaledLoss
+
+   Sometimes you want to simply scale the output value of the
+   robustifier. For example, you might want to weight different error
+   terms differently (e.g., weight pixel reprojection errors
+   differently from terrain errors).
+
+   Given a loss function :math:`\rho(s)` and a scalar :math:`a`, :class:`ScaledLoss`
+   implements the function :math:`a \rho(s)`.
+
+   Since we treat a ``NULL`` loss function as the identity loss
+   function, :math:`\rho` = ``NULL`` is a valid input and will result
+   in the input being scaled by :math:`a`. This provides a simple way
+   of implementing a scaled ResidualBlock.
+
+.. class:: LossFunctionWrapper
+
+   Sometimes after the optimization problem has been constructed, we
+   wish to mutate the scale of the loss function. For example, when
+   performing estimation from data which has substantial outliers,
+   convergence can be improved by starting out with a large scale,
+   optimizing the problem and then reducing the scale. This can have
+   better convergence behavior than just using a loss function with a
+   small scale.
+
+   This templated class allows the user to implement a loss function
+   whose scale can be mutated after an optimization problem has been
+   constructed, e.g.,
+
+   .. code-block:: c++
+
+     Problem problem;
+
+     // Add parameter blocks
+
+     CostFunction* cost_function =
+         new AutoDiffCostFunction<UW_Camera_Mapper, 2, 9, 3>(
+             new UW_Camera_Mapper(feature_x, feature_y));
+
+     LossFunctionWrapper* loss_function =
+         new LossFunctionWrapper(new HuberLoss(1.0), TAKE_OWNERSHIP);
+     problem.AddResidualBlock(cost_function, loss_function, parameters);
+
+     Solver::Options options;
+     Solver::Summary summary;
+     Solve(options, &problem, &summary);
+
+     loss_function->Reset(new HuberLoss(1.0), TAKE_OWNERSHIP);
+     Solve(options, &problem, &summary);
+
+
+Theory
+------
+
+Let us consider a problem with a single residual and a single parameter
+block.
+
+.. math::
+
+ \min_x \frac{1}{2}\rho(f^2(x))
+
+
+Then, the robustified gradient and the Gauss-Newton Hessian are
+
+.. math::
+
+        g(x) &= \rho'J^\top(x)f(x)\\
+        H(x) &= J^\top(x)\left(\rho' + 2 \rho''f(x)f^\top(x)\right)J(x)
+
+where the terms involving the second derivatives of :math:`f(x)` have
+been ignored. Note that :math:`H(x)` is indefinite if
+:math:`\rho''f(x)^\top f(x) + \frac{1}{2}\rho' < 0`. If this is not
+the case, then it is possible to re-weight the residual and the Jacobian
+matrix such that the resulting linear least squares problem yields the
+robustified Gauss-Newton step.
+
+
+Let :math:`\alpha` be a root of
+
+.. math:: \frac{1}{2}\alpha^2 - \alpha - \frac{\rho''}{\rho'}\|f(x)\|^2 = 0.
+
+
+Then, define the rescaled residual and Jacobian as
+
+.. math::
+
+        \tilde{f}(x) &= \frac{\sqrt{\rho'}}{1 - \alpha} f(x)\\
+        \tilde{J}(x) &= \sqrt{\rho'}\left(1 - \alpha
+                        \frac{f(x)f^\top(x)}{\left\|f(x)\right\|^2} \right)J(x)
+
+
+In the case :math:`2 \rho''\left\|f(x)\right\|^2 + \rho' \lesssim 0`,
+we limit :math:`\alpha \le 1- \epsilon` for some small
+:math:`\epsilon`. For more details see [Triggs]_.
+
+With this simple rescaling, one can apply any Jacobian based non-linear
+least squares algorithm to robustified non-linear least squares
+problems.
+
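+As a quick sanity check of these formulas, consider the trivial loss
+:math:`\rho(s) = s`, for which :math:`\rho' = 1` and :math:`\rho'' = 0`.
+The equation for :math:`\alpha` reduces to :math:`\frac{1}{2}\alpha^2 -
+\alpha = 0`, whose admissible root (given :math:`\alpha \le 1 -
+\epsilon`) is :math:`\alpha = 0`, and the rescaled quantities reduce to
+the originals,
+
+.. math::
+
+   \tilde{f}(x) = f(x), \quad \tilde{J}(x) = J(x),
+
+i.e., the robustified Gauss-Newton step coincides with the ordinary
+Gauss-Newton step, as expected.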
+
+:class:`LocalParameterization`
+==============================
+
+.. class:: LocalParameterization
+
+   .. code-block:: c++
+
+     class LocalParameterization {
+      public:
+       virtual ~LocalParameterization() {}
+       virtual bool Plus(const double* x,
+                         const double* delta,
+                         double* x_plus_delta) const = 0;
+       virtual bool ComputeJacobian(const double* x, double* jacobian) const = 0;
+       virtual bool MultiplyByJacobian(const double* x,
+                                       const int num_rows,
+                                       const double* global_matrix,
+                                       double* local_matrix) const;
+       virtual int GlobalSize() const = 0;
+       virtual int LocalSize() const = 0;
+     };
+
+   Sometimes the parameters :math:`x` can overparameterize a
+   problem. In that case it is desirable to choose a parameterization
+   to remove the null directions of the cost. More generally, if
+   :math:`x` lies on a manifold of a smaller dimension than the
+   ambient space that it is embedded in, then it is numerically and
+   computationally more effective to optimize it using a
+   parameterization that lives in the tangent space of that manifold
+   at each point.
+
+   For example, a sphere in three dimensions is a two dimensional
+   manifold, embedded in a three dimensional space. At each point on
+   the sphere, the plane tangent to it defines a two dimensional
+   tangent space. For a cost function defined on this sphere, given a
+   point :math:`x`, moving in the direction normal to the sphere at
+   that point is not useful. Thus a better way to parameterize a point
+   on a sphere is to optimize over a two dimensional vector
+   :math:`\Delta x` in the tangent space at that point on the sphere
+   and then "move" to the point :math:`x + \Delta x`, where the
+   move operation involves projecting back onto the sphere. Doing so
+   removes a redundant dimension from the optimization, making it
+   numerically more robust and efficient.
+
+   More generally we can define a function
+
+   .. math:: x' = \boxplus(x, \Delta x),
+
+   where :math:`x'` has the same size as :math:`x`, and :math:`\Delta
+   x` is of size less than or equal to that of :math:`x`. The function
+   :math:`\boxplus` generalizes the definition of vector
+   addition. Thus it satisfies the identity
+
+   .. math:: \boxplus(x, 0) = x,\quad \forall x.
+
+   Instances of :class:`LocalParameterization` implement the
+   :math:`\boxplus` operation and its derivative with respect to
+   :math:`\Delta x` at :math:`\Delta x = 0`.
+
+
+.. function:: int LocalParameterization::GlobalSize()
+
+   The dimension of the ambient space in which the parameter block
+   :math:`x` lives.
+
+.. function:: int LocalParameterization::LocalSize()
+
+   The size of the tangent space
+   that :math:`\Delta x` lives in.
+
+.. function:: bool LocalParameterization::Plus(const double* x, const double* delta, double* x_plus_delta) const
+
+    :func:`LocalParameterization::Plus` implements :math:`\boxplus(x,\Delta x)`.
+
+.. function:: bool LocalParameterization::ComputeJacobian(const double* x, double* jacobian) const
+
+   Computes the Jacobian matrix
+
+   .. math:: J = \left . \frac{\partial }{\partial \Delta x} \boxplus(x,\Delta x)\right|_{\Delta x = 0}
+
+   in row major form.
+
+.. function:: bool MultiplyByJacobian(const double* x, const int num_rows, const double* global_matrix, double* local_matrix) const
+
+   local_matrix = global_matrix * jacobian
+
+   global_matrix is a num_rows x GlobalSize  row major matrix.
+   local_matrix is a num_rows x LocalSize row major matrix.
+   jacobian is the matrix returned by :func:`LocalParameterization::ComputeJacobian` at :math:`x`.
+
+   This is only used by GradientProblem. For most normal uses, it is
+   okay to use the default implementation.
+
+Instances
+---------
+
+.. class:: IdentityParameterization
+
+   A trivial version of :math:`\boxplus` is when :math:`\Delta x` is
+   of the same size as :math:`x` and
+
+   .. math::  \boxplus(x, \Delta x) = x + \Delta x
+
+.. class:: SubsetParameterization
+
+   A more interesting case is when :math:`x` is a two dimensional vector,
+   and the user wishes to hold the first coordinate constant. Then,
+   :math:`\Delta x` is a scalar and :math:`\boxplus` is defined as
+
+   .. math::
+
+      \boxplus(x, \Delta x) = x + \left[ \begin{array}{c} 0 \\ 1
+                                  \end{array} \right] \Delta x
+
+   :class:`SubsetParameterization` generalizes this construction to
+   hold any part of a parameter block constant.
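+
+   For example, a minimal sketch holding coordinate 0 of a
+   2-dimensional parameter block ``x`` constant (the constructor is
+   assumed to take the block size and the indices to hold constant):
+
+   .. code-block:: c++
+
+     std::vector<int> constant_parameters;
+     constant_parameters.push_back(0);
+     problem.SetParameterization(
+         x, new SubsetParameterization(2, constant_parameters));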
+
+.. class:: QuaternionParameterization
+
+   Another example that occurs commonly in Structure from Motion
+   problems is when camera rotations are parameterized using a
+   quaternion. There, it is useful only to make updates orthogonal to
+   that 4-vector defining the quaternion. One way to do this is to let
+   :math:`\Delta x` be a 3 dimensional vector and define
+   :math:`\boxplus` to be
+
+    .. math:: \boxplus(x, \Delta x) = \left[ \cos(|\Delta x|), \frac{\sin\left(|\Delta x|\right)}{|\Delta x|} \Delta x \right] * x
+      :label: quaternion
+
+   The multiplication between the two 4-vectors on the right hand side
+   is the standard quaternion
+   product. :class:`QuaternionParameterization` is an implementation
+   of :eq:`quaternion`.
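+
+   Using it is a one-liner; a minimal sketch, assuming
+   ``camera_rotation`` is a 4-dimensional parameter block holding the
+   quaternion:
+
+   .. code-block:: c++
+
+     problem.SetParameterization(camera_rotation,
+                                 new QuaternionParameterization());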
+
+.. class:: EigenQuaternionParameterization
+
+   Eigen uses a different internal memory layout for the elements of the
+   quaternion than what is commonly used. Specifically, Eigen stores the
+   elements in memory as [x, y, z, w] where the real part is last
+   whereas it is typically stored first. Note, when creating an Eigen
+   quaternion through the constructor the elements are accepted in w, x,
+   y, z order. Since Ceres operates on parameter blocks which are raw
+   double pointers this difference is important and requires a different
+   parameterization. :class:`EigenQuaternionParameterization` uses the
+   same update as :class:`QuaternionParameterization` but takes into
+   account Eigen's internal memory element ordering.
+
+.. class:: HomogeneousVectorParameterization
+
+   In computer vision, homogeneous vectors are commonly used to
+   represent entities in projective geometry such as points in
+   projective space. One example where it is useful to use this
+   over-parameterization is in representing points whose triangulation
+   is ill-conditioned. Here it is advantageous to use homogeneous
+   vectors, instead of Euclidean vectors, because they can represent
+   points at infinity.
+
+   When using homogeneous vectors it is useful to only make updates
+   orthogonal to that :math:`n`-vector defining the homogeneous
+   vector [HartleyZisserman]_. One way to do this is to let :math:`\Delta x`
+   be a :math:`n-1` dimensional vector and define :math:`\boxplus` to be
+
+    .. math:: \boxplus(x, \Delta x) = \left[ \frac{\sin\left(0.5 |\Delta x|\right)}{|\Delta x|} \Delta x, \cos(0.5 |\Delta x|) \right] * x
+
+   The multiplication between the two vectors on the right hand side
+   is defined as an operator which applies the update orthogonal to
+   :math:`x` to remain on the sphere. Note, it is assumed that the
+   last element of :math:`x` is the scalar component of the homogeneous
+   vector.
+
+
+.. class:: ProductParameterization
+
+   Consider an optimization problem over the space of rigid
+   transformations :math:`SE(3)`, which is the Cartesian product of
+   :math:`SO(3)` and :math:`\mathbb{R}^3`. Suppose you are using
+   quaternions to represent the rotation; Ceres ships with a local
+   parameterization for that, and :math:`\mathbb{R}^3` requires no
+   parameterization, or rather the :class:`IdentityParameterization`
+   parameterization. So how do we construct a local parameterization
+   for a parameter block containing a rigid transformation?
+
+   In cases where a parameter block is the Cartesian product of a
+   number of manifolds and you have the local parameterizations of the
+   individual manifolds available, :class:`ProductParameterization`
+   can be used to construct a local parameterization of the Cartesian
+   product. For the case of the rigid transformation, say you have a
+   parameter block of size 7, where the first four entries represent
+   the rotation as a quaternion and the last three the translation; a
+   local parameterization can be constructed as
+
+   .. code-block:: c++
+
+     ProductParameterization se3_param(new QuaternionParameterization(),
+                                       new IdentityParameterization(3));
+
+
+:class:`AutoDiffLocalParameterization`
+======================================
+
+.. class:: AutoDiffLocalParameterization
+
+  :class:`AutoDiffLocalParameterization` does for
+  :class:`LocalParameterization` what :class:`AutoDiffCostFunction`
+  does for :class:`CostFunction`. It allows the user to define a
+  templated functor that implements the
+  :func:`LocalParameterization::Plus` operation and it uses automatic
+  differentiation to implement the computation of the Jacobian.
+
+  To get an auto differentiated local parameterization, you must
+  define a class with a templated operator() (a functor) that computes
+
+     .. math:: x' = \boxplus(x, \Delta x),
+
+  For example, Quaternions have a three dimensional local
+  parameterization. Its plus operation can be implemented as (taken
+  from `internal/ceres/autodiff_local_parameterization_test.cc
+  <https://ceres-solver.googlesource.com/ceres-solver/+/master/internal/ceres/autodiff_local_parameterization_test.cc>`_
+  )
+
+    .. code-block:: c++
+
+      struct QuaternionPlus {
+        template<typename T>
+        bool operator()(const T* x, const T* delta, T* x_plus_delta) const {
+          const T squared_norm_delta =
+              delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2];
+
+          T q_delta[4];
+          if (squared_norm_delta > 0.0) {
+            T norm_delta = sqrt(squared_norm_delta);
+            const T sin_delta_by_delta = sin(norm_delta) / norm_delta;
+            q_delta[0] = cos(norm_delta);
+            q_delta[1] = sin_delta_by_delta * delta[0];
+            q_delta[2] = sin_delta_by_delta * delta[1];
+            q_delta[3] = sin_delta_by_delta * delta[2];
+          } else {
+            // We do not just use q_delta = [1,0,0,0] here because that is a
+            // constant and when used for automatic differentiation will
+            // lead to a zero derivative. Instead we take a first order
+            // approximation and evaluate it at zero.
+            q_delta[0] = T(1.0);
+            q_delta[1] = delta[0];
+            q_delta[2] = delta[1];
+            q_delta[3] = delta[2];
+          }
+
+          QuaternionProduct(q_delta, x, x_plus_delta);
+          return true;
+        }
+      };
+
+  Given this struct, the auto differentiated local
+  parameterization can now be constructed as
+
+  .. code-block:: c++
+
+     LocalParameterization* local_parameterization =
+         new AutoDiffLocalParameterization<QuaternionPlus, 4, 3>;
+                                                           |  |
+                                Global Size ---------------+  |
+                                Local Size -------------------+
+
+
+:class:`Problem`
+================
+
+.. class:: Problem
+
+   :class:`Problem` holds the robustified bounds constrained
+   non-linear least squares problem :eq:`ceresproblem_modeling`. To
+   create a least squares problem, use the
+   :func:`Problem::AddResidualBlock` and
+   :func:`Problem::AddParameterBlock` methods.
+
+   For example, a problem containing 3 parameter blocks of sizes 3, 4
+   and 5 respectively and two residual blocks of size 2 and 6:
+
+   .. code-block:: c++
+
+     double x1[] = { 1.0, 2.0, 3.0 };
+     double x2[] = { 1.0, 2.0, 3.0, 5.0 };
+     double x3[] = { 1.0, 2.0, 3.0, 6.0, 7.0 };
+
+     Problem problem;
+     problem.AddResidualBlock(new MyUnaryCostFunction(...), NULL, x1);
+     problem.AddResidualBlock(new MyBinaryCostFunction(...), NULL, x2, x3);
+
+   :func:`Problem::AddResidualBlock` as the name implies, adds a
+   residual block to the problem. It adds a :class:`CostFunction`, an
+   optional :class:`LossFunction` and connects the
+   :class:`CostFunction` to a set of parameter blocks.
+
+   The cost function carries with it information about the sizes of
+   the parameter blocks it expects. The function checks that these
+   match the sizes of the parameter blocks listed in
+   ``parameter_blocks``. The program aborts if a mismatch is
+   detected. ``loss_function`` can be ``NULL``, in which case the cost
+   of the term is just the squared norm of the residuals.
+
+   The user has the option of explicitly adding the parameter blocks
+   using :func:`Problem::AddParameterBlock`. This causes additional
+   correctness checking; however, :func:`Problem::AddResidualBlock`
+   implicitly adds the parameter blocks if they are not present, so
+   calling :func:`Problem::AddParameterBlock` explicitly is not
+   required.
+
+   :func:`Problem::AddParameterBlock` explicitly adds a parameter
+   block to the :class:`Problem`. Optionally it allows the user to
+   associate a :class:`LocalParameterization` object with the
+   parameter block too. Repeated calls with the same arguments are
+   ignored. Repeated calls with the same double pointer but a
+   different size results in undefined behavior.
+
+   You can set any parameter block to be constant using
+   :func:`Problem::SetParameterBlockConstant` and undo this using
+   :func:`Problem::SetParameterBlockVariable`.
+
+   In fact you can set any number of parameter blocks to be constant,
+   and Ceres is smart enough to figure out what part of the problem
+   you have constructed depends on the parameter blocks that are free
+   to change and only spends time solving it. For example, if you
+   constructed a problem with a million parameter blocks and 2 million
+   residual blocks, but then set all but one parameter block to be
+   constant, and say only 10 residual blocks depend on this one
+   non-constant parameter block, then the computational effort Ceres
+   spends in solving this problem will be the same as if you had
+   defined a problem with one parameter block and 10 residual blocks.
+
+   **Ownership**
+
+   :class:`Problem` by default takes ownership of the
+   ``cost_function``, ``loss_function`` and ``local_parameterization``
+   pointers. These objects remain live for the life of the
+   :class:`Problem`. If the user wishes to keep control over the
+   destruction of these objects, then they can do this by setting the
+   corresponding enums in the :class:`Problem::Options` struct.
+
+   Note that even though the Problem takes ownership of ``cost_function``
+   and ``loss_function``, it does not preclude the user from re-using
+   them in another residual block. The destructor takes care to call
+   delete on each ``cost_function`` or ``loss_function`` pointer only
+   once, regardless of how many residual blocks refer to them.
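+
+   A minimal sketch (the option field names here are assumed from
+   ``problem.h``) of keeping ownership on the caller's side:
+
+   .. code-block:: c++
+
+     Problem::Options options;
+     options.cost_function_ownership = DO_NOT_TAKE_OWNERSHIP;
+     options.loss_function_ownership = DO_NOT_TAKE_OWNERSHIP;
+     Problem problem(options);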
+
+.. function:: ResidualBlockId Problem::AddResidualBlock(CostFunction* cost_function, LossFunction* loss_function, const vector<double*> parameter_blocks)
+.. function:: ResidualBlockId Problem::AddResidualBlock(CostFunction* cost_function, LossFunction* loss_function, double *x0, double *x1, ...)
+
+   Add a residual block to the overall cost function. The cost
+   function carries with it information about the sizes of the
+   parameter blocks it expects. The function checks that these match
+   the sizes of the parameter blocks listed in parameter_blocks. The
+   program aborts if a mismatch is detected. loss_function can be
+   NULL, in which case the cost of the term is just the squared norm
+   of the residuals.
+
+   The parameter blocks may be passed together as a
+   ``vector<double*>``, or as up to ten separate ``double*`` pointers.
+
+   The user has the option of explicitly adding the parameter blocks
+   using AddParameterBlock. This causes additional correctness
+   checking; however, AddResidualBlock implicitly adds the parameter
+   blocks if they are not present, so calling AddParameterBlock
+   explicitly is not required.
+
+   The Problem object by default takes ownership of the
+   cost_function and loss_function pointers. These objects remain
+   live for the life of the Problem object. If the user wishes to
+   keep control over the destruction of these objects, then they can
+   do this by setting the corresponding enums in the Options struct.
+
+   Note: Even though the Problem takes ownership of cost_function
+   and loss_function, it does not preclude the user from re-using
+   them in another residual block. The destructor takes care to call
+   delete on each cost_function or loss_function pointer only once,
+   regardless of how many residual blocks refer to them.
+
+   Example usage:
+
+   .. code-block:: c++
+
+      double x1[] = {1.0, 2.0, 3.0};
+      double x2[] = {1.0, 2.0, 5.0, 6.0};
+      double x3[] = {3.0, 6.0, 2.0, 5.0, 1.0};
+      vector<double*> v1;
+      v1.push_back(x1);
+      vector<double*> v2;
+      v2.push_back(x2);
+      v2.push_back(x1);
+
+      Problem problem;
+
+      problem.AddResidualBlock(new MyUnaryCostFunction(...), NULL, x1);
+      problem.AddResidualBlock(new MyBinaryCostFunction(...), NULL, x2, x1);
+      problem.AddResidualBlock(new MyUnaryCostFunction(...), NULL, v1);
+      problem.AddResidualBlock(new MyBinaryCostFunction(...), NULL, v2);
+
+.. function:: void Problem::AddParameterBlock(double* values, int size, LocalParameterization* local_parameterization)
+
+   Add a parameter block with appropriate size and parameterization to
+   the problem.
+   Repeated calls with the same arguments are ignored. Repeated calls
+   with the same double pointer but a different size results in
+   undefined behavior.
+
+.. function:: void Problem::AddParameterBlock(double* values, int size)
+
+   Add a parameter block with appropriate size to the
+   problem. Repeated calls with the same arguments are
+   ignored. Repeated calls with the same double pointer but a
+   different size results in undefined behavior.
+
+.. function:: void Problem::RemoveResidualBlock(ResidualBlockId residual_block)
+
+   Remove a residual block from the problem. Any parameters that the residual
+   block depends on are not removed. The cost and loss functions for the
+   residual block will not get deleted immediately; this won't happen until the
+   problem itself is deleted.  If Problem::Options::enable_fast_removal is
+   true, then the removal is fast (almost constant time). Otherwise, removing a
+   residual block will incur a scan of the entire Problem object to verify that
+   the residual_block represents a valid residual in the problem.
+
+   **WARNING:** Removing a residual or parameter block will destroy
+   the implicit ordering, rendering the jacobian or residuals returned
+   from the solver uninterpretable. If you depend on the evaluated
+   jacobian, do not use remove! This may change in a future release.
+
+.. function:: void Problem::RemoveParameterBlock(double* values)
+
+   Remove a parameter block from the problem. The parameterization of
+   the parameter block, if it exists, will persist until the deletion
+   of the problem (similar to cost/loss functions in residual block
+   removal). Any residual blocks that depend on the parameter are also
+   removed, as described above in RemoveResidualBlock().  If
+   Problem::Options::enable_fast_removal is true, then
+   the removal is fast (almost constant time). Otherwise, removing a
+   parameter block will incur a scan of the entire Problem object.
+
+   **WARNING:** Removing a residual or parameter block will destroy
+   the implicit ordering, rendering the jacobian or residuals returned
+   from the solver uninterpretable. If you depend on the evaluated
+   jacobian, do not use remove! This may change in a future release.
+
+.. function:: void Problem::SetParameterBlockConstant(double* values)
+
+   Hold the indicated parameter block constant during optimization.
+
+.. function:: void Problem::SetParameterBlockVariable(double* values)
+
+   Allow the indicated parameter to vary during optimization.
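+
+   For example, a parameter block can be held fixed for an initial
+   solve and then freed again; a minimal sketch:
+
+   .. code-block:: c++
+
+      double x[] = {1.0, 2.0, 3.0};
+      Problem problem;
+      problem.AddParameterBlock(x, 3);
+
+      problem.SetParameterBlockConstant(x);  // x is held fixed.
+      // ... solve ...
+      problem.SetParameterBlockVariable(x);  // x may vary again.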
+
+.. function:: void Problem::SetParameterization(double* values, LocalParameterization* local_parameterization)
+
+   Set the local parameterization for one of the parameter blocks.
+   The local_parameterization is owned by the Problem by default. It
+   is acceptable to set the same parameterization for multiple
+   parameters; the destructor is careful to delete local
+   parameterizations only once. The local parameterization can only be
+   set once per parameter, and cannot be changed once set.
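+
+   A minimal sketch, using one of the parameterizations shipped with
+   Ceres:
+
+   .. code-block:: c++
+
+      double q[4] = {1.0, 0.0, 0.0, 0.0};
+      Problem problem;
+      problem.AddParameterBlock(q, 4);
+      problem.SetParameterization(q, new QuaternionParameterization);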
+
+.. function:: LocalParameterization* Problem::GetParameterization(double* values) const
+
+   Get the local parameterization object associated with this
+   parameter block. If there is no parameterization object associated
+   then `NULL` is returned.
+
+.. function:: void Problem::SetParameterLowerBound(double* values, int index, double lower_bound)
+
+   Set the lower bound for the parameter at position `index` in the
+   parameter block corresponding to `values`. By default the lower
+   bound is ``-std::numeric_limits<double>::max()``, which is treated
+   by the solver as the same as :math:`-\infty`.
+
+.. function:: void Problem::SetParameterUpperBound(double* values, int index, double upper_bound)
+
+   Set the upper bound for the parameter at position `index` in the
+   parameter block corresponding to `values`. By default the value is
+   ``std::numeric_limits<double>::max()``, which is treated by the
+   solver as the same as :math:`\infty`.
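+
+   For example, a minimal sketch constraining the first coordinate of
+   a parameter block to :math:`[0, 1]`:
+
+   .. code-block:: c++
+
+      double x[] = {0.5, 2.0, 3.0};
+      Problem problem;
+      problem.AddParameterBlock(x, 3);
+
+      problem.SetParameterLowerBound(x, 0, 0.0);  // x[0] >= 0
+      problem.SetParameterUpperBound(x, 0, 1.0);  // x[0] <= 1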
+
+.. function:: double Problem::GetParameterLowerBound(double* values, int index)
+
+   Get the lower bound for the parameter with position `index`. If the
+   parameter is not bounded by the user, then its lower bound is
+   ``-std::numeric_limits<double>::max()``.
+
+.. function:: double Problem::GetParameterUpperBound(double* values, int index)
+
+   Get the upper bound for the parameter with position `index`. If the
+   parameter is not bounded by the user, then its upper bound is
+   ``std::numeric_limits<double>::max()``.
+
+.. function:: int Problem::NumParameterBlocks() const
+
+   Number of parameter blocks in the problem. Always equals
+   parameter_blocks().size() and parameter_block_sizes().size().
+
+.. function:: int Problem::NumParameters() const
+
+   The size of the parameter vector obtained by summing over the sizes
+   of all the parameter blocks.
+
+.. function:: int Problem::NumResidualBlocks() const
+
+   Number of residual blocks in the problem. Always equals
+   residual_blocks().size().
+
+.. function:: int Problem::NumResiduals() const
+
+   The size of the residual vector obtained by summing over the sizes
+   of all of the residual blocks.
+
+.. function:: int Problem::ParameterBlockSize(const double* values) const
+
+   The size of the parameter block.
+
+.. function:: int Problem::ParameterBlockLocalSize(const double* values) const
+
+   The size of local parameterization for the parameter block. If
+   there is no local parameterization associated with this parameter
+   block, then ``ParameterBlockLocalSize`` = ``ParameterBlockSize``.
+
+.. function:: bool Problem::HasParameterBlock(const double* values) const
+
+   Is the given parameter block present in the problem or not?
+
+.. function:: void Problem::GetParameterBlocks(vector<double*>* parameter_blocks) const
+
+   Fills the passed ``parameter_blocks`` vector with pointers to the
+   parameter blocks currently in the problem. After this call,
+   ``parameter_blocks.size() == NumParameterBlocks()``.
+
+.. function:: void Problem::GetResidualBlocks(vector<ResidualBlockId>* residual_blocks) const
+
+   Fills the passed `residual_blocks` vector with pointers to the
+   residual blocks currently in the problem. After this call,
+   `residual_blocks.size() == NumResidualBlocks()`.
+
+.. function:: void Problem::GetParameterBlocksForResidualBlock(const ResidualBlockId residual_block, vector<double*>* parameter_blocks) const
+
+   Get all the parameter blocks that the given residual block depends
+   on.
+
+.. function:: void Problem::GetResidualBlocksForParameterBlock(const double* values, vector<ResidualBlockId>* residual_blocks) const
+
+   Get all the residual blocks that depend on the given parameter
+   block.
+
+   If `Problem::Options::enable_fast_removal` is
+   `true`, then getting the residual blocks is fast and depends only
+   on the number of residual blocks. Otherwise, getting the residual
+   blocks for a parameter block will incur a scan of the entire
+   :class:`Problem` object.
+
+.. function:: const CostFunction* GetCostFunctionForResidualBlock(const ResidualBlockId residual_block) const
+
+   Get the :class:`CostFunction` for the given residual block.
+
+.. function:: const LossFunction* GetLossFunctionForResidualBlock(const ResidualBlockId residual_block) const
+
+   Get the :class:`LossFunction` for the given residual block.
+
+.. function:: bool Problem::Evaluate(const Problem::EvaluateOptions& options, double* cost, vector<double>* residuals, vector<double>* gradient, CRSMatrix* jacobian)
+
+   Evaluate a :class:`Problem`. Any of the output pointers can be
+   `NULL`. Which residual blocks and parameter blocks are used is
+   controlled by the :class:`Problem::EvaluateOptions` struct below.
+
+   .. NOTE::
+
+      The evaluation will use the values stored in the memory
+      locations pointed to by the parameter block pointers used at the
+      time of the construction of the problem, for example in the
+      following code:
+
+      .. code-block:: c++
+
+        Problem problem;
+        double x = 1;
+        problem.AddResidualBlock(new MyCostFunction, NULL, &x);
+
+        double cost = 0.0;
+        problem.Evaluate(Problem::EvaluateOptions(), &cost, NULL, NULL, NULL);
+
+      The cost is evaluated at `x = 1`. If you wish to evaluate the
+      problem at `x = 2`, then
+
+      .. code-block:: c++
+
+         x = 2;
+         problem.Evaluate(Problem::EvaluateOptions(), &cost, NULL, NULL, NULL);
+
+      is the way to do so.
+
+   .. NOTE::
+
+      If no local parameterizations are used, then the size of
+      the gradient vector is the sum of the sizes of all the parameter
+      blocks. If a parameter block has a local parameterization, then
+      it contributes "LocalSize" entries to the gradient vector.
+
+   .. NOTE::
+
+      This function cannot be called while the problem is being
+      solved, for example it cannot be called from an
+      :class:`IterationCallback` at the end of an iteration during a
+      solve.
+
+.. class:: Problem::EvaluateOptions
+
+   Options struct that is used to control :func:`Problem::Evaluate`.
+
+.. member:: vector<double*> Problem::EvaluateOptions::parameter_blocks
+
+   The set of parameter blocks for which evaluation should be
+   performed. This vector determines the order in which parameter
+   blocks occur in the gradient vector and in the columns of the
+   jacobian matrix. If parameter_blocks is empty, then it is assumed
+   to be equal to a vector containing ALL the parameter
+   blocks. Generally speaking the ordering of the parameter blocks in
+   this case depends on the order in which they were added to the
+   problem and whether or not the user removed any parameter blocks.
+
+   **NOTE** This vector should contain the same pointers as the ones
+   used to add parameter blocks to the Problem. These parameter blocks
+   should NOT point to new memory locations. Bad things will happen if
+   you do.
+
+.. member:: vector<ResidualBlockId> Problem::EvaluateOptions::residual_blocks
+
+   The set of residual blocks for which evaluation should be
+   performed. This vector determines the order in which the residuals
+   occur, and how the rows of the jacobian are ordered. If
+   residual_blocks is empty, then it is assumed to be equal to the
+   vector containing all the residual blocks.
+
+.. member:: bool Problem::EvaluateOptions::apply_loss_function
+
+   Even though the residual blocks in the problem may contain loss
+   functions, setting apply_loss_function to false will turn off the
+   application of the loss function to the output of the cost
+   function. This is of use for example if the user wishes to analyse
+   the solution quality by studying the distribution of residuals
+   before and after the solve.
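+
+   For example, a minimal sketch of evaluating the residuals of an
+   already constructed ``problem`` without the loss functions applied:
+
+   .. code-block:: c++
+
+      Problem::EvaluateOptions eval_options;
+      eval_options.apply_loss_function = false;
+
+      double total_cost = 0.0;
+      vector<double> residuals;
+      problem.Evaluate(eval_options, &total_cost, &residuals, NULL, NULL);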
+
+.. member:: int Problem::EvaluateOptions::num_threads
+
+   Number of threads to use. (Requires OpenMP).
+
+``rotation.h``
+==============
+
+Many applications of Ceres Solver involve optimization problems where
+some of the variables correspond to rotations. To ease the pain of
+working with the various representations of rotations (angle-axis,
+quaternion and matrix) we provide a handy set of templated
+functions. These functions are templated so that the user can use them
+within Ceres Solver's automatic differentiation framework.
+
+.. function:: template <typename T> void AngleAxisToQuaternion(T const* angle_axis, T* quaternion)
+
+   Convert a value in combined axis-angle representation to a
+   quaternion.
+
+   The value ``angle_axis`` is a triple whose norm is an angle in radians,
+   and whose direction is aligned with the axis of rotation, and
+   ``quaternion`` is a 4-tuple that will contain the resulting quaternion.
+
+.. function::  template <typename T> void QuaternionToAngleAxis(T const* quaternion, T* angle_axis)
+
+   Convert a quaternion to the equivalent combined axis-angle
+   representation.
+
+   The value ``quaternion`` must be a unit quaternion - it is not
+   normalized first, and ``angle_axis`` will be filled with a value
+   whose norm is the angle of rotation in radians, and whose direction
+   is the axis of rotation.
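+
+   For example, a minimal sketch converting between the two
+   representations and back:
+
+   .. code-block:: c++
+
+      #include "ceres/rotation.h"
+
+      const double angle_axis[3] = {0.0, 0.0, 1.5707963};  // ~90 degrees about z.
+      double quaternion[4];
+      ceres::AngleAxisToQuaternion(angle_axis, quaternion);
+
+      double recovered[3];
+      ceres::QuaternionToAngleAxis(quaternion, recovered);  // recovered ~= angle_axis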
+
+.. function:: template <typename T, int row_stride, int col_stride> void RotationMatrixToAngleAxis(const MatrixAdapter<const T, row_stride, col_stride>& R, T * angle_axis)
+.. function:: template <typename T, int row_stride, int col_stride> void AngleAxisToRotationMatrix(T const * angle_axis, const MatrixAdapter<T, row_stride, col_stride>& R)
+.. function:: template <typename T> void RotationMatrixToAngleAxis(T const * R, T * angle_axis)
+.. function:: template <typename T> void AngleAxisToRotationMatrix(T const * angle_axis, T * R)
+
+   Conversions between 3x3 rotation matrix with given column and row strides and
+   axis-angle rotation representations. The functions that take a pointer to T instead
+   of a MatrixAdapter assume a column major representation with unit row stride and a column stride of 3.
+
+.. function:: template <typename T, int row_stride, int col_stride> void EulerAnglesToRotationMatrix(const T* euler, const MatrixAdapter<T, row_stride, col_stride>& R)
+.. function:: template <typename T> void EulerAnglesToRotationMatrix(const T* euler, int row_stride, T* R)
+
+   Conversions between 3x3 rotation matrix with given column and row strides and
+   Euler angle (in degrees) rotation representations.
+
+   The {pitch,roll,yaw} Euler angles are rotations around the {x,y,z}
+   axes, respectively.  They are applied in that same order, so the
+   total rotation R is Rz * Ry * Rx.
+
+   The function that takes a pointer to T as the rotation matrix assumes a row
+   major representation with unit column stride and a row stride of 3.
+   The additional parameter row_stride is required to be 3.
+
+.. function:: template <typename T, int row_stride, int col_stride> void QuaternionToScaledRotation(const T q[4], const MatrixAdapter<T, row_stride, col_stride>& R)
+.. function:: template <typename T> void QuaternionToScaledRotation(const T q[4], T R[3 * 3])
+
+   Convert a 4-vector to a 3x3 scaled rotation matrix.
+
+   The choice of rotation is such that the quaternion
+   :math:`\begin{bmatrix} 1 &0 &0 &0\end{bmatrix}` goes to an identity
+   matrix and for small :math:`a, b, c` the quaternion
+   :math:`\begin{bmatrix}1 &a &b &c\end{bmatrix}` goes to the matrix
+
+   .. math::
+
+     I + 2 \begin{bmatrix} 0 & -c & b \\ c & 0 & -a\\ -b & a & 0
+           \end{bmatrix} + O(q^2)
+
+   which corresponds to a Rodrigues approximation, the last matrix
+   being the cross-product matrix of :math:`\begin{bmatrix} a& b&
+   c\end{bmatrix}`. Together with the property that :math:`R(q1 * q2)
+   = R(q1) * R(q2)` this uniquely defines the mapping from :math:`q` to
+   :math:`R`.
+
+   In the function that accepts a pointer to T instead of a MatrixAdapter,
+   the rotation matrix ``R`` is a row-major matrix with unit column stride
+   and a row stride of 3.
+
+   No normalization of the quaternion is performed, i.e.
+   :math:`R = \|q\|^2  Q`, where :math:`Q` is an orthonormal matrix
+   such that :math:`\det(Q) = 1` and :math:`Q*Q' = I`.
+
+
+.. function:: template <typename T, int row_stride, int col_stride> void QuaternionToRotation(const T q[4], const MatrixAdapter<T, row_stride, col_stride>& R)
+.. function:: template <typename T> void QuaternionToRotation(const T q[4], T R[3 * 3])
+
+   Same as above except that the rotation matrix is normalized by the
+   Frobenius norm, so that :math:`R R' = I` (and :math:`\det(R) = 1`).
+
+.. function:: template <typename T> void UnitQuaternionRotatePoint(const T q[4], const T pt[3], T result[3])
+
+   Rotates a point pt by a quaternion q:
+
+   .. math:: \text{result} = R(q)  \text{pt}
+
+   Assumes the quaternion is unit norm. If you pass in a quaternion
+   with :math:`|q|^2 = 2` then you WILL NOT get back 2 times the
+   result you get for a unit quaternion.
+
+
+.. function:: template <typename T> void QuaternionRotatePoint(const T q[4], const T pt[3], T result[3])
+
+   With this function you do not need to assume that :math:`q` has unit norm.
+   It does assume that the norm is non-zero.
+
+.. function:: template <typename T> void QuaternionProduct(const T z[4], const T w[4], T zw[4])
+
+   .. math:: zw = z * w
+
+   where :math:`*` is the Quaternion product between 4-vectors.
+
+
+.. function:: template <typename T> void CrossProduct(const T x[3], const T y[3], T x_cross_y[3])
+
+   .. math:: \text{x_cross_y} = x \times y
+
+.. function:: template <typename T> void AngleAxisRotatePoint(const T angle_axis[3], const T pt[3], T result[3])
+
+   .. math:: \text{result} = R(\text{angle_axis}) \text{pt}
+
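+These functions are most useful inside templated cost functors, where
+they operate on ``Jet`` types during automatic differentiation. The
+following sketch (the functor, its residual and the ``observation``
+array are made up for illustration) rotates a point by an angle-axis
+parameter block inside an :class:`AutoDiffCostFunction`:
+
+.. code-block:: c++
+
+   #include "ceres/ceres.h"
+   #include "ceres/rotation.h"
+
+   struct RotatedPointError {
+     explicit RotatedPointError(const double* observed) {
+       observed_[0] = observed[0];
+       observed_[1] = observed[1];
+       observed_[2] = observed[2];
+     }
+
+     template <typename T>
+     bool operator()(const T* const angle_axis,
+                     const T* const point,
+                     T* residuals) const {
+       // Rotate the point and compare it to the observation.
+       T p[3];
+       ceres::AngleAxisRotatePoint(angle_axis, point, p);
+       residuals[0] = p[0] - T(observed_[0]);
+       residuals[1] = p[1] - T(observed_[1]);
+       residuals[2] = p[2] - T(observed_[2]);
+       return true;
+     }
+
+     double observed_[3];
+   };
+
+   // CostFunction* cost_function =
+   //     new ceres::AutoDiffCostFunction<RotatedPointError, 3, 3, 3>(
+   //         new RotatedPointError(observation));
+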
+
+Cubic Interpolation
+===================
+
+Optimization problems often involve functions that are given in the
+form of a table of values, for example an image. Evaluating these
+functions and their derivatives requires interpolating these
+values. Interpolating tabulated functions is a vast area of research
+and there are a lot of libraries which implement a variety of
+interpolation schemes. However, using them within the automatic
+differentiation framework in Ceres is quite painful. To this end,
+Ceres provides the ability to interpolate one dimensional and two
+dimensional tabular functions.
+
+The one dimensional interpolation is based on the Cubic Hermite
+Spline, also known as the Catmull-Rom Spline. This produces a first
+order differentiable interpolating function. The two dimensional
+interpolation scheme is a generalization of the one dimensional scheme
+where the interpolating function is assumed to be separable in the two
+dimensions.
+
+More details of the construction can be found in `Linear Methods for
+Image Interpolation <http://www.ipol.im/pub/art/2011/g_lmii/>`_ by
+Pascal Getreuer.
+
+.. class:: CubicInterpolator
+
+Given as input an infinite one dimensional grid, which provides the
+following interface:
+
+.. code::
+
+  struct Grid1D {
+    enum { DATA_DIMENSION = 2 };
+    void GetValue(int n, double* f) const;
+  };
+
+Where, ``GetValue`` gives us the value of a function :math:`f`
+(possibly vector valued) for any integer :math:`n` and the enum
+``DATA_DIMENSION`` indicates the dimensionality of the function being
+interpolated. For example if you are interpolating rotations in
+axis-angle format over time, then ``DATA_DIMENSION = 3``.
+
+:class:`CubicInterpolator` uses Cubic Hermite splines to produce a
+smooth approximation to the underlying function that can be used to
+evaluate :math:`f(x)` and :math:`f'(x)` at any point on the real
+number line. For example, the following code interpolates an array of
+four numbers.
+
+.. code::
+
+  const double data[] = {1.0, 2.0, 5.0, 6.0};
+  Grid1D<double, 1> array(data, 0, 4);
+  CubicInterpolator interpolator(array);
+  double f, dfdx;
+  interpolator.Evaluate(1.5, &f, &dfdx);
+
+
+In the above code we use ``Grid1D``, a templated helper class that
+allows easy interfacing between ``C++`` arrays and
+:class:`CubicInterpolator`.
+
+``Grid1D`` supports vector valued functions where the various
+coordinates of the function can be interleaved or stacked. It also
+allows the use of any numeric type as input, as long as it can be
+safely cast to a double.
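+
+The interpolator is designed to be usable with automatic
+differentiation as well. A minimal sketch of how this might look,
+assuming an ``Evaluate`` overload templated on the scalar type (the
+functor and its residual are made up for illustration):
+
+.. code::
+
+  // Works with any interpolator exposing a templated
+  // Evaluate(x, &f) overload, e.g. CubicInterpolator.
+  template <typename Interpolator>
+  struct InterpolatedResidual {
+    explicit InterpolatedResidual(const Interpolator& interpolator)
+        : interpolator_(interpolator) {}
+
+    template <typename T>
+    bool operator()(const T* const x, T* residual) const {
+      T f;
+      interpolator_.Evaluate(*x, &f);
+      residual[0] = T(10.0) - f;  // Drive the interpolated value towards 10.
+      return true;
+    }
+
+    const Interpolator& interpolator_;
+  };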
+
+.. class:: BiCubicInterpolator
+
+Given as input an infinite two dimensional grid, which provides the
+following interface:
+
+.. code::
+
+  struct Grid2D {
+    enum { DATA_DIMENSION = 2 };
+    void GetValue(int row, int col, double* f) const;
+  };
+
+Where, ``GetValue`` gives us the value of a function :math:`f`
+(possibly vector valued) for any pair of integers :code:`row` and
+:code:`col` and the enum ``DATA_DIMENSION`` indicates the
+dimensionality of the function being interpolated. For example if you
+are interpolating a color image with three channels (Red, Green &
+Blue), then ``DATA_DIMENSION = 3``.
+
+:class:`BiCubicInterpolator` uses the cubic convolution interpolation
+algorithm of R. Keys [Keys]_ to produce a smooth approximation to the
+underlying function that can be used to evaluate :math:`f(r,c)`,
+:math:`\frac{\partial f(r,c)}{\partial r}` and :math:`\frac{\partial
+f(r,c)}{\partial c}` at any point in the real plane.
+
+For example the following code interpolates a two dimensional array.
+
+.. code::
+
+   const double data[] = {1.0, 3.0, -1.0, 4.0,
+                          3.6, 2.1,  4.2, 2.0,
+                          2.0, 1.0,  3.1, 5.2};
+   Grid2D<double, 1>  array(data, 0, 3, 0, 4);
+   BiCubicInterpolator interpolator(array);
+   double f, dfdr, dfdc;
+   interpolator.Evaluate(1.2, 2.5, &f, &dfdr, &dfdc);
+
+In the above code, the templated helper class ``Grid2D`` is used to
+make a ``C++`` array look like a two dimensional table to
+:class:`BiCubicInterpolator`.
+
+``Grid2D`` supports row or column major layouts. It also supports
+vector valued functions where the individual coordinates of the
+function may be interleaved or stacked. It also allows the use of any
+numeric type as input, as long as it can be safely cast to double.
diff --git a/docs/source/nnls_solving.rst b/docs/source/nnls_solving.rst
new file mode 100644
index 0000000..713d54d
--- /dev/null
+++ b/docs/source/nnls_solving.rst
@@ -0,0 +1,2344 @@
+
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-nnls_solving:
+
+================================
+Solving Non-linear Least Squares
+================================
+
+Introduction
+============
+
+Effective use of Ceres requires some familiarity with the basic
+components of a non-linear least squares solver, so before we describe
+how to configure and use the solver, we will take a brief look at how
+some of the core optimization algorithms in Ceres work.
+
+Let :math:`x \in \mathbb{R}^n` be an :math:`n`-dimensional vector of
+variables, and
+:math:`F(x) = \left[f_1(x), ... ,  f_{m}(x) \right]^{\top}` be a
+:math:`m`-dimensional function of :math:`x`.  We are interested in
+solving the optimization problem [#f1]_
+
+.. math:: \arg \min_x \frac{1}{2}\|F(x)\|^2\ . \\
+          L \le x \le U
+  :label: nonlinsq
+
+Where, :math:`L` and :math:`U` are lower and upper bounds on the
+parameter vector :math:`x`.
+
+Since the efficient global minimization of :eq:`nonlinsq` for
+general :math:`F(x)` is an intractable problem, we will have to settle
+for finding a local minimum.
+
+In the following, the Jacobian :math:`J(x)` of :math:`F(x)` is an
+:math:`m\times n` matrix, where :math:`J_{ij}(x) = \partial_j f_i(x)`
+and the gradient vector is :math:`g(x) = \nabla \frac{1}{2}\|F(x)\|^2
+= J(x)^\top F(x)`.
+
+The general strategy when solving non-linear optimization problems is
+to solve a sequence of approximations to the original problem
+[NocedalWright]_. At each iteration, the approximation is solved to
+determine a correction :math:`\Delta x` to the vector :math:`x`. For
+non-linear least squares, an approximation can be constructed by using
+the linearization :math:`F(x+\Delta x) \approx F(x) + J(x)\Delta x`,
+which leads to the following linear least squares problem:
+
+.. math:: \min_{\Delta x} \frac{1}{2}\|J(x)\Delta x + F(x)\|^2
+   :label: linearapprox
+
+Unfortunately, naively solving a sequence of these problems and
+updating :math:`x \leftarrow x+ \Delta x` leads to an algorithm that
+may not converge.  To get a convergent algorithm, we need to control
+the size of the step :math:`\Delta x`. Depending on how the size of
+the step :math:`\Delta x` is controlled, non-linear optimization
+algorithms can be divided into two major categories [NocedalWright]_.
+
+1. **Trust Region** The trust region approach approximates the
+   objective function using a model function (often a quadratic)
+   over a subset of the search space known as the trust region. If the
+   model function succeeds in minimizing the true objective function
+   the trust region is expanded; otherwise it is contracted and the
+   model optimization problem is solved again.
+
+2. **Line Search** The line search approach first finds a descent
+   direction along which the objective function will be reduced and
+   then computes a step size that decides how far to move along
+   that direction. The descent direction can be computed by various
+   methods, such as gradient descent, Newton's method and Quasi-Newton
+   methods. The step size can be determined either exactly or
+   inexactly.
+
+Trust region methods are in some sense dual to line search methods:
+trust region methods first choose a step size (the size of the trust
+region) and then a step direction while line search methods first
+choose a step direction and then a step size. Ceres implements
+multiple algorithms in both categories.
+
+.. _section-trust-region-methods:
+
+Trust Region Methods
+====================
+
+The basic trust region algorithm looks something like this.
+
+   1. Given an initial point :math:`x` and a trust region radius :math:`\mu`.
+   2. Solve
+
+      .. math::
+         \arg \min_{\Delta x}& \frac{1}{2}\|J(x)\Delta x + F(x)\|^2 \\
+         \text{such that} &\|D(x)\Delta x\|^2 \le \mu\\
+         &L \le x + \Delta x \le U.
+
+   3. :math:`\rho = \frac{\displaystyle \|F(x + \Delta x)\|^2 -
+      \|F(x)\|^2}{\displaystyle \|J(x)\Delta x + F(x)\|^2 -
+      \|F(x)\|^2}`
+   4. if :math:`\rho > \epsilon` then  :math:`x = x + \Delta x`.
+   5. if :math:`\rho > \eta_1` then :math:`\mu = 2  \mu`
+   6. else if :math:`\rho < \eta_2` then :math:`\mu = 0.5 * \mu`
+   7. Go to 2.
+
+Here, :math:`\mu` is the trust region radius, :math:`D(x)` is some
+matrix used to define a metric on the domain of :math:`F(x)` and
+:math:`\rho` measures the quality of the step :math:`\Delta x`, i.e.,
+how well did the linear model predict the decrease in the value of the
+non-linear objective. The idea is to increase or decrease the radius
+of the trust region depending on how well the linearization predicts
+the behavior of the non-linear objective, which in turn is reflected
+in the value of :math:`\rho`.
+
+The key computational step in a trust-region algorithm is the solution
+of the constrained optimization problem
+
+.. math::
+   \arg \min_{\Delta x}&\quad \frac{1}{2}\|J(x)\Delta x + F(x)\|^2 \\
+   \text{such that} &\quad \|D(x)\Delta x\|^2 \le \mu\\
+    &\quad L \le x + \Delta x \le U.
+   :label: trp
+
+There are a number of different ways of solving this problem, each
+giving rise to a different concrete trust-region algorithm. Currently,
+Ceres implements two trust-region algorithms - Levenberg-Marquardt
+and Dogleg, each of which is augmented with a line search if bounds
+constraints are present [Kanzow]_. The user can choose between them by
+setting :member:`Solver::Options::trust_region_strategy_type`.
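+
+For example, a minimal sketch of selecting the strategy via
+:member:`Solver::Options::trust_region_strategy_type`:
+
+.. code-block:: c++
+
+   ceres::Solver::Options options;
+   options.minimizer_type = ceres::TRUST_REGION;
+   options.trust_region_strategy_type = ceres::LEVENBERG_MARQUARDT;
+   // or ceres::DOGLEG, see below.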
+
+.. rubric:: Footnotes
+
+.. [#f1] At the level of the non-linear solver, the block structure is
+         not relevant, therefore our discussion here is in terms of an
+         optimization problem defined over a state vector of size
+         :math:`n`. Similarly the presence of loss functions is also
+         ignored as the problem is internally converted into a pure
+         non-linear least squares problem.
+
+
+.. _section-levenberg-marquardt:
+
+Levenberg-Marquardt
+-------------------
+
+The Levenberg-Marquardt algorithm [Levenberg]_  [Marquardt]_ is the
+most popular algorithm for solving non-linear least squares problems.
+It was also the first trust region algorithm to be developed
+[Levenberg]_ [Marquardt]_. Ceres implements an exact step [Madsen]_
+and an inexact step variant of the Levenberg-Marquardt algorithm
+[WrightHolt]_ [NashSofer]_.
+
+It can be shown that the solution to :eq:`trp` can be obtained by
+solving an unconstrained optimization of the form
+
+.. math:: \arg\min_{\Delta x} \frac{1}{2}\|J(x)\Delta x + F(x)\|^2 +\lambda  \|D(x)\Delta x\|^2
+
+Where, :math:`\lambda` is a Lagrange multiplier that is inversely
+related to :math:`\mu`. In Ceres, we solve for
+
+.. math:: \arg\min_{\Delta x} \frac{1}{2}\|J(x)\Delta x + F(x)\|^2 + \frac{1}{\mu} \|D(x)\Delta x\|^2
+   :label: lsqr
+
+The matrix :math:`D(x)` is a non-negative diagonal matrix, typically
+the square root of the diagonal of the matrix :math:`J(x)^\top J(x)`.
+
+Before going further, let us make some notational simplifications. We
+will assume that the matrix :math:`\frac{1}{\sqrt{\mu}} D` has been concatenated
+at the bottom of the matrix :math:`J` and similarly a vector of zeros
+has been added to the bottom of the vector :math:`f` and the rest of
+our discussion will be in terms of :math:`J` and :math:`f`, i.e., the
+linear least squares problem.
+
+.. math:: \min_{\Delta x} \frac{1}{2} \|J(x)\Delta x + f(x)\|^2 .
+   :label: simple
+
+For all but the smallest problems the solution of :eq:`simple` in
+each iteration of the Levenberg-Marquardt algorithm is the dominant
+computational cost in Ceres. Ceres provides a number of different
+options for solving :eq:`simple`. There are two major classes of
+methods - factorization and iterative.
+
+The factorization methods are based on computing an exact solution of
+:eq:`lsqr` using a Cholesky or a QR factorization and lead to an exact
+step Levenberg-Marquardt algorithm. But it is not clear if an exact
+solution of :eq:`lsqr` is necessary at each step of the LM algorithm
+to solve :eq:`nonlinsq`. In fact, we have already seen evidence
+that this may not be the case, as :eq:`lsqr` is itself a regularized
+version of :eq:`linearapprox`. Indeed, it is possible to
+construct non-linear optimization algorithms in which the linearized
+problem is solved approximately. These algorithms are known as inexact
+Newton or truncated Newton methods [NocedalWright]_.
+
+An inexact Newton method requires two ingredients. First, a cheap
+method for approximately solving systems of linear
+equations. Typically an iterative linear solver like the Conjugate
+Gradients method is used for this
+purpose [NocedalWright]_. Second, a termination rule for
+the iterative solver. A typical termination rule is of the form
+
+.. math:: \|H(x) \Delta x + g(x)\| \leq \eta_k \|g(x)\|.
+   :label: inexact
+
+Here, :math:`k` indicates the Levenberg-Marquardt iteration number and
+:math:`0 < \eta_k <1` is known as the forcing sequence.  [WrightHolt]_
+prove that a truncated Levenberg-Marquardt algorithm that uses an
+inexact Newton step based on :eq:`inexact` converges for any
+sequence :math:`\eta_k \leq \eta_0 < 1` and the rate of convergence
+depends on the choice of the forcing sequence :math:`\eta_k`.
+
+Ceres supports both exact and inexact step solution strategies. When
+the user chooses a factorization based linear solver, the exact step
+Levenberg-Marquardt algorithm is used. When the user chooses an
+iterative linear solver, the inexact step Levenberg-Marquardt
+algorithm is used.
+
+.. _section-dogleg:
+
+Dogleg
+------
+
+Another strategy for solving the trust region problem :eq:`trp` was
+introduced by M. J. D. Powell. The key idea there is to compute two
+vectors
+
+.. math::
+
+        \Delta x^{\text{Gauss-Newton}} &= \arg \min_{\Delta x}\frac{1}{2} \|J(x)\Delta x + f(x)\|^2.\\
+        \Delta x^{\text{Cauchy}} &= -\frac{\|g(x)\|^2}{\|J(x)g(x)\|^2}g(x).
+
+Note that the vector :math:`\Delta x^{\text{Gauss-Newton}}` is the
+solution to :eq:`linearapprox` and :math:`\Delta
+x^{\text{Cauchy}}` is the vector that minimizes the linear
+approximation if we restrict ourselves to moving along the direction
+of the gradient. The Dogleg method finds a vector :math:`\Delta x`
+defined by :math:`\Delta x^{\text{Gauss-Newton}}` and :math:`\Delta
+x^{\text{Cauchy}}` that solves the trust region problem. Ceres
+supports two variants that can be chosen by setting
+:member:`Solver::Options::dogleg_type`.
+
+``TRADITIONAL_DOGLEG``, as described by Powell, constructs two line
+segments using the Gauss-Newton and Cauchy vectors and finds the point
+farthest along this path (which is shaped like a dogleg, hence the
+name) that is contained in the trust region. For more details on the
+exact reasoning and computations, please see Madsen et al [Madsen]_.
+
+``SUBSPACE_DOGLEG`` is a more sophisticated method that considers the
+entire two dimensional subspace spanned by these two vectors and finds
+the point that minimizes the trust region problem in this subspace
+[ByrdSchnabel]_.
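+
+A minimal sketch of selecting between the two variants:
+
+.. code-block:: c++
+
+   ceres::Solver::Options options;
+   options.trust_region_strategy_type = ceres::DOGLEG;
+   options.dogleg_type = ceres::SUBSPACE_DOGLEG;  // or ceres::TRADITIONAL_DOGLEG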
+
+The key advantage of the Dogleg over Levenberg-Marquardt is that if
+the step computation for a particular choice of :math:`\mu` does not
+result in sufficient decrease in the value of the objective function,
+Levenberg-Marquardt solves the linear approximation from scratch with
+a smaller value of :math:`\mu`. Dogleg on the other hand, only needs
+to compute the interpolation between the Gauss-Newton and the Cauchy
+vectors, as neither of them depend on the value of :math:`\mu`.
+
+The Dogleg method can only be used with the exact factorization based
+linear solvers.
+
+.. _section-inner-iterations:
+
+Inner Iterations
+----------------
+
+Some non-linear least squares problems have additional structure in
+the way the parameter blocks interact, such that it is beneficial to
+modify the way the trust region step is computed. For example,
+consider the following regression problem
+
+.. math::   y = a_1 e^{b_1 x} + a_2 e^{b_2 x^2 + c_1}
+
+
+Given a set of pairs :math:`\{(x_i, y_i)\}`, the user wishes to estimate
+:math:`a_1, a_2, b_1, b_2`, and :math:`c_1`.
+
+Notice that the expression on the right is linear in :math:`a_1` and
+:math:`a_2`, and given any value for :math:`b_1, b_2` and :math:`c_1`,
+it is possible to use linear regression to estimate the optimal values
+of :math:`a_1` and :math:`a_2`. It's possible to analytically
+eliminate the variables :math:`a_1` and :math:`a_2` from the problem
+entirely. Problems like these are known as separable least squares
+problems and the most famous algorithm for solving them is the Variable
+Projection algorithm invented by Golub & Pereyra [GolubPereyra]_.
+
+Similar structure can be found in the matrix factorization with
+missing data problem. There the corresponding algorithm is known as
+Wiberg's algorithm [Wiberg]_.
+
+Ruhe & Wedin present an analysis of various algorithms for solving
+separable non-linear least squares problems and refer to *Variable
+Projection* as Algorithm I in their paper [RuheWedin]_.
+
+Implementing Variable Projection is tedious and expensive. Ruhe &
+Wedin present a simpler algorithm with comparable convergence
+properties, which they call Algorithm II.  Algorithm II performs an
+additional optimization step to estimate :math:`a_1` and :math:`a_2`
+exactly after computing a successful Newton step.
+
+
+This idea can be generalized to cases where the residual is not
+linear in :math:`a_1` and :math:`a_2`, i.e.,
+
+.. math:: y = f_1(a_1, e^{b_1 x}) + f_2(a_2, e^{b_2 x^2 + c_1})
+
+In this case, we solve for the trust region step for the full problem,
+and then use it as the starting point to further optimize just
+:math:`a_1` and :math:`a_2`. For the linear case, this amounts to doing
+a single linear least squares solve. For non-linear problems, any
+method for solving the :math:`a_1` and :math:`a_2` optimization
+problems will do. The only constraint on :math:`a_1` and :math:`a_2`
+(if they are two different parameter blocks) is that they do not
+co-occur in a residual block.
+
+This idea can be further generalized, by not just optimizing
+:math:`(a_1, a_2)`, but decomposing the graph corresponding to the
+Hessian matrix's sparsity structure into a collection of
+non-overlapping independent sets and optimizing each of them.
+
+Setting :member:`Solver::Options::use_inner_iterations` to ``true``
+enables the use of this non-linear generalization of Ruhe & Wedin's
+Algorithm II.  This non-linear generalization has a higher iteration
+complexity, but also displays better convergence behavior per
+iteration.
+
+Setting :member:`Solver::Options::num_threads` to the maximum number
+possible is highly recommended.
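+
+A minimal sketch of enabling inner iterations:
+
+.. code-block:: c++
+
+   ceres::Solver::Options options;
+   options.use_inner_iterations = true;
+   options.num_threads = 4;  // Or however many cores are available.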
+
+.. _section-non-monotonic-steps:
+
+Non-monotonic Steps
+-------------------
+
+Note that the basic trust-region algorithm described in
+:ref:`section-trust-region-methods` is a descent algorithm in that it
+only accepts a point if it strictly reduces the value of the objective
+function.
+
+Relaxing this requirement allows the algorithm to be more efficient in
+the long term at the cost of some local increase in the value of the
+objective function.
+
+This is because allowing for non-decreasing objective function values
+in a principled manner allows the algorithm to *jump over boulders* as
+the method is not restricted to move into narrow valleys while
+preserving its convergence properties.
+
+Setting :member:`Solver::Options::use_nonmonotonic_steps` to ``true``
+enables the non-monotonic trust region algorithm as described by Conn,
+Gould & Toint in [Conn]_.
+
+Even though the value of the objective function may be larger
+than the minimum value encountered over the course of the
+optimization, the final parameters returned to the user are the
+ones corresponding to the minimum cost over all iterations.
+
+The option to take non-monotonic steps is available for all trust
+region strategies.
+
+
+.. _section-line-search-methods:
+
+Line Search Methods
+===================
+
+The line search method in Ceres Solver cannot handle bounds
+constraints right now, so it can only be used for solving
+unconstrained problems.
+
+The basic line search algorithm looks something like this:
+
+   1. Given an initial point :math:`x`
+   2. :math:`\Delta x = -H^{-1}(x) g(x)`
+   3. :math:`\arg \min_\mu \frac{1}{2} \| F(x + \mu \Delta x) \|^2`
+   4. :math:`x = x + \mu \Delta x`
+   5. Goto 2.
+
+Here :math:`H(x)` is some approximation to the Hessian of the
+objective function, and :math:`g(x)` is the gradient at
+:math:`x`. Depending on the choice of :math:`H(x)` we get a variety of
+different search directions :math:`\Delta x`.
+
+Step 3, which is a one dimensional optimization or `Line Search` along
+:math:`\Delta x`, is what gives this class of methods its name.
+
+Different line search algorithms differ in their choice of the search
+direction :math:`\Delta x` and the method used for one dimensional
+optimization along :math:`\Delta x`. The choice of :math:`H(x)` is the
+primary source of computational complexity in these
+methods. Currently, Ceres Solver supports three choices of search
+directions, all aimed at large scale problems.
+
+1. ``STEEPEST_DESCENT`` This corresponds to choosing :math:`H(x)` to
+   be the identity matrix. This is not a good search direction for
+   anything but the simplest of the problems. It is only included here
+   for completeness.
+
+2. ``NONLINEAR_CONJUGATE_GRADIENT`` A generalization of the Conjugate
+   Gradient method to non-linear functions. The generalization can be
+   performed in a number of different ways, resulting in a variety of
+   search directions. Ceres Solver currently supports
+   ``FLETCHER_REEVES``, ``POLAK_RIBIERE`` and ``HESTENES_STIEFEL``
+   directions.
+
+3. ``BFGS`` A generalization of the Secant method to multiple
+   dimensions in which a full, dense approximation to the inverse
+   Hessian is maintained and used to compute a quasi-Newton step
+   [NocedalWright]_.  BFGS is currently the best known general
+   quasi-Newton algorithm.
+
+4. ``LBFGS`` A limited memory approximation to the full ``BFGS``
+   method in which the last `M` iterations are used to approximate the
+   inverse Hessian used to compute a quasi-Newton step [Nocedal]_,
+   [ByrdNocedal]_.
+
+Currently Ceres Solver supports both a backtracking and interpolation
+based Armijo line search algorithm, and a sectioning / zoom
+interpolation (strong) Wolfe condition line search algorithm.
+However, note that in order for the assumptions underlying the
+``BFGS`` and ``LBFGS`` methods to be guaranteed to be satisfied the
+Wolfe line search algorithm should be used.
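+
+A minimal sketch of configuring a line search based minimizer:
+
+.. code-block:: c++
+
+   ceres::Solver::Options options;
+   options.minimizer_type = ceres::LINE_SEARCH;
+   options.line_search_direction_type = ceres::LBFGS;
+   options.line_search_type = ceres::WOLFE;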
+
+.. _section-linear-solver:
+
+LinearSolver
+============
+
+Recall that in both of the trust-region methods described above, the
+key computational cost is the solution of a linear least squares
+problem of the form
+
+.. math:: \min_{\Delta x} \frac{1}{2} \|J(x)\Delta x + f(x)\|^2 .
+   :label: simple2
+
+Let :math:`H(x)= J(x)^\top J(x)` and :math:`g(x) = -J(x)^\top
+f(x)`. For notational convenience let us also drop the dependence on
+:math:`x`. Then it is easy to see that solving :eq:`simple2` is
+equivalent to solving the *normal equations*.
+
+.. math:: H \Delta x = g
+   :label: normal
+
+Ceres provides a number of different options for solving :eq:`normal`.
+
+.. _section-qr:
+
+``DENSE_QR``
+------------
+
+For small problems (a couple of hundred parameters and a few thousand
+residuals) with relatively dense Jacobians, ``DENSE_QR`` is the method
+of choice [Bjorck]_. Let :math:`J = QR` be the QR-decomposition of
+:math:`J`, where :math:`Q` is an orthonormal matrix and :math:`R` is
+an upper triangular matrix [TrefethenBau]_. Then it can be shown that
+the solution to :eq:`normal` is given by
+
+.. math:: \Delta x^* = -R^{-1}Q^\top f
+
+
+Ceres uses ``Eigen`` 's dense QR factorization routines.
+
+.. _section-cholesky:
+
+``DENSE_NORMAL_CHOLESKY`` & ``SPARSE_NORMAL_CHOLESKY``
+------------------------------------------------------
+
+Large non-linear least square problems are usually sparse. In such
+cases, using a dense QR factorization is inefficient. Let :math:`H =
+R^\top R` be the Cholesky factorization of the normal equations, where
+:math:`R` is an upper triangular matrix, then the solution to
+:eq:`normal` is given by
+
+.. math::
+
+    \Delta x^* = R^{-1} R^{-\top} g.
+
+
+The observant reader will note that the :math:`R` in the Cholesky
+factorization of :math:`H` is the same upper triangular matrix
+:math:`R` in the QR factorization of :math:`J`. Since :math:`Q` is an
+orthonormal matrix, :math:`J=QR` implies that :math:`J^\top J = R^\top
+Q^\top Q R = R^\top R`. There are two variants of Cholesky
+factorization -- sparse and dense.
+
+``DENSE_NORMAL_CHOLESKY``, as the name implies, performs a dense
+Cholesky factorization of the normal equations. Ceres uses
+``Eigen`` 's dense LDLT factorization routines.
+
+``SPARSE_NORMAL_CHOLESKY``, as the name implies, performs a sparse
+Cholesky factorization of the normal equations. This leads to
+substantial savings in time and memory for large sparse
+problems. Ceres uses the sparse Cholesky factorization routines in
+Professor Tim Davis' ``SuiteSparse`` or ``CXSparse`` packages [Chen]_
+or the sparse Cholesky factorization algorithm in ``Eigen`` (which
+incidentally is a port of the algorithm implemented inside ``CXSparse``).
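+
+A minimal sketch of selecting a linear solver:
+
+.. code-block:: c++
+
+   ceres::Solver::Options options;
+   options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;
+   // For small, dense problems ceres::DENSE_QR or
+   // ceres::DENSE_NORMAL_CHOLESKY may be preferable.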
+
+.. _section-cgnr:
+
+``CGNR``
+--------
+
+For general sparse problems, if the problem is too large for
+``CHOLMOD`` or a sparse linear algebra library is not linked into
+Ceres, another option is the ``CGNR`` solver. This solver uses the
+Conjugate Gradients solver on the *normal equations*, but without
+forming the normal equations explicitly. It exploits the relation
+
+.. math::
+    H x = J^\top J x = J^\top(J x)
+
+The convergence of Conjugate Gradients depends on the condition
+number :math:`\kappa(H)`. Usually :math:`H` is poorly conditioned and
+a :ref:`section-preconditioner` must be used to get reasonable
+performance. Currently only the ``JACOBI`` preconditioner is available
+for use with ``CGNR``. It uses the block diagonal of :math:`H` to
+precondition the normal equations.
+
+When the user chooses ``CGNR`` as the linear solver, Ceres
+automatically switches from the exact step algorithm to an inexact
+step algorithm.
+
+.. _section-schur:
+
+``DENSE_SCHUR`` & ``SPARSE_SCHUR``
+----------------------------------
+
+While it is possible to use ``SPARSE_NORMAL_CHOLESKY`` to solve bundle
+adjustment problems, bundle adjustment problems have a special
+structure, and a more efficient scheme for solving :eq:`normal`
+can be constructed.
+
+Suppose that the SfM problem consists of :math:`p` cameras and
+:math:`q` points and the variable vector :math:`x` has the block
+structure :math:`x = [y_{1}, ... ,y_{p},z_{1}, ... ,z_{q}]`. Where,
+:math:`y` and :math:`z` correspond to camera and point parameters,
+respectively.  Further, let the camera blocks be of size :math:`c` and
+the point blocks be of size :math:`s` (for most problems :math:`c` =
+:math:`6`--:math:`9` and :math:`s = 3`). Ceres does not impose any constancy
+requirement on these block sizes, but choosing them to be constant
+simplifies the exposition.
+
+A key characteristic of the bundle adjustment problem is that there is
+no term :math:`f_{i}` that includes two or more point blocks.  This in
+turn implies that the matrix :math:`H` is of the form
+
+.. math:: H = \left[ \begin{matrix} B & E\\ E^\top & C \end{matrix} \right]\ ,
+   :label: hblock
+
+where :math:`B \in \mathbb{R}^{pc\times pc}` is a block sparse matrix
+with :math:`p` blocks of size :math:`c\times c` and :math:`C \in
+\mathbb{R}^{qs\times qs}` is a block diagonal matrix with :math:`q` blocks
+of size :math:`s\times s`. :math:`E \in \mathbb{R}^{pc\times qs}` is a
+general block sparse matrix, with a block of size :math:`c\times s`
+for each observation. Let us now block partition :math:`\Delta x =
+[\Delta y,\Delta z]` and :math:`g=[v,w]` to restate :eq:`normal`
+as the block structured linear system
+
+.. math:: \left[ \begin{matrix} B & E\\ E^\top & C \end{matrix}
+                \right]\left[ \begin{matrix} \Delta y \\ \Delta z
+                    \end{matrix} \right] = \left[ \begin{matrix} v\\ w
+                    \end{matrix} \right]\ ,
+   :label: linear2
+
+and apply Gaussian elimination to it. As we noted above, :math:`C` is
+a block diagonal matrix, with small diagonal blocks of size
+:math:`s\times s`.  Thus, calculating the inverse of :math:`C` by
+inverting each of these blocks is cheap. This allows us to eliminate
+:math:`\Delta z` by observing that :math:`\Delta z = C^{-1}(w - E^\top
+\Delta y)`, giving us
+
+.. math:: \left[B - EC^{-1}E^\top\right] \Delta y = v - EC^{-1}w\ .
+   :label: schur
+
+The matrix
+
+.. math:: S = B - EC^{-1}E^\top
+
+is the Schur complement of :math:`C` in :math:`H`. It is also known as
+the *reduced camera matrix*, because the only variables
+participating in :eq:`schur` are the ones corresponding to the
+cameras. :math:`S \in \mathbb{R}^{pc\times pc}` is a block structured
+symmetric positive definite matrix, with blocks of size :math:`c\times
+c`. The block :math:`S_{ij}` corresponding to the pair of images
+:math:`i` and :math:`j` is non-zero if and only if the two images
+observe at least one common point.
+
+
+Now, :eq:`linear2` can be solved by first forming :math:`S`, solving for
+:math:`\Delta y`, and then back-substituting :math:`\Delta y` to
+obtain the value of :math:`\Delta z`.  Thus, the solution of what was
+an :math:`n\times n`, :math:`n=pc+qs` linear system is reduced to the
+inversion of the block diagonal matrix :math:`C`, a few matrix-matrix
+and matrix-vector multiplies, and the solution of block sparse
+:math:`pc\times pc` linear system :eq:`schur`.  For almost all
+problems, the number of cameras is much smaller than the number of
+points, :math:`p \ll q`, thus solving :eq:`schur` is
+significantly cheaper than solving :eq:`linear2`. This is the
+*Schur complement trick* [Brown]_.
+
+This still leaves open the question of solving :eq:`schur`. The
+method of choice for solving symmetric positive definite systems
+exactly is via the Cholesky factorization [TrefethenBau]_ and
+depending upon the structure of the matrix, there are, in general, two
+options. The first is direct factorization, where we store and factor
+:math:`S` as a dense matrix [TrefethenBau]_. This method has
+:math:`O(p^2)` space complexity and :math:`O(p^3)` time complexity and
+is only practical for problems with up to a few hundred cameras. Ceres
+implements this strategy as the ``DENSE_SCHUR`` solver.
+
+
+But, :math:`S` is typically a fairly sparse matrix, as most images
+only see a small fraction of the scene. This leads us to the second
+option: Sparse Direct Methods. These methods store :math:`S` as a
+sparse matrix, use row and column re-ordering algorithms to maximize
+the sparsity of the Cholesky decomposition, and focus their compute
+effort on the non-zero part of the factorization [Chen]_. Sparse
+direct methods, depending on the exact sparsity structure of the Schur
+complement, allow bundle adjustment algorithms to significantly scale
+up over those based on dense factorization. Ceres implements this
+strategy as the ``SPARSE_SCHUR`` solver.
+
+.. _section-iterative_schur:
+
+``ITERATIVE_SCHUR``
+-------------------
+
+Another option for bundle adjustment problems is to apply
+Preconditioned Conjugate Gradients to the reduced camera matrix
+:math:`S` instead of :math:`H`. One reason to do this is that
+:math:`S` is a much smaller matrix than :math:`H`, but more
+importantly, it can be shown that :math:`\kappa(S)\leq \kappa(H)`.
+Ceres implements Conjugate Gradients on :math:`S` as the
+``ITERATIVE_SCHUR`` solver. When the user chooses ``ITERATIVE_SCHUR``
+as the linear solver, Ceres automatically switches from the exact step
+algorithm to an inexact step algorithm.
+
+The key computational operation when using Conjugate Gradients is the
+evaluation of the matrix vector product :math:`Sx` for an arbitrary
+vector :math:`x`. There are two ways in which this product can be
+evaluated, and this can be controlled using
+``Solver::Options::use_explicit_schur_complement``. Depending on the
+problem at hand, the performance difference between these two methods
+can be quite substantial.
+
+  1. **Implicit** This is the default. Implicit evaluation is suitable for
+     large problems where the cost of computing and storing the Schur
+     Complement :math:`S` is prohibitive. Because PCG only needs
+     access to :math:`S` via its product with a vector, one way to
+     evaluate :math:`Sx` is to observe that
+
+     .. math:: x_1 &= E^\top x\\
+               x_2 &= C^{-1} x_1\\
+               x_3 &= Ex_2\\
+               x_4 &= Bx\\
+               Sx &= x_4 - x_3
+        :label: schurtrick1
+
+     Thus, we can run PCG on :math:`S` with the same computational
+     effort per iteration as PCG on :math:`H`, while reaping the
+     benefits of a more powerful preconditioner. In fact, we do not
+     even need to compute :math:`H`, :eq:`schurtrick1` can be
+     implemented using just the columns of :math:`J`.
+
+     Equation :eq:`schurtrick1` is closely related to *Domain
+     Decomposition methods* for solving large linear systems that
+     arise in structural engineering and partial differential
+     equations. In the language of Domain Decomposition, each point in
+     a bundle adjustment problem is a domain, and the cameras form the
+     interface between these domains. The iterative solution of the
+     Schur complement then falls within the sub-category of techniques
+     known as Iterative Sub-structuring [Saad]_ [Mathew]_.
+
+  2. **Explicit** The complexity of implicit matrix-vector product
+     evaluation scales with the number of non-zeros in the
+     Jacobian. For small to medium sized problems, the cost of
+     constructing the Schur Complement is small enough that it is
+     better to construct it explicitly in memory and use it to
+     evaluate the product :math:`Sx`.
+
+
+  .. NOTE::
+
+     In exact arithmetic, the choice of implicit versus explicit Schur
+     complement would have no impact on solution quality. However, in
+     practice if the Jacobian is poorly conditioned, one may observe
+     (usually small) differences in solution quality. This is a
+     natural consequence of performing computations in finite arithmetic.
+
+
+.. _section-preconditioner:
+
+Preconditioner
+--------------
+
+The convergence rate of Conjugate Gradients for
+solving :eq:`normal` depends on the distribution of eigenvalues
+of :math:`H` [Saad]_. A useful upper bound is
+:math:`\sqrt{\kappa(H)}`, where, :math:`\kappa(H)` is the condition
+number of the matrix :math:`H`. For most bundle adjustment problems,
+:math:`\kappa(H)` is high and a direct application of Conjugate
+Gradients to :eq:`normal` results in extremely poor performance.
+
+The solution to this problem is to replace :eq:`normal` with a
+*preconditioned* system.  Given a linear system, :math:`Ax =b` and a
+preconditioner :math:`M` the preconditioned system is given by
+:math:`M^{-1}Ax = M^{-1}b`. The resulting algorithm is known as
+Preconditioned Conjugate Gradients algorithm (PCG) and its worst case
+complexity now depends on the condition number of the *preconditioned*
+matrix :math:`\kappa(M^{-1}A)`.
+
+The computational cost of using a preconditioner :math:`M` is the cost
+of computing :math:`M` and evaluating the product :math:`M^{-1}y` for
+arbitrary vectors :math:`y`. Thus, there are two competing factors to
+consider: How much of :math:`H`'s structure is captured by :math:`M`
+so that the condition number :math:`\kappa(M^{-1}H)` is low, and the
+computational cost of constructing and using :math:`M`.  The ideal
+preconditioner would be one for which :math:`\kappa(M^{-1}A)
+=1`. :math:`M=A` achieves this, but it is not a practical choice, as
+applying this preconditioner would require solving a linear system
+equivalent to the unpreconditioned problem.  It is usually the case
+that the more information :math:`M` has about :math:`H`, the more
+expensive it is to use. For example, Incomplete Cholesky factorization
+based preconditioners have much better convergence behavior than the
+Jacobi preconditioner, but are also much more expensive.
+
+The simplest of all preconditioners is the diagonal or Jacobi
+preconditioner, i.e., :math:`M=\operatorname{diag}(A)`, which for
+block structured matrices like :math:`H` can be generalized to the
+block Jacobi preconditioner. Ceres implements the block Jacobi
+preconditioner and refers to it as ``JACOBI``. When used with
+:ref:`section-cgnr` it refers to the block diagonal of :math:`H` and
+when used with :ref:`section-iterative_schur` it refers to the block
+diagonal of :math:`B` [Mandel]_.
+
+Another obvious choice for :ref:`section-iterative_schur` is the block
+diagonal of the Schur complement matrix :math:`S`, i.e., the block
+Jacobi preconditioner for :math:`S`. Ceres implements it and refers to
+it as the ``SCHUR_JACOBI`` preconditioner.
+
+For bundle adjustment problems arising in reconstruction from
+community photo collections, more effective preconditioners can be
+constructed by analyzing and exploiting the camera-point visibility
+structure of the scene [KushalAgarwal]_. Ceres implements the two
+visibility based preconditioners described by Kushal & Agarwal as
+``CLUSTER_JACOBI`` and ``CLUSTER_TRIDIAGONAL``. These are fairly new
+preconditioners and Ceres' implementation of them is in its early
+stages and is not as mature as the other preconditioners described
+above.
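+
+A minimal sketch of pairing ``ITERATIVE_SCHUR`` with one of these
+preconditioners via ``Solver::Options::preconditioner_type``:
+
+.. code-block:: c++
+
+   ceres::Solver::Options options;
+   options.linear_solver_type = ceres::ITERATIVE_SCHUR;
+   options.preconditioner_type = ceres::SCHUR_JACOBI;
+   // Other choices include ceres::JACOBI, ceres::CLUSTER_JACOBI and
+   // ceres::CLUSTER_TRIDIAGONAL.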
+
+.. _section-ordering:
+
+Ordering
+--------
+
+The order in which variables are eliminated in a linear solver can
+have a significant impact on the efficiency and accuracy of the
+method. For example when doing sparse Cholesky factorization, there
+are matrices for which a good ordering will give a Cholesky factor
+with :math:`O(n)` storage, whereas a bad ordering will result in a
+completely dense factor.
+
+Ceres allows the user to provide varying amounts of hints to the
+solver about the variable elimination ordering to use. This can range
+from no hints, where the solver is free to decide the best ordering
+based on the user's choices like the linear solver being used, to an
+exact order in which the variables should be eliminated, and a variety
+of possibilities in between.
+
+Instances of the :class:`ParameterBlockOrdering` class are used to
+communicate this information to Ceres.
+
+Formally an ordering is an ordered partitioning of the parameter
+blocks. Each parameter block belongs to exactly one group, and each
+group has a unique integer associated with it, that determines its
+order in the set of groups. We call these groups *Elimination Groups*.
+
+Given such an ordering, Ceres ensures that the parameter blocks in the
+lowest numbered elimination group are eliminated first, and then the
+parameter blocks in the next lowest numbered elimination group and so
+on. Within each elimination group, Ceres is free to order the
+parameter blocks as it chooses. For example, consider the linear system
+
+.. math::
+  x + y &= 3\\
+  2x + 3y &= 7
+
+There are two ways in which it can be solved. First eliminating
+:math:`x` from the two equations, solving for :math:`y` and then back
+substituting for :math:`x`, or first eliminating :math:`y`, solving
+for :math:`x` and back substituting for :math:`y`. The user can
+construct three orderings here.
+
+1. :math:`\{0: x\}, \{1: y\}` : Eliminate :math:`x` first.
+2. :math:`\{0: y\}, \{1: x\}` : Eliminate :math:`y` first.
+3. :math:`\{0: x, y\}`        : Solver gets to decide the elimination order.
+
+Thus, to have Ceres determine the ordering automatically using
+heuristics, put all the variables in the same elimination group. The
+identity of the group does not matter. This is the same as not
+specifying an ordering at all. To control the ordering for every
+variable, create an elimination group per variable, ordering them in
+the desired order.
+
+If the user is using one of the Schur solvers (``DENSE_SCHUR``,
+``SPARSE_SCHUR``, ``ITERATIVE_SCHUR``) and chooses to specify an
+ordering, it must have one important property. The lowest numbered
+elimination group must form an independent set in the graph
+corresponding to the Hessian, or in other words, no two parameter
+blocks in the first elimination group should co-occur in the same
+residual block. For the best performance, this elimination group
+should be as large as possible. For standard bundle adjustment
+problems, this corresponds to the first elimination group containing
+all the 3d points, and the second containing all the camera
+parameter blocks.
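+
+A minimal sketch of specifying such an ordering for a bundle
+adjustment problem (``points`` and ``cameras`` are illustrative
+containers of the corresponding parameter block pointers):
+
+.. code-block:: c++
+
+   ceres::ParameterBlockOrdering* ordering =
+       new ceres::ParameterBlockOrdering;
+   // Points go in group 0 and are eliminated first ...
+   for (size_t i = 0; i < points.size(); ++i) {
+     ordering->AddElementToGroup(points[i], 0);
+   }
+   // ... followed by the cameras in group 1.
+   for (size_t i = 0; i < cameras.size(); ++i) {
+     ordering->AddElementToGroup(cameras[i], 1);
+   }
+
+   ceres::Solver::Options options;
+   options.linear_solver_ordering.reset(ordering);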
+
+If the user leaves the choice to Ceres, then the solver uses an
+approximate maximum independent set algorithm to identify the first
+elimination group [LiSaad]_.
+
+.. _section-solver-options:
+
+:class:`Solver::Options`
+========================
+
+.. class:: Solver::Options
+
+   :class:`Solver::Options` controls the overall behavior of the
+   solver. We list the various settings and their default values below.
+
+.. function:: bool Solver::Options::IsValid(string* error) const
+
+   Validate the values in the options struct and return true on
+   success. If there is a problem, the method returns false with
+   ``error`` containing a textual description of the cause.
+
+.. member:: MinimizerType Solver::Options::minimizer_type
+
+   Default: ``TRUST_REGION``
+
+   Choose between ``LINE_SEARCH`` and ``TRUST_REGION`` algorithms. See
+   :ref:`section-trust-region-methods` and
+   :ref:`section-line-search-methods` for more details.
+
+.. member:: LineSearchDirectionType Solver::Options::line_search_direction_type
+
+   Default: ``LBFGS``
+
+   Choices are ``STEEPEST_DESCENT``, ``NONLINEAR_CONJUGATE_GRADIENT``,
+   ``BFGS`` and ``LBFGS``.
+
+.. member:: LineSearchType Solver::Options::line_search_type
+
+   Default: ``WOLFE``
+
+   Choices are ``ARMIJO`` and ``WOLFE`` (strong Wolfe conditions).
+   Note that in order for the assumptions underlying the ``BFGS`` and
+   ``LBFGS`` line search direction algorithms to be guaranteed to be
+   satisfied, the ``WOLFE`` line search should be used.
+
+.. member:: NonlinearConjugateGradientType Solver::Options::nonlinear_conjugate_gradient_type
+
+   Default: ``FLETCHER_REEVES``
+
+   Choices are ``FLETCHER_REEVES``, ``POLAK_RIBIERE`` and
+   ``HESTENES_STIEFEL``.
+
+.. member:: int Solver::Options::max_lbfgs_rank
+
+   Default: 20
+
+   The L-BFGS Hessian approximation is a low rank approximation to
+   the inverse of the Hessian matrix. The rank of the approximation
+   determines (linearly) the space and time complexity of using the
+   approximation. The higher the rank, the better the quality of the
+   approximation. The increase in quality is however bounded for a
+   number of reasons.
+
+     1. The method only uses secant information and not actual
+        derivatives.
+
+     2. The Hessian approximation is constrained to be positive
+        definite.
+
+   So increasing this rank to a large number will increase the time
+   and space complexity without a corresponding increase in solution
+   quality. There are no hard and fast rules for choosing the maximum
+   rank. The best choice usually requires some problem specific
+   experimentation.
+
+.. member:: bool Solver::Options::use_approximate_eigenvalue_bfgs_scaling
+
+   Default: ``false``
+
+   As part of the ``BFGS`` update step / ``LBFGS`` right-multiply
+   step, the initial inverse Hessian approximation is taken to be the
+   Identity.  However, [Oren]_ showed that using instead :math:`I *
+   \gamma`, where :math:`\gamma` is a scalar chosen to approximate an
+   eigenvalue of the true inverse Hessian can result in improved
+   convergence in a wide variety of cases.  Setting
+   ``use_approximate_eigenvalue_bfgs_scaling`` to true enables this
+   scaling in ``BFGS`` (before first iteration) and ``LBFGS`` (at each
+   iteration).
+
+   Precisely, approximate eigenvalue scaling equates to
+
+   .. math:: \gamma = \frac{y_k' s_k}{y_k' y_k}
+
+   With:
+
+   .. math:: y_k = \nabla f_{k+1} - \nabla f_k
+   .. math:: s_k = x_{k+1} - x_k
+
+   Where :math:`f()` is the line search objective and :math:`x` the
+   vector of parameter values [NocedalWright]_.
+
+   It is important to note that approximate eigenvalue scaling does
+   **not** *always* improve convergence, and that it can in fact
+   *significantly* degrade performance for certain classes of problem,
+   which is why it is disabled by default.  In particular it can
+   degrade performance when the sensitivity of the problem to different
+   parameters varies significantly, as in this case a single scalar
+   factor fails to capture this variation and detrimentally downscales
+   parts of the Jacobian approximation which correspond to
+   low-sensitivity parameters. It can also reduce the robustness of the
+   solution to errors in the Jacobians.
+
+.. member:: LineSearchInterpolationType Solver::Options::line_search_interpolation_type
+
+   Default: ``CUBIC``
+
+   Degree of the polynomial used to approximate the objective
+   function. Valid values are ``BISECTION``, ``QUADRATIC`` and
+   ``CUBIC``.
+
+.. member:: double Solver::Options::min_line_search_step_size
+
+   The line search terminates if:
+
+   .. math:: \|\Delta x_k\|_\infty < \text{min_line_search_step_size}
+
+   where :math:`\|\cdot\|_\infty` refers to the max norm, and
+   :math:`\Delta x_k` is the step change in the parameter values at
+   the :math:`k`-th iteration.
+
+.. member:: double Solver::Options::line_search_sufficient_function_decrease
+
+   Default: ``1e-4``
+
+   Solving the line search problem exactly is computationally
+   prohibitive. Fortunately, line search based optimization algorithms
+   can still guarantee convergence if instead of an exact solution,
+   the line search algorithm returns a solution which decreases the
+   value of the objective function sufficiently. More precisely, we
+   are looking for a step size s.t.
+
+   .. math:: f(\text{step_size}) \le f(0) + \text{sufficient_decrease} * [f'(0) * \text{step_size}]
+
+   This condition is known as the Armijo condition.
+
+.. member:: double Solver::Options::max_line_search_step_contraction
+
+   Default: ``1e-3``
+
+   In each iteration of the line search,
+
+   .. math:: \text{new_step_size} >= \text{max_line_search_step_contraction} * \text{step_size}
+
+   Note that by definition, for contraction:
+
+   .. math:: 0 < \text{max_step_contraction} < \text{min_step_contraction} < 1
+
+.. member:: double Solver::Options::min_line_search_step_contraction
+
+   Default: ``0.6``
+
+   In each iteration of the line search,
+
+   .. math:: \text{new_step_size} <= \text{min_line_search_step_contraction} * \text{step_size}
+
+   Note that by definition, for contraction:
+
+   .. math:: 0 < \text{max_step_contraction} < \text{min_step_contraction} < 1
+
+.. member:: int Solver::Options::max_num_line_search_step_size_iterations
+
+   Default: ``20``
+
+   Maximum number of trial step size iterations during each line
+   search. If a step size satisfying the search conditions cannot be
+   found within this number of trials, the line search will stop.
+
+   As this is an 'artificial' constraint (one imposed by the user,
+   not the underlying math), if ``WOLFE`` line search is being used,
+   *and* points satisfying the Armijo sufficient (function) decrease
+   condition have been found during the current search (in :math:`\le`
+   ``max_num_line_search_step_size_iterations``), then the step size
+   with the lowest function value which satisfies the Armijo condition
+   will be returned as the new valid step, even though it does *not*
+   satisfy the strong Wolfe conditions.  This behaviour protects
+   against early termination of the optimizer at a sub-optimal point.
+
+.. member:: int Solver::Options::max_num_line_search_direction_restarts
+
+   Default: ``5``
+
+   Maximum number of restarts of the line search direction algorithm
+   before terminating the optimization. Restarts of the line search
+   direction algorithm occur when the current algorithm fails to
+   produce a new descent direction. This typically indicates a
+   numerical failure, or a breakdown in the validity of the
+   approximations used.
+
+.. member:: double Solver::Options::line_search_sufficient_curvature_decrease
+
+   Default: ``0.9``
+
+   The strong Wolfe conditions consist of the Armijo sufficient
+   decrease condition, and an additional requirement that the
+   step size be chosen s.t. the *magnitude* ('strong' Wolfe
+   conditions) of the gradient along the search direction
+   decreases sufficiently. Precisely, this second condition
+   is that we seek a step size s.t.
+
+   .. math:: \|f'(\text{step_size})\| <= \text{sufficient_curvature_decrease} * \|f'(0)\|
+
+   Where :math:`f()` is the line search objective and :math:`f'()` is the derivative
+   of :math:`f` with respect to the step size: :math:`\frac{d f}{d~\text{step size}}`.
+
+.. member:: double Solver::Options::max_line_search_step_expansion
+
+   Default: ``10.0``
+
+   During the bracketing phase of a Wolfe line search, the step size
+   is increased until either a point satisfying the Wolfe conditions
+   is found, or an upper bound for a bracket containing a point
+   satisfying the conditions is found.  Precisely, at each iteration
+   of the expansion:
+
+   .. math:: \text{new_step_size} <= \text{max_step_expansion} * \text{step_size}
+
+   By definition for expansion
+
+   .. math:: \text{max_step_expansion} > 1.0
+
+.. member:: TrustRegionStrategyType Solver::Options::trust_region_strategy_type
+
+   Default: ``LEVENBERG_MARQUARDT``
+
+   The trust region step computation algorithm used by
+   Ceres. Currently ``LEVENBERG_MARQUARDT`` and ``DOGLEG`` are the two
+   valid choices. See :ref:`section-levenberg-marquardt` and
+   :ref:`section-dogleg` for more details.
+
+.. member:: DoglegType Solver::Options::dogleg_type
+
+   Default: ``TRADITIONAL_DOGLEG``
+
+   Ceres supports two different dogleg strategies.
+   ``TRADITIONAL_DOGLEG`` method by Powell and the ``SUBSPACE_DOGLEG``
+   method described by [ByrdSchnabel]_ .  See :ref:`section-dogleg`
+   for more details.
+
+.. member:: bool Solver::Options::use_nonmonotonic_steps
+
+   Default: ``false``
+
+   Relax the requirement that the trust-region algorithm take strictly
+   decreasing steps. See :ref:`section-non-monotonic-steps` for more
+   details.
+
+.. member:: int Solver::Options::max_consecutive_nonmonotonic_steps
+
+   Default: ``5``
+
+   The window size used by the step selection algorithm to accept
+   non-monotonic steps.
+
+.. member:: int Solver::Options::max_num_iterations
+
+   Default: ``50``
+
+   Maximum number of iterations for which the solver should run.
+
+.. member:: double Solver::Options::max_solver_time_in_seconds
+
+   Default: ``1e6``
+
+   Maximum amount of time for which the solver should run.
+
+.. member:: int Solver::Options::num_threads
+
+   Default: ``1``
+
+   Number of threads used by Ceres to evaluate the Jacobian.
+
+.. member::  double Solver::Options::initial_trust_region_radius
+
+   Default: ``1e4``
+
+   The size of the initial trust region. When the
+   ``LEVENBERG_MARQUARDT`` strategy is used, the reciprocal of this
+   number is the initial regularization parameter.
+
+.. member:: double Solver::Options::max_trust_region_radius
+
+   Default: ``1e16``
+
+   The trust region radius is not allowed to grow beyond this value.
+
+.. member:: double Solver::Options::min_trust_region_radius
+
+   Default: ``1e-32``
+
+   The solver terminates when the trust region becomes smaller than
+   this value.
+
+.. member:: double Solver::Options::min_relative_decrease
+
+   Default: ``1e-3``
+
+   Lower threshold for relative decrease before a trust-region step is
+   accepted.
+
+.. member:: double Solver::Options::min_lm_diagonal
+
+   Default: ``1e6``
+
+   The ``LEVENBERG_MARQUARDT`` strategy uses a diagonal matrix to
+   regularize the trust region step. This is the lower bound on
+   the values of this diagonal matrix.
+
+.. member:: double Solver::Options::max_lm_diagonal
+
+   Default:  ``1e32``
+
+   The ``LEVENBERG_MARQUARDT`` strategy uses a diagonal matrix to
+   regularize the trust region step. This is the upper bound on
+   the values of this diagonal matrix.
+
+.. member:: int Solver::Options::max_num_consecutive_invalid_steps
+
+   Default: ``5``
+
+   The step returned by a trust region strategy can sometimes be
+   numerically invalid, usually because of conditioning
+   issues. Instead of crashing or stopping the optimization, the
+   optimizer can go ahead and try solving with a smaller trust
+   region/better conditioned problem. This parameter sets the number
+   of consecutive retries before the minimizer gives up.
+
+.. member:: double Solver::Options::function_tolerance
+
+   Default: ``1e-6``
+
+   Solver terminates if
+
+   .. math:: \frac{|\Delta \text{cost}|}{\text{cost}} <= \text{function_tolerance}
+
+   where, :math:`\Delta \text{cost}` is the change in objective
+   function value (up or down) in the current iteration of
+   Levenberg-Marquardt.
+
+.. member:: double Solver::Options::gradient_tolerance
+
+   Default: ``1e-10``
+
+   Solver terminates if
+
+   .. math:: \|x - \Pi \boxplus(x, -g(x))\|_\infty <= \text{gradient_tolerance}
+
+   where :math:`\|\cdot\|_\infty` refers to the max norm, :math:`\Pi`
+   is projection onto the bounds constraints and :math:`\boxplus` is
+   Plus operation for the overall local parameterization associated
+   with the parameter vector.
+
+.. member:: double Solver::Options::parameter_tolerance
+
+   Default: ``1e-8``
+
+   Solver terminates if
+
+   .. math:: \|\Delta x\| <= (\|x\| + \text{parameter_tolerance}) * \text{parameter_tolerance}
+
+   where :math:`\Delta x` is the step computed by the linear solver in
+   the current iteration.
+
+.. member:: LinearSolverType Solver::Options::linear_solver_type
+
+   Default: ``SPARSE_NORMAL_CHOLESKY`` / ``DENSE_QR``
+
+   Type of linear solver used to compute the solution to the linear
+   least squares problem in each iteration of the Levenberg-Marquardt
+   algorithm. If Ceres is built with support for ``SuiteSparse`` or
+   ``CXSparse`` or ``Eigen``'s sparse Cholesky factorization, the
+   default is ``SPARSE_NORMAL_CHOLESKY``, it is ``DENSE_QR``
+   otherwise.
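+
+   For example, to explicitly request a particular solver (a sketch;
+   whether ``SPARSE_NORMAL_CHOLESKY`` is actually usable depends on
+   the sparse linear algebra library Ceres was built with):
+
+   .. code-block:: c++
+
+      ceres::Solver::Options options;
+      // Use a sparse Cholesky factorization of the normal equations.
+      options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;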
+
+.. member:: PreconditionerType Solver::Options::preconditioner_type
+
+   Default: ``JACOBI``
+
+   The preconditioner used by the iterative linear solver. The default
+   is the block Jacobi preconditioner. Valid values are (in increasing
+   order of complexity) ``IDENTITY``, ``JACOBI``, ``SCHUR_JACOBI``,
+   ``CLUSTER_JACOBI`` and ``CLUSTER_TRIDIAGONAL``. See
+   :ref:`section-preconditioner` for more details.
+
+.. member:: VisibilityClusteringType Solver::Options::visibility_clustering_type
+
+   Default: ``CANONICAL_VIEWS``
+
+   Type of clustering algorithm to use when constructing a visibility
+   based preconditioner. The original visibility based preconditioning
+   paper and implementation only used the canonical views algorithm.
+
+   This algorithm gives high quality results but for large dense
+   graphs can be particularly expensive, as its worst case complexity
+   is cubic in the size of the graph.
+
+   Another option is to use ``SINGLE_LINKAGE`` which is a simple
+   thresholded single linkage clustering algorithm that only pays
+   attention to tightly coupled blocks in the Schur complement. This
+   is a fast algorithm that works well.
+
+   The optimal choice of the clustering algorithm depends on the
+   sparsity structure of the problem, but generally speaking we
+   recommend that you try ``CANONICAL_VIEWS`` first and if it is too
+   expensive try ``SINGLE_LINKAGE``.
+
+.. member:: DenseLinearAlgebraLibrary Solver::Options::dense_linear_algebra_library_type
+
+   Default: ``EIGEN``
+
+   Ceres supports using multiple dense linear algebra libraries for
+   dense matrix factorizations. Currently ``EIGEN`` and ``LAPACK`` are
+   the valid choices. ``EIGEN`` is always available, ``LAPACK`` refers
+   to the system ``BLAS + LAPACK`` library which may or may not be
+   available.
+
+   This setting affects the ``DENSE_QR``, ``DENSE_NORMAL_CHOLESKY``
+   and ``DENSE_SCHUR`` solvers. For small to moderate sized problems
+   ``EIGEN`` is a fine choice but for large problems, an optimized
+   ``LAPACK + BLAS`` implementation can make a substantial difference
+   in performance.
+
+.. member:: SparseLinearAlgebraLibrary Solver::Options::sparse_linear_algebra_library_type
+
+   Default: The highest available according to: ``SUITE_SPARSE`` >
+   ``CX_SPARSE`` > ``EIGEN_SPARSE`` > ``NO_SPARSE``
+
+   Ceres supports the use of three sparse linear algebra libraries,
+   ``SuiteSparse``, which is enabled by setting this parameter to
+   ``SUITE_SPARSE``, ``CXSparse``, which can be selected by setting
+   this parameter to ``CX_SPARSE`` and ``Eigen`` which is enabled by
+   setting this parameter to ``EIGEN_SPARSE``.  Lastly, ``NO_SPARSE``
+   means that no sparse linear solver should be used; note that this is
+   irrespective of whether Ceres was compiled with support for one.
+
+   ``SuiteSparse`` is a sophisticated and complex sparse linear
+   algebra library and should be used in general.
+
+   If your needs/platforms prevent you from using ``SuiteSparse``,
+   consider using ``CXSparse``, which is a much smaller, easier to
+   build library. As can be expected, its performance on large
+   problems is not comparable to that of ``SuiteSparse``.
+
+   Last but not least, you can use the sparse linear algebra
+   routines in ``Eigen``. Currently the performance of this library is
+   the poorest of the three. But this should change in the near
+   future.
+
+   Another thing to consider here is that the sparse Cholesky
+   factorization libraries in Eigen are licensed under ``LGPL`` and
+   building Ceres with support for ``EIGEN_SPARSE`` will result in an
+   LGPL licensed library (since the corresponding code from Eigen is
+   compiled into the library).
+
+   The upside is that you do not need to build and link to an external
+   library to use ``EIGEN_SPARSE``.
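+
+   A configuration sketch that selects ``SuiteSparse`` explicitly;
+   this only works if Ceres was compiled with ``SuiteSparse`` support.
+
+   .. code-block:: c++
+
+      ceres::Solver::Options options;
+      options.sparse_linear_algebra_library_type = ceres::SUITE_SPARSE;
+      options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;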
+
+
+.. member:: shared_ptr<ParameterBlockOrdering> Solver::Options::linear_solver_ordering
+
+   Default: ``NULL``
+
+   An instance of the ordering object informs the solver about the
+   desired order in which parameter blocks should be eliminated by the
+   linear solvers.
+
+   If ``NULL``, the solver is free to choose an ordering that it
+   thinks is best.
+
+   See :ref:`section-ordering` for more details.
+
+.. member:: bool Solver::Options::use_explicit_schur_complement
+
+   Default: ``false``
+
+   Use an explicitly computed Schur complement matrix with
+   ``ITERATIVE_SCHUR``.
+
+   By default this option is disabled and ``ITERATIVE_SCHUR``
+   evaluates matrix-vector products between the Schur
+   complement and a vector implicitly by exploiting the algebraic
+   expression for the Schur complement.
+
+   The cost of this evaluation scales with the number of non-zeros in
+   the Jacobian.
+
+   For small to medium sized problems there is a sweet spot where
+   computing the Schur complement is cheap enough that it is much more
+   efficient to explicitly compute it and use it for evaluating the
+   matrix-vector products.
+
+   Enabling this option tells ``ITERATIVE_SCHUR`` to use an explicitly
+   computed Schur complement. This can improve the performance of the
+   ``ITERATIVE_SCHUR`` solver significantly.
+
+   .. NOTE::
+
+     This option can only be used with the ``SCHUR_JACOBI``
+     preconditioner.
+
+.. member:: bool Solver::Options::use_postordering
+
+   Default: ``false``
+
+   Sparse Cholesky factorization algorithms use a fill-reducing
+   ordering to permute the columns of the Jacobian matrix. There are
+   two ways of doing this.
+
+   1. Compute the Jacobian matrix in some order and then have the
+      factorization algorithm permute the columns of the Jacobian.
+
+   2. Compute the Jacobian with its columns already permuted.
+
+   The first option incurs a significant memory penalty. The
+   factorization algorithm has to make a copy of the permuted Jacobian
+   matrix, thus Ceres pre-permutes the columns of the Jacobian matrix
+   and generally speaking, there is no performance penalty for doing
+   so.
+
+   In some rare cases, it is worth using a more complicated reordering
+   algorithm which has slightly better runtime performance at the
+   expense of an extra copy of the Jacobian matrix. Setting
+   ``use_postordering`` to ``true`` enables this tradeoff.
+
+.. member:: bool Solver::Options::dynamic_sparsity
+
+   Default: ``false``
+
+   Some non-linear least squares problems are symbolically dense but
+   numerically sparse, i.e., at any given state only a small number of
+   Jacobian entries are non-zero, but the position and number of
+   non-zeros is different depending on the state. For these problems
+   it can be useful to factorize the sparse Jacobian at each solver
+   iteration instead of including all of the zero entries in a single
+   general factorization.
+
+   If your problem does not have this property (or you do not know),
+   then it is probably best to keep this false, otherwise it will
+   likely lead to worse performance.
+
+   This setting only affects the `SPARSE_NORMAL_CHOLESKY` solver.
+
+.. member:: int Solver::Options::min_linear_solver_iterations
+
+   Default: ``0``
+
+   Minimum number of iterations used by the linear solver. This only
+   makes sense when the linear solver is an iterative solver, e.g.,
+   ``ITERATIVE_SCHUR`` or ``CGNR``.
+
+.. member:: int Solver::Options::max_linear_solver_iterations
+
+   Default: ``500``
+
+   Maximum number of iterations used by the linear solver. This only
+   makes sense when the linear solver is an iterative solver, e.g.,
+   ``ITERATIVE_SCHUR`` or ``CGNR``.
+
+.. member:: double Solver::Options::eta
+
+   Default: ``1e-1``
+
+   Forcing sequence parameter. The truncated Newton solver uses this
+   number to control the relative accuracy with which the Newton step
+   is computed. This constant is passed to
+   ``ConjugateGradientsSolver`` which uses it to terminate the
+   iterations when
+
+   .. math:: \frac{Q_i - Q_{i-1}}{Q_i} < \frac{\eta}{i}
+
+.. member:: bool Solver::Options::jacobi_scaling
+
+   Default: ``true``
+
+   ``true`` means that the Jacobian is scaled by the norm of its
+   columns before being passed to the linear solver. This improves the
+   numerical conditioning of the normal equations.
+
+.. member:: bool Solver::Options::use_inner_iterations
+
+   Default: ``false``
+
+   Use a non-linear version of a simplified variable projection
+   algorithm. Essentially this amounts to doing a further optimization
+   on each Newton/Trust region step using a coordinate descent
+   algorithm.  For more details, see :ref:`section-inner-iterations`.
+
+.. member:: double Solver::Options::inner_iteration_tolerance
+
+   Default: ``1e-3``
+
+   Generally speaking, inner iterations make significant progress in
+   the early stages of the solve and then their contribution drops
+   down sharply, at which point the time spent doing inner iterations
+   is not worth it.
+
+   Once the relative decrease in the objective function due to inner
+   iterations drops below ``inner_iteration_tolerance``, the use of
+   inner iterations in subsequent trust region minimizer iterations is
+   disabled.
+
+.. member:: shared_ptr<ParameterBlockOrdering> Solver::Options::inner_iteration_ordering
+
+   Default: ``NULL``
+
+   If :member:`Solver::Options::use_inner_iterations` is true, then
+   user has two choices.
+
+   1. Let the solver heuristically decide which parameter blocks to
+      optimize in each inner iteration. To do this, set
+      :member:`Solver::Options::inner_iteration_ordering` to ``NULL``.
+
+   2. Specify a collection of ordered independent sets. The lower
+      numbered groups are optimized before the higher numbered groups
+      during the inner optimization phase. Each group must be an
+      independent set. Not all parameter blocks need to be included in
+      the ordering.
+
+   See :ref:`section-ordering` for more details.
+
+.. member:: LoggingType Solver::Options::logging_type
+
+   Default: ``PER_MINIMIZER_ITERATION``
+
+.. member:: bool Solver::Options::minimizer_progress_to_stdout
+
+   Default: ``false``
+
+   By default the :class:`Minimizer` progress is logged to ``STDERR``
+   depending on the ``vlog`` level. If this flag is set to true, and
+   :member:`Solver::Options::logging_type` is not ``SILENT``, the logging
+   output is sent to ``STDOUT``.
+
+   For ``TRUST_REGION_MINIMIZER`` the progress display looks like
+
+   .. code-block:: bash
+
+      iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+         0  4.185660e+06    0.00e+00    1.09e+08   0.00e+00   0.00e+00  1.00e+04       0    7.59e-02    3.37e-01
+         1  1.062590e+05    4.08e+06    8.99e+06   5.36e+02   9.82e-01  3.00e+04       1    1.65e-01    5.03e-01
+         2  4.992817e+04    5.63e+04    8.32e+06   3.19e+02   6.52e-01  3.09e+04       1    1.45e-01    6.48e-01
+
+   Here
+
+   #. ``cost`` is the value of the objective function.
+   #. ``cost_change`` is the change in the value of the objective
+      function if the step computed in this iteration is accepted.
+   #. ``|gradient|`` is the max norm of the gradient.
+   #. ``|step|`` is the change in the parameter vector.
+   #. ``tr_ratio`` is the ratio of the actual change in the objective
+      function value to the change in the value of the trust
+      region model.
+   #. ``tr_radius`` is the size of the trust region radius.
+   #. ``ls_iter`` is the number of linear solver iterations used to
+      compute the trust region step. For direct/factorization based
+      solvers it is always 1, for iterative solvers like
+      ``ITERATIVE_SCHUR`` it is the number of iterations of the
+      Conjugate Gradients algorithm.
+   #. ``iter_time`` is the time taken by the current iteration.
+   #. ``total_time`` is the total time taken by the minimizer.
+
+   For ``LINE_SEARCH_MINIMIZER`` the progress display looks like
+
+   .. code-block:: bash
+
+      0: f: 2.317806e+05 d: 0.00e+00 g: 3.19e-01 h: 0.00e+00 s: 0.00e+00 e:  0 it: 2.98e-02 tt: 8.50e-02
+      1: f: 2.312019e+05 d: 5.79e+02 g: 3.18e-01 h: 2.41e+01 s: 1.00e+00 e:  1 it: 4.54e-02 tt: 1.31e-01
+      2: f: 2.300462e+05 d: 1.16e+03 g: 3.17e-01 h: 4.90e+01 s: 2.54e-03 e:  1 it: 4.96e-02 tt: 1.81e-01
+
+   Here
+
+   #. ``f`` is the value of the objective function.
+   #. ``d`` is the change in the value of the objective function if
+      the step computed in this iteration is accepted.
+   #. ``g`` is the max norm of the gradient.
+   #. ``h`` is the change in the parameter vector.
+   #. ``s`` is the optimal step length computed by the line search.
+   #. ``it`` is the time taken by the current iteration.
+   #. ``tt`` is the total time taken by the minimizer.
+
+.. member:: vector<int> Solver::Options::trust_region_minimizer_iterations_to_dump
+
+   Default: ``empty``
+
+   List of iterations at which the trust region minimizer should dump
+   the trust region problem. Useful for testing and benchmarking. If
+   ``empty``, no problems are dumped.
+
+.. member:: string Solver::Options::trust_region_problem_dump_directory
+
+   Default: ``/tmp``
+
+   Directory to which the problems should be written. Should be
+   non-empty if
+   :member:`Solver::Options::trust_region_minimizer_iterations_to_dump` is
+   non-empty and
+   :member:`Solver::Options::trust_region_problem_dump_format_type` is not
+   ``CONSOLE``.
+
+.. member:: DumpFormatType Solver::Options::trust_region_problem_dump_format_type
+
+   Default: ``TEXTFILE``
+
+   The format in which trust region problems should be logged when
+   :member:`Solver::Options::trust_region_minimizer_iterations_to_dump`
+   is non-empty.  There are two options:
+
+   * ``CONSOLE`` prints the linear least squares problem in a human
+     readable format to ``stderr``. The Jacobian is printed as a
+     dense matrix. The vectors :math:`D`, :math:`x` and :math:`f` are
+     printed as dense vectors. This should only be used for small
+     problems.
+
+   * ``TEXTFILE`` Write out the linear least squares problem to the
+     directory pointed to by
+     :member:`Solver::Options::trust_region_problem_dump_directory` as
+     text files which can be read into ``MATLAB/Octave``. The Jacobian
+     is dumped as a text file containing :math:`(i,j,s)` triplets, the
+     vectors :math:`D`, :math:`x` and :math:`f` are dumped as text files
+     containing a list of their values.
+
+     A ``MATLAB/Octave`` script called
+     ``ceres_solver_iteration_???.m`` is also output, which can be
+     used to parse and load the problem into memory.
+
+.. member:: bool Solver::Options::check_gradients
+
+   Default: ``false``
+
+   Check all Jacobians computed by each residual block with finite
+   differences. This is expensive since it involves computing the
+   derivative by normal means (e.g. user specified, autodiff, etc),
+   then also computing it using finite differences. The results are
+   compared, and if they differ substantially, the optimization fails
+   and the details are stored in the solver summary.
+
+.. member:: double Solver::Options::gradient_check_relative_precision
+
+   Default: ``1e-8``
+
+   Precision to check for in the gradient checker. If the relative
+   difference between an element of the user supplied Jacobian and its
+   finite difference estimate exceeds this number, then the Jacobian
+   for that cost term is dumped.
+
+.. member:: double Solver::Options::gradient_check_numeric_derivative_relative_step_size
+
+   Default: ``1e-6``
+
+   .. NOTE::
+
+      This option only applies to the numeric differentiation used for
+      checking the user provided derivatives when
+      `Solver::Options::check_gradients` is true. If you are using
+      :class:`NumericDiffCostFunction` and are interested in changing
+      the step size for numeric differentiation in your cost function,
+      please have a look at :class:`NumericDiffOptions`.
+
+   Relative shift used for taking numeric derivatives when
+   `Solver::Options::check_gradients` is `true`.
+
+   For finite differencing, each dimension is evaluated at slightly
+   shifted values, e.g., for forward differences, the numerical
+   derivative is
+
+   .. math::
+
+     \delta &= gradient\_check\_numeric\_derivative\_relative\_step\_size\\
+     \Delta f &= \frac{f((1 + \delta)  x) - f(x)}{\delta x}
+
+   The finite differencing is done along each dimension. The reason to
+   use a relative (rather than absolute) step size is that this way,
+   numeric differentiation works for functions where the arguments are
+   typically large (e.g. :math:`10^9`) and when the values are small
+   (e.g. :math:`10^{-5}`). It is possible to construct *torture cases*
+   which break this finite difference heuristic, but they do not come
+   up often in practice.
+
+.. member:: vector<IterationCallback> Solver::Options::callbacks
+
+   Callbacks that are executed at the end of each iteration of the
+   :class:`Minimizer`. They are executed in the order that they are
+   specified in this vector. By default, parameter blocks are updated
+   only at the end of the optimization, i.e., when the
+   :class:`Minimizer` terminates. This behavior is controlled by
+   :member:`Solver::Options::update_state_every_iteration`. If the user
+   wishes to have access to the updated parameter blocks when his/her
+   callbacks are executed, then set
+   :member:`Solver::Options::update_state_every_iteration` to true.
+
+   The solver does NOT take ownership of these pointers.
+
+.. member:: bool Solver::Options::update_state_every_iteration
+
+   Default: ``false``
+
+   If true, the user's parameter blocks are updated at the end of
+   every Minimizer iteration, otherwise they are updated when the
+   Minimizer terminates. This is useful if, for example, the user
+   wishes to visualize the state of the optimization every iteration
+   (in combination with an IterationCallback).
+
+   **Note**: If :member:`Solver::Options::evaluation_callback` is set,
+   then the behaviour of this flag is slightly different in each case:
+
+   1. If :member:`Solver::Options::update_state_every_iteration` is
+      false, then the user's state is changed at every residual and/or
+      jacobian evaluation. Any user provided IterationCallbacks should
+      **not** inspect and depend on the user visible state while the
+      solver is running, since it will have undefined contents.
+
+   2. If :member:`Solver::Options::update_state_every_iteration` is
+      true, then the user's state is changed at every residual and/or
+      jacobian evaluation, BUT the solver will ensure that before the
+      user provided `IterationCallbacks` are called, the user visible
+      state will be updated to the current best point found by the
+      solver.
+
+.. member:: EvaluationCallback* Solver::Options::evaluation_callback
+
+   Default: ``NULL``
+
+   If non-``NULL``, gets notified when Ceres is about to evaluate the
+   residuals and/or Jacobians. This enables sharing computation between
+   residuals, which in some cases is important for efficient cost
+   evaluation. See :class:`EvaluationCallback` for details.
+
+   **Note**: Evaluation callbacks are incompatible with inner
+   iterations.
+
+   **Warning**: This interacts with
+   :member:`Solver::Options::update_state_every_iteration`. See the
+   documentation for that option for more details.
+
+   The solver does `not` take ownership of the pointer.
+
+:class:`ParameterBlockOrdering`
+===============================
+
+.. class:: ParameterBlockOrdering
+
+   ``ParameterBlockOrdering`` is a class for storing and manipulating
+   an ordered collection of groups/sets with the following semantics:
+
+   Group IDs are non-negative integer values. Elements are any type
+   that can serve as a key in a map or an element of a set.
+
+   An element can only belong to one group at a time. A group may
+   contain an arbitrary number of elements.
+
+   Groups are ordered by their group id.
+
+.. function:: bool ParameterBlockOrdering::AddElementToGroup(const double* element, const int group)
+
+   Add an element to a group. If a group with this id does not exist,
+   one is created. This method can be called any number of times for
+   the same element. Group ids should be non-negative numbers.  Return
+   value indicates if adding the element was a success.
+
+.. function:: void ParameterBlockOrdering::Clear()
+
+   Clear the ordering.
+
+.. function:: bool ParameterBlockOrdering::Remove(const double* element)
+
+   Remove the element, no matter what group it is in. If the element
+   is not a member of any group, calling this method will result in a
+   crash.  Return value indicates if the element was actually removed.
+
+.. function:: void ParameterBlockOrdering::Reverse()
+
+   Reverse the order of the groups in place.
+
+.. function:: int ParameterBlockOrdering::GroupId(const double* element) const
+
+   Return the group id for the element. If the element is not a member
+   of any group, return -1.
+
+.. function:: bool ParameterBlockOrdering::IsMember(const double* element) const
+
+   True if there is a group containing the parameter block.
+
+.. function:: int ParameterBlockOrdering::GroupSize(const int group) const
+
+   Return the number of elements in the given group. This function
+   always succeeds, i.e., implicitly there exists a group for every
+   integer, and groups with no elements have size zero.
+
+.. function:: int ParameterBlockOrdering::NumElements() const
+
+   Number of elements in the ordering.
+
+.. function:: int ParameterBlockOrdering::NumGroups() const
+
+   Number of groups with one or more elements.
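+
+For illustration, a small usage sketch; ``x``, ``y`` and ``z`` are
+hypothetical parameter block pointers.
+
+.. code-block:: c++
+
+   double x[3], y[2], z[4];
+   ceres::ParameterBlockOrdering ordering;
+   ordering.AddElementToGroup(x, 0);
+   ordering.AddElementToGroup(y, 0);
+   ordering.AddElementToGroup(z, 1);
+   // ordering.GroupId(x) == 0, ordering.NumGroups() == 2,
+   // ordering.NumElements() == 3.
+   ordering.Reverse();  // The group containing z now comes first.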
+
+:class:`EvaluationCallback`
+===========================
+
+.. class:: EvaluationCallback
+
+   Interface for receiving callbacks before Ceres evaluates residuals or
+   Jacobians:
+
+   .. code-block:: c++
+
+      class EvaluationCallback {
+       public:
+        virtual ~EvaluationCallback() {}
+        virtual void PrepareForEvaluation(bool evaluate_jacobians,
+                                          bool new_evaluation_point) = 0;
+      };
+
+   ``PrepareForEvaluation()`` is called before Ceres requests residuals
+   or jacobians for a given setting of the parameters. User parameters
+   (the double* values provided to the cost functions) are fixed until
+   the next call to ``PrepareForEvaluation()``. If
+   ``new_evaluation_point == true``, then this is a new point that is
+   different from the last evaluated point. Otherwise, it is the same
+   point that was evaluated previously (either jacobian or residual) and
+   the user can use cached results from previous evaluations. If
+   ``evaluate_jacobians`` is true, then Ceres will request jacobians in
+   the upcoming cost evaluation.
+
+   Using this callback interface, Ceres can notify you when it is about
+   to evaluate the residuals or jacobians. With the callback, you can
+   share computation between residual blocks by doing the shared
+   computation in PrepareForEvaluation() before Ceres calls
+   CostFunction::Evaluate() on all the residuals. It also enables
+   caching results between a pure residual evaluation and a residual &
+   jacobian evaluation, via the new_evaluation_point argument.
+
+   One use case for this callback is when the cost function computation
+   is moved to the GPU. In that case, the prepare call does the actual
+   cost function evaluation, and subsequent calls from Ceres to the
+   actual cost functions merely copy the results from the GPU onto the
+   corresponding blocks for Ceres to plug into the solver.
+
+   **Note**: Ceres provides no mechanism to share data other than the
+   notification from the callback. Users must provide access to
+   pre-computed shared data to their cost functions behind the scenes;
+   this all happens without Ceres knowing. One approach is to put a
+   pointer to the shared data in each cost function (recommended) or to
+   use a global shared variable (discouraged; bug-prone).  As far as
+   Ceres is concerned, it is evaluating cost functions like any other;
+   it just so happens that behind the scenes the cost functions reuse
+   pre-computed data to execute faster.
+
+   See ``evaluation_callback_test.cc`` for code that explicitly verifies
+   the preconditions between ``PrepareForEvaluation()`` and
+   ``CostFunction::Evaluate()``.
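+
+   As an illustrative sketch (not actual Ceres code), a callback that
+   refreshes some shared pre-computed data whenever the parameters
+   change might look as follows; ``SharedData`` and its ``Update()``
+   method are hypothetical.
+
+   .. code-block:: c++
+
+      class SharedComputationCallback : public ceres::EvaluationCallback {
+       public:
+        explicit SharedComputationCallback(SharedData* data) : data_(data) {}
+        virtual ~SharedComputationCallback() {}
+
+        virtual void PrepareForEvaluation(bool evaluate_jacobians,
+                                          bool new_evaluation_point) {
+          if (new_evaluation_point) {
+            // Recompute the expensive quantities that all the cost
+            // functions read; the cost functions then only copy results.
+            data_->Update();
+          }
+        }
+
+       private:
+        SharedData* data_;
+      };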
+
+:class:`IterationCallback`
+==========================
+
+.. class:: IterationSummary
+
+   :class:`IterationSummary` describes the state of the minimizer at
+   the end of each iteration.
+
+.. member:: int32 IterationSummary::iteration
+
+   Current iteration number.
+
+.. member:: bool IterationSummary::step_is_valid
+
+   Step was numerically valid, i.e., all values are finite and the
+   step reduces the value of the linearized model.
+
+   **Note**: :member:`IterationSummary::step_is_valid` is `false`
+   when :member:`IterationSummary::iteration` = 0.
+
+.. member:: bool IterationSummary::step_is_nonmonotonic
+
+   Step did not reduce the value of the objective function
+   sufficiently, but it was accepted because of the relaxed
+   acceptance criterion used by the non-monotonic trust region
+   algorithm.
+
+   **Note**: :member:`IterationSummary::step_is_nonmonotonic` is
+   `false` when :member:`IterationSummary::iteration` = 0.
+
+.. member:: bool IterationSummary::step_is_successful
+
+   Whether or not the minimizer accepted this step.
+
+   If the ordinary trust region algorithm is used, this means that the
+   relative reduction in the objective function value was greater than
+   :member:`Solver::Options::min_relative_decrease`. However, if the
+   non-monotonic trust region algorithm is used
+   (:member:`Solver::Options::use_nonmonotonic_steps` = `true`), then
+   even if the relative decrease is not sufficient, the algorithm may
+   accept the step and the step is declared successful.
+
+   **Note**: :member:`IterationSummary::step_is_successful` is `false`
+   when :member:`IterationSummary::iteration` = 0.
+
+.. member:: double IterationSummary::cost
+
+   Value of the objective function.
+
+.. member:: double IterationSummary::cost_change
+
+   Change in the value of the objective function in this
+   iteration. This can be positive or negative.
+
+.. member:: double IterationSummary::gradient_max_norm
+
+   Infinity norm of the gradient vector.
+
+.. member:: double IterationSummary::gradient_norm
+
+   2-norm of the gradient vector.
+
+.. member:: double IterationSummary::step_norm
+
+   2-norm of the size of the step computed in this iteration.
+
+.. member:: double IterationSummary::relative_decrease
+
+   For trust region algorithms, the ratio of the actual change in cost
+   and the change in the cost of the linearized approximation.
+
+   This field is not used when a line search minimizer is used.
+
+.. member:: double IterationSummary::trust_region_radius
+
+   Size of the trust region at the end of the current iteration. For
+   the Levenberg-Marquardt algorithm, the regularization parameter is
+   1.0 / :member:`IterationSummary::trust_region_radius`.
+
+.. member:: double IterationSummary::eta
+
+   For the inexact step Levenberg-Marquardt algorithm, this is the
+   relative accuracy with which the step is solved. This number is
+   only applicable to the iterative solvers capable of solving linear
+   systems inexactly. Factorization-based exact solvers always have an
+   eta of 0.0.
+
+.. member:: double IterationSummary::step_size
+
+   Step size computed by the line search algorithm.
+
+   This field is not used when a trust region minimizer is used.
+
+.. member:: int IterationSummary::line_search_function_evaluations
+
+   Number of function evaluations used by the line search algorithm.
+
+   This field is not used when a trust region minimizer is used.
+
+.. member:: int IterationSummary::linear_solver_iterations
+
+   Number of iterations taken by the linear solver to solve for the
+   trust region step.
+
+   Currently this field is not used when a line search minimizer is
+   used.
+
+.. member:: double IterationSummary::iteration_time_in_seconds
+
+   Time (in seconds) spent inside the minimizer loop in the current
+   iteration.
+
+.. member:: double IterationSummary::step_solver_time_in_seconds
+
+   Time (in seconds) spent inside the trust region step solver.
+
+.. member:: double IterationSummary::cumulative_time_in_seconds
+
+   Time (in seconds) since the user called Solve().
+
+
+.. class:: IterationCallback
+
+   Interface for specifying callbacks that are executed at the end of
+   each iteration of the minimizer.
+
+   .. code-block:: c++
+
+      class IterationCallback {
+       public:
+        virtual ~IterationCallback() {}
+        virtual CallbackReturnType operator()(const IterationSummary& summary) = 0;
+      };
+
+
+   The solver uses the return value of ``operator()`` to decide
+   whether to continue solving or to terminate. The user can return
+   three values.
+
+   #. ``SOLVER_ABORT`` indicates that the callback detected an abnormal
+      situation. The solver returns without updating the parameter
+      blocks (unless ``Solver::Options::update_state_every_iteration`` is
+      set true). Solver returns with ``Solver::Summary::termination_type``
+      set to ``USER_FAILURE``.
+
+   #. ``SOLVER_TERMINATE_SUCCESSFULLY`` indicates that there is no need
+      to optimize anymore (some user specified termination criterion
+      has been met). Solver returns with
+      ``Solver::Summary::termination_type`` set to ``USER_SUCCESS``.
+
+   #. ``SOLVER_CONTINUE`` indicates that the solver should continue
+      optimizing.
+
+   For example, the following :class:`IterationCallback` is used
+   internally by Ceres to log the progress of the optimization.
+
+   .. code-block:: c++
+
+      class LoggingCallback : public IterationCallback {
+       public:
+        explicit LoggingCallback(bool log_to_stdout)
+            : log_to_stdout_(log_to_stdout) {}
+
+        ~LoggingCallback() {}
+
+        CallbackReturnType operator()(const IterationSummary& summary) {
+          const char* kReportRowFormat =
+              "% 4d: f:% 8e d:% 3.2e g:% 3.2e h:% 3.2e "
+              "rho:% 3.2e mu:% 3.2e eta:% 3.2e li:% 3d";
+          string output = StringPrintf(kReportRowFormat,
+                                       summary.iteration,
+                                       summary.cost,
+                                       summary.cost_change,
+                                       summary.gradient_max_norm,
+                                       summary.step_norm,
+                                       summary.relative_decrease,
+                                       summary.trust_region_radius,
+                                       summary.eta,
+                                       summary.linear_solver_iterations);
+          if (log_to_stdout_) {
+            cout << output << endl;
+          } else {
+            VLOG(1) << output;
+          }
+          return SOLVER_CONTINUE;
+        }
+
+       private:
+        const bool log_to_stdout_;
+      };
+
+
+
+:class:`CRSMatrix`
+==================
+
+.. class:: CRSMatrix
+
+   A compressed row sparse matrix used primarily for communicating the
+   Jacobian matrix to the user.
+
+.. member:: int CRSMatrix::num_rows
+
+   Number of rows.
+
+.. member:: int CRSMatrix::num_cols
+
+   Number of columns.
+
+.. member:: vector<int> CRSMatrix::rows
+
+   :member:`CRSMatrix::rows` is a :member:`CRSMatrix::num_rows` + 1
+   sized array that points into the :member:`CRSMatrix::cols` and
+   :member:`CRSMatrix::values` array.
+
+.. member:: vector<int> CRSMatrix::cols
+
+   :member:`CRSMatrix::cols` contain as many entries as there are
+   non-zeros in the matrix.
+
+   For each row ``i``, ``cols[rows[i]]`` ... ``cols[rows[i + 1] - 1]``
+   are the indices of the non-zero columns of row ``i``.
+
+.. member:: vector<double> CRSMatrix::values
+
+   :member:`CRSMatrix::values` contain as many entries as there are
+   non-zeros in the matrix.
+
+   For each row ``i``,
+   ``values[rows[i]]`` ... ``values[rows[i + 1] - 1]`` are the values
+   of the non-zero columns of row ``i``.
+
+e.g., consider the 3x4 sparse matrix
+
+.. code-block:: c++
+
+   0 10  0  4
+   0  2 -3  2
+   1  2  0  0
+
+The three arrays will be:
+
+.. code-block:: c++
+
+            -row0-  ---row1---  -row2-
+   rows   = [ 0,      2,          5,     7]
+   cols   = [ 1,  3,  1,  2,  3,  0,  1]
+   values = [10,  4,  2, -3,  2,  1,  2]
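+
+A sketch of how the non-zero entries can be traversed using these
+three arrays; ``jacobian`` is assumed to be a populated
+:class:`CRSMatrix`, e.g. one filled in by ``Problem::Evaluate``.
+
+.. code-block:: c++
+
+   for (int i = 0; i < jacobian.num_rows; ++i) {
+     // Entries of row i live in the half-open range [rows[i], rows[i + 1]).
+     for (int idx = jacobian.rows[i]; idx < jacobian.rows[i + 1]; ++idx) {
+       const int j = jacobian.cols[idx];
+       const double value = jacobian.values[idx];
+       printf("A(%d, %d) = %f\n", i, j, value);
+     }
+   }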
+
+
+:class:`Solver::Summary`
+========================
+
+.. class:: Solver::Summary
+
+   Summary of the various stages of the solver after termination.
+
+.. function:: string Solver::Summary::BriefReport() const
+
+   A brief one line description of the state of the solver after
+   termination.
+
+.. function:: string Solver::Summary::FullReport() const
+
+   A full multiline description of the state of the solver after
+   termination.
+
+.. function:: bool Solver::Summary::IsSolutionUsable() const
+
+   Whether the solution returned by the optimization algorithm can be
+   relied on to be numerically sane. This will be the case if
+   `Solver::Summary::termination_type` is set to `CONVERGENCE`,
+   `USER_SUCCESS` or `NO_CONVERGENCE`, i.e., either the solver
+   converged by meeting one of the convergence tolerances or because
+   the user indicated that it had converged or it ran to the maximum
+   number of iterations or time.
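+
+   A typical usage sketch, assuming ``options`` and ``problem`` have
+   already been set up:
+
+   .. code-block:: c++
+
+      ceres::Solver::Summary summary;
+      ceres::Solve(options, &problem, &summary);
+      std::cout << summary.BriefReport() << "\n";
+      if (!summary.IsSolutionUsable()) {
+        // The solve failed; the parameter values should not be trusted.
+      }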
+
+.. member:: MinimizerType Solver::Summary::minimizer_type
+
+   Type of minimization algorithm used.
+
+.. member:: TerminationType Solver::Summary::termination_type
+
+   The cause of the minimizer terminating.
+
+.. member:: string Solver::Summary::message
+
+   Reason why the solver terminated.
+
+.. member:: double Solver::Summary::initial_cost
+
+   Cost of the problem (value of the objective function) before the
+   optimization.
+
+.. member:: double Solver::Summary::final_cost
+
+   Cost of the problem (value of the objective function) after the
+   optimization.
+
+.. member:: double Solver::Summary::fixed_cost
+
+   The part of the total cost that comes from residual blocks that
+   were held fixed by the preprocessor because all the parameter
+   blocks that they depend on were fixed.
+
+.. member:: vector<IterationSummary> Solver::Summary::iterations
+
+   :class:`IterationSummary` for each minimizer iteration in order.
+
+.. member:: int Solver::Summary::num_successful_steps
+
+   Number of minimizer iterations in which the step was
+   accepted. Unless :member:`Solver::Options::use_non_monotonic_steps`
+   is `true` this is also the number of steps in which the objective
+   function value/cost went down.
+
+.. member:: int Solver::Summary::num_unsuccessful_steps
+
+   Number of minimizer iterations in which the step was rejected
+   either because it did not reduce the cost enough or the step was
+   not numerically valid.
+
+.. member:: int Solver::Summary::num_inner_iteration_steps
+
+   Number of times inner iterations were performed.
+
+.. member:: int Solver::Summary::num_line_search_steps
+
+   Total number of iterations inside the line search algorithm across
+   all invocations. We call these iterations "steps" to distinguish
+   them from the outer iterations of the line search and trust region
+   minimizer algorithms which call the line search algorithm as a
+   subroutine.
+
+.. member:: double Solver::Summary::preprocessor_time_in_seconds
+
+   Time (in seconds) spent in the preprocessor.
+
+.. member:: double Solver::Summary::minimizer_time_in_seconds
+
+   Time (in seconds) spent in the Minimizer.
+
+.. member:: double Solver::Summary::postprocessor_time_in_seconds
+
+   Time (in seconds) spent in the post processor.
+
+.. member:: double Solver::Summary::total_time_in_seconds
+
+   Time (in seconds) spent in the solver.
+
+.. member:: double Solver::Summary::linear_solver_time_in_seconds
+
+   Time (in seconds) spent in the linear solver computing the trust
+   region step.
+
+.. member:: int Solver::Summary::num_linear_solves
+
+   Number of times the Newton step was computed by solving a linear
+   system. This does not include linear solves used by inner
+   iterations.
+
+.. member:: double Solver::Summary::residual_evaluation_time_in_seconds
+
+   Time (in seconds) spent evaluating the residual vector.
+
+.. member:: int Solver::Summary::num_residual_evaluations
+
+   Number of times only the residuals were evaluated.
+
+.. member:: double Solver::Summary::jacobian_evaluation_time_in_seconds
+
+   Time (in seconds) spent evaluating the Jacobian matrix.
+
+.. member:: int Solver::Summary::num_jacobian_evaluations
+
+   Number of times only the Jacobian and the residuals were evaluated.
+
+.. member:: double Solver::Summary::inner_iteration_time_in_seconds
+
+   Time (in seconds) spent doing inner iterations.
+
+.. member:: int Solver::Summary::num_parameter_blocks
+
+   Number of parameter blocks in the problem.
+
+.. member:: int Solver::Summary::num_parameters
+
+   Number of parameters in the problem.
+
+.. member:: int Solver::Summary::num_effective_parameters
+
+   Dimension of the tangent space of the problem (or the number of
+   columns in the Jacobian for the problem). This is different from
+   :member:`Solver::Summary::num_parameters` if a parameter block is
+   associated with a :class:`LocalParameterization`.
+
+.. member:: int Solver::Summary::num_residual_blocks
+
+   Number of residual blocks in the problem.
+
+.. member:: int Solver::Summary::num_residuals
+
+   Number of residuals in the problem.
+
+.. member:: int Solver::Summary::num_parameter_blocks_reduced
+
+   Number of parameter blocks in the problem after the inactive and
+   constant parameter blocks have been removed. A parameter block is
+   inactive if no residual block refers to it.
+
+.. member:: int Solver::Summary::num_parameters_reduced
+
+   Number of parameters in the reduced problem.
+
+.. member:: int Solver::Summary::num_effective_parameters_reduced
+
+   Dimension of the tangent space of the reduced problem (or the
+   number of columns in the Jacobian for the reduced problem). This is
+   different from :member:`Solver::Summary::num_parameters_reduced` if
+   a parameter block in the reduced problem is associated with a
+   :class:`LocalParameterization`.
+
+.. member:: int Solver::Summary::num_residual_blocks_reduced
+
+   Number of residual blocks in the reduced problem.
+
+.. member:: int Solver::Summary::num_residuals_reduced
+
+   Number of residuals in the reduced problem.
+
+.. member:: int Solver::Summary::num_threads_given
+
+   Number of threads specified by the user for Jacobian and residual
+   evaluation.
+
+.. member:: int Solver::Summary::num_threads_used
+
+   Number of threads actually used by the solver for Jacobian and
+   residual evaluation. This number is not equal to
+   :member:`Solver::Summary::num_threads_given` if neither `OpenMP`
+   nor `CXX11_THREADS` is available.
+
+.. member:: LinearSolverType Solver::Summary::linear_solver_type_given
+
+   Type of the linear solver requested by the user.
+
+.. member:: LinearSolverType Solver::Summary::linear_solver_type_used
+
+   Type of the linear solver actually used. This may be different from
+   :member:`Solver::Summary::linear_solver_type_given` if Ceres
+   determines that the problem structure is not compatible with the
+   linear solver requested or if the linear solver requested by the
+   user is not available, e.g. the user requested
+   `SPARSE_NORMAL_CHOLESKY` but no sparse linear algebra library was
+   available.
+
+.. member:: vector<int> Solver::Summary::linear_solver_ordering_given
+
+   Size of the elimination groups given by the user as hints to the
+   linear solver.
+
+.. member:: vector<int> Solver::Summary::linear_solver_ordering_used
+
+   Size of the parameter groups used by the solver when ordering the
+   columns of the Jacobian.  This may be different from
+   :member:`Solver::Summary::linear_solver_ordering_given` if the user
+   left :member:`Solver::Summary::linear_solver_ordering_given` blank
+   and asked for an automatic ordering, or if the problem contains
+   some constant or inactive parameter blocks.
+
+.. member:: std::string Solver::Summary::schur_structure_given
+
+   For Schur type linear solvers, this string describes the template
+   specialization which was detected in the problem and should be
+   used.
+
+.. member:: std::string Solver::Summary::schur_structure_used
+
+   For Schur type linear solvers, this string describes the template
+   specialization that was actually instantiated and used. This will
+   be different from
+   :member:`Solver::Summary::schur_structure_given` if the
+   corresponding template specialization does not exist.
+
+   Template specializations can be added to ceres by editing
+   ``internal/ceres/generate_template_specializations.py``
+
+.. member:: bool Solver::Summary::inner_iterations_given
+
+   `True` if the user asked for inner iterations to be used as part of
+   the optimization.
+
+.. member:: bool Solver::Summary::inner_iterations_used
+
+   `True` if the user asked for inner iterations to be used as part of
+   the optimization and the problem structure was such that they were
+   actually performed. For example, in a problem with just one parameter
+   block, inner iterations are not performed.
+
+.. member:: vector<int> Solver::Summary::inner_iteration_ordering_given
+
+   Size of the parameter groups given by the user for performing inner
+   iterations.
+
+.. member:: vector<int> Solver::Summary::inner_iteration_ordering_used
+
+   Size of the parameter groups used by the solver for
+   performing inner iterations. This may be different from
+   :member:`Solver::Summary::inner_iteration_ordering_given` if the
+   user left :member:`Solver::Summary::inner_iteration_ordering_given`
+   blank and asked for an automatic ordering, or if the problem
+   contains some constant or inactive parameter blocks.
+
+.. member:: PreconditionerType Solver::Summary::preconditioner_type_given
+
+   Type of the preconditioner requested by the user.
+
+.. member:: PreconditionerType Solver::Summary::preconditioner_type_used
+
+   Type of the preconditioner actually used. This may be different
+   from :member:`Solver::Summary::linear_solver_type_given` if Ceres
+   determines that the problem structure is not compatible with the
+   linear solver requested or if the linear solver requested by the
+   user is not available.
+
+.. member:: VisibilityClusteringType Solver::Summary::visibility_clustering_type
+
+   Type of clustering algorithm used for visibility based
+   preconditioning. Only meaningful when the
+   :member:`Solver::Summary::preconditioner_type_used` is
+   ``CLUSTER_JACOBI`` or ``CLUSTER_TRIDIAGONAL``.
+
+.. member:: TrustRegionStrategyType Solver::Summary::trust_region_strategy_type
+
+   Type of trust region strategy.
+
+.. member:: DoglegType Solver::Summary::dogleg_type
+
+   Type of dogleg strategy used for solving the trust region problem.
+
+.. member:: DenseLinearAlgebraLibraryType Solver::Summary::dense_linear_algebra_library_type
+
+   Type of the dense linear algebra library used.
+
+.. member:: SparseLinearAlgebraLibraryType Solver::Summary::sparse_linear_algebra_library_type
+
+   Type of the sparse linear algebra library used.
+
+.. member:: LineSearchDirectionType Solver::Summary::line_search_direction_type
+
+   Type of line search direction used.
+
+.. member:: LineSearchType Solver::Summary::line_search_type
+
+   Type of the line search algorithm used.
+
+.. member:: LineSearchInterpolationType Solver::Summary::line_search_interpolation_type
+
+   When performing line search, the degree of the polynomial used to
+   approximate the objective function.
+
+.. member:: NonlinearConjugateGradientType Solver::Summary::nonlinear_conjugate_gradient_type
+
+   If the line search direction is `NONLINEAR_CONJUGATE_GRADIENT`,
+   then this indicates the particular variant of non-linear conjugate
+   gradient used.
+
+.. member:: int Solver::Summary::max_lbfgs_rank
+
+   If the type of the line search direction is `LBFGS`, then this
+   indicates the rank of the Hessian approximation.
diff --git a/docs/source/nnls_tutorial.rst b/docs/source/nnls_tutorial.rst
new file mode 100644
index 0000000..3c39086
--- /dev/null
+++ b/docs/source/nnls_tutorial.rst
@@ -0,0 +1,1037 @@
+.. highlight:: c++
+
+.. default-domain:: cpp
+
+.. _chapter-nnls_tutorial:
+
+========================
+Non-linear Least Squares
+========================
+
+Introduction
+============
+
+Ceres can solve bounds constrained robustified non-linear least
+squares problems of the form
+
+.. math:: :label: ceresproblem
+
+   \min_{\mathbf{x}} &\quad \frac{1}{2}\sum_{i} \rho_i\left(\left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2\right) \\
+   \text{s.t.} &\quad l_j \le x_j \le u_j
+
+Problems of this form come up in a broad range of areas across
+science and engineering -- from `fitting curves`_ in statistics, to
+constructing `3D models from photographs`_ in computer vision.
+
+.. _fitting curves: http://en.wikipedia.org/wiki/Nonlinear_regression
+.. _3D models from photographs: http://en.wikipedia.org/wiki/Bundle_adjustment
+
+In this chapter we will learn how to solve :eq:`ceresproblem` using
+Ceres Solver. Full working code for all the examples described in this
+chapter and more can be found in the `examples
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/>`_
+directory.
+
+The expression
+:math:`\rho_i\left(\left\|f_i\left(x_{i_1},...,x_{i_k}\right)\right\|^2\right)`
+is known as a ``ResidualBlock``, where :math:`f_i(\cdot)` is a
+:class:`CostFunction` that depends on the parameter blocks
+:math:`\left[x_{i_1},... , x_{i_k}\right]`. In most optimization
+problems small groups of scalars occur together. For example, the three
+components of a translation vector and the four components of the
+quaternion that define the pose of a camera. We refer to such a group
+of small scalars as a ``ParameterBlock``. Of course a
+``ParameterBlock`` can just be a single parameter. :math:`l_j` and
+:math:`u_j` are bounds on the parameter block :math:`x_j`.
+
+:math:`\rho_i` is a :class:`LossFunction`. A :class:`LossFunction` is
+a scalar function that is used to reduce the influence of outliers on
+the solution of non-linear least squares problems.
+
+As a special case, when :math:`\rho_i(x) = x`, i.e., the identity
+function, and :math:`l_j = -\infty` and :math:`u_j = \infty` we get
+the more familiar `non-linear least squares problem
+<http://en.wikipedia.org/wiki/Non-linear_least_squares>`_.
+
+.. math:: \frac{1}{2}\sum_{i} \left\|f_i\left(x_{i_1}, ... ,x_{i_k}\right)\right\|^2.
+   :label: ceresproblemnonrobust
+
+.. _section-hello-world:
+
+Hello World!
+============
+
+To get started, consider the problem of finding the minimum of the
+function
+
+.. math:: \frac{1}{2}(10 -x)^2.
+
+This is a trivial problem, whose minimum is located at :math:`x = 10`,
+but it is a good place to start to illustrate the basics of solving a
+problem with Ceres [#f1]_.
+
+The first step is to write a functor that will evaluate the function
+:math:`f(x) = 10 - x`:
+
+.. code-block:: c++
+
+   struct CostFunctor {
+      template <typename T>
+      bool operator()(const T* const x, T* residual) const {
+        residual[0] = 10.0 - x[0];
+        return true;
+      }
+   };
+
+The important thing to note here is that ``operator()`` is a templated
+method, which assumes that all its inputs and outputs are of some type
+``T``. The use of templating here allows Ceres to call
+``CostFunctor::operator()``, with ``T=double`` when just the value
+of the residual is needed, and with a special type ``T=Jet`` when the
+Jacobians are needed. In :ref:`section-derivatives` we will discuss the
+various ways of supplying derivatives to Ceres in more detail.
+
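+To make this concrete, here is a small sketch of what evaluating an
+expression on ``Jet`` values computes; this is illustrative only and
+is not how Ceres invokes the functor internally. The value and the
+first derivative are carried along together.
+
+.. code-block:: c++
+
+   #include <iostream>
+   #include "ceres/jet.h"
+
+   int main() {
+     // A Jet with one derivative slot: the value 5 and dx/dx = 1.
+     ceres::Jet<double, 1> x(5.0, 0);
+     ceres::Jet<double, 1> residual = 10.0 - x;
+     // residual.a holds the value of 10 - x and residual.v[0] holds
+     // the derivative d(10 - x)/dx = -1.
+     std::cout << residual.a << " " << residual.v[0] << "\n";
+     return 0;
+   }
+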
+Once we have a way of computing the residual function, it is now time
+to construct a non-linear least squares problem using it and have
+Ceres solve it.
+
+.. code-block:: c++
+
+   int main(int argc, char** argv) {
+     google::InitGoogleLogging(argv[0]);
+
+     // The variable to solve for with its initial value.
+     double initial_x = 5.0;
+     double x = initial_x;
+
+     // Build the problem.
+     Problem problem;
+
+     // Set up the only cost function (also known as residual). This uses
+     // auto-differentiation to obtain the derivative (jacobian).
+     CostFunction* cost_function =
+         new AutoDiffCostFunction<CostFunctor, 1, 1>(new CostFunctor);
+     problem.AddResidualBlock(cost_function, NULL, &x);
+
+     // Run the solver!
+     Solver::Options options;
+     options.linear_solver_type = ceres::DENSE_QR;
+     options.minimizer_progress_to_stdout = true;
+     Solver::Summary summary;
+     Solve(options, &problem, &summary);
+
+     std::cout << summary.BriefReport() << "\n";
+     std::cout << "x : " << initial_x
+               << " -> " << x << "\n";
+     return 0;
+   }
+
+:class:`AutoDiffCostFunction` takes a ``CostFunctor`` as input,
+automatically differentiates it and gives it a :class:`CostFunction`
+interface.
+
+Compiling and running `examples/helloworld.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
+gives us
+
+.. code-block:: bash
+
+   iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+      0  4.512500e+01    0.00e+00    9.50e+00   0.00e+00   0.00e+00  1.00e+04       0    5.33e-04    3.46e-03
+      1  4.511598e-07    4.51e+01    9.50e-04   9.50e+00   1.00e+00  3.00e+04       1    5.00e-04    4.05e-03
+      2  5.012552e-16    4.51e-07    3.17e-08   9.50e-04   1.00e+00  9.00e+04       1    1.60e-05    4.09e-03
+   Ceres Solver Report: Iterations: 2, Initial cost: 4.512500e+01, Final cost: 5.012552e-16, Termination: CONVERGENCE
+   x : 0.5 -> 10
+
+Starting from :math:`x=5`, the solver reaches :math:`x=10` in two iterations
+[#f2]_. The careful reader will note that this is a linear problem and
+one linear solve should be enough to get the optimal value.  The
+default configuration of the solver is aimed at non-linear problems,
+and for reasons of simplicity we did not change it in this example. It
+is indeed possible to obtain the solution to this problem using Ceres
+in one iteration. Also note that the solver did get very close to the
+optimal function value of 0 in the very first iteration. We will
+discuss these issues in greater detail when we talk about convergence
+and parameter settings for Ceres.
+
+.. rubric:: Footnotes
+
+.. [#f1] `examples/helloworld.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_
+.. [#f2] Actually the solver ran for three iterations; it was only by
+   looking at the value returned by the linear solver in the third
+   iteration that it observed the update to the parameter block was too
+   small and declared convergence. Ceres only prints out the display at
+   the end of an iteration, and terminates as soon as it detects
+   convergence, which is why you only see two iterations here and not
+   three.
+
+.. _section-derivatives:
+
+
+Derivatives
+===========
+
+Ceres Solver, like most optimization packages, depends on being able to
+evaluate the value and the derivatives of each term in the objective
+function at arbitrary parameter values. Doing so correctly and
+efficiently is essential to getting good results.  Ceres Solver
+provides a number of ways of doing so. You have already seen one of
+them in action --
+Automatic Differentiation in `examples/helloworld.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld.cc>`_.
+
+We now consider the other two possibilities: analytic and numeric
+derivatives.
+
+
+Numeric Derivatives
+-------------------
+
+In some cases, it is not possible to define a templated cost functor,
+for example when the evaluation of the residual involves a call to a
+library function that you do not have control over.  In such a
+situation, numerical differentiation can be used. The user defines a
+functor which computes the residual value and constructs a
+:class:`NumericDiffCostFunction` using it. For example, for
+:math:`f(x) = 10 - x` the corresponding functor would be
+
+.. code-block:: c++
+
+  struct NumericDiffCostFunctor {
+    bool operator()(const double* const x, double* residual) const {
+      residual[0] = 10.0 - x[0];
+      return true;
+    }
+  };
+
+This functor is added to the :class:`Problem` as:
+
+.. code-block:: c++
+
+  CostFunction* cost_function =
+    new NumericDiffCostFunction<NumericDiffCostFunctor, ceres::CENTRAL, 1, 1>(
+        new NumericDiffCostFunctor);
+  problem.AddResidualBlock(cost_function, NULL, &x);
+
+Notice the parallel with the construction used for automatic differentiation
+
+.. code-block:: c++
+
+  CostFunction* cost_function =
+      new AutoDiffCostFunction<CostFunctor, 1, 1>(new CostFunctor);
+  problem.AddResidualBlock(cost_function, NULL, &x);
+
+The construction looks almost identical to the one used for automatic
+differentiation, except for an extra template parameter that indicates
+the kind of finite differencing scheme to be used for computing the
+numerical derivatives [#f3]_. For more details see the documentation
+for :class:`NumericDiffCostFunction`.
+
+**Generally speaking we recommend automatic differentiation instead of
+numeric differentiation. The use of C++ templates makes automatic
+differentiation efficient, whereas numeric differentiation is
+expensive, prone to numeric errors, and leads to slower convergence.**
+
+
+Analytic Derivatives
+--------------------
+
+In some cases, using automatic differentiation is not possible. For
+example, it may be the case that it is more efficient to compute the
+derivatives in closed form instead of relying on the chain rule used
+by the automatic differentiation code.
+
+In such cases, it is possible to supply your own residual and jacobian
+computation code. To do this, define a subclass of
+:class:`CostFunction` or :class:`SizedCostFunction` if you know the
+sizes of the parameters and residuals at compile time. Here, for
+example, is ``QuadraticCostFunction``, which implements :math:`f(x) = 10 -
+x`.
+
+.. code-block:: c++
+
+  class QuadraticCostFunction : public ceres::SizedCostFunction<1, 1> {
+   public:
+    virtual ~QuadraticCostFunction() {}
+    virtual bool Evaluate(double const* const* parameters,
+                          double* residuals,
+                          double** jacobians) const {
+      const double x = parameters[0][0];
+      residuals[0] = 10 - x;
+
+      // Compute the Jacobian if asked for.
+      if (jacobians != NULL && jacobians[0] != NULL) {
+        jacobians[0][0] = -1;
+      }
+      return true;
+    }
+  };
+
+
+``QuadraticCostFunction::Evaluate`` is provided with an input array of
+``parameters``, an output array ``residuals`` for residuals and an
+output array ``jacobians`` for Jacobians. The ``jacobians`` array is
+optional; ``Evaluate`` is expected to check whether it is non-null,
+and if so, to fill it with the values of the derivatives of the
+residual function. In this case, since the residual function is
+linear, the Jacobian is constant [#f4]_.
+
+As can be seen from the above code fragments, implementing
+:class:`CostFunction` objects is a bit tedious. We recommend that
+unless you have a good reason to manage the jacobian computation
+yourself, you use :class:`AutoDiffCostFunction` or
+:class:`NumericDiffCostFunction` to construct your residual blocks.
+
+More About Derivatives
+----------------------
+
+Computing derivatives is by far the most complicated part of using
+Ceres, and depending on the circumstance the user may need more
+sophisticated ways of computing derivatives. This section just
+scratches the surface of how derivatives can be supplied to
+Ceres. Once you are comfortable with using
+:class:`NumericDiffCostFunction` and :class:`AutoDiffCostFunction` we
+recommend taking a look at :class:`DynamicAutoDiffCostFunction`,
+:class:`CostFunctionToFunctor`, :class:`NumericDiffFunctor` and
+:class:`ConditionedCostFunction` for more advanced ways of
+constructing and computing cost functions.
+
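+As a taste of what these look like, here is a sketch of using
+:class:`DynamicAutoDiffCostFunction`, which is useful when the number
+or sizes of the parameter blocks are not known at compile time. The
+functor name and the block sizes below are made up for illustration;
+see the class documentation for the exact semantics.
+
+.. code-block:: c++
+
+   struct MyDynamicFunctor {
+     template <typename T>
+     bool operator()(T const* const* parameters, T* residuals) const {
+       // parameters[0] points to the first (and here only) parameter block.
+       residuals[0] = 10.0 - parameters[0][0];
+       return true;
+     }
+   };
+
+   // The second template argument is the stride used internally by the
+   // automatic differentiation machinery.
+   ceres::DynamicAutoDiffCostFunction<MyDynamicFunctor, 4>* cost_function =
+       new ceres::DynamicAutoDiffCostFunction<MyDynamicFunctor, 4>(
+           new MyDynamicFunctor);
+   cost_function->AddParameterBlock(1);
+   cost_function->SetNumResiduals(1);
+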
+.. rubric:: Footnotes
+
+.. [#f3] `examples/helloworld_numeric_diff.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld_numeric_diff.cc>`_.
+.. [#f4] `examples/helloworld_analytic_diff.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/helloworld_analytic_diff.cc>`_.
+
+
+.. _section-powell:
+
+Powell's Function
+=================
+
+Consider now a slightly more complicated example -- the minimization
+of Powell's function [#f5]_. Let :math:`x = \left[x_1, x_2, x_3, x_4 \right]`
+and
+
+.. math::
+
+  \begin{align}
+     f_1(x) &= x_1 + 10x_2 \\
+     f_2(x) &= \sqrt{5}  (x_3 - x_4)\\
+     f_3(x) &= (x_2 - 2x_3)^2\\
+     f_4(x) &= \sqrt{10}  (x_1 - x_4)^2\\
+       F(x) &= \left[f_1(x),\ f_2(x),\ f_3(x),\ f_4(x) \right]
+  \end{align}
+
+
+:math:`F(x)` is a function of four parameters, has four residuals
+and we wish to find :math:`x` such that :math:`\frac{1}{2}\|F(x)\|^2`
+is minimized.
+
+Again, the first step is to define functors that evaluate the terms
+in the objective function. Here is the code for evaluating
+:math:`f_4(x_1, x_4)`:
+
+.. code-block:: c++
+
+ struct F4 {
+   template <typename T>
+   bool operator()(const T* const x1, const T* const x4, T* residual) const {
+     residual[0] = sqrt(10.0) * (x1[0] - x4[0]) * (x1[0] - x4[0]);
+     return true;
+   }
+ };
+
+
+Similarly, we can define classes ``F1``, ``F2`` and ``F3`` to evaluate
+:math:`f_1(x_1, x_2)`, :math:`f_2(x_3, x_4)` and :math:`f_3(x_2, x_3)`
+respectively. Using these, the problem can be constructed as follows:
+
+
+.. code-block:: c++
+
+  double x1 =  3.0; double x2 = -1.0; double x3 =  0.0; double x4 = 1.0;
+
+  Problem problem;
+
+  // Add residual terms to the problem using the autodiff
+  // wrapper to get the derivatives automatically.
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F1, 1, 1, 1>(new F1), NULL, &x1, &x2);
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F2, 1, 1, 1>(new F2), NULL, &x3, &x4);
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F3, 1, 1, 1>(new F3), NULL, &x2, &x3);
+  problem.AddResidualBlock(
+    new AutoDiffCostFunction<F4, 1, 1, 1>(new F4), NULL, &x1, &x4);
+
+
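+With the problem constructed, solving it follows the same pattern as
+in the Hello World example. A sketch of the solver invocation (the
+exact options set in ``examples/powell.cc`` may differ):
+
+.. code-block:: c++
+
+  Solver::Options options;
+  options.max_num_iterations = 100;
+  options.linear_solver_type = ceres::DENSE_QR;
+  options.minimizer_progress_to_stdout = true;
+
+  Solver::Summary summary;
+  Solve(options, &problem, &summary);
+  std::cout << summary.BriefReport() << "\n";
+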
+Note that each ``ResidualBlock`` only depends on the two parameters
+that the corresponding residual object depends on and not on all four
+parameters. Compiling and running `examples/powell.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/powell.cc>`_
+gives us:
+
+.. code-block:: bash
+
+    Initial x1 = 3, x2 = -1, x3 = 0, x4 = 1
+    iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+       0  1.075000e+02    0.00e+00    1.55e+02   0.00e+00   0.00e+00  1.00e+04       0    4.95e-04    2.30e-03
+       1  5.036190e+00    1.02e+02    2.00e+01   2.16e+00   9.53e-01  3.00e+04       1    4.39e-05    2.40e-03
+       2  3.148168e-01    4.72e+00    2.50e+00   6.23e-01   9.37e-01  9.00e+04       1    9.06e-06    2.43e-03
+       3  1.967760e-02    2.95e-01    3.13e-01   3.08e-01   9.37e-01  2.70e+05       1    8.11e-06    2.45e-03
+       4  1.229900e-03    1.84e-02    3.91e-02   1.54e-01   9.37e-01  8.10e+05       1    6.91e-06    2.48e-03
+       5  7.687123e-05    1.15e-03    4.89e-03   7.69e-02   9.37e-01  2.43e+06       1    7.87e-06    2.50e-03
+       6  4.804625e-06    7.21e-05    6.11e-04   3.85e-02   9.37e-01  7.29e+06       1    5.96e-06    2.52e-03
+       7  3.003028e-07    4.50e-06    7.64e-05   1.92e-02   9.37e-01  2.19e+07       1    5.96e-06    2.55e-03
+       8  1.877006e-08    2.82e-07    9.54e-06   9.62e-03   9.37e-01  6.56e+07       1    5.96e-06    2.57e-03
+       9  1.173223e-09    1.76e-08    1.19e-06   4.81e-03   9.37e-01  1.97e+08       1    7.87e-06    2.60e-03
+      10  7.333425e-11    1.10e-09    1.49e-07   2.40e-03   9.37e-01  5.90e+08       1    6.20e-06    2.63e-03
+      11  4.584044e-12    6.88e-11    1.86e-08   1.20e-03   9.37e-01  1.77e+09       1    6.91e-06    2.65e-03
+      12  2.865573e-13    4.30e-12    2.33e-09   6.02e-04   9.37e-01  5.31e+09       1    5.96e-06    2.67e-03
+      13  1.791438e-14    2.69e-13    2.91e-10   3.01e-04   9.37e-01  1.59e+10       1    7.15e-06    2.69e-03
+
+    Ceres Solver v1.12.0 Solve Report
+    ----------------------------------
+                                         Original                  Reduced
+    Parameter blocks                            4                        4
+    Parameters                                  4                        4
+    Residual blocks                             4                        4
+    Residual                                    4                        4
+
+    Minimizer                        TRUST_REGION
+
+    Dense linear algebra library            EIGEN
+    Trust region strategy     LEVENBERG_MARQUARDT
+
+                                            Given                     Used
+    Linear solver                        DENSE_QR                 DENSE_QR
+    Threads                                     1                        1
+    Linear solver threads                       1                        1
+
+    Cost:
+    Initial                          1.075000e+02
+    Final                            1.791438e-14
+    Change                           1.075000e+02
+
+    Minimizer iterations                       14
+    Successful steps                           14
+    Unsuccessful steps                          0
+
+    Time (in seconds):
+    Preprocessor                            0.002
+
+      Residual evaluation                   0.000
+      Jacobian evaluation                   0.000
+      Linear solver                         0.000
+    Minimizer                               0.001
+
+    Postprocessor                           0.000
+    Total                                   0.005
+
+    Termination:                      CONVERGENCE (Gradient tolerance reached. Gradient max norm: 3.642190e-11 <= 1.000000e-10)
+
+    Final x1 = 0.000292189, x2 = -2.92189e-05, x3 = 4.79511e-05, x4 = 4.79511e-05
+
+It is easy to see that the optimal solution to this problem is at
+:math:`x_1=0, x_2=0, x_3=0, x_4=0` with an objective function value of
+:math:`0`. In 14 iterations, Ceres finds a solution with an objective
+function value of :math:`\approx 1.8\times 10^{-14}`.
+
+.. rubric:: Footnotes
+
+.. [#f5] `examples/powell.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/powell.cc>`_.
+
+
+.. _section-fitting:
+
+Curve Fitting
+=============
+
+The examples we have seen until now are simple optimization problems
+with no data. The original purpose of least squares and non-linear
+least squares analysis was fitting curves to data. It is only
+appropriate that we now consider an example of such a problem
+[#f6]_. The data for this example was generated by sampling the curve
+:math:`y = e^{0.3x + 0.1}` and adding Gaussian noise with standard
+deviation :math:`\sigma = 0.2`. Let us fit this data to the curve
+
+.. math::  y = e^{mx + c}.
+
+We begin by defining a templated object to evaluate the
+residual. There will be a residual for each observation.
+
+.. code-block:: c++
+
+ struct ExponentialResidual {
+   ExponentialResidual(double x, double y)
+       : x_(x), y_(y) {}
+
+   template <typename T>
+   bool operator()(const T* const m, const T* const c, T* residual) const {
+     residual[0] = y_ - exp(m[0] * x_ + c[0]);
+     return true;
+   }
+
+  private:
+   // Observations for a sample.
+   const double x_;
+   const double y_;
+ };
+
+Assuming the observations are in a :math:`2n` sized array called
+``data``, the problem construction is a simple matter of creating a
+:class:`CostFunction` for every observation.
+
+
+.. code-block:: c++
+
+ double m = 0.0;
+ double c = 0.0;
+
+ Problem problem;
+ for (int i = 0; i < kNumObservations; ++i) {
+   CostFunction* cost_function =
+        new AutoDiffCostFunction<ExponentialResidual, 1, 1, 1>(
+            new ExponentialResidual(data[2 * i], data[2 * i + 1]));
+   problem.AddResidualBlock(cost_function, NULL, &m, &c);
+ }
+
+Compiling and running `examples/curve_fitting.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/curve_fitting.cc>`_
+gives us:
+
+.. code-block:: bash
+
+    iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+       0  1.211734e+02    0.00e+00    3.61e+02   0.00e+00   0.00e+00  1.00e+04       0    5.34e-04    2.56e-03
+       1  1.211734e+02   -2.21e+03    0.00e+00   7.52e-01  -1.87e+01  5.00e+03       1    4.29e-05    3.25e-03
+       2  1.211734e+02   -2.21e+03    0.00e+00   7.51e-01  -1.86e+01  1.25e+03       1    1.10e-05    3.28e-03
+       3  1.211734e+02   -2.19e+03    0.00e+00   7.48e-01  -1.85e+01  1.56e+02       1    1.41e-05    3.31e-03
+       4  1.211734e+02   -2.02e+03    0.00e+00   7.22e-01  -1.70e+01  9.77e+00       1    1.00e-05    3.34e-03
+       5  1.211734e+02   -7.34e+02    0.00e+00   5.78e-01  -6.32e+00  3.05e-01       1    1.00e-05    3.36e-03
+       6  3.306595e+01    8.81e+01    4.10e+02   3.18e-01   1.37e+00  9.16e-01       1    2.79e-05    3.41e-03
+       7  6.426770e+00    2.66e+01    1.81e+02   1.29e-01   1.10e+00  2.75e+00       1    2.10e-05    3.45e-03
+       8  3.344546e+00    3.08e+00    5.51e+01   3.05e-02   1.03e+00  8.24e+00       1    2.10e-05    3.48e-03
+       9  1.987485e+00    1.36e+00    2.33e+01   8.87e-02   9.94e-01  2.47e+01       1    2.10e-05    3.52e-03
+      10  1.211585e+00    7.76e-01    8.22e+00   1.05e-01   9.89e-01  7.42e+01       1    2.10e-05    3.56e-03
+      11  1.063265e+00    1.48e-01    1.44e+00   6.06e-02   9.97e-01  2.22e+02       1    2.60e-05    3.61e-03
+      12  1.056795e+00    6.47e-03    1.18e-01   1.47e-02   1.00e+00  6.67e+02       1    2.10e-05    3.64e-03
+      13  1.056751e+00    4.39e-05    3.79e-03   1.28e-03   1.00e+00  2.00e+03       1    2.10e-05    3.68e-03
+    Ceres Solver Report: Iterations: 13, Initial cost: 1.211734e+02, Final cost: 1.056751e+00, Termination: CONVERGENCE
+    Initial m: 0 c: 0
+    Final   m: 0.291861 c: 0.131439
+
+Starting from parameter values :math:`m = 0, c=0` with an initial
+objective function value of :math:`121.173` Ceres finds a solution
+:math:`m= 0.291861, c = 0.131439` with an objective function value of
+:math:`1.05675`. These values are a bit different from the
+parameters of the original model :math:`m=0.3, c=0.1`, but this is
+expected. When reconstructing a curve from noisy data, we expect to
+see such deviations. Indeed, if you were to evaluate the objective
+function for :math:`m=0.3, c=0.1`, the fit is worse with an objective
+function value of :math:`1.082425`.  The figure below illustrates the fit.
+
+.. figure:: least_squares_fit.png
+   :figwidth: 500px
+   :height: 400px
+   :align: center
+
+   Least squares curve fitting.
+
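+As an aside, one way to check the objective function value quoted
+above for :math:`m=0.3, c=0.1` is to evaluate the problem at those
+parameter values using :func:`Problem::Evaluate`. A sketch (this is
+not part of ``examples/curve_fitting.cc``):
+
+.. code-block:: c++
+
+ m = 0.3;
+ c = 0.1;
+ double cost = 0.0;
+ // Compute one half the sum of squared residuals at the current
+ // values of the parameter blocks; residuals, gradient and Jacobian
+ // are not needed here, so we pass NULL for them.
+ problem.Evaluate(Problem::EvaluateOptions(), &cost, NULL, NULL, NULL);
+ std::cout << "Cost at m = 0.3, c = 0.1: " << cost << "\n";
+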
+
+.. rubric:: Footnotes
+
+.. [#f6] `examples/curve_fitting.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/curve_fitting.cc>`_
+
+
+Robust Curve Fitting
+=====================
+
+Now suppose the data we are given has some outliers, i.e., we have
+some points that do not obey the noise model. If we were to use the
+code above to fit such data, we would get a fit that looks as
+below. Notice how the fitted curve deviates from the ground truth.
+
+.. figure:: non_robust_least_squares_fit.png
+   :figwidth: 500px
+   :height: 400px
+   :align: center
+
+To deal with outliers, a standard technique is to use a
+:class:`LossFunction`. Loss functions reduce the influence of
+residual blocks with high residuals, usually the ones corresponding to
+outliers. To associate a loss function with a residual block, we change
+
+.. code-block:: c++
+
+   problem.AddResidualBlock(cost_function, NULL, &m, &c);
+
+to
+
+.. code-block:: c++
+
+   problem.AddResidualBlock(cost_function, new CauchyLoss(0.5), &m, &c);
+
+:class:`CauchyLoss` is one of the loss functions that ships with Ceres
+Solver. The argument :math:`0.5` specifies the scale of the loss
+function. As a result, we get the fit below [#f7]_. Notice how the
+fitted curve moves back closer to the ground truth curve.
+
+.. figure:: robust_least_squares_fit.png
+   :figwidth: 500px
+   :height: 400px
+   :align: center
+
+   Using :class:`LossFunction` to reduce the effect of outliers on a
+   least squares fit.
+
+
+.. rubric:: Footnotes
+
+.. [#f7] `examples/robust_curve_fitting.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/robust_curve_fitting.cc>`_
+
+
+Bundle Adjustment
+=================
+
+One of the main reasons for writing Ceres was our need to solve large
+scale bundle adjustment problems [HartleyZisserman]_, [Triggs]_.
+
+Given a set of measured image feature locations and correspondences,
+the goal of bundle adjustment is to find 3D point positions and camera
+parameters that minimize the reprojection error. This optimization
+problem is usually formulated as a non-linear least squares problem,
+where the error is the squared :math:`L_2` norm of the difference between
+the observed feature location and the projection of the corresponding
+3D point on the image plane of the camera. Ceres has extensive support
+for solving bundle adjustment problems.
+
+Let us solve a problem from the `BAL
+<http://grail.cs.washington.edu/projects/bal/>`_ dataset [#f8]_.
+
+The first step as usual is to define a templated functor that computes
+the reprojection error/residual. The structure of the functor is
+similar to the ``ExponentialResidual``, in that there is an
+instance of this object responsible for each image observation.
+
+Each residual in a BAL problem depends on a three dimensional point
+and a nine parameter camera. The nine parameters defining the camera
+are: three for rotation as a Rodrigues' axis-angle vector, three
+for translation, one for focal length and two for radial distortion.
+The details of this camera model can be found on the `Bundler homepage
+<http://phototour.cs.washington.edu/bundler/>`_ and the `BAL homepage
+<http://grail.cs.washington.edu/projects/bal/>`_.
+
+.. code-block:: c++
+
+ struct SnavelyReprojectionError {
+   SnavelyReprojectionError(double observed_x, double observed_y)
+       : observed_x(observed_x), observed_y(observed_y) {}
+
+   template <typename T>
+   bool operator()(const T* const camera,
+                   const T* const point,
+                   T* residuals) const {
+     // camera[0,1,2] are the angle-axis rotation.
+     T p[3];
+     ceres::AngleAxisRotatePoint(camera, point, p);
+     // camera[3,4,5] are the translation.
+     p[0] += camera[3]; p[1] += camera[4]; p[2] += camera[5];
+
+     // Compute the center of distortion. The sign change comes from
+     // the camera model that Noah Snavely's Bundler assumes, whereby
+     // the camera coordinate system has a negative z axis.
+     T xp = - p[0] / p[2];
+     T yp = - p[1] / p[2];
+
+     // Apply second and fourth order radial distortion.
+     const T& l1 = camera[7];
+     const T& l2 = camera[8];
+     T r2 = xp*xp + yp*yp;
+     T distortion = 1.0 + r2  * (l1 + l2  * r2);
+
+     // Compute final projected point position.
+     const T& focal = camera[6];
+     T predicted_x = focal * distortion * xp;
+     T predicted_y = focal * distortion * yp;
+
+     // The error is the difference between the predicted and observed position.
+     residuals[0] = predicted_x - T(observed_x);
+     residuals[1] = predicted_y - T(observed_y);
+     return true;
+   }
+
+   // Factory to hide the construction of the CostFunction object from
+   // the client code.
+   static ceres::CostFunction* Create(const double observed_x,
+                                      const double observed_y) {
+     return (new ceres::AutoDiffCostFunction<SnavelyReprojectionError, 2, 9, 3>(
+                 new SnavelyReprojectionError(observed_x, observed_y)));
+   }
+
+   double observed_x;
+   double observed_y;
+ };
+
+
+Note that unlike the examples before, this is a non-trivial function
+and computing its analytic Jacobian is a bit of a pain. Automatic
+differentiation makes life much simpler. The function
+:func:`AngleAxisRotatePoint` and other functions for manipulating
+rotations can be found in ``include/ceres/rotation.h``.
+
+Given this functor, the bundle adjustment problem can be constructed
+as follows:
+
+.. code-block:: c++
+
+ ceres::Problem problem;
+ for (int i = 0; i < bal_problem.num_observations(); ++i) {
+   ceres::CostFunction* cost_function =
+       SnavelyReprojectionError::Create(
+            bal_problem.observations()[2 * i + 0],
+            bal_problem.observations()[2 * i + 1]);
+   problem.AddResidualBlock(cost_function,
+                            NULL /* squared loss */,
+                            bal_problem.mutable_camera_for_observation(i),
+                            bal_problem.mutable_point_for_observation(i));
+ }
+
+
+Notice that the problem construction for bundle adjustment is very
+similar to the curve fitting example -- one term is added to the
+objective function per observation.
+
+Since this is a large sparse problem (well, large for ``DENSE_QR``
+anyway), one way to solve this problem is to set
+:member:`Solver::Options::linear_solver_type` to
+``SPARSE_NORMAL_CHOLESKY`` and call :func:`Solve`. And while this is
+a reasonable thing to do, bundle adjustment problems have a special
+sparsity structure that can be exploited to solve them much more
+efficiently. Ceres provides three specialized solvers (collectively
+known as Schur-based solvers) for this task. The example code uses the
+simplest of them, ``DENSE_SCHUR``.
+
+.. code-block:: c++
+
+ ceres::Solver::Options options;
+ options.linear_solver_type = ceres::DENSE_SCHUR;
+ options.minimizer_progress_to_stdout = true;
+ ceres::Solver::Summary summary;
+ ceres::Solve(options, &problem, &summary);
+ std::cout << summary.FullReport() << "\n";
+
+For a more sophisticated bundle adjustment example which demonstrates
+the use of Ceres' more advanced features including its various linear
+solvers, robust loss functions and local parameterizations, see
+`examples/bundle_adjuster.cc
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/bundle_adjuster.cc>`_.
+
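+For instance, to reduce the influence of outlier correspondences on
+the solution, the ``NULL`` loss function used above can be replaced by
+one of the robust loss functions that ship with Ceres. A sketch (the
+choice of loss function and scale here is purely illustrative):
+
+.. code-block:: c++
+
+ problem.AddResidualBlock(cost_function,
+                          new ceres::HuberLoss(1.0) /* robust loss */,
+                          bal_problem.mutable_camera_for_observation(i),
+                          bal_problem.mutable_point_for_observation(i));
+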
+
+.. rubric:: Footnotes
+
+.. [#f8] `examples/simple_bundle_adjuster.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/simple_bundle_adjuster.cc>`_
+
+Other Examples
+==============
+
+Besides the examples in this chapter, the `examples
+<https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/>`_
+directory contains a number of other examples:
+
+#. `bundle_adjuster.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/bundle_adjuster.cc>`_
+   shows how to use the various features of Ceres to solve bundle
+   adjustment problems.
+
+#. `circle_fit.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/circle_fit.cc>`_
+   shows how to fit data to a circle.
+
+#. `ellipse_approximation.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/ellipse_approximation.cc>`_
+   fits points randomly distributed on an ellipse with an approximate
+   line segment contour. This is done by jointly optimizing the
+   control points of the line segment contour along with the preimage
+   positions for the data points. The purpose of this example is to
+   show an example use case for ``Solver::Options::dynamic_sparsity``,
+   and how it can benefit problems which are numerically dense but
+   dynamically sparse.
+
+#. `denoising.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/denoising.cc>`_
+   implements image denoising using the `Fields of Experts
+   <http://www.gris.informatik.tu-darmstadt.de/~sroth/research/foe/index.html>`_
+   model.
+
+#. `nist.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/nist.cc>`_
+   implements and attempts to solve the `NIST
+   <http://www.itl.nist.gov/div898/strd/nls/nls_main.shtml>`_
+   non-linear regression problems.
+
+#. `more_garbow_hillstrom.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/more_garbow_hillstrom.cc>`_
+   A subset of the test problems from the paper
+
+   Testing Unconstrained Optimization Software
+   Jorge J. More, Burton S. Garbow and Kenneth E. Hillstrom
+   ACM Transactions on Mathematical Software, 7(1), pp. 17-41, 1981
+
+   which were augmented with bounds and used for testing bounds
+   constrained optimization algorithms by
+
+   A Trust Region Approach to Linearly Constrained Optimization
+   David M. Gay
+   Numerical Analysis (Griffiths, D.F., ed.), pp. 72-105
+   Lecture Notes in Mathematics 1066, Springer Verlag, 1984.
+
+
+#. `libmv_bundle_adjuster.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/libmv_bundle_adjuster.cc>`_
+   is the bundle adjustment algorithm used by `Blender <www.blender.org>`_/libmv.
+
+#. `libmv_homography.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/libmv_homography.cc>`_
+   This file demonstrates solving for a homography between two sets of
+   points and using a custom exit criterion by having a callback check
+   for image-space error.
+
+#. `robot_pose_mle.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/robot_pose_mle.cc>`_
+   This example demonstrates how to use the ``DynamicAutoDiffCostFunction``
+   variant of CostFunction. The ``DynamicAutoDiffCostFunction`` is meant to
+   be used in cases where the number of parameter blocks or the sizes are not
+   known at compile time.
+
+   This example simulates a robot traversing down a 1-dimensional hallway with
+   noisy odometry readings and noisy range readings of the end of the hallway.
+   By fusing the noisy odometry and sensor readings this example demonstrates
+   how to compute the maximum likelihood estimate (MLE) of the robot's pose at
+   each timestep.
+
+#. `slam/pose_graph_2d/pose_graph_2d.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/slam/pose_graph_2d/pose_graph_2d.cc>`_
+   The Simultaneous Localization and Mapping (SLAM) problem consists of building
+   a map of an unknown environment while simultaneously localizing against this
+   map. The main difficulty of this problem stems from not having any additional
+   external aiding information such as GPS. SLAM has been considered one of the
+   fundamental challenges of robotics. There are many resources on SLAM
+   [#f9]_. A pose graph optimization problem is one example of a SLAM
+   problem. The following explains how to formulate the pose graph based SLAM
+   problem in 2-Dimensions with relative pose constraints.
+
+   Consider a robot moving in a 2-Dimensional plane. The robot has access to a
+   set of sensors such as wheel odometry or a laser range scanner. From these
+   raw measurements, we want to estimate the trajectory of the robot as well as
+   build a map of the environment. In order to reduce the computational
+   complexity of the problem, the pose graph approach abstracts the raw
+   measurements away.  Specifically, it creates a graph of nodes which represent
+   the pose of the robot, and edges which represent the relative transformation
+   (delta position and orientation) between the two nodes. The edges are virtual
+   measurements derived from the raw sensor measurements, e.g. by integrating
+   the raw wheel odometry or aligning the laser range scans acquired from the
+   robot. A visualization of the resulting graph is shown below.
+
+   .. figure:: slam2d.png
+      :figwidth: 500px
+      :height: 400px
+      :align: center
+
+      Visual representation of a graph SLAM problem.
+
+   The figure depicts the pose of the robot as the triangles, the measurements
+   are indicated by the connecting lines, and the loop closure measurements are
+   shown as dotted lines. Loop closures are measurements between non-sequential
+   robot states and they reduce the accumulation of error over time. The
+   following will describe the mathematical formulation of the pose graph
+   problem.
+
+   The robot at timestamp :math:`t` has state :math:`x_t = [p^T, \psi]^T` where
+   :math:`p` is a 2D vector that represents the position in the plane and
+   :math:`\psi` is the orientation in radians. The measurement of the relative
+   transform between the robot state at two timestamps :math:`a` and :math:`b`
+   is given as: :math:`z_{ab} = [\hat{p}_{ab}^T, \hat{\psi}_{ab}]`. The residual
+   implemented in the Ceres cost function which computes the error between the
+   measurement and the predicted measurement is:
+
+   .. math:: r_{ab} =
+	     \left[
+	     \begin{array}{c}
+	       R_a^T\left(p_b - p_a\right) - \hat{p}_{ab} \\
+	       \mathrm{Normalize}\left(\psi_b - \psi_a - \hat{\psi}_{ab}\right)
+	     \end{array}
+	     \right]
+
+   where the function :math:`\mathrm{Normalize}()` normalizes the angle in the range
+   :math:`[-\pi,\pi)`, and :math:`R` is the rotation matrix given by
+
+   .. math:: R_a =
+	     \left[
+	     \begin{array}{cc}
+	       \cos \psi_a & -\sin \psi_a \\
+	       \sin \psi_a & \cos \psi_a \\
+	     \end{array}
+	     \right]
+
+   To finish the cost function, we need to weight the residual by the
+   uncertainty of the measurement. Hence, we pre-multiply the residual by the
+   inverse square root of the covariance matrix for the measurement,
+   i.e. :math:`\Sigma_{ab}^{-\frac{1}{2}} r_{ab}` where :math:`\Sigma_{ab}` is
+   the covariance.
+
+   Lastly, we use a local parameterization to keep the orientation
+   normalized to the range :math:`[-\pi,\pi)`.  Specifically, we define
+   the :member:`AngleLocalParameterization::operator()` function to be:
+   :math:`\mathrm{Normalize}(\psi + \delta \psi)`.
+
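+   A sketch of what such an angle normalization helper might look like
+   (the exact implementation in ``examples/slam/pose_graph_2d`` may
+   differ); ``ceres::floor`` is used so that the same template works for
+   ``double`` and for the Jet types used during automatic
+   differentiation:
+
+   .. code-block:: c++
+
+      // Wrap an angle (in radians) into the range [-pi, pi).
+      template <typename T>
+      T NormalizeAngle(const T& angle_radians) {
+        T two_pi(2.0 * M_PI);
+        return angle_radians -
+               two_pi * ceres::floor((angle_radians + T(M_PI)) / two_pi);
+      }
+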
+   This package includes an executable :member:`pose_graph_2d` that will read a
+   problem definition file. This executable can work with any 2D problem
+   definition that uses the g2o format. It would be relatively straightforward
+   to implement a new reader for a different format such as TORO or
+   others. :member:`pose_graph_2d` will print the Ceres solver full summary and
+   then output to disk the original and optimized poses (``poses_original.txt``
+   and ``poses_optimized.txt``, respectively) of the robot in the following
+   format:
+
+   .. code-block:: bash
+
+      pose_id x y yaw_radians
+      pose_id x y yaw_radians
+      pose_id x y yaw_radians
+
+   where ``pose_id`` is the corresponding integer ID from the file
+   definition. Note that the file will be sorted in ascending order of
+   ``pose_id``.
+
+   The executable :member:`pose_graph_2d` expects the first argument to be
+   the path to the problem definition. To run the executable,
+
+   .. code-block:: bash
+
+      /path/to/bin/pose_graph_2d /path/to/dataset/dataset.g2o
+
+   A Python script is provided to visualize the resulting output files.
+
+   .. code-block:: bash
+
+      /path/to/repo/examples/slam/pose_graph_2d/plot_results.py --optimized_poses ./poses_optimized.txt --initial_poses ./poses_original.txt
+
+   As an example, we solved a standard synthetic benchmark dataset [#f10]_
+   created by Edwin Olson, which has 3500 nodes in a grid world with a total
+   of 5598 edges. Visualizing the results with the provided script produces:
+
+   .. figure:: manhattan_olson_3500_result.png
+      :figwidth: 600px
+      :height: 600px
+      :align: center
+
+   with the original poses in green and the optimized poses in blue. As shown,
+   the optimized poses more closely match the underlying grid world. Note that the
+   left side of the graph has a small yaw drift due to a lack of relative
+   constraints to provide enough information to reconstruct the trajectory.
+
+   .. rubric:: Footnotes
+
+   .. [#f9] Giorgio Grisetti, Rainer Kummerle, Cyrill Stachniss, Wolfram
+      Burgard. A Tutorial on Graph-Based SLAM. IEEE Intelligent Transportation
+      Systems Magazine, 52(3):199–222, 2010.
+
+   .. [#f10] E. Olson, J. Leonard, and S. Teller, “Fast iterative optimization of
+      pose graphs with poor initial estimates,” in Robotics and Automation
+      (ICRA), IEEE International Conference on, 2006, pp. 2262–2269.
+
+#. `slam/pose_graph_3d/pose_graph_3d.cc
+   <https://ceres-solver.googlesource.com/ceres-solver/+/master/examples/slam/pose_graph_3d/pose_graph_3d.cc>`_
+   The following explains how to formulate the pose graph based SLAM problem in
+   3-Dimensions with relative pose constraints. The example also illustrates how
+   to use Eigen's geometry module with Ceres's automatic differentiation
+   functionality.
+
+   The robot at timestamp :math:`t` has state :math:`x_t = [p^T, q^T]^T` where
+   :math:`p` is a 3D vector that represents the position and :math:`q` is the
+   orientation represented as an Eigen quaternion. The measurement of the
+   relative transform between the robot state at two timestamps :math:`a` and
+   :math:`b` is given as: :math:`z_{ab} = [\hat{p}_{ab}^T, \hat{q}_{ab}^T]^T`.
+   The residual implemented in the Ceres cost function which computes the error
+   between the measurement and the predicted measurement is:
+
+   .. math:: r_{ab} =
+             \left[
+             \begin{array}{c}
+                R(q_a)^{T} (p_b - p_a) - \hat{p}_{ab} \\
+                2.0 \mathrm{vec}\left((q_a^{-1} q_b) \hat{q}_{ab}^{-1}\right)
+             \end{array}
+             \right]
+
+   where the function :math:`\mathrm{vec}()` returns the vector part of the
+   quaternion, i.e. :math:`[q_x, q_y, q_z]`, and :math:`R(q)` is the rotation
+   matrix for the quaternion.
+
+   To finish the cost function, we need to weight the residual by the
+   uncertainty of the measurement. Hence, we pre-multiply the residual by the
+   inverse square root of the covariance matrix for the measurement,
+   i.e. :math:`\Sigma_{ab}^{-\frac{1}{2}} r_{ab}` where :math:`\Sigma_{ab}` is
+   the covariance.
+
+   Given that we are using a quaternion to represent the orientation, we need to
+   use a local parameterization (:class:`EigenQuaternionParameterization`) to
+   only apply updates orthogonal to the 4-vector defining the
+   quaternion. Eigen's quaternion uses a different internal memory layout for
+   the elements of the quaternion than what is commonly used. Specifically,
+   Eigen stores the elements in memory as :math:`[x, y, z, w]` where the real
+   part is last whereas it is typically stored first. Note, when creating an
+   Eigen quaternion through the constructor the elements are accepted in
+   :math:`w`, :math:`x`, :math:`y`, :math:`z` order. Since Ceres operates on
+   parameter blocks which are raw double pointers this difference is important
+   and requires a different parameterization.
+
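+   A sketch of attaching this parameterization to a quaternion parameter
+   block (the variable names here are illustrative, not the ones used in
+   the example):
+
+   .. code-block:: c++
+
+      Eigen::Quaterniond q = Eigen::Quaterniond::Identity();
+      // ... the block q.coeffs().data() is assumed to have already been
+      // added to the problem via AddResidualBlock or AddParameterBlock.
+      ceres::LocalParameterization* quaternion_parameterization =
+          new ceres::EigenQuaternionParameterization;
+      // q.coeffs() stores the quaternion as [x, y, z, w], the layout
+      // that EigenQuaternionParameterization expects.
+      problem.SetParameterization(q.coeffs().data(),
+                                  quaternion_parameterization);
+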
+   This package includes an executable :member:`pose_graph_3d` that will read a
+   problem definition file. This executable can work with any 3D problem
+   definition that uses the g2o format with quaternions used for the orientation
+   representation. It would be relatively straightforward to implement a new
+   reader for a different format such as TORO or others. :member:`pose_graph_3d`
+   will print the Ceres solver full summary and then output to disk the original
+   and optimized poses (``poses_original.txt`` and ``poses_optimized.txt``,
+   respectively) of the robot in the following format:
+
+   .. code-block:: bash
+
+      pose_id x y z q_x q_y q_z q_w
+      pose_id x y z q_x q_y q_z q_w
+      pose_id x y z q_x q_y q_z q_w
+      ...
+
+   where ``pose_id`` is the corresponding integer ID from the file
+   definition. Note that the file will be sorted in ascending order of
+   ``pose_id``.
+
+   The executable :member:`pose_graph_3d` expects the first argument to be the
+   path to the problem definition. The executable can be run via
+
+   .. code-block:: bash
+
+      /path/to/bin/pose_graph_3d /path/to/dataset/dataset.g2o
+
+   A script is provided to visualize the resulting output files. There is also
+   an option to enable equal axes using ``--axes_equal``.
+
+   .. code-block:: bash
+
+      /path/to/repo/examples/slam/pose_graph_3d/plot_results.py --optimized_poses ./poses_optimized.txt --initial_poses ./poses_original.txt
+
+   As an example, we solved a standard synthetic benchmark dataset [#f9]_ in
+   which the robot travels on the surface of a sphere; it has 2500 nodes with
+   a total of 4949 edges. Visualizing the results with the provided script
+   produces:
+
+   .. figure:: pose_graph_3d_ex.png
+      :figwidth: 600px
+      :height: 300px
+      :align: center
diff --git a/docs/source/non_robust_least_squares_fit.png b/docs/source/non_robust_least_squares_fit.png
new file mode 100644
index 0000000..643d162
--- /dev/null
+++ b/docs/source/non_robust_least_squares_fit.png
Binary files differ
diff --git a/docs/source/numerical_derivatives.rst b/docs/source/numerical_derivatives.rst
new file mode 100644
index 0000000..57b46bf
--- /dev/null
+++ b/docs/source/numerical_derivatives.rst
@@ -0,0 +1,403 @@
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-numerical_derivatives:
+
+===================
+Numeric derivatives
+===================
+
+The other extreme from using analytic derivatives is to use numeric
+derivatives. The key observation here is that the process of
+differentiating a function :math:`f(x)` w.r.t :math:`x` can be written
+as the limiting process:
+
+.. math::
+   Df(x) = \lim_{h \rightarrow 0} \frac{f(x + h) - f(x)}{h}
+
+
+Forward Differences
+===================
+
+Now of course one cannot perform the limiting operation numerically on
+a computer so we do the next best thing, which is to choose a small
+value of :math:`h` and approximate the derivative as
+
+.. math::
+   Df(x) \approx \frac{f(x + h) - f(x)}{h}
+
+
+The above formula is the simplest, most basic form of numeric
+differentiation. It is known as the *Forward Difference* formula.
+
+So how would one go about constructing a numerically differentiated
+version of ``Rat43Analytic`` (`Rat43
+<http://www.itl.nist.gov/div898/strd/nls/data/ratkowsky3.shtml>`_) in
+Ceres Solver? This is done in two steps:
+
+  1. Define a *functor* that, given the parameter values, will evaluate
+     the residual for a given :math:`(x,y)`.
+  2. Construct a :class:`CostFunction` by using
+     :class:`NumericDiffCostFunction` to wrap an instance of
+     ``Rat43CostFunctor``.
+
+.. code-block:: c++
+
+  struct Rat43CostFunctor {
+    Rat43CostFunctor(const double x, const double y) : x_(x), y_(y) {}
+
+    bool operator()(const double* parameters, double* residuals) const {
+      const double b1 = parameters[0];
+      const double b2 = parameters[1];
+      const double b3 = parameters[2];
+      const double b4 = parameters[3];
+      residuals[0] = b1 * pow(1.0 + exp(b2 -  b3 * x_), -1.0 / b4) - y_;
+      return true;
+    }
+
+    const double x_;
+    const double y_;
+  };
+
+  CostFunction* cost_function =
+    new NumericDiffCostFunction<Rat43CostFunctor, FORWARD, 1, 4>(
+      new Rat43CostFunctor(x, y));
+
+This is about the minimum amount of work one can expect to do to
+define the cost function. The only thing that the user needs to do is
+to make sure that the evaluation of the residual is implemented
+correctly and efficiently.
+
+Before going further, it is instructive to get an estimate of the
+error in the forward difference formula. We do this by considering the
+`Taylor expansion <https://en.wikipedia.org/wiki/Taylor_series>`_ of
+:math:`f` near :math:`x`.
+
+.. math::
+   \begin{align}
+   f(x+h) &= f(x) + h Df(x) + \frac{h^2}{2!} D^2f(x) +
+   \frac{h^3}{3!}D^3f(x) + \cdots \\
+   Df(x) &= \frac{f(x + h) - f(x)}{h} - \left [\frac{h}{2!}D^2f(x) +
+   \frac{h^2}{3!}D^3f(x) + \cdots  \right]\\
+   Df(x) &= \frac{f(x + h) - f(x)}{h} + O(h)
+   \end{align}
+
+i.e., the error in the forward difference formula is
+:math:`O(h)` [#f4]_.
+
+
+Implementation Details
+----------------------
+
+:class:`NumericDiffCostFunction` implements a generic algorithm to
+numerically differentiate a given functor. While the actual
+implementation of :class:`NumericDiffCostFunction` is complicated, the
+net result is a :class:`CostFunction` that roughly looks something
+like the following:
+
+.. code-block:: c++
+
+  class Rat43NumericDiffForward : public SizedCostFunction<1,4> {
+   public:
+    Rat43NumericDiffForward(Rat43CostFunctor* functor) : functor_(functor) {}
+    virtual ~Rat43NumericDiffForward() {}
+    virtual bool Evaluate(double const* const* parameters,
+                          double* residuals,
+                          double** jacobians) const {
+      (*functor_)(parameters[0], residuals);
+      if (!jacobians) return true;
+      double* jacobian = jacobians[0];
+      if (!jacobian) return true;
+
+      const double f = residuals[0];
+      double parameters_plus_h[4];
+      for (int i = 0; i < 4; ++i) {
+        std::copy(parameters[0], parameters[0] + 4, parameters_plus_h);
+        const double kRelativeStepSize = 1e-6;
+        const double h = std::abs(parameters[0][i]) * kRelativeStepSize;
+        parameters_plus_h[i] += h;
+        double f_plus;
+        (*functor_)(parameters_plus_h, &f_plus);
+        jacobian[i] = (f_plus - f) / h;
+      }
+      return true;
+    }
+
+   private:
+    std::unique_ptr<Rat43CostFunctor> functor_;
+  };
+
+
+Note the choice of step size :math:`h` in the above code: instead of
+an absolute step size which is the same for all parameters, we use a
+relative step size of :math:`\text{kRelativeStepSize} = 10^{-6}`. This
+gives better derivative estimates than an absolute step size [#f2]_
+[#f3]_. This choice of step size only works for parameter values that
+are not close to zero. So the actual implementation of
+:class:`NumericDiffCostFunction` uses a more complex step size
+selection logic, where, close to zero, it switches to a fixed step
+size.
+
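+A sketch of what such a step size rule might look like; the constants
+and the exact switching rule used inside
+:class:`NumericDiffCostFunction` may differ:
+
+.. code-block:: c++
+
+  #include <algorithm>
+  #include <cmath>
+
+  // Use a relative step, but do not let it collapse to zero when the
+  // parameter itself is close to zero.
+  double StepSize(double x) {
+    const double kRelativeStepSize = 1e-6;
+    const double kMinStepSize = 1e-6;
+    return std::max(std::abs(x) * kRelativeStepSize, kMinStepSize);
+  }
+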
+
+Central Differences
+===================
+
+The :math:`O(h)` error in the Forward Difference formula is okay but
+not great. A better method is to use the *Central Difference* formula:
+
+.. math::
+   Df(x) \approx \frac{f(x + h) - f(x - h)}{2h}
+
+Notice that if the value of :math:`f(x)` is known, the Forward
+Difference formula only requires one extra evaluation, but the Central
+Difference formula requires two evaluations, making it twice as
+expensive. So is the extra evaluation worth it?
+
+To answer this question, we again compute the error of approximation
+in the central difference formula:
+
+.. math::
+   \begin{align}
+  f(x + h) &= f(x) + h Df(x) + \frac{h^2}{2!}
+  D^2f(x) + \frac{h^3}{3!} D^3f(x) + \frac{h^4}{4!} D^4f(x) + \cdots\\
+    f(x - h) &= f(x) - h Df(x) + \frac{h^2}{2!}
+  D^2f(x) - \frac{h^3}{3!} D^3f(x) + \frac{h^4}{4!} D^4f(x) +
+  \cdots\\
+  Df(x) & =  \frac{f(x + h) - f(x - h)}{2h} + \frac{h^2}{3!}
+  D^3f(x) +  \frac{h^4}{5!}
+  D^5f(x) + \cdots \\
+  Df(x) & =  \frac{f(x + h) - f(x - h)}{2h} + O(h^2)
+   \end{align}
+
+The error of the Central Difference formula is :math:`O(h^2)`, i.e.,
+the error goes down quadratically whereas the error in the Forward
+Difference formula only goes down linearly.
+
+Using central differences instead of forward differences in Ceres
+Solver is a simple matter of changing a template argument to
+:class:`NumericDiffCostFunction` as follows:
+
+.. code-block:: c++
+
+  CostFunction* cost_function =
+    new NumericDiffCostFunction<Rat43CostFunctor, CENTRAL, 1, 4>(
+      new Rat43CostFunctor(x, y));
+
+But what do these differences in the error mean in practice? To see
+this, consider the problem of evaluating the derivative of the
+univariate function
+
+.. math::
+   f(x) = \frac{e^x}{\sin x - x^2},
+
+at :math:`x = 1.0`.
+
+It is easy to determine that :math:`Df(1.0) =
+140.73773557129658`. Using this value as reference, we can now compute
+the relative error in the forward and central difference formulae as a
+function of the absolute step size and plot them.
+
+.. figure:: forward_central_error.png
+   :figwidth: 100%
+   :align: center
+
+Reading the graph from right to left, a number of things stand out:
+
+ 1. The graphs for both formulae have two distinct regions. At first,
+    starting from a large value of :math:`h` the error goes down as
+    the effect of truncating the Taylor series dominates, but as the
+    value of :math:`h` continues to decrease, the error starts
+    increasing again as roundoff error starts to dominate the
+    computation. So we cannot just keep on reducing the value of
+    :math:`h` to get better estimates of :math:`Df`. The fact that we
+    are using finite precision arithmetic becomes a limiting factor.
+ 2. The Forward Difference formula is not a great method for evaluating
+    derivatives. The Central Difference formula converges much more
+    quickly to a more accurate estimate of the derivative with
+    decreasing step size. So unless the evaluation of :math:`f(x)` is
+    so expensive that you absolutely cannot afford the extra
+    evaluation required by central differences, **do not use the
+    Forward Difference formula**.
+ 3. Neither formula works well for a poorly chosen value of :math:`h`.
+
+
+Ridders' Method
+===============
+
+So, can we get better estimates of :math:`Df` without requiring such
+small values of :math:`h` that we start hitting floating point
+roundoff errors?
+
+One possible approach is to find a method whose error goes down faster
+than :math:`O(h^2)`. This can be done by applying `Richardson
+Extrapolation
+<https://en.wikipedia.org/wiki/Richardson_extrapolation>`_ to the
+problem of differentiation. This is also known as *Ridders' Method*
+[Ridders]_.
+
+Let us recall the error in the central differences formula:
+
+.. math::
+   \begin{align}
+   Df(x) & =  \frac{f(x + h) - f(x - h)}{2h} + \frac{h^2}{3!}
+   D^3f(x) +  \frac{h^4}{5!}
+   D^5f(x) + \cdots\\
+           & =  \frac{f(x + h) - f(x - h)}{2h} + K_2 h^2 + K_4 h^4 + \cdots
+   \end{align}
+
+The key thing to note here is that the terms :math:`K_2, K_4, ...`
+are independent of :math:`h` and only depend on :math:`x`.
+
+Let us now define:
+
+.. math::
+
+   A(1, m) = \frac{f(x + h/2^{m-1}) - f(x - h/2^{m-1})}{2h/2^{m-1}}.
+
+Then observe that
+
+.. math::
+
+   Df(x) = A(1,1) + K_2 h^2 + K_4 h^4 + \cdots
+
+and
+
+.. math::
+
+   Df(x) = A(1, 2) + K_2 (h/2)^2 + K_4 (h/2)^4 + \cdots
+
+Here we have halved the step size to obtain a second central
+differences estimate of :math:`Df(x)`. Combining these two estimates,
+we get:
+
+.. math::
+
+   Df(x) = \frac{4 A(1, 2) - A(1,1)}{4 - 1} + O(h^4)
+
+which is an approximation of :math:`Df(x)` with truncation error that
+goes down as :math:`O(h^4)`. But we do not have to stop here. We can
+iterate this process to obtain even more accurate estimates as
+follows:
+
+.. math::
+
+   A(n, m) =  \begin{cases}
+    \frac{\displaystyle f(x + h/2^{m-1}) - f(x -
+    h/2^{m-1})}{\displaystyle 2h/2^{m-1}} & n = 1 \\
+   \frac{\displaystyle 4^{n-1} A(n - 1, m + 1) - A(n - 1, m)}{\displaystyle 4^{n-1} - 1} & n > 1
+   \end{cases}
+
+It is straightforward to show that the approximation error in
+:math:`A(n, 1)` is :math:`O(h^{2n})`. To see how the above formula can
+be implemented in practice to compute :math:`A(n,1)` it is helpful to
+structure the computation as the following tableau:
+
+.. math::
+   \begin{array}{ccccc}
+   A(1,1) & A(1, 2) & A(1, 3) & A(1, 4) & \cdots\\
+          & A(2, 1) & A(2, 2) & A(2, 3) & \cdots\\
+	  &         & A(3, 1) & A(3, 2) & \cdots\\
+	  &         &         & A(4, 1) & \cdots \\
+	  &         &         &         & \ddots
+   \end{array}
+
+So, to compute :math:`A(n, 1)` for increasing values of :math:`n` we
+move from the left to the right, computing one column at a
+time. Assuming that the primary cost here is the evaluation of the
+function :math:`f(x)`, the cost of computing a new column of the above
+tableau is two function evaluations, since the only entry that
+requires new evaluations of :math:`f` is :math:`A(1, m)`, the central
+difference formula with step size :math:`2^{1-m}h`; every other entry
+in the column follows from the recurrence above at no extra cost.
+
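+A minimal sketch of this column-by-column computation, using a fixed
+number of columns and none of the adaptive error tracking of the real
+implementation, might look as follows:
+
+.. code-block:: c++
+
+  #include <cmath>
+  #include <vector>
+
+  // Estimate Df(x) as A(num_columns, 1) from the tableau above.
+  // F is any callable taking and returning a double.
+  template <typename F>
+  double RiddersDerivative(const F& f, double x, double h, int num_columns) {
+    std::vector<double> previous_column;
+    for (int m = 1; m <= num_columns; ++m) {
+      std::vector<double> column;
+      // A(1, m): central differences with step size h / 2^(m - 1).
+      const double step = h / std::pow(2.0, m - 1);
+      column.push_back((f(x + step) - f(x - step)) / (2.0 * step));
+      // A(n, .) from A(n - 1, .) in this column and the previous one.
+      for (int n = 2; n <= m; ++n) {
+        const double factor = std::pow(4.0, n - 1);
+        column.push_back(
+            (factor * column[n - 2] - previous_column[n - 2]) / (factor - 1.0));
+      }
+      previous_column = column;
+    }
+    return previous_column.back();  // A(num_columns, 1).
+  }
+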
+Applying this method to :math:`f(x) = \frac{e^x}{\sin x - x^2}`
+starting with a fairly large step size :math:`h = 0.01`, we get:
+
+.. math::
+   \begin{array}{rrrrr}
+   141.678097131 &140.971663667 &140.796145400 &140.752333523 &140.741384778\\
+   &140.736185846 &140.737639311 &140.737729564 &140.737735196\\
+   & &140.737736209 &140.737735581 &140.737735571\\
+   & & &140.737735571 &140.737735571\\
+   & & & &140.737735571\\
+   \end{array}
+
+Compared to the *correct* value :math:`Df(1.0) = 140.73773557129658`,
+:math:`A(5, 1)` has a relative error of :math:`10^{-13}`. For
+comparison, the relative error for the central difference formula with
+the same stepsize (:math:`0.01/2^4 = 0.000625`) is :math:`10^{-5}`.
+
+The above tableau is the basis of Ridders' method for numeric
+differentiation. The full implementation is an adaptive scheme that
+tracks its own estimation error and stops automatically when the
+desired precision is reached. Of course it is more expensive than the
+forward and central difference formulae, but is also significantly
+more robust and accurate.
+
+Using Ridders' method instead of forward or central differences in
+Ceres is again a simple matter of changing a template argument to
+:class:`NumericDiffCostFunction` as follows:
+
+.. code-block:: c++
+
+  CostFunction* cost_function =
+    new NumericDiffCostFunction<Rat43CostFunctor, RIDDERS, 1, 4>(
+      new Rat43CostFunctor(x, y));
+
+The following graph shows the relative error of the three methods as a
+function of the absolute step size. For Ridders' method we assume
+that the step size for evaluating :math:`A(n,1)` is :math:`2^{1-n}h`.
+
+.. figure:: forward_central_ridders_error.png
+   :figwidth: 100%
+   :align: center
+
+Using the 10 function evaluations that are needed to compute
+:math:`A(5,1)`, we are able to approximate :math:`Df(1.0)` about 1000
+times better than the best central differences estimate. To put these
+numbers in perspective, machine epsilon for double precision
+arithmetic is :math:`\approx 2.22 \times 10^{-16}`.
+
+Going back to ``Rat43``, let us also look at the runtime cost of the
+various methods for computing numeric derivatives.
+
+==========================   =========
+CostFunction                 Time (ns)
+==========================   =========
+Rat43Analytic                      255
+Rat43AnalyticOptimized              92
+Rat43NumericDiffForward            262
+Rat43NumericDiffCentral            517
+Rat43NumericDiffRidders           3760
+==========================   =========
+
+As expected, Central Differences is about twice as expensive as
+Forward Differences, and the remarkable accuracy improvements of
+Ridders' method cost an order of magnitude more runtime.
+
+Recommendations
+===============
+
+Numeric differentiation should be used when you cannot compute the
+derivatives either analytically or using automatic differentiation. This
+is usually the case when you are calling an external library or
+function whose analytic form you do not know, or, even if you do know
+it, you are not in a position to rewrite it in the manner required to
+use :ref:`chapter-automatic_derivatives`.
+
+
+When using numeric differentiation, use at least Central Differences,
+and if execution time is not a concern or the objective function is
+such that determining a good static relative step size is hard,
+Ridders' method is recommended.
+
+.. rubric:: Footnotes
+
+.. [#f2] `Numerical Differentiation
+	 <https://en.wikipedia.org/wiki/Numerical_differentiation#Practical_considerations_using_floating_point_arithmetic>`_
+.. [#f3] [Press]_ Numerical Recipes, Section 5.7
+.. [#f4] In asymptotic error analysis, an error of :math:`O(h^k)`
+	 means that the absolute-value of the error is at most some
+	 constant times :math:`h^k` when :math:`h` is close enough to
+	 :math:`0`.
diff --git a/docs/source/pose_graph_3d_ex.png b/docs/source/pose_graph_3d_ex.png
new file mode 100644
index 0000000..ae2cfc3
--- /dev/null
+++ b/docs/source/pose_graph_3d_ex.png
Binary files differ
diff --git a/docs/source/robust_least_squares_fit.png b/docs/source/robust_least_squares_fit.png
new file mode 100644
index 0000000..89003c9
--- /dev/null
+++ b/docs/source/robust_least_squares_fit.png
Binary files differ
diff --git a/docs/source/slam2d.png b/docs/source/slam2d.png
new file mode 100644
index 0000000..ad287ee
--- /dev/null
+++ b/docs/source/slam2d.png
Binary files differ
diff --git a/docs/source/solving_faqs.rst b/docs/source/solving_faqs.rst
new file mode 100644
index 0000000..64604c4
--- /dev/null
+++ b/docs/source/solving_faqs.rst
@@ -0,0 +1,171 @@
+.. _chapter-solving_faqs:
+
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+=======
+Solving
+=======
+
+#. How do I evaluate the Jacobian for a solved problem?
+
+   Using :func:`Problem::Evaluate`.
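+
+   For example, after the solve has finished, the Jacobian can be
+   obtained as a ``ceres::CRSMatrix`` as follows (a minimal sketch;
+   ``problem`` is assumed to be the already constructed and solved
+   ``Problem``):
+
+   .. code-block:: c++
+
+     double cost = 0.0;
+     std::vector<double> residuals;
+     std::vector<double> gradient;
+     ceres::CRSMatrix jacobian;
+     // Any of the output arguments may be replaced by nullptr if it
+     // is not needed.
+     problem.Evaluate(ceres::Problem::EvaluateOptions(),
+                      &cost, &residuals, &gradient, &jacobian);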
+
+#. How do I choose the right linear solver?
+
+   When using the ``TRUST_REGION`` minimizer, the choice of linear
+   solver is an important decision. It affects solution quality and
+   runtime. Here is a simple way to reason about it; a short
+   configuration sketch follows the list below.
+
+   1. For small (a few hundred parameters) or dense problems use
+      ``DENSE_QR``.
+
+   2. For general sparse problems (i.e., the Jacobian matrix has a
+      substantial number of zeros) use
+      ``SPARSE_NORMAL_CHOLESKY``. This requires that you have
+      ``SuiteSparse`` or ``CXSparse`` installed.
+
+   3. For bundle adjustment problems with up to a hundred or so
+      cameras, use ``DENSE_SCHUR``.
+
+   4. For larger bundle adjustment problems with sparse Schur
+      Complement/Reduced camera matrices use ``SPARSE_SCHUR``. This
+      requires that you build Ceres with support for ``SuiteSparse``,
+      ``CXSparse`` or Eigen's sparse linear algebra libraries.
+
+      If you do not have access to these libraries for whatever
+      reason, ``ITERATIVE_SCHUR`` with ``SCHUR_JACOBI`` is an
+      excellent alternative.
+
+   5. For large bundle adjustment problems (a few thousand cameras or
+      more) use the ``ITERATIVE_SCHUR`` solver. There are a number of
+      preconditioner choices here. ``SCHUR_JACOBI`` offers an
+      excellent balance of speed and accuracy. This is also the
+      recommended option if you are solving medium sized problems for
+      which ``DENSE_SCHUR`` is too slow but ``SuiteSparse`` is not
+      available.
+
+      .. NOTE::
+
+        If you are solving small to medium sized problems, consider
+        setting ``Solver::Options::use_explicit_schur_complement`` to
+        ``true``; it can result in a substantial performance boost.
+
+      If you are not satisfied with ``SCHUR_JACOBI``'s performance try
+      ``CLUSTER_JACOBI`` and ``CLUSTER_TRIDIAGONAL`` in that
+      order. They require that you have ``SuiteSparse``
+      installed. Both of these preconditioners use a clustering
+      algorithm. Use ``SINGLE_LINKAGE`` before ``CANONICAL_VIEWS``.
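+
+   Putting the above guidance into code, a minimal configuration
+   sketch might look as follows (the solver, preconditioner, and
+   variable names here are illustrative choices, not requirements):
+
+   .. code-block:: c++
+
+     ceres::Solver::Options options;
+     // Case 2 above: a general sparse problem.
+     options.linear_solver_type = ceres::SPARSE_NORMAL_CHOLESKY;
+     // Case 5 above: a large bundle adjustment problem would instead use
+     //   options.linear_solver_type = ceres::ITERATIVE_SCHUR;
+     //   options.preconditioner_type = ceres::SCHUR_JACOBI;
+     ceres::Solver::Summary summary;
+     ceres::Solve(options, &problem, &summary);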
+
+#. Use :func:`Solver::Summary::FullReport` to diagnose performance problems.
+
+   When diagnosing Ceres performance issues (runtime and convergence),
+   the first place to start is by looking at the output of
+   ``Solver::Summary::FullReport``. Here is an example:
+
+   .. code-block:: bash
+
+     ./bin/bundle_adjuster --input ../data/problem-16-22106-pre.txt
+
+     iter      cost      cost_change  |gradient|   |step|    tr_ratio  tr_radius  ls_iter  iter_time  total_time
+        0  4.185660e+06    0.00e+00    2.16e+07   0.00e+00   0.00e+00  1.00e+04       0    7.50e-02    3.58e-01
+        1  1.980525e+05    3.99e+06    5.34e+06   2.40e+03   9.60e-01  3.00e+04       1    1.84e-01    5.42e-01
+        2  5.086543e+04    1.47e+05    2.11e+06   1.01e+03   8.22e-01  4.09e+04       1    1.53e-01    6.95e-01
+        3  1.859667e+04    3.23e+04    2.87e+05   2.64e+02   9.85e-01  1.23e+05       1    1.71e-01    8.66e-01
+        4  1.803857e+04    5.58e+02    2.69e+04   8.66e+01   9.93e-01  3.69e+05       1    1.61e-01    1.03e+00
+        5  1.803391e+04    4.66e+00    3.11e+02   1.02e+01   1.00e+00  1.11e+06       1    1.49e-01    1.18e+00
+
+     Ceres Solver v1.12.0 Solve Report
+     ----------------------------------
+                                          Original                  Reduced
+     Parameter blocks                        22122                    22122
+     Parameters                              66462                    66462
+     Residual blocks                         83718                    83718
+     Residual                               167436                   167436
+
+     Minimizer                        TRUST_REGION
+
+     Sparse linear algebra library    SUITE_SPARSE
+     Trust region strategy     LEVENBERG_MARQUARDT
+
+                                             Given                     Used
+     Linear solver                    SPARSE_SCHUR             SPARSE_SCHUR
+     Threads                                     1                        1
+     Linear solver threads                       1                        1
+     Linear solver ordering              AUTOMATIC                22106, 16
+
+     Cost:
+     Initial                          4.185660e+06
+     Final                            1.803391e+04
+     Change                           4.167626e+06
+
+     Minimizer iterations                        5
+     Successful steps                            5
+     Unsuccessful steps                          0
+
+     Time (in seconds):
+     Preprocessor                            0.283
+
+       Residual evaluation                   0.061
+       Jacobian evaluation                   0.361
+       Linear solver                         0.382
+     Minimizer                               0.895
+
+     Postprocessor                           0.002
+     Total                                   1.220
+
+     Termination:                   NO_CONVERGENCE (Maximum number of iterations reached.)
+
+   Let us focus on run-time performance. The relevant lines to look at
+   are:
+
+
+   .. code-block:: bash
+
+     Time (in seconds):
+     Preprocessor                            0.283
+
+       Residual evaluation                   0.061
+       Jacobian evaluation                   0.361
+       Linear solver                         0.382
+     Minimizer                               0.895
+
+     Postprocessor                           0.002
+     Total                                   1.220
+
+
+   This tells us that of the total 1.2 seconds, about 0.4 seconds was
+   spent in the linear solver and the rest was mostly spent in
+   preprocessing and Jacobian evaluation.
+
+   The preprocessing seems particularly expensive. Looking back at the
+   report, we observe:
+
+   .. code-block:: bash
+
+     Linear solver ordering              AUTOMATIC                22106, 16
+
+   This indicates that we are using automatic ordering for the
+   ``SPARSE_SCHUR`` solver, which can be expensive at times. A
+   straightforward way to deal with this is to supply the ordering
+   manually. For ``bundle_adjuster`` this can be done by passing the
+   flag ``-ordering=user``. Doing so and looking at the timing block
+   of the full report gives us:
+
+   .. code-block:: bash
+
+     Time (in seconds):
+     Preprocessor                            0.051
+
+       Residual evaluation                   0.053
+       Jacobian evaluation                   0.344
+       Linear solver                         0.372
+     Minimizer                               0.854
+
+     Postprocessor                           0.002
+     Total                                   0.935
+
+
+
+   The preprocessor time has gone down by more than 5.5x!
diff --git a/docs/source/spivak_notation.rst b/docs/source/spivak_notation.rst
new file mode 100644
index 0000000..3ac56ba
--- /dev/null
+++ b/docs/source/spivak_notation.rst
@@ -0,0 +1,53 @@
+.. default-domain:: cpp
+
+.. cpp:namespace:: ceres
+
+.. _chapter-spivak_notation:
+
+===============
+Spivak Notation
+===============
+
+To preserve our collective sanity, we will use Spivak's notation for
+derivatives. It is a functional notation that makes reading and
+reasoning about expressions involving derivatives simple.
+
+For a univariate function :math:`f`, :math:`f(a)` denotes its value at
+:math:`a`. :math:`Df` denotes its first derivative, and
+:math:`Df(a)` is the derivative evaluated at :math:`a`, i.e.,
+
+.. math::
+   Df(a) = \left . \frac{d}{dx} f(x) \right |_{x = a}
+
+:math:`D^kf` denotes the :math:`k^{\text{th}}` derivative of :math:`f`.
+
+For a bi-variate function :math:`g(x,y)`, :math:`D_1g` and
+:math:`D_2g` denote the partial derivatives of :math:`g` w.r.t the
+first and second variable respectively. In the classical notation this
+is equivalent to saying:
+
+.. math::
+
+   D_1 g = \frac{\partial}{\partial x}g(x,y) \text{ and }  D_2 g  = \frac{\partial}{\partial y}g(x,y).
+
+
+:math:`Dg` denotes the Jacobian of `g`, i.e.,
+
+.. math::
+
+  Dg = \begin{bmatrix} D_1g & D_2g \end{bmatrix}
+
+More generally for a multivariate function :math:`g:\mathbb{R}^n
+\longrightarrow \mathbb{R}^m`, :math:`Dg` denotes the :math:`m\times
+n` Jacobian matrix. :math:`D_i g` is the partial derivative of
+:math:`g` w.r.t the :math:`i^{\text{th}}` coordinate and the
+:math:`i^{\text{th}}` column of :math:`Dg`.
+
+Finally, :math:`D^2_1g` and :math:`D_1D_2g` have the obvious meaning
+as higher order partial derivatives.
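+
+As a concrete illustration of the notation, take :math:`g(x, y) =
+x^2 y` (a function chosen here purely for exposition). Then
+
+.. math::
+
+   D_1g(x,y) = 2xy, \quad
+   D_2g(x,y) = x^2, \quad
+   Dg(x,y) = \begin{bmatrix} 2xy & x^2 \end{bmatrix}, \quad
+   D_1D_2g(x,y) = 2x.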
+
+For more see Michael Spivak's book `Calculus on Manifolds
+<https://www.amazon.com/Calculus-Manifolds-Approach-Classical-Theorems/dp/0805390219>`_
+or a brief discussion of the `merits of this notation
+<http://www.vendian.org/mncharity/dir3/dxdoc/>`_ by
+Mitchell N. Charity.
diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst
new file mode 100644
index 0000000..a3fd212
--- /dev/null
+++ b/docs/source/tutorial.rst
@@ -0,0 +1,11 @@
+.. _chapter-tutorial:
+
+========
+Tutorial
+========
+
+.. toctree::
+   :maxdepth: 3
+
+   nnls_tutorial
+   gradient_tutorial
diff --git a/docs/source/users.rst b/docs/source/users.rst
new file mode 100644
index 0000000..b4f90fa
--- /dev/null
+++ b/docs/source/users.rst
@@ -0,0 +1,75 @@
+.. _chapter-users:
+
+=====
+Users
+=====
+
+* At `Google <http://www.google.com>`_, Ceres is used to:
+
+  * Estimate the pose of `Street View`_ cars, aircraft, and satellites.
+  * Build 3D models for `PhotoTours`_.
+  * Estimate satellite image sensor characteristics.
+  * Stitch `panoramas`_ on Android and iOS.
+  * Apply `Lens Blur`_ on Android.
+  * Solve `bundle adjustment`_ and `SLAM`_ problems in `Project
+    Tango`_.
+
+* `Willow Garage`_ uses Ceres to solve `SLAM`_ problems.
+* `Southwest Research Institute <http://www.swri.org/>`_ uses Ceres for
+  `calibrating robot-camera systems`_.
+* `Blender <http://www.blender.org>`_ uses Ceres for `planar
+  tracking`_ and `bundle adjustment`_.
+* `OpenMVG <http://imagine.enpc.fr/~moulonp/openMVG/>`_, an open source
+  multi-view geometry library, uses Ceres for `bundle adjustment`_.
+* `Microsoft Research <http://research.microsoft.com/en-us/>`_ uses
+  Ceres for nonlinear optimization of objectives involving subdivision
+  surfaces under `skinned control meshes`_.
+* `Matterport <http://www.matterport.com>`_ uses Ceres for global
+  alignment of 3D point clouds and for pose graph optimization.
+* `Obvious Engineering <http://obviousengine.com/>`_ uses Ceres for
+  bundle adjustment for their 3D photography app `Seene
+  <http://seene.co/>`_.
+* The `Autonomous Systems Lab <http://www.asl.ethz.ch/>`_ at ETH
+  Zurich uses Ceres for
+
+  * Camera and Camera/IMU Calibration.
+  * Large scale optimization of visual, inertial, gps and
+    wheel-odometry data for long term autonomy.
+
+* `OpenPTrack <http://openptrack.org/>`_ uses Ceres for camera
+  calibration.
+* The `Intelligent Autonomous System Lab <http://robotics.dei.unipd.it/>`_
+  at University of Padova, Italy, uses Ceres for
+
+  * Camera/depth sensors network calibration.
+  * Depth sensor distortion map estimation.
+
+* `Theia <http://cs.ucsb.edu/~cmsweeney/theia>`_ is an open source
+  Structure from Motion library that uses Ceres for `bundle adjustment`_
+  and camera pose estimation.
+
+* The `Applied Research Laboratory <https://www.arl.psu.edu/>`_ at
+  Pennsylvania State University uses Ceres in their synthetic aperture
+  sonar beamforming engine, called ASASIN, for estimating platform
+  kinematics.
+
+* `Colmap <https://github.com/colmap/colmap>`_ is an open source
+  structure from motion library that makes heavy use of Ceres for
+  bundle adjustment with support for many camera models and for other
+  non-linear least-squares problems (relative, absolute pose
+  refinement, etc.).
+
+
+
+.. _bundle adjustment: http://en.wikipedia.org/wiki/Structure_from_motion
+.. _Street View: http://youtu.be/z00ORu4bU-A
+.. _PhotoTours: http://google-latlong.blogspot.com/2012/04/visit-global-landmarks-with-photo-tours.html
+.. _panoramas: http://www.google.com/maps/about/contribute/photosphere/
+.. _Project Tango: https://www.google.com/atap/projecttango/
+.. _planar tracking: http://mango.blender.org/development/planar-tracking-preview/
+.. _Willow Garage: https://www.willowgarage.com/blog/2013/08/09/enabling-robots-see-better-through-improved-camera-calibration
+.. _Lens Blur: http://googleresearch.blogspot.com/2014/04/lens-blur-in-new-google-camera-app.html
+.. _SLAM: http://en.wikipedia.org/wiki/Simultaneous_localization_and_mapping
+.. _calibrating robot-camera systems:
+   http://rosindustrial.org/news/2014/9/24/industrial-calibration-library-update-and-presentation
+.. _skinned control meshes: http://research.microsoft.com/en-us/projects/handmodelingfrommonoculardepth/
diff --git a/docs/source/version_history.rst b/docs/source/version_history.rst
new file mode 100644
index 0000000..0bef4ad
--- /dev/null
+++ b/docs/source/version_history.rst
@@ -0,0 +1,1382 @@
+.. _chapter-version-history:
+
+===============
+Version History
+===============
+
+1.14.0
+======
+
+New Features
+------------
+
+#. New ``EvaluationCallback`` API. (Keir Mierle)
+#. TBB based threading (Yury Prokazov & Mike Vitus)
+#. C++11 threads based threading (Mike Vitus)
+#. A ``ceres::Context`` object to cache and keep track of global
+   state. (Mike Vitus)
+#. TinySolver - A small dense solver meant for solving small problems
+   really fast. [EXPERIMENTAL] (Keir Mierle & Sameer Agarwal)
+#. Bazel Build. (Keir Mierle & Rodrigo Queiro)
+
+
+Backward Incompatible API Changes
+---------------------------------
+
+#. ``Solver::Options::num_linear_solver_threads`` is deprecated,
+   ``Solver::Options::num_threads`` controls all parallelism in Ceres
+   Solver now. Similarly,
+   ``Solver::Summary::num_linear_solver_threads_given`` and
+   ``Solver::Summary::num_linear_solver_threads_used`` are also
+   deprecated.
+
+
+Bug Fixes & Minor Changes
+-------------------------
+
+#. Remove armv7 from target architectures when building for iOS >= 11. (Alex Stewart)
+#. Corrects the documentation of Problem::AddResidualBlock. (Mike Vitus)
+#. Fixes the configuration check in port.h. (Mike Vitus)
+#. Add small_blas_gemm_benchmark. (Sameer Agarwal)
+#. Implement some C++11 math functions for Jet (Emil Ernerfeldt)
+#. Fix integer conversion warning in MSVC. (Alex Stewart)
+#. Improve NDK build error handling (Keir Mierle)
+#. Fix build: -Wreorder, test fail (Keir Mierle)
+#. An implementation of SubsetPreconditioner. (Sameer Agarwal)
+#. Split bundle adjustment tests into individual binaries (Keir Mierle)
+#. Require Eigen >= 3.3.4 on aarch64. (Alex Stewart)
+#. Fix TBB detection on Windows. (Alex Stewart)
+#. Improve ExecutionSummary (Sameer Agarwal)
+#. Remove as typo from callbacks.h (Sameer Agarwal)
+#. Removes two unimplemented class functions. (Mike Vitus)
+#. Update EigenTypes to deal with 1 column matrices (Sameer Agarwal)
+#. Add GradientProblemSolver::Options::update_state_every_iteration (Sameer Agarwal)
+#. Fixes the pose graph example documentation. (Mike Vitus)
+#. Fix Eigen >= 3.3 compilation if EIGEN_DONT_VECTORIZE set (Janick Martinez Esturo)
+#. Add an optional dependency on the Google Benchmark library. (Sameer Agarwal)
+#. Fix the documentation for CostFunction::Evaluate. (Sameer Agarwal)
+#. Fix a mathematical typo. (Sameer Agarwal)
+#. Add TBB information to Ceres version string. (Alex Stewart)
+#. Move discussion of dependency licensing to Sphinx docs. (Alex Stewart)
+#. Fix an erroneous namespace comment (Sameer Agarwal)
+#. Fix use of unnamed type as template argument warnings on Clang. (Alex Stewart)
+#. Add link for CLA in docs; minor fixes (Keir Mierle)
+#. Fix tiny_solver_test (Sameer Agarwal)
+#. Improve compatibility with ceres::Solver (Sameer Agarwal)
+#. Refactor nist.cc to be compatible with TinySolver (Sameer Agarwal)
+#. Report timings with microsecond resolution (Thomas Gamper)
+#. Add missing Eigen traits to Jets (Sameer Agarwal)
+#. Use high-resolution timer on Windows (Thomas Gamper)
+#. Add a comment about default constructed reference counts. (Keir Mierle)
+#. Delete cost and loss functions when not in use. (Sameer Agarwal)
+#. Fix assert_ndk_version for >= r11. (Alex Stewart)
+#. Add docs explaining how to build Ceres with OpenMP on OS X. (Alex Stewart)
+#. Update LAPACK option to refer to direct use by Ceres only. (Alex Stewart)
+#. Hide optional SuiteSparse vars in CMake GUI by default. (Alex Stewart)
+#. Always hide TBB_LIBRARY in CMake GUI by default. (Alex Stewart)
+#. Fix typo in definition of f3 in powell example (x4 -> x3). (Alex Stewart)
+#. Fix suppression of C++11 propagation warning. (Alex Stewart)
+#. Add new Schur specialization for 2, 4, 6. (Chris Sweeney)
+#. Use const keyword for 'int thread_id' variables. (pmoulon)
+
+
+1.13.0
+======
+
+New Features
+------------
+#. ``LineSearchMinimizer`` and ``GradientProblemSolver`` are up to 2x
+   faster due to fewer function evaluations. (Sameer Agarwal)
+#. ``SPARSE_NORMAL_CHOLESKY`` is significantly faster because Ceres
+   now computes the normal equations exploiting the static block
+   sparsity structure. (Cheng Wang & Sameer Agarwal)
+#. Add compound with scalar operators for Jets. (Alex Stewart)
+#. Enable support for AVX instructions for Jets. (Alex Stewart)
+
+Backward Incompatible API Changes
+---------------------------------
+The enum ``CovarianceAlgorithmType``, which controls the linear algebra
+algorithm used to compute the covariance, used to combine the choice of
+the algorithm and the choice of the sparse linear algebra library into
+the enum name. So we had ``SUITE_SPARSE_QR`` and
+``EIGEN_SPARSE_QR``. ``Covariance::Options`` now has a separate member
+allowing the user to choose the sparse linear algebra library, just
+like the solver does, and ``CovarianceAlgorithmType`` now takes the
+values ``DENSE_SVD`` and ``SPARSE_QR``. This is a forward-looking
+change that will allow us to develop more flexible covariance
+estimation algorithms with multiple linear algebra backends.
+
+Bug Fixes & Minor Changes
+-------------------------
+#. Fix ``InvertPSDMatrix`` as it was triggering an Eigen assert in
+   Debug mode. (Philipp Hubner)
+#. Fix cmake error from CeresConfig.cmake when Ceres not found (Taylor
+   Braun-Jones)
+#. Completely refactored ``SparseNormalCholeskySolver``. (Sameer
+   Agarwal)
+#. Fixed time reporting in ``Summary::FullReport`` when
+   ``LineSearchMinimizer`` is used. (Sameer Agarwal)
+#. Remove unused file: collections_port.cc. (Sameer Agarwal)
+#. ``SPARSE_SCHUR`` + ``CX_SPARSE`` = Faster (Sameer Agarwal)
+#. Refactored a number of linear solver tests to be more thorough and
+   informative. (Sameer Agarwal)
+#. Pass user-specified search hints as HINTS not PATHS. (Alex Stewart)
+#. Prefer Eigen installs over exported build directories. (Alex
+   Stewart)
+#. Add OpenMP flags when compiling for C if enabled. (Alex Stewart)
+#. Add a missing ``CERES_EXPORT`` to GradientChecker (Sameer Agarwal)
+#. Use target_compile_features() to specify C++11 requirement if
+   available. (Alex Stewart)
+#. Update docs: .netrc --> .gitcookies (Keir Mierle)
+#. Fix implicit precision loss warning on 64-bit archs (Ricardo
+   Sanchez-Saez)
+#. Optionally use exported Eigen CMake configuration if
+   available. (Alex Stewart)
+#. Use ``Ceres_[SOURCE/BINARY]_DIR`` not ``CMAKE_XXX_DIR`` to support
+   nesting. (Alex Stewart)
+#. Update ``Problem::EvaluateOptions`` documentation. (Sameer Agarwal)
+#. Add public headers to CMake target for IDEs. (Devin Lane)
+#. Add an article on interfacing with automatic
+   differentiation. (Sameer Agarwal)
+#. Add default Fedora/Debian locations for CXSparse to search
+   paths. (Alex Stewart)
+#. Add a test for ``LineSearchMinimizer`` (Sameer Agarwal)
+#. Flatten the table of contents. (Sameer Agarwal)
+#. Fix when ``LineSearchMinimizer`` adds the ``IterationSummary`` to
+   ``Solver::Summary`` (Sameer Agarwal)
+#. Fix search path for miniglog headers when Ceres is exported. (Alex
+   Stewart)
+#. Fix ambiguous reference to ``WARNING`` when using miniglog. (Alex
+   Stewart)
+#. Fix Jet/Eigen compatibility for Eigen > 3.3 (Julien Pilet)
+#. Add max severity option when ``MINIGLOG`` is enabled (Taylor
+   Braun-Jones)
+#. Improvements to Schur template specializations (Sameer Agarwal)
+#. Added an article on derivatives (Sameer Agarwal)
+#. Require Eigen >= 3.3 to define ScalarBinaryOpTraits in Jet. (Alex
+   Stewart)
+#. A hacky fix for the Eigen::FullPivLU changes. (Sameer Agarwal)
+#. Specify ``ScalarBinaryOpTraits`` for Jet types. (Chris Sweeney)
+#. Remove spurious conversion from doubles to Jets. (Sameer Agarwal)
+#. Fix an error in the tutorial code for ``NumericDiffCostFunction``
+   (Sameer Agarwal)
+#. ``CERES_EXPORT`` fix to compile Ceres as DLL (Je Hyeong Hong)
+#. Fix detection of deprecated Bessel function names on MSVC. (Alex
+   Stewart)
+#. Ensure that partial evaluation of residuals triggers an error
+   (Sameer Agarwal)
+#. Fix detection of CMake-built glog on Windows. (Alex Stewart)
+#. Add additional search paths for glog & Eigen on Windows. (Alex
+   Stewart)
+#. Various minor grammar and bug fixes to the documentation (Sameer
+   Agarwal, Alex Stewart, William Rucklidge)
+
+
+1.12.0
+======
+
+New Features
+------------
+#. Aligned ``Jet`` matrices for improved automatic differentiation
+   performance. (Andrew Hunter)
+#. Auto-differentiable implementations of Bessel functions, ``floor``,
+   and ``ceil`` (Alessandro Gentilini & Michael Vitus)
+#. New 2D and 3D SLAM examples. (Michael Vitus)
+#. Added ``EigenQuaternionParameterization``. (Michael Vitus)
+#. Added ``Problem::IsParameterBlockConstant`` (Thomas Schneider)
+#. A complete refactoring of ``TrustRegionMinimizer``. (Sameer Agarwal)
+#. Gradient checking cleanup and local parameterization bugfix (David
+   Gossow)
+
+
+Backward Incompatible API Changes
+---------------------------------
+#. ``Solver::Options::numeric_derivative_relative_step_size`` has been
+   renamed to
+   ``Solver::Options::gradient_check_numeric_derivative_relative_step_size``. (Sameer
+   Agarwal)
+
+Bug Fixes & Minor Changes
+-------------------------
+#. Clear XXX_FOUND in Find<XXX>.cmake prior to searching. (Alex
+   Stewart)
+#. Fix versioning in the documentation (Sameer Agarwal)
+#. Fix missing gflags imported target definition in
+   CeresConfig.cmake. (Alex Stewart)
+#. Make gflags a public dependency of Ceres if it and glog are
+   found. (Alex Stewart)
+#. Add support for glog exported CMake target. (Alex Stewart)
+#. Use ``google::GLOG_WARNING`` instead of ``WARNING`` in tests to
+   support MSVC. (Alex Stewart)
+#. Update gtest and gmock to
+   ``a2b8a8e07628e5fd60644b6dd99c1b5e7d7f1f47`` (Sameer Agarwal)
+#. Add MSVC-specific ``#define`` to expose math constants in
+   ``<cmath>``. (Alex Stewart)
+#. Fix typo. indepdendent -> independent (Hung Lun)
+#. Fix potential invalid reset of CMAKE_FIND_LIBRARY_PREFIXES on MSVC
+   (Alex Stewart)
+#. Fix use of alignas(0) which is not ignored on GCC (Alex Stewart)
+#. Use default alignment if alignof(std::max_align_t) < 16 with C++11
+   (Alex Stewart)
+#. Introduce a common base class for DynamicAutoDiffCostFunction and
+   DynamicNumericDiffCostFunction. (Sameer Agarwal)
+#. Fix an exact equality test causing breakage in
+   gradient_checker_test. (Sameer Agarwal)
+#. Add GradientProblemSolver::Options::parameter_tolerance. (Sameer
+   Agarwal)
+#. Add missing T() wrappers for constants. (Rob Carroll)
+#. Remove two checks from rotation.h (Sameer Agarwal)
+#. Relax the tolerance in QuaternionParameterizationTestHelper. (Je
+   Hyeong Hong)
+#. Occured -> Occurred. (Sameer Agarwal)
+#. Fix a test error in autodiff_test.cc. (Je Hyeong Hong)
+#. Fix documentation source for templated function in ``rotation.h``.
+#. Add ``package.xml`` to enable Catkin builds. (Damon Kohler)
+#. Relaxing Jacobian matching in Gradient Checker test. (David Gossow)
+#. Allow SubsetParameterization to hold all parameters constant
+   (Sameer Agarwal)
+#. Fix an Intel compiler error in covariance_impl.cc (Je Hyeong Hong)
+#. Removing duplicate include directive. (David Gossow)
+#. Remove two DCHECKs from CubicHermiteSpline. (Sameer Agarwal)
+#. Fix some compiler warnings. (Richard Trieu)
+#. Update ExpectArraysClose to use ExpectClose instead of
+   EXPECT_NEAR. (Phillip Hubner)
+#. FindWithDefault returns by value rather than reference. (@aradval)
+#. Fix compiler errors on some systems. (David Gossow)
+#. Note that Problem::Evaluate cannot be called from an
+   IterationCallback. (Sameer Agarwal)
+#. Use ProductParameterization in bundle_adjuster.cc (Sameer Agarwal)
+#. Enable support for OpenMP in Clang if detected. (Alex Stewart)
+#. Remove duplicate entry for the NIST example in the docs. (Michael
+   Vitus)
+#. Add additional logging for analyzing orderings (Sameer Agarwal)
+#. Add readme for the sampled_function example. (Michael Vitus)
+#. Use _j[0,1,n]() Bessel functions on MSVC to avoid deprecation
+   errors. (Alex Stewart & Kichang Kim)
+#. Fix: Copy minimizer option ``is_silent`` to
+   ``LineSearchDirection::Options`` (Nicolai Wojke)
+#. Fix typos in ``users.rst`` (Sameer Agarwal)
+#. Make some Jet comparisons exact. (Sameer Agarwal)
+#. Add colmap to users.rst (Sameer Agarwal)
+#. Fix step norm evaluation in LineSearchMinimizer (Sameer Agarwal)
+#. Remove use of -Werror when compiling Ceres. (Alex Stewart)
+#. Report Ceres compile options as components in find_package(). (Alex
+   Stewart)
+#. Fix a spelling error in nnls_modeling.rst (Timer)
+#. Only use collapse() directive with OpenMP 3.0 or higher. (Keir
+   Mierle)
+#. Fix install path for CeresConfig.cmake to be architecture-aware.
+#. Fix double conversion to degrees in rotation_test (Keir Mierle)
+#. Make Jet string output more readable (Keir Mierle)
+#. Fix rotation_test IsClose() and related tests (Keir Mierle)
+#. Loosen an exact equality in local_parameterization_test (Sameer
+   Agarwal)
+#. make_docs: Pass the file encoding to open() (Niels Ole Salscheider)
+#. Fix error message returned when using SUITE_SPARSE_QR in covariance
+   estimation on a ceres built without SuiteSparse support. (Simon
+   Rutishauser)
+#. Fix CXX11 option to be available on MinGW & CygWin, but not
+   MSVC. (Alex Stewart)
+#. Fix missing early return() in xxx_not_found() dependency
+   macros. (Alex Stewart)
+#. Initialize ``inner_iterations_were_useful_`` correctly. (Sameer
+   Agarwal)
+#. Add an implementation for GradientProblemSolver::Options::IsValid
+   (Sameer Agarwal)
+#. Fix use of va_copy() if compiling with explicit C++ version <
+   C++11. (Alex Stewart)
+#. Install CMake files to lib/cmake/Ceres (Niels Ole Salscheider)
+#. Allow users to override the documentation install directory. (Niels
+   Ole Salscheider)
+#. Add covariance matrix for a vector of parameters (Wannes Van Loock)
+#. Saner tolerances & stricter LRE test. (Sameer Agarwal)
+#. Fix a malformed sentence in the tutorial. (Sameer Agarwal)
+#. Add logging for sparse Cholesky factorization using Eigen. (Sameer
+   Agarwal)
+#. Use std::adjacent_find instead of std::unique. (Sameer Agarwal)
+#. Improve logging in CompressedRowJacobianWriter on crash. (Sameer
+   Agarwal)
+#. Fix free parameter block handling in covariance computation (Wannes
+   Van Loock)
+#. Report the number of line search steps in FullReport. (Sameer
+   Agarwal)
+#. Make CMake read Ceres version directly from
+   include/ceres/version.h. (Alex Stewart)
+#. Lots of code style/lint changes. (William Rucklidge)
+#. Fix covariance computation for constant blocks (Wannes Van Loock)
+#. Add IOS_DEPLOYMENT_TARGET variable to iOS.cmake (Eduard Feicho)
+#. Make miniglog threadsafe on non-windows system by using
+   localtime_r() instead of localtime() for time formatting (Simon
+   Rutishauser)
+
+1.11.0
+======
+
+New Features
+------------
+#. Adaptive numeric differentiation using Ridders' method. (Tal
+   Ben-Nun)
+#. Add ``CubicInterpolator`` and ``BiCubicInterpolator`` to allow
+   smooth interpolation of sampled functions and integration with
+   automatic differentiation.
+#. Add method to return covariance in tangent space. (Michael Vitus &
+   Steve Hsu)
+#. Add Homogeneous vector parameterization. (Michael Vitus)
+#. Add a ``ProductParameterization``, a local parameterization that
+   can be constructed as a Cartesian product of other local
+   parameterizations.
+#. Add DynamicCostFunctionToFunctor. (David Gossow)
+#. Optionally export Ceres build directory into local CMake package
+   registry.
+#. Faster ``SPARSE_NORMAL_CHOLESKY`` in the presence of dynamic
+   sparsity.
+
+Bug Fixes & Minor Changes
+-------------------------
+#. Remove use of link-time optimisation (LTO) for all compilers due to
+   portability issues with gtest / type_info::operator== & Eigen with
+   Clang on OS X vs GCC 4.9+ on Linux requiring contradictory 'fixes'.
+#. Use link-time optimisation (LTO) only when compiling Ceres itself,
+   not tests or examples, to bypass gtest / type_info::operator==
+   issue.
+#. Use old minimum iOS version flags on Xcode < 7.0.
+#. Add gtest-specific flags when building/using as a shared library.
+#. Clean up iOS.cmake to use xcrun/xcodebuild & libtool.
+#. Import the latest version of ``googletest``.
+#. Refactored ``system_test`` into ``bundle_adjustment_test`` and
+   ``system_test``, where each test case is its own test.
+#. Fix invalid memory access bug in
+   ``CompressedRowSparseMatrix::AppendRows`` when it was called with a
+   matrix of size zero.
+#. Build position independent code when compiling Ceres statically
+   (Alexander Alekhin).
+#. Fix a bug in DetectStructure (Johannes Schonberger).
+#. Reduce memory footprint of SubsetParameterization (Johannes
+   Schonberger).
+#. Fix for reorder program unit test when built without suitesparse
+   (Sergey Sharybin).
+#. Fix a bug in the Schur eliminator (Werner Trobin).
+#. Fix a bug in the reordering code (Bernhard Zeisl).
+#. Add missing CERES_EXPORT to ComposedLoss (Simon Rutishauser).
+#. Add the option to use numeric differentiation to ``nist`` and
+   ``more_garbow_hillstrom``.
+#. Fix EIGENSPARSE option help s/t it displays in CMake ncurses GUI.
+#. Fix SparseNormalCholeskySolver with dynamic sparsity (Richie
+   Stebbing).
+#. Remove legacy dependency detection macros.
+#. Fix failed if() condition expansion if gflags is not found.
+#. Update all CMake to lowercase function name style.
+#. Update minimum iOS version to 7.0 for shared_ptr/unordered_map.
+#. Fix bug in gflags' <= 2.1.2 exported CMake configuration.
+#. Remove the spec file needed for generating RPMs.
+#. Fix a typo in small_blas.h (Werber Trobin).
+#. Cleanup FindGflags & use installed gflags CMake config if present.
+#. Add default glog install location on Windows to search paths
+   (bvanevery).
+#. Add default Eigen install location on Windows to search paths
+   (bvanevery).
+#. Fix explanation of config.h generation in bare config.h.
+#. Fix unused parameter compiler warnings in numeric_diff.h.
+#. Increase tolerance for a test in polynomial_test (Taylor Braun
+   Jones).
+#. Fix addition of Gerrit commit hook when Ceres is a git submodule
+   (Chris Cooper).
+#. Fix missing EIGEN_VERSION expansion typo.
+#. Fix links to SuiteSparse & CXSparse (Henrique Mendonça).
+#. Ensure Eigen is at least 3.1.0 for Eigen/SparseCore.
+#. Add option to use C++11 (not TR1) shared_ptr & unordered_map
+   (Norman Goldstein).
+#. Fix an incorrect usage message in bundle_adjuster.cc
+#. Gracefully disable docs if Sphinx is not found.
+#. Explicitly use (new) default OS X rpath policy if present.
+#. Add support of EIGEN_SPARSE type in
+   IsSparseLinearAlgebraLibraryTypeAvailable function (Pierre Moulon).
+#. Allow the LossFunction contained in a LossFunctionWrapper to be
+   NULL. This is consistent with how NULL LossFunctions are treated
+   everywhere else. (Simon Rutishauser).
+#. Improve numeric differentiation near zero.
+#. Refactored DynamicNumericDiffCostFunction to use NumericDiff (Tal
+   Ben-Nun).
+#. Remove use of :caption tag in Sphinx.
+#. Add a small test to make sure GradientProblemSolver works correctly
+   (Petter Strandmark).
+#. Add simple unit tests for GradientProblem (Petter Strandmark).
+#. Make the robust curve fitting example robust.
+#. Homogenize convergence operators in docs and code (Johannes
+   Schonberger).
+#. Add parameter_tolerance convergence to line search minimizer
+   (Johannes Schonberger).
+#. Fix bug where pow(JetA,JetB) returned wrong result for JetA==0
+   (Russell Smith).
+#. Remove duplicate step norm computation (Johannes Schonberger).
+#. Enhance usability when encountering Eigen version mismatches
+   (Andrew Hundt).
+#. Add PLY file logger before and after BA in order to ease visual
+   comparison (Pierre Moulon).
+#. Fix CMake config file docs to include 2.8.x & 3.x styles.
+#. Python3 fixes (Markus Moll).
+#. Remove confusing code from DenseJacobianWriter (Michael Vitus).
+#. Add documentation on CMake package installation process.
+#. Revert a call to SolveUpperTriangularUsingCholesky.
+#. Make CERES_EIGEN_VERSION macro independent of CMake.
+#. Add versions of dependencies used to FullReport().
+#. Ensure local config.h is used if Ceres is already installed.
+#. Small messaging and comment updates in CMake
+#. Handle possible presence of library prefixes in MSVC (Sylvain
+   Duchêne).
+#. Use -O2 not -O3 on MinGW to workaround issue with Eigen
+   (s1m3mu3@gmail.com).
+#. Increase tolerance in small_blas test for Cygwin
+   (s1m3mu3@gmail.com).
+#. Fix iOS cmake file for cmake 3.0 (Jack Feng)
+#. Fix missing gflags shlwapi dependency on MinGW (s1m3mu3@gmail.com).
+#. Add thread dependency & fix namespace detection on Windows for
+   gflags (arrigo.benedetti@gmail.com).
+#. Rename macros in the public API to have a ``CERES_`` prefix.
+#. Fix ``OrderedGroup::Reverse()`` when it is empty (Chris Sweeney).
+#. Update the code to point to ceres-solver.org.
+#. Update documentation to point to the GitHub issue tracker.
+#. Disable ``LAPACK`` for iOS builds. (Greg Coombe)
+#. Force use of single-thread in ``Problem::Evaluate()`` without
+   OpenMP.
+#. Less strict check for multithreading. (Chris Sweeney)
+#. Update tolerances in small_blas_test.cc (Philipp Hubner)
+#. Documentation corrections (Steve Hsu)
+#. Fixed ``sampled_function.cc`` (Pablo Speciale)
+#. Fix example code in the documentation. (Rodney Hoskinson)
+#. Improve the error handling in Conjugate Gradients.
+#. Improve preconditioner documentation.
+#. Remove dead code from fpclassify.h.
+#. Make Android.mk threads sensitive.
+#. Changed the ``CURRENT_CONFIG_INSTALL_DIR`` to be a variable local
+   to Ceres. (Chris Sweeney)
+#. Fix typo in the comments in ``Jet.h``. (Julius Ziegler)
+#. Add the ASL at ETH Zurich, Theia & OpenPTrack to the list of users.
+#. Fixed a typo in the documentation. (Richard Stebbing)
+#. Fixed a boundary handling bug in the BiCubic interpolation
+   code. (Bernhard Zeisl)
+#. Fixed a ``MSVC`` compilation bug in the cubic interpolation code
+   (Johannes Schönberger)
+#. Add covariance related files to the Android build.
+#. Update Ubuntu 14.04 installation instructions. (Filippo Basso)
+#. Improved logging for linear solver failures.
+#. Improved crash messages in ``Problem``.
+#. Hide Homebrew related variables in CMake GUI.
+#. Add SuiteSparse link dependency for
+   compressed_col_sparse_matrix_utils_test.
+#. Autodetect Homebrew install prefix on OSX.
+#. Lint changes from William Rucklidge and Jim Roseborough.
+#. Remove ``using namespace std:`` from ``port.h``
+#. Add note about glog not currently compiling against gflags 2.1.
+#. Add explicit no sparse linear algebra library available option.
+#. Improve some wording in the FAQ. (Vasily Vylkov)
+#. Delete Incomplete LQ Factorization.
+#. Add a pointer to MacPorts. (Markus Moll)
+
+
+1.10.0
+======
+
+New Features
+------------
+#. Ceres Solver can now be used to solve general unconstrained
+   optimization problems. See the documentation for
+   ``GradientProblem`` and ``GradientProblemSolver``.
+#. ``Eigen`` can now be used as a sparse linear algebra backend. This can
+   be done by setting
+   ``Solver::Options::sparse_linear_algebra_library_type`` to
+   ``EIGEN_SPARSE``. Performance should be comparable to
+   ``CX_SPARSE``.
+
+   .. NOTE::
+
+      Because ``Eigen`` is a header only library, and some of the code
+      related to sparse Cholesky factorization is LGPL, building Ceres
+      with support for Eigen's sparse linear algebra is disabled by
+      default and should be enabled explicitly.
+
+   .. NOTE::
+
+      For good performance, use Eigen version 3.2.2 or later.
+
+#. Added ``EIGEN_SPARSE_QR`` algorithm for covariance estimation using
+   ``Eigen``'s sparse QR factorization. (Michael Vitus)
+#. Faster inner iterations when using multiple threads.
+#. Faster ``ITERATIVE_SCHUR`` + ``SCHUR_JACOBI`` for small to medium
+   sized problems (see documentation for
+   ``Solver::Options::use_explicit_schur_complement``).
+#. Faster automatic Schur ordering.
+#. Reduced memory usage when solving problems with dynamic sparsity.
+#. ``CostFunctionToFunctor`` now supports dynamic number of residuals.
+#. A complete re-write of the problem preprocessing phase.
+#. ``Solver::Summary::FullReport`` now reports the build configuration
+   for Ceres.
+#. When building on Android, the ``NDK`` version detection logic has
+   been improved.
+#. The ``CERES_VERSION`` macro has been improved and replaced with the
+   ``CERES_VERSION_STRING`` macro.
+#. Added ``Solver::Options::IsValid`` which allows users to validate
+   their solver configuration before calling ``Solve``.
+#. Added ``Problem::GetCostFunctionForResidualBlock`` and
+   ``Problem::GetLossFunctionForResidualBlock``.
+#. Added Tukey's loss function. (Michael Vitus)
+#. Added RotationMatrixToQuaternion
+#. Compute & report timing information for line searches.
+#. Autodetect gflags namespace.
+#. Expanded ``more_garbow_hillstrom.cc``.
+#. Added a pointer to Tal Ben-Nun's MSVC wrapper to the docs.
+#. Added the ``<2,3,6>`` Schur template specialization. (Alessandro
+   Dal Grande)
+
+Backward Incompatible API Changes
+---------------------------------
+#. ``NumericDiffFunctor`` has been removed. Its API was broken, and
+   the implementation was an unnecessary layer of abstraction over
+   ``CostFunctionToFunctor``.
+#. ``POLAK_RIBIRERE`` conjugate gradients direction type has been
+   renamed to ``POLAK_RIBIERE``.
+#. ``Solver::Options::solver_log`` has been removed. If needed this
+   iteration callback can easily be implemented in user code.
+#. The ``SPARSE_CHOLESKY`` algorithm for covariance estimation has
+   been removed. It is not rank revealing and numerically poorly
+   behaved. Sparse QR factorization is a much better way to do this.
+#. The ``SPARSE_QR`` algorithm for covariance estimation has been
+   renamed to ``SUITE_SPARSE_QR`` to be consistent with
+   ``EIGEN_SPARSE_QR``.
+#. ``Solver::Summary::preconditioner_type`` has been replaced with
+   ``Solver::Summary::preconditioner_type_given`` and
+   ``Solver::Summary::preconditioner_type_used`` to be more consistent
+   with how information about the linear solver is communicated.
+#. ``CERES_VERSION`` and ``CERES_ABI_VERSION`` macros were not
+   terribly useful. They have been replaced with
+   ``CERES_VERSION_MAJOR``, ``CERES_VERSION_MINOR`` ,
+   ``CERES_VERSION_REVISION`` and ``CERES_VERSION_ABI`` macros. In
+   particular the functionality of ``CERES_VERSION`` is provided by
+   ``CERES_VERSION_STRING`` macro.
+
+Bug Fixes
+---------
+#. Do not try the gradient step if TR step line search fails.
+#. Fix missing include in libmv_bundle_adjuster on OSX.
+#. Conditionally log evaluation failure warnings.
+#. Runtime uses four digits after the decimal in Summary::FullReport.
+#. Better options checking for TrustRegionMinimizer.
+#. Fix RotationMatrixToAngleAxis when the angle of rotation is near
+   PI. (Tobias Strauss)
+#. Sometimes gradient norm based convergence would miss a step with a
+   substantial solution quality improvement. (Rodney Hoskinson)
+#. Ignore warnings from within Eigen/SparseQR (3.2.2).
+#. Fix empty Cache HELPSTRING parsing error on OS X 10.10 Yosemite.
+#. Fix a formatting error in TrustRegionMinimizer logging.
+#. Add an explicit include for local_parameterization.h (cooordz)
+#. Fix a number of typos in the documentation (Martin Baeuml)
+#. Made the logging in TrustRegionMinimizer consistent with
+   LineSearchMinimizer.
+#. Fix some obsolete documentation in CostFunction::Evaluate.
+#. Fix CG solver options for ITERATIVE_SCHUR, which did not copy
+   min_num_iterations (Johannes Schönberger)
+#. Remove obsolete include of numeric_diff_functor.h. (Martin Baeuml)
+#. Fix max. linear solver iterations in ConjugateGradientsSolver
+   (Johannes Schönberger)
+#. Expand check for lack of a sparse linear algebra library. (Michael
+   Samples and Domink Reitzle)
+#. Fix Eigen Row/ColMajor bug in NumericDiffCostFunction. (Dominik
+   Reitzle)
+#. Fix crash in Covariance if # threads > 1 requested without OpenMP.
+#. Fixed Malformed regex. (Björn Piltz)
+#. Fixed MSVC error C2124: divide or mod by zero. (Björn Piltz)
+#. Add missing #include of <limits> for loss functions.
+#. Make canned loss functions more robust.
+#. Fix type of suppressed compiler warning for Eigen 3.2.0.
+#. Suppress unused variable warning from Eigen 3.2.0.
+#. Add "make install" to the install instructions.
+#. Correct formula in documentation of
+   Solver::Options::function_tolerance. (Alessandro Gentilini)
+#. Add release flags to iOS toolchain.
+#. Fix a broken hyperlink in the documentation. (Henrique Mendonca)
+#. Add fixes for multiple definitions of ERROR on Windows to docs.
+#. Compile miniglog into Ceres if enabled on all platforms.
+#. Add two missing files to Android.mk (Greg Coombe)
+#. Fix Cmake error when using miniglog. (Greg Coombe)
+#. Don't build miniglog unconditionally as a static library (Björn
+   Piltz)
+#. Added a missing include. (Björn Piltz)
+#. Conditionally disable SparseNormalCholesky.
+#. Fix a memory leak in program_test.cc.
+
+
+1.9.0
+=====
+
+New Features
+------------
+#. Bounds constraints: Support for upper and/or lower bounds on
+   parameters when using the trust region minimizer.
+#. Dynamic Sparsity: Problems in which the sparsity structure of the
+   Jacobian changes over the course of the optimization can now be
+   solved much more efficiently. (Richard Stebbing)
+#. Improved support for Microsoft Visual C++ including the ability to
+   build and ship DLLs. (Björn Piltz, Alex Stewart and Sergey
+   Sharybin)
+#. Support for building on iOS 6.0 or higher (Jack Feng).
+#. Autogeneration of config.h that captures all the defines used to
+   build and use Ceres Solver.
+#. Simpler and more informative solver termination type
+   reporting. (See below for more details)
+#. New `website <http://www.ceres-solver.org>`_ based entirely on
+   Sphinx.
+#. ``AutoDiffLocalParameterization`` allows the use of automatic
+   differentiation for defining ``LocalParameterization`` objects
+   (Alex Stewart)
+#. LBFGS is faster due to fewer memory copies.
+#. Parameter blocks are no longer restricted to be less than 32k in
+   size; they can be up to 2G in size.
+#. Faster ``SPARSE_NORMAL_CHOLESKY`` solver when using ``CX_SPARSE``
+   as the sparse linear algebra library.
+#. Added ``Problem::IsParameterBlockPresent`` and
+   ``Problem::GetParameterization``.
+#. Added the (2,4,9) and (2,4,8) template specializations.
+#. An example demonstrating the use of
+   DynamicAutoDiffCostFunction. (Joydeep Biswas)
+#. Homography estimation example from Blender demonstrating the use of
+   a custom ``IterationCallback``. (Sergey Sharybin)
+#. Support user passing a custom CMAKE_MODULE_PATH (for BLAS /
+   LAPACK).
+
+Backward Incompatible API Changes
+---------------------------------
+#. ``Solver::Options::linear_solver_ordering`` used to be a naked
+   pointer that Ceres took ownership of. This is error prone behaviour
+   which leads to problems when copying the ``Solver::Options`` struct
+   around. This has been replaced with a ``shared_ptr`` to handle
+   ownership correctly across copies.
+
+#. The enum used for reporting the termination/convergence status of
+   the solver has been renamed from ``SolverTerminationType`` to
+   ``TerminationType``.
+
+   The enum values have also changed. ``FUNCTION_TOLERANCE``,
+   ``GRADIENT_TOLERANCE`` and ``PARAMETER_TOLERANCE`` have all been
+   replaced by ``CONVERGENCE``.
+
+   ``NUMERICAL_FAILURE`` has been replaced by ``FAILURE``.
+
+   ``USER_ABORT`` has been renamed to ``USER_FAILURE``.
+
+   Further ``Solver::Summary::error`` has been renamed to
+   ``Solver::Summary::message``. It contains a more detailed
+   explanation for why the solver terminated.
+
+#. ``Solver::Options::gradient_tolerance`` used to be a relative
+   gradient tolerance. i.e., The solver converged when
+
+   .. math:: \|g(x)\|_\infty < \text{gradient_tolerance} *
+      \|g(x_0)\|_\infty
+
+   where :math:`g(x)` is the gradient of the objective function at
+   :math:`x` and :math:`x_0` is the parameter vector at the start of
+   the optimization.
+
+   This has changed to an absolute tolerance, i.e. the solver
+   converges when
+
+   .. math:: \|g(x)\|_\infty < \text{gradient_tolerance}
+
+#. Ceres cannot be built without the line search minimizer
+   anymore. Thus the preprocessor define
+   ``CERES_NO_LINE_SEARCH_MINIMIZER`` has been removed.
+
+Bug Fixes
+---------
+#. Disabled warning C4251. (Björn Piltz)
+#. Do not propagate 3d party libs through
+   `IMPORTED_LINK_INTERFACE_LIBRARIES_[DEBUG/RELEASE]` mechanism when
+   building shared libraries. (Björn Piltz)
+#. Fixed errant verbose levels (Björn Piltz)
+#. Variety of code cleanups, optimizations and bug fixes to the line
+   search minimizer code (Alex Stewart)
+#. Fixed ``BlockSparseMatrix::Transpose`` when the matrix has row and
+   column blocks. (Richard Bowen)
+#. Better error checking when ``Problem::RemoveResidualBlock`` is
+   called. (Alex Stewart)
+#. Fixed a memory leak in ``SchurComplementSolver``.
+#. Added ``epsilon()`` method to ``NumTraits<ceres::Jet<T, N>
+   >``. (Filippo Basso)
+#. Fixed a bug in ``CompressedRowSparseMatrix::AppendRows`` and
+   ``DeleteRows``.
+#. Handle empty problems consistently.
+#. Restore the state of the ``Problem`` after a call to
+   ``Problem::Evaluate``. (Stefan Leutenegger)
+#. Better error checking and reporting for linear solvers.
+#. Use explicit formula to solve quadratic polynomials instead of the
+   eigenvalue solver.
+#. Fix constant parameter handling in inner iterations (Mikael
+   Persson).
+#. SuiteSparse errors do not cause a fatal crash anymore.
+#. Fix ``corrector_test.cc``.
+#. Relax the requirements on loss function derivatives.
+#. Minor bugfix to logging.h (Scott Ettinger)
+#. Updated ``gmock`` and ``gtest`` to the latest upstream version.
+#. Fix build breakage on old versions of SuiteSparse.
+#. Fixed build issues related to Clang / LLVM 3.4 (Johannes
+   Schönberger)
+#. METIS_FOUND is never set. Changed the commit to fit the setting of
+   the other ``*_FOUND`` definitions. (Andreas Franek)
+#. Variety of bug fixes and cleanups to the ``CMake`` build system
+   (Alex Stewart)
+#. Removed fictitious shared library target from the NDK build.
+#. Solver::Options now uses ``shared_ptr`` to handle ownership of
+   ``Solver::Options::linear_solver_ordering`` and
+   ``Solver::Options::inner_iteration_ordering``. As a consequence the
+   ``NDK`` build now depends on ``libc++`` from the ``LLVM`` project.
+#. Variety of lint cleanups (William Rucklidge & Jim Roseborough)
+#. Various internal cleanups including dead code removal.
+
+
+1.8.0
+=====
+
+New Features
+------------
+#. Significantly improved ``CMake`` files with better robustness,
+   dependency checking and GUI support. (Alex Stewart)
+#. Added ``DynamicNumericDiffCostFunction`` for numerically
+   differentiated cost functions whose sizing is determined at run
+   time.
+#. ``NumericDiffCostFunction`` now supports a dynamic number of
+   residuals just like ``AutoDiffCostFunction``.
+#. ``Problem`` exposes more of its structure in its API.
+#. Faster automatic differentiation (Tim Langlois)
+#. Added the commonly occurring ``2_d_d`` template specialization for
+   the Schur Eliminator.
+#. Faster ``ITERATIVE_SCHUR`` solver using template specializations.
+#. Faster ``SCHUR_JACOBI`` preconditioner construction.
+#. Faster ``AngleAxisRotatePoint``.
+#. Faster Jacobian evaluation when a loss function is used.
+#. Added support for multiple clustering algorithms in visibility
+   based preconditioning, including a new fast single linkage
+   clustering algorithm.
+
+Bug Fixes
+---------
+#. Fix ordering of ParseCommandLineFlags() & InitGoogleTest() for
+   Windows. (Alex Stewart)
+#. Remove DCHECK_GE checks from fixed_array.h.
+#. Fix build on MSVC 2013 (Petter Strandmark)
+#. Fixed ``AngleAxisToRotationMatrix`` near zero.
+#. Move ``CERES_HASH_NAMESPACE`` macros to ``collections_port.h``.
+#. Fix handling of unordered_map/unordered_set on OSX 10.9.0.
+#. Explicitly link to libm for ``curve_fitting_c.c``. (Alex Stewart)
+#. Minor type conversion fix to autodiff.h
+#. Remove RuntimeNumericDiffCostFunction.
+#. Fix operator= ambiguity on some versions of Clang. (Alex Stewart)
+#. Various Lint cleanups (William Rucklidge & Jim Roseborough)
+#. Modified installation folders for Windows. (Pablo Speciale)
+#. Added librt to link libraries for SuiteSparse_config on
+   Linux. (Alex Stewart)
+#. Check for presence of return-type-c-linkage option with
+   Clang. (Alex Stewart)
+#. Fix Problem::RemoveParameterBlock after calling solve. (Simon
+   Lynen)
+#. Fix a free/delete bug in covariance_impl.cc
+#. Fix two build errors. (Dustin Lang)
+#. Add RequireInitialization = 1 to NumTraits::Jet.
+#. Update gmock/gtest to 1.7.0
+#. Added IterationSummary::gradient_norm.
+#. Reduced verbosity of the inner iteration minimizer.
+#. Fixed a bug in TrustRegionMinimizer. (Michael Vitus)
+#. Removed android/build_android.sh.
+
+
+1.7.0
+=====
+
+Backward Incompatible API Changes
+---------------------------------
+
+#. ``Solver::Options::sparse_linear_algebra_library`` has been renamed
+   to ``Solver::Options::sparse_linear_algebra_library_type``.
+
+New Features
+------------
+#. Sparse and dense covariance estimation.
+#. A new Wolfe line search. (Alex Stewart)
+#. ``BFGS`` line search direction. (Alex Stewart)
+#. C API
+#. Speeded up the use of loss functions > 17x.
+#. Faster ``DENSE_QR``, ``DENSE_NORMAL_CHOLESKY`` and ``DENSE_SCHUR``
+   solvers.
+#. Support for multiple dense linear algebra backends. In particular
+   optimized ``BLAS`` and ``LAPACK`` implementations (e.g., Intel MKL,
+   ACML, OpenBLAS etc) can now be used to do the dense linear algebra
+   for ``DENSE_QR``, ``DENSE_NORMAL_CHOLESKY`` and ``DENSE_SCHUR``
+#. Use of inner iterations can now be adaptively stopped. Iteration
+   and runtime statistics for inner iterations are now reported in
+   ``Solver::Summary`` and ``Solver::Summary::FullReport``.
+#. Improved inner iteration step acceptance criterion.
+#. Add BlockRandomAccessCRSMatrix.
+#. Speeded up automatic differentiation by 7\%.
+#. Bundle adjustment example from libmv/Blender (Sergey Sharybin)
+#. Shared library building is now controlled by CMake, rather than a
+   custom solution. Previously, Ceres had a custom option, but this is
+   now deprecated in favor of CMake's built in support for switching
+   between static and shared. Turn on BUILD_SHARED_LIBS to get shared
+   Ceres libraries.
+#. No more dependence on Protocol Buffers.
+#. Incomplete LQ factorization.
+#. Ability to write trust region problems to disk.
+#. Add sinh, cosh, tanh and tan functions to automatic differentiation
+   (Johannes Schönberger)
+#. Simplifications to the cmake build file.
+#. ``miniglog`` can now be used as a replacement for ``google-glog``
+   on non Android platforms. (This is NOT recommended).
+
+Bug Fixes
+---------
+#. Fix ``ITERATIVE_SCHUR`` solver to work correctly when the schur
+   complement is of size zero. (Soohyun Bae)
+#. Fix the ``spec`` file for generating ``RPM`` packages (Brian Pitts
+   and Taylor Braun-Jones).
+#. Fix how ceres calls CAMD (Manas Jagadev)
+#. Fix breakage on old versions of SuiteSparse. (Fisher Yu)
+#. Fix warning C4373 in Visual Studio (Petter Strandmark)
+#. Fix compilation error caused by missing suitesparse headers and
+   reorganize them to be more robust. (Sergey Sharybin)
+#. Check GCC Version before adding -fast compiler option on
+   OSX. (Steven Lovegrove)
+#. Add documentation for minimizer progress output.
+#. Lint and other cleanups (William Rucklidge and James Roseborough)
+#. Collections port fix for MSC 2008 (Sergey Sharybin)
+#. Various corrections and cleanups in the documentation.
+#. Change the path where CeresConfig.cmake is installed (Pablo
+   Speciale)
+#. Minor errors in documentation (Pablo Speciale)
+#. Updated depend.cmake to follow CMake IF convention. (Joydeep
+   Biswas)
+#. Stabilize the Schur ordering algorithm.
+#. Update license header in split.h.
+#. Enabling -O4 (link-time optimization) only if compiler/linker
+   support it. (Alex Stewart)
+#. Consistent glog path across files.
+#. ceres-solver.spec: Use cleaner, more conventional Release string
+   (Taylor Braun-Jones)
+#. Fix compile bug on RHEL6 due to missing header (Taylor Braun-Jones)
+#. CMake file is less verbose.
+#. Use the latest upstream version of google-test and gmock.
+#. Rationalize some of the variable names in ``Solver::Options``.
+#. Improve Summary::FullReport when line search is used.
+#. Expose line search parameters in ``Solver::Options``.
+#. Fix update of L-BFGS history buffers after they become full. (Alex
+   Stewart)
+#. Fix configuration error on systems without SuiteSparse installed
+   (Sergey Sharybin)
+#. Enforce the read call returns correct value in
+   ``curve_fitting_c.c`` (Arnaud Gelas)
+#. Fix DynamicAutoDiffCostFunction (Richard Stebbing)
+#. Fix Problem::RemoveParameterBlock documentation (Johannes
+   Schönberger)
+#. Fix a logging bug in parameter_block.h
+#. Refactor the preconditioner class structure.
+#. Fix an uninitialized variable warning when building with ``GCC``.
+#. Fix a reallocation bug in
+   ``CreateJacobianBlockSparsityTranspose``. (Yuliy Schwartzburg)
+#. Add a define for O_BINARY.
+#. Fix miniglog-based Android NDK build; now works with NDK r9. (Scott
+   Ettinger)
+
+
+1.6.0
+=====
+
+New Features
+------------
+#. Major Performance improvements.
+
+   a. Schur type solvers (``SPARSE_SCHUR``, ``DENSE_SCHUR``,
+      ``ITERATIVE_SCHUR``) are significantly faster due to custom BLAS
+      routines and fewer heap allocations.
+
+   b. ``SPARSE_SCHUR`` when used with ``CX_SPARSE`` now uses a block
+      AMD for much improved factorization performance.
+
+   c. The jacobian matrix is pre-ordered so that
+      ``SPARSE_NORMAL_CHOLESKY`` and ``SPARSE_SCHUR`` do not have to
+      make copies inside ``CHOLMOD``.
+
+   d. Faster autodiff by replacing division with multiplication by
+      the inverse.
+
+   e. When compiled without threads, the Schur eliminator does not pay
+      the penalty for locking and unlocking mutexes.
+
+#. Users can now use ``linear_solver_ordering`` to affect the
+   fill-reducing ordering used by ``SUITE_SPARSE`` for
+   ``SPARSE_NORMAL_CHOLESKY``.
+#. ``Problem`` can now report the set of parameter blocks it knows about.
+#. ``TrustRegionMinimizer`` uses the evaluator to compute the gradient
+   instead of a matrix vector multiply.
+#. On ``Mac OS``, whole program optimization is enabled.
+#. Users can now use automatic differentiation to define new
+   ``LocalParameterization`` objects. (Sergey Sharybin) See the
+   sketch following this list.
+#. Enable larger tuple sizes for Visual Studio 2012. (Petter Strandmark)
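+
+A minimal sketch of an automatically differentiated
+``LocalParameterization``; the ``VectorPlus`` functor below is a
+hypothetical illustration (a plain additive update on a 3-vector):
+
+.. code-block:: c++
+
+  // Plus(x, delta) = x + delta, templated so Ceres can differentiate it.
+  struct VectorPlus {
+    template <typename T>
+    bool operator()(const T* x, const T* delta, T* x_plus_delta) const {
+      for (int i = 0; i < 3; ++i) {
+        x_plus_delta[i] = x[i] + delta[i];
+      }
+      return true;
+    }
+  };
+
+  // Global size 3, local (tangent) size 3.
+  ceres::LocalParameterization* parameterization =
+      new ceres::AutoDiffLocalParameterization<VectorPlus, 3, 3>;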
+
+
+Bug Fixes
+---------
+
+#. Update the documentation for ``CostFunction``.
+#. Fixed a typo in the documentation. (Pablo Speciale)
+#. Fix a typo in suitesparse.cc.
+#. Bugfix in ``NumericDiffCostFunction``. (Nicolas Brodu)
+#. Death to BlockSparseMatrixBase.
+#. Change Minimizer::Options::min_trust_region_radius to double.
+#. Update to compile with stricter gcc checks. (Joydeep Biswas)
+#. Do not modify cached CMAKE_CXX_FLAGS_RELEASE. (Sergey Sharybin)
+#. ``<iterator>`` needed for back_insert_iterator. (Petter Strandmark)
+#. Lint cleanup. (William Rucklidge)
+#. Documentation corrections. (Pablo Speciale)
+
+
+1.5.0
+=====
+
+Backward Incompatible API Changes
+---------------------------------
+#. Added ``Problem::Evaluate``. Now you can evaluate a problem or any
+   part of it without calling the solver.
+
+   In light of this the following settings have been deprecated and
+   removed from the API.
+
+   - ``Solver::Options::return_initial_residuals``
+   - ``Solver::Options::return_initial_gradient``
+   - ``Solver::Options::return_initial_jacobian``
+   - ``Solver::Options::return_final_residuals``
+   - ``Solver::Options::return_final_gradient``
+   - ``Solver::Options::return_final_jacobian``
+
+   Instead we recommend using something like this.
+
+   .. code-block:: c++
+
+     Problem problem;
+     // Build problem
+
+     vector<double> initial_residuals;
+     problem.Evaluate(Problem::EvaluateOptions(),
+                      NULL, /* No cost */
+                      &initial_residuals,
+                      NULL, /* No gradient */
+                      NULL  /* No jacobian */ );
+
+     Solver::Options options;
+     Solver::Summary summary;
+     Solver::Solve(options, &problem, &summary);
+
+     vector<double> final_residuals;
+     problem.Evaluate(Problem::EvaluateOptions(),
+                      NULL, /* No cost */
+                      &final_residuals,
+                      NULL, /* No gradient */
+                      NULL  /* No jacobian */ );
+
+
+New Features
+------------
+#. Problem now supports removal of ParameterBlocks and
+   ResidualBlocks. There is a space/time tradeoff in doing this, which
+   is controlled by
+   ``Problem::Options::enable_fast_parameter_block_removal``. (See the
+   parameter block removal sketch following this list.)
+
+#. Ceres now supports Line search based optimization algorithms in
+   addition to trust region algorithms. Currently there is support for
+   gradient descent, non-linear conjugate gradient and LBFGS search
+   directions.
+#. Added ``Problem::Evaluate``. Now you can evaluate a problem or any
+   part of it without calling the solver. In light of this the
+   following settings have been deprecated and removed from the API.
+
+   - ``Solver::Options::return_initial_residuals``
+   - ``Solver::Options::return_initial_gradient``
+   - ``Solver::Options::return_initial_jacobian``
+   - ``Solver::Options::return_final_residuals``
+   - ``Solver::Options::return_final_gradient``
+   - ``Solver::Options::return_final_jacobian``
+
+#. New, much improved HTML documentation using Sphinx.
+#. Changed ``NumericDiffCostFunction`` to take functors like
+   ``AutoDiffCostFunction``. (See the ``NumericDiffCostFunction``
+   sketch following this list.)
+#. Added support for mixing automatic, analytic and numeric
+   differentiation. This is done by adding ``CostFunctionToFunctor``
+   and ``NumericDiffFunctor`` objects to the API.
+#. Sped up the robust loss function correction logic when the
+   residual is one dimensional.
+#. Sped up ``DenseQRSolver`` by changing the way dense jacobians are
+   stored. This is a 200-500% improvement in linear solver performance
+   depending on the size of the problem.
+#. ``DENSE_SCHUR`` now supports multi-threading.
+#. Greatly expanded ``Summary::FullReport``:
+
+   - Report the ordering used by the ``LinearSolver``.
+   - Report the ordering used by the inner iterations.
+   - Execution timing breakdown into evaluations and linear solves.
+   - Effective size of the problem solved by the solver, which now
+     accounts for the size of the tangent space when using a
+     ``LocalParameterization``.
+#. Ceres when run at the ``VLOG`` level 3 or higher will report
+   detailed timing information about its internals.
+#. Remove extraneous initial and final residual evaluations. This
+   speeds up the solver a bit.
+#. Automatic differentiation with a dynamic number of parameter
+   blocks. (Based on an idea by Thad Hughes).
+#. Sped up problem construction and destruction.
+#. Added matrix adapters to ``rotation.h`` so that the rotation matrix
+   routines can work with row and column major matrices. (Markus Moll)
+#. ``SCHUR_JACOBI`` can now be used without ``SuiteSparse``.
+#. A ``.spec`` file for producing RPMs. (Taylor Braun-Jones)
+#. ``CMake`` can now build the sphinx documentation (Pablo Speciale)
+#. Add support for creating a CMake config file during build to make
+   embedding Ceres in other CMake-using projects easier. (Pablo
+   Speciale).
+#. Better error reporting in ``Problem`` for missing parameter blocks.
+#. A more flexible ``Android.mk`` and a more modular build. If binary
+   size and/or compile time is a concern, larger parts of the solver
+   can be disabled at compile time.
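+
+A minimal sketch of parameter block removal and the associated option;
+the ``x`` array below is a hypothetical parameter block:
+
+.. code-block:: c++
+
+  double x[3];  // hypothetical parameter block
+
+  Problem::Options problem_options;
+  // Trades extra memory for faster removal of parameter/residual blocks.
+  problem_options.enable_fast_parameter_block_removal = true;
+  Problem problem(problem_options);
+
+  // ... AddResidualBlock calls involving x ...
+
+  // Removes x and every residual block that depends on it.
+  problem.RemoveParameterBlock(x);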
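+
+A minimal sketch of the functor-based ``NumericDiffCostFunction`` API;
+the ``ExampleResidual`` functor and the 1-residual, 1-parameter sizes
+are purely illustrative:
+
+.. code-block:: c++
+
+  struct ExampleResidual {
+    bool operator()(const double* const x, double* residual) const {
+      residual[0] = 10.0 - x[0];
+      return true;
+    }
+  };
+
+  // Central differences; 1 residual, one parameter block of size 1.
+  CostFunction* cost_function =
+      new NumericDiffCostFunction<ExampleResidual, ceres::CENTRAL, 1, 1>(
+          new ExampleResidual);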
+
+Bug Fixes
+---------
+#. Compilation fixes for MSVC2010 (Sergey Sharybin)
+#. Fixed "deprecated conversion from string constant to char*"
+   warnings. (Pablo Speciale)
+#. Correctly propagate ifdefs when building without Schur eliminator
+   template specializations.
+#. Correct handling of ``LIB_SUFFIX`` on Linux. (Yuliy Schwartzburg).
+#. Code and signature cleanup in ``rotation.h``.
+#. Make examples independent of internal code.
+#. Disable unused member in ``gtest`` which results in build error on
+   OS X with latest Xcode. (Taylor Braun-Jones)
+#. Pass the correct flags to the linker when using
+   ``pthreads``. (Taylor Braun-Jones)
+#. Only use ``cmake28`` macro when building on RHEL6. (Taylor
+   Braun-Jones)
+#. Remove ``-Wno-return-type-c-linkage`` when compiling with
+   GCC. (Taylor Braun-Jones)
+#. Fix ``No previous prototype`` warnings. (Sergey Sharybin)
+#. MinGW build fixes. (Sergey Sharybin)
+#. Lots of minor code and lint fixes. (William Rucklidge)
+#. Fixed a bug in ``solver_impl.cc`` residual evaluation. (Markus
+   Moll)
+#. Fixed variadic evaluation bug in ``AutoDiff``.
+#. Fixed ``SolverImpl`` tests.
+#. Fixed a bug in ``DenseSparseMatrix::ToDenseMatrix()``.
+#. Fixed an initialization bug in ``ProgramEvaluator``.
+#. Fixes to Android.mk paths (Carlos Hernandez)
+#. Modify ``nist.cc`` to compute accuracy based on ground truth
+   solution rather than the ground truth function value.
+#. Fixed a memory leak in ``cxsparse.cc``. (Alexander Mordvintsev).
+#. Fixed the install directory for libraries by correctly handling
+   ``LIB_SUFFIX``. (Taylor Braun-Jones)
+
+1.4.0
+=====
+
+Backward Incompatible API Changes
+---------------------------------
+The new ordering API breaks existing code. Here are the fixes for the
+common cases.
+
+**Before**
+
+.. code-block:: c++
+
+ options.linear_solver_type = ceres::DENSE_SCHUR;
+ options.ordering_type = ceres::SCHUR;
+
+**After**
+
+
+.. code-block:: c++
+
+  options.linear_solver_type = ceres::DENSE_SCHUR;
+
+
+**Before**
+
+.. code-block:: c++
+
+ options.linear_solver_type = ceres::DENSE_SCHUR;
+ options.ordering_type = ceres::USER;
+ for (int i = 0; i < num_points; ++i) {
+   options.ordering.push_back(my_points[i]);
+ }
+ for (int i = 0; i < num_cameras; ++i) {
+   options.ordering.push_back(my_cameras[i]);
+ }
+ options.num_eliminate_blocks = num_points;
+
+
+**After**
+
+.. code-block:: c++
+
+ options.linear_solver_type = ceres::DENSE_SCHUR;
+ options.linear_solver_ordering = new ceres::ParameterBlockOrdering;
+ for (int i = 0; i < num_points; ++i) {
+   options.linear_solver_ordering->AddElementToGroup(my_points[i], 0);
+ }
+ for (int i = 0; i < num_cameras; ++i) {
+   options.linear_solver_ordering->AddElementToGroup(my_cameras[i], 1);
+ }
+
+
+New Features
+------------
+#. A new richer, more expressive and consistent API for ordering
+   parameter blocks.
+#. A non-linear generalization of Ruhe & Wedin's Algorithm II. This
+   allows the user to use variable projection on separable and
+   non-separable non-linear least squares problems. With
+   multithreading, this results in significant improvements to the
+   convergence behavior of the solver at a small increase in run time.
+#. An image denoising example using fields of experts. (Petter
+   Strandmark)
+#. Defines for Ceres version and ABI version.
+#. Higher precision timer code where available. (Petter Strandmark)
+#. Example Makefile for users of Ceres.
+#. IterationSummary now informs the user when the step is a
+   non-monotonic step.
+#. Fewer memory allocations when using ``DenseQRSolver``.
+#. GradientChecker for testing CostFunctions (William Rucklidge)
+#. Add support for cost functions with 10 parameter blocks in
+   ``Problem``. (Fisher)
+#. Add support for 10 parameter blocks in ``AutoDiffCostFunction``.
+
+
+Bug Fixes
+---------
+
+#. Static cast to force Eigen::Index to long conversion.
+#. Change LOG(ERROR) to LOG(WARNING) in ``schur_complement_solver.cc``.
+#. Remove verbose logging from ``DenseQRSolver``.
+#. Fix the Android NDK build.
+#. Better handling of empty and constant Problems.
+#. Remove an internal header that was leaking into the public API.
+#. Memory leak in ``trust_region_minimizer.cc``
+#. Schur ordering was operating on the wrong object (Ricardo Martin)
+#. MSVC fixes (Petter Strandmark)
+#. Various fixes to ``nist.cc`` (Markus Moll)
+#. Fixed a jacobian scaling bug.
+#. Numerically robust computation of ``model_cost_change``.
+#. Signed comparison compiler warning fixes (Ricardo Martin)
+#. Various compiler warning fixes all over.
+#. Inclusion guard fixes (Petter Strandmark)
+#. Segfault in test code (Sergey Popov)
+#. Replaced ``EXPECT/ASSERT_DEATH`` with the more portable
+   ``EXPECT_DEATH_IF_SUPPORTED`` macros.
+#. Fixed the camera projection model in Ceres' implementation of
+   Snavely's camera model. (Ricardo Martin)
+
+
+1.3.0
+=====
+
+New Features
+------------
+#. Android Port (Scott Ettinger also contributed to the port)
+#. Windows port. (Changchang Wu and Pierre Moulon also contributed to the port)
+#. New subspace Dogleg Solver. (Markus Moll)
+#. Trust region algorithm now supports the option of non-monotonic steps.
+#. New loss functions ``ArcTanLossFunction``, ``TolerantLossFunction``
+   and ``ComposedLossFunction``. (James Roseborough).
+#. New ``DENSE_NORMAL_CHOLESKY`` linear solver, which uses Eigen's
+   LDLT factorization on the normal equations.
+#. Cached symbolic factorization when using ``CXSparse``.
+   (Petter Strandmark)
+#. New example ``nist.cc`` and data from the NIST non-linear
+   regression test suite. (Thanks to Douglas Bates for suggesting this.)
+#. The traditional Dogleg solver now uses an elliptical trust
+   region (Markus Moll)
+#. Support for returning initial and final gradients & Jacobians.
+#. Gradient computation support in the evaluators, with an eye
+   towards developing first order/gradient based solvers.
+#. A better way to compute ``Solver::Summary::fixed_cost``. (Markus Moll)
+#. ``CMake`` support for building documentation, separate examples,
+   installing and uninstalling the library and Gerrit hooks (Arnaud
+   Gelas)
+#. ``SuiteSparse4`` support (Markus Moll)
+#. Support for building Ceres without ``TR1`` (This leads to
+   slightly slower ``DENSE_SCHUR`` and ``SPARSE_SCHUR`` solvers).
+#. ``BALProblem`` can now write a problem back to disk.
+#. ``bundle_adjuster`` now allows the user to normalize and perturb the
+   problem before solving.
+#. Solver progress logging to file.
+#. Added ``Program::ToString`` and ``ParameterBlock::ToString`` to
+   help with debugging.
+#. Ability to build Ceres as a shared library (MacOS and Linux only),
+   associated versioning and build release script changes.
+#. Portable floating point classification API.
+
+
+Bug Fixes
+---------
+#. Fix how invalid step evaluations are handled.
+#. Change the slop handling around zero for model cost changes to use
+   relative tolerances rather than absolute tolerances.
+#. Fix an inadvertent integer to bool conversion. (Petter Strandmark)
+#. Do not link to ``libgomp`` when building on
+   Windows. (Petter Strandmark)
+#. Include ``gflags.h`` in ``test_utils.cc``. (Petter
+   Strandmark)
+#. Use standard random number generation routines. (Petter Strandmark)
+#. ``TrustRegionMinimizer`` does not implicitly negate the
+   steps that it takes. (Markus Moll)
+#. Diagonal scaling allows for equal upper and lower bounds. (Markus Moll)
+#. TrustRegionStrategy does not misuse LinearSolver::Summary anymore.
+#. Fix Eigen3 Row/Column Major storage issue. (Lena Gieseke)
+#. QuaternionToAngleAxis now guarantees an angle in
+   :math:`[-\pi, \pi]`. (Guoxuan Zhang)
+#. Added a workaround for a compiler bug in the Android NDK to the
+   Schur eliminator.
+#. The sparse linear algebra library is only logged in
+   Summary::FullReport if it is used.
+#. Rename the macro ``CERES_DONT_HAVE_PROTOCOL_BUFFERS``
+   to ``CERES_NO_PROTOCOL_BUFFERS`` for consistency.
+#. Fix how static structure detection for the Schur eliminator logs
+   its results.
+#. Correct example code in the documentation. (Petter Strandmark)
+#. Fix ``fpclassify.h`` to work with the Android NDK and STLport.
+#. Fix a memory leak in ``levenberg_marquardt_strategy_test.cc``.
+#. Fix an early return bug in the Dogleg solver. (Markus Moll)
+#. Zero initialize Jets.
+#. Moved ``internal/ceres/mock_log.h`` to ``internal/ceres/gmock/mock-log.h``
+#. Unified file path handling in tests.
+#. ``data_fitting.cc`` includes ``gflags``
+#. Renamed Ceres' Mutex class and associated macros to avoid
+   namespace conflicts.
+#. Close the BAL problem file after reading it (Markus Moll)
+#. Fix IsInfinite on Jets.
+#. Drop alignment requirements for Jets.
+#. Fixed Jet to integer comparison. (Keith Leung)
+#. Fix use of uninitialized arrays. (Sebastian Koch & Markus Moll)
+#. Conditionally compile gflag dependencies. (Casey Goodlett)
+#. Add ``data_fitting.cc`` to the examples ``CMake`` file.
+
+
+1.2.3
+=====
+
+Bug Fixes
+---------
+#. ``suitesparse_test`` is enabled even when ``-DSUITESPARSE=OFF``.
+#. ``FixedArray`` internal struct did not respect ``Eigen``
+   alignment requirements (Koichi Akabe & Stephan Kassemeyer).
+#. Fixed ``quadratic.cc`` documentation and code mismatch
+   (Nick Lewycky).
+
+1.2.2
+=====
+
+Bug Fixes
+---------
+#. Fix constant parameter blocks, and other minor fixes (Markus Moll)
+#. Fix alignment issues when combining ``Jet`` and
+   ``FixedArray`` in automatic differentiation.
+#. Remove obsolete ``build_defs`` file.
+
+1.2.1
+=====
+
+New Features
+------------
+#. Powell's Dogleg solver
+#. Documentation now has a brief overview of Trust Region methods and
+   how the Levenberg-Marquardt and Dogleg methods work.
+
+Bug Fixes
+---------
+#. Destructor for ``TrustRegionStrategy`` was not virtual (Markus
+   Moll)
+#. Invalid ``DCHECK`` in ``suitesparse.cc`` (Markus Moll)
+#. Iteration callbacks were not properly invoked (Luis Alberto
+   Zarrabeiti)
+#. Logging level changes in ConjugateGradientsSolver
+#. VisibilityBasedPreconditioner setup does not account for skipped
+   camera pairs. This was debugging code.
+#. Enable SSE support on MacOS
+#. ``system_test`` was taking too long and too much memory (Koichi
+   Akabe)
+
+1.2.0
+=====
+
+New Features
+------------
+
+#. ``CXSparse`` support.
+#. Block oriented fill reducing orderings. This reduces the
+   factorization time for sparse ``CHOLMOD`` significantly.
+#. New Trust region loop with support for multiple trust region step
+   strategies. Currently only Levenberg-Marquardt is supported, but
+   this refactoring opens the door for Dogleg, Steihaug and others.
+#. ``CMake`` file restructuring. Builds in ``Release`` mode by
+   default, and now has platform-specific tuning flags.
+#. Re-organized documentation. No new content, but better
+   organization.
+
+
+Bug Fixes
+---------
+
+#. Fixed integer overflow bug in ``block_random_access_sparse_matrix.cc``.
+#. Renamed some macros to prevent name conflicts.
+#. Fixed incorrect input to ``StateUpdatingCallback``.
+#. Fixes to AutoDiff tests.
+#. Various internal cleanups.
+
+
+1.1.1
+=====
+
+Bug Fixes
+---------
+#. Fix a bug in the handling of constant blocks. (Louis Simard)
+#. Add an optional lower bound to the Levenberg-Marquardt regularizer
+   to prevent oscillating between well- and ill-posed linear problems.
+#. Some internal refactoring and test fixes.
+
+1.1.0
+=====
+
+New Features
+------------
+#. New iterative linear solver for general sparse problems - ``CGNR``
+   and a block Jacobi preconditioner for it.
+#. Changed the semantics of how ``SuiteSparse`` dependencies are
+   checked and used. ``SuiteSparse`` support is now enabled by
+   default, but only if all of its dependencies are present.
+#. Automatic differentiation now supports dynamic number of residuals.
+#. Support for writing the linear least squares problems to disk in
+   text format so that they can be loaded into ``MATLAB``.
+#. Linear solver results are now checked for nan and infinities.
+#. Added ``.gitignore`` file.
+#. A better, more robust build system.
+
+
+Bug Fixes
+---------
+#. Fixed a strict weak ordering bug in the Schur ordering.
+#. Grammar and typos in the documents and code comments.
+#. Fixed tests which depended on exact equality between floating point
+   values.
+
+1.0.0
+=====
+Initial open source release. Nathan Wiegand contributed to the Mac OSX
+port.
+
+
+Origins
+=======
+
+Ceres Solver grew out of the need for general least squares solving at
+Google. In early 2010, Sameer Agarwal and Fredrik Schaffalitzky
+started the development of Ceres Solver. Fredrik left Google shortly
+thereafter and Keir Mierle stepped in to take his place. After two
+years of on-and-off development, Ceres Solver was released as open
+source in May of 2012.