Squashed 'third_party/eigen/' content from commit 61d72f6

Change-Id: Iccc90fa0b55ab44037f018046d2fcffd90d9d025
git-subtree-dir: third_party/eigen
git-subtree-split: 61d72f6383cfa842868c53e30e087b0258177257
diff --git a/Eigen/src/LU/CMakeLists.txt b/Eigen/src/LU/CMakeLists.txt
new file mode 100644
index 0000000..e0d8d78
--- /dev/null
+++ b/Eigen/src/LU/CMakeLists.txt
@@ -0,0 +1,8 @@
+FILE(GLOB Eigen_LU_SRCS "*.h")
+
+INSTALL(FILES 
+  ${Eigen_LU_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel
+  )
+
+ADD_SUBDIRECTORY(arch)
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
new file mode 100644
index 0000000..bb8e78a
--- /dev/null
+++ b/Eigen/src/LU/Determinant.h
@@ -0,0 +1,101 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_DETERMINANT_H
+#define EIGEN_DETERMINANT_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename Derived>
+inline const typename Derived::Scalar bruteforce_det3_helper
+(const MatrixBase<Derived>& matrix, int a, int b, int c)
+{
+  return matrix.coeff(0,a)
+         * (matrix.coeff(1,b) * matrix.coeff(2,c) - matrix.coeff(1,c) * matrix.coeff(2,b));
+}
+
+template<typename Derived>
+const typename Derived::Scalar bruteforce_det4_helper
+(const MatrixBase<Derived>& matrix, int j, int k, int m, int n)
+{
+  return (matrix.coeff(j,0) * matrix.coeff(k,1) - matrix.coeff(k,0) * matrix.coeff(j,1))
+       * (matrix.coeff(m,2) * matrix.coeff(n,3) - matrix.coeff(n,2) * matrix.coeff(m,3));
+}
+
+template<typename Derived,
+         int DeterminantType = Derived::RowsAtCompileTime
+> struct determinant_impl
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    if(Derived::ColsAtCompileTime==Dynamic && m.rows()==0)
+      return typename traits<Derived>::Scalar(1);
+    return m.partialPivLu().determinant();
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 1>
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    return m.coeff(0,0);
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 2>
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    return m.coeff(0,0) * m.coeff(1,1) - m.coeff(1,0) * m.coeff(0,1);
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 3>
+{
+  static inline typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    return bruteforce_det3_helper(m,0,1,2)
+          - bruteforce_det3_helper(m,1,0,2)
+          + bruteforce_det3_helper(m,2,0,1);
+  }
+};
+
+template<typename Derived> struct determinant_impl<Derived, 4>
+{
+  static typename traits<Derived>::Scalar run(const Derived& m)
+  {
+    // trick by Martin Costabel to compute 4x4 det with only 30 muls
+    return bruteforce_det4_helper(m,0,1,2,3)
+          - bruteforce_det4_helper(m,0,2,1,3)
+          + bruteforce_det4_helper(m,0,3,1,2)
+          + bruteforce_det4_helper(m,1,2,0,3)
+          - bruteforce_det4_helper(m,1,3,0,2)
+          + bruteforce_det4_helper(m,2,3,0,1);
+  }
+};
+
+} // end namespace internal
+
+/** \lu_module
+  *
+  * \returns the determinant of this matrix
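+  *
+  * A minimal usage sketch (assuming the Eigen/Dense and iostream headers are included):
+  * \code
+  * Eigen::Matrix3d A;
+  * A << 1, 2, 3,
+  *      4, 5, 6,
+  *      7, 8, 10;
+  * std::cout << A.determinant() << std::endl; // prints -3, using the size-3 specialization above
+  * \endcode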
+  */
+template<typename Derived>
+inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
+{
+  eigen_assert(rows() == cols());
+  typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested;
+  return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_DETERMINANT_H
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
new file mode 100644
index 0000000..26bc714
--- /dev/null
+++ b/Eigen/src/LU/FullPivLU.h
@@ -0,0 +1,751 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_LU_H
+#define EIGEN_LU_H
+
+namespace Eigen { 
+
+/** \ingroup LU_Module
+  *
+  * \class FullPivLU
+  *
+  * \brief LU decomposition of a matrix with complete pivoting, and related features
+  *
+  * \param MatrixType the type of the matrix of which we are computing the LU decomposition
+  *
+  * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
+  * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
+  * upper-triangular, and P and Q are permutation matrices. This is a rank-revealing LU
+  * decomposition. The eigenvalues (diagonal coefficients) of U are sorted in such a way that any
+  * zeros are at the end.
+  *
+  * This decomposition provides the generic approach to solving systems of linear equations, computing
+  * the rank, invertibility, inverse, kernel, and determinant.
+  *
+  * This LU decomposition is very stable and well tested with large matrices. However, there are use cases where the SVD
+  * decomposition is inherently more stable and/or flexible. For example, when computing the kernel of a matrix,
+  * working with the SVD allows one to select the smallest singular values of the matrix, something that
+  * the LU decomposition doesn't see.
+  *
+  * The data of the LU decomposition can be directly accessed through the methods matrixLU(),
+  * permutationP(), permutationQ().
+  *
+  * As an example, here is how the original matrix can be retrieved:
+  * \include class_FullPivLU.cpp
+  * Output: \verbinclude class_FullPivLU.out
+  *
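+  * A minimal usage sketch (assuming the Eigen/Dense and iostream headers are included):
+  * \code
+  * Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 3);
+  * Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
+  * std::cout << "rank: " << lu.rank() << std::endl;
+  * std::cout << "|A - P^{-1} L U Q^{-1}|: "
+  *           << (A - lu.reconstructedMatrix()).norm() << std::endl;
+  * \endcode
+  *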
+  * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse()
+  */
+template<typename _MatrixType> class FullPivLU
+{
+  public:
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+    typedef typename internal::traits<MatrixType>::StorageKind StorageKind;
+    typedef typename MatrixType::Index Index;
+    typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType;
+    typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType;
+    typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;
+    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType;
+
+    /**
+      * \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via LU::compute(const MatrixType&).
+      */
+    FullPivLU();
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa FullPivLU()
+      */
+    FullPivLU(Index rows, Index cols);
+
+    /** Constructor.
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *               It is required to be nonzero.
+      */
+    FullPivLU(const MatrixType& matrix);
+
+    /** Computes the LU decomposition of the given matrix.
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *               It is required to be nonzero.
+      *
+      * \returns a reference to *this
+      */
+    FullPivLU& compute(const MatrixType& matrix);
+
+    /** \returns the LU decomposition matrix: the upper-triangular part is U, the
+      * unit-lower-triangular part is L (at least for square matrices; in the non-square
+      * case, special care is needed, see the documentation of class FullPivLU).
+      *
+      * \sa matrixL(), matrixU()
+      */
+    inline const MatrixType& matrixLU() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_lu;
+    }
+
+    /** \returns the number of nonzero pivots in the LU decomposition.
+      * Here nonzero is meant in the exact sense, not in a fuzzy sense.
+      * That notion is not intrinsically very interesting, but it is
+      * still useful when implementing algorithms.
+      *
+      * \sa rank()
+      */
+    inline Index nonzeroPivots() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_nonzero_pivots;
+    }
+
+    /** \returns the absolute value of the biggest pivot, i.e. the biggest
+      *          diagonal coefficient of U.
+      */
+    RealScalar maxPivot() const { return m_maxpivot; }
+
+    /** \returns the permutation matrix P
+      *
+      * \sa permutationQ()
+      */
+    inline const PermutationPType& permutationP() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_p;
+    }
+
+    /** \returns the permutation matrix Q
+      *
+      * \sa permutationP()
+      */
+    inline const PermutationQType& permutationQ() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return m_q;
+    }
+
+    /** \returns the kernel of the matrix, also called its null-space. The columns of the returned matrix
+      * will form a basis of the kernel.
+      *
+      * \note If the kernel has dimension zero, then the returned matrix is a column-vector filled with zeros.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      *
+      * Example: \include FullPivLU_kernel.cpp
+      * Output: \verbinclude FullPivLU_kernel.out
+      *
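+      * A minimal sketch (a rank-deficient dynamic-size double matrix is assumed):
+      * \code
+      * Eigen::MatrixXd A(3, 3);
+      * A << 1, 2, 3,
+      *      2, 4, 6,
+      *      1, 0, 1;              // second row is twice the first, so rank(A) == 2
+      * Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
+      * Eigen::MatrixXd ker = lu.kernel();          // 3x1 here: columns span the null-space of A
+      * std::cout << (A * ker).norm() << std::endl; // close to zero
+      * \endcode
+      *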
+      * \sa image()
+      */
+    inline const internal::kernel_retval<FullPivLU> kernel() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return internal::kernel_retval<FullPivLU>(*this);
+    }
+
+    /** \returns the image of the matrix, also called its column-space. The columns of the returned matrix
+      * will form a basis of the image.
+      *
+      * \param originalMatrix the original matrix, of which *this is the LU decomposition.
+      *                       The reason it needs to be passed here is that this allows
+      *                       a large optimization, as otherwise this method would need to reconstruct it
+      *                       from the LU decomposition.
+      *
+      * \note If the image has dimension zero, then the returned matrix is a column-vector filled with zeros.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      *
+      * Example: \include FullPivLU_image.cpp
+      * Output: \verbinclude FullPivLU_image.out
+      *
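+      * A minimal sketch (the product below generically has rank 2):
+      * \code
+      * Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 2) * Eigen::MatrixXd::Random(2, 4);
+      * Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
+      * Eigen::MatrixXd im = lu.image(A); // 4x2 here: columns of A spanning its column-space
+      * \endcode
+      *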
+      * \sa kernel()
+      */
+    inline const internal::image_retval<FullPivLU>
+      image(const MatrixType& originalMatrix) const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return internal::image_retval<FullPivLU>(*this, originalMatrix);
+    }
+
+    /** \return a solution x to the equation Ax=b, where A is the matrix of which
+      * *this is the LU decomposition.
+      *
+      * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
+      *          the only requirement in order for the equation to make sense is that
+      *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
+      *
+      * \returns a solution.
+      *
+      * \note_about_checking_solutions
+      *
+      * \note_about_arbitrary_choice_of_solution
+      * \note_about_using_kernel_to_study_multiple_solutions
+      *
+      * Example: \include FullPivLU_solve.cpp
+      * Output: \verbinclude FullPivLU_solve.out
+      *
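+      * A minimal sketch (a square, generically invertible system is assumed):
+      * \code
+      * Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
+      * Eigen::VectorXd b = Eigen::VectorXd::Random(4);
+      * Eigen::VectorXd x = A.fullPivLu().solve(b);
+      * std::cout << "residual: " << (A * x - b).norm() << std::endl;
+      * \endcode
+      *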
+      * \sa TriangularView::solve(), kernel(), inverse()
+      */
+    template<typename Rhs>
+    inline const internal::solve_retval<FullPivLU, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return internal::solve_retval<FullPivLU, Rhs>(*this, b.derived());
+    }
+
+    /** \returns the determinant of the matrix of which
+      * *this is the LU decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the LU decomposition has already been computed.
+      *
+      * \note This is only for square matrices.
+      *
+      * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
+      *       optimized paths.
+      *
+      * \warning a determinant can be very big or small, so for matrices
+      * of large enough dimension, there is a risk of overflow/underflow.
+      *
+      * \sa MatrixBase::determinant()
+      */
+    typename internal::traits<MatrixType>::Scalar determinant() const;
+
+    /** Allows prescribing a threshold to be used by certain methods, such as rank(),
+      * which need to determine when pivots are to be considered nonzero. This is not used for the
+      * LU decomposition itself.
+      *
+      * When it needs to get the threshold value, Eigen calls threshold(). By default, this
+      * uses a formula to automatically determine a reasonable threshold.
+      * Once you have called the present method setThreshold(const RealScalar&),
+      * your value is used instead.
+      *
+      * \param threshold The new value to use as the threshold.
+      *
+      * A pivot will be considered nonzero if its absolute value is strictly greater than
+      * \f$ threshold \times \vert maxpivot \vert \f$,
+      * where maxpivot is the biggest pivot.
+      *
+      * If you want to come back to the default behavior, call setThreshold(Default_t)
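+      *
+      * A minimal sketch (the value 1e-8 is purely illustrative):
+      * \code
+      * Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
+      * Eigen::FullPivLU<Eigen::MatrixXd> lu(A);
+      * lu.setThreshold(1e-8);           // rank(), kernel(), image(), ... now use this value
+      * std::cout << lu.rank() << std::endl;
+      * lu.setThreshold(Eigen::Default); // restore the automatic formula
+      * \endcode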
+      */
+    FullPivLU& setThreshold(const RealScalar& threshold)
+    {
+      m_usePrescribedThreshold = true;
+      m_prescribedThreshold = threshold;
+      return *this;
+    }
+
+    /** Allows returning to the default behavior, letting Eigen use its default formula for
+      * determining the threshold.
+      *
+      * You should pass the special object Eigen::Default as parameter here.
+      * \code lu.setThreshold(Eigen::Default); \endcode
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    FullPivLU& setThreshold(Default_t)
+    {
+      m_usePrescribedThreshold = false;
+      return *this;
+    }
+
+    /** Returns the threshold that will be used by certain methods such as rank().
+      *
+      * See the documentation of setThreshold(const RealScalar&).
+      */
+    RealScalar threshold() const
+    {
+      eigen_assert(m_isInitialized || m_usePrescribedThreshold);
+      return m_usePrescribedThreshold ? m_prescribedThreshold
+      // this formula comes from experimenting (see "LU precision tuning" thread on the list)
+      // and turns out to be identical to Higham's formula used already in LDLt.
+                                      : NumTraits<Scalar>::epsilon() * m_lu.diagonalSize();
+    }
+
+    /** \returns the rank of the matrix of which *this is the LU decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index rank() const
+    {
+      using std::abs;
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      RealScalar premultiplied_threshold = abs(m_maxpivot) * threshold();
+      Index result = 0;
+      for(Index i = 0; i < m_nonzero_pivots; ++i)
+        result += (abs(m_lu.coeff(i,i)) > premultiplied_threshold);
+      return result;
+    }
+
+    /** \returns the dimension of the kernel of the matrix of which *this is the LU decomposition.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline Index dimensionOfKernel() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return cols() - rank();
+    }
+
+    /** \returns true if the matrix of which *this is the LU decomposition represents an injective
+      *          linear map, i.e. has trivial kernel; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInjective() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return rank() == cols();
+    }
+
+    /** \returns true if the matrix of which *this is the LU decomposition represents a surjective
+      *          linear map; false otherwise.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isSurjective() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return rank() == rows();
+    }
+
+    /** \returns true if the matrix of which *this is the LU decomposition is invertible.
+      *
+      * \note This method has to determine which pivots should be considered nonzero.
+      *       For that, it uses the threshold value that you can control by calling
+      *       setThreshold(const RealScalar&).
+      */
+    inline bool isInvertible() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      return isInjective() && (m_lu.rows() == m_lu.cols());
+    }
+
+    /** \returns the inverse of the matrix of which *this is the LU decomposition.
+      *
+      * \note If this matrix is not invertible, the returned matrix has undefined coefficients.
+      *       Use isInvertible() to first determine whether this matrix is invertible.
+      *
+      * \sa MatrixBase::inverse()
+      */
+    inline const internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> inverse() const
+    {
+      eigen_assert(m_isInitialized && "LU is not initialized.");
+      eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!");
+      return internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType>
+               (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+    }
+
+    MatrixType reconstructedMatrix() const;
+
+    inline Index rows() const { return m_lu.rows(); }
+    inline Index cols() const { return m_lu.cols(); }
+
+  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
+    MatrixType m_lu;
+    PermutationPType m_p;
+    PermutationQType m_q;
+    IntColVectorType m_rowsTranspositions;
+    IntRowVectorType m_colsTranspositions;
+    Index m_det_pq, m_nonzero_pivots;
+    RealScalar m_maxpivot, m_prescribedThreshold;
+    bool m_isInitialized, m_usePrescribedThreshold;
+};
+
+template<typename MatrixType>
+FullPivLU<MatrixType>::FullPivLU()
+  : m_isInitialized(false), m_usePrescribedThreshold(false)
+{
+}
+
+template<typename MatrixType>
+FullPivLU<MatrixType>::FullPivLU(Index rows, Index cols)
+  : m_lu(rows, cols),
+    m_p(rows),
+    m_q(cols),
+    m_rowsTranspositions(rows),
+    m_colsTranspositions(cols),
+    m_isInitialized(false),
+    m_usePrescribedThreshold(false)
+{
+}
+
+template<typename MatrixType>
+FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
+  : m_lu(matrix.rows(), matrix.cols()),
+    m_p(matrix.rows()),
+    m_q(matrix.cols()),
+    m_rowsTranspositions(matrix.rows()),
+    m_colsTranspositions(matrix.cols()),
+    m_isInitialized(false),
+    m_usePrescribedThreshold(false)
+{
+  compute(matrix);
+}
+
+template<typename MatrixType>
+FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
+{
+  check_template_parameters();
+  
+  // the permutations are stored as int indices, so just to be sure:
+  eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
+  
+  m_isInitialized = true;
+  m_lu = matrix;
+
+  const Index size = matrix.diagonalSize();
+  const Index rows = matrix.rows();
+  const Index cols = matrix.cols();
+
+  // will store the transpositions, before we accumulate them at the end.
+  // can't accumulate on-the-fly because that will be done in reverse order for the rows.
+  m_rowsTranspositions.resize(matrix.rows());
+  m_colsTranspositions.resize(matrix.cols());
+  Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i
+
+  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
+  m_maxpivot = RealScalar(0);
+
+  for(Index k = 0; k < size; ++k)
+  {
+    // First, we need to find the pivot.
+
+    // biggest coefficient in the remaining bottom-right corner (starting at row k, col k)
+    Index row_of_biggest_in_corner, col_of_biggest_in_corner;
+    RealScalar biggest_in_corner;
+    biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k)
+                        .cwiseAbs()
+                        .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
+    row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
+    col_of_biggest_in_corner += k; // need to add k to them.
+
+    if(biggest_in_corner==RealScalar(0))
+    {
+      // before exiting, make sure to initialize the still uninitialized transpositions
+      // in a sane state without destroying what we already have.
+      m_nonzero_pivots = k;
+      for(Index i = k; i < size; ++i)
+      {
+        m_rowsTranspositions.coeffRef(i) = i;
+        m_colsTranspositions.coeffRef(i) = i;
+      }
+      break;
+    }
+
+    if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner;
+
+    // Now that we've found the pivot, we need to apply the row/col swaps to
+    // bring it to the location (k,k).
+
+    m_rowsTranspositions.coeffRef(k) = row_of_biggest_in_corner;
+    m_colsTranspositions.coeffRef(k) = col_of_biggest_in_corner;
+    if(k != row_of_biggest_in_corner) {
+      m_lu.row(k).swap(m_lu.row(row_of_biggest_in_corner));
+      ++number_of_transpositions;
+    }
+    if(k != col_of_biggest_in_corner) {
+      m_lu.col(k).swap(m_lu.col(col_of_biggest_in_corner));
+      ++number_of_transpositions;
+    }
+
+    // Now that the pivot is at the right location, we update the remaining
+    // bottom-right corner by Gaussian elimination.
+
+    if(k<rows-1)
+      m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k);
+    if(k<size-1)
+      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1);
+  }
+
+  // the main loop is over, we still have to accumulate the transpositions to find the
+  // permutations P and Q
+
+  m_p.setIdentity(rows);
+  for(Index k = size-1; k >= 0; --k)
+    m_p.applyTranspositionOnTheRight(k, m_rowsTranspositions.coeff(k));
+
+  m_q.setIdentity(cols);
+  for(Index k = 0; k < size; ++k)
+    m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
+
+  m_det_pq = (number_of_transpositions%2) ? -1 : 1;
+  return *this;
+}
+
+template<typename MatrixType>
+typename internal::traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() const
+{
+  eigen_assert(m_isInitialized && "LU is not initialized.");
+  eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the determinant of a non-square matrix!");
+  return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod());
+}
+
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: \f$ P^{-1} L U Q^{-1} \f$.
+ * This function is provided for debug purposes. */
+template<typename MatrixType>
+MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
+{
+  eigen_assert(m_isInitialized && "LU is not initialized.");
+  const Index smalldim = (std::min)(m_lu.rows(), m_lu.cols());
+  // LU
+  MatrixType res(m_lu.rows(),m_lu.cols());
+  // FIXME the .toDenseMatrix() should not be needed...
+  res = m_lu.leftCols(smalldim)
+            .template triangularView<UnitLower>().toDenseMatrix()
+      * m_lu.topRows(smalldim)
+            .template triangularView<Upper>().toDenseMatrix();
+
+  // P^{-1}(LU)
+  res = m_p.inverse() * res;
+
+  // (P^{-1}LU)Q^{-1}
+  res = res * m_q.inverse();
+
+  return res;
+}
+
+/********* Implementation of kernel() **************************************************/
+
+namespace internal {
+template<typename _MatrixType>
+struct kernel_retval<FullPivLU<_MatrixType> >
+  : kernel_retval_base<FullPivLU<_MatrixType> >
+{
+  EIGEN_MAKE_KERNEL_HELPERS(FullPivLU<_MatrixType>)
+
+  enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(
+            MatrixType::MaxColsAtCompileTime,
+            MatrixType::MaxRowsAtCompileTime)
+  };
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    using std::abs;
+    const Index cols = dec().matrixLU().cols(), dimker = cols - rank();
+    if(dimker == 0)
+    {
+      // The Kernel is just {0}, so it doesn't have a basis properly speaking, but let's
+      // avoid crashing/asserting as that depends on floating point calculations. Let's
+      // just return a single column vector filled with zeros.
+      dst.setZero();
+      return;
+    }
+
+    /* Let us use the following lemma:
+      *
+      * Lemma: If the matrix A has the LU decomposition PAQ = LU,
+      * then Ker A = Q(Ker U).
+      *
+      * Proof: trivial: just keep in mind that P, Q, L are invertible.
+      */
+
+    /* Thus, all we need to do is to compute Ker U, and then apply Q.
+      *
+      * U is upper triangular, with eigenvalues sorted so that any zeros appear at the end.
+      * Thus, the diagonal of U ends with exactly
+      * dimKer zeros. Let us use that to construct dimKer linearly
+      * independent vectors in Ker U.
+      */
+
+    Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());
+    RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();
+    Index p = 0;
+    for(Index i = 0; i < dec().nonzeroPivots(); ++i)
+      if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)
+        pivots.coeffRef(p++) = i;
+    eigen_internal_assert(p == rank());
+
+    // we construct a temporary trapezoid matrix m, by taking the U matrix and
+    // permuting the rows and cols to bring the nonnegligible pivots to the top of
+    // the main diagonal. We need that to be able to apply our triangular solvers.
+    // FIXME when we get triangularView-for-rectangular-matrices, this can be simplified
+    Matrix<typename MatrixType::Scalar, Dynamic, Dynamic, MatrixType::Options,
+           MaxSmallDimAtCompileTime, MatrixType::MaxColsAtCompileTime>
+      m(dec().matrixLU().block(0, 0, rank(), cols));
+    for(Index i = 0; i < rank(); ++i)
+    {
+      if(i) m.row(i).head(i).setZero();
+      m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);
+    }
+    m.block(0, 0, rank(), rank()).template triangularView<StrictlyLower>().setZero();
+    for(Index i = 0; i < rank(); ++i)
+      m.col(i).swap(m.col(pivots.coeff(i)));
+
+    // ok, we have our trapezoid matrix, we can apply the triangular solver.
+    // notice that the math behind this suggests that we should apply this to the
+    // negative of the RHS, but for performance we just put the negative sign elsewhere, see below.
+    m.topLeftCorner(rank(), rank())
+     .template triangularView<Upper>().solveInPlace(
+        m.topRightCorner(rank(), dimker)
+      );
+
+    // now we must undo the column permutation that we had applied!
+    for(Index i = rank()-1; i >= 0; --i)
+      m.col(i).swap(m.col(pivots.coeff(i)));
+
+    // see the negative sign in the next line, that's what we were talking about above.
+    for(Index i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker);
+    for(Index i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero();
+    for(Index k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1);
+  }
+};
+
+/***** Implementation of image() *****************************************************/
+
+template<typename _MatrixType>
+struct image_retval<FullPivLU<_MatrixType> >
+  : image_retval_base<FullPivLU<_MatrixType> >
+{
+  EIGEN_MAKE_IMAGE_HELPERS(FullPivLU<_MatrixType>)
+
+  enum { MaxSmallDimAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(
+            MatrixType::MaxColsAtCompileTime,
+            MatrixType::MaxRowsAtCompileTime)
+  };
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    using std::abs;
+    if(rank() == 0)
+    {
+      // The Image is just {0}, so it doesn't have a basis properly speaking, but let's
+      // avoid crashing/asserting as that depends on floating point calculations. Let's
+      // just return a single column vector filled with zeros.
+      dst.setZero();
+      return;
+    }
+
+    Matrix<Index, Dynamic, 1, 0, MaxSmallDimAtCompileTime, 1> pivots(rank());
+    RealScalar premultiplied_threshold = dec().maxPivot() * dec().threshold();
+    Index p = 0;
+    for(Index i = 0; i < dec().nonzeroPivots(); ++i)
+      if(abs(dec().matrixLU().coeff(i,i)) > premultiplied_threshold)
+        pivots.coeffRef(p++) = i;
+    eigen_internal_assert(p == rank());
+
+    for(Index i = 0; i < rank(); ++i)
+      dst.col(i) = originalMatrix().col(dec().permutationQ().indices().coeff(pivots.coeff(i)));
+  }
+};
+
+/***** Implementation of solve() *****************************************************/
+
+template<typename _MatrixType, typename Rhs>
+struct solve_retval<FullPivLU<_MatrixType>, Rhs>
+  : solve_retval_base<FullPivLU<_MatrixType>, Rhs>
+{
+  EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
+     * So we proceed as follows:
+     * Step 1: compute c = P * rhs.
+     * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
+     * Step 3: replace c by the solution x to Ux = c. May or may not exist.
+     * Step 4: result = Q * c;
+     */
+
+    const Index rows = dec().rows(), cols = dec().cols(),
+              nonzero_pivots = dec().nonzeroPivots();
+    eigen_assert(rhs().rows() == rows);
+    const Index smalldim = (std::min)(rows, cols);
+
+    if(nonzero_pivots == 0)
+    {
+      dst.setZero();
+      return;
+    }
+
+    typename Rhs::PlainObject c(rhs().rows(), rhs().cols());
+
+    // Step 1
+    c = dec().permutationP() * rhs();
+
+    // Step 2
+    dec().matrixLU()
+        .topLeftCorner(smalldim,smalldim)
+        .template triangularView<UnitLower>()
+        .solveInPlace(c.topRows(smalldim));
+    if(rows>cols)
+    {
+      c.bottomRows(rows-cols)
+        -= dec().matrixLU().bottomRows(rows-cols)
+         * c.topRows(cols);
+    }
+
+    // Step 3
+    dec().matrixLU()
+        .topLeftCorner(nonzero_pivots, nonzero_pivots)
+        .template triangularView<Upper>()
+        .solveInPlace(c.topRows(nonzero_pivots));
+
+    // Step 4
+    for(Index i = 0; i < nonzero_pivots; ++i)
+      dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i);
+    for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i)
+      dst.row(dec().permutationQ().indices().coeff(i)).setZero();
+  }
+};
+
+} // end namespace internal
+
+/******* MatrixBase methods *****************************************************************/
+
+/** \lu_module
+  *
+  * \return the full-pivoting LU decomposition of \c *this.
+  *
+  * \sa class FullPivLU
+  */
+template<typename Derived>
+inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::fullPivLu() const
+{
+  return FullPivLU<PlainObject>(eval());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_LU_H
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h
new file mode 100644
index 0000000..3cf8871
--- /dev/null
+++ b/Eigen/src/LU/Inverse.h
@@ -0,0 +1,400 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_INVERSE_H
+#define EIGEN_INVERSE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+/**********************************
+*** General case implementation ***
+**********************************/
+
+template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
+struct compute_inverse
+{
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    result = matrix.partialPivLu().inverse();
+  }
+};
+
+template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
+struct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ };
+
+/****************************
+*** Size 1 implementation ***
+****************************/
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 1>
+{
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    typedef typename MatrixType::Scalar Scalar;
+    result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0);
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>
+{
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& result,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    determinant = matrix.coeff(0,0);
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant;
+  }
+};
+
+/****************************
+*** Size 2 implementation ***
+****************************/
+
+template<typename MatrixType, typename ResultType>
+inline void compute_inverse_size2_helper(
+    const MatrixType& matrix, const typename ResultType::Scalar& invdet,
+    ResultType& result)
+{
+  result.coeffRef(0,0) = matrix.coeff(1,1) * invdet;
+  result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
+  result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
+  result.coeffRef(1,1) = matrix.coeff(0,0) * invdet;
+}
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 2>
+{
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    typedef typename ResultType::Scalar Scalar;
+    const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant();
+    compute_inverse_size2_helper(matrix, invdet, result);
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
+{
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    typedef typename ResultType::Scalar Scalar;
+    determinant = matrix.determinant();
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(!invertible) return;
+    const Scalar invdet = Scalar(1) / determinant;
+    compute_inverse_size2_helper(matrix, invdet, inverse);
+  }
+};
+
+/****************************
+*** Size 3 implementation ***
+****************************/
+
+template<typename MatrixType, int i, int j>
+inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)
+{
+  enum {
+    i1 = (i+1) % 3,
+    i2 = (i+2) % 3,
+    j1 = (j+1) % 3,
+    j2 = (j+2) % 3
+  };
+  return m.coeff(i1, j1) * m.coeff(i2, j2)
+       - m.coeff(i1, j2) * m.coeff(i2, j1);
+}
+
+template<typename MatrixType, typename ResultType>
+inline void compute_inverse_size3_helper(
+    const MatrixType& matrix,
+    const typename ResultType::Scalar& invdet,
+    const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
+    ResultType& result)
+{
+  result.row(0) = cofactors_col0 * invdet;
+  result.coeffRef(1,0) =  cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
+  result.coeffRef(1,1) =  cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
+  result.coeffRef(1,2) =  cofactor_3x3<MatrixType,2,1>(matrix) * invdet;
+  result.coeffRef(2,0) =  cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
+  result.coeffRef(2,1) =  cofactor_3x3<MatrixType,1,2>(matrix) * invdet;
+  result.coeffRef(2,2) =  cofactor_3x3<MatrixType,2,2>(matrix) * invdet;
+}
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 3>
+{
+  static inline void run(const MatrixType& matrix, ResultType& result)
+  {
+    typedef typename ResultType::Scalar Scalar;
+    Matrix<typename MatrixType::Scalar,3,1> cofactors_col0;
+    cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);
+    cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);
+    cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);
+    const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
+    const Scalar invdet = Scalar(1) / det;
+    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
+{
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    typedef typename ResultType::Scalar Scalar;
+    Matrix<Scalar,3,1> cofactors_col0;
+    cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);
+    cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);
+    cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);
+    determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(!invertible) return;
+    const Scalar invdet = Scalar(1) / determinant;
+    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
+  }
+};
+
+/****************************
+*** Size 4 implementation ***
+****************************/
+
+template<typename Derived>
+inline const typename Derived::Scalar general_det3_helper
+(const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3)
+{
+  return matrix.coeff(i1,j1)
+         * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2));
+}
+
+template<typename MatrixType, int i, int j>
+inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)
+{
+  enum {
+    i1 = (i+1) % 4,
+    i2 = (i+2) % 4,
+    i3 = (i+3) % 4,
+    j1 = (j+1) % 4,
+    j2 = (j+2) % 4,
+    j3 = (j+3) % 4
+  };
+  return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3)
+       + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3)
+       + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3);
+}
+
+template<int Arch, typename Scalar, typename MatrixType, typename ResultType>
+struct compute_inverse_size4
+{
+  static void run(const MatrixType& matrix, ResultType& result)
+  {
+    result.coeffRef(0,0) =  cofactor_4x4<MatrixType,0,0>(matrix);
+    result.coeffRef(1,0) = -cofactor_4x4<MatrixType,0,1>(matrix);
+    result.coeffRef(2,0) =  cofactor_4x4<MatrixType,0,2>(matrix);
+    result.coeffRef(3,0) = -cofactor_4x4<MatrixType,0,3>(matrix);
+    result.coeffRef(0,2) =  cofactor_4x4<MatrixType,2,0>(matrix);
+    result.coeffRef(1,2) = -cofactor_4x4<MatrixType,2,1>(matrix);
+    result.coeffRef(2,2) =  cofactor_4x4<MatrixType,2,2>(matrix);
+    result.coeffRef(3,2) = -cofactor_4x4<MatrixType,2,3>(matrix);
+    result.coeffRef(0,1) = -cofactor_4x4<MatrixType,1,0>(matrix);
+    result.coeffRef(1,1) =  cofactor_4x4<MatrixType,1,1>(matrix);
+    result.coeffRef(2,1) = -cofactor_4x4<MatrixType,1,2>(matrix);
+    result.coeffRef(3,1) =  cofactor_4x4<MatrixType,1,3>(matrix);
+    result.coeffRef(0,3) = -cofactor_4x4<MatrixType,3,0>(matrix);
+    result.coeffRef(1,3) =  cofactor_4x4<MatrixType,3,1>(matrix);
+    result.coeffRef(2,3) = -cofactor_4x4<MatrixType,3,2>(matrix);
+    result.coeffRef(3,3) =  cofactor_4x4<MatrixType,3,3>(matrix);
+    result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum();
+  }
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse<MatrixType, ResultType, 4>
+ : compute_inverse_size4<Architecture::Target, typename MatrixType::Scalar,
+                            MatrixType, ResultType>
+{
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
+{
+  static inline void run(
+    const MatrixType& matrix,
+    const typename MatrixType::RealScalar& absDeterminantThreshold,
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible
+  )
+  {
+    using std::abs;
+    determinant = matrix.determinant();
+    invertible = abs(determinant) > absDeterminantThreshold;
+    if(invertible) compute_inverse<MatrixType, ResultType>::run(matrix, inverse);
+  }
+};
+
+/*************************
+*** MatrixBase methods ***
+*************************/
+
+template<typename MatrixType>
+struct traits<inverse_impl<MatrixType> >
+{
+  typedef typename MatrixType::PlainObject ReturnType;
+};
+
+template<typename MatrixType>
+struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
+{
+  typedef typename MatrixType::Index Index;
+  typedef typename internal::eval<MatrixType>::type MatrixTypeNested;
+  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
+  MatrixTypeNested m_matrix;
+
+  inverse_impl(const MatrixType& matrix)
+    : m_matrix(matrix)
+  {}
+
+  inline Index rows() const { return m_matrix.rows(); }
+  inline Index cols() const { return m_matrix.cols(); }
+
+  template<typename Dest> inline void evalTo(Dest& dst) const
+  {
+    const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime);
+    EIGEN_ONLY_USED_FOR_DEBUG(Size);
+    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst)))
+              && "Aliasing problem detected in inverse(), you need to do inverse().eval() here.");
+
+    compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst);
+  }
+};
+
+} // end namespace internal
+
+/** \lu_module
+  *
+  * \returns the matrix inverse of this matrix.
+  *
+  * For small fixed sizes up to 4x4, this method uses cofactors.
+  * In the general case, this method uses class PartialPivLU.
+  *
+  * \note This matrix must be invertible, otherwise the result is undefined. If you need an
+  * invertibility check, do the following:
+  * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck().
+  * \li for the general case, use class FullPivLU.
+  *
+  * Example: \include MatrixBase_inverse.cpp
+  * Output: \verbinclude MatrixBase_inverse.out
+  *
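+  * A minimal usage sketch (a fixed-size invertible matrix is assumed):
+  * \code
+  * Eigen::Matrix2d A;
+  * A << 2, 1,
+  *      1, 2;                          // determinant 3, so A is invertible
+  * Eigen::Matrix2d Ainv = A.inverse(); // uses the size-2 cofactor path above
+  * std::cout << (A * Ainv - Eigen::Matrix2d::Identity()).norm() << std::endl;
+  * \endcode
+  *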
+  * \sa computeInverseAndDetWithCheck()
+  */
+template<typename Derived>
+inline const internal::inverse_impl<Derived> MatrixBase<Derived>::inverse() const
+{
+  EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
+  eigen_assert(rows() == cols());
+  return internal::inverse_impl<Derived>(derived());
+}
+
+/** \lu_module
+  *
+  * Computation of matrix inverse and determinant, with invertibility check.
+  *
+  * This is only for fixed-size square matrices of size up to 4x4.
+  *
+  * \param inverse Reference to the matrix in which to store the inverse.
+  * \param determinant Reference to the variable in which to store the determinant.
+  * \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
+  * \param absDeterminantThreshold Optional parameter controlling the invertibility check.
+  *                                The matrix will be declared invertible if the absolute value of its
+  *                                determinant is greater than this threshold.
+  *
+  * Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp
+  * Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out
+  *
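+  * A minimal sketch (the default threshold is used):
+  * \code
+  * Eigen::Matrix3d A = Eigen::Matrix3d::Random();
+  * Eigen::Matrix3d Ainv;
+  * double det;
+  * bool invertible;
+  * A.computeInverseAndDetWithCheck(Ainv, det, invertible);
+  * if(invertible) std::cout << "det = " << det << std::endl;
+  * \endcode
+  *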
+  * \sa inverse(), computeInverseWithCheck()
+  */
+template<typename Derived>
+template<typename ResultType>
+inline void MatrixBase<Derived>::computeInverseAndDetWithCheck(
+    ResultType& inverse,
+    typename ResultType::Scalar& determinant,
+    bool& invertible,
+    const RealScalar& absDeterminantThreshold
+  ) const
+{
+  // I'd love to put some static assertions here, but SFINAE means that they have no effect...
+  eigen_assert(rows() == cols());
+  // for 2x2, it's worth giving a chance to avoid evaluating.
+  // for larger sizes, evaluating has negligible cost and limits code size.
+  typedef typename internal::conditional<
+    RowsAtCompileTime == 2,
+    typename internal::remove_all<typename internal::nested<Derived, 2>::type>::type,
+    PlainObject
+  >::type MatrixType;
+  internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run
+    (derived(), absDeterminantThreshold, inverse, determinant, invertible);
+}
+
+/** \lu_module
+  *
+  * Computation of matrix inverse, with invertibility check.
+  *
+  * This is only for fixed-size square matrices of size up to 4x4.
+  *
+  * \param inverse Reference to the matrix in which to store the inverse.
+  * \param invertible Reference to the bool variable in which to store whether the matrix is invertible.
+  * \param absDeterminantThreshold Optional parameter controlling the invertibility check.
+  *                                The matrix will be declared invertible if the absolute value of its
+  *                                determinant is greater than this threshold.
+  *
+  * Example: \include MatrixBase_computeInverseWithCheck.cpp
+  * Output: \verbinclude MatrixBase_computeInverseWithCheck.out
+  *
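+  * A minimal sketch (a deliberately singular matrix is used):
+  * \code
+  * Eigen::Matrix3d A = Eigen::Matrix3d::Random();
+  * A.col(2) = A.col(0) + A.col(1);   // rank 2, so the check should report non-invertibility
+  * Eigen::Matrix3d Ainv;
+  * bool invertible;
+  * A.computeInverseWithCheck(Ainv, invertible);
+  * // invertible is expected to come out false here (up to rounding)
+  * \endcode
+  *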
+  * \sa inverse(), computeInverseAndDetWithCheck()
+  */
+template<typename Derived>
+template<typename ResultType>
+inline void MatrixBase<Derived>::computeInverseWithCheck(
+    ResultType& inverse,
+    bool& invertible,
+    const RealScalar& absDeterminantThreshold
+  ) const
+{
+  RealScalar determinant;
+  // I'd love to put some static assertions here, but SFINAE means that they have no effect...
+  eigen_assert(rows() == cols());
+  computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_H
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
new file mode 100644
index 0000000..7d1db94
--- /dev/null
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -0,0 +1,509 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2006-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_PARTIALLU_H
+#define EIGEN_PARTIALLU_H
+
+namespace Eigen { 
+
+/** \ingroup LU_Module
+  *
+  * \class PartialPivLU
+  *
+  * \brief LU decomposition of a matrix with partial pivoting, and related features
+  *
+  * \param MatrixType the type of the matrix of which we are computing the LU decomposition
+  *
+  * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
+  * is decomposed as \f$ A = P^{-1} L U \f$ where L is unit-lower-triangular, U is upper-triangular, and P
+  * is a permutation matrix.
+  *
+  * Typically, partial pivoting LU decomposition is only considered numerically stable for square invertible
+  * matrices. Thus LAPACK's dgesv and dgesvx require the matrix to be square and invertible. The present class
+  * does the same. It will assert that the matrix is square, but it won't (actually it can't) check that the
+  * matrix is invertible: it is your task to check that you only use this decomposition on invertible matrices.
+  *
+  * The guaranteed safe alternative, working for all matrices, is the full pivoting LU decomposition, provided
+  * by class FullPivLU.
+  *
+  * This is \b not a rank-revealing LU decomposition. Many features are intentionally absent from this class,
+  * such as rank computation. If you need these features, use class FullPivLU.
+  *
+  * This LU decomposition is suitable to invert invertible matrices. It is what MatrixBase::inverse() uses
+  * in the general case.
+  * On the other hand, it is \b not suitable to determine whether a given matrix is invertible.
+  *
+  * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
+  *
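+  * A minimal usage sketch (a square, generically invertible double matrix is assumed):
+  * \code
+  * Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
+  * Eigen::VectorXd b = Eigen::VectorXd::Random(4);
+  * Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);
+  * Eigen::VectorXd x = lu.solve(b);
+  * std::cout << "|Ax - b| = " << (A * x - b).norm() << std::endl;
+  * \endcode
+  *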
+  * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
+  */
+template<typename _MatrixType> class PartialPivLU
+{
+  public:
+
+    typedef _MatrixType MatrixType;
+    enum {
+      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      Options = MatrixType::Options,
+      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
+    typedef typename MatrixType::Scalar Scalar;
+    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+    typedef typename internal::traits<MatrixType>::StorageKind StorageKind;
+    typedef typename MatrixType::Index Index;
+    typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
+    typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
+
+
+    /**
+    * \brief Default Constructor.
+    *
+    * The default constructor is useful in cases in which the user intends to
+    * perform decompositions via PartialPivLU::compute(const MatrixType&).
+    */
+    PartialPivLU();
+
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa PartialPivLU()
+      */
+    PartialPivLU(Index size);
+
+    /** Constructor.
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *
+      * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
+      * If you need to deal with non-full rank, use class FullPivLU instead.
+      */
+    PartialPivLU(const MatrixType& matrix);
+
+    PartialPivLU& compute(const MatrixType& matrix);
+
+    /** \returns the LU decomposition matrix: the upper-triangular part is U, the
+      * unit-lower-triangular part is L (at least for square matrices; in the non-square
+      * case, special care is needed, see the documentation of class FullPivLU).
+      *
+      * \sa matrixL(), matrixU()
+      */
+    inline const MatrixType& matrixLU() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return m_lu;
+    }
+
+    /** \returns the permutation matrix P.
+      */
+    inline const PermutationType& permutationP() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return m_p;
+    }
+
+    /** This method returns the solution x to the equation Ax=b, where A is the matrix of which
+      * *this is the LU decomposition.
+      *
+      * \param b the right-hand-side of the equation to solve. Can be a vector or a matrix,
+      *          the only requirement in order for the equation to make sense is that
+      *          b.rows()==A.rows(), where A is the matrix of which *this is the LU decomposition.
+      *
+      * \returns the solution.
+      *
+      * Example: \include PartialPivLU_solve.cpp
+      * Output: \verbinclude PartialPivLU_solve.out
+      *
+      * Since this PartialPivLU class assumes anyway that the matrix A is invertible, the solution
+      * theoretically exists and is unique regardless of b.
+      *
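+      * A minimal sketch with a matrix right-hand side (several systems solved at once):
+      * \code
+      * Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
+      * Eigen::MatrixXd B = Eigen::MatrixXd::Random(4, 3);
+      * Eigen::MatrixXd X = A.partialPivLu().solve(B); // solves A * X = B for all columns of B
+      * \endcode
+      *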
+      * \sa TriangularView::solve(), inverse(), computeInverse()
+      */
+    template<typename Rhs>
+    inline const internal::solve_retval<PartialPivLU, Rhs>
+    solve(const MatrixBase<Rhs>& b) const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return internal::solve_retval<PartialPivLU, Rhs>(*this, b.derived());
+    }
+
+    /** \returns the inverse of the matrix of which *this is the LU decomposition.
+      *
+      * \warning The matrix being decomposed here is assumed to be invertible. If you need to check for
+      *          invertibility, use class FullPivLU instead.
+      *
+      * \sa MatrixBase::inverse(), LU::inverse()
+      */
+    inline const internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> inverse() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType>
+               (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+    }
+
+    /** \returns the determinant of the matrix of which
+      * *this is the LU decomposition. It has only linear complexity
+      * (that is, O(n) where n is the dimension of the square matrix)
+      * as the LU decomposition has already been computed.
+      *
+      * \note For fixed-size matrices of size up to 4, MatrixBase::determinant() offers
+      *       optimized paths.
+      *
+      * \warning a determinant can be very big or small, so for matrices
+      * of large enough dimension, there is a risk of overflow/underflow.
+      *
+      * \sa MatrixBase::determinant()
+      */
+    typename internal::traits<MatrixType>::Scalar determinant() const;
+
+    MatrixType reconstructedMatrix() const;
+
+    inline Index rows() const { return m_lu.rows(); }
+    inline Index cols() const { return m_lu.cols(); }
+
+  protected:
+    
+    static void check_template_parameters()
+    {
+      EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
+    }
+    
+    MatrixType m_lu;
+    PermutationType m_p;
+    TranspositionType m_rowsTranspositions;
+    Index m_det_p;
+    bool m_isInitialized;
+};
+
+template<typename MatrixType>
+PartialPivLU<MatrixType>::PartialPivLU()
+  : m_lu(),
+    m_p(),
+    m_rowsTranspositions(),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+}
+
+template<typename MatrixType>
+PartialPivLU<MatrixType>::PartialPivLU(Index size)
+  : m_lu(size, size),
+    m_p(size),
+    m_rowsTranspositions(size),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+}
+
+template<typename MatrixType>
+PartialPivLU<MatrixType>::PartialPivLU(const MatrixType& matrix)
+  : m_lu(matrix.rows(), matrix.rows()),
+    m_p(matrix.rows()),
+    m_rowsTranspositions(matrix.rows()),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+  compute(matrix);
+}
+
+namespace internal {
+
+/** \internal Partial-pivoting LU decomposition, implemented in-place in both unblocked and blocked (recursive) variants. */
+template<typename Scalar, int StorageOrder, typename PivIndex>
+struct partial_lu_impl
+{
+  // FIXME add a stride to Map, so that the following mapping becomes easier,
+  // another option would be to create an expression being able to automatically
+  // wrap any Map, Matrix, and Block expressions as a unique type, but since that's exactly
+  // a Map + stride, why not add a stride to Map, and convenient ctors from a Matrix,
+  // and Block.
+  typedef Map<Matrix<Scalar, Dynamic, Dynamic, StorageOrder> > MapLU;
+  typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
+  typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef typename MatrixType::Index Index;
+
+  /** \internal performs the in-place LU decomposition of the matrix \a lu
+    * using an unblocked algorithm.
+    *
+    * In addition, this function returns the row transpositions in the
+    * vector \a row_transpositions which must have a size equal to the number
+    * of columns of the matrix \a lu, and an integer \a nb_transpositions
+    * which returns the actual number of transpositions.
+    *
+    * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
+    */
+  static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
+  {
+    const Index rows = lu.rows();
+    const Index cols = lu.cols();
+    const Index size = (std::min)(rows,cols);
+    nb_transpositions = 0;
+    Index first_zero_pivot = -1;
+    for(Index k = 0; k < size; ++k)
+    {
+      Index rrows = rows-k-1;
+      Index rcols = cols-k-1;
+        
+      Index row_of_biggest_in_col;
+      RealScalar biggest_in_corner
+        = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
+      row_of_biggest_in_col += k;
+
+      row_transpositions[k] = PivIndex(row_of_biggest_in_col);
+
+      if(biggest_in_corner != RealScalar(0))
+      {
+        if(k != row_of_biggest_in_col)
+        {
+          lu.row(k).swap(lu.row(row_of_biggest_in_col));
+          ++nb_transpositions;
+        }
+
+        // FIXME shall we introduce a safe quotient expression in case 1/lu.coeff(k,k)
+        // overflows but the actual quotient does not?
+        lu.col(k).tail(rrows) /= lu.coeff(k,k);
+      }
+      else if(first_zero_pivot==-1)
+      {
+        // the pivot is exactly zero: record the index of the first zero pivot
+        // and continue the factorization so that we still have A = PLU
+        first_zero_pivot = k;
+      }
+
+      if(k<rows-1)
+        lu.bottomRightCorner(rrows,rcols).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rcols);
+    }
+    return first_zero_pivot;
+  }
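+
+  // Rough cost sketch: step k performs a rank-1 update of the
+  // (rows-k-1) x (cols-k-1) trailing block, so for an n x n matrix the
+  // unblocked factorization costs about (2/3) n^3 flops.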
+
+  /** \internal performs the in-place LU decomposition of the matrix represented
+    * by the variables \a rows, \a cols, \a lu_data, and \a luStride using a
+    * recursive, blocked algorithm.
+    *
+    * In addition, this function returns the row transpositions in the
+    * vector \a row_transpositions which must have a size equal to the number
+    * of columns of the matrix \a lu, and an integer \a nb_transpositions
+    * which returns the actual number of transpositions.
+    *
+    * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise.
+    *
+    * \note This very low-level interface using pointers, etc. is to:
+    *   1 - reduce the number of instantiations to the strict minimum
+    *   2 - avoid infinite recursion of the instantiations with Block<Block<Block<...> > >
+    */
+  static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256)
+  {
+    MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols);
+    MatrixType lu(lu1,0,0,rows,cols);
+
+    const Index size = (std::min)(rows,cols);
+
+    // if the matrix is too small, no blocking:
+    if(size<=16)
+    {
+      return unblocked_lu(lu, row_transpositions, nb_transpositions);
+    }
+
+    // automatically adjust the number of subdivisions to the size
+    // of the matrix so that there are enough sub-blocks:
+    Index blockSize;
+    {
+      blockSize = size/8;
+      blockSize = (blockSize/16)*16;
+      blockSize = (std::min)((std::max)(blockSize,Index(8)), maxBlockSize);
+    }
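+    // Worked example of the heuristic above: for size == 1000 with the default
+    // maxBlockSize of 256, blockSize = 1000/8 = 125, rounded down to a multiple
+    // of 16 gives 112, which already lies in [8, 256], so 112 is used.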
+
+    nb_transpositions = 0;
+    Index first_zero_pivot = -1;
+    for(Index k = 0; k < size; k+=blockSize)
+    {
+      Index bs = (std::min)(size-k,blockSize); // actual size of the block
+      Index trows = rows - k - bs; // trailing rows
+      Index tsize = size - k - bs; // trailing size
+
+      // partition the matrix:
+      //                          A00 | A01 | A02
+      // lu  = A_0 | A_1 | A_2 =  A10 | A11 | A12
+      //                          A20 | A21 | A22
+      BlockType A_0(lu,0,0,rows,k);
+      BlockType A_2(lu,0,k+bs,rows,tsize);
+      BlockType A11(lu,k,k,bs,bs);
+      BlockType A12(lu,k,k+bs,bs,tsize);
+      BlockType A21(lu,k+bs,k,trows,bs);
+      BlockType A22(lu,k+bs,k+bs,trows,tsize);
+
+      PivIndex nb_transpositions_in_panel;
+      // recursively call the blocked LU algorithm on [A11^T A21^T]^T
+      // with a very small blocking size:
+      Index ret = blocked_lu(trows+bs, bs, &lu.coeffRef(k,k), luStride,
+                   row_transpositions+k, nb_transpositions_in_panel, 16);
+      if(ret>=0 && first_zero_pivot==-1)
+        first_zero_pivot = k+ret;
+
+      nb_transpositions += nb_transpositions_in_panel;
+      // update permutations and apply them to A_0
+      for(Index i=k; i<k+bs; ++i)
+      {
+        Index piv = (row_transpositions[i] += k);
+        A_0.row(i).swap(A_0.row(piv));
+      }
+
+      if(trows)
+      {
+        // apply permutations to A_2
+        for(Index i=k;i<k+bs; ++i)
+          A_2.row(i).swap(A_2.row(row_transpositions[i]));
+
+        // A12 = A11^-1 A12
+        A11.template triangularView<UnitLower>().solveInPlace(A12);
+
+        A22.noalias() -= A21 * A12;
+      }
+    }
+    return first_zero_pivot;
+  }
+};
+
+/** \internal performs the LU decomposition with partial pivoting in-place.
+  */
+template<typename MatrixType, typename TranspositionType>
+void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::Index& nb_transpositions)
+{
+  eigen_assert(lu.cols() == row_transpositions.size());
+  eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
+
+  partial_lu_impl
+    <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::Index>
+    ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
+}
+
+} // end namespace internal
+
+template<typename MatrixType>
+PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
+{
+  check_template_parameters();
+  
+  // the row permutation is stored as int indices, so just to be sure:
+  eigen_assert(matrix.rows()<NumTraits<int>::highest());
+  
+  m_lu = matrix;
+
+  eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
+  const Index size = matrix.rows();
+
+  m_rowsTranspositions.resize(size);
+
+  typename TranspositionType::Index nb_transpositions;
+  internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions);
+  m_det_p = (nb_transpositions%2) ? -1 : 1;
+
+  m_p = m_rowsTranspositions;
+
+  m_isInitialized = true;
+  return *this;
+}
+
+template<typename MatrixType>
+typename internal::traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
+{
+  eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+  return Scalar(m_det_p) * m_lu.diagonal().prod();
+}
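+
+// Reasoning sketch for the formula above: with P*A = L*U, L is unit lower
+// triangular (det(L) == 1) and det(P) == (-1)^nb_transpositions == m_det_p,
+// so det(A) = det(P)^{-1} * det(U) = m_det_p * prod(diag(U)), using the fact
+// that a permutation determinant is +-1 and thus equals its own inverse.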
+
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^{-1} L U.
+ * This function is provided for debugging purposes. */
+template<typename MatrixType>
+MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
+{
+  eigen_assert(m_isInitialized && "LU is not initialized.");
+  // LU
+  MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
+                 * m_lu.template triangularView<Upper>();
+
+  // P^{-1}(LU)
+  res = m_p.inverse() * res;
+
+  return res;
+}
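+
+// Sanity-check sketch (A is an illustrative name): for a reasonably conditioned A,
+//   (PartialPivLU<MatrixXd>(A).reconstructedMatrix() - A).norm()
+// is expected to be small relative to A.norm().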
+
+/***** Implementation of solve() *****************************************************/
+
+namespace internal {
+
+template<typename _MatrixType, typename Rhs>
+struct solve_retval<PartialPivLU<_MatrixType>, Rhs>
+  : solve_retval_base<PartialPivLU<_MatrixType>, Rhs>
+{
+  EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs)
+
+  template<typename Dest> void evalTo(Dest& dst) const
+  {
+    /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
+    * So we proceed as follows:
+    * Step 1: compute c = Pb.
+    * Step 2: replace c by the solution x to Lx = c.
+    * Step 3: replace c by the solution x to Ux = c.
+    */
+
+    eigen_assert(rhs().rows() == dec().matrixLU().rows());
+
+    // Step 1
+    dst = dec().permutationP() * rhs();
+
+    // Step 2
+    dec().matrixLU().template triangularView<UnitLower>().solveInPlace(dst);
+
+    // Step 3
+    dec().matrixLU().template triangularView<Upper>().solveInPlace(dst);
+  }
+};
+
+} // end namespace internal
+
+/******** MatrixBase methods *******/
+
+/** \lu_module
+  *
+  * \return the partial-pivoting LU decomposition of \c *this.
+  *
+  * \sa class PartialPivLU
+  */
+template<typename Derived>
+inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::partialPivLu() const
+{
+  return PartialPivLU<PlainObject>(eval());
+}
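+
+// Minimal sketch (A, b and x are illustrative names): for a square, invertible A,
+//   Eigen::VectorXd x = A.partialPivLu().solve(b);
+// solves A*x = b without explicitly forming the inverse of A.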
+
+#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
+/** \lu_module
+  *
+  * Synonym of partialPivLu().
+  *
+  * \return the partial-pivoting LU decomposition of \c *this.
+  *
+  * \sa class PartialPivLU
+  */
+template<typename Derived>
+inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
+MatrixBase<Derived>::lu() const
+{
+  return PartialPivLU<PlainObject>(eval());
+}
+#endif
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARTIALLU_H
diff --git a/Eigen/src/LU/PartialPivLU_MKL.h b/Eigen/src/LU/PartialPivLU_MKL.h
new file mode 100644
index 0000000..9035953
--- /dev/null
+++ b/Eigen/src/LU/PartialPivLU_MKL.h
@@ -0,0 +1,85 @@
+/*
+ Copyright (c) 2011, Intel Corporation. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors may
+   be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ ********************************************************************************
+ *   Content : Eigen bindings to Intel(R) MKL
+ *     LU decomposition with partial pivoting based on LAPACKE_?getrf function.
+ ********************************************************************************
+*/
+
+#ifndef EIGEN_PARTIALLU_LAPACK_H
+#define EIGEN_PARTIALLU_LAPACK_H
+
+#include "Eigen/src/Core/util/MKL_support.h"
+
+namespace Eigen { 
+
+namespace internal {
+
+/** \internal Specialization for the data types supported by MKL */
+
+#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \
+template<int StorageOrder> \
+struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
+{ \
+  /* \internal performs the LU decomposition in-place of the matrix represented by the given data, delegating to LAPACKE_?getrf */ \
+  static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
+  { \
+    EIGEN_UNUSED_VARIABLE(maxBlockSize);\
+    lapack_int matrix_order, first_zero_pivot; \
+    lapack_int m, n, lda, *ipiv, info; \
+    EIGTYPE* a; \
+/* Set up parameters for ?getrf */ \
+    matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
+    lda = luStride; \
+    a = lu_data; \
+    ipiv = row_transpositions; \
+    m = rows; \
+    n = cols; \
+    nb_transpositions = 0; \
+\
+    info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \
+\
+    for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \
+\
+    eigen_assert(info >= 0); \
+/* something should be done with nb_transpositions */ \
+\
+    first_zero_pivot = info; \
+    return first_zero_pivot; \
+  } \
+};
+
+EIGEN_MKL_LU_PARTPIV(double, double, d)
+EIGEN_MKL_LU_PARTPIV(float, float, s)
+EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z)
+EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c)
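+
+// For reference, the four instantiations above expand to specializations that
+// call LAPACKE_dgetrf, LAPACKE_sgetrf, LAPACKE_zgetrf and LAPACKE_cgetrf
+// respectively; the loop in the macro converts LAPACK's 1-based pivot indices
+// into the 0-based row transpositions expected by PartialPivLU.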
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_PARTIALLU_LAPACK_H
diff --git a/Eigen/src/LU/arch/CMakeLists.txt b/Eigen/src/LU/arch/CMakeLists.txt
new file mode 100644
index 0000000..f6b7ed9
--- /dev/null
+++ b/Eigen/src/LU/arch/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_LU_arch_SRCS "*.h")
+
+INSTALL(FILES
+  ${Eigen_LU_arch_SRCS}
+  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel
+  )
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
new file mode 100644
index 0000000..60b7a23
--- /dev/null
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -0,0 +1,329 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2001 Intel Corporation
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+// The SSE code for the 4x4 float and double matrix inverse in this file
+// comes from the following Intel library:
+// http://software.intel.com/en-us/articles/optimized-matrix-library-for-use-with-the-intel-pentiumr-4-processors-sse2-instructions/
+//
+// Here is the respective copyright and license statement:
+//
+//   Copyright (c) 2001 Intel Corporation.
+//
+// Permition is granted to use, copy, distribute and prepare derivative works
+// of this library for any purpose and without fee, provided, that the above
+// copyright notice and this statement appear in all copies.
+// Intel makes no representations about the suitability of this software for
+// any purpose, and specifically disclaims all warranties.
+// See LEGAL.TXT for all the legal information.
+
+#ifndef EIGEN_INVERSE_SSE_H
+#define EIGEN_INVERSE_SSE_H
+
+namespace Eigen { 
+
+namespace internal {
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
+{
+  enum {
+    MatrixAlignment     = bool(MatrixType::Flags&AlignedBit),
+    ResultAlignment     = bool(ResultType::Flags&AlignedBit),
+    StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
+  };
+  
+  static void run(const MatrixType& matrix, ResultType& result)
+  {
+    EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
+
+    // Load the full matrix into registers
+    __m128 _L1 = matrix.template packet<MatrixAlignment>( 0);
+    __m128 _L2 = matrix.template packet<MatrixAlignment>( 4);
+    __m128 _L3 = matrix.template packet<MatrixAlignment>( 8);
+    __m128 _L4 = matrix.template packet<MatrixAlignment>(12);
+
+    // The inverse is calculated using the "divide and conquer" technique. The
+    // original matrix is divided into four 2x2 sub-matrices. Since each
+    // register holds four matrix elements, each smaller matrix is
+    // represented by a single register. Hence we get better locality in the
+    // calculations.
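+    //
+    // In block form, with X# denoting the adjugate of a 2x2 block X (matching
+    // the step-by-step comments below):
+    //   det(M) = |A|*|D| + |B|*|C| - trace(A#*B * D#*C)
+    //   iA = A*|D| - B*D#*C,   iB = C*|B| - D*B#*A,
+    //   iC = B*|C| - A*C#*D,   iD = D*|A| - C*A#*B,
+    // and the inverse is assembled from the adjugates of these blocks, each
+    // scaled by 1/det(M).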
+
+    __m128 A, B, C, D; // the four sub-matrices
+    if(!StorageOrdersMatch)
+    {
+      A = _mm_unpacklo_ps(_L1, _L2);
+      B = _mm_unpacklo_ps(_L3, _L4);
+      C = _mm_unpackhi_ps(_L1, _L2);
+      D = _mm_unpackhi_ps(_L3, _L4);
+    }
+    else
+    {
+      A = _mm_movelh_ps(_L1, _L2);
+      B = _mm_movehl_ps(_L2, _L1);
+      C = _mm_movelh_ps(_L3, _L4);
+      D = _mm_movehl_ps(_L4, _L3);
+    }
+
+    __m128 iA, iB, iC, iD,                 // partial inverse of the sub-matrices
+            DC, AB;
+    __m128 dA, dB, dC, dD;                 // determinant of the sub-matrices
+    __m128 det, d, d1, d2;
+    __m128 rd;                             // reciprocal of the determinant
+
+    //  AB = A# * B
+    AB = _mm_mul_ps(_mm_shuffle_ps(A,A,0x0F), B);
+    AB = _mm_sub_ps(AB,_mm_mul_ps(_mm_shuffle_ps(A,A,0xA5), _mm_shuffle_ps(B,B,0x4E)));
+    //  DC = D# * C
+    DC = _mm_mul_ps(_mm_shuffle_ps(D,D,0x0F), C);
+    DC = _mm_sub_ps(DC,_mm_mul_ps(_mm_shuffle_ps(D,D,0xA5), _mm_shuffle_ps(C,C,0x4E)));
+
+    //  dA = |A|
+    dA = _mm_mul_ps(_mm_shuffle_ps(A, A, 0x5F),A);
+    dA = _mm_sub_ss(dA, _mm_movehl_ps(dA,dA));
+    //  dB = |B|
+    dB = _mm_mul_ps(_mm_shuffle_ps(B, B, 0x5F),B);
+    dB = _mm_sub_ss(dB, _mm_movehl_ps(dB,dB));
+
+    //  dC = |C|
+    dC = _mm_mul_ps(_mm_shuffle_ps(C, C, 0x5F),C);
+    dC = _mm_sub_ss(dC, _mm_movehl_ps(dC,dC));
+    //  dD = |D|
+    dD = _mm_mul_ps(_mm_shuffle_ps(D, D, 0x5F),D);
+    dD = _mm_sub_ss(dD, _mm_movehl_ps(dD,dD));
+
+    //  d = trace(AB*DC) = trace(A#*B*D#*C)
+    d = _mm_mul_ps(_mm_shuffle_ps(DC,DC,0xD8),AB);
+
+    //  iD = C*A#*B
+    iD = _mm_mul_ps(_mm_shuffle_ps(C,C,0xA0), _mm_movelh_ps(AB,AB));
+    iD = _mm_add_ps(iD,_mm_mul_ps(_mm_shuffle_ps(C,C,0xF5), _mm_movehl_ps(AB,AB)));
+    //  iA = B*D#*C
+    iA = _mm_mul_ps(_mm_shuffle_ps(B,B,0xA0), _mm_movelh_ps(DC,DC));
+    iA = _mm_add_ps(iA,_mm_mul_ps(_mm_shuffle_ps(B,B,0xF5), _mm_movehl_ps(DC,DC)));
+
+    //  d = trace(AB*DC) = trace(A#*B*D#*C) [continue]
+    d  = _mm_add_ps(d, _mm_movehl_ps(d, d));
+    d  = _mm_add_ss(d, _mm_shuffle_ps(d, d, 1));
+    d1 = _mm_mul_ss(dA,dD);
+    d2 = _mm_mul_ss(dB,dC);
+
+    //  iD = D*|A| - C*A#*B
+    iD = _mm_sub_ps(_mm_mul_ps(D,_mm_shuffle_ps(dA,dA,0)), iD);
+
+    //  iA = A*|D| - B*D#*C;
+    iA = _mm_sub_ps(_mm_mul_ps(A,_mm_shuffle_ps(dD,dD,0)), iA);
+
+    //  det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
+    det = _mm_sub_ss(_mm_add_ss(d1,d2),d);
+    rd  = _mm_div_ss(_mm_set_ss(1.0f), det);
+
+//     #ifdef ZERO_SINGULAR
+//         rd = _mm_and_ps(_mm_cmpneq_ss(det,_mm_setzero_ps()), rd);
+//     #endif
+
+    //  iB = D * (A#B)# = D*B#*A
+    iB = _mm_mul_ps(D, _mm_shuffle_ps(AB,AB,0x33));
+    iB = _mm_sub_ps(iB, _mm_mul_ps(_mm_shuffle_ps(D,D,0xB1), _mm_shuffle_ps(AB,AB,0x66)));
+    //  iC = A * (D#C)# = A*C#*D
+    iC = _mm_mul_ps(A, _mm_shuffle_ps(DC,DC,0x33));
+    iC = _mm_sub_ps(iC, _mm_mul_ps(_mm_shuffle_ps(A,A,0xB1), _mm_shuffle_ps(DC,DC,0x66)));
+
+    rd = _mm_shuffle_ps(rd,rd,0);
+    rd = _mm_xor_ps(rd, _mm_load_ps((float*)_Sign_PNNP));
+
+    //  iB = C*|B| - D*B#*A
+    iB = _mm_sub_ps(_mm_mul_ps(C,_mm_shuffle_ps(dB,dB,0)), iB);
+
+    //  iC = B*|C| - A*C#*D;
+    iC = _mm_sub_ps(_mm_mul_ps(B,_mm_shuffle_ps(dC,dC,0)), iC);
+
+    //  iX = iX / det
+    iA = _mm_mul_ps(rd,iA);
+    iB = _mm_mul_ps(rd,iB);
+    iC = _mm_mul_ps(rd,iC);
+    iD = _mm_mul_ps(rd,iD);
+
+    result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77));
+    result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22));
+    result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77));
+    result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22));
+  }
+
+};
+
+template<typename MatrixType, typename ResultType>
+struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
+{
+  enum {
+    MatrixAlignment = bool(MatrixType::Flags&AlignedBit),
+    ResultAlignment = bool(ResultType::Flags&AlignedBit),
+    StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
+  };
+  static void run(const MatrixType& matrix, ResultType& result)
+  {
+    const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
+    const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
+
+    // The inverse is calculated using the "divide and conquer" technique. The
+    // original matrix is divided into four 2x2 sub-matrices. Since each
+    // register holds two matrix elements, each smaller matrix is made up
+    // of two registers. Hence we get better locality in the
+    // calculations.
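+    //
+    // The block formulas are the same as in the float specialization above
+    // (det(M) = |A|*|D| + |B|*|C| - trace(A#*B * D#*C), etc.); the only
+    // difference is that each 2x2 block now spans two __m128d registers.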
+
+    // the four sub-matrices
+    __m128d A1, A2, B1, B2, C1, C2, D1, D2;
+    
+    if(StorageOrdersMatch)
+    {
+      A1 = matrix.template packet<MatrixAlignment>( 0); B1 = matrix.template packet<MatrixAlignment>( 2);
+      A2 = matrix.template packet<MatrixAlignment>( 4); B2 = matrix.template packet<MatrixAlignment>( 6);
+      C1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
+      C2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
+    }
+    else
+    {
+      __m128d tmp;
+      A1 = matrix.template packet<MatrixAlignment>( 0); C1 = matrix.template packet<MatrixAlignment>( 2);
+      A2 = matrix.template packet<MatrixAlignment>( 4); C2 = matrix.template packet<MatrixAlignment>( 6);
+      tmp = A1;
+      A1 = _mm_unpacklo_pd(A1,A2);
+      A2 = _mm_unpackhi_pd(tmp,A2);
+      tmp = C1;
+      C1 = _mm_unpacklo_pd(C1,C2);
+      C2 = _mm_unpackhi_pd(tmp,C2);
+      
+      B1 = matrix.template packet<MatrixAlignment>( 8); D1 = matrix.template packet<MatrixAlignment>(10);
+      B2 = matrix.template packet<MatrixAlignment>(12); D2 = matrix.template packet<MatrixAlignment>(14);
+      tmp = B1;
+      B1 = _mm_unpacklo_pd(B1,B2);
+      B2 = _mm_unpackhi_pd(tmp,B2);
+      tmp = D1;
+      D1 = _mm_unpacklo_pd(D1,D2);
+      D2 = _mm_unpackhi_pd(tmp,D2);
+    }
+    
+    __m128d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2,     // partial inverse of the sub-matrices
+            DC1, DC2, AB1, AB2;
+    __m128d dA, dB, dC, dD;     // determinant of the sub-matrices
+    __m128d det, d1, d2, rd;
+
+    //  dA = |A|
+    dA = _mm_shuffle_pd(A2, A2, 1);
+    dA = _mm_mul_pd(A1, dA);
+    dA = _mm_sub_sd(dA, _mm_shuffle_pd(dA,dA,3));
+    //  dB = |B|
+    dB = _mm_shuffle_pd(B2, B2, 1);
+    dB = _mm_mul_pd(B1, dB);
+    dB = _mm_sub_sd(dB, _mm_shuffle_pd(dB,dB,3));
+
+    //  AB = A# * B
+    AB1 = _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,3));
+    AB2 = _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,0));
+    AB1 = _mm_sub_pd(AB1, _mm_mul_pd(B2, _mm_shuffle_pd(A1,A1,3)));
+    AB2 = _mm_sub_pd(AB2, _mm_mul_pd(B1, _mm_shuffle_pd(A2,A2,0)));
+
+    //  dC = |C|
+    dC = _mm_shuffle_pd(C2, C2, 1);
+    dC = _mm_mul_pd(C1, dC);
+    dC = _mm_sub_sd(dC, _mm_shuffle_pd(dC,dC,3));
+    //  dD = |D|
+    dD = _mm_shuffle_pd(D2, D2, 1);
+    dD = _mm_mul_pd(D1, dD);
+    dD = _mm_sub_sd(dD, _mm_shuffle_pd(dD,dD,3));
+
+    //  DC = D# * C
+    DC1 = _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,3));
+    DC2 = _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,0));
+    DC1 = _mm_sub_pd(DC1, _mm_mul_pd(C2, _mm_shuffle_pd(D1,D1,3)));
+    DC2 = _mm_sub_pd(DC2, _mm_mul_pd(C1, _mm_shuffle_pd(D2,D2,0)));
+
+    //  rd = trace(AB*DC) = trace(A#*B*D#*C)
+    d1 = _mm_mul_pd(AB1, _mm_shuffle_pd(DC1, DC2, 0));
+    d2 = _mm_mul_pd(AB2, _mm_shuffle_pd(DC1, DC2, 3));
+    rd = _mm_add_pd(d1, d2);
+    rd = _mm_add_sd(rd, _mm_shuffle_pd(rd, rd,3));
+
+    //  iD = C*A#*B
+    iD1 = _mm_mul_pd(AB1, _mm_shuffle_pd(C1,C1,0));
+    iD2 = _mm_mul_pd(AB1, _mm_shuffle_pd(C2,C2,0));
+    iD1 = _mm_add_pd(iD1, _mm_mul_pd(AB2, _mm_shuffle_pd(C1,C1,3)));
+    iD2 = _mm_add_pd(iD2, _mm_mul_pd(AB2, _mm_shuffle_pd(C2,C2,3)));
+
+    //  iA = B*D#*C
+    iA1 = _mm_mul_pd(DC1, _mm_shuffle_pd(B1,B1,0));
+    iA2 = _mm_mul_pd(DC1, _mm_shuffle_pd(B2,B2,0));
+    iA1 = _mm_add_pd(iA1, _mm_mul_pd(DC2, _mm_shuffle_pd(B1,B1,3)));
+    iA2 = _mm_add_pd(iA2, _mm_mul_pd(DC2, _mm_shuffle_pd(B2,B2,3)));
+
+    //  iD = D*|A| - C*A#*B
+    dA = _mm_shuffle_pd(dA,dA,0);
+    iD1 = _mm_sub_pd(_mm_mul_pd(D1, dA), iD1);
+    iD2 = _mm_sub_pd(_mm_mul_pd(D2, dA), iD2);
+
+    //  iA = A*|D| - B*D#*C;
+    dD = _mm_shuffle_pd(dD,dD,0);
+    iA1 = _mm_sub_pd(_mm_mul_pd(A1, dD), iA1);
+    iA2 = _mm_sub_pd(_mm_mul_pd(A2, dD), iA2);
+
+    d1 = _mm_mul_sd(dA, dD);
+    d2 = _mm_mul_sd(dB, dC);
+
+    //  iB = D * (A#B)# = D*B#*A
+    iB1 = _mm_mul_pd(D1, _mm_shuffle_pd(AB2,AB1,1));
+    iB2 = _mm_mul_pd(D2, _mm_shuffle_pd(AB2,AB1,1));
+    iB1 = _mm_sub_pd(iB1, _mm_mul_pd(_mm_shuffle_pd(D1,D1,1), _mm_shuffle_pd(AB2,AB1,2)));
+    iB2 = _mm_sub_pd(iB2, _mm_mul_pd(_mm_shuffle_pd(D2,D2,1), _mm_shuffle_pd(AB2,AB1,2)));
+
+    //  det = |A|*|D| + |B|*|C| - trace(A#*B*D#*C)
+    det = _mm_add_sd(d1, d2);
+    det = _mm_sub_sd(det, rd);
+
+    //  iC = A * (D#C)# = A*C#*D
+    iC1 = _mm_mul_pd(A1, _mm_shuffle_pd(DC2,DC1,1));
+    iC2 = _mm_mul_pd(A2, _mm_shuffle_pd(DC2,DC1,1));
+    iC1 = _mm_sub_pd(iC1, _mm_mul_pd(_mm_shuffle_pd(A1,A1,1), _mm_shuffle_pd(DC2,DC1,2)));
+    iC2 = _mm_sub_pd(iC2, _mm_mul_pd(_mm_shuffle_pd(A2,A2,1), _mm_shuffle_pd(DC2,DC1,2)));
+
+    rd = _mm_div_sd(_mm_set_sd(1.0), det);
+//     #ifdef ZERO_SINGULAR
+//         rd = _mm_and_pd(_mm_cmpneq_sd(det,_mm_setzero_pd()), rd);
+//     #endif
+    rd = _mm_shuffle_pd(rd,rd,0);
+
+    //  iB = C*|B| - D*B#*A
+    dB = _mm_shuffle_pd(dB,dB,0);
+    iB1 = _mm_sub_pd(_mm_mul_pd(C1, dB), iB1);
+    iB2 = _mm_sub_pd(_mm_mul_pd(C2, dB), iB2);
+
+    d1 = _mm_xor_pd(rd, _Sign_PN);
+    d2 = _mm_xor_pd(rd, _Sign_NP);
+
+    //  iC = B*|C| - A*C#*D;
+    dC = _mm_shuffle_pd(dC,dC,0);
+    iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
+    iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
+
+    result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));     // iA# / det
+    result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
+    result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));     // iB# / det
+    result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
+    result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));     // iC# / det
+    result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
+    result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));     // iD# / det
+    result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
+  }
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_INVERSE_SSE_H