Squashed 'third_party/eigen/' changes from 61d72f6..cf794d3


Change-Id: I9b814151b01f49af6337a8605d0c42a3a1ed4c72
git-subtree-dir: third_party/eigen
git-subtree-split: cf794d3b741a6278df169e58461f8529f43bce5d
diff --git a/Eigen/src/LU/CMakeLists.txt b/Eigen/src/LU/CMakeLists.txt
deleted file mode 100644
index e0d8d78..0000000
--- a/Eigen/src/LU/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-FILE(GLOB Eigen_LU_SRCS "*.h")
-
-INSTALL(FILES 
-  ${Eigen_LU_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel
-  )
-
-ADD_SUBDIRECTORY(arch)
diff --git a/Eigen/src/LU/Determinant.h b/Eigen/src/LU/Determinant.h
index bb8e78a..d6a3c1e 100644
--- a/Eigen/src/LU/Determinant.h
+++ b/Eigen/src/LU/Determinant.h
@@ -92,7 +92,7 @@
 inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
 {
   eigen_assert(rows() == cols());
-  typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested;
+  typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested;
   return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
 }
 
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 26bc714..03b6af7 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -10,7 +10,18 @@
 #ifndef EIGEN_LU_H
 #define EIGEN_LU_H
 
-namespace Eigen { 
+namespace Eigen {
+
+namespace internal {
+template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+  typedef MatrixXpr XprKind;
+  typedef SolverStorage StorageKind;
+  enum { Flags = 0 };
+};
+
+} // end namespace internal
 
 /** \ingroup LU_Module
   *
@@ -18,7 +29,7 @@
   *
   * \brief LU decomposition of a matrix with complete pivoting, and related features
   *
-  * \param MatrixType the type of the matrix of which we are computing the LU decomposition
+  * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
   *
   * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is
   * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is
@@ -41,27 +52,28 @@
   * \include class_FullPivLU.cpp
   * Output: \verbinclude class_FullPivLU.out
   *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
   * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse()
   */
 template<typename _MatrixType> class FullPivLU
+  : public SolverBase<FullPivLU<_MatrixType> >
 {
   public:
     typedef _MatrixType MatrixType;
+    typedef SolverBase<FullPivLU> Base;
+
+    EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU)
+    // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
     enum {
-      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-      Options = MatrixType::Options,
       MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
       MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
     };
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef typename internal::traits<MatrixType>::StorageKind StorageKind;
-    typedef typename MatrixType::Index Index;
-    typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType;
-    typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType;
+    typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType;
+    typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType;
     typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType;
     typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType;
+    typedef typename MatrixType::PlainObject PlainObject;
 
     /**
       * \brief Default Constructor.
@@ -84,7 +96,17 @@
       * \param matrix the matrix of which to compute the LU decomposition.
       *               It is required to be nonzero.
       */
-    FullPivLU(const MatrixType& matrix);
+    template<typename InputType>
+    explicit FullPivLU(const EigenBase<InputType>& matrix);
+
+    /** \brief Constructs a LU factorization from a given matrix
+      *
+      * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref.
+      *
+      * \sa FullPivLU(const EigenBase&)
+      */
+    template<typename InputType>
+    explicit FullPivLU(EigenBase<InputType>& matrix);
 
     /** Computes the LU decomposition of the given matrix.
       *
@@ -93,7 +115,12 @@
       *
       * \returns a reference to *this
       */
-    FullPivLU& compute(const MatrixType& matrix);
+    template<typename InputType>
+    FullPivLU& compute(const EigenBase<InputType>& matrix) {
+      m_lu = matrix.derived();
+      computeInPlace();
+      return *this;
+    }
 
     /** \returns the LU decomposition matrix: the upper-triangular part is U, the
       * unit-lower-triangular part is L (at least for square matrices; in the non-square
@@ -129,7 +156,7 @@
       *
       * \sa permutationQ()
       */
-    inline const PermutationPType& permutationP() const
+    EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const
     {
       eigen_assert(m_isInitialized && "LU is not initialized.");
       return m_p;
@@ -166,7 +193,7 @@
     }
 
     /** \returns the image of the matrix, also called its column-space. The columns of the returned matrix
-      * will form a basis of the kernel.
+      * will form a basis of the image (column-space).
       *
       * \param originalMatrix the original matrix, of which *this is the LU decomposition.
       *                       The reason why it is needed to pass it here, is that this allows
@@ -210,12 +237,22 @@
       *
       * \sa TriangularView::solve(), kernel(), inverse()
       */
+    // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
     template<typename Rhs>
-    inline const internal::solve_retval<FullPivLU, Rhs>
+    inline const Solve<FullPivLU, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "LU is not initialized.");
-      return internal::solve_retval<FullPivLU, Rhs>(*this, b.derived());
+      return Solve<FullPivLU, Rhs>(*this, b.derived());
+    }
+
+    /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
+        the LU decomposition.
+      */
+    inline RealScalar rcond() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return internal::rcond_estimate_helper(m_l1_norm, *this);
     }
 
     /** \returns the determinant of the matrix of which
@@ -360,33 +397,46 @@
       *
       * \sa MatrixBase::inverse()
       */
-    inline const internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> inverse() const
+    inline const Inverse<FullPivLU> inverse() const
     {
       eigen_assert(m_isInitialized && "LU is not initialized.");
       eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!");
-      return internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType>
-               (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+      return Inverse<FullPivLU>(*this);
     }
 
     MatrixType reconstructedMatrix() const;
 
-    inline Index rows() const { return m_lu.rows(); }
-    inline Index cols() const { return m_lu.cols(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const;
+
+    template<bool Conjugate, typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const;
+    #endif
 
   protected:
-    
+
     static void check_template_parameters()
     {
       EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
     }
-    
+
+    void computeInPlace();
+
     MatrixType m_lu;
     PermutationPType m_p;
     PermutationQType m_q;
     IntColVectorType m_rowsTranspositions;
     IntRowVectorType m_colsTranspositions;
-    Index m_det_pq, m_nonzero_pivots;
+    Index m_nonzero_pivots;
+    RealScalar m_l1_norm;
     RealScalar m_maxpivot, m_prescribedThreshold;
+    signed char m_det_pq;
     bool m_isInitialized, m_usePrescribedThreshold;
 };
 
@@ -409,7 +459,8 @@
 }
 
 template<typename MatrixType>
-FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
+template<typename InputType>
+FullPivLU<MatrixType>::FullPivLU(const EigenBase<InputType>& matrix)
   : m_lu(matrix.rows(), matrix.cols()),
     m_p(matrix.rows()),
     m_q(matrix.cols()),
@@ -418,28 +469,41 @@
     m_isInitialized(false),
     m_usePrescribedThreshold(false)
 {
-  compute(matrix);
+  compute(matrix.derived());
 }
 
 template<typename MatrixType>
-FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
+template<typename InputType>
+FullPivLU<MatrixType>::FullPivLU(EigenBase<InputType>& matrix)
+  : m_lu(matrix.derived()),
+    m_p(matrix.rows()),
+    m_q(matrix.cols()),
+    m_rowsTranspositions(matrix.rows()),
+    m_colsTranspositions(matrix.cols()),
+    m_isInitialized(false),
+    m_usePrescribedThreshold(false)
+{
+  computeInPlace();
+}
+
+template<typename MatrixType>
+void FullPivLU<MatrixType>::computeInPlace()
 {
   check_template_parameters();
-  
-  // the permutations are stored as int indices, so just to be sure:
-  eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
-  
-  m_isInitialized = true;
-  m_lu = matrix;
 
-  const Index size = matrix.diagonalSize();
-  const Index rows = matrix.rows();
-  const Index cols = matrix.cols();
+  // the permutations are stored as int indices, so just to be sure:
+  eigen_assert(m_lu.rows()<=NumTraits<int>::highest() && m_lu.cols()<=NumTraits<int>::highest());
+
+  m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
+
+  const Index size = m_lu.diagonalSize();
+  const Index rows = m_lu.rows();
+  const Index cols = m_lu.cols();
 
   // will store the transpositions, before we accumulate them at the end.
   // can't accumulate on-the-fly because that will be done in reverse order for the rows.
-  m_rowsTranspositions.resize(matrix.rows());
-  m_colsTranspositions.resize(matrix.cols());
+  m_rowsTranspositions.resize(m_lu.rows());
+  m_colsTranspositions.resize(m_lu.cols());
   Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i
 
   m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
@@ -451,14 +515,16 @@
 
     // biggest coefficient in the remaining bottom-right corner (starting at row k, col k)
     Index row_of_biggest_in_corner, col_of_biggest_in_corner;
-    RealScalar biggest_in_corner;
+    typedef internal::scalar_score_coeff_op<Scalar> Scoring;
+    typedef typename Scoring::result_type Score;
+    Score biggest_in_corner;
     biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k)
-                        .cwiseAbs()
+                        .unaryExpr(Scoring())
                         .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner);
     row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
     col_of_biggest_in_corner += k; // need to add k to them.
 
-    if(biggest_in_corner==RealScalar(0))
+    if(biggest_in_corner==Score(0))
     {
       // before exiting, make sure to initialize the still uninitialized transpositions
       // in a sane state without destroying what we already have.
@@ -471,7 +537,8 @@
       break;
     }
 
-    if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner;
+    RealScalar abs_pivot = internal::abs_knowing_score<Scalar>()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner);
+    if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot;
 
     // Now that we've found the pivot, we need to apply the row/col swaps to
     // bring it to the location (k,k).
@@ -508,7 +575,8 @@
     m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
 
   m_det_pq = (number_of_transpositions%2) ? -1 : 1;
-  return *this;
+
+  m_isInitialized = true;
 }
 
 template<typename MatrixType>
@@ -671,64 +739,136 @@
 
 /***** Implementation of solve() *****************************************************/
 
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<FullPivLU<_MatrixType>, Rhs>
-  : solve_retval_base<FullPivLU<_MatrixType>, Rhs>
+} // end namespace internal
+
+#ifndef EIGEN_PARSED_BY_DOXYGEN
+template<typename _MatrixType>
+template<typename RhsType, typename DstType>
+void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const
 {
-  EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs)
+  /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
+  * So we proceed as follows:
+  * Step 1: compute c = P * rhs.
+  * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
+  * Step 3: replace c by the solution x to Ux = c. May or may not exist.
+  * Step 4: result = Q * c;
+  */
 
-  template<typename Dest> void evalTo(Dest& dst) const
+  const Index rows = this->rows(),
+              cols = this->cols(),
+              nonzero_pivots = this->rank();
+  eigen_assert(rhs.rows() == rows);
+  const Index smalldim = (std::min)(rows, cols);
+
+  if(nonzero_pivots == 0)
   {
-    /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}.
-     * So we proceed as follows:
-     * Step 1: compute c = P * rhs.
-     * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible.
-     * Step 3: replace c by the solution x to Ux = c. May or may not exist.
-     * Step 4: result = Q * c;
-     */
+    dst.setZero();
+    return;
+  }
 
-    const Index rows = dec().rows(), cols = dec().cols(),
-              nonzero_pivots = dec().nonzeroPivots();
-    eigen_assert(rhs().rows() == rows);
-    const Index smalldim = (std::min)(rows, cols);
+  typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
 
-    if(nonzero_pivots == 0)
-    {
-      dst.setZero();
-      return;
-    }
+  // Step 1
+  c = permutationP() * rhs;
 
-    typename Rhs::PlainObject c(rhs().rows(), rhs().cols());
+  // Step 2
+  m_lu.topLeftCorner(smalldim,smalldim)
+      .template triangularView<UnitLower>()
+      .solveInPlace(c.topRows(smalldim));
+  if(rows>cols)
+    c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols);
 
-    // Step 1
-    c = dec().permutationP() * rhs();
+  // Step 3
+  m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
+      .template triangularView<Upper>()
+      .solveInPlace(c.topRows(nonzero_pivots));
 
+  // Step 4
+  for(Index i = 0; i < nonzero_pivots; ++i)
+    dst.row(permutationQ().indices().coeff(i)) = c.row(i);
+  for(Index i = nonzero_pivots; i < m_lu.cols(); ++i)
+    dst.row(permutationQ().indices().coeff(i)).setZero();
+}
+
+template<typename _MatrixType>
+template<bool Conjugate, typename RhsType, typename DstType>
+void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const
+{
+  /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1},
+   * and since permutations are real and unitary, we can write this
+   * as   A^T = Q U^T L^T P,
+   * So we proceed as follows:
+   * Step 1: compute c = Q^T rhs.
+   * Step 2: replace c by the solution x to U^T x = c. May or may not exist.
+   * Step 3: replace c by the solution x to L^T x = c.
+   * Step 4: result = P^T c.
+   * If Conjugate is true, replace "^T" by "^*" above.
+   */
+
+  const Index rows = this->rows(), cols = this->cols(),
+    nonzero_pivots = this->rank();
+   eigen_assert(rhs.rows() == cols);
+  const Index smalldim = (std::min)(rows, cols);
+
+  if(nonzero_pivots == 0)
+  {
+    dst.setZero();
+    return;
+  }
+
+  typename RhsType::PlainObject c(rhs.rows(), rhs.cols());
+
+  // Step 1
+  c = permutationQ().inverse() * rhs;
+
+  if (Conjugate) {
     // Step 2
-    dec().matrixLU()
-        .topLeftCorner(smalldim,smalldim)
-        .template triangularView<UnitLower>()
-        .solveInPlace(c.topRows(smalldim));
-    if(rows>cols)
-    {
-      c.bottomRows(rows-cols)
-        -= dec().matrixLU().bottomRows(rows-cols)
-         * c.topRows(cols);
-    }
-
-    // Step 3
-    dec().matrixLU()
-        .topLeftCorner(nonzero_pivots, nonzero_pivots)
+    m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
         .template triangularView<Upper>()
+        .adjoint()
         .solveInPlace(c.topRows(nonzero_pivots));
+    // Step 3
+    m_lu.topLeftCorner(smalldim, smalldim)
+        .template triangularView<UnitLower>()
+        .adjoint()
+        .solveInPlace(c.topRows(smalldim));
+  } else {
+    // Step 2
+    m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots)
+        .template triangularView<Upper>()
+        .transpose()
+        .solveInPlace(c.topRows(nonzero_pivots));
+    // Step 3
+    m_lu.topLeftCorner(smalldim, smalldim)
+        .template triangularView<UnitLower>()
+        .transpose()
+        .solveInPlace(c.topRows(smalldim));
+  }
 
-    // Step 4
-    for(Index i = 0; i < nonzero_pivots; ++i)
-      dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i);
-    for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i)
-      dst.row(dec().permutationQ().indices().coeff(i)).setZero();
+  // Step 4
+  PermutationPType invp = permutationP().inverse().eval();
+  for(Index i = 0; i < smalldim; ++i)
+    dst.row(invp.indices().coeff(i)) = c.row(i);
+  for(Index i = smalldim; i < rows; ++i)
+    dst.row(invp.indices().coeff(i)).setZero();
+}
+
+#endif
+
+namespace internal {
+
+
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense>
+{
+  typedef FullPivLU<MatrixType> LuType;
+  typedef Inverse<LuType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)
+  {
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
   }
 };
-
 } // end namespace internal
 
 /******* MatrixBase methods *****************************************************************/
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/InverseImpl.h
similarity index 86%
rename from Eigen/src/LU/Inverse.h
rename to Eigen/src/LU/InverseImpl.h
index 3cf8871..f49f233 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -2,13 +2,14 @@
 // for linear algebra.
 //
 // Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#ifndef EIGEN_INVERSE_H
-#define EIGEN_INVERSE_H
+#ifndef EIGEN_INVERSE_IMPL_H
+#define EIGEN_INVERSE_IMPL_H
 
 namespace Eigen { 
 
@@ -21,6 +22,7 @@
 template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
 struct compute_inverse
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     result = matrix.partialPivLu().inverse();
@@ -37,16 +39,19 @@
 template<typename MatrixType, typename ResultType>
 struct compute_inverse<MatrixType, ResultType, 1>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     typedef typename MatrixType::Scalar Scalar;
-    result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0);
+    internal::evaluator<MatrixType> matrixEval(matrix);
+    result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0);
   }
 };
 
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -67,19 +72,21 @@
 ****************************/
 
 template<typename MatrixType, typename ResultType>
+EIGEN_DEVICE_FUNC 
 inline void compute_inverse_size2_helper(
     const MatrixType& matrix, const typename ResultType::Scalar& invdet,
     ResultType& result)
 {
-  result.coeffRef(0,0) = matrix.coeff(1,1) * invdet;
+  result.coeffRef(0,0) =  matrix.coeff(1,1) * invdet;
   result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
   result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
-  result.coeffRef(1,1) = matrix.coeff(0,0) * invdet;
+  result.coeffRef(1,1) =  matrix.coeff(0,0) * invdet;
 }
 
 template<typename MatrixType, typename ResultType>
 struct compute_inverse<MatrixType, ResultType, 2>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     typedef typename ResultType::Scalar Scalar;
@@ -91,6 +98,7 @@
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -114,6 +122,7 @@
 ****************************/
 
 template<typename MatrixType, int i, int j>
+EIGEN_DEVICE_FUNC 
 inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)
 {
   enum {
@@ -127,6 +136,7 @@
 }
 
 template<typename MatrixType, typename ResultType>
+EIGEN_DEVICE_FUNC
 inline void compute_inverse_size3_helper(
     const MatrixType& matrix,
     const typename ResultType::Scalar& invdet,
@@ -145,6 +155,7 @@
 template<typename MatrixType, typename ResultType>
 struct compute_inverse<MatrixType, ResultType, 3>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     typedef typename ResultType::Scalar Scalar;
@@ -161,6 +172,7 @@
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -188,6 +200,7 @@
 ****************************/
 
 template<typename Derived>
+EIGEN_DEVICE_FUNC 
 inline const typename Derived::Scalar general_det3_helper
 (const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3)
 {
@@ -196,6 +209,7 @@
 }
 
 template<typename MatrixType, int i, int j>
+EIGEN_DEVICE_FUNC 
 inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)
 {
   enum {
@@ -214,6 +228,7 @@
 template<int Arch, typename Scalar, typename MatrixType, typename ResultType>
 struct compute_inverse_size4
 {
+  EIGEN_DEVICE_FUNC
   static void run(const MatrixType& matrix, ResultType& result)
   {
     result.coeffRef(0,0) =  cofactor_4x4<MatrixType,0,0>(matrix);
@@ -246,6 +261,7 @@
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -265,38 +281,37 @@
 *** MatrixBase methods ***
 *************************/
 
-template<typename MatrixType>
-struct traits<inverse_impl<MatrixType> >
+} // end namespace internal
+
+namespace internal {
+
+// Specialization for "dense = dense_xpr.inverse()"
+template<typename DstXprType, typename XprType>
+struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense>
 {
-  typedef typename MatrixType::PlainObject ReturnType;
-};
-
-template<typename MatrixType>
-struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
-{
-  typedef typename MatrixType::Index Index;
-  typedef typename internal::eval<MatrixType>::type MatrixTypeNested;
-  typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
-  MatrixTypeNested m_matrix;
-
-  inverse_impl(const MatrixType& matrix)
-    : m_matrix(matrix)
-  {}
-
-  inline Index rows() const { return m_matrix.rows(); }
-  inline Index cols() const { return m_matrix.cols(); }
-
-  template<typename Dest> inline void evalTo(Dest& dst) const
+  typedef Inverse<XprType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
   {
-    const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime);
+    Index dstRows = src.rows();
+    Index dstCols = src.cols();
+    if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
+      dst.resize(dstRows, dstCols);
+    
+    const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);
     EIGEN_ONLY_USED_FOR_DEBUG(Size);
-    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=extract_data(dst)))
+    eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst)))
               && "Aliasing problem detected in inverse(), you need to do inverse().eval() here.");
 
-    compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst);
+    typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type  ActualXprType;
+    typedef typename internal::remove_all<ActualXprType>::type                        ActualXprTypeCleanded;
+    
+    ActualXprType actual_xpr(src.nestedExpression());
+    
+    compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst);
   }
 };
 
+  
 } // end namespace internal
 
 /** \lu_module
@@ -317,11 +332,11 @@
   * \sa computeInverseAndDetWithCheck()
   */
 template<typename Derived>
-inline const internal::inverse_impl<Derived> MatrixBase<Derived>::inverse() const
+inline const Inverse<Derived> MatrixBase<Derived>::inverse() const
 {
   EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES)
   eigen_assert(rows() == cols());
-  return internal::inverse_impl<Derived>(derived());
+  return Inverse<Derived>(derived());
 }
 
 /** \lu_module
@@ -357,7 +372,7 @@
   // for larger sizes, evaluating has negligible cost and limits code size.
   typedef typename internal::conditional<
     RowsAtCompileTime == 2,
-    typename internal::remove_all<typename internal::nested<Derived, 2>::type>::type,
+    typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type,
     PlainObject
   >::type MatrixType;
   internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run
@@ -389,7 +404,7 @@
     const RealScalar& absDeterminantThreshold
   ) const
 {
-  RealScalar determinant;
+  Scalar determinant;
   // i'd love to put some static assertions there, but SFINAE means that they have no effect...
   eigen_assert(rows() == cols());
   computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold);
@@ -397,4 +412,4 @@
 
 } // end namespace Eigen
 
-#endif // EIGEN_INVERSE_H
+#endif // EIGEN_INVERSE_IMPL_H
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 7d1db94..d439618 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -11,7 +11,33 @@
 #ifndef EIGEN_PARTIALLU_H
 #define EIGEN_PARTIALLU_H
 
-namespace Eigen { 
+namespace Eigen {
+
+namespace internal {
+template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> >
+ : traits<_MatrixType>
+{
+  typedef MatrixXpr XprKind;
+  typedef SolverStorage StorageKind;
+  typedef traits<_MatrixType> BaseTraits;
+  enum {
+    Flags = BaseTraits::Flags & RowMajorBit,
+    CoeffReadCost = Dynamic
+  };
+};
+
+template<typename T,typename Derived>
+struct enable_if_ref;
+// {
+//   typedef Derived type;
+// };
+
+template<typename T,typename Derived>
+struct enable_if_ref<Ref<T>,Derived> {
+  typedef Derived type;
+};
+
+} // end namespace internal
 
 /** \ingroup LU_Module
   *
@@ -19,7 +45,7 @@
   *
   * \brief LU decomposition of a matrix with partial pivoting, and related features
   *
-  * \param MatrixType the type of the matrix of which we are computing the LU decomposition
+  * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition
   *
   * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A
   * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P
@@ -42,34 +68,33 @@
   *
   * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP().
   *
+  * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism.
+  * 
   * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU
   */
 template<typename _MatrixType> class PartialPivLU
+  : public SolverBase<PartialPivLU<_MatrixType> >
 {
   public:
 
     typedef _MatrixType MatrixType;
+    typedef SolverBase<PartialPivLU> Base;
+    EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU)
+    // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int
     enum {
-      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-      Options = MatrixType::Options,
       MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
       MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
     };
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
-    typedef typename internal::traits<MatrixType>::StorageKind StorageKind;
-    typedef typename MatrixType::Index Index;
     typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType;
     typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType;
-
+    typedef typename MatrixType::PlainObject PlainObject;
 
     /**
-    * \brief Default Constructor.
-    *
-    * The default constructor is useful in cases in which the user intends to
-    * perform decompositions via PartialPivLU::compute(const MatrixType&).
-    */
+      * \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via PartialPivLU::compute(const MatrixType&).
+      */
     PartialPivLU();
 
     /** \brief Default Constructor with memory preallocation
@@ -78,7 +103,7 @@
       * according to the specified problem \a size.
       * \sa PartialPivLU()
       */
-    PartialPivLU(Index size);
+    explicit PartialPivLU(Index size);
 
     /** Constructor.
       *
@@ -87,9 +112,25 @@
       * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
       * If you need to deal with non-full rank, use class FullPivLU instead.
       */
-    PartialPivLU(const MatrixType& matrix);
+    template<typename InputType>
+    explicit PartialPivLU(const EigenBase<InputType>& matrix);
 
-    PartialPivLU& compute(const MatrixType& matrix);
+    /** Constructor for \link InplaceDecomposition inplace decomposition \endlink
+      *
+      * \param matrix the matrix of which to compute the LU decomposition.
+      *
+      * \warning The matrix should have full rank (e.g. if it's square, it should be invertible).
+      * If you need to deal with non-full rank, use class FullPivLU instead.
+      */
+    template<typename InputType>
+    explicit PartialPivLU(EigenBase<InputType>& matrix);
+
+    template<typename InputType>
+    PartialPivLU& compute(const EigenBase<InputType>& matrix) {
+      m_lu = matrix.derived();
+      compute();
+      return *this;
+    }
 
     /** \returns the LU decomposition matrix: the upper-triangular part is U, the
       * unit-lower-triangular part is L (at least for square matrices; in the non-square
@@ -128,12 +169,22 @@
       *
       * \sa TriangularView::solve(), inverse(), computeInverse()
       */
+    // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion.
     template<typename Rhs>
-    inline const internal::solve_retval<PartialPivLU, Rhs>
+    inline const Solve<PartialPivLU, Rhs>
     solve(const MatrixBase<Rhs>& b) const
     {
       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
-      return internal::solve_retval<PartialPivLU, Rhs>(*this, b.derived());
+      return Solve<PartialPivLU, Rhs>(*this, b.derived());
+    }
+
+    /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is
+        the LU decomposition.
+      */
+    inline RealScalar rcond() const
+    {
+      eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
+      return internal::rcond_estimate_helper(m_l1_norm, *this);
     }
 
     /** \returns the inverse of the matrix of which *this is the LU decomposition.
@@ -143,11 +194,10 @@
       *
       * \sa MatrixBase::inverse(), LU::inverse()
       */
-    inline const internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> inverse() const
+    inline const Inverse<PartialPivLU> inverse() const
     {
       eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
-      return internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType>
-               (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
+      return Inverse<PartialPivLU>(*this);
     }
 
     /** \returns the determinant of the matrix of which
@@ -163,24 +213,78 @@
       *
       * \sa MatrixBase::determinant()
       */
-    typename internal::traits<MatrixType>::Scalar determinant() const;
+    Scalar determinant() const;
 
     MatrixType reconstructedMatrix() const;
 
     inline Index rows() const { return m_lu.rows(); }
     inline Index cols() const { return m_lu.cols(); }
 
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl(const RhsType &rhs, DstType &dst) const {
+     /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
+      * So we proceed as follows:
+      * Step 1: compute c = Pb.
+      * Step 2: replace c by the solution x to Lx = c.
+      * Step 3: replace c by the solution x to Ux = c.
+      */
+
+      eigen_assert(rhs.rows() == m_lu.rows());
+
+      // Step 1
+      dst = permutationP() * rhs;
+
+      // Step 2
+      m_lu.template triangularView<UnitLower>().solveInPlace(dst);
+
+      // Step 3
+      m_lu.template triangularView<Upper>().solveInPlace(dst);
+    }
+
+    template<bool Conjugate, typename RhsType, typename DstType>
+    EIGEN_DEVICE_FUNC
+    void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const {
+     /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
+      * So we proceed as follows:
+      * Step 1: compute c = Pb.
+      * Step 2: replace c by the solution x to Lx = c.
+      * Step 3: replace c by the solution x to Ux = c.
+      */
+
+      eigen_assert(rhs.rows() == m_lu.cols());
+
+      if (Conjugate) {
+        // Step 1
+        dst = m_lu.template triangularView<Upper>().adjoint().solve(rhs);
+        // Step 2
+        m_lu.template triangularView<UnitLower>().adjoint().solveInPlace(dst);
+      } else {
+        // Step 1
+        dst = m_lu.template triangularView<Upper>().transpose().solve(rhs);
+        // Step 2
+        m_lu.template triangularView<UnitLower>().transpose().solveInPlace(dst);
+      }
+      // Step 3
+      dst = permutationP().transpose() * dst;
+    }
+    #endif
+
   protected:
-    
+
     static void check_template_parameters()
     {
       EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar);
     }
-    
+
+    void compute();
+
     MatrixType m_lu;
     PermutationType m_p;
     TranspositionType m_rowsTranspositions;
-    Index m_det_p;
+    RealScalar m_l1_norm;
+    signed char m_det_p;
     bool m_isInitialized;
 };
 
@@ -189,6 +293,7 @@
   : m_lu(),
     m_p(),
     m_rowsTranspositions(),
+    m_l1_norm(0),
     m_det_p(0),
     m_isInitialized(false)
 {
@@ -199,20 +304,36 @@
   : m_lu(size, size),
     m_p(size),
     m_rowsTranspositions(size),
+    m_l1_norm(0),
     m_det_p(0),
     m_isInitialized(false)
 {
 }
 
 template<typename MatrixType>
-PartialPivLU<MatrixType>::PartialPivLU(const MatrixType& matrix)
-  : m_lu(matrix.rows(), matrix.rows()),
+template<typename InputType>
+PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix)
+  : m_lu(matrix.rows(),matrix.cols()),
     m_p(matrix.rows()),
     m_rowsTranspositions(matrix.rows()),
+    m_l1_norm(0),
     m_det_p(0),
     m_isInitialized(false)
 {
-  compute(matrix);
+  compute(matrix.derived());
+}
+
+template<typename MatrixType>
+template<typename InputType>
+PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix)
+  : m_lu(matrix.derived()),
+    m_p(matrix.rows()),
+    m_rowsTranspositions(matrix.rows()),
+    m_l1_norm(0),
+    m_det_p(0),
+    m_isInitialized(false)
+{
+  compute();
 }
 
 namespace internal {
@@ -230,7 +351,6 @@
   typedef Block<MapLU, Dynamic, Dynamic> MatrixType;
   typedef Block<MatrixType,Dynamic,Dynamic> BlockType;
   typedef typename MatrixType::RealScalar RealScalar;
-  typedef typename MatrixType::Index Index;
 
   /** \internal performs the LU decomposition in-place of the matrix \a lu
     * using an unblocked algorithm.
@@ -244,6 +364,8 @@
     */
   static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions)
   {
+    typedef scalar_score_coeff_op<Scalar> Scoring;
+    typedef typename Scoring::result_type Score;
     const Index rows = lu.rows();
     const Index cols = lu.cols();
     const Index size = (std::min)(rows,cols);
@@ -253,15 +375,15 @@
     {
       Index rrows = rows-k-1;
       Index rcols = cols-k-1;
-        
+
       Index row_of_biggest_in_col;
-      RealScalar biggest_in_corner
-        = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
+      Score biggest_in_corner
+        = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col);
       row_of_biggest_in_col += k;
 
       row_transpositions[k] = PivIndex(row_of_biggest_in_col);
 
-      if(biggest_in_corner != RealScalar(0))
+      if(biggest_in_corner != Score(0))
       {
         if(k != row_of_biggest_in_col)
         {
@@ -354,7 +476,7 @@
       // update permutations and apply them to A_0
       for(Index i=k; i<k+bs; ++i)
       {
-        Index piv = (row_transpositions[i] += k);
+        Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k));
         A_0.row(i).swap(A_0.row(piv));
       }
 
@@ -377,45 +499,44 @@
 /** \internal performs the LU decomposition with partial pivoting in-place.
   */
 template<typename MatrixType, typename TranspositionType>
-void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::Index& nb_transpositions)
+void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions)
 {
   eigen_assert(lu.cols() == row_transpositions.size());
   eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1);
 
   partial_lu_impl
-    <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::Index>
+    <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex>
     ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions);
 }
 
 } // end namespace internal
 
 template<typename MatrixType>
-PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
+void PartialPivLU<MatrixType>::compute()
 {
   check_template_parameters();
-  
-  // the row permutation is stored as int indices, so just to be sure:
-  eigen_assert(matrix.rows()<NumTraits<int>::highest());
-  
-  m_lu = matrix;
 
-  eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
-  const Index size = matrix.rows();
+  // the row permutation is stored as int indices, so just to be sure:
+  eigen_assert(m_lu.rows()<NumTraits<int>::highest());
+
+  m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff();
+
+  eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");
+  const Index size = m_lu.rows();
 
   m_rowsTranspositions.resize(size);
 
-  typename TranspositionType::Index nb_transpositions;
+  typename TranspositionType::StorageIndex nb_transpositions;
   internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions);
   m_det_p = (nb_transpositions%2) ? -1 : 1;
 
   m_p = m_rowsTranspositions;
 
   m_isInitialized = true;
-  return *this;
 }
 
 template<typename MatrixType>
-typename internal::traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
+typename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const
 {
   eigen_assert(m_isInitialized && "PartialPivLU is not initialized.");
   return Scalar(m_det_p) * m_lu.diagonal().prod();
@@ -438,38 +559,21 @@
   return res;
 }
 
-/***** Implementation of solve() *****************************************************/
+/***** Implementation details *****************************************************/
 
 namespace internal {
 
-template<typename _MatrixType, typename Rhs>
-struct solve_retval<PartialPivLU<_MatrixType>, Rhs>
-  : solve_retval_base<PartialPivLU<_MatrixType>, Rhs>
+/***** Implementation of inverse() *****************************************************/
+template<typename DstXprType, typename MatrixType>
+struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>
 {
-  EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
+  typedef PartialPivLU<MatrixType> LuType;
+  typedef Inverse<LuType> SrcXprType;
+  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
   {
-    /* The decomposition PA = LU can be rewritten as A = P^{-1} L U.
-    * So we proceed as follows:
-    * Step 1: compute c = Pb.
-    * Step 2: replace c by the solution x to Lx = c.
-    * Step 3: replace c by the solution x to Ux = c.
-    */
-
-    eigen_assert(rhs().rows() == dec().matrixLU().rows());
-
-    // Step 1
-    dst = dec().permutationP() * rhs();
-
-    // Step 2
-    dec().matrixLU().template triangularView<UnitLower>().solveInPlace(dst);
-
-    // Step 3
-    dec().matrixLU().template triangularView<Upper>().solveInPlace(dst);
+    dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
   }
 };
-
 } // end namespace internal
 
 /******** MatrixBase methods *******/
@@ -487,7 +591,6 @@
   return PartialPivLU<PlainObject>(eval());
 }
 
-#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
 /** \lu_module
   *
   * Synonym of partialPivLu().
@@ -502,7 +605,6 @@
 {
   return PartialPivLU<PlainObject>(eval());
 }
-#endif
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/LU/PartialPivLU_MKL.h b/Eigen/src/LU/PartialPivLU_LAPACKE.h
similarity index 76%
rename from Eigen/src/LU/PartialPivLU_MKL.h
rename to Eigen/src/LU/PartialPivLU_LAPACKE.h
index 9035953..755168a 100644
--- a/Eigen/src/LU/PartialPivLU_MKL.h
+++ b/Eigen/src/LU/PartialPivLU_LAPACKE.h
@@ -25,7 +25,7 @@
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
  ********************************************************************************
- *   Content : Eigen bindings to Intel(R) MKL
+ *   Content : Eigen bindings to LAPACKe
  *     LU decomposition with partial pivoting based on LAPACKE_?getrf function.
  ********************************************************************************
 */
@@ -33,20 +33,18 @@
 #ifndef EIGEN_PARTIALLU_LAPACK_H
 #define EIGEN_PARTIALLU_LAPACK_H
 
-#include "Eigen/src/Core/util/MKL_support.h"
-
 namespace Eigen { 
 
 namespace internal {
 
-/** \internal Specialization for the data types supported by MKL */
+/** \internal Specialization for the data types supported by LAPACKe */
 
-#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \
+#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \
 template<int StorageOrder> \
 struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \
 { \
   /* \internal performs the LU decomposition in-place of the matrix represented */ \
-  static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
+  static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \
   { \
     EIGEN_UNUSED_VARIABLE(maxBlockSize);\
     lapack_int matrix_order, first_zero_pivot; \
@@ -54,14 +52,14 @@
     EIGTYPE* a; \
 /* Set up parameters for ?getrf */ \
     matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
-    lda = luStride; \
+    lda = convert_index<lapack_int>(luStride); \
     a = lu_data; \
     ipiv = row_transpositions; \
-    m = rows; \
-    n = cols; \
+    m = convert_index<lapack_int>(rows); \
+    n = convert_index<lapack_int>(cols); \
     nb_transpositions = 0; \
 \
-    info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \
+    info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \
 \
     for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \
 \
@@ -73,10 +71,10 @@
   } \
 };
 
-EIGEN_MKL_LU_PARTPIV(double, double, d)
-EIGEN_MKL_LU_PARTPIV(float, float, s)
-EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z)
-EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c)
+EIGEN_LAPACKE_LU_PARTPIV(double, double, d)
+EIGEN_LAPACKE_LU_PARTPIV(float, float, s)
+EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z)
+EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float,  c)
 
 } // end namespace internal
 
diff --git a/Eigen/src/LU/arch/CMakeLists.txt b/Eigen/src/LU/arch/CMakeLists.txt
deleted file mode 100644
index f6b7ed9..0000000
--- a/Eigen/src/LU/arch/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_LU_arch_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_LU_arch_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel
-  )
diff --git a/Eigen/src/LU/arch/Inverse_SSE.h b/Eigen/src/LU/arch/Inverse_SSE.h
index 60b7a23..ebb64a6 100644
--- a/Eigen/src/LU/arch/Inverse_SSE.h
+++ b/Eigen/src/LU/arch/Inverse_SSE.h
@@ -35,13 +35,15 @@
 struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType>
 {
   enum {
-    MatrixAlignment     = bool(MatrixType::Flags&AlignedBit),
-    ResultAlignment     = bool(ResultType::Flags&AlignedBit),
+    MatrixAlignment     = traits<MatrixType>::Alignment,
+    ResultAlignment     = traits<ResultType>::Alignment,
     StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
   };
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
   
-  static void run(const MatrixType& matrix, ResultType& result)
+  static void run(const MatrixType& mat, ResultType& result)
   {
+    ActualMatrixType matrix(mat);
     EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 };
 
     // Load the full matrix into registers
@@ -151,10 +153,12 @@
     iC = _mm_mul_ps(rd,iC);
     iD = _mm_mul_ps(rd,iD);
 
-    result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77));
-    result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22));
-    result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77));
-    result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22));
+    Index res_stride = result.outerStride();
+    float* res = result.data();
+    pstoret<float, Packet4f, ResultAlignment>(res+0,            _mm_shuffle_ps(iA,iB,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+res_stride,   _mm_shuffle_ps(iA,iB,0x22));
+    pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77));
+    pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22));
   }
 
 };
@@ -163,18 +167,21 @@
 struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType>
 {
   enum {
-    MatrixAlignment = bool(MatrixType::Flags&AlignedBit),
-    ResultAlignment = bool(ResultType::Flags&AlignedBit),
+    MatrixAlignment     = traits<MatrixType>::Alignment,
+    ResultAlignment     = traits<ResultType>::Alignment,
     StorageOrdersMatch  = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit)
   };
-  static void run(const MatrixType& matrix, ResultType& result)
+  typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType;
+  
+  static void run(const MatrixType& mat, ResultType& result)
   {
+    ActualMatrixType matrix(mat);
     const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
     const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
 
     // The inverse is calculated using "Divide and Conquer" technique. The
     // original matrix is divide into four 2x2 sub-matrices. Since each
-    // register of the matrix holds two element, the smaller matrices are
+    // register of the matrix holds two elements, the smaller matrices are
     // consisted of two registers. Hence we get a better locality of the
     // calculations.
 
@@ -311,14 +318,16 @@
     iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1);
     iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2);
 
-    result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));     // iA# / det
-    result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
-    result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));     // iB# / det
-    result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
-    result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));     // iC# / det
-    result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
-    result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));     // iD# / det
-    result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
+    Index res_stride = result.outerStride();
+    double* res = result.data();
+    pstoret<double, Packet2d, ResultAlignment>(res+0,             _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride,    _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2,             _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2,  _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride,  _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2));
+    pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1));
+    pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2));
   }
 };