Squashed 'third_party/eigen/' changes from 61d72f6..cf794d3


Change-Id: I9b814151b01f49af6337a8605d0c42a3a1ed4c72
git-subtree-dir: third_party/eigen
git-subtree-split: cf794d3b741a6278df169e58461f8529f43bce5d
diff --git a/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt b/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt
deleted file mode 100644
index cdde64d..0000000
--- a/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_MatrixFunctions_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_MatrixFunctions_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/MatrixFunctions COMPONENT Devel
-  )
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index 6825a78..e5ebbcf 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -1,8 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2009, 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
-// Copyright (C) 2011 Chen-Pang He <jdh8@ms63.hinet.net>
+// Copyright (C) 2009, 2010, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2011, 2013 Chen-Pang He <jdh8@ms63.hinet.net>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,376 +14,374 @@
 #include "StemFunction.h"
 
 namespace Eigen {
+namespace internal {
 
-/** \ingroup MatrixFunctions_Module
-  * \brief Class for computing the matrix exponential.
-  * \tparam MatrixType type of the argument of the exponential,
-  * expected to be an instantiation of the Matrix class template.
-  */
-template <typename MatrixType>
-class MatrixExponential {
+/** \brief Scaling operator.
+ *
+ * This struct is used by CwiseUnaryOp to scale a matrix by \f$ 2^{-s} \f$.
+ */
+template <typename RealScalar>
+struct MatrixExponentialScalingOp
+{
+  /** \brief Constructor.
+   *
+   * \param[in] squarings  The integer \f$ s \f$ in this document.
+   */
+  MatrixExponentialScalingOp(int squarings) : m_squarings(squarings) { }
 
-  public:
 
-    /** \brief Constructor.
-      * 
-      * The class stores a reference to \p M, so it should not be
-      * changed (or destroyed) before compute() is called.
-      *
-      * \param[in] M  matrix whose exponential is to be computed.
-      */
-    MatrixExponential(const MatrixType &M);
+  /** \brief Scale a matrix coefficient.
+   *
+   * \param[in,out] x  The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
+   */
+  inline const RealScalar operator() (const RealScalar& x) const
+  {
+    using std::ldexp;
+    return ldexp(x, -m_squarings);
+  }
 
-    /** \brief Computes the matrix exponential.
-      *
-      * \param[out] result  the matrix exponential of \p M in the constructor.
-      */
-    template <typename ResultType> 
-    void compute(ResultType &result);
+  typedef std::complex<RealScalar> ComplexScalar;
+
+  /** \brief Scale a matrix coefficient.
+   *
+   * \param[in,out] x  The scalar to be scaled, becoming \f$ 2^{-s} x \f$.
+   */
+  inline const ComplexScalar operator() (const ComplexScalar& x) const
+  {
+    using std::ldexp;
+    return ComplexScalar(ldexp(x.real(), -m_squarings), ldexp(x.imag(), -m_squarings));
+  }
 
   private:
-
-    // Prevent copying
-    MatrixExponential(const MatrixExponential&);
-    MatrixExponential& operator=(const MatrixExponential&);
-
-    /** \brief Compute the (3,3)-Pad&eacute; approximant to the exponential.
-     *
-     *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
-     *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
-     *
-     *  \param[in] A   Argument of matrix exponential
-     */
-    void pade3(const MatrixType &A);
-
-    /** \brief Compute the (5,5)-Pad&eacute; approximant to the exponential.
-     *
-     *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
-     *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
-     *
-     *  \param[in] A   Argument of matrix exponential
-     */
-    void pade5(const MatrixType &A);
-
-    /** \brief Compute the (7,7)-Pad&eacute; approximant to the exponential.
-     *
-     *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
-     *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
-     *
-     *  \param[in] A   Argument of matrix exponential
-     */
-    void pade7(const MatrixType &A);
-
-    /** \brief Compute the (9,9)-Pad&eacute; approximant to the exponential.
-     *
-     *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
-     *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
-     *
-     *  \param[in] A   Argument of matrix exponential
-     */
-    void pade9(const MatrixType &A);
-
-    /** \brief Compute the (13,13)-Pad&eacute; approximant to the exponential.
-     *
-     *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
-     *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
-     *
-     *  \param[in] A   Argument of matrix exponential
-     */
-    void pade13(const MatrixType &A);
-
-    /** \brief Compute the (17,17)-Pad&eacute; approximant to the exponential.
-     *
-     *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
-     *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
-     *
-     *  This function activates only if your long double is double-double or quadruple.
-     *
-     *  \param[in] A   Argument of matrix exponential
-     */
-    void pade17(const MatrixType &A);
-
-    /** \brief Compute Pad&eacute; approximant to the exponential.
-     *
-     * Computes \c m_U, \c m_V and \c m_squarings such that
-     * \f$ (V+U)(V-U)^{-1} \f$ is a Pad&eacute; of
-     * \f$ \exp(2^{-\mbox{squarings}}M) \f$ around \f$ M = 0 \f$. The
-     * degree of the Pad&eacute; approximant and the value of
-     * squarings are chosen such that the approximation error is no
-     * more than the round-off error.
-     *
-     * The argument of this function should correspond with the (real
-     * part of) the entries of \c m_M.  It is used to select the
-     * correct implementation using overloading.
-     */
-    void computeUV(double);
-
-    /** \brief Compute Pad&eacute; approximant to the exponential.
-     *
-     *  \sa computeUV(double);
-     */
-    void computeUV(float);
-    
-    /** \brief Compute Pad&eacute; approximant to the exponential.
-     *
-     *  \sa computeUV(double);
-     */
-    void computeUV(long double);
-
-    typedef typename internal::traits<MatrixType>::Scalar Scalar;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef typename std::complex<RealScalar> ComplexScalar;
-
-    /** \brief Reference to matrix whose exponential is to be computed. */
-    typename internal::nested<MatrixType>::type m_M;
-
-    /** \brief Odd-degree terms in numerator of Pad&eacute; approximant. */
-    MatrixType m_U;
-
-    /** \brief Even-degree terms in numerator of Pad&eacute; approximant. */
-    MatrixType m_V;
-
-    /** \brief Used for temporary storage. */
-    MatrixType m_tmp1;
-
-    /** \brief Used for temporary storage. */
-    MatrixType m_tmp2;
-
-    /** \brief Identity matrix of the same size as \c m_M. */
-    MatrixType m_Id;
-
-    /** \brief Number of squarings required in the last step. */
     int m_squarings;
+};
 
-    /** \brief L1 norm of m_M. */
-    RealScalar m_l1norm;
+/** \brief Compute the (3,3)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade3(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatA>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {120.L, 60.L, 12.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType tmp = b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
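An illustrative aside (not part of the patch): with the coefficients b = {120, 60, 12, 1} used above, U collects the odd-degree and V the even-degree terms of the polynomial \f$ p(x) = 120 + 60x + 12x^2 + x^3 \f$, so that
\f[
  V + U = p(A), \qquad V - U = p(-A), \qquad \exp(A) \approx (V+U)(V-U)^{-1},
\f]
which is the (3,3) Padé approximant of the exponential around \f$ A = 0 \f$; the higher-degree helpers below follow the same pattern with longer coefficient lists.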
+
+/** \brief Compute the (5,5)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade5(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {30240.L, 15120.L, 3360.L, 420.L, 30.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType tmp = b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+
+/** \brief Compute the (7,7)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade7(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {17297280.L, 8648640.L, 1995840.L, 277200.L, 25200.L, 1512.L, 56.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  const MatrixType tmp = b[7] * A6 + b[5] * A4 + b[3] * A2 
+    + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+
+}
+
+/** \brief Compute the (9,9)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade9(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {17643225600.L, 8821612800.L, 2075673600.L, 302702400.L, 30270240.L,
+                          2162160.L, 110880.L, 3960.L, 90.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  const MatrixType A8 = A6 * A2;
+  const MatrixType tmp = b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 
+    + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  V = b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+
+/** \brief Compute the (13,13)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ */
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade13(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {64764752532480000.L, 32382376266240000.L, 7771770303897600.L,
+                          1187353796428800.L, 129060195264000.L, 10559470521600.L, 670442572800.L,
+                          33522128640.L, 1323241920.L, 40840800.L, 960960.L, 16380.L, 182.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  V = b[13] * A6 + b[11] * A4 + b[9] * A2; // used for temporary storage
+  MatrixType tmp = A6 * V;
+  tmp += b[7] * A6 + b[5] * A4 + b[3] * A2 + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  tmp = b[12] * A6 + b[10] * A4 + b[8] * A2;
+  V.noalias() = A6 * tmp;
+  V += b[6] * A6 + b[4] * A4 + b[2] * A2 + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+
+/** \brief Compute the (17,17)-Pad&eacute; approximant to the exponential.
+ *
+ *  After exit, \f$ (V+U)(V-U)^{-1} \f$ is the Pad&eacute;
+ *  approximant of \f$ \exp(A) \f$ around \f$ A = 0 \f$.
+ *
+ *  This function activates only if your long double is double-double or quadruple.
+ */
+#if LDBL_MANT_DIG > 64
+template <typename MatA, typename MatU, typename MatV>
+void matrix_exp_pade17(const MatA& A, MatU& U, MatV& V)
+{
+  typedef typename MatA::PlainObject MatrixType;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  const RealScalar b[] = {830034394580628357120000.L, 415017197290314178560000.L,
+                          100610229646136770560000.L, 15720348382208870400000.L,
+                          1774878043152614400000.L, 153822763739893248000.L, 10608466464820224000.L,
+                          595373117923584000.L, 27563570274240000.L, 1060137318240000.L,
+                          33924394183680.L, 899510451840.L, 19554575040.L, 341863200.L, 4651200.L,
+                          46512.L, 306.L, 1.L};
+  const MatrixType A2 = A * A;
+  const MatrixType A4 = A2 * A2;
+  const MatrixType A6 = A4 * A2;
+  const MatrixType A8 = A4 * A4;
+  V = b[17] * A8 + b[15] * A6 + b[13] * A4 + b[11] * A2; // used for temporary storage
+  MatrixType tmp = A8 * V;
+  tmp += b[9] * A8 + b[7] * A6 + b[5] * A4 + b[3] * A2 
+    + b[1] * MatrixType::Identity(A.rows(), A.cols());
+  U.noalias() = A * tmp;
+  tmp = b[16] * A8 + b[14] * A6 + b[12] * A4 + b[10] * A2;
+  V.noalias() = tmp * A8;
+  V += b[8] * A8 + b[6] * A6 + b[4] * A4 + b[2] * A2 
+    + b[0] * MatrixType::Identity(A.rows(), A.cols());
+}
+#endif
+
+template <typename MatrixType, typename RealScalar = typename NumTraits<typename traits<MatrixType>::Scalar>::Real>
+struct matrix_exp_computeUV
+{
+  /** \brief Compute Pad&eacute; approximant to the exponential.
+    *
+    * Computes \c U, \c V and \c squarings such that \f$ (V+U)(V-U)^{-1} \f$ is a Pad&eacute;
+    * approximant of \f$ \exp(2^{-\mbox{squarings}}M) \f$ around \f$ M = 0 \f$, where \f$ M \f$
+    * denotes the matrix \c arg. The degree of the Pad&eacute; approximant and the value of squarings
+    * are chosen such that the approximation error is no more than the round-off error.
+    */
+  static void run(const MatrixType& arg, MatrixType& U, MatrixType& V, int& squarings);
 };
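For reference (illustrative, not part of the patch): the scalar-specific specializations that follow all implement scaling and squaring. They pick the Padé degree from the L1 norm of the argument and, when that norm exceeds the degree-dependent maxnorm threshold, rely on the identity
\f[
  \exp(M) = \bigl( \exp(2^{-s} M) \bigr)^{2^{s}},
\f]
evaluating the approximant on the matrix scaled by \f$ 2^{-s} \f$ (via MatrixExponentialScalingOp) and leaving the \f$ s \f$ squarings to matrix_exp_compute().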
 
 template <typename MatrixType>
-MatrixExponential<MatrixType>::MatrixExponential(const MatrixType &M) :
-  m_M(M),
-  m_U(M.rows(),M.cols()),
-  m_V(M.rows(),M.cols()),
-  m_tmp1(M.rows(),M.cols()),
-  m_tmp2(M.rows(),M.cols()),
-  m_Id(MatrixType::Identity(M.rows(), M.cols())),
-  m_squarings(0),
-  m_l1norm(M.cwiseAbs().colwise().sum().maxCoeff())
+struct matrix_exp_computeUV<MatrixType, float>
 {
-  /* empty body */
-}
+  template <typename ArgType>
+  static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings)
+  {
+    using std::frexp;
+    using std::pow;
+    const float l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+    squarings = 0;
+    if (l1norm < 4.258730016922831e-001f) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 1.880152677804762e+000f) {
+      matrix_exp_pade5(arg, U, V);
+    } else {
+      const float maxnorm = 3.925724783138660f;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<float>(squarings));
+      matrix_exp_pade7(A, U, V);
+    }
+  }
+};
 
 template <typename MatrixType>
-template <typename ResultType> 
-void MatrixExponential<MatrixType>::compute(ResultType &result)
+struct matrix_exp_computeUV<MatrixType, double>
 {
-#if LDBL_MANT_DIG > 112 // rarely happens
-  if(sizeof(RealScalar) > 14) {
-    result = m_M.matrixFunction(StdStemFunctions<ComplexScalar>::exp);
-    return;
+  typedef typename NumTraits<typename traits<MatrixType>::Scalar>::Real RealScalar;
+  template <typename ArgType>
+  static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings)
+  {
+    using std::frexp;
+    using std::pow;
+    const RealScalar l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+    squarings = 0;
+    if (l1norm < 1.495585217958292e-002) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 2.539398330063230e-001) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 9.504178996162932e-001) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 2.097847961257068e+000) {
+      matrix_exp_pade9(arg, U, V);
+    } else {
+      const RealScalar maxnorm = 5.371920351148152;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<RealScalar>(squarings));
+      matrix_exp_pade13(A, U, V);
+    }
   }
+};
+  
+template <typename MatrixType>
+struct matrix_exp_computeUV<MatrixType, long double>
+{
+  template <typename ArgType>
+  static void run(const ArgType& arg, MatrixType& U, MatrixType& V, int& squarings)
+  {
+#if   LDBL_MANT_DIG == 53   // double precision
+    matrix_exp_computeUV<MatrixType, double>::run(arg, U, V, squarings);
+  
+#else
+  
+    using std::frexp;
+    using std::pow;
+    const long double l1norm = arg.cwiseAbs().colwise().sum().maxCoeff();
+    squarings = 0;
+  
+#if LDBL_MANT_DIG <= 64   // extended precision
+  
+    if (l1norm < 4.1968497232266989671e-003L) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 1.1848116734693823091e-001L) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 5.5170388480686700274e-001L) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 1.3759868875587845383e+000L) {
+      matrix_exp_pade9(arg, U, V);
+    } else {
+      const long double maxnorm = 4.0246098906697353063L;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      matrix_exp_pade13(A, U, V);
+    }
+  
+#elif LDBL_MANT_DIG <= 106  // double-double
+  
+    if (l1norm < 3.2787892205607026992947488108213e-005L) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 6.4467025060072760084130906076332e-003L) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 6.8988028496595374751374122881143e-002L) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 2.7339737518502231741495857201670e-001L) {
+      matrix_exp_pade9(arg, U, V);
+    } else if (l1norm < 1.3203382096514474905666448850278e+000L) {
+      matrix_exp_pade13(arg, U, V);
+    } else {
+      const long double maxnorm = 3.2579440895405400856599663723517L;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      matrix_exp_pade17(A, U, V);
+    }
+  
+#elif LDBL_MANT_DIG <= 112  // quadruple precision
+  
+    if (l1norm < 1.639394610288918690547467954466970e-005L) {
+      matrix_exp_pade3(arg, U, V);
+    } else if (l1norm < 4.253237712165275566025884344433009e-003L) {
+      matrix_exp_pade5(arg, U, V);
+    } else if (l1norm < 5.125804063165764409885122032933142e-002L) {
+      matrix_exp_pade7(arg, U, V);
+    } else if (l1norm < 2.170000765161155195453205651889853e-001L) {
+      matrix_exp_pade9(arg, U, V);
+    } else if (l1norm < 1.125358383453143065081397882891878e+000L) {
+      matrix_exp_pade13(arg, U, V);
+    } else {
+      const long double maxnorm = 2.884233277829519311757165057717815L;
+      frexp(l1norm / maxnorm, &squarings);
+      if (squarings < 0) squarings = 0;
+      MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
+      matrix_exp_pade17(A, U, V);
+    }
+  
+#else
+  
+    // this case should be handled in matrix_exp_compute()
+    eigen_assert(false && "Bug in MatrixExponential"); 
+  
 #endif
-  computeUV(RealScalar());
-  m_tmp1 = m_U + m_V;   // numerator of Pade approximant
-  m_tmp2 = -m_U + m_V;  // denominator of Pade approximant
-  result = m_tmp2.partialPivLu().solve(m_tmp1);
-  for (int i=0; i<m_squarings; i++)
+#endif  // LDBL_MANT_DIG
+  }
+};
+
+template<typename T> struct is_exp_known_type : false_type {};
+template<> struct is_exp_known_type<float> : true_type {};
+template<> struct is_exp_known_type<double> : true_type {};
+#if LDBL_MANT_DIG <= 112
+template<> struct is_exp_known_type<long double> : true_type {};
+#endif
+
+template <typename ArgType, typename ResultType>
+void matrix_exp_compute(const ArgType& arg, ResultType &result, true_type) // natively supported scalar type
+{
+  typedef typename ArgType::PlainObject MatrixType;
+  MatrixType U, V;
+  int squarings;
+  matrix_exp_computeUV<MatrixType>::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V)
+  MatrixType numer = U + V;
+  MatrixType denom = -U + V;
+  result = denom.partialPivLu().solve(numer);
+  for (int i=0; i<squarings; i++)
     result *= result;   // undo scaling by repeated squaring
 }
 
-template <typename MatrixType>
-EIGEN_STRONG_INLINE void MatrixExponential<MatrixType>::pade3(const MatrixType &A)
+
+/* Computes the matrix exponential
+ *
+ * \param arg    argument of matrix exponential (should be plain object)
+ * \param result variable in which result will be stored
+ */
+template <typename ArgType, typename ResultType>
+void matrix_exp_compute(const ArgType& arg, ResultType &result, false_type) // default
 {
-  const RealScalar b[] = {120., 60., 12., 1.};
-  m_tmp1.noalias() = A * A;
-  m_tmp2 = b[3]*m_tmp1 + b[1]*m_Id;
-  m_U.noalias() = A * m_tmp2;
-  m_V = b[2]*m_tmp1 + b[0]*m_Id;
+  typedef typename ArgType::PlainObject MatrixType;
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef typename std::complex<RealScalar> ComplexScalar;
+  result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
 }
 
-template <typename MatrixType>
-EIGEN_STRONG_INLINE void MatrixExponential<MatrixType>::pade5(const MatrixType &A)
-{
-  const RealScalar b[] = {30240., 15120., 3360., 420., 30., 1.};
-  MatrixType A2 = A * A;
-  m_tmp1.noalias() = A2 * A2;
-  m_tmp2 = b[5]*m_tmp1 + b[3]*A2 + b[1]*m_Id;
-  m_U.noalias() = A * m_tmp2;
-  m_V = b[4]*m_tmp1 + b[2]*A2 + b[0]*m_Id;
-}
-
-template <typename MatrixType>
-EIGEN_STRONG_INLINE void MatrixExponential<MatrixType>::pade7(const MatrixType &A)
-{
-  const RealScalar b[] = {17297280., 8648640., 1995840., 277200., 25200., 1512., 56., 1.};
-  MatrixType A2 = A * A;
-  MatrixType A4 = A2 * A2;
-  m_tmp1.noalias() = A4 * A2;
-  m_tmp2 = b[7]*m_tmp1 + b[5]*A4 + b[3]*A2 + b[1]*m_Id;
-  m_U.noalias() = A * m_tmp2;
-  m_V = b[6]*m_tmp1 + b[4]*A4 + b[2]*A2 + b[0]*m_Id;
-}
-
-template <typename MatrixType>
-EIGEN_STRONG_INLINE void MatrixExponential<MatrixType>::pade9(const MatrixType &A)
-{
-  const RealScalar b[] = {17643225600., 8821612800., 2075673600., 302702400., 30270240.,
-		      2162160., 110880., 3960., 90., 1.};
-  MatrixType A2 = A * A;
-  MatrixType A4 = A2 * A2;
-  MatrixType A6 = A4 * A2;
-  m_tmp1.noalias() = A6 * A2;
-  m_tmp2 = b[9]*m_tmp1 + b[7]*A6 + b[5]*A4 + b[3]*A2 + b[1]*m_Id;
-  m_U.noalias() = A * m_tmp2;
-  m_V = b[8]*m_tmp1 + b[6]*A6 + b[4]*A4 + b[2]*A2 + b[0]*m_Id;
-}
-
-template <typename MatrixType>
-EIGEN_STRONG_INLINE void MatrixExponential<MatrixType>::pade13(const MatrixType &A)
-{
-  const RealScalar b[] = {64764752532480000., 32382376266240000., 7771770303897600.,
-		      1187353796428800., 129060195264000., 10559470521600., 670442572800.,
-		      33522128640., 1323241920., 40840800., 960960., 16380., 182., 1.};
-  MatrixType A2 = A * A;
-  MatrixType A4 = A2 * A2;
-  m_tmp1.noalias() = A4 * A2;
-  m_V = b[13]*m_tmp1 + b[11]*A4 + b[9]*A2; // used for temporary storage
-  m_tmp2.noalias() = m_tmp1 * m_V;
-  m_tmp2 += b[7]*m_tmp1 + b[5]*A4 + b[3]*A2 + b[1]*m_Id;
-  m_U.noalias() = A * m_tmp2;
-  m_tmp2 = b[12]*m_tmp1 + b[10]*A4 + b[8]*A2;
-  m_V.noalias() = m_tmp1 * m_tmp2;
-  m_V += b[6]*m_tmp1 + b[4]*A4 + b[2]*A2 + b[0]*m_Id;
-}
-
-#if LDBL_MANT_DIG > 64
-template <typename MatrixType>
-EIGEN_STRONG_INLINE void MatrixExponential<MatrixType>::pade17(const MatrixType &A)
-{
-  const RealScalar b[] = {830034394580628357120000.L, 415017197290314178560000.L,
-		      100610229646136770560000.L, 15720348382208870400000.L,
-		      1774878043152614400000.L, 153822763739893248000.L, 10608466464820224000.L,
-		      595373117923584000.L, 27563570274240000.L, 1060137318240000.L,
-		      33924394183680.L, 899510451840.L, 19554575040.L, 341863200.L, 4651200.L,
-		      46512.L, 306.L, 1.L};
-  MatrixType A2 = A * A;
-  MatrixType A4 = A2 * A2;
-  MatrixType A6 = A4 * A2;
-  m_tmp1.noalias() = A4 * A4;
-  m_V = b[17]*m_tmp1 + b[15]*A6 + b[13]*A4 + b[11]*A2; // used for temporary storage
-  m_tmp2.noalias() = m_tmp1 * m_V;
-  m_tmp2 += b[9]*m_tmp1 + b[7]*A6 + b[5]*A4 + b[3]*A2 + b[1]*m_Id;
-  m_U.noalias() = A * m_tmp2;
-  m_tmp2 = b[16]*m_tmp1 + b[14]*A6 + b[12]*A4 + b[10]*A2;
-  m_V.noalias() = m_tmp1 * m_tmp2;
-  m_V += b[8]*m_tmp1 + b[6]*A6 + b[4]*A4 + b[2]*A2 + b[0]*m_Id;
-}
-#endif
-
-template <typename MatrixType>
-void MatrixExponential<MatrixType>::computeUV(float)
-{
-  using std::frexp;
-  using std::pow;
-  if (m_l1norm < 4.258730016922831e-001) {
-    pade3(m_M);
-  } else if (m_l1norm < 1.880152677804762e+000) {
-    pade5(m_M);
-  } else {
-    const float maxnorm = 3.925724783138660f;
-    frexp(m_l1norm / maxnorm, &m_squarings);
-    if (m_squarings < 0) m_squarings = 0;
-    MatrixType A = m_M / pow(Scalar(2), m_squarings);
-    pade7(A);
-  }
-}
-
-template <typename MatrixType>
-void MatrixExponential<MatrixType>::computeUV(double)
-{
-  using std::frexp;
-  using std::pow;
-  if (m_l1norm < 1.495585217958292e-002) {
-    pade3(m_M);
-  } else if (m_l1norm < 2.539398330063230e-001) {
-    pade5(m_M);
-  } else if (m_l1norm < 9.504178996162932e-001) {
-    pade7(m_M);
-  } else if (m_l1norm < 2.097847961257068e+000) {
-    pade9(m_M);
-  } else {
-    const double maxnorm = 5.371920351148152;
-    frexp(m_l1norm / maxnorm, &m_squarings);
-    if (m_squarings < 0) m_squarings = 0;
-    MatrixType A = m_M / pow(Scalar(2), m_squarings);
-    pade13(A);
-  }
-}
-
-template <typename MatrixType>
-void MatrixExponential<MatrixType>::computeUV(long double)
-{
-  using std::frexp;
-  using std::pow;
-#if   LDBL_MANT_DIG == 53   // double precision
-  computeUV(double());
-#elif LDBL_MANT_DIG <= 64   // extended precision
-  if (m_l1norm < 4.1968497232266989671e-003L) {
-    pade3(m_M);
-  } else if (m_l1norm < 1.1848116734693823091e-001L) {
-    pade5(m_M);
-  } else if (m_l1norm < 5.5170388480686700274e-001L) {
-    pade7(m_M);
-  } else if (m_l1norm < 1.3759868875587845383e+000L) {
-    pade9(m_M);
-  } else {
-    const long double maxnorm = 4.0246098906697353063L;
-    frexp(m_l1norm / maxnorm, &m_squarings);
-    if (m_squarings < 0) m_squarings = 0;
-    MatrixType A = m_M / pow(Scalar(2), m_squarings);
-    pade13(A);
-  }
-#elif LDBL_MANT_DIG <= 106  // double-double
-  if (m_l1norm < 3.2787892205607026992947488108213e-005L) {
-    pade3(m_M);
-  } else if (m_l1norm < 6.4467025060072760084130906076332e-003L) {
-    pade5(m_M);
-  } else if (m_l1norm < 6.8988028496595374751374122881143e-002L) {
-    pade7(m_M);
-  } else if (m_l1norm < 2.7339737518502231741495857201670e-001L) {
-    pade9(m_M);
-  } else if (m_l1norm < 1.3203382096514474905666448850278e+000L) {
-    pade13(m_M);
-  } else {
-    const long double maxnorm = 3.2579440895405400856599663723517L;
-    frexp(m_l1norm / maxnorm, &m_squarings);
-    if (m_squarings < 0) m_squarings = 0;
-    MatrixType A = m_M / pow(Scalar(2), m_squarings);
-    pade17(A);
-  }
-#elif LDBL_MANT_DIG <= 112  // quadruple precison
-  if (m_l1norm < 1.639394610288918690547467954466970e-005L) {
-    pade3(m_M);
-  } else if (m_l1norm < 4.253237712165275566025884344433009e-003L) {
-    pade5(m_M);
-  } else if (m_l1norm < 5.125804063165764409885122032933142e-002L) {
-    pade7(m_M);
-  } else if (m_l1norm < 2.170000765161155195453205651889853e-001L) {
-    pade9(m_M);
-  } else if (m_l1norm < 1.125358383453143065081397882891878e+000L) {
-    pade13(m_M);
-  } else {
-    const long double maxnorm = 2.884233277829519311757165057717815L;
-    frexp(m_l1norm / maxnorm, &m_squarings);
-    if (m_squarings < 0) m_squarings = 0;
-    MatrixType A = m_M / pow(Scalar(2), m_squarings);
-    pade17(A);
-  }
-#else
-  // this case should be handled in compute()
-  eigen_assert(false && "Bug in MatrixExponential"); 
-#endif  // LDBL_MANT_DIG
-}
+} // end namespace Eigen::internal
 
 /** \ingroup MatrixFunctions_Module
   *
@@ -391,11 +389,9 @@
   *
   * \tparam Derived  Type of the argument to the matrix exponential.
   *
-  * This class holds the argument to the matrix exponential until it
-  * is assigned or evaluated for some other reason (so the argument
-  * should not be changed in the meantime). It is the return type of
-  * MatrixBase::exp() and most of the time this is the only way it is
-  * used.
+  * This class holds the argument to the matrix exponential until it is assigned or evaluated for
+  * some other reason (so the argument should not be changed in the meantime). It is the return type
+  * of MatrixBase::exp() and most of the time this is the only way it is used.
   */
 template<typename Derived> struct MatrixExponentialReturnValue
 : public ReturnByValue<MatrixExponentialReturnValue<Derived> >
@@ -404,31 +400,26 @@
   public:
     /** \brief Constructor.
       *
-      * \param[in] src %Matrix (expression) forming the argument of the
-      * matrix exponential.
+      * \param src %Matrix (expression) forming the argument of the matrix exponential.
       */
     MatrixExponentialReturnValue(const Derived& src) : m_src(src) { }
 
     /** \brief Compute the matrix exponential.
       *
-      * \param[out] result the matrix exponential of \p src in the
-      * constructor.
+      * \param result the matrix exponential of \p src in the constructor.
       */
     template <typename ResultType>
     inline void evalTo(ResultType& result) const
     {
-      const typename Derived::PlainObject srcEvaluated = m_src.eval();
-      MatrixExponential<typename Derived::PlainObject> me(srcEvaluated);
-      me.compute(result);
+      const typename internal::nested_eval<Derived, 10>::type tmp(m_src);
+      internal::matrix_exp_compute(tmp, result, internal::is_exp_known_type<typename Derived::Scalar>());
     }
 
     Index rows() const { return m_src.rows(); }
     Index cols() const { return m_src.cols(); }
 
   protected:
-    const Derived& m_src;
-  private:
-    MatrixExponentialReturnValue& operator=(const MatrixExponentialReturnValue&);
+    const typename internal::ref_selector<Derived>::type m_src;
 };
 
 namespace internal {
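A minimal usage sketch of the functionality touched by this file (illustrative only, not part of the patch); it assumes the unsupported MatrixFunctions header is on the include path:

  #include <iostream>
  #include <Eigen/Dense>
  #include <unsupported/Eigen/MatrixFunctions>   // declares MatrixBase::exp()

  int main()
  {
    // exp of the 2x2 rotation generator is a plane rotation by 1 radian.
    Eigen::Matrix2d A;
    A << 0, -1,
         1,  0;
    // A.exp() returns a MatrixExponentialReturnValue; assigning it to a matrix
    // calls evalTo(), which dispatches to internal::matrix_exp_compute().
    Eigen::Matrix2d E = A.exp();
    std::cout << E << "\n";   // approximately [cos 1, -sin 1; sin 1, cos 1]
  }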
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index 7d42664..3df8239 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -1,408 +1,255 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2009-2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2009-2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
-#ifndef EIGEN_MATRIX_FUNCTION
-#define EIGEN_MATRIX_FUNCTION
+#ifndef EIGEN_MATRIX_FUNCTION_H
+#define EIGEN_MATRIX_FUNCTION_H
 
 #include "StemFunction.h"
-#include "MatrixFunctionAtomic.h"
 
 
 namespace Eigen { 
 
+namespace internal {
+
+/** \brief Maximum distance allowed between eigenvalues to be considered "close". */
+static const float matrix_function_separation = 0.1f;
+
 /** \ingroup MatrixFunctions_Module
-  * \brief Class for computing matrix functions.
-  * \tparam  MatrixType  type of the argument of the matrix function,
-  *                      expected to be an instantiation of the Matrix class template.
-  * \tparam  AtomicType  type for computing matrix function of atomic blocks.
-  * \tparam  IsComplex   used internally to select correct specialization.
+  * \class MatrixFunctionAtomic
+  * \brief Helper class for computing matrix functions of atomic matrices.
   *
-  * This class implements the Schur-Parlett algorithm for computing matrix functions. The spectrum of the
-  * matrix is divided in clustered of eigenvalues that lies close together. This class delegates the
-  * computation of the matrix function on every block corresponding to these clusters to an object of type
-  * \p AtomicType and uses these results to compute the matrix function of the whole matrix. The class
-  * \p AtomicType should have a \p compute() member function for computing the matrix function of a block.
-  *
-  * \sa class MatrixFunctionAtomic, class MatrixLogarithmAtomic
+  * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other.
   */
-template <typename MatrixType, 
-	  typename AtomicType,  
-          int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex>
-class MatrixFunction
-{  
-  public:
-
-    /** \brief Constructor. 
-      *
-      * \param[in]  A       argument of matrix function, should be a square matrix.
-      * \param[in]  atomic  class for computing matrix function of atomic blocks.
-      *
-      * The class stores references to \p A and \p atomic, so they should not be
-      * changed (or destroyed) before compute() is called.
-      */
-    MatrixFunction(const MatrixType& A, AtomicType& atomic);
-
-    /** \brief Compute the matrix function.
-      *
-      * \param[out] result  the function \p f applied to \p A, as
-      * specified in the constructor.
-      *
-      * See MatrixBase::matrixFunction() for details on how this computation
-      * is implemented.
-      */
-    template <typename ResultType> 
-    void compute(ResultType &result);    
-};
-
-
-/** \internal \ingroup MatrixFunctions_Module 
-  * \brief Partial specialization of MatrixFunction for real matrices
-  */
-template <typename MatrixType, typename AtomicType>
-class MatrixFunction<MatrixType, AtomicType, 0>
-{  
-  private:
-
-    typedef internal::traits<MatrixType> Traits;
-    typedef typename Traits::Scalar Scalar;
-    static const int Rows = Traits::RowsAtCompileTime;
-    static const int Cols = Traits::ColsAtCompileTime;
-    static const int Options = MatrixType::Options;
-    static const int MaxRows = Traits::MaxRowsAtCompileTime;
-    static const int MaxCols = Traits::MaxColsAtCompileTime;
-
-    typedef std::complex<Scalar> ComplexScalar;
-    typedef Matrix<ComplexScalar, Rows, Cols, Options, MaxRows, MaxCols> ComplexMatrix;
-
-  public:
-
-    /** \brief Constructor. 
-      *
-      * \param[in]  A       argument of matrix function, should be a square matrix.
-      * \param[in]  atomic  class for computing matrix function of atomic blocks.
-      */
-    MatrixFunction(const MatrixType& A, AtomicType& atomic) : m_A(A), m_atomic(atomic) { }
-
-    /** \brief Compute the matrix function.
-      *
-      * \param[out] result  the function \p f applied to \p A, as
-      * specified in the constructor.
-      *
-      * This function converts the real matrix \c A to a complex matrix,
-      * uses MatrixFunction<MatrixType,1> and then converts the result back to
-      * a real matrix.
-      */
-    template <typename ResultType>
-    void compute(ResultType& result) 
-    {
-      ComplexMatrix CA = m_A.template cast<ComplexScalar>();
-      ComplexMatrix Cresult;
-      MatrixFunction<ComplexMatrix, AtomicType> mf(CA, m_atomic);
-      mf.compute(Cresult);
-      result = Cresult.real();
-    }
-
-  private:
-    typename internal::nested<MatrixType>::type m_A; /**< \brief Reference to argument of matrix function. */
-    AtomicType& m_atomic; /**< \brief Class for computing matrix function of atomic blocks. */
-
-    MatrixFunction& operator=(const MatrixFunction&);
-};
-
-      
-/** \internal \ingroup MatrixFunctions_Module 
-  * \brief Partial specialization of MatrixFunction for complex matrices
-  */
-template <typename MatrixType, typename AtomicType>
-class MatrixFunction<MatrixType, AtomicType, 1>
+template <typename MatrixType>
+class MatrixFunctionAtomic 
 {
-  private:
+  public:
 
-    typedef internal::traits<MatrixType> Traits;
     typedef typename MatrixType::Scalar Scalar;
-    typedef typename MatrixType::Index Index;
-    static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
-    static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
-    static const int Options = MatrixType::Options;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef Matrix<Scalar, Traits::RowsAtCompileTime, 1> VectorType;
-    typedef Matrix<Index, Traits::RowsAtCompileTime, 1> IntVectorType;
-    typedef Matrix<Index, Dynamic, 1> DynamicIntVectorType;
-    typedef std::list<Scalar> Cluster;
-    typedef std::list<Cluster> ListOfClusters;
-    typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+    typedef typename stem_function<Scalar>::type StemFunction;
 
-  public:
+    /** \brief Constructor
+      * \param[in]  f  matrix function to compute.
+      */
+    MatrixFunctionAtomic(StemFunction f) : m_f(f) { }
 
-    MatrixFunction(const MatrixType& A, AtomicType& atomic);
-    template <typename ResultType> void compute(ResultType& result);
+    /** \brief Compute matrix function of atomic matrix
+      * \param[in]  A  argument of matrix function, should be upper triangular and atomic
+      * \returns  f(A), the matrix function evaluated at the given matrix
+      */
+    MatrixType compute(const MatrixType& A);
 
   private:
-
-    void computeSchurDecomposition();
-    void partitionEigenvalues();
-    typename ListOfClusters::iterator findCluster(Scalar key);
-    void computeClusterSize();
-    void computeBlockStart();
-    void constructPermutation();
-    void permuteSchur();
-    void swapEntriesInSchur(Index index);
-    void computeBlockAtomic();
-    Block<MatrixType> block(MatrixType& A, Index i, Index j);
-    void computeOffDiagonal();
-    DynMatrixType solveTriangularSylvester(const DynMatrixType& A, const DynMatrixType& B, const DynMatrixType& C);
-
-    typename internal::nested<MatrixType>::type m_A; /**< \brief Reference to argument of matrix function. */
-    AtomicType& m_atomic; /**< \brief Class for computing matrix function of atomic blocks. */
-    MatrixType m_T; /**< \brief Triangular part of Schur decomposition */
-    MatrixType m_U; /**< \brief Unitary part of Schur decomposition */
-    MatrixType m_fT; /**< \brief %Matrix function applied to #m_T */
-    ListOfClusters m_clusters; /**< \brief Partition of eigenvalues into clusters of ei'vals "close" to each other */
-    DynamicIntVectorType m_eivalToCluster; /**< \brief m_eivalToCluster[i] = j means i-th ei'val is in j-th cluster */
-    DynamicIntVectorType m_clusterSize; /**< \brief Number of eigenvalues in each clusters  */
-    DynamicIntVectorType m_blockStart; /**< \brief Row index at which block corresponding to i-th cluster starts */
-    IntVectorType m_permutation; /**< \brief Permutation which groups ei'vals in the same cluster together */
-
-    /** \brief Maximum distance allowed between eigenvalues to be considered "close".
-      *
-      * This is morally a \c static \c const \c Scalar, but only
-      * integers can be static constant class members in C++. The
-      * separation constant is set to 0.1, a value taken from the
-      * paper by Davies and Higham. */
-    static const RealScalar separation() { return static_cast<RealScalar>(0.1); }
-
-    MatrixFunction& operator=(const MatrixFunction&);
+    StemFunction* m_f;
 };
 
-/** \brief Constructor. 
- *
- * \param[in]  A       argument of matrix function, should be a square matrix.
- * \param[in]  atomic  class for computing matrix function of atomic blocks.
- */
-template <typename MatrixType, typename AtomicType>
-MatrixFunction<MatrixType,AtomicType,1>::MatrixFunction(const MatrixType& A, AtomicType& atomic)
-  : m_A(A), m_atomic(atomic)
+template <typename MatrixType>
+typename NumTraits<typename MatrixType::Scalar>::Real matrix_function_compute_mu(const MatrixType& A)
 {
-  /* empty body */
+  typedef typename plain_col_type<MatrixType>::type VectorType;
+  typename MatrixType::Index rows = A.rows();
+  const MatrixType N = MatrixType::Identity(rows, rows) - A;
+  VectorType e = VectorType::Ones(rows);
+  N.template triangularView<Upper>().solveInPlace(e);
+  return e.cwiseAbs().maxCoeff();
 }
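An editorial note (not part of the patch): with \f$ e \f$ the vector of ones, the helper above returns
\f[
  \mu = \bigl\| (I - A)^{-1} e \bigr\|_{\infty},
\f]
which MatrixFunctionAtomic::compute() below uses to weight the truncation-error test of its Taylor series.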
 
-/** \brief Compute the matrix function.
-  *
-  * \param[out] result  the function \p f applied to \p A, as
-  * specified in the constructor.
+template <typename MatrixType>
+MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
+{
+  // TODO: Use that A is upper triangular
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  typedef typename MatrixType::Index Index;
+  Index rows = A.rows();
+  Scalar avgEival = A.trace() / Scalar(RealScalar(rows));
+  MatrixType Ashifted = A - avgEival * MatrixType::Identity(rows, rows);
+  RealScalar mu = matrix_function_compute_mu(Ashifted);
+  MatrixType F = m_f(avgEival, 0) * MatrixType::Identity(rows, rows);
+  MatrixType P = Ashifted;
+  MatrixType Fincr;
+  for (Index s = 1; s < 1.1 * rows + 10; s++) { // upper limit is fairly arbitrary
+    Fincr = m_f(avgEival, static_cast<int>(s)) * P;
+    F += Fincr;
+    P = Scalar(RealScalar(1.0/(s + 1))) * P * Ashifted;
+
+    // test whether Taylor series converged
+    const RealScalar F_norm = F.cwiseAbs().rowwise().sum().maxCoeff();
+    const RealScalar Fincr_norm = Fincr.cwiseAbs().rowwise().sum().maxCoeff();
+    if (Fincr_norm < NumTraits<Scalar>::epsilon() * F_norm) {
+      RealScalar delta = 0;
+      RealScalar rfactorial = 1;
+      for (Index r = 0; r < rows; r++) {
+        RealScalar mx = 0;
+        for (Index i = 0; i < rows; i++)
+          mx = (std::max)(mx, std::abs(m_f(Ashifted(i, i) + avgEival, static_cast<int>(s+r))));
+        if (r != 0)
+          rfactorial *= RealScalar(r);
+        delta = (std::max)(delta, mx / rfactorial);
+      }
+      const RealScalar P_norm = P.cwiseAbs().rowwise().sum().maxCoeff();
+      if (mu * delta * P_norm < NumTraits<Scalar>::epsilon() * F_norm) // series converged
+        break;
+    }
+  }
+  return F;
+}
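For context (illustrative, not part of the patch): writing \f$ \sigma \f$ for the average eigenvalue \c avgEival, the loop above accumulates the shifted Taylor series
\f[
  f(A) = \sum_{s \ge 0} \frac{f^{(s)}(\sigma)}{s!} \, (A - \sigma I)^{s},
  \qquad \sigma = \mathrm{trace}(A)/n,
\f]
with \c P holding \f$ (A - \sigma I)^{s}/s! \f$ and \c m_f supplying the derivatives; the series is truncated once \c mu * \c delta * \c P_norm drops below \f$ \varepsilon \, \|F\| \f$.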
+
+/** \brief Find cluster in \p clusters containing some value 
+  * \param[in] key Value to find
+  * \returns Iterator to cluster containing \p key, or \c clusters.end() if no cluster in \p clusters
+  * contains \p key.
   */
-template <typename MatrixType, typename AtomicType>
-template <typename ResultType>
-void MatrixFunction<MatrixType,AtomicType,1>::compute(ResultType& result) 
+template <typename Index, typename ListOfClusters>
+typename ListOfClusters::iterator matrix_function_find_cluster(Index key, ListOfClusters& clusters)
 {
-  computeSchurDecomposition();
-  partitionEigenvalues();
-  computeClusterSize();
-  computeBlockStart();
-  constructPermutation();
-  permuteSchur();
-  computeBlockAtomic();
-  computeOffDiagonal();
-  result = m_U * (m_fT.template triangularView<Upper>() * m_U.adjoint());
-}
-
-/** \brief Store the Schur decomposition of #m_A in #m_T and #m_U */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::computeSchurDecomposition()
-{
-  const ComplexSchur<MatrixType> schurOfA(m_A);  
-  m_T = schurOfA.matrixT();
-  m_U = schurOfA.matrixU();
+  typename std::list<Index>::iterator j;
+  for (typename ListOfClusters::iterator i = clusters.begin(); i != clusters.end(); ++i) {
+    j = std::find(i->begin(), i->end(), key);
+    if (j != i->end())
+      return i;
+  }
+  return clusters.end();
 }
 
 /** \brief Partition eigenvalues in clusters of ei'vals close to each other
   * 
-  * This function computes #m_clusters. This is a partition of the
-  * eigenvalues of #m_T in clusters, such that
-  * # Any eigenvalue in a certain cluster is at most separation() away
-  *   from another eigenvalue in the same cluster.
-  * # The distance between two eigenvalues in different clusters is
-  *   more than separation().
-  * The implementation follows Algorithm 4.1 in the paper of Davies
-  * and Higham. 
+  * \param[in]  eivals    Eigenvalues
+  * \param[out] clusters  Resulting partition of eigenvalues
+  *
+  * The partition satisfies the following two properties:
+  * # Any eigenvalue in a certain cluster is at most matrix_function_separation() away from another eigenvalue
+  *   in the same cluster.
+  * # The distance between two eigenvalues in different clusters is more than matrix_function_separation().  
+  * The implementation follows Algorithm 4.1 in the paper of Davies and Higham.
   */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::partitionEigenvalues()
+template <typename EivalsType, typename Cluster>
+void matrix_function_partition_eigenvalues(const EivalsType& eivals, std::list<Cluster>& clusters)
 {
-  using std::abs;
-  const Index rows = m_T.rows();
-  VectorType diag = m_T.diagonal(); // contains eigenvalues of A
-
-  for (Index i=0; i<rows; ++i) {
-    // Find set containing diag(i), adding a new set if necessary
-    typename ListOfClusters::iterator qi = findCluster(diag(i));
-    if (qi == m_clusters.end()) {
+  typedef typename EivalsType::Index Index;
+  typedef typename EivalsType::RealScalar RealScalar;
+  for (Index i=0; i<eivals.rows(); ++i) {
+    // Find cluster containing i-th ei'val, adding a new cluster if necessary
+    typename std::list<Cluster>::iterator qi = matrix_function_find_cluster(i, clusters);
+    if (qi == clusters.end()) {
       Cluster l;
-      l.push_back(diag(i));
-      m_clusters.push_back(l);
-      qi = m_clusters.end();
+      l.push_back(i);
+      clusters.push_back(l);
+      qi = clusters.end();
       --qi;
     }
 
     // Look for other element to add to the set
-    for (Index j=i+1; j<rows; ++j) {
-      if (abs(diag(j) - diag(i)) <= separation() && std::find(qi->begin(), qi->end(), diag(j)) == qi->end()) {
-        typename ListOfClusters::iterator qj = findCluster(diag(j));
-        if (qj == m_clusters.end()) {
-          qi->push_back(diag(j));
+    for (Index j=i+1; j<eivals.rows(); ++j) {
+      if (abs(eivals(j) - eivals(i)) <= RealScalar(matrix_function_separation)
+          && std::find(qi->begin(), qi->end(), j) == qi->end()) {
+        typename std::list<Cluster>::iterator qj = matrix_function_find_cluster(j, clusters);
+        if (qj == clusters.end()) {
+          qi->push_back(j);
         } else {
           qi->insert(qi->end(), qj->begin(), qj->end());
-          m_clusters.erase(qj);
+          clusters.erase(qj);
         }
       }
     }
   }
 }
 
-/** \brief Find cluster in #m_clusters containing some value 
-  * \param[in] key Value to find
-  * \returns Iterator to cluster containing \c key, or
-  * \c m_clusters.end() if no cluster in m_clusters contains \c key.
-  */
-template <typename MatrixType, typename AtomicType>
-typename MatrixFunction<MatrixType,AtomicType,1>::ListOfClusters::iterator MatrixFunction<MatrixType,AtomicType,1>::findCluster(Scalar key)
+/** \brief Compute size of each cluster given a partitioning */
+template <typename ListOfClusters, typename Index>
+void matrix_function_compute_cluster_size(const ListOfClusters& clusters, Matrix<Index, Dynamic, 1>& clusterSize)
 {
-  typename Cluster::iterator j;
-  for (typename ListOfClusters::iterator i = m_clusters.begin(); i != m_clusters.end(); ++i) {
-    j = std::find(i->begin(), i->end(), key);
-    if (j != i->end())
-      return i;
+  const Index numClusters = static_cast<Index>(clusters.size());
+  clusterSize.setZero(numClusters);
+  Index clusterIndex = 0;
+  for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) {
+    clusterSize[clusterIndex] = cluster->size();
+    ++clusterIndex;
   }
-  return m_clusters.end();
 }
 
-/** \brief Compute #m_clusterSize and #m_eivalToCluster using #m_clusters */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::computeClusterSize()
+/** \brief Compute start of each block using clusterSize */
+template <typename VectorType>
+void matrix_function_compute_block_start(const VectorType& clusterSize, VectorType& blockStart)
 {
-  const Index rows = m_T.rows();
-  VectorType diag = m_T.diagonal(); 
-  const Index numClusters = static_cast<Index>(m_clusters.size());
+  blockStart.resize(clusterSize.rows());
+  blockStart(0) = 0;
+  for (typename VectorType::Index i = 1; i < clusterSize.rows(); i++) {
+    blockStart(i) = blockStart(i-1) + clusterSize(i-1);
+  }
+}
 
-  m_clusterSize.setZero(numClusters);
-  m_eivalToCluster.resize(rows);
+/** \brief Compute mapping of eigenvalue indices to cluster indices */
+template <typename EivalsType, typename ListOfClusters, typename VectorType>
+void matrix_function_compute_map(const EivalsType& eivals, const ListOfClusters& clusters, VectorType& eivalToCluster)
+{
+  typedef typename EivalsType::Index Index;
+  eivalToCluster.resize(eivals.rows());
   Index clusterIndex = 0;
-  for (typename ListOfClusters::const_iterator cluster = m_clusters.begin(); cluster != m_clusters.end(); ++cluster) {
-    for (Index i = 0; i < diag.rows(); ++i) {
-      if (std::find(cluster->begin(), cluster->end(), diag(i)) != cluster->end()) {
-        ++m_clusterSize[clusterIndex];
-        m_eivalToCluster[i] = clusterIndex;
+  for (typename ListOfClusters::const_iterator cluster = clusters.begin(); cluster != clusters.end(); ++cluster) {
+    for (Index i = 0; i < eivals.rows(); ++i) {
+      if (std::find(cluster->begin(), cluster->end(), i) != cluster->end()) {
+        eivalToCluster[i] = clusterIndex;
       }
     }
     ++clusterIndex;
   }
 }
 
-/** \brief Compute #m_blockStart using #m_clusterSize */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::computeBlockStart()
+/** \brief Compute permutation which groups ei'vals in same cluster together */
+template <typename DynVectorType, typename VectorType>
+void matrix_function_compute_permutation(const DynVectorType& blockStart, const DynVectorType& eivalToCluster, VectorType& permutation)
 {
-  m_blockStart.resize(m_clusterSize.rows());
-  m_blockStart(0) = 0;
-  for (Index i = 1; i < m_clusterSize.rows(); i++) {
-    m_blockStart(i) = m_blockStart(i-1) + m_clusterSize(i-1);
-  }
-}
-
-/** \brief Compute #m_permutation using #m_eivalToCluster and #m_blockStart */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::constructPermutation()
-{
-  DynamicIntVectorType indexNextEntry = m_blockStart;
-  m_permutation.resize(m_T.rows());
-  for (Index i = 0; i < m_T.rows(); i++) {
-    Index cluster = m_eivalToCluster[i];
-    m_permutation[i] = indexNextEntry[cluster];
+  typedef typename VectorType::Index Index;
+  DynVectorType indexNextEntry = blockStart;
+  permutation.resize(eivalToCluster.rows());
+  for (Index i = 0; i < eivalToCluster.rows(); i++) {
+    Index cluster = eivalToCluster[i];
+    permutation[i] = indexNextEntry[cluster];
     ++indexNextEntry[cluster];
   }
 }  
 
-/** \brief Permute Schur decomposition in #m_U and #m_T according to #m_permutation */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::permuteSchur()
+/** \brief Permute Schur decomposition in U and T according to permutation */
+template <typename VectorType, typename MatrixType>
+void matrix_function_permute_schur(VectorType& permutation, MatrixType& U, MatrixType& T)
 {
-  IntVectorType p = m_permutation;
-  for (Index i = 0; i < p.rows() - 1; i++) {
+  typedef typename VectorType::Index Index;
+  for (Index i = 0; i < permutation.rows() - 1; i++) {
     Index j;
-    for (j = i; j < p.rows(); j++) {
-      if (p(j) == i) break;
+    for (j = i; j < permutation.rows(); j++) {
+      if (permutation(j) == i) break;
     }
-    eigen_assert(p(j) == i);
+    eigen_assert(permutation(j) == i);
     for (Index k = j-1; k >= i; k--) {
-      swapEntriesInSchur(k);
-      std::swap(p.coeffRef(k), p.coeffRef(k+1));
+      JacobiRotation<typename MatrixType::Scalar> rotation;
+      rotation.makeGivens(T(k, k+1), T(k+1, k+1) - T(k, k));
+      T.applyOnTheLeft(k, k+1, rotation.adjoint());
+      T.applyOnTheRight(k, k+1, rotation);
+      U.applyOnTheRight(k, k+1, rotation);
+      std::swap(permutation.coeffRef(k), permutation.coeffRef(k+1));
     }
   }
 }
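A short note on the reordering above (not part of the patch): each Givens rotation \f$ G \f$ is applied as a similarity transformation,
\f[
  T \leftarrow G^{*} T G, \qquad U \leftarrow U G,
\f]
so \f$ U T U^{*} \f$ is unchanged while the two adjacent diagonal entries of the triangular factor, and hence the corresponding eigenvalues, swap places; repeating this bubbles each eigenvalue to the position prescribed by \p permutation.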
 
-/** \brief Swap rows \a index and \a index+1 in Schur decomposition in #m_U and #m_T */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::swapEntriesInSchur(Index index)
-{
-  JacobiRotation<Scalar> rotation;
-  rotation.makeGivens(m_T(index, index+1), m_T(index+1, index+1) - m_T(index, index));
-  m_T.applyOnTheLeft(index, index+1, rotation.adjoint());
-  m_T.applyOnTheRight(index, index+1, rotation);
-  m_U.applyOnTheRight(index, index+1, rotation);
-}  
-
-/** \brief Compute block diagonal part of #m_fT.
+/** \brief Compute block diagonal part of matrix function.
   *
-  * This routine computes the matrix function applied to the block diagonal part of #m_T, with the blocking
-  * given by #m_blockStart. The matrix function of each diagonal block is computed by #m_atomic. The
-  * off-diagonal parts of #m_fT are set to zero.
+  * This routine computes the matrix function applied to the block diagonal part of \p T (which should be
+  * upper triangular), with the blocking given by \p blockStart and \p clusterSize. The matrix function of
+  * each diagonal block is computed by \p atomic. The off-diagonal parts of \p fT are set to zero.
   */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::computeBlockAtomic()
+template <typename MatrixType, typename AtomicType, typename VectorType>
+void matrix_function_compute_block_atomic(const MatrixType& T, AtomicType& atomic, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT)
 { 
-  m_fT.resize(m_T.rows(), m_T.cols());
-  m_fT.setZero();
-  for (Index i = 0; i < m_clusterSize.rows(); ++i) {
-    block(m_fT, i, i) = m_atomic.compute(block(m_T, i, i));
-  }
-}
-
-/** \brief Return block of matrix according to blocking given by #m_blockStart */
-template <typename MatrixType, typename AtomicType>
-Block<MatrixType> MatrixFunction<MatrixType,AtomicType,1>::block(MatrixType& A, Index i, Index j)
-{
-  return A.block(m_blockStart(i), m_blockStart(j), m_clusterSize(i), m_clusterSize(j));
-}
-
-/** \brief Compute part of #m_fT above block diagonal.
-  *
-  * This routine assumes that the block diagonal part of #m_fT (which
-  * equals the matrix function applied to #m_T) has already been computed and computes
-  * the part above the block diagonal. The part below the diagonal is
-  * zero, because #m_T is upper triangular.
-  */
-template <typename MatrixType, typename AtomicType>
-void MatrixFunction<MatrixType,AtomicType,1>::computeOffDiagonal()
-{ 
-  for (Index diagIndex = 1; diagIndex < m_clusterSize.rows(); diagIndex++) {
-    for (Index blockIndex = 0; blockIndex < m_clusterSize.rows() - diagIndex; blockIndex++) {
-      // compute (blockIndex, blockIndex+diagIndex) block
-      DynMatrixType A = block(m_T, blockIndex, blockIndex);
-      DynMatrixType B = -block(m_T, blockIndex+diagIndex, blockIndex+diagIndex);
-      DynMatrixType C = block(m_fT, blockIndex, blockIndex) * block(m_T, blockIndex, blockIndex+diagIndex);
-      C -= block(m_T, blockIndex, blockIndex+diagIndex) * block(m_fT, blockIndex+diagIndex, blockIndex+diagIndex);
-      for (Index k = blockIndex + 1; k < blockIndex + diagIndex; k++) {
-	C += block(m_fT, blockIndex, k) * block(m_T, k, blockIndex+diagIndex);
-	C -= block(m_T, blockIndex, k) * block(m_fT, k, blockIndex+diagIndex);
-      }
-      block(m_fT, blockIndex, blockIndex+diagIndex) = solveTriangularSylvester(A, B, C);
-    }
+  fT.setZero(T.rows(), T.cols());
+  for (typename VectorType::Index i = 0; i < clusterSize.rows(); ++i) {
+    fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))
+      = atomic.compute(T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i)));
   }
 }
 
@@ -414,8 +261,8 @@
   *
   * \returns the solution X.
   *
-  * If A is m-by-m and B is n-by-n, then both C and X are m-by-n. 
-  * The (i,j)-th component of the Sylvester equation is
+  * If A is m-by-m and B is n-by-n, then both C and X are m-by-n.  The (i,j)-th component of the Sylvester
+  * equation is
   * \f[ 
   *     \sum_{k=i}^m A_{ik} X_{kj} + \sum_{k=1}^j X_{ik} B_{kj} = C_{ij}. 
   * \f]
@@ -424,16 +271,12 @@
   *     X_{ij} = \frac{1}{A_{ii} + B_{jj}} \Bigl( C_{ij}
   *     - \sum_{k=i+1}^m A_{ik} X_{kj} - \sum_{k=1}^{j-1} X_{ik} B_{kj} \Bigr).
   * \f]
-  * It is assumed that A and B are such that the numerator is never
-  * zero (otherwise the Sylvester equation does not have a unique
-  * solution). In that case, these equations can be evaluated in the
-  * order \f$ i=m,\ldots,1 \f$ and \f$ j=1,\ldots,n \f$.
+  * It is assumed that A and B are such that the denominator \f$ A_{ii} + B_{jj} \f$ is never zero
+  * (otherwise the Sylvester equation does not have a unique solution). In that case, these equations can
+  * be evaluated in the order \f$ i=m,\ldots,1 \f$ and \f$ j=1,\ldots,n \f$.
   */
-template <typename MatrixType, typename AtomicType>
-typename MatrixFunction<MatrixType,AtomicType,1>::DynMatrixType MatrixFunction<MatrixType,AtomicType,1>::solveTriangularSylvester(
-  const DynMatrixType& A, 
-  const DynMatrixType& B, 
-  const DynMatrixType& C)
+template <typename MatrixType>
+MatrixType matrix_function_solve_triangular_sylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C)
 {
   eigen_assert(A.rows() == A.cols());
   eigen_assert(A.isUpperTriangular());
@@ -442,9 +285,12 @@
   eigen_assert(C.rows() == A.rows());
   eigen_assert(C.cols() == B.rows());
 
+  typedef typename MatrixType::Index Index;
+  typedef typename MatrixType::Scalar Scalar;
+
   Index m = A.rows();
   Index n = B.rows();
-  DynMatrixType X(m, n);
+  MatrixType X(m, n);
 
   for (Index i = m - 1; i >= 0; --i) {
     for (Index j = 0; j < n; ++j) {
@@ -473,66 +319,209 @@
   return X;
 }
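
For illustration, the documented back-substitution can be written as a hypothetical standalone helper
(solveUpperTriangularSylvester below is not the library's internal routine; it assumes complex upper
triangular A and B with A(i,i) + B(j,j) nonzero for all i, j):

    #include <Eigen/Dense>
    #include <complex>
    #include <iostream>

    // Sketch of X_ij = ( C_ij - sum_{k>i} A_ik X_kj - sum_{k<j} X_ik B_kj ) / (A_ii + B_jj),
    // evaluated for i = m,...,1 and j = 1,...,n as described above.
    Eigen::MatrixXcd solveUpperTriangularSylvester(const Eigen::MatrixXcd& A,
                                                   const Eigen::MatrixXcd& B,
                                                   const Eigen::MatrixXcd& C)
    {
      const Eigen::Index m = A.rows(), n = B.rows();
      Eigen::MatrixXcd X(m, n);
      for (Eigen::Index i = m - 1; i >= 0; --i) {
        for (Eigen::Index j = 0; j < n; ++j) {
          std::complex<double> rhs = C(i, j);
          for (Eigen::Index k = i + 1; k < m; ++k) rhs -= A(i, k) * X(k, j);
          for (Eigen::Index k = 0; k < j; ++k)     rhs -= X(i, k) * B(k, j);
          X(i, j) = rhs / (A(i, i) + B(j, j));
        }
      }
      return X;
    }

    int main()
    {
      Eigen::MatrixXcd A0 = Eigen::MatrixXcd::Random(3, 3), B0 = Eigen::MatrixXcd::Random(4, 4);
      Eigen::MatrixXcd A = A0.triangularView<Eigen::Upper>();  // dense copy of the upper triangular part
      Eigen::MatrixXcd B = B0.triangularView<Eigen::Upper>();
      Eigen::MatrixXcd C = Eigen::MatrixXcd::Random(3, 4);
      Eigen::MatrixXcd X = solveUpperTriangularSylvester(A, B, C);
      std::cout << (A * X + X * B - C).norm() << "\n";         // residual should be ~0
    }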
 
+/** \brief Compute part of matrix function above block diagonal.
+  *
+  * This routine completes the computation of \p fT, which denotes the matrix function applied to the
+  * triangular matrix \p T. It assumes that the block diagonal part of \p fT has already been computed.
+  * The part below the diagonal is zero, because \p T is upper triangular.
+  */
+template <typename MatrixType, typename VectorType>
+void matrix_function_compute_above_diagonal(const MatrixType& T, const VectorType& blockStart, const VectorType& clusterSize, MatrixType& fT)
+{ 
+  typedef internal::traits<MatrixType> Traits;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
+  static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
+  static const int Options = MatrixType::Options;
+  typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+
+  for (Index k = 1; k < clusterSize.rows(); k++) {
+    for (Index i = 0; i < clusterSize.rows() - k; i++) {
+      // compute (i, i+k) block
+      DynMatrixType A = T.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i));
+      DynMatrixType B = -T.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k));
+      DynMatrixType C = fT.block(blockStart(i), blockStart(i), clusterSize(i), clusterSize(i))
+        * T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k));
+      C -= T.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k))
+        * fT.block(blockStart(i+k), blockStart(i+k), clusterSize(i+k), clusterSize(i+k));
+      for (Index m = i + 1; m < i + k; m++) {
+        C += fT.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m))
+          * T.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k));
+        C -= T.block(blockStart(i), blockStart(m), clusterSize(i), clusterSize(m))
+          * fT.block(blockStart(m), blockStart(i+k), clusterSize(m), clusterSize(i+k));
+      }
+      fT.block(blockStart(i), blockStart(i+k), clusterSize(i), clusterSize(i+k))
+        = matrix_function_solve_triangular_sylvester(A, B, C);
+    }
+  }
+}
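
In block form this is the Parlett recurrence: writing \( F = f(T) \) and using \( FT = TF \) block-wise,
the block \( F_{ij} \) computed in iteration \( k = j - i \) satisfies

\[
  T_{ii} F_{ij} - F_{ij} T_{jj}
    = F_{ii} T_{ij} - T_{ij} F_{jj}
      + \sum_{l=i+1}^{j-1} \bigl( F_{il} T_{lj} - T_{il} F_{lj} \bigr),
\]

which is exactly the Sylvester equation \( AX + XB = C \) that the loop body assembles (with
\( A = T_{ii} \), \( B = -T_{jj} \)) and passes to matrix_function_solve_triangular_sylvester.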
+
+/** \ingroup MatrixFunctions_Module
+  * \brief Class for computing matrix functions.
+  * \tparam  MatrixType  type of the argument of the matrix function,
+  *                      expected to be an instantiation of the Matrix class template.
+  * \tparam  AtomicType  type for computing matrix function of atomic blocks.
+  * \tparam  IsComplex   used internally to select correct specialization.
+  *
+  * This class implements the Schur-Parlett algorithm for computing matrix functions. The spectrum of the
+  * matrix is divided into clusters of eigenvalues that lie close together. This class delegates the
+  * computation of the matrix function of every block corresponding to these clusters to an object of type
+  * \p AtomicType and uses these results to compute the matrix function of the whole matrix. The class
+  * \p AtomicType should have a \p compute() member function for computing the matrix function of a block.
+  *
+  * \sa class MatrixFunctionAtomic, class MatrixLogarithmAtomic
+  */
+template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex>
+struct matrix_function_compute
+{  
+    /** \brief Compute the matrix function.
+      *
+      * \param[in]  A       argument of matrix function, should be a square matrix.
+      * \param[in]  atomic  class for computing matrix function of atomic blocks.
+      * \param[out] result  the matrix function applied to \p A, computed block-wise using \p atomic.
+      *
+      * See MatrixBase::matrixFunction() for details on how this computation
+      * is implemented.
+      */
+    template <typename AtomicType, typename ResultType> 
+    static void run(const MatrixType& A, AtomicType& atomic, ResultType &result);    
+};
+
+/** \internal \ingroup MatrixFunctions_Module 
+  * \brief Partial specialization of matrix_function_compute for real matrices
+  *
+  * This converts the real matrix to a complex matrix, computes the matrix function of that matrix, and then
+  * converts the result back to a real matrix.
+  */
+template <typename MatrixType>
+struct matrix_function_compute<MatrixType, 0>
+{  
+  template <typename MatA, typename AtomicType, typename ResultType>
+  static void run(const MatA& A, AtomicType& atomic, ResultType &result)
+  {
+    typedef internal::traits<MatrixType> Traits;
+    typedef typename Traits::Scalar Scalar;
+    static const int Rows = Traits::RowsAtCompileTime, Cols = Traits::ColsAtCompileTime;
+    static const int MaxRows = Traits::MaxRowsAtCompileTime, MaxCols = Traits::MaxColsAtCompileTime;
+
+    typedef std::complex<Scalar> ComplexScalar;
+    typedef Matrix<ComplexScalar, Rows, Cols, 0, MaxRows, MaxCols> ComplexMatrix;
+
+    ComplexMatrix CA = A.template cast<ComplexScalar>();
+    ComplexMatrix Cresult;
+    matrix_function_compute<ComplexMatrix>::run(CA, atomic, Cresult);
+    result = Cresult.real();
+  }
+};
+
+/** \internal \ingroup MatrixFunctions_Module 
+  * \brief Partial specialization of matrix_function_compute for complex matrices
+  */
+template <typename MatrixType>
+struct matrix_function_compute<MatrixType, 1>
+{
+  template <typename MatA, typename AtomicType, typename ResultType>
+  static void run(const MatA& A, AtomicType& atomic, ResultType &result)
+  {
+    typedef internal::traits<MatrixType> Traits;
+    
+    // compute Schur decomposition of A
+    const ComplexSchur<MatrixType> schurOfA(A);  
+    MatrixType T = schurOfA.matrixT();
+    MatrixType U = schurOfA.matrixU();
+
+    // partition eigenvalues into clusters of ei'vals "close" to each other
+    std::list<std::list<Index> > clusters; 
+    matrix_function_partition_eigenvalues(T.diagonal(), clusters);
+
+    // compute size of each cluster
+    Matrix<Index, Dynamic, 1> clusterSize;
+    matrix_function_compute_cluster_size(clusters, clusterSize);
+
+    // blockStart[i] is row index at which block corresponding to i-th cluster starts 
+    Matrix<Index, Dynamic, 1> blockStart; 
+    matrix_function_compute_block_start(clusterSize, blockStart);
+
+    // compute map so that eivalToCluster[i] = j means that i-th ei'val is in j-th cluster 
+    Matrix<Index, Dynamic, 1> eivalToCluster;
+    matrix_function_compute_map(T.diagonal(), clusters, eivalToCluster);
+
+    // compute permutation which groups ei'vals in same cluster together 
+    Matrix<Index, Traits::RowsAtCompileTime, 1> permutation;
+    matrix_function_compute_permutation(blockStart, eivalToCluster, permutation);
+
+    // permute Schur decomposition
+    matrix_function_permute_schur(permutation, U, T);
+
+    // compute result
+    MatrixType fT; // matrix function applied to T
+    matrix_function_compute_block_atomic(T, atomic, blockStart, clusterSize, fT);
+    matrix_function_compute_above_diagonal(T, blockStart, clusterSize, fT);
+    result = U * (fT.template triangularView<Upper>() * U.adjoint());
+  }
+};
+
+} // end of namespace internal
+
 /** \ingroup MatrixFunctions_Module
   *
   * \brief Proxy for the matrix function of some matrix (expression).
   *
   * \tparam Derived  Type of the argument to the matrix function.
   *
-  * This class holds the argument to the matrix function until it is
-  * assigned or evaluated for some other reason (so the argument
-  * should not be changed in the meantime). It is the return type of
-  * matrixBase::matrixFunction() and related functions and most of the
-  * time this is the only way it is used.
+  * This class holds the argument to the matrix function until it is assigned or evaluated for some other
+  * reason (so the argument should not be changed in the meantime). It is the return type of
+  * MatrixBase::matrixFunction() and related functions and most of the time this is the only way it is used.
   */
 template<typename Derived> class MatrixFunctionReturnValue
 : public ReturnByValue<MatrixFunctionReturnValue<Derived> >
 {
   public:
-
     typedef typename Derived::Scalar Scalar;
     typedef typename Derived::Index Index;
     typedef typename internal::stem_function<Scalar>::type StemFunction;
 
-   /** \brief Constructor.
+  protected:
+    typedef typename internal::ref_selector<Derived>::type DerivedNested;
+
+  public:
+
+    /** \brief Constructor.
       *
-      * \param[in] A  %Matrix (expression) forming the argument of the
-      * matrix function.
+      * \param[in] A  %Matrix (expression) forming the argument of the matrix function.
       * \param[in] f  Stem function for matrix function under consideration.
       */
     MatrixFunctionReturnValue(const Derived& A, StemFunction f) : m_A(A), m_f(f) { }
 
     /** \brief Compute the matrix function.
       *
-      * \param[out] result \p f applied to \p A, where \p f and \p A
-      * are as in the constructor.
+      * \param[out] result \p f applied to \p A, where \p f and \p A are as in the constructor.
       */
     template <typename ResultType>
     inline void evalTo(ResultType& result) const
     {
-      typedef typename Derived::PlainObject PlainObject;
-      typedef internal::traits<PlainObject> Traits;
+      typedef typename internal::nested_eval<Derived, 10>::type NestedEvalType;
+      typedef typename internal::remove_all<NestedEvalType>::type NestedEvalTypeClean;
+      typedef internal::traits<NestedEvalTypeClean> Traits;
       static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
       static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
-      static const int Options = PlainObject::Options;
       typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
-      typedef Matrix<ComplexScalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
-      typedef MatrixFunctionAtomic<DynMatrixType> AtomicType;
+      typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+
+      typedef internal::MatrixFunctionAtomic<DynMatrixType> AtomicType;
       AtomicType atomic(m_f);
 
-      const PlainObject Aevaluated = m_A.eval();
-      MatrixFunction<PlainObject, AtomicType> mf(Aevaluated, atomic);
-      mf.compute(result);
+      internal::matrix_function_compute<typename NestedEvalTypeClean::PlainObject>::run(m_A, atomic, result);
     }
 
     Index rows() const { return m_A.rows(); }
     Index cols() const { return m_A.cols(); }
 
   private:
-    typename internal::nested<Derived>::type m_A;
+    const DerivedNested m_A;
     StemFunction *m_f;
-
-    MatrixFunctionReturnValue& operator=(const MatrixFunctionReturnValue&);
 };
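
Most of the time this proxy is created implicitly through MatrixBase::matrixFunction() and evaluated on
assignment. A minimal usage sketch (assuming the stem function returns the n-th derivative of the scalar
function at the given point, which for exp is exp itself):

    #include <unsupported/Eigen/MatrixFunctions>
    #include <complex>
    #include <iostream>

    // Stem function: value and derivatives of exp; all derivatives of exp coincide with exp.
    std::complex<double> expfn(std::complex<double> x, int /* derivative order */)
    {
      return std::exp(x);
    }

    int main()
    {
      Eigen::Matrix3d A = Eigen::Matrix3d::Random();
      // A.matrixFunction(expfn) returns a MatrixFunctionReturnValue; it is evaluated here.
      Eigen::Matrix3d expA = A.matrixFunction(expfn);
      std::cout << expA << "\n";
    }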
 
 namespace internal {
@@ -559,7 +548,7 @@
 {
   eigen_assert(rows() == cols());
   typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
-  return MatrixFunctionReturnValue<Derived>(derived(), StdStemFunctions<ComplexScalar>::sin);
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_sin<ComplexScalar>);
 }
 
 template <typename Derived>
@@ -567,7 +556,7 @@
 {
   eigen_assert(rows() == cols());
   typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
-  return MatrixFunctionReturnValue<Derived>(derived(), StdStemFunctions<ComplexScalar>::cos);
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_cos<ComplexScalar>);
 }
 
 template <typename Derived>
@@ -575,7 +564,7 @@
 {
   eigen_assert(rows() == cols());
   typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
-  return MatrixFunctionReturnValue<Derived>(derived(), StdStemFunctions<ComplexScalar>::sinh);
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_sinh<ComplexScalar>);
 }
 
 template <typename Derived>
@@ -583,9 +572,9 @@
 {
   eigen_assert(rows() == cols());
   typedef typename internal::stem_function<Scalar>::ComplexScalar ComplexScalar;
-  return MatrixFunctionReturnValue<Derived>(derived(), StdStemFunctions<ComplexScalar>::cosh);
+  return MatrixFunctionReturnValue<Derived>(derived(), internal::stem_function_cosh<ComplexScalar>);
 }
 
 } // end namespace Eigen
 
-#endif // EIGEN_MATRIX_FUNCTION
+#endif // EIGEN_MATRIX_FUNCTION_H
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h
deleted file mode 100644
index efe332c..0000000
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h
+++ /dev/null
@@ -1,131 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2009 Jitse Niesen <jitse@maths.leeds.ac.uk>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_MATRIX_FUNCTION_ATOMIC
-#define EIGEN_MATRIX_FUNCTION_ATOMIC
-
-namespace Eigen { 
-
-/** \ingroup MatrixFunctions_Module
-  * \class MatrixFunctionAtomic
-  * \brief Helper class for computing matrix functions of atomic matrices.
-  *
-  * \internal
-  * Here, an atomic matrix is a triangular matrix whose diagonal
-  * entries are close to each other.
-  */
-template <typename MatrixType>
-class MatrixFunctionAtomic
-{
-  public:
-
-    typedef typename MatrixType::Scalar Scalar;
-    typedef typename MatrixType::Index Index;
-    typedef typename NumTraits<Scalar>::Real RealScalar;
-    typedef typename internal::stem_function<Scalar>::type StemFunction;
-    typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
-
-    /** \brief Constructor
-      * \param[in]  f  matrix function to compute.
-      */
-    MatrixFunctionAtomic(StemFunction f) : m_f(f) { }
-
-    /** \brief Compute matrix function of atomic matrix
-      * \param[in]  A  argument of matrix function, should be upper triangular and atomic
-      * \returns  f(A), the matrix function evaluated at the given matrix
-      */
-    MatrixType compute(const MatrixType& A);
-
-  private:
-
-    // Prevent copying
-    MatrixFunctionAtomic(const MatrixFunctionAtomic&);
-    MatrixFunctionAtomic& operator=(const MatrixFunctionAtomic&);
-
-    void computeMu();
-    bool taylorConverged(Index s, const MatrixType& F, const MatrixType& Fincr, const MatrixType& P);
-
-    /** \brief Pointer to scalar function */
-    StemFunction* m_f;
-
-    /** \brief Size of matrix function */
-    Index m_Arows;
-
-    /** \brief Mean of eigenvalues */
-    Scalar m_avgEival;
-
-    /** \brief Argument shifted by mean of eigenvalues */
-    MatrixType m_Ashifted;
-
-    /** \brief Constant used to determine whether Taylor series has converged */
-    RealScalar m_mu;
-};
-
-template <typename MatrixType>
-MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
-{
-  // TODO: Use that A is upper triangular
-  m_Arows = A.rows();
-  m_avgEival = A.trace() / Scalar(RealScalar(m_Arows));
-  m_Ashifted = A - m_avgEival * MatrixType::Identity(m_Arows, m_Arows);
-  computeMu();
-  MatrixType F = m_f(m_avgEival, 0) * MatrixType::Identity(m_Arows, m_Arows);
-  MatrixType P = m_Ashifted;
-  MatrixType Fincr;
-  for (Index s = 1; s < 1.1 * m_Arows + 10; s++) { // upper limit is fairly arbitrary
-    Fincr = m_f(m_avgEival, static_cast<int>(s)) * P;
-    F += Fincr;
-    P = Scalar(RealScalar(1.0/(s + 1))) * P * m_Ashifted;
-    if (taylorConverged(s, F, Fincr, P)) {
-      return F;
-    }
-  }
-  eigen_assert("Taylor series does not converge" && 0);
-  return F;
-}
-
-/** \brief Compute \c m_mu. */
-template <typename MatrixType>
-void MatrixFunctionAtomic<MatrixType>::computeMu()
-{
-  const MatrixType N = MatrixType::Identity(m_Arows, m_Arows) - m_Ashifted;
-  VectorType e = VectorType::Ones(m_Arows);
-  N.template triangularView<Upper>().solveInPlace(e);
-  m_mu = e.cwiseAbs().maxCoeff();
-}
-
-/** \brief Determine whether Taylor series has converged */
-template <typename MatrixType>
-bool MatrixFunctionAtomic<MatrixType>::taylorConverged(Index s, const MatrixType& F,
-						       const MatrixType& Fincr, const MatrixType& P)
-{
-  const Index n = F.rows();
-  const RealScalar F_norm = F.cwiseAbs().rowwise().sum().maxCoeff();
-  const RealScalar Fincr_norm = Fincr.cwiseAbs().rowwise().sum().maxCoeff();
-  if (Fincr_norm < NumTraits<Scalar>::epsilon() * F_norm) {
-    RealScalar delta = 0;
-    RealScalar rfactorial = 1;
-    for (Index r = 0; r < n; r++) {
-      RealScalar mx = 0;
-      for (Index i = 0; i < n; i++)
-        mx = (std::max)(mx, std::abs(m_f(m_Ashifted(i, i) + m_avgEival, static_cast<int>(s+r))));
-      if (r != 0)
-        rfactorial *= RealScalar(r);
-      delta = (std::max)(delta, mx / rfactorial);
-    }
-    const RealScalar P_norm = P.cwiseAbs().rowwise().sum().maxCoeff();
-    if (m_mu * delta * P_norm < NumTraits<Scalar>::epsilon() * F_norm)
-      return true;
-  }
-  return false;
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_MATRIX_FUNCTION_ATOMIC
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
index c744fc0..cf5fffa 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixLogarithm.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
 // Copyright (C) 2011 Chen-Pang He <jdh8@ms63.hinet.net>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@@ -11,91 +11,33 @@
 #ifndef EIGEN_MATRIX_LOGARITHM
 #define EIGEN_MATRIX_LOGARITHM
 
-#ifndef M_PI
-#define M_PI 3.141592653589793238462643383279503L
-#endif
-
 namespace Eigen { 
 
-/** \ingroup MatrixFunctions_Module
-  * \class MatrixLogarithmAtomic
-  * \brief Helper class for computing matrix logarithm of atomic matrices.
-  *
-  * \internal
-  * Here, an atomic matrix is a triangular matrix whose diagonal
-  * entries are close to each other.
-  *
-  * \sa class MatrixFunctionAtomic, MatrixBase::log()
-  */
-template <typename MatrixType>
-class MatrixLogarithmAtomic
+namespace internal { 
+
+template <typename Scalar>
+struct matrix_log_min_pade_degree 
 {
-public:
-
-  typedef typename MatrixType::Scalar Scalar;
-  // typedef typename MatrixType::Index Index;
-  typedef typename NumTraits<Scalar>::Real RealScalar;
-  // typedef typename internal::stem_function<Scalar>::type StemFunction;
-  // typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
-
-  /** \brief Constructor. */
-  MatrixLogarithmAtomic() { }
-
-  /** \brief Compute matrix logarithm of atomic matrix
-    * \param[in]  A  argument of matrix logarithm, should be upper triangular and atomic
-    * \returns  The logarithm of \p A.
-    */
-  MatrixType compute(const MatrixType& A);
-
-private:
-
-  void compute2x2(const MatrixType& A, MatrixType& result);
-  void computeBig(const MatrixType& A, MatrixType& result);
-  int getPadeDegree(float normTminusI);
-  int getPadeDegree(double normTminusI);
-  int getPadeDegree(long double normTminusI);
-  void computePade(MatrixType& result, const MatrixType& T, int degree);
-  void computePade3(MatrixType& result, const MatrixType& T);
-  void computePade4(MatrixType& result, const MatrixType& T);
-  void computePade5(MatrixType& result, const MatrixType& T);
-  void computePade6(MatrixType& result, const MatrixType& T);
-  void computePade7(MatrixType& result, const MatrixType& T);
-  void computePade8(MatrixType& result, const MatrixType& T);
-  void computePade9(MatrixType& result, const MatrixType& T);
-  void computePade10(MatrixType& result, const MatrixType& T);
-  void computePade11(MatrixType& result, const MatrixType& T);
-
-  static const int minPadeDegree = 3;
-  static const int maxPadeDegree = std::numeric_limits<RealScalar>::digits<= 24?  5:  // single precision
-                                   std::numeric_limits<RealScalar>::digits<= 53?  7:  // double precision
-                                   std::numeric_limits<RealScalar>::digits<= 64?  8:  // extended precision
-                                   std::numeric_limits<RealScalar>::digits<=106? 10:  // double-double
-                                                                                 11;  // quadruple precision
-
-  // Prevent copying
-  MatrixLogarithmAtomic(const MatrixLogarithmAtomic&);
-  MatrixLogarithmAtomic& operator=(const MatrixLogarithmAtomic&);
+  static const int value = 3;
 };
 
-/** \brief Compute logarithm of triangular matrix with clustered eigenvalues. */
-template <typename MatrixType>
-MatrixType MatrixLogarithmAtomic<MatrixType>::compute(const MatrixType& A)
+template <typename Scalar>
+struct matrix_log_max_pade_degree 
 {
-  using std::log;
-  MatrixType result(A.rows(), A.rows());
-  if (A.rows() == 1)
-    result(0,0) = log(A(0,0));
-  else if (A.rows() == 2)
-    compute2x2(A, result);
-  else
-    computeBig(A, result);
-  return result;
-}
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  static const int value = std::numeric_limits<RealScalar>::digits<= 24?  5:  // single precision
+                           std::numeric_limits<RealScalar>::digits<= 53?  7:  // double precision
+                           std::numeric_limits<RealScalar>::digits<= 64?  8:  // extended precision
+                           std::numeric_limits<RealScalar>::digits<=106? 10:  // double-double
+                                                                         11;  // quadruple precision
+};
 
 /** \brief Compute logarithm of 2x2 triangular matrix. */
 template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::compute2x2(const MatrixType& A, MatrixType& result)
+void matrix_log_compute_2x2(const MatrixType& A, MatrixType& result)
 {
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
   using std::abs;
   using std::ceil;
   using std::imag;
@@ -108,59 +50,31 @@
   result(1,0) = Scalar(0);
   result(1,1) = logA11;
 
-  if (A(0,0) == A(1,1)) {
+  Scalar y = A(1,1) - A(0,0);
+  if (y==Scalar(0))
+  {
     result(0,1) = A(0,1) / A(0,0);
-  } else if ((abs(A(0,0)) < 0.5*abs(A(1,1))) || (abs(A(0,0)) > 2*abs(A(1,1)))) {
-    result(0,1) = A(0,1) * (logA11 - logA00) / (A(1,1) - A(0,0));
-  } else {
+  }
+  else if ((abs(A(0,0)) < RealScalar(0.5)*abs(A(1,1))) || (abs(A(0,0)) > 2*abs(A(1,1))))
+  {
+    result(0,1) = A(0,1) * (logA11 - logA00) / y;
+  }
+  else
+  {
     // computation in previous branch is inaccurate if A(1,1) \approx A(0,0)
-    int unwindingNumber = static_cast<int>(ceil((imag(logA11 - logA00) - M_PI) / (2*M_PI)));
-    Scalar y = A(1,1) - A(0,0), x = A(1,1) + A(0,0);
-    result(0,1) = A(0,1) * (Scalar(2) * numext::atanh2(y,x) + Scalar(0,2*M_PI*unwindingNumber)) / y;
+    int unwindingNumber = static_cast<int>(ceil((imag(logA11 - logA00) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI)));
+    result(0,1) = A(0,1) * (numext::log1p(y/A(0,0)) + Scalar(0,2*EIGEN_PI*unwindingNumber)) / y;
   }
 }
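
For the 2x2 triangular case the only nontrivial entry is the (0,1) one: it equals \( A_{01} \) times the
divided difference of \( \log \) at the two diagonal entries,

\[
  \bigl( \log A \bigr)_{01} = A_{01} \, \frac{\log A_{11} - \log A_{00}}{A_{11} - A_{00}},
\]

with limit \( A_{01} / A_{00} \) when the diagonal entries coincide. The log1p/unwinding-number branch
rewrites this divided difference so that it stays accurate, and on the correct branch of the logarithm,
when \( A_{00} \approx A_{11} \).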
 
-/** \brief Compute logarithm of triangular matrices with size > 2. 
-  * \details This uses a inverse scale-and-square algorithm. */
-template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computeBig(const MatrixType& A, MatrixType& result)
-{
-  using std::pow;
-  int numberOfSquareRoots = 0;
-  int numberOfExtraSquareRoots = 0;
-  int degree;
-  MatrixType T = A, sqrtT;
-  const RealScalar maxNormForPade = maxPadeDegree<= 5? 5.3149729967117310e-1:                     // single precision
-                                    maxPadeDegree<= 7? 2.6429608311114350e-1:                     // double precision
-                                    maxPadeDegree<= 8? 2.32777776523703892094e-1L:                // extended precision
-                                    maxPadeDegree<=10? 1.05026503471351080481093652651105e-1L:    // double-double
-                                                       1.1880960220216759245467951592883642e-1L;  // quadruple precision
-
-  while (true) {
-    RealScalar normTminusI = (T - MatrixType::Identity(T.rows(), T.rows())).cwiseAbs().colwise().sum().maxCoeff();
-    if (normTminusI < maxNormForPade) {
-      degree = getPadeDegree(normTminusI);
-      int degree2 = getPadeDegree(normTminusI / RealScalar(2));
-      if ((degree - degree2 <= 1) || (numberOfExtraSquareRoots == 1)) 
-        break;
-      ++numberOfExtraSquareRoots;
-    }
-    MatrixSquareRootTriangular<MatrixType>(T).compute(sqrtT);
-    T = sqrtT.template triangularView<Upper>();
-    ++numberOfSquareRoots;
-  }
-
-  computePade(result, T, degree);
-  result *= pow(RealScalar(2), numberOfSquareRoots);
-}
-
 /* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = float) */
-template <typename MatrixType>
-int MatrixLogarithmAtomic<MatrixType>::getPadeDegree(float normTminusI)
+inline int matrix_log_get_pade_degree(float normTminusI)
 {
   const float maxNormForPade[] = { 2.5111573934555054e-1 /* degree = 3 */ , 4.0535837411880493e-1,
             5.3149729967117310e-1 };
-  int degree = 3;
+  const int minPadeDegree = matrix_log_min_pade_degree<float>::value;
+  const int maxPadeDegree = matrix_log_max_pade_degree<float>::value;
+  int degree = minPadeDegree;
   for (; degree <= maxPadeDegree; ++degree) 
     if (normTminusI <= maxNormForPade[degree - minPadeDegree])
       break;
@@ -168,12 +82,13 @@
 }
 
 /* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = double) */
-template <typename MatrixType>
-int MatrixLogarithmAtomic<MatrixType>::getPadeDegree(double normTminusI)
+inline int matrix_log_get_pade_degree(double normTminusI)
 {
   const double maxNormForPade[] = { 1.6206284795015624e-2 /* degree = 3 */ , 5.3873532631381171e-2,
             1.1352802267628681e-1, 1.8662860613541288e-1, 2.642960831111435e-1 };
-  int degree = 3;
+  const int minPadeDegree = matrix_log_min_pade_degree<double>::value;
+  const int maxPadeDegree = matrix_log_max_pade_degree<double>::value;
+  int degree = minPadeDegree;
   for (; degree <= maxPadeDegree; ++degree)
     if (normTminusI <= maxNormForPade[degree - minPadeDegree])
       break;
@@ -181,8 +96,7 @@
 }
 
 /* \brief Get suitable degree for Pade approximation. (specialized for RealScalar = long double) */
-template <typename MatrixType>
-int MatrixLogarithmAtomic<MatrixType>::getPadeDegree(long double normTminusI)
+inline int matrix_log_get_pade_degree(long double normTminusI)
 {
 #if   LDBL_MANT_DIG == 53         // double precision
   const long double maxNormForPade[] = { 1.6206284795015624e-2L /* degree = 3 */ , 5.3873532631381171e-2L,
@@ -204,7 +118,9 @@
             3.6688019729653446926585242192447447e-2L, 5.9290962294020186998954055264528393e-2L,
             8.6998436081634343903250580992127677e-2L, 1.1880960220216759245467951592883642e-1L };
 #endif
-  int degree = 3;
+  const int minPadeDegree = matrix_log_min_pade_degree<long double>::value;
+  const int maxPadeDegree = matrix_log_max_pade_degree<long double>::value;
+  int degree = minPadeDegree;
   for (; degree <= maxPadeDegree; ++degree)
     if (normTminusI <= maxNormForPade[degree - minPadeDegree])
       break;
@@ -213,197 +129,168 @@
 
 /* \brief Compute Pade approximation to matrix logarithm */
 template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade(MatrixType& result, const MatrixType& T, int degree)
+void matrix_log_compute_pade(MatrixType& result, const MatrixType& T, int degree)
 {
-  switch (degree) {
-    case 3:  computePade3(result, T);  break;
-    case 4:  computePade4(result, T);  break;
-    case 5:  computePade5(result, T);  break;
-    case 6:  computePade6(result, T);  break;
-    case 7:  computePade7(result, T);  break;
-    case 8:  computePade8(result, T);  break;
-    case 9:  computePade9(result, T);  break;
-    case 10: computePade10(result, T); break;
-    case 11: computePade11(result, T); break;
-    default: assert(false); // should never happen
+  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+  const int minPadeDegree = 3;
+  const int maxPadeDegree = 11;
+  assert(degree >= minPadeDegree && degree <= maxPadeDegree);
+
+  const RealScalar nodes[][maxPadeDegree] = { 
+    { 0.1127016653792583114820734600217600L, 0.5000000000000000000000000000000000L,  // degree 3
+      0.8872983346207416885179265399782400L }, 
+    { 0.0694318442029737123880267555535953L, 0.3300094782075718675986671204483777L,  // degree 4
+      0.6699905217924281324013328795516223L, 0.9305681557970262876119732444464048L },
+    { 0.0469100770306680036011865608503035L, 0.2307653449471584544818427896498956L,  // degree 5
+      0.5000000000000000000000000000000000L, 0.7692346550528415455181572103501044L,
+      0.9530899229693319963988134391496965L },
+    { 0.0337652428984239860938492227530027L, 0.1693953067668677431693002024900473L,  // degree 6
+      0.3806904069584015456847491391596440L, 0.6193095930415984543152508608403560L,
+      0.8306046932331322568306997975099527L, 0.9662347571015760139061507772469973L },
+    { 0.0254460438286207377369051579760744L, 0.1292344072003027800680676133596058L,  // degree 7
+      0.2970774243113014165466967939615193L, 0.5000000000000000000000000000000000L,
+      0.7029225756886985834533032060384807L, 0.8707655927996972199319323866403942L,
+      0.9745539561713792622630948420239256L },
+    { 0.0198550717512318841582195657152635L, 0.1016667612931866302042230317620848L,  // degree 8
+      0.2372337950418355070911304754053768L, 0.4082826787521750975302619288199080L,
+      0.5917173212478249024697380711800920L, 0.7627662049581644929088695245946232L,
+      0.8983332387068133697957769682379152L, 0.9801449282487681158417804342847365L },
+    { 0.0159198802461869550822118985481636L, 0.0819844463366821028502851059651326L,  // degree 9
+      0.1933142836497048013456489803292629L, 0.3378732882980955354807309926783317L,
+      0.5000000000000000000000000000000000L, 0.6621267117019044645192690073216683L,
+      0.8066857163502951986543510196707371L, 0.9180155536633178971497148940348674L,
+      0.9840801197538130449177881014518364L },
+    { 0.0130467357414141399610179939577740L, 0.0674683166555077446339516557882535L,  // degree 10
+      0.1602952158504877968828363174425632L, 0.2833023029353764046003670284171079L,
+      0.4255628305091843945575869994351400L, 0.5744371694908156054424130005648600L,
+      0.7166976970646235953996329715828921L, 0.8397047841495122031171636825574368L,
+      0.9325316833444922553660483442117465L, 0.9869532642585858600389820060422260L },
+    { 0.0108856709269715035980309994385713L, 0.0564687001159523504624211153480364L,  // degree 11
+      0.1349239972129753379532918739844233L, 0.2404519353965940920371371652706952L,
+      0.3652284220238275138342340072995692L, 0.5000000000000000000000000000000000L,
+      0.6347715779761724861657659927004308L, 0.7595480646034059079628628347293048L,
+      0.8650760027870246620467081260155767L, 0.9435312998840476495375788846519636L,
+      0.9891143290730284964019690005614287L } };
+
+  const RealScalar weights[][maxPadeDegree] = { 
+    { 0.2777777777777777777777777777777778L, 0.4444444444444444444444444444444444L,  // degree 3
+      0.2777777777777777777777777777777778L },
+    { 0.1739274225687269286865319746109997L, 0.3260725774312730713134680253890003L,  // degree 4
+      0.3260725774312730713134680253890003L, 0.1739274225687269286865319746109997L },
+    { 0.1184634425280945437571320203599587L, 0.2393143352496832340206457574178191L,  // degree 5
+      0.2844444444444444444444444444444444L, 0.2393143352496832340206457574178191L,
+      0.1184634425280945437571320203599587L },
+    { 0.0856622461895851725201480710863665L, 0.1803807865240693037849167569188581L,  // degree 6
+      0.2339569672863455236949351719947755L, 0.2339569672863455236949351719947755L,
+      0.1803807865240693037849167569188581L, 0.0856622461895851725201480710863665L },
+    { 0.0647424830844348466353057163395410L, 0.1398526957446383339507338857118898L,  // degree 7
+      0.1909150252525594724751848877444876L, 0.2089795918367346938775510204081633L,
+      0.1909150252525594724751848877444876L, 0.1398526957446383339507338857118898L,
+      0.0647424830844348466353057163395410L },
+    { 0.0506142681451881295762656771549811L, 0.1111905172266872352721779972131204L,  // degree 8
+      0.1568533229389436436689811009933007L, 0.1813418916891809914825752246385978L,
+      0.1813418916891809914825752246385978L, 0.1568533229389436436689811009933007L,
+      0.1111905172266872352721779972131204L, 0.0506142681451881295762656771549811L },
+    { 0.0406371941807872059859460790552618L, 0.0903240803474287020292360156214564L,  // degree 9
+      0.1303053482014677311593714347093164L, 0.1561735385200014200343152032922218L,
+      0.1651196775006298815822625346434870L, 0.1561735385200014200343152032922218L,
+      0.1303053482014677311593714347093164L, 0.0903240803474287020292360156214564L,
+      0.0406371941807872059859460790552618L },
+    { 0.0333356721543440687967844049466659L, 0.0747256745752902965728881698288487L,  // degree 10
+      0.1095431812579910219977674671140816L, 0.1346333596549981775456134607847347L,
+      0.1477621123573764350869464973256692L, 0.1477621123573764350869464973256692L,
+      0.1346333596549981775456134607847347L, 0.1095431812579910219977674671140816L,
+      0.0747256745752902965728881698288487L, 0.0333356721543440687967844049466659L },
+    { 0.0278342835580868332413768602212743L, 0.0627901847324523123173471496119701L,  // degree 11
+      0.0931451054638671257130488207158280L, 0.1165968822959952399592618524215876L,
+      0.1314022722551233310903444349452546L, 0.1364625433889503153572417641681711L,
+      0.1314022722551233310903444349452546L, 0.1165968822959952399592618524215876L,
+      0.0931451054638671257130488207158280L, 0.0627901847324523123173471496119701L,
+      0.0278342835580868332413768602212743L } };
+
+  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
+  result.setZero(T.rows(), T.rows());
+  for (int k = 0; k < degree; ++k) {
+    RealScalar weight = weights[degree-minPadeDegree][k];
+    RealScalar node = nodes[degree-minPadeDegree][k];
+    result += weight * (MatrixType::Identity(T.rows(), T.rows()) + node * TminusI)
+                       .template triangularView<Upper>().solve(TminusI);
   }
 } 
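
The node and weight tables above are Gauss-Legendre quadrature rules on \( [0,1] \), so the loop evaluates

\[
  \log T \;\approx\; \sum_{k=1}^{d} w_k \, \bigl( I + x_k (T - I) \bigr)^{-1} (T - I),
\]

the degree-\( d \) quadrature of the integral representation
\( \log T = \int_0^1 \bigl( I + t(T - I) \bigr)^{-1} (T - I) \, dt \); this is the partial-fraction form of
the Pade approximant computed here.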
 
+/** \brief Compute logarithm of triangular matrices with size > 2. 
+  * \details This uses an inverse scale-and-square algorithm. */
 template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade3(MatrixType& result, const MatrixType& T)
+void matrix_log_compute_big(const MatrixType& A, MatrixType& result)
 {
-  const int degree = 3;
-  const RealScalar nodes[]   = { 0.1127016653792583114820734600217600L, 0.5000000000000000000000000000000000L,
-            0.8872983346207416885179265399782400L };
-  const RealScalar weights[] = { 0.2777777777777777777777777777777778L, 0.4444444444444444444444444444444444L,
-            0.2777777777777777777777777777777778L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename NumTraits<Scalar>::Real RealScalar;
+  using std::pow;
+
+  int numberOfSquareRoots = 0;
+  int numberOfExtraSquareRoots = 0;
+  int degree;
+  MatrixType T = A, sqrtT;
+
+  int maxPadeDegree = matrix_log_max_pade_degree<Scalar>::value;
+  const RealScalar maxNormForPade = maxPadeDegree<= 5? 5.3149729967117310e-1L:                    // single precision
+                                    maxPadeDegree<= 7? 2.6429608311114350e-1L:                    // double precision
+                                    maxPadeDegree<= 8? 2.32777776523703892094e-1L:                // extended precision
+                                    maxPadeDegree<=10? 1.05026503471351080481093652651105e-1L:    // double-double
+                                                       1.1880960220216759245467951592883642e-1L;  // quadruple precision
+
+  while (true) {
+    RealScalar normTminusI = (T - MatrixType::Identity(T.rows(), T.rows())).cwiseAbs().colwise().sum().maxCoeff();
+    if (normTminusI < maxNormForPade) {
+      degree = matrix_log_get_pade_degree(normTminusI);
+      int degree2 = matrix_log_get_pade_degree(normTminusI / RealScalar(2));
+      if ((degree - degree2 <= 1) || (numberOfExtraSquareRoots == 1)) 
+        break;
+      ++numberOfExtraSquareRoots;
+    }
+    matrix_sqrt_triangular(T, sqrtT);
+    T = sqrtT.template triangularView<Upper>();
+    ++numberOfSquareRoots;
+  }
+
+  matrix_log_compute_pade(result, T, degree);
+  result *= pow(RealScalar(2), numberOfSquareRoots);
 }
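
This is the inverse scale-and-square strategy mentioned in the comment above: square roots are taken until
\( \| T - I \| \) is small enough for the available Pade degrees, the Pade approximant is evaluated, and
the scaling is undone at the end via

\[
  \log A = 2^{s} \log \bigl( A^{1/2^{s}} \bigr),
\]

where \( s \) is numberOfSquareRoots.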
 
+/** \ingroup MatrixFunctions_Module
+  * \class MatrixLogarithmAtomic
+  * \brief Helper class for computing matrix logarithm of atomic matrices.
+  *
+  * Here, an atomic matrix is a triangular matrix whose diagonal entries are close to each other.
+  *
+  * \sa class MatrixFunctionAtomic, MatrixBase::log()
+  */
 template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade4(MatrixType& result, const MatrixType& T)
+class MatrixLogarithmAtomic
 {
-  const int degree = 4;
-  const RealScalar nodes[]   = { 0.0694318442029737123880267555535953L, 0.3300094782075718675986671204483777L,
-            0.6699905217924281324013328795516223L, 0.9305681557970262876119732444464048L };
-  const RealScalar weights[] = { 0.1739274225687269286865319746109997L, 0.3260725774312730713134680253890003L,
-            0.3260725774312730713134680253890003L, 0.1739274225687269286865319746109997L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
-}
+public:
+  /** \brief Compute matrix logarithm of atomic matrix
+    * \param[in]  A  argument of matrix logarithm, should be upper triangular and atomic
+    * \returns  The logarithm of \p A.
+    */
+  MatrixType compute(const MatrixType& A);
+};
 
 template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade5(MatrixType& result, const MatrixType& T)
+MatrixType MatrixLogarithmAtomic<MatrixType>::compute(const MatrixType& A)
 {
-  const int degree = 5;
-  const RealScalar nodes[]   = { 0.0469100770306680036011865608503035L, 0.2307653449471584544818427896498956L,
-            0.5000000000000000000000000000000000L, 0.7692346550528415455181572103501044L,
-            0.9530899229693319963988134391496965L };
-  const RealScalar weights[] = { 0.1184634425280945437571320203599587L, 0.2393143352496832340206457574178191L,
-            0.2844444444444444444444444444444444L, 0.2393143352496832340206457574178191L,
-            0.1184634425280945437571320203599587L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
+  using std::log;
+  MatrixType result(A.rows(), A.rows());
+  if (A.rows() == 1)
+    result(0,0) = log(A(0,0));
+  else if (A.rows() == 2)
+    matrix_log_compute_2x2(A, result);
+  else
+    matrix_log_compute_big(A, result);
+  return result;
 }
 
-template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade6(MatrixType& result, const MatrixType& T)
-{
-  const int degree = 6;
-  const RealScalar nodes[]   = { 0.0337652428984239860938492227530027L, 0.1693953067668677431693002024900473L,
-            0.3806904069584015456847491391596440L, 0.6193095930415984543152508608403560L,
-            0.8306046932331322568306997975099527L, 0.9662347571015760139061507772469973L };
-  const RealScalar weights[] = { 0.0856622461895851725201480710863665L, 0.1803807865240693037849167569188581L,
-            0.2339569672863455236949351719947755L, 0.2339569672863455236949351719947755L,
-            0.1803807865240693037849167569188581L, 0.0856622461895851725201480710863665L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
-}
-
-template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade7(MatrixType& result, const MatrixType& T)
-{
-  const int degree = 7;
-  const RealScalar nodes[]   = { 0.0254460438286207377369051579760744L, 0.1292344072003027800680676133596058L,
-            0.2970774243113014165466967939615193L, 0.5000000000000000000000000000000000L,
-            0.7029225756886985834533032060384807L, 0.8707655927996972199319323866403942L,
-            0.9745539561713792622630948420239256L };
-  const RealScalar weights[] = { 0.0647424830844348466353057163395410L, 0.1398526957446383339507338857118898L,
-            0.1909150252525594724751848877444876L, 0.2089795918367346938775510204081633L,
-            0.1909150252525594724751848877444876L, 0.1398526957446383339507338857118898L,
-            0.0647424830844348466353057163395410L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
-}
-
-template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade8(MatrixType& result, const MatrixType& T)
-{
-  const int degree = 8;
-  const RealScalar nodes[]   = { 0.0198550717512318841582195657152635L, 0.1016667612931866302042230317620848L,
-            0.2372337950418355070911304754053768L, 0.4082826787521750975302619288199080L,
-            0.5917173212478249024697380711800920L, 0.7627662049581644929088695245946232L,
-            0.8983332387068133697957769682379152L, 0.9801449282487681158417804342847365L };
-  const RealScalar weights[] = { 0.0506142681451881295762656771549811L, 0.1111905172266872352721779972131204L,
-            0.1568533229389436436689811009933007L, 0.1813418916891809914825752246385978L,
-            0.1813418916891809914825752246385978L, 0.1568533229389436436689811009933007L,
-            0.1111905172266872352721779972131204L, 0.0506142681451881295762656771549811L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
-}
-
-template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade9(MatrixType& result, const MatrixType& T)
-{
-  const int degree = 9;
-  const RealScalar nodes[]   = { 0.0159198802461869550822118985481636L, 0.0819844463366821028502851059651326L,
-            0.1933142836497048013456489803292629L, 0.3378732882980955354807309926783317L,
-            0.5000000000000000000000000000000000L, 0.6621267117019044645192690073216683L,
-            0.8066857163502951986543510196707371L, 0.9180155536633178971497148940348674L,
-            0.9840801197538130449177881014518364L };
-  const RealScalar weights[] = { 0.0406371941807872059859460790552618L, 0.0903240803474287020292360156214564L,
-            0.1303053482014677311593714347093164L, 0.1561735385200014200343152032922218L,
-            0.1651196775006298815822625346434870L, 0.1561735385200014200343152032922218L,
-            0.1303053482014677311593714347093164L, 0.0903240803474287020292360156214564L,
-            0.0406371941807872059859460790552618L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
-}
-
-template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade10(MatrixType& result, const MatrixType& T)
-{
-  const int degree = 10;
-  const RealScalar nodes[]   = { 0.0130467357414141399610179939577740L, 0.0674683166555077446339516557882535L,
-            0.1602952158504877968828363174425632L, 0.2833023029353764046003670284171079L,
-            0.4255628305091843945575869994351400L, 0.5744371694908156054424130005648600L,
-            0.7166976970646235953996329715828921L, 0.8397047841495122031171636825574368L,
-            0.9325316833444922553660483442117465L, 0.9869532642585858600389820060422260L };
-  const RealScalar weights[] = { 0.0333356721543440687967844049466659L, 0.0747256745752902965728881698288487L,
-            0.1095431812579910219977674671140816L, 0.1346333596549981775456134607847347L,
-            0.1477621123573764350869464973256692L, 0.1477621123573764350869464973256692L,
-            0.1346333596549981775456134607847347L, 0.1095431812579910219977674671140816L,
-            0.0747256745752902965728881698288487L, 0.0333356721543440687967844049466659L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
-}
-
-template <typename MatrixType>
-void MatrixLogarithmAtomic<MatrixType>::computePade11(MatrixType& result, const MatrixType& T)
-{
-  const int degree = 11;
-  const RealScalar nodes[]   = { 0.0108856709269715035980309994385713L, 0.0564687001159523504624211153480364L,
-            0.1349239972129753379532918739844233L, 0.2404519353965940920371371652706952L,
-            0.3652284220238275138342340072995692L, 0.5000000000000000000000000000000000L,
-            0.6347715779761724861657659927004308L, 0.7595480646034059079628628347293048L,
-            0.8650760027870246620467081260155767L, 0.9435312998840476495375788846519636L,
-            0.9891143290730284964019690005614287L };
-  const RealScalar weights[] = { 0.0278342835580868332413768602212743L, 0.0627901847324523123173471496119701L,
-            0.0931451054638671257130488207158280L, 0.1165968822959952399592618524215876L,
-            0.1314022722551233310903444349452546L, 0.1364625433889503153572417641681711L,
-            0.1314022722551233310903444349452546L, 0.1165968822959952399592618524215876L,
-            0.0931451054638671257130488207158280L, 0.0627901847324523123173471496119701L,
-            0.0278342835580868332413768602212743L };
-  eigen_assert(degree <= maxPadeDegree);
-  MatrixType TminusI = T - MatrixType::Identity(T.rows(), T.rows());
-  result.setZero(T.rows(), T.rows());
-  for (int k = 0; k < degree; ++k)
-    result += weights[k] * (MatrixType::Identity(T.rows(), T.rows()) + nodes[k] * TminusI)
-                           .template triangularView<Upper>().solve(TminusI);
-}
+} // end of namespace internal
 
 /** \ingroup MatrixFunctions_Module
   *
@@ -421,45 +308,45 @@
 : public ReturnByValue<MatrixLogarithmReturnValue<Derived> >
 {
 public:
-
   typedef typename Derived::Scalar Scalar;
   typedef typename Derived::Index Index;
 
+protected:
+  typedef typename internal::ref_selector<Derived>::type DerivedNested;
+
+public:
+
   /** \brief Constructor.
     *
     * \param[in]  A  %Matrix (expression) forming the argument of the matrix logarithm.
     */
-  MatrixLogarithmReturnValue(const Derived& A) : m_A(A) { }
+  explicit MatrixLogarithmReturnValue(const Derived& A) : m_A(A) { }
   
   /** \brief Compute the matrix logarithm.
     *
-    * \param[out]  result  Logarithm of \p A, where \A is as specified in the constructor.
+    * \param[out]  result  Logarithm of \c A, where \c A is as specified in the constructor.
     */
   template <typename ResultType>
   inline void evalTo(ResultType& result) const
   {
-    typedef typename Derived::PlainObject PlainObject;
-    typedef internal::traits<PlainObject> Traits;
+    typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+    typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+    typedef internal::traits<DerivedEvalTypeClean> Traits;
     static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
     static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
-    static const int Options = PlainObject::Options;
     typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
-    typedef Matrix<ComplexScalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
-    typedef MatrixLogarithmAtomic<DynMatrixType> AtomicType;
+    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
+    typedef internal::MatrixLogarithmAtomic<DynMatrixType> AtomicType;
     AtomicType atomic;
     
-    const PlainObject Aevaluated = m_A.eval();
-    MatrixFunction<PlainObject, AtomicType> mf(Aevaluated, atomic);
-    mf.compute(result);
+    internal::matrix_function_compute<typename DerivedEvalTypeClean::PlainObject>::run(m_A, atomic, result);
   }
 
   Index rows() const { return m_A.rows(); }
   Index cols() const { return m_A.cols(); }
   
 private:
-  typename internal::nested<Derived>::type m_A;
-  
-  MatrixLogarithmReturnValue& operator=(const MatrixLogarithmReturnValue&);
+  const DerivedNested m_A;
 };
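
Like the matrix function proxy earlier in this file, this object is normally created implicitly, here
through MatrixBase::log(), and evaluated on assignment. A minimal usage sketch (the example matrix is
hypothetical; its eigenvalues stay away from the closed negative real axis, so the principal logarithm
exists):

    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main()
    {
      Eigen::Matrix3d A;
      A << 4, 1, 0,
           0, 3, 1,
           0, 0, 2;                       // eigenvalues 4, 3, 2
      Eigen::Matrix3d logA = A.log();     // MatrixLogarithmReturnValue, evaluated on assignment
      Eigen::Matrix3d back = logA.exp();  // exp(log(A)) should reproduce A up to rounding
      std::cout << (back - A).norm() << "\n";
    }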
 
 namespace internal {
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
index 78a307e..a3273da 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixPower.h
@@ -14,19 +14,51 @@
 
 template<typename MatrixType> class MatrixPower;
 
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Proxy for the matrix power of some matrix.
+ *
+ * \tparam MatrixType  type of the base, a matrix.
+ *
+ * This class holds the arguments to the matrix power until it is
+ * assigned or evaluated for some other reason (so the argument
+ * should not be changed in the meantime). It is the return type of
+ * MatrixPower::operator() and related functions and most of the
+ * time this is the only way it is used.
+ */
+/* TODO This class is only used by MatrixPower, so it should be nested
+ * into MatrixPower, like MatrixPower::ReturnValue. However, my
+ * compiler complained about unused template parameter in the
+ * following declaration in namespace internal.
+ *
+ * template<typename MatrixType>
+ * struct traits<MatrixPower<MatrixType>::ReturnValue>;
+ */
 template<typename MatrixType>
-class MatrixPowerRetval : public ReturnByValue< MatrixPowerRetval<MatrixType> >
+class MatrixPowerParenthesesReturnValue : public ReturnByValue< MatrixPowerParenthesesReturnValue<MatrixType> >
 {
   public:
     typedef typename MatrixType::RealScalar RealScalar;
     typedef typename MatrixType::Index Index;
 
-    MatrixPowerRetval(MatrixPower<MatrixType>& pow, RealScalar p) : m_pow(pow), m_p(p)
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] pow  %MatrixPower storing the base.
+     * \param[in] p    scalar, the exponent of the matrix power.
+     */
+    MatrixPowerParenthesesReturnValue(MatrixPower<MatrixType>& pow, RealScalar p) : m_pow(pow), m_p(p)
     { }
 
+    /**
+     * \brief Compute the matrix power.
+     *
+     * \param[out] result
+     */
     template<typename ResultType>
-    inline void evalTo(ResultType& res) const
-    { m_pow.compute(res, m_p); }
+    inline void evalTo(ResultType& result) const
+    { m_pow.compute(result, m_p); }
 
     Index rows() const { return m_pow.rows(); }
     Index cols() const { return m_pow.cols(); }
@@ -34,11 +66,25 @@
   private:
     MatrixPower<MatrixType>& m_pow;
     const RealScalar m_p;
-    MatrixPowerRetval& operator=(const MatrixPowerRetval&);
 };
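
The proxy above is what MatrixPower::operator() returns, and it is evaluated on assignment. A minimal
usage sketch (the base matrix is a hypothetical example, chosen close to the identity so that all
fractional powers are well defined):

    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main()
    {
      Eigen::Matrix3d A = Eigen::Matrix3d::Identity() + 0.1 * Eigen::Matrix3d::Random();
      Eigen::MatrixPower<Eigen::Matrix3d> Apow(A);      // stores a reference to A
      Eigen::Matrix3d halfA = Apow(0.5);                // proxy evaluated here
      std::cout << (halfA * halfA - A).norm() << "\n";  // should be ~0
    }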
 
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Class for computing matrix powers.
+ *
+ * \tparam MatrixType  type of the base, expected to be an instantiation
+ * of the Matrix class template.
+ *
+ * This class is capable of computing triangular real/complex matrices
+ * raised to a power in the interval \f$ (-1, 1) \f$.
+ *
+ * \note Currently this class is only used by MatrixPower. One may
+ * insist that this be nested into MatrixPower. This class is here to
+ * facilitate future development of triangular matrix functions.
+ */
 template<typename MatrixType>
-class MatrixPowerAtomic
+class MatrixPowerAtomic : internal::noncopyable
 {
   private:
     enum {
@@ -49,14 +95,14 @@
     typedef typename MatrixType::RealScalar RealScalar;
     typedef std::complex<RealScalar> ComplexScalar;
     typedef typename MatrixType::Index Index;
-    typedef Array<Scalar, RowsAtCompileTime, 1, ColMajor, MaxRowsAtCompileTime> ArrayType;
+    typedef Block<MatrixType,Dynamic,Dynamic> ResultType;
 
     const MatrixType& m_A;
     RealScalar m_p;
 
-    void computePade(int degree, const MatrixType& IminusT, MatrixType& res) const;
-    void compute2x2(MatrixType& res, RealScalar p) const;
-    void computeBig(MatrixType& res) const;
+    void computePade(int degree, const MatrixType& IminusT, ResultType& res) const;
+    void compute2x2(ResultType& res, RealScalar p) const;
+    void computeBig(ResultType& res) const;
     static int getPadeDegree(float normIminusT);
     static int getPadeDegree(double normIminusT);
     static int getPadeDegree(long double normIminusT);
@@ -64,24 +110,45 @@
     static RealScalar computeSuperDiag(RealScalar, RealScalar, RealScalar p);
 
   public:
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] T  the base of the matrix power.
+     * \param[in] p  the exponent of the matrix power, should be in
+     * \f$ (-1, 1) \f$.
+     *
+     * The class stores a reference to T, so it should not be changed
+     * (or destroyed) before evaluation. Only the upper triangular
+     * part of T is read.
+     */
     MatrixPowerAtomic(const MatrixType& T, RealScalar p);
-    void compute(MatrixType& res) const;
+    
+    /**
+     * \brief Compute the matrix power.
+     *
+     * \param[out] res  \f$ A^p \f$ where A and p are specified in the
+     * constructor.
+     */
+    void compute(ResultType& res) const;
 };
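
In outline, the members declared above implement an inverse scaling-and-squaring scheme (this is a reading of computeBig() further below, not an additional claim): square-root the triangular base $s$ times until a norm of $I - T^{1/2^{s}}$ drops below maxNormForPade, evaluate a Padé approximant of $(1-x)^p$ at $x = I - T^{1/2^{s}}$, and then undo the scaling by repeated squaring,

\[
  T^{\,p} \;=\; \Bigl( \bigl( T^{1/2^{s}} \bigr)^{p} \Bigr)^{2^{s}},
\]

with the $2\times2$ superdiagonal entries refreshed by compute2x2() (exponent $p/2^{k}$ while $k$ squarings remain, and exponent $p$ at the very end).
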
 
 template<typename MatrixType>
 MatrixPowerAtomic<MatrixType>::MatrixPowerAtomic(const MatrixType& T, RealScalar p) :
   m_A(T), m_p(p)
-{ eigen_assert(T.rows() == T.cols()); }
+{
+  eigen_assert(T.rows() == T.cols());
+  eigen_assert(p > -1 && p < 1);
+}
 
 template<typename MatrixType>
-void MatrixPowerAtomic<MatrixType>::compute(MatrixType& res) const
+void MatrixPowerAtomic<MatrixType>::compute(ResultType& res) const
 {
-  res.resizeLike(m_A);
+  using std::pow;
   switch (m_A.rows()) {
     case 0:
       break;
     case 1:
-      res(0,0) = std::pow(m_A(0,0), m_p);
+      res(0,0) = pow(m_A(0,0), m_p);
       break;
     case 2:
       compute2x2(res, m_p);
@@ -92,24 +159,24 @@
 }
 
 template<typename MatrixType>
-void MatrixPowerAtomic<MatrixType>::computePade(int degree, const MatrixType& IminusT, MatrixType& res) const
+void MatrixPowerAtomic<MatrixType>::computePade(int degree, const MatrixType& IminusT, ResultType& res) const
 {
-  int i = degree<<1;
-  res = (m_p-degree) / ((i-1)<<1) * IminusT;
+  int i = 2*degree;
+  res = (m_p-degree) / (2*i-2) * IminusT;
+
   for (--i; i; --i) {
     res = (MatrixType::Identity(IminusT.rows(), IminusT.cols()) + res).template triangularView<Upper>()
-	.solve((i==1 ? -m_p : i&1 ? (-m_p-(i>>1))/(i<<1) : (m_p-(i>>1))/((i-1)<<1)) * IminusT).eval();
+	.solve((i==1 ? -m_p : i&1 ? (-m_p-i/2)/(2*i) : (m_p-i/2)/(2*i-2)) * IminusT).eval();
   }
   res += MatrixType::Identity(IminusT.rows(), IminusT.cols());
 }
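
In effect, the loop above evaluates the continued-fraction form of the diagonal Padé approximant of $(1-x)^p$ at $x = I - T$ from the innermost term outwards,

\[
  r(x) \;=\; 1 + \cfrac{c_1 x}{1 + \cfrac{c_2 x}{1 + \cdots + c_{2\,\mathrm{degree}}\, x}},
  \qquad
  c_1 = -p, \quad
  c_i = \begin{cases} \dfrac{-p - \lfloor i/2 \rfloor}{2i}, & i > 1 \text{ odd}, \\[1.5ex]
                      \dfrac{p - i/2}{2i - 2}, & i \text{ even}, \end{cases}
\]

where the coefficients are exactly those appearing in the solve above, and each division is carried out as an upper-triangular solve rather than an explicit inverse.
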
 
 // This function assumes that res has the correct size (see bug 614)
 template<typename MatrixType>
-void MatrixPowerAtomic<MatrixType>::compute2x2(MatrixType& res, RealScalar p) const
+void MatrixPowerAtomic<MatrixType>::compute2x2(ResultType& res, RealScalar p) const
 {
   using std::abs;
   using std::pow;
-  
   res.coeffRef(0,0) = pow(m_A.coeff(0,0), p);
 
   for (Index i=1; i < m_A.cols(); ++i) {
@@ -125,32 +192,20 @@
 }
 
 template<typename MatrixType>
-void MatrixPowerAtomic<MatrixType>::computeBig(MatrixType& res) const
+void MatrixPowerAtomic<MatrixType>::computeBig(ResultType& res) const
 {
+  using std::ldexp;
   const int digits = std::numeric_limits<RealScalar>::digits;
-  const RealScalar maxNormForPade = digits <=  24? 4.3386528e-1f:                           // sigle precision
-				    digits <=  53? 2.789358995219730e-1:                    // double precision
-				    digits <=  64? 2.4471944416607995472e-1L:               // extended precision
-				    digits <= 106? 1.1016843812851143391275867258512e-1L:   // double-double
-						   9.134603732914548552537150753385375e-2L; // quadruple precision
+  const RealScalar maxNormForPade = digits <=  24? 4.3386528e-1L                            // single precision
+                                  : digits <=  53? 2.789358995219730e-1L                    // double precision
+                                  : digits <=  64? 2.4471944416607995472e-1L                // extended precision
+                                  : digits <= 106? 1.1016843812851143391275867258512e-1L    // double-double
+                                  :                9.134603732914548552537150753385375e-2L; // quadruple precision
   MatrixType IminusT, sqrtT, T = m_A.template triangularView<Upper>();
   RealScalar normIminusT;
   int degree, degree2, numberOfSquareRoots = 0;
   bool hasExtraSquareRoot = false;
 
-  /* FIXME
-   * For singular T, norm(I - T) >= 1 but maxNormForPade < 1, leads to infinite
-   * loop.  We should move 0 eigenvalues to bottom right corner.  We need not
-   * worry about tiny values (e.g. 1e-300) because they will reach 1 if
-   * repetitively sqrt'ed.
-   *
-   * If the 0 eigenvalues are semisimple, they can form a 0 matrix at the
-   * bottom right corner.
-   *
-   * [ T  A ]^p   [ T^p  (T^-1 T^p A) ]
-   * [      ]   = [                   ]
-   * [ 0  0 ]     [  0         0      ]
-   */
   for (Index i=0; i < m_A.cols(); ++i)
     eigen_assert(m_A(i,i) != RealScalar(0));
 
@@ -164,14 +219,14 @@
 	break;
       hasExtraSquareRoot = true;
     }
-    MatrixSquareRootTriangular<MatrixType>(T).compute(sqrtT);
+    matrix_sqrt_triangular(T, sqrtT);
     T = sqrtT.template triangularView<Upper>();
     ++numberOfSquareRoots;
   }
   computePade(degree, IminusT, res);
 
   for (; numberOfSquareRoots; --numberOfSquareRoots) {
-    compute2x2(res, std::ldexp(m_p, -numberOfSquareRoots));
+    compute2x2(res, ldexp(m_p, -numberOfSquareRoots));
     res = res.template triangularView<Upper>() * res;
   }
   compute2x2(res, m_p);
@@ -209,7 +264,7 @@
       1.999045567181744e-1L, 2.789358995219730e-1L };
 #elif LDBL_MANT_DIG <= 64
   const int maxPadeDegree = 8;
-  const double maxNormForPade[] = { 6.3854693117491799460e-3L /* degree = 3 */ , 2.6394893435456973676e-2L,
+  const long double maxNormForPade[] = { 6.3854693117491799460e-3L /* degree = 3 */ , 2.6394893435456973676e-2L,
       6.4216043030404063729e-2L, 1.1701165502926694307e-1L, 1.7904284231268670284e-1L, 2.4471944416607995472e-1L };
 #elif LDBL_MANT_DIG <= 106
   const int maxPadeDegree = 10;
@@ -236,19 +291,28 @@
 inline typename MatrixPowerAtomic<MatrixType>::ComplexScalar
 MatrixPowerAtomic<MatrixType>::computeSuperDiag(const ComplexScalar& curr, const ComplexScalar& prev, RealScalar p)
 {
-  ComplexScalar logCurr = std::log(curr);
-  ComplexScalar logPrev = std::log(prev);
-  int unwindingNumber = std::ceil((numext::imag(logCurr - logPrev) - M_PI) / (2*M_PI));
-  ComplexScalar w = numext::atanh2(curr - prev, curr + prev) + ComplexScalar(0, M_PI*unwindingNumber);
-  return RealScalar(2) * std::exp(RealScalar(0.5) * p * (logCurr + logPrev)) * std::sinh(p * w) / (curr - prev);
+  using std::ceil;
+  using std::exp;
+  using std::log;
+  using std::sinh;
+
+  ComplexScalar logCurr = log(curr);
+  ComplexScalar logPrev = log(prev);
+  int unwindingNumber = ceil((numext::imag(logCurr - logPrev) - RealScalar(EIGEN_PI)) / RealScalar(2*EIGEN_PI));
+  ComplexScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2) + ComplexScalar(0, EIGEN_PI*unwindingNumber);
+  return RealScalar(2) * exp(RealScalar(0.5) * p * (logCurr + logPrev)) * sinh(p * w) / (curr - prev);
 }
 
 template<typename MatrixType>
 inline typename MatrixPowerAtomic<MatrixType>::RealScalar
 MatrixPowerAtomic<MatrixType>::computeSuperDiag(RealScalar curr, RealScalar prev, RealScalar p)
 {
-  RealScalar w = numext::atanh2(curr - prev, curr + prev);
-  return 2 * std::exp(p * (std::log(curr) + std::log(prev)) / 2) * std::sinh(p * w) / (curr - prev);
+  using std::exp;
+  using std::log;
+  using std::sinh;
+
+  RealScalar w = numext::log1p((curr-prev)/prev)/RealScalar(2);
+  return 2 * exp(p * (log(curr) + log(prev)) / 2) * sinh(p * w) / (curr - prev);
 }
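
Both overloads return the first divided difference of $x \mapsto x^{p}$ at the two diagonal entries (writing $\lambda_1$ for prev and $\lambda_2$ for curr), in a form that avoids cancellation for close eigenvalues:

\[
  \frac{\lambda_2^{\,p} - \lambda_1^{\,p}}{\lambda_2 - \lambda_1}
  \;=\;
  \frac{2\, e^{\frac{p}{2} (\log\lambda_1 + \log\lambda_2)} \, \sinh(p\,w)}{\lambda_2 - \lambda_1},
  \qquad
  w \;=\; \tfrac12 \,\mathrm{log1p}\!\Bigl(\frac{\lambda_2 - \lambda_1}{\lambda_1}\Bigr)
     \;=\; \tfrac12 \log\frac{\lambda_2}{\lambda_1};
\]

the complex overload additionally adds $i\pi$ times the unwinding number to $w$ so that the principal branches of the logarithms are combined consistently.
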
 
 /**
@@ -271,15 +335,9 @@
  * Output: \verbinclude MatrixPower_optimal.out
  */
 template<typename MatrixType>
-class MatrixPower
+class MatrixPower : internal::noncopyable
 {
   private:
-    enum {
-      RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-      MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
-    };
     typedef typename MatrixType::Scalar Scalar;
     typedef typename MatrixType::RealScalar RealScalar;
     typedef typename MatrixType::Index Index;
@@ -293,7 +351,11 @@
      * The class stores a reference to A, so it should not be changed
      * (or destroyed) before evaluation.
      */
-    explicit MatrixPower(const MatrixType& A) : m_A(A), m_conditionNumber(0)
+    explicit MatrixPower(const MatrixType& A) :
+      m_A(A),
+      m_conditionNumber(0),
+      m_rank(A.cols()),
+      m_nulls(0)
     { eigen_assert(A.rows() == A.cols()); }
 
     /**
@@ -303,8 +365,8 @@
      * \return The expression \f$ A^p \f$, where A is specified in the
      * constructor.
      */
-    const MatrixPowerRetval<MatrixType> operator()(RealScalar p)
-    { return MatrixPowerRetval<MatrixType>(*this, p); }
+    const MatrixPowerParenthesesReturnValue<MatrixType> operator()(RealScalar p)
+    { return MatrixPowerParenthesesReturnValue<MatrixType>(*this, p); }
 
     /**
      * \brief Compute the matrix power.
@@ -321,21 +383,54 @@
 
   private:
     typedef std::complex<RealScalar> ComplexScalar;
-    typedef Matrix<ComplexScalar, RowsAtCompileTime, ColsAtCompileTime, MatrixType::Options,
-              MaxRowsAtCompileTime, MaxColsAtCompileTime> ComplexMatrix;
+    typedef Matrix<ComplexScalar, Dynamic, Dynamic, 0,
+              MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> ComplexMatrix;
 
+    /** \brief Reference to the base of matrix power. */
     typename MatrixType::Nested m_A;
+
+    /** \brief Temporary storage. */
     MatrixType m_tmp;
-    ComplexMatrix m_T, m_U, m_fT;
+
+    /** \brief Store the result of Schur decomposition. */
+    ComplexMatrix m_T, m_U;
+    
+    /** \brief Store fractional power of m_T. */
+    ComplexMatrix m_fT;
+
+    /**
+     * \brief Condition number of m_A.
+     *
+     * It is initialized as 0 to avoid performing unnecessary Schur
+     * decomposition, which is the bottleneck.
+     */
     RealScalar m_conditionNumber;
 
-    RealScalar modfAndInit(RealScalar, RealScalar*);
+    /** \brief Rank of m_A. */
+    Index m_rank;
+    
+    /** \brief Rank deficiency of m_A. */
+    Index m_nulls;
+
+    /**
+     * \brief Split p into integral part and fractional part.
+     *
+     * \param[in,out] p        The exponent; on output, the fractional
+     *                         part, lying in \f$ (-1, 1) \f$.
+     * \param[out]    intpart  The integral part.
+     *
+     * initialize() is called only if the fractional part is nonzero.
+     */
+    void split(RealScalar& p, RealScalar& intpart);
+
+    /** \brief Perform Schur decomposition for fractional power. */
+    void initialize();
 
     template<typename ResultType>
-    void computeIntPower(ResultType&, RealScalar);
+    void computeIntPower(ResultType& res, RealScalar p);
 
     template<typename ResultType>
-    void computeFracPower(ResultType&, RealScalar);
+    void computeFracPower(ResultType& res, RealScalar p);
 
     template<int Rows, int Cols, int Options, int MaxRows, int MaxCols>
     static void revertSchur(
@@ -354,59 +449,102 @@
 template<typename ResultType>
 void MatrixPower<MatrixType>::compute(ResultType& res, RealScalar p)
 {
+  using std::pow;
   switch (cols()) {
     case 0:
       break;
     case 1:
-      res(0,0) = std::pow(m_A.coeff(0,0), p);
+      res(0,0) = pow(m_A.coeff(0,0), p);
       break;
     default:
-      RealScalar intpart, x = modfAndInit(p, &intpart);
+      RealScalar intpart;
+      split(p, intpart);
+
+      res = MatrixType::Identity(rows(), cols());
       computeIntPower(res, intpart);
-      computeFracPower(res, x);
+      if (p) computeFracPower(res, p);
   }
 }
 
 template<typename MatrixType>
-typename MatrixPower<MatrixType>::RealScalar
-MatrixPower<MatrixType>::modfAndInit(RealScalar x, RealScalar* intpart)
+void MatrixPower<MatrixType>::split(RealScalar& p, RealScalar& intpart)
 {
-  typedef Array<RealScalar, RowsAtCompileTime, 1, ColMajor, MaxRowsAtCompileTime> RealArray;
+  using std::floor;
+  using std::pow;
 
-  *intpart = std::floor(x);
-  RealScalar res = x - *intpart;
+  intpart = floor(p);
+  p -= intpart;
 
-  if (!m_conditionNumber && res) {
-    const ComplexSchur<MatrixType> schurOfA(m_A);
-    m_T = schurOfA.matrixT();
-    m_U = schurOfA.matrixU();
-    
-    const RealArray absTdiag = m_T.diagonal().array().abs();
-    m_conditionNumber = absTdiag.maxCoeff() / absTdiag.minCoeff();
+  // Perform Schur decomposition if it is not yet performed and the power is
+  // not an integer.
+  if (!m_conditionNumber && p)
+    initialize();
+
+  // Choose the more stable of intpart = floor(p) and intpart = ceil(p).
+  if (p > RealScalar(0.5) && p > (1-p) * pow(m_conditionNumber, p)) {
+    --p;
+    ++intpart;
+  }
+}
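
A small worked example of this choice (illustrative numbers only): for $p = 0.9$, floor() gives intpart $= 0$ and fractional part $0.9$. Since $0.9 > 0.5$, the test compares $0.9$ with $(1 - 0.9)\,\kappa^{0.9}$, where $\kappa$ is m_conditionNumber. For a well-conditioned base, say $\kappa \approx 1$, the right-hand side is about $0.1$, so the split is rewritten as intpart $= 1$ with fractional part $-0.1$. For $\kappa = 10^{6}$ the right-hand side is roughly $2.5 \times 10^{4}$, the test fails, and the original split $0 + 0.9$ is kept.
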
+
+template<typename MatrixType>
+void MatrixPower<MatrixType>::initialize()
+{
+  const ComplexSchur<MatrixType> schurOfA(m_A);
+  JacobiRotation<ComplexScalar> rot;
+  ComplexScalar eigenvalue;
+
+  m_fT.resizeLike(m_A);
+  m_T = schurOfA.matrixT();
+  m_U = schurOfA.matrixU();
+  m_conditionNumber = m_T.diagonal().array().abs().maxCoeff() / m_T.diagonal().array().abs().minCoeff();
+
+  // Move zero eigenvalues to the bottom right corner.
+  for (Index i = cols()-1; i>=0; --i) {
+    if (m_rank <= 2)
+      return;
+    if (m_T.coeff(i,i) == RealScalar(0)) {
+      for (Index j=i+1; j < m_rank; ++j) {
+        eigenvalue = m_T.coeff(j,j);
+        rot.makeGivens(m_T.coeff(j-1,j), eigenvalue);
+        m_T.applyOnTheRight(j-1, j, rot);
+        m_T.applyOnTheLeft(j-1, j, rot.adjoint());
+        m_T.coeffRef(j-1,j-1) = eigenvalue;
+        m_T.coeffRef(j,j) = RealScalar(0);
+        m_U.applyOnTheRight(j-1, j, rot);
+      }
+      --m_rank;
+    }
   }
 
-  if (res>RealScalar(0.5) && res>(1-res)*std::pow(m_conditionNumber, res)) {
-    --res;
-    ++*intpart;
+  m_nulls = rows() - m_rank;
+  if (m_nulls) {
+    eigen_assert(m_T.bottomRightCorner(m_nulls, m_nulls).isZero()
+        && "Base of matrix power should be invertible or have a semisimple zero eigenvalue.");
+    m_fT.bottomRows(m_nulls).fill(RealScalar(0));
   }
-  return res;
 }
 
 template<typename MatrixType>
 template<typename ResultType>
 void MatrixPower<MatrixType>::computeIntPower(ResultType& res, RealScalar p)
 {
-  RealScalar pp = std::abs(p);
+  using std::abs;
+  using std::fmod;
+  RealScalar pp = abs(p);
 
-  if (p<0)  m_tmp = m_A.inverse();
-  else      m_tmp = m_A;
+  if (p<0) 
+    m_tmp = m_A.inverse();
+  else     
+    m_tmp = m_A;
 
-  res = MatrixType::Identity(rows(), cols());
-  while (pp >= 1) {
-    if (std::fmod(pp, 2) >= 1)
+  while (true) {
+    if (fmod(pp, 2) >= 1)
       res = m_tmp * res;
-    m_tmp *= m_tmp;
     pp /= 2;
+    if (pp < 1)
+      break;
+    m_tmp *= m_tmp;
   }
 }
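
The loop above is square-and-multiply in disguise: fmod(pp, 2) >= 1 tests the lowest remaining bit of the integral exponent, pp /= 2 shifts to the next bit, and the base is squared along the way. A scalar analogue (a sketch for illustration, not part of Eigen; the matrix version additionally starts from res preset to the identity and inverts m_A first when p is negative):

    #include <cmath>
    #include <iostream>

    // Same control flow as computeIntPower(), applied to a plain double.
    double scalar_int_power(double base, double pp)  // pp holds a nonnegative integer value
    {
      double result = 1.0;
      while (true) {
        if (std::fmod(pp, 2) >= 1)   // lowest remaining bit is set
          result = base * result;
        pp /= 2;                     // shift to the next bit
        if (pp < 1)
          break;
        base *= base;                // square the base for that bit
      }
      return result;
    }

    int main()
    {
      std::cout << scalar_int_power(3.0, 7.0) << '\n';  // prints 2187
      return 0;
    }
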
 
@@ -414,12 +552,17 @@
 template<typename ResultType>
 void MatrixPower<MatrixType>::computeFracPower(ResultType& res, RealScalar p)
 {
-  if (p) {
-    eigen_assert(m_conditionNumber);
-    MatrixPowerAtomic<ComplexMatrix>(m_T, p).compute(m_fT);
-    revertSchur(m_tmp, m_fT, m_U);
-    res = m_tmp * res;
+  Block<ComplexMatrix,Dynamic,Dynamic> blockTp(m_fT, 0, 0, m_rank, m_rank);
+  eigen_assert(m_conditionNumber);
+  eigen_assert(m_rank + m_nulls == rows());
+
+  MatrixPowerAtomic<ComplexMatrix>(m_T.topLeftCorner(m_rank, m_rank), p).compute(blockTp);
+  if (m_nulls) {
+    m_fT.topRightCorner(m_rank, m_nulls) = m_T.topLeftCorner(m_rank, m_rank).template triangularView<Upper>()
+        .solve(blockTp * m_T.topRightCorner(m_rank, m_nulls));
   }
+  revertSchur(m_tmp, m_fT, m_U);
+  res = m_tmp * res;
 }
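
When the base is rank deficient (m_nulls > 0), the triangular solve on m_fT.topRightCorner realizes the block formula anticipated by the FIXME removed from MatrixPowerAtomic::computeBig() above: writing $T_{11}$ for the leading m_rank-by-m_rank block of m_T (the zero eigenvalues having been moved to the bottom-right corner in initialize()),

\[
  \begin{bmatrix} T_{11} & T_{12} \\ 0 & 0 \end{bmatrix}^{p}
  \;=\;
  \begin{bmatrix} T_{11}^{\,p} & T_{11}^{-1}\, T_{11}^{\,p}\, T_{12} \\ 0 & 0 \end{bmatrix}.
\]
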
 
 template<typename MatrixType>
@@ -463,7 +606,7 @@
      * \brief Constructor.
      *
      * \param[in] A  %Matrix (expression), the base of the matrix power.
-     * \param[in] p  scalar, the exponent of the matrix power.
+     * \param[in] p  real scalar, the exponent of the matrix power.
      */
     MatrixPowerReturnValue(const Derived& A, RealScalar p) : m_A(A), m_p(p)
     { }
@@ -475,8 +618,8 @@
      * constructor.
      */
     template<typename ResultType>
-    inline void evalTo(ResultType& res) const
-    { MatrixPower<PlainObject>(m_A.eval()).compute(res, m_p); }
+    inline void evalTo(ResultType& result) const
+    { MatrixPower<PlainObject>(m_A.eval()).compute(result, m_p); }
 
     Index rows() const { return m_A.rows(); }
     Index cols() const { return m_A.cols(); }
@@ -484,25 +627,83 @@
   private:
     const Derived& m_A;
     const RealScalar m_p;
-    MatrixPowerReturnValue& operator=(const MatrixPowerReturnValue&);
+};
+
+/**
+ * \ingroup MatrixFunctions_Module
+ *
+ * \brief Proxy for the matrix power of some matrix (expression).
+ *
+ * \tparam Derived  type of the base, a matrix (expression).
+ *
+ * This class holds the arguments to the matrix power until it is
+ * assigned or evaluated for some other reason (so the argument
+ * should not be changed in the meantime). It is the return type of
+ * MatrixBase::pow() and related functions and most of the
+ * time this is the only way it is used.
+ */
+template<typename Derived>
+class MatrixComplexPowerReturnValue : public ReturnByValue< MatrixComplexPowerReturnValue<Derived> >
+{
+  public:
+    typedef typename Derived::PlainObject PlainObject;
+    typedef typename std::complex<typename Derived::RealScalar> ComplexScalar;
+    typedef typename Derived::Index Index;
+
+    /**
+     * \brief Constructor.
+     *
+     * \param[in] A  %Matrix (expression), the base of the matrix power.
+     * \param[in] p  complex scalar, the exponent of the matrix power.
+     */
+    MatrixComplexPowerReturnValue(const Derived& A, const ComplexScalar& p) : m_A(A), m_p(p)
+    { }
+
+    /**
+     * \brief Compute the matrix power.
+     *
+     * Because \p p is complex, \f$ A^p \f$ is simply evaluated as \f$
+     * \exp(p \log(A)) \f$.
+     *
+     * \param[out] result  \f$ A^p \f$ where \p A and \p p are as in the
+     * constructor.
+     */
+    template<typename ResultType>
+    inline void evalTo(ResultType& result) const
+    { result = (m_p * m_A.log()).exp(); }
+
+    Index rows() const { return m_A.rows(); }
+    Index cols() const { return m_A.cols(); }
+
+  private:
+    const Derived& m_A;
+    const ComplexScalar m_p;
 };
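
A minimal usage sketch (not from the Eigen sources; values are arbitrary). Because the exponent is complex, the result is simply $\exp(p \log A)$, so the base should be such that its logarithm is well defined:

    #include <unsupported/Eigen/MatrixFunctions>
    #include <complex>
    #include <iostream>

    int main()
    {
      Eigen::Matrix2cd A;
      A << std::complex<double>(2, 0), std::complex<double>(1, 0),
           std::complex<double>(0, 0), std::complex<double>(3, 0);

      // A^(1+i), evaluated as exp((1+i) * log(A)).
      Eigen::Matrix2cd B = A.pow(std::complex<double>(1.0, 1.0));
      std::cout << B << std::endl;
      return 0;
    }
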
 
 namespace internal {
 
 template<typename MatrixPowerType>
-struct traits< MatrixPowerRetval<MatrixPowerType> >
+struct traits< MatrixPowerParenthesesReturnValue<MatrixPowerType> >
 { typedef typename MatrixPowerType::PlainObject ReturnType; };
 
 template<typename Derived>
 struct traits< MatrixPowerReturnValue<Derived> >
 { typedef typename Derived::PlainObject ReturnType; };
 
+template<typename Derived>
+struct traits< MatrixComplexPowerReturnValue<Derived> >
+{ typedef typename Derived::PlainObject ReturnType; };
+
 }
 
 template<typename Derived>
 const MatrixPowerReturnValue<Derived> MatrixBase<Derived>::pow(const RealScalar& p) const
 { return MatrixPowerReturnValue<Derived>(derived(), p); }
 
+template<typename Derived>
+const MatrixComplexPowerReturnValue<Derived> MatrixBase<Derived>::pow(const std::complex<RealScalar>& p) const
+{ return MatrixComplexPowerReturnValue<Derived>(derived(), p); }
+
 } // namespace Eigen
 
 #endif // EIGEN_MATRIX_POWER
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
index b48ea9d..2e5abda 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixSquareRoot.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2011, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,133 +12,16 @@
 
 namespace Eigen { 
 
-/** \ingroup MatrixFunctions_Module
-  * \brief Class for computing matrix square roots of upper quasi-triangular matrices.
-  * \tparam  MatrixType  type of the argument of the matrix square root,
-  *                      expected to be an instantiation of the Matrix class template.
-  *
-  * This class computes the square root of the upper quasi-triangular
-  * matrix stored in the upper Hessenberg part of the matrix passed to
-  * the constructor.
-  *
-  * \sa MatrixSquareRoot, MatrixSquareRootTriangular
-  */
-template <typename MatrixType>
-class MatrixSquareRootQuasiTriangular
-{
-  public:
-
-    /** \brief Constructor. 
-      *
-      * \param[in]  A  upper quasi-triangular matrix whose square root 
-      *                is to be computed.
-      *
-      * The class stores a reference to \p A, so it should not be
-      * changed (or destroyed) before compute() is called.
-      */
-    MatrixSquareRootQuasiTriangular(const MatrixType& A) 
-      : m_A(A) 
-    {
-      eigen_assert(A.rows() == A.cols());
-    }
-    
-    /** \brief Compute the matrix square root
-      *
-      * \param[out] result  square root of \p A, as specified in the constructor.
-      *
-      * Only the upper Hessenberg part of \p result is updated, the
-      * rest is not touched.  See MatrixBase::sqrt() for details on
-      * how this computation is implemented.
-      */
-    template <typename ResultType> void compute(ResultType &result);    
-    
-  private:
-    typedef typename MatrixType::Index Index;
-    typedef typename MatrixType::Scalar Scalar;
-    
-    void computeDiagonalPartOfSqrt(MatrixType& sqrtT, const MatrixType& T);
-    void computeOffDiagonalPartOfSqrt(MatrixType& sqrtT, const MatrixType& T);
-    void compute2x2diagonalBlock(MatrixType& sqrtT, const MatrixType& T, typename MatrixType::Index i);
-    void compute1x1offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j);
-    void compute1x2offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j);
-    void compute2x1offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j);
-    void compute2x2offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j);
-  
-    template <typename SmallMatrixType>
-    static void solveAuxiliaryEquation(SmallMatrixType& X, const SmallMatrixType& A, 
-				     const SmallMatrixType& B, const SmallMatrixType& C);
-  
-    const MatrixType& m_A;
-};
-
-template <typename MatrixType>
-template <typename ResultType> 
-void MatrixSquareRootQuasiTriangular<MatrixType>::compute(ResultType &result)
-{
-  result.resize(m_A.rows(), m_A.cols());
-  computeDiagonalPartOfSqrt(result, m_A);
-  computeOffDiagonalPartOfSqrt(result, m_A);
-}
-
-// pre:  T is quasi-upper-triangular and sqrtT is a zero matrix of the same size
-// post: the diagonal blocks of sqrtT are the square roots of the diagonal blocks of T
-template <typename MatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>::computeDiagonalPartOfSqrt(MatrixType& sqrtT, 
-									  const MatrixType& T)
-{
-  using std::sqrt;
-  const Index size = m_A.rows();
-  for (Index i = 0; i < size; i++) {
-    if (i == size - 1 || T.coeff(i+1, i) == 0) {
-      eigen_assert(T(i,i) >= 0);
-      sqrtT.coeffRef(i,i) = sqrt(T.coeff(i,i));
-    }
-    else {
-      compute2x2diagonalBlock(sqrtT, T, i);
-      ++i;
-    }
-  }
-}
-
-// pre:  T is quasi-upper-triangular and diagonal blocks of sqrtT are square root of diagonal blocks of T.
-// post: sqrtT is the square root of T.
-template <typename MatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>::computeOffDiagonalPartOfSqrt(MatrixType& sqrtT, 
-									     const MatrixType& T)
-{
-  const Index size = m_A.rows();
-  for (Index j = 1; j < size; j++) {
-      if (T.coeff(j, j-1) != 0)  // if T(j-1:j, j-1:j) is a 2-by-2 block
-	continue;
-    for (Index i = j-1; i >= 0; i--) {
-      if (i > 0 && T.coeff(i, i-1) != 0)  // if T(i-1:i, i-1:i) is a 2-by-2 block
-	continue;
-      bool iBlockIs2x2 = (i < size - 1) && (T.coeff(i+1, i) != 0);
-      bool jBlockIs2x2 = (j < size - 1) && (T.coeff(j+1, j) != 0);
-      if (iBlockIs2x2 && jBlockIs2x2) 
-	compute2x2offDiagonalBlock(sqrtT, T, i, j);
-      else if (iBlockIs2x2 && !jBlockIs2x2) 
-	compute2x1offDiagonalBlock(sqrtT, T, i, j);
-      else if (!iBlockIs2x2 && jBlockIs2x2) 
-	compute1x2offDiagonalBlock(sqrtT, T, i, j);
-      else if (!iBlockIs2x2 && !jBlockIs2x2) 
-	compute1x1offDiagonalBlock(sqrtT, T, i, j);
-    }
-  }
-}
+namespace internal {
 
 // pre:  T.block(i,i,2,2) has complex conjugate eigenvalues
 // post: sqrtT.block(i,i,2,2) is square root of T.block(i,i,2,2)
-template <typename MatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>
-     ::compute2x2diagonalBlock(MatrixType& sqrtT, const MatrixType& T, typename MatrixType::Index i)
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_2x2_diagonal_block(const MatrixType& T, typename MatrixType::Index i, ResultType& sqrtT)
 {
   // TODO: This case (2-by-2 blocks with complex conjugate eigenvalues) is probably hidden somewhere
   //       in EigenSolver. If we expose it, we could call it directly from here.
+  typedef typename traits<MatrixType>::Scalar Scalar;
   Matrix<Scalar,2,2> block = T.template block<2,2>(i,i);
   EigenSolver<Matrix<Scalar,2,2> > es(block);
   sqrtT.template block<2,2>(i,i)
@@ -148,21 +31,19 @@
 // pre:  block structure of T is such that (i,j) is a 1x1 block,
 //       all blocks of sqrtT to left of and below (i,j) are correct
 // post: sqrtT(i,j) has the correct value
-template <typename MatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>
-     ::compute1x1offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j)
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
 {
+  typedef typename traits<MatrixType>::Scalar Scalar;
   Scalar tmp = (sqrtT.row(i).segment(i+1,j-i-1) * sqrtT.col(j).segment(i+1,j-i-1)).value();
   sqrtT.coeffRef(i,j) = (T.coeff(i,j) - tmp) / (sqrtT.coeff(i,i) + sqrtT.coeff(j,j));
 }
 
 // similar to compute1x1offDiagonalBlock()
-template <typename MatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>
-     ::compute1x2offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j)
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
 {
+  typedef typename traits<MatrixType>::Scalar Scalar;
   Matrix<Scalar,1,2> rhs = T.template block<1,2>(i,j);
   if (j-i > 1)
     rhs -= sqrtT.block(i, i+1, 1, j-i-1) * sqrtT.block(i+1, j, j-i-1, 2);
@@ -172,11 +53,10 @@
 }
 
 // similar to compute1x1offDiagonalBlock()
-template <typename MatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>
-     ::compute2x1offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j)
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
 {
+  typedef typename traits<MatrixType>::Scalar Scalar;
   Matrix<Scalar,2,1> rhs = T.template block<2,1>(i,j);
   if (j-i > 2)
     rhs -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 1);
@@ -185,32 +65,11 @@
   sqrtT.template block<2,1>(i,j) = A.fullPivLu().solve(rhs);
 }
 
-// similar to compute1x1offDiagonalBlock()
-template <typename MatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>
-     ::compute2x2offDiagonalBlock(MatrixType& sqrtT, const MatrixType& T, 
-				  typename MatrixType::Index i, typename MatrixType::Index j)
-{
-  Matrix<Scalar,2,2> A = sqrtT.template block<2,2>(i,i);
-  Matrix<Scalar,2,2> B = sqrtT.template block<2,2>(j,j);
-  Matrix<Scalar,2,2> C = T.template block<2,2>(i,j);
-  if (j-i > 2)
-    C -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 2);
-  Matrix<Scalar,2,2> X;
-  solveAuxiliaryEquation(X, A, B, C);
-  sqrtT.template block<2,2>(i,j) = X;
-}
-
 // solves the equation A X + X B = C where all matrices are 2-by-2
 template <typename MatrixType>
-template <typename SmallMatrixType>
-void MatrixSquareRootQuasiTriangular<MatrixType>
-     ::solveAuxiliaryEquation(SmallMatrixType& X, const SmallMatrixType& A,
-			      const SmallMatrixType& B, const SmallMatrixType& C)
+void matrix_sqrt_quasi_triangular_solve_auxiliary_equation(MatrixType& X, const MatrixType& A, const MatrixType& B, const MatrixType& C)
 {
-  EIGEN_STATIC_ASSERT((internal::is_same<SmallMatrixType, Matrix<Scalar,2,2> >::value),
-		      EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT);
-
+  typedef typename traits<MatrixType>::Scalar Scalar;
   Matrix<Scalar,4,4> coeffMatrix = Matrix<Scalar,4,4>::Zero();
   coeffMatrix.coeffRef(0,0) = A.coeff(0,0) + B.coeff(0,0);
   coeffMatrix.coeffRef(1,1) = A.coeff(0,0) + B.coeff(1,1);
@@ -224,13 +83,13 @@
   coeffMatrix.coeffRef(2,3) = B.coeff(1,0);
   coeffMatrix.coeffRef(3,1) = A.coeff(1,0);
   coeffMatrix.coeffRef(3,2) = B.coeff(0,1);
-  
+
   Matrix<Scalar,4,1> rhs;
   rhs.coeffRef(0) = C.coeff(0,0);
   rhs.coeffRef(1) = C.coeff(0,1);
   rhs.coeffRef(2) = C.coeff(1,0);
   rhs.coeffRef(3) = C.coeff(1,1);
-  
+
   Matrix<Scalar,4,1> result;
   result = coeffMatrix.fullPivLu().solve(rhs);
 
@@ -240,165 +99,205 @@
   X.coeffRef(1,1) = result.coeff(3);
 }
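
Written with Kronecker products, and using the row-major vectorization $\mathrm{vec}_r(X) = (X_{00}, X_{01}, X_{10}, X_{11})^{\top}$ that matches the ordering of rhs above, the 4-by-4 system assembled here is

\[
  \bigl( A \otimes I_2 + I_2 \otimes B^{\top} \bigr)\, \mathrm{vec}_r(X) \;=\; \mathrm{vec}_r(C).
\]
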
 
+// similar to compute1x1offDiagonalBlock()
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(const MatrixType& T, typename MatrixType::Index i, typename MatrixType::Index j, ResultType& sqrtT)
+{
+  typedef typename traits<MatrixType>::Scalar Scalar;
+  Matrix<Scalar,2,2> A = sqrtT.template block<2,2>(i,i);
+  Matrix<Scalar,2,2> B = sqrtT.template block<2,2>(j,j);
+  Matrix<Scalar,2,2> C = T.template block<2,2>(i,j);
+  if (j-i > 2)
+    C -= sqrtT.block(i, i+2, 2, j-i-2) * sqrtT.block(i+2, j, j-i-2, 2);
+  Matrix<Scalar,2,2> X;
+  matrix_sqrt_quasi_triangular_solve_auxiliary_equation(X, A, B, C);
+  sqrtT.template block<2,2>(i,j) = X;
+}
+
+// pre:  T is quasi-upper-triangular and sqrtT is a zero matrix of the same size
+// post: the diagonal blocks of sqrtT are the square roots of the diagonal blocks of T
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_diagonal(const MatrixType& T, ResultType& sqrtT)
+{
+  using std::sqrt;
+  const Index size = T.rows();
+  for (Index i = 0; i < size; i++) {
+    if (i == size - 1 || T.coeff(i+1, i) == 0) {
+      eigen_assert(T(i,i) >= 0);
+      sqrtT.coeffRef(i,i) = sqrt(T.coeff(i,i));
+    }
+    else {
+      matrix_sqrt_quasi_triangular_2x2_diagonal_block(T, i, sqrtT);
+      ++i;
+    }
+  }
+}
+
+// pre:  T is quasi-upper-triangular and diagonal blocks of sqrtT are square root of diagonal blocks of T.
+// post: sqrtT is the square root of T.
+template <typename MatrixType, typename ResultType>
+void matrix_sqrt_quasi_triangular_off_diagonal(const MatrixType& T, ResultType& sqrtT)
+{
+  const Index size = T.rows();
+  for (Index j = 1; j < size; j++) {
+      if (T.coeff(j, j-1) != 0)  // if T(j-1:j, j-1:j) is a 2-by-2 block
+	continue;
+    for (Index i = j-1; i >= 0; i--) {
+      if (i > 0 && T.coeff(i, i-1) != 0)  // if T(i-1:i, i-1:i) is a 2-by-2 block
+	continue;
+      bool iBlockIs2x2 = (i < size - 1) && (T.coeff(i+1, i) != 0);
+      bool jBlockIs2x2 = (j < size - 1) && (T.coeff(j+1, j) != 0);
+      if (iBlockIs2x2 && jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_2x2_off_diagonal_block(T, i, j, sqrtT);
+      else if (iBlockIs2x2 && !jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_2x1_off_diagonal_block(T, i, j, sqrtT);
+      else if (!iBlockIs2x2 && jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_1x2_off_diagonal_block(T, i, j, sqrtT);
+      else if (!iBlockIs2x2 && !jBlockIs2x2) 
+        matrix_sqrt_quasi_triangular_1x1_off_diagonal_block(T, i, j, sqrtT);
+    }
+  }
+}
+
+} // end of namespace internal
 
 /** \ingroup MatrixFunctions_Module
-  * \brief Class for computing matrix square roots of upper triangular matrices.
-  * \tparam  MatrixType  type of the argument of the matrix square root,
-  *                      expected to be an instantiation of the Matrix class template.
+  * \brief Compute matrix square root of quasi-triangular matrix.
   *
-  * This class computes the square root of the upper triangular matrix
-  * stored in the upper triangular part (including the diagonal) of
-  * the matrix passed to the constructor.
+  * \tparam  MatrixType  type of \p arg, the argument of matrix square root,
+  *                      expected to be an instantiation of the Matrix class template.
+  * \tparam  ResultType  type of \p result, where result is to be stored.
+  * \param[in]  arg      argument of matrix square root.
+  * \param[out] result   matrix square root of upper Hessenberg part of \p arg.
+  *
+  * This function computes the square root of the upper quasi-triangular matrix stored in the upper
+  * Hessenberg part of \p arg.  Only the upper Hessenberg part of \p result is updated, the rest is
+  * not touched.  See MatrixBase::sqrt() for details on how this computation is implemented.
   *
   * \sa MatrixSquareRoot, MatrixSquareRootQuasiTriangular
   */
-template <typename MatrixType>
-class MatrixSquareRootTriangular
+template <typename MatrixType, typename ResultType> 
+void matrix_sqrt_quasi_triangular(const MatrixType &arg, ResultType &result)
 {
-  public:
-    MatrixSquareRootTriangular(const MatrixType& A) 
-      : m_A(A) 
-    {
-      eigen_assert(A.rows() == A.cols());
-    }
+  eigen_assert(arg.rows() == arg.cols());
+  result.resize(arg.rows(), arg.cols());
+  internal::matrix_sqrt_quasi_triangular_diagonal(arg, result);
+  internal::matrix_sqrt_quasi_triangular_off_diagonal(arg, result);
+}
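
A minimal sketch of driving this function directly (not from the Eigen sources; end users would normally go through MatrixBase::sqrt(), and matrix_sqrt_compute further below follows the same steps). The positive-definite base keeps the square root real, and the result matrix is zero-initialized because only its upper Hessenberg part is written:

    #include <Eigen/Dense>
    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main()
    {
      Eigen::Matrix3d A = Eigen::Matrix3d::Random();
      A = A * A.transpose() + Eigen::Matrix3d::Identity();  // keep the square root real

      // Real Schur form A = U T U^T with T upper quasi-triangular.
      Eigen::RealSchur<Eigen::Matrix3d> schur(A);
      const Eigen::Matrix3d& T = schur.matrixT();
      const Eigen::Matrix3d& U = schur.matrixU();

      Eigen::Matrix3d sqrtT = Eigen::Matrix3d::Zero();      // lower part stays zero
      Eigen::matrix_sqrt_quasi_triangular(T, sqrtT);        // fills the upper Hessenberg part

      Eigen::Matrix3d S = U * sqrtT * U.transpose();
      std::cout << (S * S - A).norm() << std::endl;         // typically close to 0
      return 0;
    }
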
 
-    /** \brief Compute the matrix square root
-      *
-      * \param[out] result  square root of \p A, as specified in the constructor.
-      *
-      * Only the upper triangular part (including the diagonal) of 
-      * \p result is updated, the rest is not touched.  See
-      * MatrixBase::sqrt() for details on how this computation is
-      * implemented.
-      */
-    template <typename ResultType> void compute(ResultType &result);    
 
- private:
-    const MatrixType& m_A;
-};
-
-template <typename MatrixType>
-template <typename ResultType> 
-void MatrixSquareRootTriangular<MatrixType>::compute(ResultType &result)
+/** \ingroup MatrixFunctions_Module
+  * \brief Compute matrix square root of triangular matrix.
+  *
+  * \tparam  MatrixType  type of \p arg, the argument of matrix square root,
+  *                      expected to be an instantiation of the Matrix class template.
+  * \tparam  ResultType  type of \p result, where result is to be stored.
+  * \param[in]  arg      argument of matrix square root.
+  * \param[out] result   matrix square root of upper triangular part of \p arg.
+  *
+  * Only the upper triangular part (including the diagonal) of \p result is updated, the rest is not
+  * touched.  See MatrixBase::sqrt() for details on how this computation is implemented.
+  *
+  * \sa matrix_sqrt_quasi_triangular, MatrixBase::sqrt()
+  */
+template <typename MatrixType, typename ResultType> 
+void matrix_sqrt_triangular(const MatrixType &arg, ResultType &result)
 {
   using std::sqrt;
-
-  // Compute square root of m_A and store it in upper triangular part of result
-  // This uses that the square root of triangular matrices can be computed directly.
-  result.resize(m_A.rows(), m_A.cols());
-  typedef typename MatrixType::Index Index;
-  for (Index i = 0; i < m_A.rows(); i++) {
-    result.coeffRef(i,i) = sqrt(m_A.coeff(i,i));
-  }
-  for (Index j = 1; j < m_A.cols(); j++) {
-    for (Index i = j-1; i >= 0; i--) {
       typedef typename MatrixType::Scalar Scalar;
+
+  eigen_assert(arg.rows() == arg.cols());
+
+  // Compute square root of arg and store it in upper triangular part of result
+  // This uses that the square root of triangular matrices can be computed directly.
+  result.resize(arg.rows(), arg.cols());
+  for (Index i = 0; i < arg.rows(); i++) {
+    result.coeffRef(i,i) = sqrt(arg.coeff(i,i));
+  }
+  for (Index j = 1; j < arg.cols(); j++) {
+    for (Index i = j-1; i >= 0; i--) {
       // if i = j-1, then segment has length 0 so tmp = 0
       Scalar tmp = (result.row(i).segment(i+1,j-i-1) * result.col(j).segment(i+1,j-i-1)).value();
       // denominator may be zero if original matrix is singular
-      result.coeffRef(i,j) = (m_A.coeff(i,j) - tmp) / (result.coeff(i,i) + result.coeff(j,j));
+      result.coeffRef(i,j) = (arg.coeff(i,j) - tmp) / (result.coeff(i,i) + result.coeff(j,j));
     }
   }
 }
 
 
+namespace internal {
+
 /** \ingroup MatrixFunctions_Module
-  * \brief Class for computing matrix square roots of general matrices.
+  * \brief Helper struct for computing matrix square roots of general matrices.
   * \tparam  MatrixType  type of the argument of the matrix square root,
   *                      expected to be an instantiation of the Matrix class template.
   *
   * \sa MatrixSquareRootTriangular, MatrixSquareRootQuasiTriangular, MatrixBase::sqrt()
   */
 template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex>
-class MatrixSquareRoot
+struct matrix_sqrt_compute
 {
-  public:
-
-    /** \brief Constructor. 
-      *
-      * \param[in]  A  matrix whose square root is to be computed.
-      *
-      * The class stores a reference to \p A, so it should not be
-      * changed (or destroyed) before compute() is called.
-      */
-    MatrixSquareRoot(const MatrixType& A); 
-    
-    /** \brief Compute the matrix square root
-      *
-      * \param[out] result  square root of \p A, as specified in the constructor.
-      *
-      * See MatrixBase::sqrt() for details on how this computation is
-      * implemented.
-      */
-    template <typename ResultType> void compute(ResultType &result);    
+  /** \brief Compute the matrix square root
+    *
+    * \param[in]  arg     matrix whose square root is to be computed.
+    * \param[out] result  square root of \p arg.
+    *
+    * See MatrixBase::sqrt() for details on how this computation is implemented.
+    */
+  template <typename ResultType> static void run(const MatrixType &arg, ResultType &result);    
 };
 
 
 // ********** Partial specialization for real matrices **********
 
 template <typename MatrixType>
-class MatrixSquareRoot<MatrixType, 0>
+struct matrix_sqrt_compute<MatrixType, 0>
 {
-  public:
+  template <typename ResultType>
+  static void run(const MatrixType &arg, ResultType &result)
+  {
+    eigen_assert(arg.rows() == arg.cols());
 
-    MatrixSquareRoot(const MatrixType& A) 
-      : m_A(A) 
-    {  
-      eigen_assert(A.rows() == A.cols());
-    }
-  
-    template <typename ResultType> void compute(ResultType &result)
-    {
-      // Compute Schur decomposition of m_A
-      const RealSchur<MatrixType> schurOfA(m_A);  
-      const MatrixType& T = schurOfA.matrixT();
-      const MatrixType& U = schurOfA.matrixU();
+    // Compute Schur decomposition of arg
+    const RealSchur<MatrixType> schurOfA(arg);  
+    const MatrixType& T = schurOfA.matrixT();
+    const MatrixType& U = schurOfA.matrixU();
     
-      // Compute square root of T
-      MatrixType sqrtT = MatrixType::Zero(m_A.rows(), m_A.cols());
-      MatrixSquareRootQuasiTriangular<MatrixType>(T).compute(sqrtT);
+    // Compute square root of T
+    MatrixType sqrtT = MatrixType::Zero(arg.rows(), arg.cols());
+    matrix_sqrt_quasi_triangular(T, sqrtT);
     
-      // Compute square root of m_A
-      result = U * sqrtT * U.adjoint();
-    }
-    
-  private:
-    const MatrixType& m_A;
+    // Compute square root of arg
+    result = U * sqrtT * U.adjoint();
+  }
 };
 
 
 // ********** Partial specialization for complex matrices **********
 
 template <typename MatrixType>
-class MatrixSquareRoot<MatrixType, 1>
+struct matrix_sqrt_compute<MatrixType, 1>
 {
-  public:
+  template <typename ResultType>
+  static void run(const MatrixType &arg, ResultType &result)
+  {
+    eigen_assert(arg.rows() == arg.cols());
 
-    MatrixSquareRoot(const MatrixType& A) 
-      : m_A(A) 
-    {  
-      eigen_assert(A.rows() == A.cols());
-    }
-  
-    template <typename ResultType> void compute(ResultType &result)
-    {
-      // Compute Schur decomposition of m_A
-      const ComplexSchur<MatrixType> schurOfA(m_A);  
-      const MatrixType& T = schurOfA.matrixT();
-      const MatrixType& U = schurOfA.matrixU();
+    // Compute Schur decomposition of arg
+    const ComplexSchur<MatrixType> schurOfA(arg);  
+    const MatrixType& T = schurOfA.matrixT();
+    const MatrixType& U = schurOfA.matrixU();
     
-      // Compute square root of T
-      MatrixType sqrtT;
-      MatrixSquareRootTriangular<MatrixType>(T).compute(sqrtT);
+    // Compute square root of T
+    MatrixType sqrtT;
+    matrix_sqrt_triangular(T, sqrtT);
     
-      // Compute square root of m_A
-      result = U * (sqrtT.template triangularView<Upper>() * U.adjoint());
-    }
-    
-  private:
-    const MatrixType& m_A;
+    // Compute square root of arg
+    result = U * (sqrtT.template triangularView<Upper>() * U.adjoint());
+  }
 };
 
+} // end namespace internal
 
 /** \ingroup MatrixFunctions_Module
   *
@@ -415,14 +314,16 @@
 template<typename Derived> class MatrixSquareRootReturnValue
 : public ReturnByValue<MatrixSquareRootReturnValue<Derived> >
 {
-    typedef typename Derived::Index Index;
+  protected:
+    typedef typename internal::ref_selector<Derived>::type DerivedNested;
+
   public:
     /** \brief Constructor.
       *
       * \param[in]  src  %Matrix (expression) forming the argument of the
       * matrix square root.
       */
-    MatrixSquareRootReturnValue(const Derived& src) : m_src(src) { }
+    explicit MatrixSquareRootReturnValue(const Derived& src) : m_src(src) { }
 
     /** \brief Compute the matrix square root.
       *
@@ -432,18 +333,17 @@
     template <typename ResultType>
     inline void evalTo(ResultType& result) const
     {
-      const typename Derived::PlainObject srcEvaluated = m_src.eval();
-      MatrixSquareRoot<typename Derived::PlainObject> me(srcEvaluated);
-      me.compute(result);
+      typedef typename internal::nested_eval<Derived, 10>::type DerivedEvalType;
+      typedef typename internal::remove_all<DerivedEvalType>::type DerivedEvalTypeClean;
+      DerivedEvalType tmp(m_src);
+      internal::matrix_sqrt_compute<DerivedEvalTypeClean>::run(tmp, result);
     }
 
     Index rows() const { return m_src.rows(); }
     Index cols() const { return m_src.cols(); }
 
   protected:
-    const Derived& m_src;
-  private:
-    MatrixSquareRootReturnValue& operator=(const MatrixSquareRootReturnValue&);
+    const DerivedNested m_src;
 };
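
For completeness, a minimal usage sketch of this proxy via MatrixBase::sqrt() (not from the Eigen sources; values are arbitrary):

    #include <unsupported/Eigen/MatrixFunctions>
    #include <iostream>

    int main()
    {
      Eigen::Matrix2d A;
      A << 2, 1,
           0, 2;

      Eigen::Matrix2d S = A.sqrt();                    // proxy evaluated on assignment
      std::cout << (S * S - A).norm() << std::endl;    // typically close to 0
      return 0;
    }
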
 
 namespace internal {
diff --git a/unsupported/Eigen/src/MatrixFunctions/StemFunction.h b/unsupported/Eigen/src/MatrixFunctions/StemFunction.h
index 724e55c..7604df9 100644
--- a/unsupported/Eigen/src/MatrixFunctions/StemFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/StemFunction.h
@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2010 Jitse Niesen <jitse@maths.leeds.ac.uk>
+// Copyright (C) 2010, 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -12,93 +12,105 @@
 
 namespace Eigen { 
 
-/** \ingroup MatrixFunctions_Module 
-  * \brief Stem functions corresponding to standard mathematical functions.
-  */
+namespace internal {
+
+/** \brief The exponential function (and its derivatives). */
 template <typename Scalar>
-class StdStemFunctions
+Scalar stem_function_exp(Scalar x, int)
 {
-  public:
+  using std::exp;
+  return exp(x);
+}
 
-    /** \brief The exponential function (and its derivatives). */
-    static Scalar exp(Scalar x, int)
-    {
-      return std::exp(x);
-    }
+/** \brief Cosine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_cos(Scalar x, int n)
+{
+  using std::cos;
+  using std::sin;
+  Scalar res;
 
-    /** \brief Cosine (and its derivatives). */
-    static Scalar cos(Scalar x, int n)
-    {
-      Scalar res;
-      switch (n % 4) {
-      case 0: 
-	res = std::cos(x);
-	break;
-      case 1:
-	res = -std::sin(x);
-	break;
-      case 2:
-	res = -std::cos(x);
-	break;
-      case 3:
-	res = std::sin(x);
-	break;
-      }
-      return res;
-    }
+  switch (n % 4) {
+  case 0: 
+    res = cos(x);
+    break;
+  case 1:
+    res = -sin(x);
+    break;
+  case 2:
+    res = -cos(x);
+    break;
+  case 3:
+    res = sin(x);
+    break;
+  }
+  return res;
+}
 
-    /** \brief Sine (and its derivatives). */
-    static Scalar sin(Scalar x, int n)
-    {
-      Scalar res;
-      switch (n % 4) {
-      case 0:
-	res = std::sin(x);
-	break;
-      case 1:
-	res = std::cos(x);
-	break;
-      case 2:
-	res = -std::sin(x);
-	break;
-      case 3:
-	res = -std::cos(x);
-	break;
-      }
-      return res;
-    }
+/** \brief Sine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_sin(Scalar x, int n)
+{
+  using std::cos;
+  using std::sin;
+  Scalar res;
 
-    /** \brief Hyperbolic cosine (and its derivatives). */
-    static Scalar cosh(Scalar x, int n)
-    {
-      Scalar res;
-      switch (n % 2) {
-      case 0:
-	res = std::cosh(x);
-	break;
-      case 1:
-	res = std::sinh(x);
-	break;
-      }
-      return res;
-    }
+  switch (n % 4) {
+  case 0:
+    res = sin(x);
+    break;
+  case 1:
+    res = cos(x);
+    break;
+  case 2:
+    res = -sin(x);
+    break;
+  case 3:
+    res = -cos(x);
+    break;
+  }
+  return res;
+}
+
+/** \brief Hyperbolic cosine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_cosh(Scalar x, int n)
+{
+  using std::cosh;
+  using std::sinh;
+  Scalar res;
+  
+  switch (n % 2) {
+  case 0:
+    res = cosh(x);
+    break;
+  case 1:
+    res = sinh(x);
+    break;
+  }
+  return res;
+}
 	
-    /** \brief Hyperbolic sine (and its derivatives). */
-    static Scalar sinh(Scalar x, int n)
-    {
-      Scalar res;
-      switch (n % 2) {
-      case 0:
-	res = std::sinh(x);
-	break;
-      case 1:
-	res = std::cosh(x);
-	break;
-      }
-      return res;
-    }
+/** \brief Hyperbolic sine (and its derivatives). */
+template <typename Scalar>
+Scalar stem_function_sinh(Scalar x, int n)
+{
+  using std::cosh;
+  using std::sinh;
+  Scalar res;
+  
+  switch (n % 2) {
+  case 0:
+    res = sinh(x);
+    break;
+  case 1:
+    res = cosh(x);
+    break;
+  }
+  return res;
+}
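
For reference, the four switches above encode the usual derivative cycles, with $f(x, n)$ returning the $n$-th derivative of the stem function at $x$:

\[
  \frac{d^{n}}{dx^{n}} \sin x = \sin\!\Bigl(x + \frac{n\pi}{2}\Bigr), \qquad
  \frac{d^{n}}{dx^{n}} \cos x = \cos\!\Bigl(x + \frac{n\pi}{2}\Bigr), \qquad
  \frac{d^{n}}{dx^{n}} \sinh x = \begin{cases} \sinh x, & n \text{ even}, \\ \cosh x, & n \text{ odd}, \end{cases}
\]

and analogously for cosh (a cycle of length 2 instead of 4).
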
 
-}; // end of class StdStemFunctions
+} // end namespace internal
 
 } // end namespace Eigen