Squashed 'third_party/eigen/' changes from 61d72f6..cf794d3


Change-Id: I9b814151b01f49af6337a8605d0c42a3a1ed4c72
git-subtree-dir: third_party/eigen
git-subtree-split: cf794d3b741a6278df169e58461f8529f43bce5d
diff --git a/Eigen/src/SparseLU/CMakeLists.txt b/Eigen/src/SparseLU/CMakeLists.txt
deleted file mode 100644
index 69729ee..0000000
--- a/Eigen/src/SparseLU/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SparseLU_SRCS "*.h")
-
-INSTALL(FILES
-  ${Eigen_SparseLU_SRCS}
-  DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel
-  )
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 4514cfd..7104831 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -2,7 +2,7 @@
 // for linear algebra.
 //
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2012-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,7 +14,7 @@
 
 namespace Eigen {
 
-template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::Index> > class SparseLU;
+template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::StorageIndex> > class SparseLU;
 template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType;
 template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType;
 
@@ -64,33 +64,45 @@
   * 
   * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<>
  * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLAMD
+  *
+  * \implsparsesolverconcept
   * 
-  * 
-  * \sa \ref TutorialSparseDirectSolvers
+  * \sa \ref TutorialSparseSolverConcept
   * \sa \ref OrderingMethods_Module
   */
 template <typename _MatrixType, typename _OrderingType>
-class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index>
+class SparseLU : public SparseSolverBase<SparseLU<_MatrixType,_OrderingType> >, public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::StorageIndex>
 {
+  protected:
+    typedef SparseSolverBase<SparseLU<_MatrixType,_OrderingType> > APIBase;
+    using APIBase::m_isInitialized;
   public:
+    using APIBase::_solve_impl;
+    
     typedef _MatrixType MatrixType; 
     typedef _OrderingType OrderingType;
     typedef typename MatrixType::Scalar Scalar; 
     typedef typename MatrixType::RealScalar RealScalar; 
-    typedef typename MatrixType::Index Index; 
-    typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix;
-    typedef internal::MappedSuperNodalMatrix<Scalar, Index> SCMatrix; 
+    typedef typename MatrixType::StorageIndex StorageIndex;
+    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> NCMatrix;
+    typedef internal::MappedSuperNodalMatrix<Scalar, StorageIndex> SCMatrix;
     typedef Matrix<Scalar,Dynamic,1> ScalarVector;
-    typedef Matrix<Index,Dynamic,1> IndexVector;
-    typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
-    typedef internal::SparseLUImpl<Scalar, Index> Base;
+    typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
+    typedef PermutationMatrix<Dynamic, Dynamic, StorageIndex> PermutationType;
+    typedef internal::SparseLUImpl<Scalar, StorageIndex> Base;
+
+    enum {
+      ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+      MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+    };
     
   public:
-    SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+    SparseLU():m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
     {
       initperfvalues(); 
     }
-    SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
+    explicit SparseLU(const MatrixType& matrix)
+      : m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1)
     {
       initperfvalues(); 
       compute(matrix);
@@ -141,9 +153,9 @@
       * y = b; matrixU().solveInPlace(y);
       * \endcode
       */
-    SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,Index> > matrixU() const
+    SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,StorageIndex> > matrixU() const
     {
-      return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,Index> >(m_Lstore, m_Ustore);
+      return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,StorageIndex> >(m_Lstore, m_Ustore);
     }
 
     /**
@@ -168,6 +180,7 @@
       m_diagpivotthresh = thresh; 
     }
 
+#ifdef EIGEN_PARSED_BY_DOXYGEN
     /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
       *
      * \warning the destination matrix X in X = this->solve(B) must be column-major.
@@ -175,26 +188,8 @@
       * \sa compute()
       */
     template<typename Rhs>
-    inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const 
-    {
-      eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); 
-      eigen_assert(rows()==B.rows()
-                    && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
-          return internal::solve_retval<SparseLU, Rhs>(*this, B.derived());
-    }
-
-    /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
-      *
-      * \sa compute()
-      */
-    template<typename Rhs>
-    inline const internal::sparse_solve_retval<SparseLU, Rhs> solve(const SparseMatrixBase<Rhs>& B) const 
-    {
-      eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); 
-      eigen_assert(rows()==B.rows()
-                    && "SparseLU::solve(): invalid number of rows of the right hand side matrix B");
-          return internal::sparse_solve_retval<SparseLU, Rhs>(*this, B.derived());
-    }
+    inline const Solve<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const;
+#endif // EIGEN_PARSED_BY_DOXYGEN
     
     /** \brief Reports whether previous computation was successful.
       *
@@ -219,7 +214,7 @@
     }
 
     template<typename Rhs, typename Dest>
-    bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
+    bool _solve_impl(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const
     {
       Dest& X(X_base.derived());
       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first");
@@ -255,8 +250,9 @@
       *
       * \sa logAbsDeterminant(), signDeterminant()
       */
-     Scalar absDeterminant()
+    Scalar absDeterminant()
     {
+      using std::abs;
       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
       // Initialize with the determinant of the row matrix
       Scalar det = Scalar(1.);
@@ -268,42 +264,43 @@
         {
           if(it.index() == j)
           {
-            using std::abs;
             det *= abs(it.value());
             break;
           }
         }
-       }
-       return det;
-     }
+      }
+      return det;
+    }
 
-     /** \returns the natural log of the absolute value of the determinant of the matrix
-       * of which **this is the QR decomposition
-       *
-       * \note This method is useful to work around the risk of overflow/underflow that's
-       * inherent to the determinant computation.
-       *
-       * \sa absDeterminant(), signDeterminant()
-       */
-     Scalar logAbsDeterminant() const
-     {
-       eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
-       Scalar det = Scalar(0.);
-       for (Index j = 0; j < this->cols(); ++j)
-       {
-         for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
-         {
-           if(it.row() < j) continue;
-           if(it.row() == j)
-           {
-             using std::log; using std::abs;
-             det += log(abs(it.value()));
-             break;
-           }
-         }
-       }
-       return det;
-     }
+    /** \returns the natural log of the absolute value of the determinant of the matrix
+      * of which **this is the LU decomposition
+      *
+      * \note This method is useful to work around the risk of overflow/underflow that's
+      * inherent to the determinant computation.
+      *
+      * \sa absDeterminant(), signDeterminant()
+      */
+    Scalar logAbsDeterminant() const
+    {
+      using std::log;
+      using std::abs;
+
+      eigen_assert(m_factorizationIsOk && "The matrix should be factorized first.");
+      Scalar det = Scalar(0.);
+      for (Index j = 0; j < this->cols(); ++j)
+      {
+        for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it)
+        {
+          if(it.row() < j) continue;
+          if(it.row() == j)
+          {
+            det += log(abs(it.value()));
+            break;
+          }
+        }
+      }
+      return det;
+    }
 
     /** \returns A number representing the sign of the determinant
       *
@@ -355,7 +352,7 @@
           }
         }
       }
-      return det * Scalar(m_detPermR * m_detPermC);
+      return (m_detPermR * m_detPermC) > 0 ? det : -det;
     }
 
   protected:
@@ -372,13 +369,12 @@
       
     // Variables 
     mutable ComputationInfo m_info;
-    bool m_isInitialized;
     bool m_factorizationIsOk;
     bool m_analysisIsOk;
     std::string m_lastError;
     NCMatrix m_mat; // The input (permuted ) matrix 
     SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
-    MappedSparseMatrix<Scalar,ColMajor,Index> m_Ustore; // The upper triangular matrix
+    MappedSparseMatrix<Scalar,ColMajor,StorageIndex> m_Ustore; // The upper triangular matrix
     PermutationType m_perm_c; // Column permutation 
     PermutationType m_perm_r ; // Row permutation
     IndexVector m_etree; // Column elimination tree 
@@ -388,7 +384,7 @@
     // SparseLU options 
     bool m_symmetricmode;
     // values for performance 
-    internal::perfvalues<Index> m_perfv; 
+    internal::perfvalues m_perfv;
     RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot
     Index m_nnzL, m_nnzU; // Nonzeros in L and U factors
     Index m_detPermR, m_detPermC; // Determinants of the permutation matrices
@@ -417,30 +413,32 @@
   
   //TODO  It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat.
   
+  // First, copy the whole input matrix.
+  m_mat = mat;
+  
+  // Compute fill-in ordering
   OrderingType ord; 
-  ord(mat,m_perm_c);
+  ord(m_mat,m_perm_c);
   
   // Apply the permutation to the column of the input  matrix
-  //First copy the whole input matrix. 
-  m_mat = mat;
-  if (m_perm_c.size()) {
+  if (m_perm_c.size())
+  {
     m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used.  
-    //Then, permute only the column pointers
-    const Index * outerIndexPtr;
-    if (mat.isCompressed()) outerIndexPtr = mat.outerIndexPtr();
-    else
-    {
-      Index *outerIndexPtr_t = new Index[mat.cols()+1];
-      for(Index i = 0; i <= mat.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
-      outerIndexPtr = outerIndexPtr_t;
-    }
+    // Then, permute only the column pointers
+    ei_declare_aligned_stack_constructed_variable(StorageIndex,outerIndexPtr,mat.cols()+1,mat.isCompressed()?const_cast<StorageIndex*>(mat.outerIndexPtr()):0);
+    
+    // If the input matrix 'mat' is uncompressed, then the outer-indices do not match the ones of m_mat, and a copy is thus needed.
+    if(!mat.isCompressed()) 
+      IndexVector::Map(outerIndexPtr, mat.cols()+1) = IndexVector::Map(m_mat.outerIndexPtr(),mat.cols()+1);
+    
+    // Apply the permutation and compute the nnz per column.
     for (Index i = 0; i < mat.cols(); i++)
     {
       m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i];
       m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i];
     }
-    if(!mat.isCompressed()) delete[] outerIndexPtr;
   }
+  
   // Compute the column elimination tree of the permuted matrix 
   IndexVector firstRowElt;
   internal::coletree(m_mat, m_etree,firstRowElt); 
@@ -449,7 +447,7 @@
   if (!m_symmetricmode) {
     IndexVector post, iwork; 
     // Post order etree
-    internal::treePostorder(m_mat.cols(), m_etree, post); 
+    internal::treePostorder(StorageIndex(m_mat.cols()), m_etree, post); 
       
    
     // Renumber etree in postorder 
@@ -501,7 +499,7 @@
   eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); 
   eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices");
   
-  typedef typename IndexVector::Scalar Index; 
+  m_isInitialized = true;
   
   
   // Apply the column permutation computed in analyzepattern()
@@ -511,11 +509,11 @@
   {
     m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers.
     //Then, permute only the column pointers
-    const Index * outerIndexPtr;
+    const StorageIndex * outerIndexPtr;
     if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr();
     else
     {
-      Index* outerIndexPtr_t = new Index[matrix.cols()+1];
+      StorageIndex* outerIndexPtr_t = new StorageIndex[matrix.cols()+1];
       for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i];
       outerIndexPtr = outerIndexPtr_t;
     }
@@ -529,7 +527,7 @@
   else 
   { //FIXME This should not be needed if the empty permutation is handled transparently
     m_perm_c.resize(matrix.cols());
-    for(Index i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
+    for(StorageIndex i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i;
   }
   
   Index m = m_mat.rows();
@@ -694,7 +692,7 @@
   // Create supernode matrix L 
   m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); 
   // Create the column major upper sparse matrix  U; 
-  new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, Index> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); 
+  new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, StorageIndex> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() );
   
   m_info = Success;
   m_factorizationIsOk = true;
@@ -703,9 +701,8 @@
 template<typename MappedSupernodalType>
 struct SparseLUMatrixLReturnType : internal::no_assignment_operator
 {
-  typedef typename MappedSupernodalType::Index Index;
   typedef typename MappedSupernodalType::Scalar Scalar;
-  SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
+  explicit SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL)
   { }
   Index rows() { return m_mapL.rows(); }
   Index cols() { return m_mapL.cols(); }
@@ -720,7 +717,6 @@
 template<typename MatrixLType, typename MatrixUType>
 struct SparseLUMatrixUReturnType : internal::no_assignment_operator
 {
-  typedef typename MatrixLType::Index Index;
   typedef typename MatrixLType::Scalar Scalar;
   SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU)
   : m_mapL(mapL),m_mapU(mapU)
@@ -731,7 +727,7 @@
   template<typename Dest>   void solveInPlace(MatrixBase<Dest> &X) const
   {
     Index nrhs = X.cols();
-    Index n = X.rows();
+    Index n    = X.rows();
     // Backward solve with U
     for (Index k = m_mapL.nsuper(); k >= 0; k--)
     {
@@ -749,8 +745,8 @@
       }
       else
       {
-        Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
-        Map< Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
+        Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
+        Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
         U = A.template triangularView<Upper>().solve(U);
       }
 
@@ -772,35 +768,6 @@
   const MatrixUType& m_mapU;
 };
 
-namespace internal {
-  
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
-  : solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
-{
-  typedef SparseLU<_MatrixType,Derived> Dec;
-  EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    dec()._solve(rhs(),dst);
-  }
-};
-
-template<typename _MatrixType, typename Derived, typename Rhs>
-struct sparse_solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
-  : sparse_solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs>
-{
-  typedef SparseLU<_MatrixType,Derived> Dec;
-  EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs)
-
-  template<typename Dest> void evalTo(Dest& dst) const
-  {
-    this->defaultEvalTo(dst);
-  }
-};
-} // end namespace internal
-
 } // End namespace Eigen 
 
 #endif
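
For reference, a minimal usage sketch of the reworked solver API (solve() now
returns a Solve<> expression through SparseSolverBase, and the one-argument
constructor is explicit). Matrix values and sizes are made up for illustration:

    #include <Eigen/SparseLU>
    #include <iostream>

    int main()
    {
      typedef Eigen::SparseMatrix<double> SpMat;   // column-major, StorageIndex = int
      SpMat A(3,3);
      A.insert(0,0) = 4; A.insert(1,1) = 2; A.insert(2,2) = 5; A.insert(0,2) = 1;
      A.makeCompressed();

      Eigen::SparseLU<SpMat> lu(A);                // analyzePattern() + factorize()
      if(lu.info() != Eigen::Success) return 1;

      Eigen::Vector3d b(1,2,3);
      Eigen::Vector3d x = lu.solve(b);             // Solve<> expression, evaluated on assignment
      std::cout << x.transpose() << "\n"
                << "log|det(A)| = " << lu.logAbsDeterminant() << "\n";
    }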
diff --git a/Eigen/src/SparseLU/SparseLUImpl.h b/Eigen/src/SparseLU/SparseLUImpl.h
index 14d7089..fc0cfc4 100644
--- a/Eigen/src/SparseLU/SparseLUImpl.h
+++ b/Eigen/src/SparseLU/SparseLUImpl.h
@@ -16,17 +16,19 @@
   * \class SparseLUImpl
   * Base class for sparseLU
   */
-template <typename Scalar, typename Index>
+template <typename Scalar, typename StorageIndex>
 class SparseLUImpl
 {
   public:
     typedef Matrix<Scalar,Dynamic,1> ScalarVector;
-    typedef Matrix<Index,Dynamic,1> IndexVector; 
+    typedef Matrix<StorageIndex,Dynamic,1> IndexVector; 
+    typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> ScalarMatrix;
+    typedef Map<ScalarMatrix, 0,  OuterStride<> > MappedMatrixBlock;
     typedef typename ScalarVector::RealScalar RealScalar; 
     typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector;
-    typedef Ref<Matrix<Index,Dynamic,1> > BlockIndexVector;
+    typedef Ref<Matrix<StorageIndex,Dynamic,1> > BlockIndexVector;
     typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t; 
-    typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType; 
+    typedef SparseMatrix<Scalar,ColMajor,StorageIndex> MatrixType; 
     
   protected:
      template <typename VectorType>
@@ -40,7 +42,7 @@
      Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu);
      Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu);
      template <typename Traits>
-     void dfs_kernel(const Index jj, IndexVector& perm_r,
+     void dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
                     Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
                     Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
                     IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits);
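
The new MappedMatrixBlock typedef (a Map with a runtime OuterStride) is what the
kernels below use to view sub-blocks of the packed L\U storage. A self-contained
sketch of the idiom, with made-up sizes and buffer names:

    #include <Eigen/Dense>

    void view_block(double* lusup, long luptr, long nsupc, long lda)
    {
      typedef Eigen::Matrix<double,Eigen::Dynamic,Eigen::Dynamic,Eigen::ColMajor> ScalarMatrix;
      typedef Eigen::Map<ScalarMatrix, 0, Eigen::OuterStride<> > MappedMatrixBlock;

      // The block is nsupc x nsupc, but consecutive columns of the underlying
      // buffer are lda entries apart, hence the runtime outer stride.
      MappedMatrixBlock A(lusup + luptr, nsupc, nsupc, Eigen::OuterStride<>(lda));
      A.diagonal().setOnes();   // writes through to the buffer
    }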
diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h
index 1ffa7d5..4dc42e8 100644
--- a/Eigen/src/SparseLU/SparseLU_Memory.h
+++ b/Eigen/src/SparseLU/SparseLU_Memory.h
@@ -36,13 +36,12 @@
   
 enum { LUNoMarker = 3 };
 enum {emptyIdxLU = -1};
-template<typename Index>
 inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b)
 {
   return (std::max)(m, (t+b)*w);
 }
 
-template< typename Scalar, typename Index>
+template< typename Scalar>
 inline Index LUTempSpace(Index&m, Index& w)
 {
   return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar);
@@ -59,9 +58,9 @@
   * \param keep_prev  1: use length  and do not expand the vector; 0: compute new_len and expand
   * \param[in,out] num_expansions Number of times the memory has been expanded
   */
-template <typename Scalar, typename Index>
+template <typename Scalar, typename StorageIndex>
 template <typename VectorType>
-Index  SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions) 
+Index  SparseLUImpl<Scalar,StorageIndex>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions) 
 {
   
   float alpha = 1.5; // Ratio of the memory increase 
@@ -148,13 +147,13 @@
  * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success
  * \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation
  */
-template <typename Scalar, typename Index>
-Index SparseLUImpl<Scalar,Index>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size,  GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size,  GlobalLU_t& glu)
 {
   Index& num_expansions = glu.num_expansions; //No memory expansions so far
   num_expansions = 0;
-  glu.nzumax = glu.nzlumax = (std::min)(fillratio * annz / n, m) * n; // estimated number of nonzeros in U 
-  glu.nzlmax = (std::max)(Index(4), fillratio) * annz / 4; // estimated  nnz in L factor
+  glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U 
+  glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated  nnz in L factor
   // Return the estimated size to the user if necessary
   Index tempSpace;
   tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
@@ -205,9 +204,9 @@
  * \param num_expansions Number of expansions 
  * \return 0 on success, > 0 size of the memory allocated so far
  */
-template <typename Scalar, typename Index>
+template <typename Scalar, typename StorageIndex>
 template <typename VectorType>
-Index SparseLUImpl<Scalar,Index>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)
+Index SparseLUImpl<Scalar,StorageIndex>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions)
 {
   Index failed_size; 
   if (memtype == USUB)
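
The new (annz+1) terms guard the estimates against annz == 0, where the old
formulas produced zero-sized L/U allocations. A plain-arithmetic sketch of the
two estimates (hypothetical free function, Index stands in for Eigen's Index):

    #include <algorithm>

    typedef long Index;

    void estimate_fill(Index m, Index n, Index annz, Index fillratio,
                       Index& nzumax, Index& nzlmax)
    {
      nzumax = (std::min)(fillratio * (annz+1) / n, m) * n;     // estimated nnz in U
      nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4;  // estimated nnz in L
    }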
diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h
index 24d6bf1..cf5ec44 100644
--- a/Eigen/src/SparseLU/SparseLU_Structs.h
+++ b/Eigen/src/SparseLU/SparseLU_Structs.h
@@ -75,7 +75,7 @@
 
 template <typename IndexVector, typename ScalarVector>
 struct LU_GlobalLU_t {
-  typedef typename IndexVector::Scalar Index; 
+  typedef typename IndexVector::Scalar StorageIndex; 
   IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode
   IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping)
   ScalarVector  lusup; // nonzero values of L ordered by columns 
@@ -93,7 +93,6 @@
 };
 
 // Values to set for performance
-template <typename Index>
 struct perfvalues {
   Index panel_size; // a panel consists of at most <panel_size> consecutive columns
   Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns) 
diff --git a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
index b16afd6..721e188 100644
--- a/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
+++ b/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h
@@ -29,20 +29,20 @@
  * SuperInnerIterator to iterate through all supernodes 
  * Function for triangular solve
  */
-template <typename _Scalar, typename _Index>
+template <typename _Scalar, typename _StorageIndex>
 class MappedSuperNodalMatrix
 {
   public:
     typedef _Scalar Scalar; 
-    typedef _Index Index;
-    typedef Matrix<Index,Dynamic,1> IndexVector; 
+    typedef _StorageIndex StorageIndex;
+    typedef Matrix<StorageIndex,Dynamic,1> IndexVector;
     typedef Matrix<Scalar,Dynamic,1> ScalarVector;
   public:
     MappedSuperNodalMatrix()
     {
       
     }
-    MappedSuperNodalMatrix(Index m, Index n,  ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, 
+    MappedSuperNodalMatrix(Index m, Index n,  ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
              IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
     {
       setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col);
@@ -58,7 +58,7 @@
     * FIXME This class will be modified such that it can be used in the course 
      * of the factorization.
      */
-    void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, 
+    void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind,
              IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col )
     {
       m_row = m;
@@ -96,12 +96,12 @@
     /**
      * Return the pointers to the beginning of each column in \ref valuePtr()
      */
-    Index* colIndexPtr()
+    StorageIndex* colIndexPtr()
     {
       return m_nzval_colptr; 
     }
     
-    const Index* colIndexPtr() const
+    const StorageIndex* colIndexPtr() const
     {
       return m_nzval_colptr; 
     }
@@ -109,9 +109,9 @@
     /**
      * Return the array of compressed row indices of all supernodes
      */
-    Index* rowIndex()  { return m_rowind; }
+    StorageIndex* rowIndex()  { return m_rowind; }
     
-    const Index* rowIndex() const
+    const StorageIndex* rowIndex() const
     {
       return m_rowind; 
     }
@@ -119,9 +119,9 @@
     /**
      * Return the location in \em rowvaluePtr() which starts each column
      */
-    Index* rowIndexPtr() { return m_rowind_colptr; }
+    StorageIndex* rowIndexPtr() { return m_rowind_colptr; }
     
-    const Index* rowIndexPtr() const 
+    const StorageIndex* rowIndexPtr() const
     {
       return m_rowind_colptr; 
     }
@@ -129,18 +129,18 @@
     /** 
      * Return the array of column-to-supernode mapping 
      */
-    Index* colToSup()  { return m_col_to_sup; }
+    StorageIndex* colToSup()  { return m_col_to_sup; }
     
-    const Index* colToSup() const
+    const StorageIndex* colToSup() const
     {
       return m_col_to_sup;       
     }
     /**
      * Return the array of supernode-to-column mapping
      */
-    Index* supToCol() { return m_sup_to_col; }
+    StorageIndex* supToCol() { return m_sup_to_col; }
     
-    const Index* supToCol() const 
+    const StorageIndex* supToCol() const
     {
       return m_sup_to_col;
     }
@@ -148,7 +148,7 @@
     /**
      * Return the number of supernodes
      */
-    Index nsuper() const 
+    Index nsuper() const
     {
       return m_nsuper; 
     }
@@ -162,14 +162,14 @@
     
   protected:
     Index m_row; // Number of rows
-    Index m_col; // Number of columns 
-    Index m_nsuper; // Number of supernodes 
+    Index m_col; // Number of columns
+    Index m_nsuper; // Number of supernodes
     Scalar* m_nzval; //array of nonzero values packed by column
-    Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j 
-    Index* m_rowind; // Array of compressed row indices of rectangular supernodes
-    Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j
-    Index* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs
-    Index* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode
+    StorageIndex* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j
+    StorageIndex* m_rowind; // Array of compressed row indices of rectangular supernodes
+    StorageIndex* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j
+    StorageIndex* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs
+    StorageIndex* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode
     
   private :
 };
@@ -178,13 +178,13 @@
   * \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L
   * 
   */
-template<typename Scalar, typename Index>
-class MappedSuperNodalMatrix<Scalar,Index>::InnerIterator
+template<typename Scalar, typename StorageIndex>
+class MappedSuperNodalMatrix<Scalar,StorageIndex>::InnerIterator
 {
   public:
      InnerIterator(const MappedSuperNodalMatrix& mat, Index outer)
       : m_matrix(mat),
-        m_outer(outer), 
+        m_outer(outer),
         m_supno(mat.colToSup()[outer]),
         m_idval(mat.colIndexPtr()[outer]),
         m_startidval(m_idval),
@@ -229,14 +229,17 @@
  * \brief Solve with the supernode triangular matrix
  * 
  */
-template<typename Scalar, typename Index>
+template<typename Scalar, typename Index_>
 template<typename Dest>
-void MappedSuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const
+void MappedSuperNodalMatrix<Scalar,Index_>::solveInPlace( MatrixBase<Dest>&X) const
 {
-    Index n = X.rows(); 
-    Index nrhs = X.cols(); 
+    /* Explicit type conversion as the Index type of MatrixBase<Dest> may be wider than Index */
+//    eigen_assert(X.rows() <= NumTraits<Index>::highest());
+//    eigen_assert(X.cols() <= NumTraits<Index>::highest());
+    Index n    = Index(X.rows());
+    Index nrhs = Index(X.cols());
     const Scalar * Lval = valuePtr();                 // Nonzero values 
-    Matrix<Scalar,Dynamic,Dynamic> work(n, nrhs);     // working vector
+    Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor> work(n, nrhs);     // working vector
     work.setZero();
     for (Index k = 0; k <= nsuper(); k ++)
     {
@@ -267,13 +270,13 @@
         Index lda = colIndexPtr()[fsupc+1] - luptr;
         
         // Triangular solve 
-        Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) ); 
-        Map< Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); 
+        Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) );
+        Map< Matrix<Scalar,Dynamic,Dest::ColsAtCompileTime, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) );
         U = A.template triangularView<UnitLower>().solve(U); 
         
         // Matrix-vector product 
-        new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); 
-        work.block(0, 0, nrow, nrhs) = A * U; 
+        new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
+        work.topRows(nrow).noalias() = A * U;
         
         //Begin Scatter 
         for (Index j = 0; j < nrhs; j++)
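
A sketch of the Dest::ColsAtCompileTime trick used above for the work matrix and
the U map: when the right-hand side is a vector, the temporaries keep the
compile-time column count 1 instead of a runtime Dynamic count, so products
assigned into them can take the dedicated vector paths. Illustration only:

    #include <Eigen/Dense>

    template<typename Dest>
    void make_work(const Eigen::MatrixBase<Dest>& X)
    {
      // For Dest = VectorXd this is a Matrix<double,Dynamic,1>.
      Eigen::Matrix<typename Dest::Scalar, Eigen::Dynamic,
                    Dest::ColsAtCompileTime, Eigen::ColMajor> work(X.rows(), X.cols());
      work.setZero();
    }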
diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h
index 15352ac..9e3dab4 100644
--- a/Eigen/src/SparseLU/SparseLU_Utils.h
+++ b/Eigen/src/SparseLU/SparseLU_Utils.h
@@ -17,8 +17,8 @@
 /**
  * \brief Count Nonzero elements in the factors
  */
-template <typename Scalar, typename Index>
-void SparseLUImpl<Scalar,Index>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu)
 {
  nnzL = 0; 
  nnzU = (glu.xusub)(n); 
@@ -48,12 +48,12 @@
  * and applies permutation to the remaining subscripts
  * 
  */
-template <typename Scalar, typename Index>
-void SparseLUImpl<Scalar,Index>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu)
 {
   Index fsupc, i, j, k, jstart; 
   
-  Index nextl = 0; 
+  StorageIndex nextl = 0; 
   Index nsuper = (glu.supno)(n); 
   
   // For each supernode 
diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h
index f24bd87..b57f068 100644
--- a/Eigen/src/SparseLU/SparseLU_column_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h
@@ -49,8 +49,9 @@
  *         > 0 - number of bytes allocated when run out of space
  * 
  */
-template <typename Scalar, typename Index>
-Index SparseLUImpl<Scalar,Index>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv,
+                                                     BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu)
 {
   Index  jsupno, k, ksub, krep, ksupno; 
   Index lptr, nrow, isub, irow, nextlu, new_next, ufirst; 
@@ -137,7 +138,7 @@
     glu.lusup.segment(nextlu,offset).setZero();
     nextlu += offset;
   }
-  glu.xlusup(jcol + 1) = nextlu;  // close L\U(*,jcol); 
+  glu.xlusup(jcol + 1) = StorageIndex(nextlu);  // close L\U(*,jcol); 
   
   /* For more updates within the panel (also within the current supernode),
    * should start from the first column of the panel, or the first column
@@ -162,11 +163,11 @@
     // points to the beginning of jcol in snode L\U(jsupno) 
     ufirst = glu.xlusup(jcol) + d_fsupc; 
     Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
-    Map<Matrix<Scalar,Dynamic,Dynamic>, 0,  OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); 
+    MappedMatrixBlock A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
     VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc); 
     u = A.template triangularView<UnitLower>().solve(u); 
     
-    new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); 
+    new (&A) MappedMatrixBlock ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
     VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); 
     l.noalias() -= A * u;
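
The l.noalias() -= A * u; update above tells Eigen the destination does not
overlap the product operands, letting it evaluate the product directly into l
without a temporary. Stand-alone sketch:

    #include <Eigen/Dense>

    void rank_update(const Eigen::MatrixXd& A, const Eigen::VectorXd& u,
                     Eigen::VectorXd& l)
    {
      l.noalias() -= A * u;   // no temporary; safe because l aliases neither A nor u
    }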
     
diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h
index 4c04b0e..c98b30e 100644
--- a/Eigen/src/SparseLU/SparseLU_column_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h
@@ -30,7 +30,7 @@
 #ifndef SPARSELU_COLUMN_DFS_H
 #define SPARSELU_COLUMN_DFS_H
 
-template <typename Scalar, typename Index> class SparseLUImpl;
+template <typename Scalar, typename StorageIndex> class SparseLUImpl;
 namespace Eigen {
 
 namespace internal {
@@ -39,8 +39,8 @@
 struct column_dfs_traits : no_assignment_operator
 {
   typedef typename ScalarVector::Scalar Scalar;
-  typedef typename IndexVector::Scalar Index;
-  column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, Index>::GlobalLU_t& glu, SparseLUImpl<Scalar, Index>& luImpl)
+  typedef typename IndexVector::Scalar StorageIndex;
+  column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& glu, SparseLUImpl<Scalar, StorageIndex>& luImpl)
    : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl)
  {}
   bool update_segrep(Index /*krep*/, Index /*jj*/)
@@ -57,8 +57,8 @@
   
   Index m_jcol;
   Index& m_jsuper_ref;
-  typename SparseLUImpl<Scalar, Index>::GlobalLU_t& m_glu;
-  SparseLUImpl<Scalar, Index>& m_luImpl;
+  typename SparseLUImpl<Scalar, StorageIndex>::GlobalLU_t& m_glu;
+  SparseLUImpl<Scalar, StorageIndex>& m_luImpl;
 };
 
 
@@ -89,8 +89,10 @@
  *         > 0 number of bytes allocated when run out of space
  * 
  */
-template <typename Scalar, typename Index>
-Index SparseLUImpl<Scalar,Index>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg,  BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg,
+                                                    BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune,
+                                                    IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
 {
   
   Index jsuper = glu.supno(jcol); 
@@ -110,13 +112,13 @@
     // krow was visited before, go to the next nonz; 
     if (kmark == jcol) continue;
     
-    dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
+    dfs_kernel(StorageIndex(jcol), perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
                    xplore, glu, nextl, krow, traits);
   } // for each nonzero ... 
   
-  Index fsupc, jptr, jm1ptr, ito, ifrom, istop;
-  Index nsuper = glu.supno(jcol);
-  Index jcolp1 = jcol + 1;
+  Index fsupc;
+  StorageIndex nsuper = glu.supno(jcol);
+  StorageIndex jcolp1 = StorageIndex(jcol) + 1;
   Index jcolm1 = jcol - 1;
   
   // check to see if j belongs in the same supernode as j-1
@@ -127,8 +129,8 @@
   else 
   {
     fsupc = glu.xsup(nsuper); 
-    jptr = glu.xlsub(jcol); // Not yet compressed
-    jm1ptr = glu.xlsub(jcolm1); 
+    StorageIndex jptr = glu.xlsub(jcol); // Not yet compressed
+    StorageIndex jm1ptr = glu.xlsub(jcolm1); 
     
     // Use supernodes of type T2 : see SuperLU paper
     if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;
@@ -146,13 +148,13 @@
     { // starts a new supernode 
       if ( (fsupc < jcolm1-1) ) 
       { // >= 3 columns in nsuper
-        ito = glu.xlsub(fsupc+1);
+        StorageIndex ito = glu.xlsub(fsupc+1);
         glu.xlsub(jcolm1) = ito; 
-        istop = ito + jptr - jm1ptr; 
+        StorageIndex istop = ito + jptr - jm1ptr; 
        xprune(jcolm1) = istop; // initialize xprune(jcol-1)
         glu.xlsub(jcol) = istop; 
         
-        for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
+        for (StorageIndex ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
           glu.lsub(ito) = glu.lsub(ifrom); 
         nextl = ito;  // = istop + length(jcol)
       }
@@ -164,8 +166,8 @@
   // Tidy up the pointers before exit
   glu.xsup(nsuper+1) = jcolp1; 
   glu.supno(jcolp1) = nsuper; 
-  xprune(jcol) = nextl;  // Intialize upper bound for pruning
-  glu.xlsub(jcolp1) = nextl; 
+  xprune(jcol) = StorageIndex(nextl);  // Initialize upper bound for pruning
+  glu.xlsub(jcolp1) = StorageIndex(nextl); 
   
   return 0; 
 }
diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
index 170610d..c32d8d8 100644
--- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
+++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h
@@ -46,8 +46,9 @@
  *         > 0 - number of bytes allocated when run out of space
  * 
  */
-template <typename Scalar, typename Index>
-Index SparseLUImpl<Scalar,Index>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep,
+                                                      BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu)
 {  
   Index ksub, krep, ksupno; 
     
@@ -55,7 +56,7 @@
   
   // For each nonzero supernode segment of U[*,j] in topological order 
   Index k = nseg - 1, i; 
-  Index nextu = glu.xusub(jcol); 
+  StorageIndex nextu = glu.xusub(jcol); 
   Index kfnz, isub, segsize; 
   Index new_next,irow; 
   Index fsupc, mem; 
diff --git a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
index 9e4e3e7..95ba741 100644
--- a/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
+++ b/Eigen/src/SparseLU/SparseLU_gemm_kernel.h
@@ -21,7 +21,7 @@
   *  - lda and ldc must be multiples of the respective packet size
   *  - C must have the same alignment as A
   */
-template<typename Scalar,typename Index>
+template<typename Scalar>
 EIGEN_DONT_INLINE
 void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc)
 {
@@ -39,9 +39,9 @@
   };
   Index d_end = (d/RK)*RK;    // number of columns of A (rows of B) suitable for full register blocking
   Index n_end = (n/RN)*RN;    // number of columns of B-C suitable for processing RN columns at once
-  Index i0 = internal::first_aligned(A,m);
+  Index i0 = internal::first_default_aligned(A,m);
   
-  eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_aligned(C,m)));
+  eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_default_aligned(C,m)));
   
   // handle the non aligned rows of A and C without any optimization:
   for(Index i=0; i<i0; ++i)
@@ -72,14 +72,14 @@
         
         // load and expand a RN x RK block of B
         Packet b00, b10, b20, b30, b01, b11, b21, b31;
-                  b00 = pset1<Packet>(Bc0[0]);
-                  b10 = pset1<Packet>(Bc0[1]);
-        if(RK==4) b20 = pset1<Packet>(Bc0[2]);
-        if(RK==4) b30 = pset1<Packet>(Bc0[3]);
-                  b01 = pset1<Packet>(Bc1[0]);
-                  b11 = pset1<Packet>(Bc1[1]);
-        if(RK==4) b21 = pset1<Packet>(Bc1[2]);
-        if(RK==4) b31 = pset1<Packet>(Bc1[3]);
+                  { b00 = pset1<Packet>(Bc0[0]); }
+                  { b10 = pset1<Packet>(Bc0[1]); }
+        if(RK==4) { b20 = pset1<Packet>(Bc0[2]); }
+        if(RK==4) { b30 = pset1<Packet>(Bc0[3]); }
+                  { b01 = pset1<Packet>(Bc1[0]); }
+                  { b11 = pset1<Packet>(Bc1[1]); }
+        if(RK==4) { b21 = pset1<Packet>(Bc1[2]); }
+        if(RK==4) { b31 = pset1<Packet>(Bc1[3]); }
         
         Packet a0, a1, a2, a3, c0, c1, t0, t1;
         
@@ -106,22 +106,22 @@
         
 #define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);}
 #define WORK(I)  \
-                    c0 = pload<Packet>(C0+i+(I)*PacketSize);   \
-                    c1 = pload<Packet>(C1+i+(I)*PacketSize);   \
-                    KMADD(c0, a0, b00, t0)      \
-                    KMADD(c1, a0, b01, t1)      \
-                    a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
-                    KMADD(c0, a1, b10, t0)      \
-                    KMADD(c1, a1, b11, t1)       \
-                    a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
-          if(RK==4) KMADD(c0, a2, b20, t0)       \
-          if(RK==4) KMADD(c1, a2, b21, t1)       \
-          if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
-          if(RK==4) KMADD(c0, a3, b30, t0)       \
-          if(RK==4) KMADD(c1, a3, b31, t1)       \
-          if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
-                    pstore(C0+i+(I)*PacketSize, c0);           \
-                    pstore(C1+i+(I)*PacketSize, c1)
+                     c0 = pload<Packet>(C0+i+(I)*PacketSize);    \
+                     c1 = pload<Packet>(C1+i+(I)*PacketSize);    \
+                     KMADD(c0, a0, b00, t0)                      \
+                     KMADD(c1, a0, b01, t1)                      \
+                     a0 = pload<Packet>(A0+i+(I+1)*PacketSize);  \
+                     KMADD(c0, a1, b10, t0)                      \
+                     KMADD(c1, a1, b11, t1)                      \
+                     a1 = pload<Packet>(A1+i+(I+1)*PacketSize);  \
+          if(RK==4){ KMADD(c0, a2, b20, t0)                     }\
+          if(RK==4){ KMADD(c1, a2, b21, t1)                     }\
+          if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize); }\
+          if(RK==4){ KMADD(c0, a3, b30, t0)                     }\
+          if(RK==4){ KMADD(c1, a3, b31, t1)                     }\
+          if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize); }\
+                     pstore(C0+i+(I)*PacketSize, c0);            \
+                     pstore(C1+i+(I)*PacketSize, c1)
         
         // process rows of A' - C' with aggressive vectorization and peeling 
         for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
@@ -131,14 +131,15 @@
                     prefetch((A1+i+(5)*PacketSize));
           if(RK==4) prefetch((A2+i+(5)*PacketSize));
           if(RK==4) prefetch((A3+i+(5)*PacketSize));
-                    WORK(0);
-                    WORK(1);
-                    WORK(2);
-                    WORK(3);
-                    WORK(4);
-                    WORK(5);
-                    WORK(6);
-                    WORK(7);
+
+          WORK(0);
+          WORK(1);
+          WORK(2);
+          WORK(3);
+          WORK(4);
+          WORK(5);
+          WORK(6);
+          WORK(7);
         }
         // process the remaining rows with vectorization only
         for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize)
@@ -165,7 +166,7 @@
         Bc1 += RK;
       } // peeled loop on k
     } // peeled loop on the columns j
-    // process the last column (we now perform a matrux-vector product)
+    // process the last column (we now perform a matrix-vector product)
     if((n-n_end)>0)
     {
       const Scalar* Bc0 = B+(n-1)*ldb;
@@ -203,16 +204,16 @@
         }
         
 #define WORK(I) \
-                  c0 = pload<Packet>(C0+i+(I)*PacketSize);   \
-                  KMADD(c0, a0, b00, t0)       \
-                  a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \
-                  KMADD(c0, a1, b10, t0)       \
-                  a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \
-        if(RK==4) KMADD(c0, a2, b20, t0)       \
-        if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \
-        if(RK==4) KMADD(c0, a3, b30, t0)       \
-        if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \
-                  pstore(C0+i+(I)*PacketSize, c0);
+                   c0 = pload<Packet>(C0+i+(I)*PacketSize);     \
+                   KMADD(c0, a0, b00, t0)                       \
+                   a0 = pload<Packet>(A0+i+(I+1)*PacketSize);   \
+                   KMADD(c0, a1, b10, t0)                       \
+                   a1 = pload<Packet>(A1+i+(I+1)*PacketSize);   \
+        if(RK==4){ KMADD(c0, a2, b20, t0)                      }\
+        if(RK==4){ a2 = pload<Packet>(A2+i+(I+1)*PacketSize);  }\
+        if(RK==4){ KMADD(c0, a3, b30, t0)                      }\
+        if(RK==4){ a3 = pload<Packet>(A3+i+(I+1)*PacketSize);  }\
+                   pstore(C0+i+(I)*PacketSize, c0);
         
        // aggressive vectorization and peeling
         for(Index i=0; i<actual_b_end1; i+=PacketSize*8)
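
first_default_aligned (the rename of first_aligned) returns the index of the
first element sitting on a packet boundary; the kernel handles the rows before
it with plain scalar code and runs the packet loops on the aligned body. A
plain-C++ sketch of that peeling pattern, assuming a 16-byte packet of 4 floats:

    #include <cstdint>
    #include <algorithm>

    void scale(float* a, long m, float s)
    {
      const long PacketSize = 4;  // assumed: 4 floats per 16-byte packet
      // index of the first 16-byte-aligned element (what first_default_aligned computes)
      const long bytes = static_cast<long>((16 - reinterpret_cast<std::uintptr_t>(a) % 16) % 16);
      const long i0 = (std::min)(m, bytes / static_cast<long>(sizeof(float)));
      for(long i = 0; i < i0; ++i) a[i] *= s;                // unaligned head: scalar
      const long body = i0 + ((m - i0) / PacketSize) * PacketSize;
      for(long i = i0; i < body; i += PacketSize)            // aligned body, packet-sized steps
        for(long k = 0; k < PacketSize; ++k) a[i+k] *= s;    //   (stand-in for pload/pstore)
      for(long i = body; i < m; ++i) a[i] *= s;              // remainder: scalar
    }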
diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
index 7a4e430..6f75d50 100644
--- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
+++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h
@@ -42,21 +42,20 @@
  * \param descendants Number of descendants of each node in the etree
  * \param relax_end last column in a supernode
  */
-template <typename Scalar, typename Index>
-void SparseLUImpl<Scalar,Index>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
 {
   
  // The etree may not be postordered, but it is heap-ordered
   IndexVector post;
-  internal::treePostorder(n, et, post); // Post order etree
+  internal::treePostorder(StorageIndex(n), et, post); // Post order etree
   IndexVector inv_post(n+1); 
-  Index i;
-  for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
+  for (StorageIndex i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
   
   // Renumber etree in postorder 
   IndexVector iwork(n);
   IndexVector et_save(n+1);
-  for (i = 0; i < n; ++i)
+  for (Index i = 0; i < n; ++i)
   {
     iwork(post(i)) = post(et(i));
   }
@@ -75,10 +74,10 @@
   }
   // Identify the relaxed supernodes by postorder traversal of the etree
   Index snode_start; // beginning of a snode 
-  Index k;
+  StorageIndex k;
   Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree 
   Index nsuper_et = 0; // Number of relaxed snodes in the original etree 
-  Index l; 
+  StorageIndex l; 
   for (j = 0; j < n; )
   {
     parent = et(j);
@@ -90,8 +89,8 @@
     }
     // Found a supernode in postordered etree, j is the last column 
     ++nsuper_et_post;
-    k = n;
-    for (i = snode_start; i <= j; ++i)
+    k = StorageIndex(n);
+    for (Index i = snode_start; i <= j; ++i)
       k = (std::min)(k, inv_post(i));
     l = inv_post(j);
     if ( (l - k) == (j - snode_start) )  // Same number of columns in the snode
@@ -102,7 +101,7 @@
     }
     else 
     {
-      for (i = snode_start; i <= j; ++i) 
+      for (Index i = snode_start; i <= j; ++i) 
       {
         l = inv_post(i);
         if (descendants(i) == 0) 
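
The loop above, for (StorageIndex i = 0; i < n+1; ++i) inv_post(post(i)) = i;,
builds the inverse of the postorder permutation by hand. An equivalent
stand-alone sketch:

    #include <Eigen/Dense>

    Eigen::VectorXi inverse_perm(const Eigen::VectorXi& post)
    {
      Eigen::VectorXi inv(post.size());
      for(int i = 0; i < post.size(); ++i)
        inv(post(i)) = i;   // inv_post = post.inverse() for a valid permutation
      return inv;
    }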
diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
index 0d0283b..8c1b3e8 100644
--- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h
@@ -14,30 +14,29 @@
 namespace Eigen {
 namespace internal {
   
-/**
- * \brief Performs numeric block updates from a given supernode to a single column
- * 
- * \param segsize Size of the segment (and blocks ) to use for updates
- * \param[in,out] dense Packed values of the original matrix
- * \param tempv temporary vector to use for updates
- * \param lusup array containing the supernodes
- * \param lda Leading dimension in the supernode
- * \param nrow Number of rows in the rectangular part of the supernode
- * \param lsub compressed row subscripts of supernodes
- * \param lptr pointer to the first column of the current supernode in lsub
- * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode
- * \return 0 on success
- */
 template <int SegSizeAtCompileTime> struct LU_kernel_bmod
 {
-  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
-  static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+  /** \internal
+    * \brief Performs numeric block updates from a given supernode to a single column
+    *
+    * \param segsize Size of the segment (and blocks) to use for updates
+    * \param[in,out] dense Packed values of the original matrix
+    * \param tempv temporary vector to use for updates
+    * \param lusup array containing the supernodes
+    * \param lda Leading dimension in the supernode
+    * \param nrow Number of rows in the rectangular part of the supernode
+    * \param lsub compressed row subscripts of supernodes
+    * \param lptr pointer to the first column of the current supernode in lsub
+    * \param no_zeros Number of nonzero elements before the diagonal part of the supernode
+    */
+  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+  static EIGEN_DONT_INLINE void run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
                                     const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
 };
 
 template <int SegSizeAtCompileTime>
-template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
-EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const Index segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda,
                                                                   const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
 {
   typedef typename ScalarVector::Scalar Scalar;
@@ -45,7 +44,7 @@
   // The result of triangular solve is in tempv[*]; 
    // The result of matrix-vector update is in dense[*]
   Index isub = lptr + no_zeros; 
-  int i;
+  Index i;
   Index irow;
   for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++)
   {
@@ -56,7 +55,7 @@
   // Dense triangular solve -- start effective triangle
   luptr += lda * no_zeros + no_zeros; 
   // Form Eigen matrix and vector 
-  Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
+  Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) );
   Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize);
   
   u = A.template triangularView<UnitLower>().solve(u); 
@@ -65,9 +64,9 @@
   luptr += segsize;
   const Index PacketSize = internal::packet_traits<Scalar>::size;
   Index ldl = internal::first_multiple(nrow, PacketSize);
-  Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
-  Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize);
-  Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize;
+  Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) );
+  Index aligned_offset = internal::first_default_aligned(tempv.data()+segsize, PacketSize);
+  Index aligned_with_B_offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize))%PacketSize;
   Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) );
   
   l.setZero();
@@ -91,21 +90,22 @@
 
 template <> struct LU_kernel_bmod<1>
 {
-  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
-  static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+  template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+  static EIGEN_DONT_INLINE void run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
                                     const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros);
 };
 
 
-template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index>
-EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
+template <typename BlockScalarVector, typename ScalarVector, typename IndexVector>
+EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const Index /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr,
                                               const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros)
 {
   typedef typename ScalarVector::Scalar Scalar;
+  typedef typename IndexVector::Scalar StorageIndex;
   Scalar f = dense(lsub(lptr + no_zeros));
   luptr += lda * no_zeros + no_zeros + 1;
   const Scalar* a(lusup.data() + luptr);
-  const /*typename IndexVector::Scalar*/Index*  irow(lsub.data()+lptr + no_zeros + 1);
+  const StorageIndex*  irow(lsub.data()+lptr + no_zeros + 1);
   Index i = 0;
   for (; i+1 < nrow; i+=2)
   {
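
The loop opened here performs two scatter updates (dense(irow[i]) -= f * a[i])
per iteration. A self-contained sketch of the unroll-by-two pattern with its
odd-element remainder, all names hypothetical:

    #include <Eigen/Dense>

    void scatter_axpy(Eigen::VectorXd& dense, const double* a,
                      const int* irow, long nrow, double f)
    {
      long i = 0;
      for(; i+1 < nrow; i += 2)   // two independent updates per iteration
      {
        double d0 = dense(irow[i])   - f * a[i];
        double d1 = dense(irow[i+1]) - f * a[i+1];
        dense(irow[i])   = d0;
        dense(irow[i+1]) = d1;
      }
      if(i < nrow)                // remainder when nrow is odd
        dense(irow[i]) -= f * a[i];
    }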
diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
index da0e0fc..822cf32 100644
--- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h
@@ -52,8 +52,8 @@
  * 
  * 
  */
-template <typename Scalar, typename Index>
-void SparseLUImpl<Scalar,Index>::panel_bmod(const Index m, const Index w, const Index jcol, 
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::panel_bmod(const Index m, const Index w, const Index jcol, 
                                             const Index nseg, ScalarVector& dense, ScalarVector& tempv,
                                             IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu)
 {
@@ -102,7 +102,7 @@
     if(nsupc >= 2)
     { 
       Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
-      Map<Matrix<Scalar,Dynamic,Dynamic>, Aligned,  OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
+      Map<ScalarMatrix, Aligned,  OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
       
       // gather U
       Index u_col = 0;
@@ -136,17 +136,17 @@
       Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
       no_zeros = (krep - u_rows + 1) - fsupc;
       luptr += lda * no_zeros + no_zeros;
-      Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
+      MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
       U = A.template triangularView<UnitLower>().solve(U);
       
       // update
       luptr += u_rows;
-      Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
+      MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
       eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
       
       Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
-      Index offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize;
-      Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
+      Index offset = (PacketSize-internal::first_default_aligned(B.data(), PacketSize)) % PacketSize;
+      MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
       
       L.setZero();
       internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
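
Once the panel is gathered into U, the dense sub-problem is exactly the two calls above: a unit-lower triangular solve against the supernodal triangle, then a GEMM-style update of the rows below it. A hedged sketch of the same computation via Eigen's public API on illustrative sizes, rather than the internal mapped workspace:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      using Eigen::MatrixXd;
      const int u_rows = 3, u_cols = 2, nrow = 4;

      MatrixXd A = MatrixXd::Random(u_rows, u_rows);  // supernodal triangle
      MatrixXd B = MatrixXd::Random(nrow, u_rows);    // rows of L below it
      MatrixXd U = MatrixXd::Random(u_rows, u_cols);  // gathered panel columns

      // U <- A^{-1} U, with A read as unit lower triangular.
      U = A.triangularView<Eigen::UnitLower>().solve(U);

      // L <- B * U: the update internal::sparselu_gemm applies to the
      // mapped workspace.
      MatrixXd L = B * U;
      std::cout << L.rows() << "x" << L.cols() << " block updated\n";  // 4x2
    }
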
diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
index dc0054e..155df73 100644
--- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h
@@ -37,11 +37,11 @@
 template<typename IndexVector>
 struct panel_dfs_traits
 {
-  typedef typename IndexVector::Scalar Index;
-  panel_dfs_traits(Index jcol, Index* marker)
+  typedef typename IndexVector::Scalar StorageIndex;
+  panel_dfs_traits(Index jcol, StorageIndex* marker)
     : m_jcol(jcol), m_marker(marker)
   {}
-  bool update_segrep(Index krep, Index jj)
+  bool update_segrep(Index krep, StorageIndex jj)
   {
     if(m_marker[krep]<m_jcol)
     {
@@ -53,13 +53,13 @@
   void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {}
   enum { ExpandMem = false };
   Index m_jcol;
-  Index* m_marker;
+  StorageIndex* m_marker;
 };
 
 
-template <typename Scalar, typename Index>
+template <typename Scalar, typename StorageIndex>
 template <typename Traits>
-void SparseLUImpl<Scalar,Index>::dfs_kernel(const Index jj, IndexVector& perm_r,
+void SparseLUImpl<Scalar,StorageIndex>::dfs_kernel(const StorageIndex jj, IndexVector& perm_r,
                    Index& nseg, IndexVector& panel_lsub, IndexVector& segrep,
                    Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent,
                    IndexVector& xplore, GlobalLU_t& glu,
@@ -67,14 +67,14 @@
                   )
 {
   
-  Index kmark = marker(krow);
+  StorageIndex kmark = marker(krow);
       
   // For each unmarked krow of jj
   marker(krow) = jj; 
-  Index kperm = perm_r(krow); 
+  StorageIndex kperm = perm_r(krow); 
   if (kperm == emptyIdxLU ) {
     // krow is in L : place it in structure of L(*, jj)
-    panel_lsub(nextl_col++) = krow;  // krow is indexed into A
+    panel_lsub(nextl_col++) = StorageIndex(krow);  // krow is indexed into A
     
     traits.mem_expand(panel_lsub, nextl_col, kmark);
   }
@@ -83,9 +83,9 @@
     // krow is in U : if its supernode-representative krep
     // has been explored, update repfnz(*)
     // krep = supernode representative of the current row
-    Index krep = glu.xsup(glu.supno(kperm)+1) - 1; 
+    StorageIndex krep = glu.xsup(glu.supno(kperm)+1) - 1; 
     // First nonzero element in the current column:
-    Index myfnz = repfnz_col(krep); 
+    StorageIndex myfnz = repfnz_col(krep); 
     
     if (myfnz != emptyIdxLU )
     {
@@ -96,26 +96,26 @@
     else 
     {
       // Otherwise, perform dfs starting at krep
-      Index oldrep = emptyIdxLU; 
+      StorageIndex oldrep = emptyIdxLU; 
       parent(krep) = oldrep; 
       repfnz_col(krep) = kperm; 
-      Index xdfs =  glu.xlsub(krep); 
+      StorageIndex xdfs =  glu.xlsub(krep); 
       Index maxdfs = xprune(krep); 
       
-      Index kpar;
+      StorageIndex kpar;
       do 
       {
         // For each unmarked kchild of krep
         while (xdfs < maxdfs) 
         {
-          Index kchild = glu.lsub(xdfs); 
+          StorageIndex kchild = glu.lsub(xdfs); 
           xdfs++; 
-          Index chmark = marker(kchild); 
+          StorageIndex chmark = marker(kchild); 
           
           if (chmark != jj ) 
           {
             marker(kchild) = jj; 
-            Index chperm = perm_r(kchild); 
+            StorageIndex chperm = perm_r(kchild); 
             
             if (chperm == emptyIdxLU) 
             {
@@ -128,7 +128,7 @@
               // case kchild is in U :
               // chrep = its supernode-rep. If its rep has been explored, 
               // update its repfnz(*)
-              Index chrep = glu.xsup(glu.supno(chperm)+1) - 1; 
+              StorageIndex chrep = glu.xsup(glu.supno(chperm)+1) - 1; 
               myfnz = repfnz_col(chrep); 
               
               if (myfnz != emptyIdxLU) 
@@ -215,8 +215,8 @@
  * 
  */
 
-template <typename Scalar, typename Index>
-void SparseLUImpl<Scalar,Index>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu)
 {
   Index nextl_col; // Next available position in panel_lsub[*,jj] 
   
@@ -227,7 +227,7 @@
   panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
   
   // For each column in the panel 
-  for (Index jj = jcol; jj < jcol + w; jj++) 
+  for (StorageIndex jj = StorageIndex(jcol); jj < jcol + w; jj++) 
   {
     nextl_col = (jj - jcol) * m; 
     
@@ -241,7 +241,7 @@
       Index krow = it.row(); 
       dense_col(krow) = it.value();
       
-      Index kmark = marker(krow); 
+      StorageIndex kmark = marker(krow); 
       if (kmark == jj) 
         continue; // krow visited before, go to the next nonzero
       
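
The panel DFS rests on the marker array retyped above: a row reached while processing panel column jj gets marker(row) = jj, so it is expanded at most once per column. A much-simplified, self-contained sketch of that trick on an invented adjacency structure; the real kernel additionally tracks supernodes, parent, xplore and repfnz:

    #include <iostream>
    #include <vector>

    // structure collects the row pattern of column jj; marker[row] == jj
    // means the row has already been visited for this column.
    void dfs_column(int jj, int start_row,
                    const std::vector<std::vector<int>>& adj,
                    std::vector<int>& marker, std::vector<int>& structure)
    {
      std::vector<int> stack = {start_row};
      while (!stack.empty())
      {
        int krow = stack.back(); stack.pop_back();
        if (marker[krow] == jj) continue;   // already reached for column jj
        marker[krow] = jj;
        structure.push_back(krow);
        for (int kchild : adj[krow]) stack.push_back(kchild);
      }
    }

    int main()
    {
      std::vector<std::vector<int>> adj = {{1, 2}, {3}, {3}, {}};
      std::vector<int> marker(4, -1);       // -1 plays the role of emptyIdxLU
      std::vector<int> structure;
      dfs_column(/*jj=*/0, /*start_row=*/0, adj, marker, structure);
      for (int r : structure) std::cout << r << ' ';  // 0 2 3 1
      std::cout << '\n';
    }
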
diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h
index 457789c..a86dac9 100644
--- a/Eigen/src/SparseLU/SparseLU_pivotL.h
+++ b/Eigen/src/SparseLU/SparseLU_pivotL.h
@@ -56,8 +56,8 @@
 * \return 0 on success, or i > 0 if U(i,i) is exactly zero 
  * 
  */
-template <typename Scalar, typename Index>
-Index SparseLUImpl<Scalar,Index>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+Index SparseLUImpl<Scalar,StorageIndex>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu)
 {
   
   Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
@@ -67,11 +67,11 @@
   Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension
   Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode
   Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode
-  Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode
+  StorageIndex* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode
   
   // Determine the largest abs numerical value for partial pivoting 
   Index diagind = iperm_c(jcol); // diagonal index 
-  RealScalar pivmax = 0.0; 
+  RealScalar pivmax(-1.0);
   Index pivptr = nsupc; 
   Index diag = emptyIdxLU; 
   RealScalar rtemp;
@@ -87,9 +87,10 @@
   }
   
   // Test for singularity
-  if ( pivmax == 0.0 ) {
-    pivrow = lsub_ptr[pivptr];
-    perm_r(pivrow) = jcol;
+  if ( pivmax <= RealScalar(0.0) ) {
+    // if pivmax == -1 the column is structurally empty; otherwise it is only numerically zero
+    pivrow = pivmax < RealScalar(0.0) ? diagind : lsub_ptr[pivptr];
+    perm_r(pivrow) = StorageIndex(jcol);
     return (jcol+1);
   }
   
@@ -104,13 +105,13 @@
       // Diagonal element exists
       using std::abs;
       rtemp = abs(lu_col_ptr[diag]);
-      if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag;
+      if (rtemp != RealScalar(0.0) && rtemp >= thresh) pivptr = diag;
     }
     pivrow = lsub_ptr[pivptr];
   }
   
   // Record pivot row
-  perm_r(pivrow) = jcol; 
+  perm_r(pivrow) = StorageIndex(jcol);
   // Interchange row subscripts
   if (pivptr != nsupc )
   {
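
The new pivmax(-1.0) initialization lets the singularity test distinguish a structurally empty column, where pivmax stays -1, from one whose stored entries are all zero, where pivmax becomes 0; the pivmax <= 0 branch above handles both. A condensed sketch of thresholded partial pivoting under that convention, on made-up data (choose_pivot is an illustrative name):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Returns the chosen pivot position, or -1 to signal a singular column.
    int choose_pivot(const std::vector<double>& col, int diag, double diagpivotthresh)
    {
      double pivmax = -1.0;           // sentinel: stays -1 iff col is empty
      int pivptr = -1;
      for (int i = 0; i < (int)col.size(); ++i)
        if (std::abs(col[i]) > pivmax) { pivmax = std::abs(col[i]); pivptr = i; }

      if (pivmax <= 0.0)              // structurally empty (-1) or all-zero (0)
        return -1;

      // Keep the diagonal whenever it is within a factor of the largest entry.
      double thresh = diagpivotthresh * pivmax;
      if (diag >= 0 && std::abs(col[diag]) != 0.0 && std::abs(col[diag]) >= thresh)
        return diag;
      return pivptr;
    }

    int main()
    {
      std::vector<double> col = {0.1, -3.0, 2.5};
      // With threshold 1.0 only the largest entry qualifies; with 0.01 the
      // smaller diagonal entry at index 0 is kept to preserve sparsity.
      std::printf("%d %d\n", choose_pivot(col, 0, 1.0), choose_pivot(col, 0, 0.01));
      // prints: 1 0
    }
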
diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h
index 66460d1..ad32fed 100644
--- a/Eigen/src/SparseLU/SparseLU_pruneL.h
+++ b/Eigen/src/SparseLU/SparseLU_pruneL.h
@@ -49,8 +49,9 @@
  * \param glu Global LU data
  * 
  */
-template <typename Scalar, typename Index>
-void SparseLUImpl<Scalar,Index>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg,
+                                               const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu)
 {
   // For each supernode-rep irep in U(*,j)
   Index jsupno = glu.supno(jcol); 
@@ -123,7 +124,7 @@
           }
         } // end while 
         
-        xprune(irep) = kmin;  //Pruning 
+        xprune(irep) = StorageIndex(kmin);  // Pruning
       } // end if do_prune 
     } // end pruning 
   } // End for each U-segment
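
The pruning loop summarized above is a quicksort-style partition: row indices already pivoted are packed to the front of the column and xprune records the cut, so later depth-first searches stop there. A self-contained sketch with invented data; prune_partition is an illustrative name, and the real routine also swaps the matching numerical values when needed:

    #include <iostream>
    #include <utility>
    #include <vector>

    // Partition lsub so rows with perm_r[row] != -1 (already pivoted) land
    // at the front; return the cut point, i.e. the new xprune value.
    int prune_partition(std::vector<int>& lsub, const std::vector<int>& perm_r)
    {
      int kmin = 0, kmax = (int)lsub.size() - 1;
      while (kmin <= kmax)
      {
        if (perm_r[lsub[kmax]] == -1)      --kmax;  // tail entry unpivoted: leave it
        else if (perm_r[lsub[kmin]] != -1) ++kmin;  // head entry pivoted: leave it
        else std::swap(lsub[kmin++], lsub[kmax--]); // out of place: exchange
      }
      return kmin; // entries in [0, kmin) are pivoted
    }

    int main()
    {
      std::vector<int> lsub   = {5, 2, 7, 1};   // row indices of one column of L
      std::vector<int> perm_r = {-1, 0, -1, -1, -1, 1, -1, -1}; // rows 1, 5 pivoted
      int xprune = prune_partition(lsub, perm_r);
      std::cout << xprune << ":";               // 2: two pivoted rows up front
      for (int r : lsub) std::cout << ' ' << r; // 5 1 7 2
      std::cout << '\n';
    }
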
diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h
index 58ec32e..c408d01 100644
--- a/Eigen/src/SparseLU/SparseLU_relax_snode.h
+++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h
@@ -43,15 +43,15 @@
  * \param descendants Number of descendants of each node in the etree
  * \param relax_end last column in a supernode
  */
-template <typename Scalar, typename Index>
-void SparseLUImpl<Scalar,Index>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
+template <typename Scalar, typename StorageIndex>
+void SparseLUImpl<Scalar,StorageIndex>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end)
 {
   
   // compute the number of descendants of each node in the etree
-  Index j, parent; 
+  Index parent; 
   relax_end.setConstant(emptyIdxLU);
   descendants.setZero();
-  for (j = 0; j < n; j++) 
+  for (Index j = 0; j < n; j++) 
   {
     parent = et(j);
     if (parent != n) // not the dummy root
@@ -59,7 +59,7 @@
   }
   // Identify the relaxed supernodes by postorder traversal of the etree
   Index snode_start; // beginning of a snode 
-  for (j = 0; j < n; )
+  for (Index j = 0; j < n; )
   {
     parent = et(j);
     snode_start = j; 
@@ -69,7 +69,7 @@
       parent = et(j);
     }
     // Found a supernode in the postordered etree; j is its last column 
-    relax_end(snode_start) = j; // Record last column
+    relax_end(snode_start) = StorageIndex(j); // Record last column
     j++;
     // Search for a new leaf
     while (j < n && descendants(j) != 0) j++; // check the bound first so descendants(n) is never read
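
relax_snode walks the postordered elimination tree, absorbing chains of ancestors while the descendant count stays below the relaxation threshold, and records each relaxed supernode's last column at its first. A compact stand-alone sketch on a toy etree, with the dummy root encoded as n as in the routine and the leaf search bound-checked before descendants(j) is read:

    #include <iostream>
    #include <vector>

    void relax_snode(int n, const std::vector<int>& et, int relax_columns,
                     std::vector<int>& descendants, std::vector<int>& relax_end)
    {
      relax_end.assign(n, -1);            // -1 plays the role of emptyIdxLU
      descendants.assign(n, 0);
      for (int j = 0; j < n; ++j)         // count descendants bottom-up
      {
        int parent = et[j];
        if (parent != n)                  // n is the dummy root
          descendants[parent] += descendants[j] + 1;
      }
      for (int j = 0; j < n; )
      {
        int parent = et[j];
        int snode_start = j;
        while (parent != n && descendants[parent] < relax_columns)
        { j = parent; parent = et[j]; }
        relax_end[snode_start] = j;       // last column of this relaxed snode
        ++j;
        while (j < n && descendants[j] != 0) ++j; // advance to the next leaf
      }
    }

    int main()
    {
      // Chain 0 -> 1 -> 2 plus leaf 3; n = 4 is the dummy root.
      std::vector<int> et = {1, 2, 4, 4};
      std::vector<int> descendants, relax_end;
      relax_snode(4, et, /*relax_columns=*/3, descendants, relax_end);
      for (int v : relax_end) std::cout << v << ' '; // 2 -1 -1 3
      std::cout << '\n';
    }
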