// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// We used the "A Divide-And-Conquer Algorithm for the Bidiagonal SVD"
// research report written by Ming Gu and Stanley C. Eisenstat
// The code variable names correspond to the names they used in their
// report
//
// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
// Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk>
// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_BDCSVD_H
#define EIGEN_BDCSVD_H
// #define EIGEN_BDCSVD_DEBUG_VERBOSE
// #define EIGEN_BDCSVD_SANITY_CHECKS

namespace Eigen {

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
IOFormat bdcsvdfmt(8, 0, ", ", "\n", " [", "]");
#endif

template<typename _MatrixType> class BDCSVD;

namespace internal {

template<typename _MatrixType>
struct traits<BDCSVD<_MatrixType> >
{
  typedef _MatrixType MatrixType;
};

} // end namespace internal


/** \ingroup SVD_Module
  *
  *
  * \class BDCSVD
  *
  * \brief class Bidiagonal Divide and Conquer SVD
  *
  * \tparam _MatrixType the type of the matrix of which we are computing the SVD decomposition
  *
  * This class first reduces the input matrix to bi-diagonal form using class UpperBidiagonalization,
  * and then performs a divide-and-conquer diagonalization. Small blocks are diagonalized using class JacobiSVD.
  * You can control the switching size with the setSwitchSize() method, default is 16.
  * For small matrices (<16), it is thus preferable to directly use JacobiSVD. For larger ones, BDCSVD is highly
  * recommended and can be several orders of magnitude faster.
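  *
  * A minimal usage sketch (the matrix sizes, option flags, and right-hand side below are illustrative only):
  * \code
  * Eigen::MatrixXd A = Eigen::MatrixXd::Random(1000, 300);
  * Eigen::BDCSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
  * Eigen::VectorXd sv = svd.singularValues();                       // sorted in decreasing order
  * Eigen::VectorXd x  = svd.solve(Eigen::VectorXd::Ones(A.rows())); // least-squares solution
  * \endcode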
  *
  * \warning this algorithm is unlikely to provide accurate results when compiled with unsafe math optimizations.
  * For instance, this concerns Intel's compiler (ICC), which performs such optimizations by default unless
  * you compile with the \c -fp-model \c precise option. Likewise, the \c -ffast-math option of GCC or clang will
  * significantly degrade the accuracy.
  *
  * \sa class JacobiSVD
  */
template<typename _MatrixType>
class BDCSVD : public SVDBase<BDCSVD<_MatrixType> >
{
  typedef SVDBase<BDCSVD> Base;

public:
  using Base::rows;
  using Base::cols;
  using Base::computeU;
  using Base::computeV;

  typedef _MatrixType MatrixType;
  typedef typename MatrixType::Scalar Scalar;
  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
  typedef typename NumTraits<RealScalar>::Literal Literal;
  enum {
    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
    ColsAtCompileTime = MatrixType::ColsAtCompileTime,
    DiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_DYNAMIC(RowsAtCompileTime, ColsAtCompileTime),
    MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
    MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
    MaxDiagSizeAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(MaxRowsAtCompileTime, MaxColsAtCompileTime),
    MatrixOptions = MatrixType::Options
  };

  typedef typename Base::MatrixUType MatrixUType;
  typedef typename Base::MatrixVType MatrixVType;
  typedef typename Base::SingularValuesType SingularValuesType;

  typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> MatrixX;
  typedef Matrix<RealScalar, Dynamic, Dynamic, ColMajor> MatrixXr;
  typedef Matrix<RealScalar, Dynamic, 1> VectorType;
  typedef Array<RealScalar, Dynamic, 1> ArrayXr;
  typedef Array<Index,1,Dynamic> ArrayXi;
  typedef Ref<ArrayXr> ArrayRef;
  typedef Ref<ArrayXi> IndicesRef;

  /** \brief Default Constructor.
    *
    * The default constructor is useful in cases in which the user intends to
    * perform decompositions via BDCSVD::compute(const MatrixType&).
    */
  BDCSVD() : m_algoswap(16), m_numIters(0)
  {}


  /** \brief Default Constructor with memory preallocation
    *
    * Like the default constructor but with preallocation of the internal data
    * according to the specified problem size.
    * \sa BDCSVD()
    */
  BDCSVD(Index rows, Index cols, unsigned int computationOptions = 0)
    : m_algoswap(16), m_numIters(0)
  {
    allocate(rows, cols, computationOptions);
  }

  /** \brief Constructor performing the decomposition of a given matrix.
    *
    * \param matrix the matrix to decompose
    * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
    *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,
    *                           #ComputeFullV, #ComputeThinV.
    *
    * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
    * available with the (non-default) FullPivHouseholderQR preconditioner.
    */
  BDCSVD(const MatrixType& matrix, unsigned int computationOptions = 0)
    : m_algoswap(16), m_numIters(0)
  {
    compute(matrix, computationOptions);
  }

  ~BDCSVD()
  {
  }

  /** \brief Method performing the decomposition of a given matrix using custom options.
    *
    * \param matrix the matrix to decompose
    * \param computationOptions optional parameter allowing to specify if you want full or thin U or V unitaries to be computed.
    *                           By default, none is computed. This is a bit-field, the possible bits are #ComputeFullU, #ComputeThinU,
    *                           #ComputeFullV, #ComputeThinV.
    *
    * Thin unitaries are only available if your matrix type has a Dynamic number of columns (for example MatrixXf). They also are not
    * available with the (non-default) FullPivHouseholderQR preconditioner.
    */
  BDCSVD& compute(const MatrixType& matrix, unsigned int computationOptions);

  /** \brief Method performing the decomposition of a given matrix using current options.
    *
    * \param matrix the matrix to decompose
    *
    * This method uses the current \a computationOptions, as already passed to the constructor or to compute(const MatrixType&, unsigned int).
    */
  BDCSVD& compute(const MatrixType& matrix)
  {
    return compute(matrix, this->m_computationOptions);
  }

  void setSwitchSize(int s)
  {
    eigen_assert(s>3 && "BDCSVD the size of the algo switch has to be greater than 3");
    m_algoswap = s;
  }

private:
  void allocate(Index rows, Index cols, unsigned int computationOptions);
  void divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift);
  void computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V);
  void computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, VectorType& singVals, ArrayRef shifts, ArrayRef mus);
  void perturbCol0(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat);
  void computeSingVecs(const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef& perm, const VectorType& singVals, const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V);
  void deflation43(Index firstCol, Index shift, Index i, Index size);
  void deflation44(Index firstColu, Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size);
  void deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift);
  template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
  void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev);
  void structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1);
  static RealScalar secularEq(RealScalar x, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift);

protected:
  MatrixXr m_naiveU, m_naiveV;
  MatrixXr m_computed;
  Index m_nRec;
  ArrayXr m_workspace;
  ArrayXi m_workspaceI;
  int m_algoswap;
  bool m_isTranspose, m_compU, m_compV;

  using Base::m_singularValues;
  using Base::m_diagSize;
  using Base::m_computeFullU;
  using Base::m_computeFullV;
  using Base::m_computeThinU;
  using Base::m_computeThinV;
  using Base::m_matrixU;
  using Base::m_matrixV;
  using Base::m_isInitialized;
  using Base::m_nonzeroSingularValues;

public:
  int m_numIters;
}; // end class BDCSVD


// Method to allocate and initialize matrices and attributes
template<typename MatrixType>
void BDCSVD<MatrixType>::allocate(Index rows, Index cols, unsigned int computationOptions)
{
  m_isTranspose = (cols > rows);

  if (Base::allocate(rows, cols, computationOptions))
    return;

  m_computed = MatrixXr::Zero(m_diagSize + 1, m_diagSize);
  m_compU = computeV();
  m_compV = computeU();
  if (m_isTranspose)
    std::swap(m_compU, m_compV);

  if (m_compU) m_naiveU = MatrixXr::Zero(m_diagSize + 1, m_diagSize + 1);
  else         m_naiveU = MatrixXr::Zero(2, m_diagSize + 1);

  if (m_compV) m_naiveV = MatrixXr::Zero(m_diagSize, m_diagSize);

  m_workspace.resize((m_diagSize+1)*(m_diagSize+1)*3);
  m_workspaceI.resize(3*m_diagSize);
} // end allocate
template<typename MatrixType>
BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsigned int computationOptions)
{
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "\n\n\n======================================================================================================================\n\n\n";
#endif
  allocate(matrix.rows(), matrix.cols(), computationOptions);
  using std::abs;

  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();

  //**** step -1 - If the problem is too small, directly fall back to JacobiSVD and return
  if(matrix.cols() < m_algoswap)
  {
    // FIXME this line involves temporaries
    JacobiSVD<MatrixType> jsvd(matrix,computationOptions);
    if(computeU()) m_matrixU = jsvd.matrixU();
    if(computeV()) m_matrixV = jsvd.matrixV();
    m_singularValues = jsvd.singularValues();
    m_nonzeroSingularValues = jsvd.nonzeroSingularValues();
    m_isInitialized = true;
    return *this;
  }

  //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows
  RealScalar scale = matrix.cwiseAbs().maxCoeff();
  if(scale==Literal(0)) scale = Literal(1);
  MatrixX copy;
  if (m_isTranspose) copy = matrix.adjoint()/scale;
  else               copy = matrix/scale;

  //**** step 1 - Bidiagonalization
  // FIXME this line involves temporaries
  internal::UpperBidiagonalization<MatrixX> bid(copy);

  //**** step 2 - Divide & Conquer
  m_naiveU.setZero();
  m_naiveV.setZero();
  // FIXME this line involves a temporary matrix
  m_computed.topRows(m_diagSize) = bid.bidiagonal().toDenseMatrix().transpose();
  m_computed.template bottomRows<1>().setZero();
  divide(0, m_diagSize - 1, 0, 0, 0);

  //**** step 3 - Copy singular values and vectors
  for (int i=0; i<m_diagSize; i++)
  {
    RealScalar a = abs(m_computed.coeff(i, i));
    m_singularValues.coeffRef(i) = a * scale;
    if (a<considerZero)
    {
      m_nonzeroSingularValues = i;
      m_singularValues.tail(m_diagSize - i - 1).setZero();
      break;
    }
    else if (i == m_diagSize - 1)
    {
      m_nonzeroSingularValues = i + 1;
      break;
    }
  }

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
//  std::cout << "m_naiveU\n" << m_naiveU << "\n\n";
//  std::cout << "m_naiveV\n" << m_naiveV << "\n\n";
#endif
  if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU);
  else              copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV);

  m_isInitialized = true;
  return *this;
} // end compute


template<typename MatrixType>
template<typename HouseholderU, typename HouseholderV, typename NaiveU, typename NaiveV>
void BDCSVD<MatrixType>::copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naiveV)
{
  // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa
  if (computeU())
  {
    Index Ucols = m_computeThinU ? m_diagSize : householderU.cols();
    m_matrixU = MatrixX::Identity(householderU.cols(), Ucols);
    m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
    householderU.applyThisOnTheLeft(m_matrixU); // FIXME this line involves a temporary buffer
  }
  if (computeV())
  {
    Index Vcols = m_computeThinV ? m_diagSize : householderV.cols();
    m_matrixV = MatrixX::Identity(householderV.cols(), Vcols);
    m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast<Scalar>().topLeftCorner(m_diagSize, m_diagSize);
    householderV.applyThisOnTheLeft(m_matrixV); // FIXME this line involves a temporary buffer
  }
}

/** \internal
  * Performs A = A * B exploiting the special structure of the matrix A. Splitting A as:
  *  A = [A1]
  *      [A2]
  * such that A1.rows()==n1, then we assume that at least half of the columns of A1 and A2 are zeros.
  * We can thus pack them prior to the matrix product. However, this is only worth the effort if the matrix is large
  * enough.
  */
template<typename MatrixType>
void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, const MatrixXr &B, Index n1)
{
  Index n = A.rows();
  if(n>100)
  {
    // If the matrices are large enough, let's exploit the sparse structure of A by
    // splitting it in half (wrt n1), and packing the non-zero columns.
    Index n2 = n - n1;
    Map<MatrixXr> A1(m_workspace.data()      , n1, n);
    Map<MatrixXr> A2(m_workspace.data()+ n1*n, n2, n);
    Map<MatrixXr> B1(m_workspace.data()+  n*n, n, n);
    Map<MatrixXr> B2(m_workspace.data()+2*n*n, n, n);
    Index k1=0, k2=0;
    for(Index j=0; j<n; ++j)
    {
      if( (A.col(j).head(n1).array()!=Literal(0)).any() )
      {
        A1.col(k1) = A.col(j).head(n1);
        B1.row(k1) = B.row(j);
        ++k1;
      }
      if( (A.col(j).tail(n2).array()!=Literal(0)).any() )
      {
        A2.col(k2) = A.col(j).tail(n2);
        B2.row(k2) = B.row(j);
        ++k2;
      }
    }

    A.topRows(n1).noalias()    = A1.leftCols(k1) * B1.topRows(k1);
    A.bottomRows(n2).noalias() = A2.leftCols(k2) * B2.topRows(k2);
  }
  else
  {
    Map<MatrixXr,Aligned> tmp(m_workspace.data(),n,n);
    tmp.noalias() = A*B;
    A = tmp;
  }
}

// The divide algorithm is done "in place": we are always working on subsets of the same matrix. The divide method takes as arguments the
// position of the submatrix we are currently working on.

//@param firstCol : The Index of the first column of the submatrix of m_computed and of m_naiveU;
//@param lastCol : The Index of the last column of the submatrix of m_computed and of m_naiveU;
//  lastCol + 1 - firstCol is the size of the submatrix.
//@param firstRowW : The Index of the first row of the matrix W that we are to change. (see the reference paper section 1 for more information on W)
//@param firstColW : Same as firstRowW but for the columns.
//@param shift : Each time one takes the left submatrix, one must add 1 to the shift, because we want the last column of the U submatrix
//  to become the first column (*coeff) and to shift all the other columns to the right. There are more details in the reference paper.
template<typename MatrixType>
void BDCSVD<MatrixType>::divide(Index firstCol, Index lastCol, Index firstRowW, Index firstColW, Index shift)
{
  // requires rows = cols + 1;
  using std::pow;
  using std::sqrt;
  using std::abs;
  const Index n = lastCol - firstCol + 1;
  const Index k = n/2;
  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  RealScalar alphaK;
  RealScalar betaK;
  RealScalar r0;
  RealScalar lambda, phi, c0, s0;
  VectorType l, f;
  // We use the other algorithm which is more efficient for small
  // matrices.
  if (n < m_algoswap)
  {
    // FIXME this line involves temporaries
    JacobiSVD<MatrixXr> b(m_computed.block(firstCol, firstCol, n + 1, n), ComputeFullU | (m_compV ? ComputeFullV : 0));
    if (m_compU)
      m_naiveU.block(firstCol, firstCol, n + 1, n + 1).real() = b.matrixU();
    else
    {
      m_naiveU.row(0).segment(firstCol, n + 1).real() = b.matrixU().row(0);
      m_naiveU.row(1).segment(firstCol, n + 1).real() = b.matrixU().row(n);
    }
    if (m_compV) m_naiveV.block(firstRowW, firstColW, n, n).real() = b.matrixV();
    m_computed.block(firstCol + shift, firstCol + shift, n + 1, n).setZero();
    m_computed.diagonal().segment(firstCol + shift, n) = b.singularValues().head(n);
    return;
  }
  // We use the divide and conquer algorithm
  alphaK = m_computed(firstCol + k, firstCol + k);
  betaK  = m_computed(firstCol + k + 1, firstCol + k);
  // The divides must be done in that order to get good results: divide() changes the data inside the submatrices,
  // and the divide of the right submatrix reads one column of the left submatrix. That's why we need to treat the
  // right submatrix before the left one.
  divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift);
  divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1);

  if (m_compU)
  {
    lambda = m_naiveU(firstCol + k, firstCol + k);
    phi    = m_naiveU(firstCol + k + 1, lastCol + 1);
  }
  else
  {
    lambda = m_naiveU(1, firstCol + k);
    phi    = m_naiveU(0, lastCol + 1);
  }
  r0 = sqrt((abs(alphaK * lambda) * abs(alphaK * lambda)) + abs(betaK * phi) * abs(betaK * phi));
  if (m_compU)
  {
    l = m_naiveU.row(firstCol + k).segment(firstCol, k);
    f = m_naiveU.row(firstCol + k + 1).segment(firstCol + k + 1, n - k - 1);
  }
  else
  {
    l = m_naiveU.row(1).segment(firstCol, k);
    f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1);
  }
  if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1);
  if (r0<considerZero)
  {
    c0 = Literal(1);
    s0 = Literal(0);
  }
  else
  {
    c0 = alphaK * lambda / r0;
    s0 = betaK * phi / r0;
  }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  if (m_compU)
  {
    MatrixXr q1(m_naiveU.col(firstCol + k).segment(firstCol, k + 1));
    // we shift Q1 to the right
    for (Index i = firstCol + k - 1; i >= firstCol; i--)
      m_naiveU.col(i + 1).segment(firstCol, k + 1) = m_naiveU.col(i).segment(firstCol, k + 1);
    // first column = q1 * c0
    m_naiveU.col(firstCol).segment(firstCol, k + 1) = (q1 * c0);
    // last column = q1 * (-s0)
    m_naiveU.col(lastCol + 1).segment(firstCol, k + 1) = (q1 * (-s0));
    // first column = q2 * s0
    m_naiveU.col(firstCol).segment(firstCol + k + 1, n - k) = m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) * s0;
    // q2 *= c0
    m_naiveU.col(lastCol + 1).segment(firstCol + k + 1, n - k) *= c0;
  }
  else
  {
    RealScalar q1 = m_naiveU(0, firstCol + k);
    // we shift Q1 to the right
    for (Index i = firstCol + k - 1; i >= firstCol; i--)
      m_naiveU(0, i + 1) = m_naiveU(0, i);
    // first column = q1 * c0
    m_naiveU(0, firstCol) = (q1 * c0);
    // last column = q1 * (-s0)
    m_naiveU(0, lastCol + 1) = (q1 * (-s0));
    // first column = q2 * s0
    m_naiveU(1, firstCol) = m_naiveU(1, lastCol + 1) * s0;
    // q2 *= c0
    m_naiveU(1, lastCol + 1) *= c0;
    m_naiveU.row(1).segment(firstCol + 1, k).setZero();
    m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero();
  }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  m_computed(firstCol + shift, firstCol + shift) = r0;
  m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real();
  m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real();

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  ArrayXr tmp1 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
#endif
  // Second part: try to deflate singular values in combined matrix
  deflation(firstCol, lastCol, k, firstRowW, firstColW, shift);
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  ArrayXr tmp2 = (m_computed.block(firstCol+shift, firstCol+shift, n, n)).jacobiSvd().singularValues();
  std::cout << "\n\nj1 = " << tmp1.transpose().format(bdcsvdfmt) << "\n";
  std::cout << "j2 = " << tmp2.transpose().format(bdcsvdfmt) << "\n\n";
  std::cout << "err: " << ((tmp1-tmp2).abs()>1e-12*tmp2.abs()).transpose() << "\n";
  static int count = 0;
  std::cout << "# " << ++count << "\n\n";
  assert((tmp1-tmp2).matrix().norm() < 1e-14*tmp2.matrix().norm());
//  assert(count<681);
//  assert(((tmp1-tmp2).abs()<1e-13*tmp2.abs()).all());
#endif

  // Third part: compute SVD of combined matrix
  MatrixXr UofSVD, VofSVD;
  VectorType singVals;
  computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD);

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(UofSVD.allFinite());
  assert(VofSVD.allFinite());
#endif

  if (m_compU)
    structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2);
  else
  {
    Map<Matrix<RealScalar,2,Dynamic>,Aligned> tmp(m_workspace.data(),2,n+1);
    tmp.noalias() = m_naiveU.middleCols(firstCol, n+1) * UofSVD;
    m_naiveU.middleCols(firstCol, n + 1) = tmp;
  }

  if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2);

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero();
  m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals;
} // end divide

// Compute SVD of m_computed.block(firstCol, firstCol, n + 1, n); this block only has non-zeros in
// the first column and on the diagonal and has undergone deflation, so the diagonal is in increasing
// order except for possibly the (0,0) entry. The computed SVD is stored in U, singVals and V, except
// that if m_compV is false, then V is not computed. Singular values are sorted in decreasing order.
//
// TODO Opportunities for optimization: better root finding algo, better stopping criterion, better
// handling of round-off errors, be consistent in ordering
// For instance, to solve the secular equation using FMM, see http://www.stat.uchicago.edu/~lekheng/courses/302/classics/greengard-rokhlin.pdf
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, VectorType& singVals, MatrixXr& V)
{
  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  using std::abs;
  ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n);
  m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal();
  ArrayRef diag = m_workspace.head(n);
  diag(0) = Literal(0);

  // Allocate space for singular values and vectors
  singVals.resize(n);
  U.resize(n+1, n+1);
  if (m_compV) V.resize(n, n);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  if (col0.hasNaN() || diag.hasNaN())
    std::cout << "\n\nHAS NAN\n\n";
#endif

  // Many singular values might have been deflated, the zero ones have been moved to the end,
  // but others are interleaved and we must ignore them at this stage.
  // To this end, let's compute a permutation skipping them:
  Index actual_n = n;
  while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n;
  Index m = 0; // size of the deflated problem
  for(Index k=0;k<actual_n;++k)
    if(abs(col0(k))>considerZero)
      m_workspaceI(m++) = k;
  Map<ArrayXi> perm(m_workspaceI.data(),m);

  Map<ArrayXr> shifts(m_workspace.data()+1*n, n);
  Map<ArrayXr> mus(m_workspace.data()+2*n, n);
  Map<ArrayXr> zhat(m_workspace.data()+3*n, n);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "computeSVDofM using:\n";
  std::cout << " z: " << col0.transpose() << "\n";
  std::cout << " d: " << diag.transpose() << "\n";
#endif

  // Compute singVals, shifts, and mus
  computeSingVals(col0, diag, perm, singVals, shifts, mus);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << " j: " << (m_computed.block(firstCol, firstCol, n, n)).jacobiSvd().singularValues().transpose().reverse() << "\n\n";
  std::cout << " sing-val: " << singVals.transpose() << "\n";
  std::cout << " mu: " << mus.transpose() << "\n";
  std::cout << " shift: " << shifts.transpose() << "\n";

  {
    Index actual_n = n;
    while(actual_n>1 && abs(col0(actual_n-1))<considerZero) --actual_n;
    std::cout << "\n\n mus: " << mus.head(actual_n).transpose() << "\n\n";
    std::cout << " check1 (expect0) : " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n";
    std::cout << " check2 (>0)      : " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n";
    std::cout << " check3 (>0)      : " << ((diag.segment(1,actual_n-1)-singVals.head(actual_n-1).array()) / singVals.head(actual_n-1).array()).transpose() << "\n\n\n";
    std::cout << " check4 (>0)      : " << ((singVals.segment(1,actual_n-1)-singVals.head(actual_n-1))).transpose() << "\n\n\n";
  }
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(singVals.allFinite());
  assert(mus.allFinite());
  assert(shifts.allFinite());
#endif

  // Compute zhat
  perturbCol0(col0, diag, perm, singVals, shifts, mus, zhat);
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << " zhat: " << zhat.transpose() << "\n";
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(zhat.allFinite());
#endif

  computeSingVecs(zhat, diag, perm, singVals, shifts, mus, U, V);

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "U^T U: " << (U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() << "\n";
  std::cout << "V^T V: " << (V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() << "\n";
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(U.allFinite());
  assert(V.allFinite());
  assert((U.transpose() * U - MatrixXr(MatrixXr::Identity(U.cols(),U.cols()))).norm() < 1e-14 * n);
  assert((V.transpose() * V - MatrixXr(MatrixXr::Identity(V.cols(),V.cols()))).norm() < 1e-14 * n);
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

  // Because of deflation, the singular values might not be completely sorted.
  // Fortunately, reordering them is an O(n) problem.
  for(Index i=0; i<actual_n-1; ++i)
  {
    if(singVals(i)>singVals(i+1))
    {
      using std::swap;
      swap(singVals(i),singVals(i+1));
      U.col(i).swap(U.col(i+1));
      if(m_compV) V.col(i).swap(V.col(i+1));
    }
  }

  // Reverse order so that the singular values are in decreasing order.
  // Because of deflation, the zero singular values are already at the end.
  singVals.head(actual_n).reverseInPlace();
  U.leftCols(actual_n).rowwise().reverseInPlace();
  if (m_compV) V.leftCols(actual_n).rowwise().reverseInPlace();

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  JacobiSVD<MatrixXr> jsvd(m_computed.block(firstCol, firstCol, n, n));
  std::cout << " * j: " << jsvd.singularValues().transpose() << "\n\n";
  std::cout << " * sing-val: " << singVals.transpose() << "\n";
//  std::cout << " * err: " << ((jsvd.singularValues()-singVals)>1e-13*singVals.norm()).transpose() << "\n";
#endif
}

template <typename MatrixType>
typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift)
{
  Index m = perm.size();
  RealScalar res = Literal(1);
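  // Evaluates the secular function at sigma = shift + mu:
  //   f(sigma) = 1 + sum_j col0(j)^2 / ((diag(j) - sigma) * (diag(j) + sigma)),
  // where the first factor is formed from the pre-shifted differences diagShifted(j) = diag(j) - shift
  // to limit cancellation when sigma is close to diag(j).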
  for(Index i=0; i<m; ++i)
  {
    Index j = perm(i);
    // The following expression could be rewritten to involve only a single division,
    // but this would make the expression more sensitive to overflow.
    res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu));
  }
  return res;

}

template <typename MatrixType>
void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm,
                                         VectorType& singVals, ArrayRef shifts, ArrayRef mus)
{
  using std::abs;
  using std::swap;
  using std::sqrt;

  Index n = col0.size();
  Index actual_n = n;
  // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above
  // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value.
  while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n;

  for (Index k = 0; k < n; ++k)
  {
    if (col0(k) == Literal(0) || actual_n==1)
    {
      // if col0(k) == 0, then entry is deflated, so singular value is on diagonal
      // if actual_n==1, then the deflated problem is already diagonalized
      singVals(k) = k==0 ? col0(0) : diag(k);
      mus(k) = Literal(0);
      shifts(k) = k==0 ? col0(0) : diag(k);
      continue;
    }

    // otherwise, use secular equation to find singular value
    RealScalar left = diag(k);
    RealScalar right; // was: = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm());
    if(k==actual_n-1)
      right = (diag(actual_n-1) + col0.matrix().norm());
    else
    {
      // Skip deflated singular values,
      // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside.
      // This should be equivalent to using perm[]
      Index l = k+1;
      while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); }
      right = diag(l);
    }

    // first decide whether it's closer to the left end or the right end
    RealScalar mid = left + (right-left) / Literal(2);
    RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
    std::cout << right-left << "\n";
    std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n";
    std::cout << " = " << secularEq(0.1*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.2*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.3*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.4*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.49*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.5*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.51*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.6*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.7*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0)
              << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n";
#endif
    RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right;

    // measure everything relative to shift
    Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n);
    diagShifted = diag - shift;

    // initial guess
    RealScalar muPrev, muCur;
    if (shift == left)
    {
      muPrev = (right - left) * RealScalar(0.1);
      if (k == actual_n-1) muCur = right - left;
      else                 muCur = (right - left) * RealScalar(0.5);
    }
    else
    {
      muPrev = -(right - left) * RealScalar(0.1);
      muCur  = -(right - left) * RealScalar(0.5);
    }

    RealScalar fPrev = secularEq(muPrev, col0, diag, perm, diagShifted, shift);
    RealScalar fCur  = secularEq(muCur, col0, diag, perm, diagShifted, shift);
    if (abs(fPrev) < abs(fCur))
    {
      swap(fPrev, fCur);
      swap(muPrev, muCur);
    }

    // rational interpolation: fit a function of the form a / mu + b through the two previous
    // iterates and use its zero to compute the next iterate
    bool useBisection = fPrev*fCur>Literal(0);
    while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection)
    {
      ++m_numIters;

      // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples.
      RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev);
      RealScalar b = fCur - a / muCur;
      // And find mu such that f(mu)==0:
      RealScalar muZero = -a/b;
      RealScalar fZero = secularEq(muZero, col0, diag, perm, diagShifted, shift);

      muPrev = muCur;
      fPrev = fCur;
      muCur = muZero;
      fCur = fZero;


      if (shift == left  && (muCur < Literal(0) || muCur > right - left)) useBisection = true;
      if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true;
      if (abs(fCur)>abs(fPrev)) useBisection = true;
    }

    // fall back on bisection method if rational interpolation did not work
    if (useBisection)
    {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "useBisection for k = " << k << ", actual_n = " << actual_n << "\n";
#endif
      RealScalar leftShifted, rightShifted;
      if (shift == left)
      {
        // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)),
        // the factor 2 is to be more conservative
        leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) );

        // check that we did it right:
        eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) );
        // I don't understand why the case k==0 would be special there:
        // if (k == 0) rightShifted = right - left; else
        rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe
      }
      else
      {
        leftShifted = -(right - left) * RealScalar(0.51);
        if(k+1<n)
          rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) );
        else
          rightShifted = -(std::numeric_limits<RealScalar>::min)();
      }

      RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift);

#if defined EIGEN_INTERNAL_DEBUGGING || defined EIGEN_BDCSVD_DEBUG_VERBOSE
      RealScalar fRight = secularEq(rightShifted, col0, diag, perm, diagShifted, shift);
#endif

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      if(!(fLeft * fRight<0))
      {
        std::cout << "fLeft: " << leftShifted << " - " << diagShifted.head(10).transpose() << "\n ; " << bool(left==shift) << " " << (left-shift) << "\n";
        std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n";
      }
#endif
      eigen_internal_assert(fLeft * fRight < Literal(0));

      while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted)))
      {
        RealScalar midShifted = (leftShifted + rightShifted) / Literal(2);
        fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift);
        if (fLeft * fMid < Literal(0))
        {
          rightShifted = midShifted;
        }
        else
        {
          leftShifted = midShifted;
          fLeft = fMid;
        }
      }

      muCur = (leftShifted + rightShifted) / Literal(2);
    }

    singVals[k] = shift + muCur;
    shifts[k] = shift;
    mus[k] = muCur;

    // perturb singular value slightly if it equals a diagonal entry to avoid division by zero later
    // (deflation is supposed to prevent this from happening)
    // - this does not seem to be necessary anymore -
    // if (singVals[k] == left) singVals[k] *= 1 + NumTraits<RealScalar>::epsilon();
    // if (singVals[k] == right) singVals[k] *= 1 - NumTraits<RealScalar>::epsilon();
  }
}


// zhat is a perturbation of col0 for which singular vectors can be computed stably (see Section 3.1)
template <typename MatrixType>
void BDCSVD<MatrixType>::perturbCol0
   (const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
    const ArrayRef& shifts, const ArrayRef& mus, ArrayRef zhat)
{
  using std::sqrt;
  Index n = col0.size();
  Index m = perm.size();
  if(m==0)
  {
    zhat.setZero();
    return;
  }
  Index last = perm(m-1);
  // The permutation perm lets us skip deflated entries while computing zhat
  for (Index k = 0; k < n; ++k)
  {
    if (col0(k) == Literal(0)) // deflated
      zhat(k) = Literal(0);
    else
    {
      // see equation (3.6)
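      // It computes |zhat(k)|^2 = (sigma_last^2 - d_k^2) * prod_{i != k} (sigma_j^2 - d_k^2) / (d_i^2 - d_k^2),
      // where each factor sigma^2 - d_k^2 is evaluated as (sigma + d_k) * (mu + (shift - d_k)) to limit
      // cancellation, and j pairs each d_i with a neighbouring retained singular value (j = i for i < k,
      // and the previous permuted index otherwise).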
      RealScalar dk = diag(k);
      RealScalar prod = (singVals(last) + dk) * (mus(last) + (shifts(last) - dk));

      for(Index l = 0; l<m; ++l)
      {
        Index i = perm(l);
        if(i!=k)
        {
          Index j = i<k ? i : perm(l-1);
          prod *= ((singVals(j)+dk) / ((diag(i)+dk))) * ((mus(j)+(shifts(j)-dk)) / ((diag(i)-dk)));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
          if(i!=k && std::abs(((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) - 1) > 0.9 )
            std::cout << " " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk))
                      << ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n";
#endif
        }
      }
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n";
#endif
      RealScalar tmp = sqrt(prod);
      zhat(k) = col0(k) > Literal(0) ? tmp : -tmp;
    }
  }
}

// compute singular vectors
template <typename MatrixType>
void BDCSVD<MatrixType>::computeSingVecs
   (const ArrayRef& zhat, const ArrayRef& diag, const IndicesRef &perm, const VectorType& singVals,
    const ArrayRef& shifts, const ArrayRef& mus, MatrixXr& U, MatrixXr& V)
{
  Index n = zhat.size();
  Index m = perm.size();

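  // For each retained singular value sigma_k = shifts(k) + mus(k), the unnormalized singular vectors
  // follow the report (section 3.1):
  //   u_i = zhat(i) / (d_i^2 - sigma_k^2)   and   v_i = d_i * zhat(i) / (d_i^2 - sigma_k^2),
  // with each denominator evaluated as ((d_i - shifts(k)) - mus(k)) * (d_i + sigma_k) for accuracy.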
  for (Index k = 0; k < n; ++k)
  {
    if (zhat(k) == Literal(0))
    {
      U.col(k) = VectorType::Unit(n+1, k);
      if (m_compV) V.col(k) = VectorType::Unit(n, k);
    }
    else
    {
      U.col(k).setZero();
      for(Index l=0;l<m;++l)
      {
        Index i = perm(l);
        U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)))/((diag(i) + singVals[k]));
      }
      U(n,k) = Literal(0);
      U.col(k).normalize();

      if (m_compV)
      {
        V.col(k).setZero();
        for(Index l=1;l<m;++l)
        {
          Index i = perm(l);
          V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)))/((diag(i) + singVals[k]));
        }
        V(0,k) = Literal(-1);
        V.col(k).normalize();
      }
    }
  }
  U.col(n) = VectorType::Unit(n+1, n);
}


// pages 12-13
// i >= 1, d_i almost zero and z_i nonzero.
// We use a rotation, applied to the left of M, to zero out z_i.
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index size)
{
  using std::abs;
  using std::sqrt;
  using std::pow;
  Index start = firstCol + shift;
  RealScalar c = m_computed(start, start);
  RealScalar s = m_computed(start+i, start);
  RealScalar r = numext::hypot(c,s);
  if (r == Literal(0))
  {
    m_computed(start+i, start+i) = Literal(0);
    return;
  }
  m_computed(start,start) = r;
  m_computed(start+i, start) = Literal(0);
  m_computed(start+i, start+i) = Literal(0);

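  // The rotation that zeroes z_i in M (acting on M from the left) is compensated
  // on the corresponding columns of U, hence it is applied to m_naiveU on the right.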
  JacobiRotation<RealScalar> J(c/r,-s/r);
  if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J);
  else         m_naiveU.applyOnTheRight(firstCol, firstCol+i, J);
} // end deflation 43


// page 13
// i,j >= 1, i!=j and |di - dj| < epsilon * norm2(M)
// We apply two rotations to have zj = 0;
// TODO deflation44 is still broken and not properly tested
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation44(Index firstColu, Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size)
{
  using std::abs;
  using std::sqrt;
  using std::conj;
  using std::pow;
  RealScalar c = m_computed(firstColm+i, firstColm);
  RealScalar s = m_computed(firstColm+j, firstColm);
  RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s));
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; "
            << m_computed(firstColm + i-1, firstColm) << " "
            << m_computed(firstColm + i, firstColm) << " "
            << m_computed(firstColm + i+1, firstColm) << " "
            << m_computed(firstColm + i+2, firstColm) << "\n";
  std::cout << m_computed(firstColm + i-1, firstColm + i-1) << " "
            << m_computed(firstColm + i, firstColm+i) << " "
            << m_computed(firstColm + i+1, firstColm+i+1) << " "
            << m_computed(firstColm + i+2, firstColm+i+2) << "\n";
#endif
  if (r==Literal(0))
  {
    m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j);
    return;
  }
  c/=r;
  s/=r;
  m_computed(firstColm + i, firstColm) = r;
  m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i);
  m_computed(firstColm + j, firstColm) = Literal(0);

  JacobiRotation<RealScalar> J(c,-s);
  if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J);
  else         m_naiveU.applyOnTheRight(firstColu+i, firstColu+j, J);
  if (m_compV) m_naiveV.middleRows(firstRowW, size).applyOnTheRight(firstColW + i, firstColW + j, J);
} // end deflation 44


// acts on block from (firstCol+shift, firstCol+shift) to (lastCol+shift, lastCol+shift) [inclusive]
template <typename MatrixType>
void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift)
{
  using std::sqrt;
  using std::abs;
  const Index length = lastCol + 1 - firstCol;

  Block<MatrixXr,Dynamic,1> col0(m_computed, firstCol+shift, firstCol+shift, length, 1);
  Diagonal<MatrixXr> fulldiag(m_computed);
  VectorBlock<Diagonal<MatrixXr>,Dynamic> diag(fulldiag, firstCol+shift, length);

  const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)();
  RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff();
  RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag);
  RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag);

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif

#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "\ndeflate:" << diag.head(k+1).transpose() << " | " << diag.segment(k+1,length-k-1).transpose() << "\n";
#endif

  //condition 4.1
  if (diag(0) < epsilon_coarse)
  {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
    std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon_coarse << "\n";
#endif
    diag(0) = epsilon_coarse;
  }

  //condition 4.2
  for (Index i=1;i<length;++i)
    if (abs(col0(i)) < epsilon_strict)
    {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n";
#endif
      col0(i) = Literal(0);
    }

  //condition 4.3
  for (Index i=1;i<length; i++)
    if (diag(i) < epsilon_coarse)
    {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
      std::cout << "deflation 4.3, cancel z(" << i << ")=" << col0(i) << " because diag(" << i << ")=" << diag(i) << " < " << epsilon_coarse << "\n";
#endif
      deflation43(firstCol, shift, i, length);
    }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "to be sorted: " << diag.transpose() << "\n\n";
#endif
  {
    // Check for total deflation
    // If we have a total deflation, then we have to consider col0(0)==diag(0) as a singular value during sorting
    bool total_deflation = (col0.tail(length-1).array()<considerZero).all();

    // Sort the diagonal entries, since diag(1:k-1) and diag(k:length) are already sorted, let's do a sorted merge.
    // First, compute the respective permutation.
    Index *permutation = m_workspaceI.data();
    {
      permutation[0] = 0;
      Index p = 1;

      // Move deflated diagonal entries to the end.
      for(Index i=1; i<length; ++i)
        if(abs(diag(i))<considerZero)
          permutation[p++] = i;

      Index i=1, j=k+1;
      for( ; p < length; ++p)
      {
        if      (i > k)             permutation[p] = j++;
        else if (j >= length)       permutation[p] = i++;
        else if (diag(i) < diag(j)) permutation[p] = j++;
        else                        permutation[p] = i++;
      }
    }

    // If we have a total deflation, then we have to insert diag(0) at the right place
    if(total_deflation)
    {
      for(Index i=1; i<length; ++i)
      {
        Index pi = permutation[i];
        if(abs(diag(pi))<considerZero || diag(0)<diag(pi))
          permutation[i-1] = permutation[i];
        else
        {
          permutation[i-1] = 0;
          break;
        }
      }
    }

    // Current index of each col, and current column of each index
    Index *realInd = m_workspaceI.data()+length;
    Index *realCol = m_workspaceI.data()+2*length;

    for(int pos = 0; pos< length; pos++)
    {
      realCol[pos] = pos;
      realInd[pos] = pos;
    }

    for(Index i = total_deflation?0:1; i < length; i++)
    {
      const Index pi = permutation[length - (total_deflation ? i+1 : i)];
      const Index J = realCol[pi];

      using std::swap;
      // swap diagonal and first column entries:
      swap(diag(i), diag(J));
      if(i!=0 && J!=0) swap(col0(i), col0(J));

      // change columns
      if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1));
      else         m_naiveU.col(firstCol+i).segment(0, 2).swap(m_naiveU.col(firstCol+J).segment(0, 2));
      if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length));

      // update real positions
      const Index realI = realInd[i];
      realCol[realI] = J;
      realCol[pi] = i;
      realInd[J] = realI;
      realInd[i] = pi;
    }
  }
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
  std::cout << "sorted: " << diag.transpose().format(bdcsvdfmt) << "\n";
  std::cout << " : " << col0.transpose() << "\n\n";
#endif

  //condition 4.4
  {
    Index i = length-1;
    while(i>0 && (abs(diag(i))<considerZero || abs(col0(i))<considerZero)) --i;
    for(; i>1;--i)
      if( (diag(i) - diag(i-1)) < NumTraits<RealScalar>::epsilon()*maxDiag )
      {
#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE
        std::cout << "deflation 4.4 with i = " << i << " because " << (diag(i) - diag(i-1)) << " < " << NumTraits<RealScalar>::epsilon()*diag(i) << "\n";
#endif
        eigen_internal_assert(abs(diag(i) - diag(i-1))<epsilon_coarse && " diagonal entries are not properly sorted");
        deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i-1, i, length);
      }
  }

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  for(Index j=2;j<length;++j)
    assert(diag(j-1)<=diag(j) || abs(diag(j))<considerZero);
#endif

#ifdef EIGEN_BDCSVD_SANITY_CHECKS
  assert(m_naiveU.allFinite());
  assert(m_naiveV.allFinite());
  assert(m_computed.allFinite());
#endif
} // end deflation

#ifndef __CUDACC__
/** \svd_module
  *
  * \return the singular value decomposition of \c *this computed by Divide & Conquer algorithm
  *
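  * A minimal usage sketch (the matrix, its sizes, and the option flags are illustrative only):
  * \code
  * Eigen::MatrixXf A = Eigen::MatrixXf::Random(40, 30);
  * Eigen::VectorXf s = A.bdcSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).singularValues();
  * \endcode
  *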
  * \sa class BDCSVD
  */
template<typename Derived>
BDCSVD<typename MatrixBase<Derived>::PlainObject>
MatrixBase<Derived>::bdcSvd(unsigned int computationOptions) const
{
  return BDCSVD<PlainObject>(*this, computationOptions);
}
#endif

} // end namespace Eigen

#endif