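// Sparse primal-dual interior-point solver for convex problems of the form
//   minimize    f0(x)
//   subject to  f(x) <= 0
//               A x == b
// The structure below mirrors the primal-dual interior-point method of Boyd &
// Vandenberghe, "Convex Optimization", chapter 11: the starting point must
// strictly satisfy the inequality constraints, while A x == b only needs to
// hold at convergence.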
#include "frc971/solvers/sparse_convex.h"

#include <unistd.h>

#include <Eigen/Sparse>
#include <Eigen/SparseLU>

#include "absl/strings/str_cat.h"
#include "glog/logging.h"

namespace frc971 {
namespace solvers {

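// Computes the residual r_t(y) at y = (x, lambda, v) for barrier parameter t
// (passed as t_inverse = 1 / t).  Stacked as in the usual primal-dual
// interior-point construction:
//
//   r_dual = gradient(f0) + df^T * lambda + A^T * v
//   r_cent = -diag(lambda) * f - (1 / t) * ones
//   r_pri  = A * x - b
//
// The Newton step in Solve() drives this residual towards zero.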
Eigen::VectorXd SparseSolver::Rt(const Derivatives &derivatives,
                                 Eigen::VectorXd y, double t_inverse) {
  Eigen::VectorXd result(derivatives.states() +
                         derivatives.inequality_constraints() +
                         derivatives.equality_constraints());

  // states x 1
  Eigen::Ref<Eigen::VectorXd> r_dual =
      result.block(0, 0, derivatives.states(), 1);
  // inequality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_cent = result.block(
      derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_pri =
      result.block(derivatives.states() + derivatives.inequality_constraints(),
                   0, derivatives.equality_constraints(), 1);

  // inequality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> v =
      y.block(derivatives.states() + derivatives.inequality_constraints(), 0,
              derivatives.equality_constraints(), 1);

  r_dual = derivatives.gradient + derivatives.df.transpose() * lambda +
           derivatives.A.transpose() * v;
  r_cent = -lambda.array() * derivatives.f.array() - t_inverse;
  r_pri = derivatives.Axmb;

  return result;
}

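// Appends the nonzero entries of matrix, offset by (starting_row,
// starting_column), to triplet_list.  Solve() uses these helpers to assemble
// the block-structured KKT matrix with a single setFromTriplets() call.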
void AppendColumns(std::vector<Eigen::Triplet<double>> *triplet_list,
                   size_t starting_row, size_t starting_column,
                   const Eigen::SparseMatrix<double> &matrix) {
  for (int k = 0; k < matrix.outerSize(); ++k) {
    for (Eigen::SparseMatrix<double, Eigen::ColMajor>::InnerIterator it(matrix,
                                                                        k);
         it; ++it) {
      (*triplet_list)
          .emplace_back(it.row() + starting_row, it.col() + starting_column,
                        it.value());
    }
  }
}

void AppendColumns(
    std::vector<Eigen::Triplet<double>> *triplet_list, size_t starting_row,
    size_t starting_column,
    const Eigen::DiagonalMatrix<double, Eigen::Dynamic> &matrix) {
  for (int k = 0; k < matrix.rows(); ++k) {
    (*triplet_list)
        .emplace_back(k + starting_row, k + starting_column,
                      matrix.diagonal()(k));
  }
}

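// A sketch of each iteration of the loop below:
//   1. Compute the surrogate duality gap nu = -f^T * lambda and from it the
//      barrier parameter (stored inverted as t_inverse).
//   2. Assemble the sparse KKT system and solve it for the Newton step dy.
//   3. Line search: cap s so lambda stays positive, then backtrack by kBeta
//      until f stays negative and the residual norm has dropped enough.
//   4. Take y += s * dy and stop once r_pri, r_dual, and nu are all small.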
Eigen::VectorXd SparseSolver::Solve(
    const SparseConvexProblem &problem,
    Eigen::Ref<const Eigen::VectorXd> X_initial) {
  CHECK_EQ(static_cast<size_t>(X_initial.rows()), problem.states());
  CHECK_EQ(X_initial.cols(), 1);

  const Eigen::IOFormat kHeavyFormat(Eigen::StreamPrecision, 0, ", ",
                                     ",\n "
                                     " ",
                                     "[", "]", "[", "]");

  Eigen::VectorXd y = Eigen::VectorXd::Constant(
      problem.states() + problem.inequality_constraints() +
          problem.equality_constraints(),
      1.0);
  y.block(0, 0, problem.states(), 1) = X_initial;

  Derivatives derivatives = ComputeDerivative(problem, y);

  for (size_t i = 0; i < problem.inequality_constraints(); ++i) {
    CHECK_LE(derivatives.f(i, 0), 0.0)
        << ": Initial state " << X_initial.transpose().format(kHeavyFormat)
        << " not feasible";
  }

  PrintDerivatives(derivatives, y, "", 1);

  size_t iteration = 0;
  while (true) {
    // Solve for the primal-dual search direction by taking a single Newton
    // step.

    // inequality_constraints x 1
    Eigen::Ref<const Eigen::VectorXd> lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

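    // nu is the surrogate duality gap -f^T * lambda; the barrier parameter t
    // is chosen as kMu * (number of inequality constraints) / nu, carried
    // around inverted to match Rt().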
    const double nu = -(derivatives.f.transpose() * lambda)(0, 0);
    const double t_inverse = nu / (kMu * lambda.rows());
    Eigen::VectorXd rt_orig = Rt(derivatives, y, t_inverse);

    std::vector<Eigen::Triplet<double>> triplet_list;

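    // Assemble the Newton KKT matrix block by block:
    //
    //   [ hessian            df^T      A^T ]
    //   [ -diag(lambda)*df   -diag(f)  0   ]
    //   [ A                  0         0   ]
    //
    // The zero blocks simply never receive triplets.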
    AppendColumns(&triplet_list, 0, 0, derivatives.hessian);
    AppendColumns(&triplet_list, 0, problem.states(),
                  derivatives.df.transpose());
    AppendColumns(&triplet_list, 0,
                  problem.states() + problem.inequality_constraints(),
                  derivatives.A.transpose());

    // TODO(austin): I think I can do better on the next 2, making them more
    // efficient and not creating the intermediate matrix.
    AppendColumns(&triplet_list, problem.states(), 0,
                  -(Eigen::DiagonalMatrix<double, Eigen::Dynamic>(lambda) *
                    derivatives.df));
    AppendColumns(
        &triplet_list, problem.states(), problem.states(),
        Eigen::DiagonalMatrix<double, Eigen::Dynamic>(-derivatives.f));

    AppendColumns(&triplet_list,
                  problem.states() + problem.inequality_constraints(), 0,
                  derivatives.A);

    Eigen::SparseMatrix<double> m1(
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints(),
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints());
    m1.setFromTriplets(triplet_list.begin(), triplet_list.end());

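    // Factor the KKT matrix with sparse LU and solve for the full primal-dual
    // step dy = (dx, dlambda, dv) from Dr_t(y) * dy = -r_t(y).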
    Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
    solver.analyzePattern(m1);
    solver.factorize(m1);
    Eigen::VectorXd dy = solver.solve(-rt_orig);

    Eigen::Ref<Eigen::VectorXd> dlambda =
        dy.block(problem.states(), 0, problem.inequality_constraints(), 1);

    double s = 1.0;

    // Now, time to do line search.
    //
    // Start by keeping lambda positive.  Make sure our step doesn't let
    // lambda cross 0.
    for (int i = 0; i < dlambda.rows(); ++i) {
      if (lambda(i) + s * dlambda(i) < 0.0) {
        // Ignore tiny steps in lambda.  They cause issues when we get really
        // close to having our constraints met but haven't converged the rest
        // of the problem and start to run into rounding issues in the matrix
        // solve portion.
        if (dlambda(i) < 0.0 && dlambda(i) > -1e-12) {
          VLOG(1) << " lambda(" << i << ") " << lambda(i) << " + " << s
                  << " * " << dlambda(i) << " -> s would now be "
                  << -lambda(i) / dlambda(i);
          dlambda(i) = 0.0;
          VLOG(1) << " dy -> " << std::setprecision(12) << std::fixed
                  << std::setfill(' ') << dy.transpose().format(kHeavyFormat);
          continue;
        }
        VLOG(1) << " lambda(" << i << ") " << lambda(i) << " + " << s << " * "
                << dlambda(i) << " -> s now " << -lambda(i) / dlambda(i);
        s = -lambda(i) / dlambda(i);
      }
    }

    VLOG(1) << " After lambda line search, s is " << s;

    VLOG(3) << " Initial step " << iteration << " -> " << std::setprecision(12)
            << std::fixed << std::setfill(' ')
            << dy.transpose().format(kHeavyFormat);
    VLOG(3) << " rt -> " << std::setprecision(12) << std::fixed
            << std::setfill(' ') << rt_orig.transpose().format(kHeavyFormat);

    const double rt_orig_squared_norm = rt_orig.squaredNorm();

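    // Backtracking line search: shrink s by kBeta until the trial point keeps
    // f < 0 everywhere and, once the equality constraint residual is small,
    // satisfies ||rt|| <= (1 - kAlpha * s) * ||rt_orig||.  Squared norms are
    // compared to avoid the square roots.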
    Eigen::VectorXd next_y;
    Eigen::VectorXd rt;
    Derivatives next_derivatives;
    while (true) {
      next_y = y + s * dy;
      next_derivatives = ComputeDerivative(problem, next_y);
      rt = Rt(next_derivatives, next_y, t_inverse);

      const Eigen::Ref<const Eigen::VectorXd> next_x =
          next_y.block(0, 0, next_derivatives.hessian.rows(), 1);
      const Eigen::Ref<const Eigen::VectorXd> next_lambda =
          next_y.block(next_x.rows(), 0, next_derivatives.f.rows(), 1);

      const Eigen::Ref<const Eigen::VectorXd> next_v = next_y.block(
          next_x.rows() + next_lambda.rows(), 0, next_derivatives.A.rows(), 1);

      VLOG(1) << " next_rt(" << iteration << ") is " << rt.norm() << " -> "
              << std::setprecision(12) << std::fixed << std::setfill(' ')
              << rt.transpose().format(kHeavyFormat);

      PrintDerivatives(next_derivatives, next_y, "next_", 3);

      if (next_derivatives.f.maxCoeff() > 0.0) {
        VLOG(1) << " f_next > 0.0 -> " << next_derivatives.f.maxCoeff()
                << ", continuing line search.";
        s *= kBeta;
      } else if (next_derivatives.Axmb.squaredNorm() < 0.1 &&
                 rt.squaredNorm() >
                     std::pow(1.0 - kAlpha * s, 2.0) * rt_orig_squared_norm) {
        VLOG(1) << " |Rt+1| > (1 - alpha * s) * |Rt| " << rt.norm() << " > "
                << rt_orig.norm() << ", drt -> " << std::setprecision(12)
                << std::fixed << std::setfill(' ')
                << (rt_orig - rt).transpose().format(kHeavyFormat);
        s *= kBeta;
      } else {
        break;
      }
    }

    VLOG(1) << " Terminated line search with s " << s << ", " << rt.norm()
            << "(|Rt+1|) < " << rt_orig.norm() << "(|Rt|)";
    y = next_y;

    const Eigen::Ref<const Eigen::VectorXd> next_lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

    // See if we hit our convergence criteria.
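    // We are done when all three are small enough:
    //   ||r_pri||  < kEpsilonF  (equality constraints satisfied),
    //   ||r_dual|| < kEpsilonF  (stationarity satisfied), and
    //   nu         < kEpsilon   (duality gap closed).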
    const double r_primal_squared_norm =
        rt.block(problem.states() + problem.inequality_constraints(), 0,
                 problem.equality_constraints(), 1)
            .squaredNorm();
    VLOG(1) << " rt_next(" << iteration << ") is " << rt.norm() << " -> "
            << std::setprecision(12) << std::fixed << std::setfill(' ')
            << rt.transpose().format(kHeavyFormat);
    if (r_primal_squared_norm < kEpsilonF * kEpsilonF) {
      const double r_dual_squared_norm =
          rt.block(0, 0, problem.states(), 1).squaredNorm();
      if (r_dual_squared_norm < kEpsilonF * kEpsilonF) {
        const double next_nu =
            -(next_derivatives.f.transpose() * next_lambda)(0, 0);
        if (next_nu < kEpsilon) {
          VLOG(1) << " r_primal(" << iteration << ") -> "
                  << std::sqrt(r_primal_squared_norm) << " < " << kEpsilonF
                  << ", r_dual(" << iteration << ") -> "
                  << std::sqrt(r_dual_squared_norm) << " < " << kEpsilonF
                  << ", nu(" << iteration << ") -> " << next_nu << " < "
                  << kEpsilon;
          break;
        } else {
          VLOG(1) << " nu(" << iteration << ") -> " << next_nu
                  << " >= " << kEpsilon << ", not done yet";
        }
      } else {
        VLOG(1) << " r_dual(" << iteration << ") -> "
                << std::sqrt(r_dual_squared_norm) << " >= " << kEpsilonF
                << ", not done yet";
      }
    } else {
      VLOG(1) << " r_primal(" << iteration << ") -> "
              << std::sqrt(r_primal_squared_norm) << " >= " << kEpsilonF
              << ", not done yet";
    }
    VLOG(1) << " step(" << iteration << ") " << std::setprecision(12)
            << (s * dy).transpose().format(kHeavyFormat);
    VLOG(1) << " y(" << iteration << ") is now " << std::setprecision(12)
            << y.transpose().format(kHeavyFormat);

    // Very important: reuse the last set of derivatives we computed for our
    // new y for the next iteration.  This avoids re-computing them.
    derivatives = std::move(next_derivatives);

    ++iteration;
    if (iteration > 100) {
      LOG(FATAL) << "Too many iterations";
    }
  }

  return y.block(0, 0, problem.states(), 1);
}

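// Evaluates the cost gradient and hessian, the inequality constraints f and
// their jacobian df, and the equality constraint residual A * x - b at the x
// portion of y, CHECKing that every dimension matches the problem.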
SparseSolver::Derivatives SparseSolver::ComputeDerivative(
    const SparseConvexProblem &problem,
    const Eigen::Ref<const Eigen::VectorXd> y) {
  // states x 1
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, problem.states(), 1);

  Derivatives derivatives;
  derivatives.gradient = problem.df0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.cols()), 1u);

  derivatives.hessian = problem.ddf0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.cols()), problem.states());

  derivatives.f = problem.f(x);
  CHECK_EQ(static_cast<size_t>(derivatives.f.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.f.cols()), 1u);

  derivatives.df = problem.df(x);
  CHECK_EQ(static_cast<size_t>(derivatives.df.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.df.cols()), problem.states());

  derivatives.A = problem.A();
  CHECK_EQ(static_cast<size_t>(derivatives.A.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.A.cols()), problem.states());

  derivatives.Axmb =
      derivatives.A * y.block(0, 0, problem.states(), 1) - problem.b();
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.cols()), 1u);

  return derivatives;
}

void SparseSolver::PrintDerivatives(const Derivatives &derivatives,
                                    const Eigen::Ref<const Eigen::VectorXd> y,
                                    std::string_view prefix, int verbosity) {
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, derivatives.hessian.rows(), 1);
  const Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(x.rows(), 0, derivatives.f.rows(), 1);

  if (VLOG_IS_ON(verbosity)) {
    Eigen::IOFormat heavy(Eigen::StreamPrecision, 0, ", ",
                          ",\n "
                          " ",
                          "[", "]", "[", "]");
    heavy.rowSeparator =
        heavy.rowSeparator +
        std::string(absl::StrCat(getpid()).size() + prefix.size(), ' ');

    const Eigen::Ref<const Eigen::VectorXd> v =
        y.block(x.rows() + lambda.rows(), 0, derivatives.A.rows(), 1);
    VLOG(verbosity) << " " << prefix << "x: " << x.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix
                    << "lambda: " << lambda.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix << "v: " << v.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix << "hessian: " << derivatives.hessian;
    VLOG(verbosity) << " " << prefix
                    << "gradient: " << derivatives.gradient;
    VLOG(verbosity) << " " << prefix << "A: " << derivatives.A;
    VLOG(verbosity) << " " << prefix
                    << "Ax-b: " << derivatives.Axmb.format(heavy);
    VLOG(verbosity) << " " << prefix
                    << "f: " << derivatives.f.format(heavy);
    VLOG(verbosity) << " " << prefix << "df: " << derivatives.df;
  }
}

}  // namespace solvers
}  // namespace frc971