#include "frc971/solvers/sparse_convex.h"

#include <unistd.h>

#include <cmath>
#include <iomanip>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "absl/strings/str_cat.h"
#include "glog/logging.h"

#include <Eigen/Sparse>
#include <Eigen/SparseLU>

namespace frc971::solvers {

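// Computes the residual of the relaxed KKT conditions at y = [x; lambda; v],
// stacked as [r_dual; r_cent; r_pri]:
//   r_dual = gradient + df^T * lambda + A^T * v
//   r_cent = -diag(lambda) * f - (1/t) * 1
//   r_pri  = A * x - b
// This is the primal-dual residual r_t from Boyd & Vandenberghe,
// "Convex Optimization", section 11.7.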
Eigen::VectorXd SparseSolver::Rt(const Derivatives &derivatives,
                                 Eigen::VectorXd y, double t_inverse) {
  Eigen::VectorXd result(derivatives.states() +
                         derivatives.inequality_constraints() +
                         derivatives.equality_constraints());

  // states x 1
  Eigen::Ref<Eigen::VectorXd> r_dual =
      result.block(0, 0, derivatives.states(), 1);
  // inequality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_cent = result.block(
      derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_pri =
      result.block(derivatives.states() + derivatives.inequality_constraints(),
                   0, derivatives.equality_constraints(), 1);

  // inequality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> v =
      y.block(derivatives.states() + derivatives.inequality_constraints(), 0,
              derivatives.equality_constraints(), 1);

  r_dual = derivatives.gradient + derivatives.df.transpose() * lambda +
           derivatives.A.transpose() * v;
  r_cent = -lambda.array() * derivatives.f.array() - t_inverse;
  r_pri = derivatives.Axmb;

  return result;
}

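// Appends the nonzero entries of `matrix` to `triplet_list`, offset so the
// matrix's (0, 0) entry lands at (starting_row, starting_column).  Used to
// assemble the block-structured KKT matrix in Solve() below.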
void AppendColumns(std::vector<Eigen::Triplet<double>> *triplet_list,
                   size_t starting_row, size_t starting_column,
                   const Eigen::SparseMatrix<double> &matrix) {
  for (int k = 0; k < matrix.outerSize(); ++k) {
    for (Eigen::SparseMatrix<double, Eigen::ColMajor>::InnerIterator it(matrix,
                                                                        k);
         it; ++it) {
      (*triplet_list)
          .emplace_back(it.row() + starting_row, it.col() + starting_column,
                        it.value());
    }
  }
}

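// Overload of the above for diagonal matrices, appending each diagonal entry.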
void AppendColumns(
    std::vector<Eigen::Triplet<double>> *triplet_list, size_t starting_row,
    size_t starting_column,
    const Eigen::DiagonalMatrix<double, Eigen::Dynamic> &matrix) {
  for (int k = 0; k < matrix.rows(); ++k) {
    (*triplet_list)
        .emplace_back(k + starting_row, k + starting_column,
                      matrix.diagonal()(k));
  }
}

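// Solves the problem with an infeasible-start primal-dual interior point
// method, following Boyd & Vandenberghe, "Convex Optimization", section 11.7:
// each iteration linearizes the relaxed KKT conditions, solves the sparse
// Newton system for a step dy, backtracks along dy, and stops once the primal
// and dual residuals and the surrogate duality gap are all small.  X_initial
// must satisfy f(X_initial) <= 0 elementwise; the equality constraints need
// not hold initially, since the iteration drives A * x - b to zero.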
Eigen::VectorXd SparseSolver::Solve(
    const SparseConvexProblem &problem,
    Eigen::Ref<const Eigen::VectorXd> X_initial) {
  CHECK_EQ(static_cast<size_t>(X_initial.rows()), problem.states());
  CHECK_EQ(X_initial.cols(), 1);

  const Eigen::IOFormat kHeavyFormat(Eigen::StreamPrecision, 0, ", ",
                                     ",\n                        "
                                     "                        ",
                                     "[", "]", "[", "]");

  Eigen::VectorXd y = Eigen::VectorXd::Constant(
      problem.states() + problem.inequality_constraints() +
          problem.equality_constraints(),
      1.0);
  y.block(0, 0, problem.states(), 1) = X_initial;

  Derivatives derivatives = ComputeDerivative(problem, y);

  for (size_t i = 0; i < problem.inequality_constraints(); ++i) {
    CHECK_LE(derivatives.f(i, 0), 0.0)
        << ": Initial state " << X_initial.transpose().format(kHeavyFormat)
        << " not feasible";
  }

  PrintDerivatives(derivatives, y, "", 1);

  size_t iteration = 0;
  while (true) {
    // Solve for the primal-dual search direction by taking a Newton step on
    // the relaxed KKT residual.

    // inequality_constraints x 1
    Eigen::Ref<const Eigen::VectorXd> lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

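    // nu is the surrogate duality gap -f(x)^T * lambda, and t is chosen each
    // iteration so that t = kMu * m / nu, i.e. 1/t = nu / (kMu * m).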
    const double nu = -(derivatives.f.transpose() * lambda)(0, 0);
    const double t_inverse = nu / (kMu * lambda.rows());
    Eigen::VectorXd rt_orig = Rt(derivatives, y, t_inverse);

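    // Assemble the sparse Newton KKT matrix block by block:
    //   [ hessian             df^T      A^T ]
    //   [ -diag(lambda) * df  -diag(f)  0   ]
    //   [ A                   0         0   ]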
    std::vector<Eigen::Triplet<double>> triplet_list;

    AppendColumns(&triplet_list, 0, 0, derivatives.hessian);
    AppendColumns(&triplet_list, 0, problem.states(),
                  derivatives.df.transpose());
    AppendColumns(&triplet_list, 0,
                  problem.states() + problem.inequality_constraints(),
                  derivatives.A.transpose());

    // TODO(austin): I think I can do better on the next 2, making them more
    // efficient and not creating the intermediate matrix.
    AppendColumns(&triplet_list, problem.states(), 0,
                  -(Eigen::DiagonalMatrix<double, Eigen::Dynamic>(lambda) *
                    derivatives.df));
    AppendColumns(
        &triplet_list, problem.states(), problem.states(),
        Eigen::DiagonalMatrix<double, Eigen::Dynamic>(-derivatives.f));

    AppendColumns(&triplet_list,
                  problem.states() + problem.inequality_constraints(), 0,
                  derivatives.A);

    Eigen::SparseMatrix<double> m1(
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints(),
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints());
    m1.setFromTriplets(triplet_list.begin(), triplet_list.end());

    Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
    solver.analyzePattern(m1);
    solver.factorize(m1);
    Eigen::VectorXd dy = solver.solve(-rt_orig);

    Eigen::Ref<Eigen::VectorXd> dlambda =
        dy.block(problem.states(), 0, problem.inequality_constraints(), 1);

    double s = 1.0;

    // Now, time to do line search.
    //
    // Start by keeping lambda positive.  Make sure our step doesn't let
    // lambda cross 0.
    for (int i = 0; i < dlambda.rows(); ++i) {
      if (lambda(i) + s * dlambda(i) < 0.0) {
        // Ignore tiny steps in lambda.  They cause issues when we get really
        // close to having our constraints met but haven't converged the rest
        // of the problem and start to run into rounding issues in the matrix
        // solve portion.
        if (dlambda(i) < 0.0 && dlambda(i) > -1e-12) {
          VLOG(1) << "  lambda(" << i << ") " << lambda(i) << " + " << s
                  << " * " << dlambda(i) << " -> s would now be "
                  << -lambda(i) / dlambda(i);
          dlambda(i) = 0.0;
          VLOG(1) << "  dy -> " << std::setprecision(12) << std::fixed
                  << std::setfill(' ') << dy.transpose().format(kHeavyFormat);
          continue;
        }
        VLOG(1) << "  lambda(" << i << ") " << lambda(i) << " + " << s << " * "
                << dlambda(i) << " -> s now " << -lambda(i) / dlambda(i);
        s = -lambda(i) / dlambda(i);
      }
    }

    VLOG(1) << "  After lambda line search, s is " << s;

    VLOG(3) << "  Initial step " << iteration << " -> "
            << std::setprecision(12) << std::fixed << std::setfill(' ')
            << dy.transpose().format(kHeavyFormat);
    VLOG(3) << "  rt -> " << std::setprecision(12) << std::fixed
            << std::setfill(' ') << rt_orig.transpose().format(kHeavyFormat);

    const double rt_orig_squared_norm = rt_orig.squaredNorm();

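    // Backtracking line search: shrink s by kBeta until the step keeps the
    // inequality constraints feasible (f <= 0) and, once the equality
    // residual ||Ax - b||^2 is already small, until the KKT residual norm
    // decreases sufficiently:
    //   ||rt(y + s * dy)|| <= (1 - kAlpha * s) * ||rt(y)||.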
    Eigen::VectorXd next_y;
    Eigen::VectorXd rt;
    Derivatives next_derivatives;
    while (true) {
      next_y = y + s * dy;
      next_derivatives = ComputeDerivative(problem, next_y);
      rt = Rt(next_derivatives, next_y, t_inverse);

      const Eigen::Ref<const Eigen::VectorXd> next_x =
          next_y.block(0, 0, next_derivatives.hessian.rows(), 1);
      const Eigen::Ref<const Eigen::VectorXd> next_lambda =
          next_y.block(next_x.rows(), 0, next_derivatives.f.rows(), 1);

      const Eigen::Ref<const Eigen::VectorXd> next_v = next_y.block(
          next_x.rows() + next_lambda.rows(), 0, next_derivatives.A.rows(), 1);

      VLOG(1) << "    next_rt(" << iteration << ") is " << rt.norm() << " -> "
              << std::setprecision(12) << std::fixed << std::setfill(' ')
              << rt.transpose().format(kHeavyFormat);

      PrintDerivatives(next_derivatives, next_y, "next_", 3);

      if (next_derivatives.f.maxCoeff() > 0.0) {
        VLOG(1) << "   f_next > 0.0  -> " << next_derivatives.f.maxCoeff()
                << ", continuing line search.";
        s *= kBeta;
      } else if (next_derivatives.Axmb.squaredNorm() < 0.1 &&
                 rt.squaredNorm() >
                     std::pow(1.0 - kAlpha * s, 2.0) * rt_orig_squared_norm) {
        VLOG(1) << "   |Rt+1| > |Rt| " << rt.norm() << " > " << rt_orig.norm()
                << ", drt -> " << std::setprecision(12) << std::fixed
                << std::setfill(' ')
                << (rt_orig - rt).transpose().format(kHeavyFormat);
        s *= kBeta;
      } else {
        break;
      }
    }

    VLOG(1) << "  Terminated line search with s " << s << ", " << rt.norm()
            << "(|Rt+1|) < " << rt_orig.norm() << "(|Rt|)";
    y = next_y;

    const Eigen::Ref<const Eigen::VectorXd> next_lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

    // See if we hit our convergence criteria: the primal and dual residual
    // norms must be below kEpsilonF, and the surrogate duality gap below
    // kEpsilon.
    const double r_primal_squared_norm =
        rt.block(problem.states() + problem.inequality_constraints(), 0,
                 problem.equality_constraints(), 1)
            .squaredNorm();
    VLOG(1) << "  rt_next(" << iteration << ") is " << rt.norm() << " -> "
            << std::setprecision(12) << std::fixed << std::setfill(' ')
            << rt.transpose().format(kHeavyFormat);
    if (r_primal_squared_norm < kEpsilonF * kEpsilonF) {
      const double r_dual_squared_norm =
          rt.block(0, 0, problem.states(), 1).squaredNorm();
      if (r_dual_squared_norm < kEpsilonF * kEpsilonF) {
        const double next_nu =
            -(next_derivatives.f.transpose() * next_lambda)(0, 0);
        if (next_nu < kEpsilon) {
          VLOG(1) << "  r_primal(" << iteration << ") -> "
                  << std::sqrt(r_primal_squared_norm) << " < " << kEpsilonF
                  << ", r_dual(" << iteration << ") -> "
                  << std::sqrt(r_dual_squared_norm) << " < " << kEpsilonF
                  << ", nu(" << iteration << ") -> " << next_nu << " < "
                  << kEpsilon;
          break;
        } else {
          VLOG(1) << "  nu(" << iteration << ") -> " << next_nu
                  << " >= " << kEpsilon << ", not done yet";
        }
      } else {
        VLOG(1) << "  r_dual(" << iteration << ") -> "
                << std::sqrt(r_dual_squared_norm) << " >= " << kEpsilonF
                << ", not done yet";
      }
    } else {
      VLOG(1) << "  r_primal(" << iteration << ") -> "
              << std::sqrt(r_primal_squared_norm) << " >= " << kEpsilonF
              << ", not done yet";
    }
265 VLOG(1) << " step(" << iteration << ") " << std::setprecision(12)
266 << (s * dy).transpose().format(kHeavyFormat);
267 VLOG(1) << " y(" << iteration << ") is now " << std::setprecision(12)
268 << y.transpose().format(kHeavyFormat);
269
270 // Very import, use the last set of derivatives we picked for our new y
271 // for the next iteration. This avoids re-computing it.
272 derivatives = std::move(next_derivatives);
273
274 ++iteration;
275 if (iteration > 100) {
276 LOG(FATAL) << "Too many iterations";
277 }
278 }
279
280 return y.block(0, 0, problem.states(), 1);
281}
282
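// Evaluates the cost and constraint derivatives at the x block of y and
// size-checks each result against the problem dimensions.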
SparseSolver::Derivatives SparseSolver::ComputeDerivative(
    const SparseConvexProblem &problem,
    const Eigen::Ref<const Eigen::VectorXd> y) {
  // states x 1
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, problem.states(), 1);

  Derivatives derivatives;
  derivatives.gradient = problem.df0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.cols()), 1u);

  derivatives.hessian = problem.ddf0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.cols()), problem.states());

  derivatives.f = problem.f(x);
  CHECK_EQ(static_cast<size_t>(derivatives.f.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.f.cols()), 1u);

  derivatives.df = problem.df(x);
  CHECK_EQ(static_cast<size_t>(derivatives.df.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.df.cols()), problem.states());

  derivatives.A = problem.A();
  CHECK_EQ(static_cast<size_t>(derivatives.A.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.A.cols()), problem.states());

  derivatives.Axmb = derivatives.A * x - problem.b();
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.cols()), 1u);

  return derivatives;
}

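// Logs x, lambda, v, and each derivative block at the requested verbosity.
// The row separator is padded by the width of the pid and prefix so that
// wrapped matrix rows line up under the glog line prefix.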
void SparseSolver::PrintDerivatives(const Derivatives &derivatives,
                                    const Eigen::Ref<const Eigen::VectorXd> y,
                                    std::string_view prefix, int verbosity) {
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, derivatives.hessian.rows(), 1);
  const Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(x.rows(), 0, derivatives.f.rows(), 1);

  if (VLOG_IS_ON(verbosity)) {
    Eigen::IOFormat heavy(Eigen::StreamPrecision, 0, ", ",
                          ",\n                        "
                          "                        ",
                          "[", "]", "[", "]");
    heavy.rowSeparator =
        heavy.rowSeparator +
        std::string(absl::StrCat(getpid()).size() + prefix.size(), ' ');

    const Eigen::Ref<const Eigen::VectorXd> v =
        y.block(x.rows() + lambda.rows(), 0, derivatives.A.rows(), 1);
    VLOG(verbosity) << "  " << prefix << "x: " << x.transpose().format(heavy);
    VLOG(verbosity) << "  " << prefix
                    << "lambda: " << lambda.transpose().format(heavy);
    VLOG(verbosity) << "  " << prefix << "v: " << v.transpose().format(heavy);
    VLOG(verbosity) << "  " << prefix << "hessian: " << derivatives.hessian;
    VLOG(verbosity) << "  " << prefix
                    << "gradient: " << derivatives.gradient;
    VLOG(verbosity) << "  " << prefix << "A: " << derivatives.A;
    VLOG(verbosity) << "  " << prefix
                    << "Ax-b: " << derivatives.Axmb.format(heavy);
    VLOG(verbosity) << "  " << prefix
                    << "f: " << derivatives.f.format(heavy);
    VLOG(verbosity) << "  " << prefix << "df: " << derivatives.df;
  }
}

}  // namespace frc971::solvers