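// Sparse primal-dual interior-point solver for convex problems of the form
//   minimize    f0(x)
//   subject to  f(x) <= 0
//               A x == b
// The structure below mirrors the primal-dual interior-point method of Boyd &
// Vandenberghe, "Convex Optimization", chapter 11: the starting point must
// strictly satisfy the inequality constraints, while A x == b only needs to
// hold at convergence.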
#include "frc971/solvers/sparse_convex.h"

#include <unistd.h>

#include <Eigen/Sparse>
#include <Eigen/SparseLU>

#include "absl/strings/str_cat.h"
#include "glog/logging.h"

namespace frc971 {
namespace solvers {

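// Computes the residual r_t(y) at y = (x, lambda, v) for barrier parameter t
// (passed as t_inverse = 1 / t).  Stacked as in the usual primal-dual
// interior-point construction:
//
//   r_dual = gradient(f0) + df^T * lambda + A^T * v
//   r_cent = -diag(lambda) * f - (1 / t) * ones
//   r_pri  = A * x - b
//
// The Newton step in Solve() drives this residual towards zero.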
Eigen::VectorXd SparseSolver::Rt(const Derivatives &derivatives,
                                 Eigen::VectorXd y, double t_inverse) {
  Eigen::VectorXd result(derivatives.states() +
                         derivatives.inequality_constraints() +
                         derivatives.equality_constraints());

  // states x 1
  Eigen::Ref<Eigen::VectorXd> r_dual =
      result.block(0, 0, derivatives.states(), 1);
  // inequality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_cent = result.block(
      derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_pri =
      result.block(derivatives.states() + derivatives.inequality_constraints(),
                   0, derivatives.equality_constraints(), 1);

  // inequality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> v =
      y.block(derivatives.states() + derivatives.inequality_constraints(), 0,
              derivatives.equality_constraints(), 1);

  r_dual = derivatives.gradient + derivatives.df.transpose() * lambda +
           derivatives.A.transpose() * v;
  r_cent = -lambda.array() * derivatives.f.array() - t_inverse;
  r_pri = derivatives.Axmb;

  return result;
}

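// Appends the nonzero entries of matrix, offset by (starting_row,
// starting_column), to triplet_list.  Solve() uses these helpers to assemble
// the block-structured KKT matrix with a single setFromTriplets() call.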
void AppendColumns(std::vector<Eigen::Triplet<double>> *triplet_list,
                   size_t starting_row, size_t starting_column,
                   const Eigen::SparseMatrix<double> &matrix) {
  for (int k = 0; k < matrix.outerSize(); ++k) {
    for (Eigen::SparseMatrix<double, Eigen::ColMajor>::InnerIterator it(matrix,
                                                                        k);
         it; ++it) {
      (*triplet_list)
          .emplace_back(it.row() + starting_row, it.col() + starting_column,
                        it.value());
    }
  }
}

void AppendColumns(
    std::vector<Eigen::Triplet<double>> *triplet_list, size_t starting_row,
    size_t starting_column,
    const Eigen::DiagonalMatrix<double, Eigen::Dynamic> &matrix) {
  for (int k = 0; k < matrix.rows(); ++k) {
    (*triplet_list)
        .emplace_back(k + starting_row, k + starting_column,
                      matrix.diagonal()(k));
  }
}

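// A sketch of each iteration of the loop below:
//   1. Compute the surrogate duality gap nu = -f^T * lambda and from it the
//      barrier parameter (stored inverted as t_inverse).
//   2. Assemble the sparse KKT system and solve it for the Newton step dy.
//   3. Line search: cap s so lambda stays positive, then backtrack by kBeta
//      until f stays negative and the residual norm has dropped enough.
//   4. Take y += s * dy and stop once r_pri, r_dual, and nu are all small.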
Eigen::VectorXd SparseSolver::Solve(
    const SparseConvexProblem &problem,
    Eigen::Ref<const Eigen::VectorXd> X_initial) {
  CHECK_EQ(static_cast<size_t>(X_initial.rows()), problem.states());
  CHECK_EQ(X_initial.cols(), 1);

  const Eigen::IOFormat kHeavyFormat(Eigen::StreamPrecision, 0, ", ",
                                     ",\n "
                                     " ",
                                     "[", "]", "[", "]");

  Eigen::VectorXd y = Eigen::VectorXd::Constant(
      problem.states() + problem.inequality_constraints() +
          problem.equality_constraints(),
      1.0);
  y.block(0, 0, problem.states(), 1) = X_initial;

  Derivatives derivatives = ComputeDerivative(problem, y);

  for (size_t i = 0; i < problem.inequality_constraints(); ++i) {
    CHECK_LE(derivatives.f(i, 0), 0.0)
        << ": Initial state " << X_initial.transpose().format(kHeavyFormat)
        << " not feasible";
  }

  PrintDerivatives(derivatives, y, "", 1);

  size_t iteration = 0;
  while (true) {
    // Solve for the primal-dual search direction by taking a single Newton
    // step.

    // inequality_constraints x 1
    Eigen::Ref<const Eigen::VectorXd> lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

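    // nu is the surrogate duality gap -f^T * lambda; the barrier parameter t
    // is chosen as kMu * (number of inequality constraints) / nu, carried
    // around inverted to match Rt().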
    const double nu = -(derivatives.f.transpose() * lambda)(0, 0);
    const double t_inverse = nu / (kMu * lambda.rows());
    Eigen::VectorXd rt_orig = Rt(derivatives, y, t_inverse);

    std::vector<Eigen::Triplet<double>> triplet_list;

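    // Assemble the Newton KKT matrix block by block:
    //
    //   [ hessian            df^T      A^T ]
    //   [ -diag(lambda)*df   -diag(f)  0   ]
    //   [ A                  0         0   ]
    //
    // The zero blocks simply never receive triplets.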
    AppendColumns(&triplet_list, 0, 0, derivatives.hessian);
    AppendColumns(&triplet_list, 0, problem.states(),
                  derivatives.df.transpose());
    AppendColumns(&triplet_list, 0,
                  problem.states() + problem.inequality_constraints(),
                  derivatives.A.transpose());

    // TODO(austin): I think I can do better on the next 2, making them more
    // efficient and not creating the intermediate matrix.
    AppendColumns(&triplet_list, problem.states(), 0,
                  -(Eigen::DiagonalMatrix<double, Eigen::Dynamic>(lambda) *
                    derivatives.df));
    AppendColumns(
        &triplet_list, problem.states(), problem.states(),
        Eigen::DiagonalMatrix<double, Eigen::Dynamic>(-derivatives.f));

    AppendColumns(&triplet_list,
                  problem.states() + problem.inequality_constraints(), 0,
                  derivatives.A);

    Eigen::SparseMatrix<double> m1(
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints(),
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints());
    m1.setFromTriplets(triplet_list.begin(), triplet_list.end());

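    // Factor the KKT matrix with sparse LU and solve for the full primal-dual
    // step dy = (dx, dlambda, dv) from Dr_t(y) * dy = -r_t(y).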
    Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
    solver.analyzePattern(m1);
    solver.factorize(m1);
    Eigen::VectorXd dy = solver.solve(-rt_orig);

    Eigen::Ref<Eigen::VectorXd> dlambda =
        dy.block(problem.states(), 0, problem.inequality_constraints(), 1);

    double s = 1.0;

    // Now, time to do line search.
    //
    // Start by keeping lambda positive.  Make sure our step doesn't let
    // lambda cross 0.
    for (int i = 0; i < dlambda.rows(); ++i) {
      if (lambda(i) + s * dlambda(i) < 0.0) {
        // Ignore tiny steps in lambda.  They cause issues when we get really
        // close to having our constraints met but haven't converged the rest
        // of the problem and start to run into rounding issues in the matrix
        // solve portion.
        if (dlambda(i) < 0.0 && dlambda(i) > -1e-12) {
          VLOG(1) << " lambda(" << i << ") " << lambda(i) << " + " << s
                  << " * " << dlambda(i) << " -> s would now be "
                  << -lambda(i) / dlambda(i);
          dlambda(i) = 0.0;
          VLOG(1) << " dy -> " << std::setprecision(12) << std::fixed
                  << std::setfill(' ') << dy.transpose().format(kHeavyFormat);
          continue;
        }
        VLOG(1) << " lambda(" << i << ") " << lambda(i) << " + " << s << " * "
                << dlambda(i) << " -> s now " << -lambda(i) / dlambda(i);
        s = -lambda(i) / dlambda(i);
      }
    }

    VLOG(1) << " After lambda line search, s is " << s;

    VLOG(3) << " Initial step " << iteration << " -> " << std::setprecision(12)
            << std::fixed << std::setfill(' ')
            << dy.transpose().format(kHeavyFormat);
    VLOG(3) << " rt -> " << std::setprecision(12) << std::fixed
            << std::setfill(' ') << rt_orig.transpose().format(kHeavyFormat);

    const double rt_orig_squared_norm = rt_orig.squaredNorm();

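    // Backtracking line search: shrink s by kBeta until the trial point keeps
    // f < 0 everywhere and, once the equality constraint residual is small,
    // satisfies ||rt|| <= (1 - kAlpha * s) * ||rt_orig||.  Squared norms are
    // compared to avoid the square roots.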
    Eigen::VectorXd next_y;
    Eigen::VectorXd rt;
    Derivatives next_derivatives;
    while (true) {
      next_y = y + s * dy;
      next_derivatives = ComputeDerivative(problem, next_y);
      rt = Rt(next_derivatives, next_y, t_inverse);

      const Eigen::Ref<const Eigen::VectorXd> next_x =
          next_y.block(0, 0, next_derivatives.hessian.rows(), 1);
      const Eigen::Ref<const Eigen::VectorXd> next_lambda =
          next_y.block(next_x.rows(), 0, next_derivatives.f.rows(), 1);

      const Eigen::Ref<const Eigen::VectorXd> next_v = next_y.block(
          next_x.rows() + next_lambda.rows(), 0, next_derivatives.A.rows(), 1);

      VLOG(1) << " next_rt(" << iteration << ") is " << rt.norm() << " -> "
              << std::setprecision(12) << std::fixed << std::setfill(' ')
              << rt.transpose().format(kHeavyFormat);

      PrintDerivatives(next_derivatives, next_y, "next_", 3);

      if (next_derivatives.f.maxCoeff() > 0.0) {
        VLOG(1) << " f_next > 0.0 -> " << next_derivatives.f.maxCoeff()
                << ", continuing line search.";
        s *= kBeta;
      } else if (next_derivatives.Axmb.squaredNorm() < 0.1 &&
                 rt.squaredNorm() >
                     std::pow(1.0 - kAlpha * s, 2.0) * rt_orig_squared_norm) {
        VLOG(1) << " |Rt+1| > (1 - alpha * s) * |Rt| " << rt.norm() << " > "
                << rt_orig.norm() << ", drt -> " << std::setprecision(12)
                << std::fixed << std::setfill(' ')
                << (rt_orig - rt).transpose().format(kHeavyFormat);
        s *= kBeta;
      } else {
        break;
      }
    }

    VLOG(1) << " Terminated line search with s " << s << ", " << rt.norm()
            << "(|Rt+1|) < " << rt_orig.norm() << "(|Rt|)";
    y = next_y;

    const Eigen::Ref<const Eigen::VectorXd> next_lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

    // See if we hit our convergence criteria.
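    // We are done when all three are small enough:
    //   ||r_pri||  < kEpsilonF  (equality constraints satisfied),
    //   ||r_dual|| < kEpsilonF  (stationarity satisfied), and
    //   nu         < kEpsilon   (duality gap closed).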
    const double r_primal_squared_norm =
        rt.block(problem.states() + problem.inequality_constraints(), 0,
                 problem.equality_constraints(), 1)
            .squaredNorm();
    VLOG(1) << " rt_next(" << iteration << ") is " << rt.norm() << " -> "
            << std::setprecision(12) << std::fixed << std::setfill(' ')
            << rt.transpose().format(kHeavyFormat);
    if (r_primal_squared_norm < kEpsilonF * kEpsilonF) {
      const double r_dual_squared_norm =
          rt.block(0, 0, problem.states(), 1).squaredNorm();
      if (r_dual_squared_norm < kEpsilonF * kEpsilonF) {
        const double next_nu =
            -(next_derivatives.f.transpose() * next_lambda)(0, 0);
        if (next_nu < kEpsilon) {
          VLOG(1) << " r_primal(" << iteration << ") -> "
                  << std::sqrt(r_primal_squared_norm) << " < " << kEpsilonF
                  << ", r_dual(" << iteration << ") -> "
                  << std::sqrt(r_dual_squared_norm) << " < " << kEpsilonF
                  << ", nu(" << iteration << ") -> " << next_nu << " < "
                  << kEpsilon;
          break;
        } else {
          VLOG(1) << " nu(" << iteration << ") -> " << next_nu
                  << " >= " << kEpsilon << ", not done yet";
        }
      } else {
        VLOG(1) << " r_dual(" << iteration << ") -> "
                << std::sqrt(r_dual_squared_norm) << " >= " << kEpsilonF
                << ", not done yet";
      }
    } else {
      VLOG(1) << " r_primal(" << iteration << ") -> "
              << std::sqrt(r_primal_squared_norm) << " >= " << kEpsilonF
              << ", not done yet";
    }
    VLOG(1) << " step(" << iteration << ") " << std::setprecision(12)
            << (s * dy).transpose().format(kHeavyFormat);
    VLOG(1) << " y(" << iteration << ") is now " << std::setprecision(12)
            << y.transpose().format(kHeavyFormat);

    // Very important: reuse the last set of derivatives we computed for our
    // new y for the next iteration.  This avoids re-computing them.
    derivatives = std::move(next_derivatives);

    ++iteration;
    if (iteration > 100) {
      LOG(FATAL) << "Too many iterations";
    }
  }

  return y.block(0, 0, problem.states(), 1);
}

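// Evaluates the cost gradient and hessian, the inequality constraints f and
// their jacobian df, and the equality constraint residual A * x - b at the x
// portion of y, CHECKing that every dimension matches the problem.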
SparseSolver::Derivatives SparseSolver::ComputeDerivative(
    const SparseConvexProblem &problem,
    const Eigen::Ref<const Eigen::VectorXd> y) {
  // states x 1
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, problem.states(), 1);

  Derivatives derivatives;
  derivatives.gradient = problem.df0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.cols()), 1u);

  derivatives.hessian = problem.ddf0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.cols()), problem.states());

  derivatives.f = problem.f(x);
  CHECK_EQ(static_cast<size_t>(derivatives.f.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.f.cols()), 1u);

  derivatives.df = problem.df(x);
  CHECK_EQ(static_cast<size_t>(derivatives.df.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.df.cols()), problem.states());

  derivatives.A = problem.A();
  CHECK_EQ(static_cast<size_t>(derivatives.A.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.A.cols()), problem.states());

  derivatives.Axmb =
      derivatives.A * y.block(0, 0, problem.states(), 1) - problem.b();
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.cols()), 1u);

  return derivatives;
}

void SparseSolver::PrintDerivatives(const Derivatives &derivatives,
                                    const Eigen::Ref<const Eigen::VectorXd> y,
                                    std::string_view prefix, int verbosity) {
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, derivatives.hessian.rows(), 1);
  const Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(x.rows(), 0, derivatives.f.rows(), 1);

  if (VLOG_IS_ON(verbosity)) {
    Eigen::IOFormat heavy(Eigen::StreamPrecision, 0, ", ",
                          ",\n "
                          " ",
                          "[", "]", "[", "]");
    heavy.rowSeparator =
        heavy.rowSeparator +
        std::string(absl::StrCat(getpid()).size() + prefix.size(), ' ');

    const Eigen::Ref<const Eigen::VectorXd> v =
        y.block(x.rows() + lambda.rows(), 0, derivatives.A.rows(), 1);
    VLOG(verbosity) << " " << prefix << "x: " << x.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix
                    << "lambda: " << lambda.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix << "v: " << v.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix << "hessian: " << derivatives.hessian;
    VLOG(verbosity) << " " << prefix
                    << "gradient: " << derivatives.gradient;
    VLOG(verbosity) << " " << prefix << "A: " << derivatives.A;
    VLOG(verbosity) << " " << prefix
                    << "Ax-b: " << derivatives.Axmb.format(heavy);
    VLOG(verbosity) << " " << prefix
                    << "f: " << derivatives.f.format(heavy);
    VLOG(verbosity) << " " << prefix << "df: " << derivatives.df;
  }
}

}  // namespace solvers
}  // namespace frc971