#include "frc971/solvers/sparse_convex.h"

#include <unistd.h>

#include <cmath>
#include <iomanip>

#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "glog/logging.h"

#include <Eigen/Sparse>
#include <Eigen/SparseLU>

namespace frc971 {
namespace solvers {

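// Returns the residual r_t(y) of the primal-dual interior-point method
// (Boyd & Vandenberghe, "Convex Optimization", section 11.7), stacked as
//   r_dual = gradient + df^T * lambda + A^T * v
//   r_cent = -diag(lambda) * f - (1 / t) * ones
//   r_pri  = A * x - b
// where y stacks [x; lambda; v].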
Eigen::VectorXd SparseSolver::Rt(const Derivatives &derivatives,
                                 Eigen::VectorXd y, double t_inverse) {
  Eigen::VectorXd result(derivatives.states() +
                         derivatives.inequality_constraints() +
                         derivatives.equality_constraints());

  // states x 1
  Eigen::Ref<Eigen::VectorXd> r_dual =
      result.block(0, 0, derivatives.states(), 1);
  // inequality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_cent = result.block(
      derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<Eigen::VectorXd> r_pri =
      result.block(derivatives.states() + derivatives.inequality_constraints(),
                   0, derivatives.equality_constraints(), 1);

  // inequality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(derivatives.states(), 0, derivatives.inequality_constraints(), 1);
  // equality_constraints x 1
  Eigen::Ref<const Eigen::VectorXd> v =
      y.block(derivatives.states() + derivatives.inequality_constraints(), 0,
              derivatives.equality_constraints(), 1);

  r_dual = derivatives.gradient + derivatives.df.transpose() * lambda +
           derivatives.A.transpose() * v;
  r_cent = -lambda.array() * derivatives.f.array() - t_inverse;
  r_pri = derivatives.Axmb;

  return result;
}

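// Appends every nonzero entry of matrix to triplet_list, offset by
// (starting_row, starting_column).  These overloads are used below to
// assemble the sparse KKT matrix block by block.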
void AppendColumns(std::vector<Eigen::Triplet<double>> *triplet_list,
                   size_t starting_row, size_t starting_column,
                   const Eigen::SparseMatrix<double> &matrix) {
  for (int k = 0; k < matrix.outerSize(); ++k) {
    for (Eigen::SparseMatrix<double, Eigen::ColMajor>::InnerIterator it(matrix,
                                                                        k);
         it; ++it) {
      (*triplet_list)
          .emplace_back(it.row() + starting_row, it.col() + starting_column,
                        it.value());
    }
  }
}

void AppendColumns(
    std::vector<Eigen::Triplet<double>> *triplet_list, size_t starting_row,
    size_t starting_column,
    const Eigen::DiagonalMatrix<double, Eigen::Dynamic> &matrix) {
  for (int k = 0; k < matrix.rows(); ++k) {
    (*triplet_list)
        .emplace_back(k + starting_row, k + starting_column,
                      matrix.diagonal()(k));
  }
}

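// Solves the convex problem with a primal-dual interior-point method, in the
// style of section 11.7 of Boyd & Vandenberghe's "Convex Optimization".
// X_initial must be feasible with respect to the inequality constraints
// (f(X_initial) <= 0); this is CHECKed below.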
Eigen::VectorXd SparseSolver::Solve(
    const SparseConvexProblem &problem,
    Eigen::Ref<const Eigen::VectorXd> X_initial) {
  CHECK_EQ(static_cast<size_t>(X_initial.rows()), problem.states());
  CHECK_EQ(X_initial.cols(), 1);

  const Eigen::IOFormat kHeavyFormat(Eigen::StreamPrecision, 0, ", ",
                                     ",\n                        "
                                     "                        ",
                                     "[", "]", "[", "]");

  Eigen::VectorXd y = Eigen::VectorXd::Constant(
      problem.states() + problem.inequality_constraints() +
          problem.equality_constraints(),
      1.0);
  y.block(0, 0, problem.states(), 1) = X_initial;

  Derivatives derivatives = ComputeDerivative(problem, y);

  for (size_t i = 0; i < problem.inequality_constraints(); ++i) {
    CHECK_LE(derivatives.f(i, 0), 0.0)
        << ": Initial state " << X_initial.transpose().format(kHeavyFormat)
        << " not feasible";
  }

  PrintDerivatives(derivatives, y, "", 1);

  size_t iteration = 0;
  while (true) {
    // Solve for the primal-dual search direction by taking a Newton step on
    // the residual r_t.

    // inequality_constraints x 1
    Eigen::Ref<const Eigen::VectorXd> lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

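    // Surrogate duality gap nu = -f(x)^T * lambda, and the resulting barrier
    // parameter t = kMu * m / nu (stored as its inverse), where m is the
    // number of inequality constraints.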
    const double nu = -(derivatives.f.transpose() * lambda)(0, 0);
    const double t_inverse = nu / (kMu * lambda.rows());
    Eigen::VectorXd rt_orig = Rt(derivatives, y, t_inverse);

    std::vector<Eigen::Triplet<double>> triplet_list;

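    // Assemble the sparse KKT matrix for the Newton system:
    //   [ H                   df^T      A^T ]
    //   [ -diag(lambda) * df  -diag(f)  0   ]
    //   [ A                   0         0   ]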
    AppendColumns(&triplet_list, 0, 0, derivatives.hessian);
    AppendColumns(&triplet_list, 0, problem.states(),
                  derivatives.df.transpose());
    AppendColumns(&triplet_list, 0,
                  problem.states() + problem.inequality_constraints(),
                  derivatives.A.transpose());

    // TODO(austin): I think I can do better on the next two, making them more
    // efficient and not creating the intermediate matrix.
    AppendColumns(&triplet_list, problem.states(), 0,
                  -(Eigen::DiagonalMatrix<double, Eigen::Dynamic>(lambda) *
                    derivatives.df));
    AppendColumns(
        &triplet_list, problem.states(), problem.states(),
        Eigen::DiagonalMatrix<double, Eigen::Dynamic>(-derivatives.f));

    AppendColumns(&triplet_list,
                  problem.states() + problem.inequality_constraints(), 0,
                  derivatives.A);

    Eigen::SparseMatrix<double> m1(
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints(),
        problem.states() + problem.inequality_constraints() +
            problem.equality_constraints());
    m1.setFromTriplets(triplet_list.begin(), triplet_list.end());

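    // Factor the KKT matrix with sparse LU and solve m1 * dy = -rt for the
    // combined Newton step [dx; dlambda; dv].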
    Eigen::SparseLU<Eigen::SparseMatrix<double>> solver;
    solver.analyzePattern(m1);
    solver.factorize(m1);
    Eigen::VectorXd dy = solver.solve(-rt_orig);

    Eigen::Ref<Eigen::VectorXd> dlambda =
        dy.block(problem.states(), 0, problem.inequality_constraints(), 1);

    double s = 1.0;

    // Now, time to do the line search.
    //
    // Start by keeping lambda positive: make sure our step doesn't let lambda
    // cross 0.
    for (int i = 0; i < dlambda.rows(); ++i) {
      if (lambda(i) + s * dlambda(i) < 0.0) {
        // Ignore tiny steps in lambda.  They cause issues when we get really
        // close to satisfying our constraints but haven't converged the rest
        // of the problem, and we start to run into rounding issues in the
        // matrix solve.
        if (dlambda(i) < 0.0 && dlambda(i) > -1e-12) {
          VLOG(1) << " lambda(" << i << ") " << lambda(i) << " + " << s
                  << " * " << dlambda(i) << " -> s would be now "
                  << -lambda(i) / dlambda(i);
          dlambda(i) = 0.0;
          VLOG(1) << " dy -> " << std::setprecision(12) << std::fixed
                  << std::setfill(' ') << dy.transpose().format(kHeavyFormat);
          continue;
        }
        VLOG(1) << " lambda(" << i << ") " << lambda(i) << " + " << s << " * "
                << dlambda(i) << " -> s now " << -lambda(i) / dlambda(i);
        s = -lambda(i) / dlambda(i);
      }
    }

    VLOG(1) << " After lambda line search, s is " << s;

    VLOG(3) << " Initial step " << iteration << " -> " << std::setprecision(12)
            << std::fixed << std::setfill(' ')
            << dy.transpose().format(kHeavyFormat);
    VLOG(3) << " rt -> " << std::setprecision(12) << std::fixed
            << std::setfill(' ') << rt_orig.transpose().format(kHeavyFormat);

    const double rt_orig_squared_norm = rt_orig.squaredNorm();

    Eigen::VectorXd next_y;
    Eigen::VectorXd rt;
    Derivatives next_derivatives;
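    // Backtracking line search: shrink s by kBeta until the candidate point
    // keeps the inequality constraints feasible (f <= 0) and, once the
    // equality residual is small, satisfies the sufficient decrease condition
    //   ||r_t(y + s * dy)|| <= (1 - kAlpha * s) * ||r_t(y)||.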
    while (true) {
      next_y = y + s * dy;
      next_derivatives = ComputeDerivative(problem, next_y);
      rt = Rt(next_derivatives, next_y, t_inverse);

      const Eigen::Ref<const Eigen::VectorXd> next_x =
          next_y.block(0, 0, next_derivatives.hessian.rows(), 1);
      const Eigen::Ref<const Eigen::VectorXd> next_lambda =
          next_y.block(next_x.rows(), 0, next_derivatives.f.rows(), 1);

      const Eigen::Ref<const Eigen::VectorXd> next_v = next_y.block(
          next_x.rows() + next_lambda.rows(), 0, next_derivatives.A.rows(), 1);

      VLOG(1) << " next_rt(" << iteration << ") is " << rt.norm() << " -> "
              << std::setprecision(12) << std::fixed << std::setfill(' ')
              << rt.transpose().format(kHeavyFormat);

      PrintDerivatives(next_derivatives, next_y, "next_", 3);

      if (next_derivatives.f.maxCoeff() > 0.0) {
        VLOG(1) << " f_next > 0.0 -> " << next_derivatives.f.maxCoeff()
                << ", continuing line search.";
        s *= kBeta;
      } else if (next_derivatives.Axmb.squaredNorm() < 0.1 &&
                 rt.squaredNorm() >
                     std::pow(1.0 - kAlpha * s, 2.0) * rt_orig_squared_norm) {
        VLOG(1) << " |Rt+1| > |Rt| " << rt.norm() << " > " << rt_orig.norm()
                << ", drt -> " << std::setprecision(12) << std::fixed
                << std::setfill(' ')
                << (rt_orig - rt).transpose().format(kHeavyFormat);
        s *= kBeta;
      } else {
        break;
      }
    }

    VLOG(1) << " Terminated line search with s " << s << ", " << rt.norm()
            << "(|Rt+1|) < " << rt_orig.norm() << "(|Rt|)";
    y = next_y;

    const Eigen::Ref<const Eigen::VectorXd> next_lambda =
        y.block(problem.states(), 0, problem.inequality_constraints(), 1);

    // See if we hit our convergence criteria: the primal residual, the dual
    // residual, and the surrogate duality gap all need to drop below their
    // tolerances.
    const double r_primal_squared_norm =
        rt.block(problem.states() + problem.inequality_constraints(), 0,
                 problem.equality_constraints(), 1)
            .squaredNorm();
    VLOG(1) << " rt_next(" << iteration << ") is " << rt.norm() << " -> "
            << std::setprecision(12) << std::fixed << std::setfill(' ')
            << rt.transpose().format(kHeavyFormat);
    if (r_primal_squared_norm < kEpsilonF * kEpsilonF) {
      const double r_dual_squared_norm =
          rt.block(0, 0, problem.states(), 1).squaredNorm();
      if (r_dual_squared_norm < kEpsilonF * kEpsilonF) {
        const double next_nu =
            -(next_derivatives.f.transpose() * next_lambda)(0, 0);
        if (next_nu < kEpsilon) {
          VLOG(1) << " r_primal(" << iteration << ") -> "
                  << std::sqrt(r_primal_squared_norm) << " < " << kEpsilonF
                  << ", r_dual(" << iteration << ") -> "
                  << std::sqrt(r_dual_squared_norm) << " < " << kEpsilonF
                  << ", nu(" << iteration << ") -> " << next_nu << " < "
                  << kEpsilon;
          break;
        } else {
          VLOG(1) << " nu(" << iteration << ") -> " << next_nu
                  << " >= " << kEpsilon << ", not done yet";
        }
      } else {
        VLOG(1) << " r_dual(" << iteration << ") -> "
                << std::sqrt(r_dual_squared_norm) << " >= " << kEpsilonF
                << ", not done yet";
      }
    } else {
      VLOG(1) << " r_primal(" << iteration << ") -> "
              << std::sqrt(r_primal_squared_norm) << " >= " << kEpsilonF
              << ", not done yet";
    }
    VLOG(1) << " step(" << iteration << ") " << std::setprecision(12)
            << (s * dy).transpose().format(kHeavyFormat);
    VLOG(1) << " y(" << iteration << ") is now " << std::setprecision(12)
            << y.transpose().format(kHeavyFormat);

    // Very important: reuse the derivatives we just computed for the new y on
    // the next iteration.  This avoids re-computing them.
    derivatives = std::move(next_derivatives);

    ++iteration;
    if (iteration > 100) {
      LOG(FATAL) << "Too many iterations";
    }
  }

  return y.block(0, 0, problem.states(), 1);
}

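// Evaluates the cost gradient and Hessian, the inequality constraints and
// their Jacobian, and the equality constraint residual Ax - b at the x
// portion of y, CHECKing that every dimension matches the problem.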
SparseSolver::Derivatives SparseSolver::ComputeDerivative(
    const SparseConvexProblem &problem,
    const Eigen::Ref<const Eigen::VectorXd> y) {
  // states x 1
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, problem.states(), 1);

  Derivatives derivatives;
  derivatives.gradient = problem.df0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.gradient.cols()), 1u);

  derivatives.hessian = problem.ddf0(x);
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.rows()), problem.states());
  CHECK_EQ(static_cast<size_t>(derivatives.hessian.cols()), problem.states());

  derivatives.f = problem.f(x);
  CHECK_EQ(static_cast<size_t>(derivatives.f.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.f.cols()), 1u);

  derivatives.df = problem.df(x);
  CHECK_EQ(static_cast<size_t>(derivatives.df.rows()),
           problem.inequality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.df.cols()), problem.states());

  derivatives.A = problem.A();
  CHECK_EQ(static_cast<size_t>(derivatives.A.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.A.cols()), problem.states());

  derivatives.Axmb =
      derivatives.A * y.block(0, 0, problem.states(), 1) - problem.b();
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.rows()),
           problem.equality_constraints());
  CHECK_EQ(static_cast<size_t>(derivatives.Axmb.cols()), 1u);

  return derivatives;
}

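// Logs x, lambda, v, and all of the derivative terms at the requested
// verbosity, prefixing each name with prefix.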
void SparseSolver::PrintDerivatives(const Derivatives &derivatives,
                                    const Eigen::Ref<const Eigen::VectorXd> y,
                                    std::string_view prefix, int verbosity) {
  const Eigen::Ref<const Eigen::VectorXd> x =
      y.block(0, 0, derivatives.hessian.rows(), 1);
  const Eigen::Ref<const Eigen::VectorXd> lambda =
      y.block(x.rows(), 0, derivatives.f.rows(), 1);

  if (VLOG_IS_ON(verbosity)) {
    Eigen::IOFormat heavy(Eigen::StreamPrecision, 0, ", ",
                          ",\n                        "
                          "                        ",
                          "[", "]", "[", "]");
    heavy.rowSeparator =
        heavy.rowSeparator +
        std::string(absl::StrCat(getpid()).size() + prefix.size(), ' ');

    const Eigen::Ref<const Eigen::VectorXd> v =
        y.block(x.rows() + lambda.rows(), 0, derivatives.A.rows(), 1);
    VLOG(verbosity) << " " << prefix << "x: " << x.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix
                    << "lambda: " << lambda.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix << "v: " << v.transpose().format(heavy);
    VLOG(verbosity) << " " << prefix << "hessian: " << derivatives.hessian;
    VLOG(verbosity) << " " << prefix
                    << "gradient: " << derivatives.gradient;
    VLOG(verbosity) << " " << prefix << "A: " << derivatives.A;
    VLOG(verbosity) << " " << prefix
                    << "Ax-b: " << derivatives.Axmb.format(heavy);
    VLOG(verbosity) << " " << prefix
                    << "f: " << derivatives.f.format(heavy);
    VLOG(verbosity) << " " << prefix << "df: " << derivatives.df;
  }
}

}  // namespace solvers
}  // namespace frc971