#include <chrono>
#include <cmath>
#include <thread>

#include <ct/optcon/optcon.h>

#include "third_party/gflags/include/gflags/gflags.h"
#include "third_party/matplotlib-cpp/matplotlibcpp.h"

DEFINE_double(boundary_scalar, 12.0, "Magnitude of the boundary cost.");
DEFINE_double(boundary_rate, 25.0, "Sigmoid rate");
DEFINE_bool(sigmoid, true, "If true, sigmoid, else exponential.");
DEFINE_double(round_corner, 0.0, "Corner radius of the constraint box.");

// This code is for analysis and simulation of a double jointed arm. It is an
// attempt to see if an MPC could work for arm control under constraints.

// Describes a double jointed arm.
// A large chunk of this code comes from the library's demos. Most of the raw
// pointers, shared_ptrs, and non-const references follow the library's
// conventions.
template <typename SCALAR>
class MySecondOrderSystem : public ::ct::core::ControlledSystem<4, 2, SCALAR> {
 public:
  static const size_t STATE_DIM = 4;
  static const size_t CONTROL_DIM = 2;

  MySecondOrderSystem(::std::shared_ptr<::ct::core::Controller<4, 2, SCALAR>>
                          controller = nullptr)
      : ::ct::core::ControlledSystem<4, 2, SCALAR>(
            controller, ::ct::core::SYSTEM_TYPE::GENERAL) {}

  MySecondOrderSystem(const MySecondOrderSystem &arg)
      : ::ct::core::ControlledSystem<4, 2, SCALAR>(arg) {}

  // Deep copy
  MySecondOrderSystem *clone() const override {
    return new MySecondOrderSystem(*this);
  }
  virtual ~MySecondOrderSystem() {}

  // Evaluate the system dynamics.
  //
  // Args:
  //   state: current state (position0, velocity0, position1, velocity1)
  //   t: current time (gets ignored)
  //   control: control action (acceleration0, acceleration1)
  //   derivative: (velocity0, acceleration0, velocity1, acceleration1)
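  //
  // Each joint is modeled as an independent double integrator, so in
  // continuous time:
  //   d/dt position_i = velocity_i
  //   d/dt velocity_i = control_i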
  virtual void computeControlledDynamics(
      const ::ct::core::StateVector<4, SCALAR> &state, const SCALAR & /*t*/,
      const ::ct::core::ControlVector<2, SCALAR> &control,
      ::ct::core::StateVector<4, SCALAR> &derivative) override {
    derivative(0) = state(1);
    derivative(1) = control(0);
    derivative(2) = state(3);
    derivative(3) = control(1);
  }
};

template <size_t STATE_DIM, size_t CONTROL_DIM, typename SCALAR_EVAL = double,
          typename SCALAR = SCALAR_EVAL>
class MyTermStateBarrier : public ::ct::optcon::TermBase<STATE_DIM, CONTROL_DIM,
                                                          SCALAR_EVAL, SCALAR> {
 public:
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW

  typedef Eigen::Matrix<SCALAR_EVAL, STATE_DIM, 1> state_vector_t;
  typedef Eigen::Matrix<SCALAR_EVAL, STATE_DIM, STATE_DIM> state_matrix_t;
  typedef Eigen::Matrix<SCALAR_EVAL, CONTROL_DIM, CONTROL_DIM> control_matrix_t;
  typedef Eigen::Matrix<SCALAR_EVAL, CONTROL_DIM, STATE_DIM>
      control_state_matrix_t;
  typedef Eigen::Matrix<SCALAR_EVAL, STATE_DIM, STATE_DIM>
      state_matrix_double_t;
  typedef Eigen::Matrix<SCALAR_EVAL, CONTROL_DIM, CONTROL_DIM>
      control_matrix_double_t;
  typedef Eigen::Matrix<SCALAR_EVAL, CONTROL_DIM, STATE_DIM>
      control_state_matrix_double_t;

  MyTermStateBarrier() {}

  MyTermStateBarrier(const MyTermStateBarrier & /*arg*/) {}

  static constexpr double kEpsilon = 1.0e-7;

  virtual ~MyTermStateBarrier() {}

  MyTermStateBarrier<STATE_DIM, CONTROL_DIM, SCALAR_EVAL, SCALAR> *clone()
      const override {
    return new MyTermStateBarrier(*this);
  }

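  // Returns the soft barrier cost for the keep-out corner in
  // (position0, position1) space.  min_distance is the signed distance to the
  // keep-out boundary (positive inside the violation region, negative
  // outside), and the returned cost is
  //   boundary_scalar / (1 + exp(-min_distance * boundary_rate))   (sigmoid)
  // or
  //   boundary_scalar * exp(min_distance * boundary_rate)          (exponential)
  // so it rises smoothly as the state approaches and enters the region.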
  virtual SCALAR evaluate(const Eigen::Matrix<SCALAR, STATE_DIM, 1> &x,
                          const Eigen::Matrix<SCALAR, CONTROL_DIM, 1> & /*u*/,
                          const SCALAR & /*t*/) override {
    SCALAR min_distance;

    // Round the corner by this amount.
    SCALAR round_corner = SCALAR(FLAGS_round_corner);

    // Positive means violation.
    SCALAR theta0_distance = x(0, 0) - (0.5 + round_corner);
    SCALAR theta1_distance = (0.8 - round_corner) - x(2, 0);
    if (theta0_distance < SCALAR(0.0) && theta1_distance < SCALAR(0.0)) {
      // Ok, both outside.  Return the corner distance.
      min_distance = -hypot(theta1_distance, theta0_distance);
    } else if (theta0_distance < SCALAR(0.0) && theta1_distance > SCALAR(0.0)) {
      min_distance = theta0_distance;
    } else if (theta0_distance > SCALAR(0.0) && theta1_distance < SCALAR(0.0)) {
      min_distance = theta1_distance;
    } else {
      min_distance = ::std::min(theta0_distance, theta1_distance);
    }
    min_distance += round_corner;
    if (FLAGS_sigmoid) {
      return FLAGS_boundary_scalar /
             (1.0 + ::std::exp(-min_distance * FLAGS_boundary_rate));
    } else {
      // Values of 4 and 15 work semi-reasonably.
      return FLAGS_boundary_scalar *
             ::std::exp(min_distance * FLAGS_boundary_rate);
    }
  }

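  // First derivative of the barrier cost w.r.t. the state, approximated with a
  // central finite difference on each position coordinate:
  //   d(cost)/dx_i ~= (evaluate(x + eps * e_i) - evaluate(x - eps * e_i)) / (2 * eps)
  // Only the even (position) entries are perturbed; the cost does not depend
  // on the velocities, so their entries stay zero.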
  ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> stateDerivative(
      const ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> &x,
      const ::ct::core::ControlVector<CONTROL_DIM, SCALAR_EVAL> &u,
      const SCALAR_EVAL &t) override {
    SCALAR epsilon = SCALAR(kEpsilon);

    ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> result =
        ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL>::Zero();

    // Perturb x along both position axes and return the result.
    for (size_t i = 0; i < STATE_DIM; i += 2) {
      ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> plus_perturbed_x = x;
      ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> minus_perturbed_x = x;
      plus_perturbed_x[i] += epsilon;
      minus_perturbed_x[i] -= epsilon;
      result[i] = (evaluate(plus_perturbed_x, u, t) -
                   evaluate(minus_perturbed_x, u, t)) /
                  (epsilon * 2.0);
    }
    return result;
  }

  // Compute the second order derivative of this cost term w.r.t. the state.
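  // The Hessian is approximated column-by-column by central-differencing
  // stateDerivative() with the same step size, again only along the position
  // axes.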
  state_matrix_t stateSecondDerivative(
      const ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> &x,
      const ::ct::core::ControlVector<CONTROL_DIM, SCALAR_EVAL> &u,
      const SCALAR_EVAL &t) override {
    state_matrix_t result = state_matrix_t::Zero();

    SCALAR epsilon = SCALAR(kEpsilon);

    // Perturb x a second time.
    for (size_t i = 0; i < STATE_DIM; i += 2) {
      ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> plus_perturbed_x = x;
      ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> minus_perturbed_x = x;
      plus_perturbed_x[i] += epsilon;
      minus_perturbed_x[i] -= epsilon;
      state_vector_t delta = (stateDerivative(plus_perturbed_x, u, t) -
                              stateDerivative(minus_perturbed_x, u, t)) /
                             (epsilon * 2.0);

      result.col(i) = delta;
    }
    return result;
  }

  ::ct::core::ControlVector<CONTROL_DIM, SCALAR_EVAL> controlDerivative(
      const ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> & /*x*/,
      const ::ct::core::ControlVector<CONTROL_DIM, SCALAR_EVAL> & /*u*/,
      const SCALAR_EVAL & /*t*/) override {
    return ::ct::core::ControlVector<CONTROL_DIM, SCALAR_EVAL>::Zero();
  }

  control_state_matrix_t stateControlDerivative(
      const ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> & /*x*/,
      const ::ct::core::ControlVector<CONTROL_DIM, SCALAR_EVAL> & /*u*/,
      const SCALAR_EVAL & /*t*/) override {
    control_state_matrix_t result = control_state_matrix_t::Zero();

    return result;
  }

  control_matrix_t controlSecondDerivative(
      const ::ct::core::StateVector<STATE_DIM, SCALAR_EVAL> & /*x*/,
      const ::ct::core::ControlVector<CONTROL_DIM, SCALAR_EVAL> & /*u*/,
      const SCALAR_EVAL & /*t*/) override {
    control_matrix_t result = control_matrix_t::Zero();
    return result;
  }

  /*
  // TODO(austin): Implement this for the automatic differentiation.
  virtual ::ct::core::ADCGScalar evaluateCppadCg(
      const ::ct::core::StateVector<STATE_DIM, ::ct::core::ADCGScalar> &x,
      const ::ct::core::ControlVector<CONTROL_DIM, ::ct::core::ADCGScalar> &u,
      ::ct::core::ADCGScalar t) override {
    ::ct::core::ADCGScalar c = ::ct::core::ADCGScalar(0.0);
    for (size_t i = 0; i < STATE_DIM; i++)
      c += barriers_[i].computeActivation(x(i));
    return c;
  }
  */
};

int main(int argc, char **argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, false);
  // PRELIMINARIES, see example NLOC.cpp.
  constexpr size_t state_dim = MySecondOrderSystem<double>::STATE_DIM;
  constexpr size_t control_dim = MySecondOrderSystem<double>::CONTROL_DIM;

  ::std::shared_ptr<ct::core::ControlledSystem<state_dim, control_dim>>
      oscillator_dynamics(new MySecondOrderSystem<double>());

  ::std::shared_ptr<ct::core::SystemLinearizer<state_dim, control_dim>>
      ad_linearizer(new ::ct::core::SystemLinearizer<state_dim, control_dim>(
          oscillator_dynamics));

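  // LQR-style weights in the spirit of Bryson's rule: each diagonal entry of Q
  // is 1 / (acceptable excursion)^2 for the corresponding state, and each
  // diagonal entry of R is 1 / (maximum input)^2, matching the +/- 12 input
  // bounds set below.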
  constexpr double kQPos = 0.5;
  constexpr double kQVel = 1.65;
  ::Eigen::Matrix<double, 4, 4> Q_step;
  Q_step << 1.0 / (kQPos * kQPos), 0.0, 0.0, 0.0, 0.0, 1.0 / (kQVel * kQVel),
      0.0, 0.0, 0.0, 0.0, 1.0 / (kQPos * kQPos), 0.0, 0.0, 0.0, 0.0,
      1.0 / (kQVel * kQVel);
  ::Eigen::Matrix<double, 2, 2> R_step;
  R_step << 1.0 / (12.0 * 12.0), 0.0, 0.0, 1.0 / (12.0 * 12.0);
  ::std::shared_ptr<ct::optcon::TermQuadratic<state_dim, control_dim>>
      intermediate_cost(new ::ct::optcon::TermQuadratic<state_dim, control_dim>(
          Q_step, R_step));

  // TODO(austin): DARE for these.
  ::Eigen::Matrix<double, 4, 4> Q_final = Q_step;
  ::Eigen::Matrix<double, 2, 2> R_final = R_step;
  ::std::shared_ptr<ct::optcon::TermQuadratic<state_dim, control_dim>>
      final_cost(new ::ct::optcon::TermQuadratic<state_dim, control_dim>(
          Q_final, R_final));

  ::std::shared_ptr<ct::optcon::TermBase<state_dim, control_dim>> bounds_cost(
      new MyTermStateBarrier<4, 2>());

  // TODO(austin): Cost function needs constraints.
  ::std::shared_ptr<::ct::optcon::CostFunctionQuadratic<state_dim, control_dim>>
      cost_function(
          new ::ct::optcon::CostFunctionAnalytical<state_dim, control_dim>());
  cost_function->addIntermediateTerm(intermediate_cost);
  cost_function->addIntermediateTerm(bounds_cost);
  cost_function->addFinalTerm(final_cost);

  // STEP 1-D: set up the box constraints for the control input
  // input box constraint boundaries with sparsities in constraint toolbox
  // format
  Eigen::VectorXd u_lb(control_dim);
  Eigen::VectorXd u_ub(control_dim);
  u_ub.setConstant(12.0);
  u_lb = -u_ub;
  ::std::cout << "uub " << u_ub << ::std::endl;
  ::std::cout << "ulb " << u_lb << ::std::endl;

  // constraint terms
  std::shared_ptr<::ct::optcon::ControlInputConstraint<state_dim, control_dim>>
      controlConstraint(
          new ::ct::optcon::ControlInputConstraint<state_dim, control_dim>(
              u_lb, u_ub));
  controlConstraint->setName("ControlInputConstraint");
  // create constraint container
  std::shared_ptr<
      ::ct::optcon::ConstraintContainerAnalytical<state_dim, control_dim>>
      box_constraints(
          new ::ct::optcon::ConstraintContainerAnalytical<state_dim,
                                                          control_dim>());
  // add and initialize constraint terms
  box_constraints->addIntermediateConstraint(controlConstraint, true);
  box_constraints->initialize();

  // Starting point.
  ::ct::core::StateVector<state_dim> x0;
  x0 << 1.0, 0.0, 0.9, 0.0;

  constexpr ::ct::core::Time kTimeHorizon = 1.5;
  ::ct::optcon::OptConProblem<state_dim, control_dim> opt_con_problem(
      kTimeHorizon, x0, oscillator_dynamics, cost_function, ad_linearizer);
  ::ct::optcon::NLOptConSettings ilqr_settings;
  ilqr_settings.dt = 0.00505;  // the control discretization in [sec]
  ilqr_settings.integrator = ::ct::core::IntegrationType::RK4;
  ilqr_settings.discretization =
      ::ct::optcon::NLOptConSettings::APPROXIMATION::FORWARD_EULER;
  // ilqr_settings.discretization =
  //     NLOptConSettings::APPROXIMATION::MATRIX_EXPONENTIAL;
  ilqr_settings.max_iterations = 20;
  ilqr_settings.min_cost_improvement = 1.0e-11;
  ilqr_settings.nlocp_algorithm =
      ::ct::optcon::NLOptConSettings::NLOCP_ALGORITHM::ILQR;
  // the LQ-problems are solved using a custom Gauss-Newton Riccati solver
  // ilqr_settings.lqocp_solver =
  //     NLOptConSettings::LQOCP_SOLVER::GNRICCATI_SOLVER;
  ilqr_settings.lqocp_solver =
      ::ct::optcon::NLOptConSettings::LQOCP_SOLVER::HPIPM_SOLVER;
  ilqr_settings.printSummary = true;
  if (ilqr_settings.lqocp_solver ==
      ::ct::optcon::NLOptConSettings::LQOCP_SOLVER::HPIPM_SOLVER) {
    opt_con_problem.setBoxConstraints(box_constraints);
  }

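  // Number of discretization steps over the horizon, roughly
  // kTimeHorizon / dt (about 297 here).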
  size_t K = ilqr_settings.computeK(kTimeHorizon);
  printf("Using %d steps\n", static_cast<int>(K));

  // Vector of feedback matrices.
  ::ct::core::FeedbackArray<state_dim, control_dim> u0_fb(
      K, ::ct::core::FeedbackMatrix<state_dim, control_dim>::Zero());
  ::ct::core::ControlVectorArray<control_dim> u0_ff(
      K, ::ct::core::ControlVector<control_dim>::Zero());
  ::ct::core::StateVectorArray<state_dim> x_ref_init(K + 1, x0);
  ::ct::core::StateFeedbackController<state_dim, control_dim>
      initial_controller(x_ref_init, u0_ff, u0_fb, ilqr_settings.dt);

  // STEP 2-C: create an NLOptConSolver instance
  ::ct::optcon::NLOptConSolver<state_dim, control_dim> iLQR(opt_con_problem,
                                                            ilqr_settings);
  // Seed it with the initial guess
  iLQR.setInitialGuess(initial_controller);
  // we solve the optimal control problem and retrieve the solution
  iLQR.solve();
  ::ct::core::StateFeedbackController<state_dim, control_dim> initial_solution =
      iLQR.getSolution();
  // MPC-EXAMPLE
  // we store the initial solution obtained from solving the initial optimal
  // control problem, and re-use it to initialize the MPC solver in the
  // following.

  // STEP 1: first, we set up an MPC instance for the iLQR solver and configure
  // it. Since the MPC class is wrapped around normal Optimal Control Solvers,
  // we need two different kinds of settings: those for the optimal control
  // solver, and those specific to MPC:

  // 1) settings for the iLQR instance used in MPC. Of course, we use the same
  // settings as for solving the initial problem ...
  ::ct::optcon::NLOptConSettings ilqr_settings_mpc = ilqr_settings;
  ilqr_settings_mpc.max_iterations = 20;
  // and we limit the printouts, too.
  ilqr_settings_mpc.printSummary = false;
  // 2) settings specific to model predictive control. For a more detailed
  // description of those, visit ct/optcon/mpc/MpcSettings.h
  ::ct::optcon::mpc_settings mpc_settings;
  mpc_settings.stateForwardIntegration_ = true;
  mpc_settings.postTruncation_ = false;
  mpc_settings.measureDelay_ = false;
  mpc_settings.fixedDelayUs_ = 5000 * 0;  // Ignore the delay for now.
  mpc_settings.delayMeasurementMultiplier_ = 1.0;
  // mpc_settings.mpc_mode = ::ct::optcon::MPC_MODE::FIXED_FINAL_TIME;
  mpc_settings.mpc_mode = ::ct::optcon::MPC_MODE::CONSTANT_RECEDING_HORIZON;
  mpc_settings.coldStart_ = false;

  // STEP 2: Create the iLQR-MPC object, based on the optimal control problem
  // and the selected settings.
  ::ct::optcon::MPC<::ct::optcon::NLOptConSolver<state_dim, control_dim>>
      ilqr_mpc(opt_con_problem, ilqr_settings_mpc, mpc_settings);
  // initialize it using the previously computed initial controller
  ilqr_mpc.setInitialGuess(initial_solution);
  // STEP 3: running MPC
  // Here, we run the MPC loop. Note that the general underlying idea is that
  // you receive a state-estimate together with a time-stamp from your robot or
  // system. MPC needs to receive both that time information and the state from
  // your control system. Here, we "simulate" the time measurement using
  // ::std::chrono and wrap everything into a for-loop.
  // The basic idea of operation is that after receiving time and state
  // information, one executes the finishIteration() method of MPC.
  auto start_time = ::std::chrono::high_resolution_clock::now();
  // limit the maximum number of runs in this example
  size_t maxNumRuns = 400;
  ::std::cout << "Starting to run MPC" << ::std::endl;

  ::std::vector<double> time_array;
  ::std::vector<double> theta1_array;
  ::std::vector<double> omega1_array;
  ::std::vector<double> theta2_array;
  ::std::vector<double> omega2_array;

  ::std::vector<double> u0_array;
  ::std::vector<double> u1_array;

  for (size_t i = 0; i < maxNumRuns; i++) {
    ::std::cout << "Solving iteration " << i << ::std::endl;
    // Time which has passed since start of MPC
    auto current_time = ::std::chrono::high_resolution_clock::now();
    ::ct::core::Time t =
        1e-6 *
        ::std::chrono::duration_cast<::std::chrono::microseconds>(current_time -
                                                                  start_time)
            .count();
    // prepare mpc iteration
    ilqr_mpc.prepareIteration(t);
    // new optimal policy
    ::std::shared_ptr<ct::core::StateFeedbackController<state_dim, control_dim>>
        newPolicy(
            new ::ct::core::StateFeedbackController<state_dim, control_dim>());
    // timestamp of the new optimal policy
    ::ct::core::Time ts_newPolicy;
    current_time = ::std::chrono::high_resolution_clock::now();
    t = 1e-6 *
        ::std::chrono::duration_cast<::std::chrono::microseconds>(current_time -
                                                                  start_time)
            .count();
    bool success = ilqr_mpc.finishIteration(x0, t, *newPolicy, ts_newPolicy);
    // we break the loop in case the time horizon is reached or solve() failed
    if (ilqr_mpc.timeHorizonReached() | !success) break;

    ::std::cout << "Solved for time " << newPolicy->time()[0] << " state "
                << x0.transpose() << " next time " << newPolicy->time()[1]
                << ::std::endl;
    ::std::cout << " Solution: Uff " << newPolicy->uff()[0].transpose()
                << " x_ref_ " << newPolicy->x_ref()[0].transpose()
                << ::std::endl;

    time_array.push_back(ilqr_settings.dt * i);
    theta1_array.push_back(x0(0));
    omega1_array.push_back(x0(1));
    theta2_array.push_back(x0(2));
    omega2_array.push_back(x0(3));

    u0_array.push_back(newPolicy->uff()[0](0, 0));
    u1_array.push_back(newPolicy->uff()[0](1, 0));

    ::std::cout << "xref[1] " << newPolicy->x_ref()[1].transpose()
                << ::std::endl;
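    // Simulate the plant forward by one control period under the new policy;
    // the updated x0 becomes the "measured" state for the next iteration.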
    ilqr_mpc.doForwardIntegration(0.0, ilqr_settings.dt, x0, newPolicy);
    ::std::cout << "Next X: " << x0.transpose() << ::std::endl;

    // TODO(austin): Re-use the policy. Maybe? Or maybe mpc already does that.
  }
  // The summary contains some statistical data about time delays, etc.
  ilqr_mpc.printMpcSummary();

  // Now plot our simulation.
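  // Figure 1: states vs time.  Figure 2: joint-space trajectory against the
  // keep-out box.  Figure 3: feedforward inputs.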
  matplotlibcpp::plot(time_array, theta1_array, {{"label", "theta1"}});
  matplotlibcpp::plot(time_array, omega1_array, {{"label", "omega1"}});
  matplotlibcpp::plot(time_array, theta2_array, {{"label", "theta2"}});
  matplotlibcpp::plot(time_array, omega2_array, {{"label", "omega2"}});
  matplotlibcpp::legend();

  matplotlibcpp::figure();
  matplotlibcpp::plot(theta1_array, theta2_array, {{"label", "trajectory"}});
  ::std::vector<double> box_x{0.5, 0.5, 1.0, 1.0};
  ::std::vector<double> box_y{0.0, 0.8, 0.8, 0.0};
  matplotlibcpp::plot(box_x, box_y, {{"label", "keepout zone"}});
  matplotlibcpp::legend();

  matplotlibcpp::figure();
  matplotlibcpp::plot(time_array, u0_array, {{"label", "u0"}});
  matplotlibcpp::plot(time_array, u1_array, {{"label", "u1"}});
  matplotlibcpp::legend();
  matplotlibcpp::show();
}