blob: 4eecee9067e349d9edf6aa273e6e4a07859fc014 [file] [log] [blame]
Austin Schuh085eab92020-11-26 13:54:51 -08001#!/usr/bin/python3
Austin Schuh434c8372018-01-21 16:30:06 -08002# This is an initial, hacky implementation of the extended LQR paper. It's just
3# a proof of concept, so don't trust it too much.
4
5import numpy
6import scipy.optimize
7from matplotlib import pylab
8import sys
9
10from frc971.control_loops.python import controls
11
12
13class ArmDynamics(object):
14 def __init__(self, dt):
15 self.dt = dt
16
17 self.l1 = 1.0
18 self.l2 = 0.8
19 self.num_states = 4
20 self.num_inputs = 2
21
22 def dynamics(self, X, U):
23 """Calculates the dynamics for a double jointed arm.
24
25 Args:
26 X, numpy.matrix(4, 1), The state. [theta1, omega1, theta2, omega2]
27 U, numpy.matrix(2, 1), The input. [torque1, torque2]
28
29 Returns:
30 numpy.matrix(4, 1), The derivative of the dynamics.
31 """
32 return numpy.matrix([[X[1, 0]],
33 [U[0, 0]],
34 [X[3, 0]],
35 [U[1, 0]]])
36
37 def discrete_dynamics(self, X, U):
38 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
39
40 def inverse_discrete_dynamics(self, X, U):
41 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X, dt)
42
43# Simple implementation for a quadratic cost function.
44class ArmCostFunction:
45 def __init__(self, dt, dynamics):
46 self.num_states = 4
47 self.num_inputs = 2
48 self.dt = dt
49 self.dynamics = dynamics
50
51 q_pos = 0.5
52 q_vel = 1.65
53 self.Q = numpy.matrix(numpy.diag([
54 1.0 / (q_pos ** 2.0), 1.0 / (q_vel ** 2.0),
55 1.0 / (q_pos ** 2.0), 1.0 / (q_vel ** 2.0)]))
56
57 self.R = numpy.matrix(numpy.diag([1.0 / (12.0 ** 2.0),
58 1.0 / (12.0 ** 2.0)]))
59
60 final_A = numerical_jacobian_x(self.dynamics.discrete_dynamics,
61 numpy.matrix(numpy.zeros((4, 1))),
62 numpy.matrix(numpy.zeros((2, 1))))
63 final_B = numerical_jacobian_u(self.dynamics.discrete_dynamics,
64 numpy.matrix(numpy.zeros((4, 1))),
65 numpy.matrix(numpy.zeros((2, 1))))
66 print 'Final A', final_A
67 print 'Final B', final_B
68 K, self.S = controls.dlqr(
69 final_A, final_B, self.Q, self.R, optimal_cost_function=True)
70 print 'Final eig:', numpy.linalg.eig(final_A - final_B * K)
71
72 def final_cost(self, X, U):
73 """Computes the final cost of being at X
74
75 Args:
76 X: numpy.matrix(self.num_states, 1)
77 U: numpy.matrix(self.num_inputs, 1), ignored
78
79 Returns:
80 numpy.matrix(1, 1), The quadratic cost of being at X
81 """
82 return 0.5 * X.T * self.S * X
83
84 def cost(self, X, U):
85 """Computes the incremental cost given a position and U.
86
87 Args:
88 X: numpy.matrix(self.num_states, 1)
89 U: numpy.matrix(self.num_inputs, 1)
90
91 Returns:
92 numpy.matrix(1, 1), The quadratic cost of evaluating U.
93 """
94 return U.T * self.R * U + X.T * self.Q * X
95
96 def estimate_Q_final(self, X_hat):
97 """Returns the quadraticized final Q around X_hat.
98
99 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
100
101 Args:
102 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
103
104 Result:
105 numpy.matrix(self.num_states, self.num_states)
106 """
107 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
108 print 'S', self.S
109 print 'Q_final', numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
110 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
111
112 def estimate_partial_cost_partial_x_final(self, X_hat):
113 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
114
115 Args:
116 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
117
118 Result:
119 numpy.matrix(self.num_states, 1)
120 """
121 return numerical_jacobian_x(self.final_cost, X_hat,
122 numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
123
124 def estimate_q_final(self, X_hat):
125 """Returns q evaluated at X_hat for the final cost function."""
126 return self.estimate_partial_cost_partial_x_final(X_hat) - self.estimate_Q_final(X_hat) * X_hat
127
128
129
130class SkidSteerDynamics(object):
131 def __init__(self, dt):
132 self.width = 0.2
133 self.dt = dt
134 self.num_states = 3
135 self.num_inputs = 2
136
137 def dynamics(self, X, U):
138 """Calculates the dynamics for a 2 wheeled robot.
139
140 Args:
141 X, numpy.matrix(3, 1), The state. [x, y, theta]
142 U, numpy.matrix(2, 1), The input. [left velocity, right velocity]
143
144 Returns:
145 numpy.matrix(3, 1), The derivative of the dynamics.
146 """
147 #return numpy.matrix([[X[1, 0]],
148 # [X[2, 0]],
149 # [U[0, 0]]])
150 return numpy.matrix([[(U[0, 0] + U[1, 0]) * numpy.cos(X[2, 0]) / 2.0],
151 [(U[0, 0] + U[1, 0]) * numpy.sin(X[2, 0]) / 2.0],
152 [(U[1, 0] - U[0, 0]) / self.width]])
153
154 def discrete_dynamics(self, X, U):
155 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
156
157 def inverse_discrete_dynamics(self, X, U):
158 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X, dt)
159
160
161# Simple implementation for a quadratic cost function.
162class CostFunction:
163 def __init__(self, dt):
164 self.num_states = 3
165 self.num_inputs = 2
166 self.dt = dt
167 self.Q = numpy.matrix([[0.1, 0, 0],
168 [0, 0.6, 0],
169 [0, 0, 0.1]]) / self.dt / self.dt
170 self.R = numpy.matrix([[0.40, 0],
171 [0, 0.40]]) / self.dt / self.dt
172
173 def final_cost(self, X, U):
174 """Computes the final cost of being at X
175
176 Args:
177 X: numpy.matrix(self.num_states, 1)
178 U: numpy.matrix(self.num_inputs, 1), ignored
179
180 Returns:
181 numpy.matrix(1, 1), The quadratic cost of being at X
182 """
183 return X.T * self.Q * X * 1000
184
185 def cost(self, X, U):
186 """Computes the incremental cost given a position and U.
187
188 Args:
189 X: numpy.matrix(self.num_states, 1)
190 U: numpy.matrix(self.num_inputs, 1)
191
192 Returns:
193 numpy.matrix(1, 1), The quadratic cost of evaluating U.
194 """
195 return U.T * self.R * U + X.T * self.Q * X
196
197 def estimate_Q_final(self, X_hat):
198 """Returns the quadraticized final Q around X_hat.
199
200 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
201
202 Args:
203 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
204
205 Result:
206 numpy.matrix(self.num_states, self.num_states)
207 """
208 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
209 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
210
211 def estimate_partial_cost_partial_x_final(self, X_hat):
212 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
213
214 Args:
215 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
216
217 Result:
218 numpy.matrix(self.num_states, 1)
219 """
220 return numerical_jacobian_x(self.final_cost, X_hat, numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
221
222 def estimate_q_final(self, X_hat):
223 """Returns q evaluated at X_hat for the final cost function."""
224 return self.estimate_partial_cost_partial_x_final(X_hat) - self.estimate_Q_final(X_hat) * X_hat
225
226
227def RungeKutta(f, x, dt):
228 """4th order RungeKutta integration of F starting at X."""
229 a = f(x)
230 b = f(x + dt / 2.0 * a)
231 c = f(x + dt / 2.0 * b)
232 d = f(x + dt * c)
233 return x + dt * (a + 2.0 * b + 2.0 * c + d) / 6.0
234
235def numerical_jacobian_x(fn, X, U, epsilon=1e-4):
236 """Numerically estimates the jacobian around X, U in X.
237
238 Args:
239 fn: A function of X, U.
240 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
241 around.
242 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
243 around.
244
245 Returns:
246 numpy.matrix(num_states, num_states), The jacobian of fn with X as the
247 variable.
248 """
249 num_states = X.shape[0]
250 nominal = fn(X, U)
251 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_states)))
252 # It's more expensive, but +- epsilon will be more reliable
253 for i in range(0, num_states):
254 dX_plus = X.copy()
255 dX_plus[i] += epsilon
256 dX_minus = X.copy()
257 dX_minus[i] -= epsilon
258 answer[:, i] = (fn(dX_plus, U) - fn(dX_minus, U)) / epsilon / 2.0
259 return answer
260
261def numerical_jacobian_u(fn, X, U, epsilon=1e-4):
262 """Numerically estimates the jacobian around X, U in U.
263
264 Args:
265 fn: A function of X, U.
266 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
267 around.
268 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
269 around.
270
271 Returns:
272 numpy.matrix(num_states, num_inputs), The jacobian of fn with U as the
273 variable.
274 """
275 num_states = X.shape[0]
276 num_inputs = U.shape[0]
277 nominal = fn(X, U)
278 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_inputs)))
279 for i in range(0, num_inputs):
280 dU_plus = U.copy()
281 dU_plus[i] += epsilon
282 dU_minus = U.copy()
283 dU_minus[i] -= epsilon
284 answer[:, i] = (fn(X, dU_plus) - fn(X, dU_minus)) / epsilon / 2.0
285 return answer
286
287def numerical_jacobian_x_x(fn, X, U):
288 return numerical_jacobian_x(
289 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T, X, U)
290
291def numerical_jacobian_x_u(fn, X, U):
292 return numerical_jacobian_x(
293 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T, X, U)
294
295def numerical_jacobian_u_x(fn, X, U):
296 return numerical_jacobian_u(
297 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T, X, U)
298
299def numerical_jacobian_u_u(fn, X, U):
300 return numerical_jacobian_u(
301 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T, X, U)
302
303
304class ELQR(object):
305 def __init__(self, dynamics, cost):
306 self.dynamics = dynamics
307 self.cost = cost
308
309 def Solve(self, x_hat_initial, horizon, iterations):
310 l = horizon
311 num_states = self.dynamics.num_states
312 num_inputs = self.dynamics.num_inputs
313 self.S_bar_t = [numpy.matrix(numpy.zeros((num_states, num_states))) for _ in range(l + 1)]
314 self.s_bar_t = [numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)]
315 self.s_scalar_bar_t = [numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)]
316
317 self.L_t = [numpy.matrix(numpy.zeros((num_inputs, num_states))) for _ in range(l + 1)]
318 self.l_t = [numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)]
319 self.L_bar_t = [numpy.matrix(numpy.zeros((num_inputs, num_states))) for _ in range(l + 1)]
320 self.l_bar_t = [numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)]
321
322 self.S_t = [numpy.matrix(numpy.zeros((num_states, num_states))) for _ in range(l + 1)]
323 self.s_t = [numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)]
324 self.s_scalar_t = [numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)]
325
326 self.last_x_hat_t = [numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)]
327
328 # Iterate the solver
329 for a in range(iterations):
330 x_hat = x_hat_initial
331 u_t = self.L_t[0] * x_hat + self.l_t[0]
332 self.S_bar_t[0] = numpy.matrix(numpy.zeros((num_states, num_states)))
333 self.s_bar_t[0] = numpy.matrix(numpy.zeros((num_states, 1)))
334 self.s_scalar_bar_t[0] = numpy.matrix([[0]])
335
336 self.last_x_hat_t[0] = x_hat_initial
337
338 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_initial, u_t)
339 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_initial, u_t)
340 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_initial, u_t)
341
342 q_t = numerical_jacobian_x(self.cost.cost, x_hat_initial, u_t).T \
343 - Q_t * x_hat_initial - P_t.T * u_t
344 r_t = numerical_jacobian_u(self.cost.cost, x_hat_initial, u_t).T \
345 - P_t * x_hat_initial - R_t * u_t
346
347 q_scalar_t = self.cost.cost(x_hat_initial, u_t) \
348 - 0.5 * (x_hat_initial.T * (Q_t * x_hat_initial + P_t.T * u_t) \
349 + u_t.T * (P_t * x_hat_initial + R_t * u_t)) \
350 - x_hat_initial.T * q_t - u_t.T * r_t
351
352 start_A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics, x_hat_initial, u_t)
353 start_B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics, x_hat_initial, u_t)
354 x_hat_next = self.dynamics.discrete_dynamics(x_hat_initial, u_t)
355 start_c_t = x_hat_next - start_A_t * x_hat_initial - start_B_t * u_t
356
357 B_svd_u, B_svd_sigma_diag, B_svd_v = numpy.linalg.svd(start_B_t)
358 B_svd_sigma = numpy.matrix(numpy.zeros(start_B_t.shape))
359 B_svd_sigma[0:B_svd_sigma_diag.shape[0], 0:B_svd_sigma_diag.shape[0]] = \
360 numpy.diag(B_svd_sigma_diag)
361
362 B_svd_sigma_inv = numpy.matrix(numpy.zeros(start_B_t.shape)).T
363 B_svd_sigma_inv[0:B_svd_sigma_diag.shape[0],
364 0:B_svd_sigma_diag.shape[0]] = \
365 numpy.linalg.inv(numpy.diag(B_svd_sigma_diag))
366 B_svd_inv = B_svd_v.T * B_svd_sigma_inv * B_svd_u.T
367
368 self.L_bar_t[1] = B_svd_inv
369 self.l_bar_t[1] = -B_svd_inv * (start_A_t * x_hat_initial + start_c_t)
370
371 self.S_bar_t[1] = self.L_bar_t[1].T * R_t * self.L_bar_t[1]
372
373 TotalS_1 = start_B_t.T * self.S_t[1] * start_B_t + R_t
374 Totals_1 = start_B_t.T * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
375 + start_B_t.T * self.s_t[1] + P_t * x_hat_initial + r_t
376 Totals_scalar_1 = 0.5 * (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
377 + self.s_scalar_t[1] + x_hat_initial.T * q_t + q_scalar_t \
378 + 0.5 * x_hat_initial.T * Q_t * x_hat_initial \
379 + (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.s_t[1]
380
381 optimal_u_1 = -numpy.linalg.solve(TotalS_1, Totals_1)
382 optimal_x_1 = start_A_t * x_hat_initial \
383 + start_B_t * optimal_u_1 + start_c_t
384
385 # TODO(austin): Disable this if we are controlable. It should not be needed then.
386 S_bar_1_eigh_eigenvalues, S_bar_1_eigh_eigenvectors = \
387 numpy.linalg.eigh(self.S_bar_t[1])
388 S_bar_1_eigh = numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues))
389 S_bar_1_eigh_eigenvalues_stiff = S_bar_1_eigh_eigenvalues.copy()
390 for i in range(S_bar_1_eigh_eigenvalues_stiff.shape[0]):
391 if abs(S_bar_1_eigh_eigenvalues_stiff[i]) < 1e-8:
392 S_bar_1_eigh_eigenvalues_stiff[i] = max(S_bar_1_eigh_eigenvalues_stiff) * 1.0
393
394 S_bar_stiff = S_bar_1_eigh_eigenvectors * numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues_stiff)) * S_bar_1_eigh_eigenvectors.T
395
396 print 'Min u', -numpy.linalg.solve(TotalS_1, Totals_1)
397 print 'Min x_hat', optimal_x_1
398 self.s_bar_t[1] = -self.s_t[1] - (S_bar_stiff + self.S_t[1]) * optimal_x_1
399 self.s_scalar_bar_t[1] = 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1 \
400 - optimal_x_1.T * (S_bar_stiff + self.S_t[1]) * optimal_x_1) \
401 + optimal_u_1.T * Totals_1 \
402 - optimal_x_1.T * (self.s_bar_t[1] + self.s_t[1]) \
403 - self.s_scalar_t[1] + Totals_scalar_1
404
405 print 'optimal_u_1', optimal_u_1
406 print 'TotalS_1', TotalS_1
407 print 'Totals_1', Totals_1
408 print 'Totals_scalar_1', Totals_scalar_1
409 print 'overall cost 1', 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1) \
410 + optimal_u_1.T * Totals_1 + Totals_scalar_1
411 print 'overall cost 0', 0.5 * (x_hat_initial.T * self.S_t[0] * x_hat_initial) \
412 + x_hat_initial.T * self.s_t[0] + self.s_scalar_t[0]
413
414 print 't forward 0'
415 print 'x_hat_initial[ 0]: %s' % (x_hat_initial)
416 print 'x_hat[%2d]: %s' % (0, x_hat.T)
417 print 'x_hat_next[%2d]: %s' % (0, x_hat_next.T)
418 print 'u[%2d]: %s' % (0, u_t.T)
419 print ('L[ 0]: %s' % (self.L_t[0],)).replace('\n', '\n ')
420 print ('l[ 0]: %s' % (self.l_t[0],)).replace('\n', '\n ')
421
422 print ('A_t[%2d]: %s' % (0, start_A_t)).replace('\n', '\n ')
423 print ('B_t[%2d]: %s' % (0, start_B_t)).replace('\n', '\n ')
424 print ('c_t[%2d]: %s' % (0, start_c_t)).replace('\n', '\n ')
425
426 # TODO(austin): optimal_x_1 is x_hat
427 x_hat = -numpy.linalg.solve((self.S_t[1] + S_bar_stiff),
428 (self.s_t[1] + self.s_bar_t[1]))
429 print 'new xhat', x_hat
430
431 self.S_bar_t[1] = S_bar_stiff
432
433 self.last_x_hat_t[1] = x_hat
434
435 for t in range(1, l):
436 print 't forward', t
437 u_t = self.L_t[t] * x_hat + self.l_t[t]
438
439 x_hat_next = self.dynamics.discrete_dynamics(x_hat, u_t)
440 A_bar_t = numerical_jacobian_x(self.dynamics.inverse_discrete_dynamics,
441 x_hat_next, u_t)
442 B_bar_t = numerical_jacobian_u(self.dynamics.inverse_discrete_dynamics,
443 x_hat_next, u_t)
444 c_bar_t = x_hat - A_bar_t * x_hat_next - B_bar_t * u_t
445
446 print 'x_hat[%2d]: %s' % (t, x_hat.T)
447 print 'x_hat_next[%2d]: %s' % (t, x_hat_next.T)
448 print ('L[%2d]: %s' % (t, self.L_t[t],)).replace('\n', '\n ')
449 print ('l[%2d]: %s' % (t, self.l_t[t],)).replace('\n', '\n ')
450 print 'u[%2d]: %s' % (t, u_t.T)
451
452 print ('A_bar_t[%2d]: %s' % (t, A_bar_t)).replace('\n', '\n ')
453 print ('B_bar_t[%2d]: %s' % (t, B_bar_t)).replace('\n', '\n ')
454 print ('c_bar_t[%2d]: %s' % (t, c_bar_t)).replace('\n', '\n ')
455
456 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat, u_t)
457 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat, u_t)
458 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat, u_t)
459
460 q_t = numerical_jacobian_x(self.cost.cost, x_hat, u_t).T \
461 - Q_t * x_hat - P_t.T * u_t
462 r_t = numerical_jacobian_u(self.cost.cost, x_hat, u_t).T \
463 - P_t * x_hat - R_t * u_t
464
465 q_scalar_t = self.cost.cost(x_hat, u_t) \
466 - 0.5 * (x_hat.T * (Q_t * x_hat + P_t.T * u_t) \
467 + u_t.T * (P_t * x_hat + R_t * u_t)) - x_hat.T * q_t - u_t.T * r_t
468
469 C_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t + P_t * A_bar_t
470 D_bar_t = A_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t
471 E_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * B_bar_t + R_t \
472 + P_t * B_bar_t + B_bar_t.T * P_t.T
473 d_bar_t = A_bar_t.T * (self.s_bar_t[t] + q_t) \
474 + A_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
475 e_bar_t = r_t + P_t * c_bar_t + B_bar_t.T * self.s_bar_t[t] \
476 + B_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
477
478 self.L_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * C_bar_t
479 self.l_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * e_bar_t
480
481 self.S_bar_t[t + 1] = D_bar_t + C_bar_t.T * self.L_bar_t[t + 1]
482 self.s_bar_t[t + 1] = d_bar_t + C_bar_t.T * self.l_bar_t[t + 1]
483 self.s_scalar_bar_t[t + 1] = \
484 -0.5 * e_bar_t.T * numpy.linalg.inv(E_bar_t) * e_bar_t \
485 + 0.5 * c_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t \
486 + c_bar_t.T * self.s_bar_t[t] + c_bar_t.T * q_t \
487 + self.s_scalar_bar_t[t] + q_scalar_t
488
489 x_hat = -numpy.linalg.solve((self.S_t[t + 1] + self.S_bar_t[t + 1]),
490 (self.s_t[t + 1] + self.s_bar_t[t + 1]))
491 self.S_t[l] = self.cost.estimate_Q_final(x_hat)
492 self.s_t[l] = self.cost.estimate_q_final(x_hat)
493 x_hat = -numpy.linalg.inv(self.S_t[l] + self.S_bar_t[l]) \
494 * (self.s_t[l] + self.s_bar_t[l])
495
496 for t in reversed(range(l)):
497 print 't backward', t
498 # TODO(austin): I don't think we can use L_t like this here.
499 # I think we are off by 1 somewhere...
500 u_t = self.L_bar_t[t + 1] * x_hat + self.l_bar_t[t + 1]
501
502 x_hat_prev = self.dynamics.inverse_discrete_dynamics(x_hat, u_t)
503 print 'x_hat[%2d]: %s' % (t, x_hat.T)
504 print 'x_hat_prev[%2d]: %s' % (t, x_hat_prev.T)
505 print ('L_bar[%2d]: %s' % (t + 1, self.L_bar_t[t + 1])).replace('\n', '\n ')
506 print ('l_bar[%2d]: %s' % (t + 1, self.l_bar_t[t + 1])).replace('\n', '\n ')
507 print 'u[%2d]: %s' % (t, u_t.T)
508 # Now compute the linearized A, B, and C
509 # Start by doing it numerically, and then optimize.
510 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics, x_hat_prev, u_t)
511 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics, x_hat_prev, u_t)
512 c_t = x_hat - A_t * x_hat_prev - B_t * u_t
513
514 print ('A_t[%2d]: %s' % (t, A_t)).replace('\n', '\n ')
515 print ('B_t[%2d]: %s' % (t, B_t)).replace('\n', '\n ')
516 print ('c_t[%2d]: %s' % (t, c_t)).replace('\n', '\n ')
517
518 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_prev, u_t)
519 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_prev, u_t)
520 P_T_t = numerical_jacobian_u_x(self.cost.cost, x_hat_prev, u_t)
521 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_prev, u_t)
522
523 q_t = numerical_jacobian_x(self.cost.cost, x_hat_prev, u_t).T \
524 - Q_t * x_hat_prev - P_T_t * u_t
525 r_t = numerical_jacobian_u(self.cost.cost, x_hat_prev, u_t).T \
526 - P_t * x_hat_prev - R_t * u_t
527
528 q_scalar_t = self.cost.cost(x_hat_prev, u_t) \
529 - 0.5 * (x_hat_prev.T * (Q_t * x_hat_prev + P_t.T * u_t) \
530 + u_t.T * (P_t * x_hat_prev + R_t * u_t)) \
531 - x_hat_prev.T * q_t - u_t.T * r_t
532
533 C_t = P_t + B_t.T * self.S_t[t + 1] * A_t
534 D_t = Q_t + A_t.T * self.S_t[t + 1] * A_t
535 E_t = R_t + B_t.T * self.S_t[t + 1] * B_t
536 d_t = q_t + A_t.T * self.s_t[t + 1] + A_t.T * self.S_t[t + 1] * c_t
537 e_t = r_t + B_t.T * self.s_t[t + 1] + B_t.T * self.S_t[t + 1] * c_t
538 self.L_t[t] = -numpy.linalg.inv(E_t) * C_t
539 self.l_t[t] = -numpy.linalg.inv(E_t) * e_t
540 self.s_t[t] = d_t + C_t.T * self.l_t[t]
541 self.S_t[t] = D_t + C_t.T * self.L_t[t]
542 self.s_scalar_t[t] = q_scalar_t \
543 - 0.5 * e_t.T * numpy.linalg.inv(E_t) * e_t \
544 + 0.5 * c_t.T * self.S_t[t + 1] * c_t \
545 + c_t.T * self.s_t[t + 1] \
546 + self.s_scalar_t[t + 1]
547
548 x_hat = -numpy.linalg.solve((self.S_t[t] + self.S_bar_t[t]),
549 (self.s_t[t] + self.s_bar_t[t]))
550 if t == 0:
551 self.last_x_hat_t[t] = x_hat_initial
552 else:
553 self.last_x_hat_t[t] = x_hat
554
555 x_hat_t = [x_hat_initial]
556
557 pylab.figure('states %d' % a)
558 pylab.ion()
559 for dim in range(num_states):
560 pylab.plot(numpy.arange(len(self.last_x_hat_t)),
561 [x_hat_loop[dim, 0] for x_hat_loop in self.last_x_hat_t],
562 marker='o', label='Xhat[%d]' % dim)
563 pylab.legend()
564 pylab.draw()
565
566 pylab.figure('xy %d' % a)
567 pylab.ion()
568 pylab.plot([x_hat_loop[0, 0] for x_hat_loop in self.last_x_hat_t],
569 [x_hat_loop[1, 0] for x_hat_loop in self.last_x_hat_t],
570 marker='o', label='trajectory')
571 pylab.legend()
572 pylab.draw()
573
574 final_u_t = [numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)]
575 cost_to_go = []
576 cost_to_come = []
577 cost = []
578 for t in range(l):
579 cost_to_go.append(
580 (0.5 * self.last_x_hat_t[t].T * self.S_t[t] * self.last_x_hat_t[t] \
581 + self.last_x_hat_t[t].T * self.s_t[t] + self.s_scalar_t[t])[0, 0])
582 cost_to_come.append(
583 (0.5 * self.last_x_hat_t[t].T * self.S_bar_t[t] * self.last_x_hat_t[t] \
584 + self.last_x_hat_t[t].T * self.s_bar_t[t] + self.s_scalar_bar_t[t])[0, 0])
585 cost.append(cost_to_go[-1] + cost_to_come[-1])
586 final_u_t[t] = self.L_t[t] * self.last_x_hat_t[t] + self.l_t[t]
587
588 for t in range(l):
589 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
590 self.last_x_hat_t[t], final_u_t[t])
591 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
592 self.last_x_hat_t[t], final_u_t[t])
593 c_t = self.dynamics.discrete_dynamics(self.last_x_hat_t[t], final_u_t[t]) \
594 - A_t * self.last_x_hat_t[t] - B_t * final_u_t[t]
595 print("Infeasability at", t, "is",
596 ((A_t * self.last_x_hat_t[t] + B_t * final_u_t[t] + c_t) \
597 - self.last_x_hat_t[t + 1]).T)
598
599 pylab.figure('u')
600 samples = numpy.arange(len(final_u_t))
601 for i in range(num_inputs):
602 pylab.plot(samples, [u[i, 0] for u in final_u_t],
603 label='u[%d]' % i, marker='o')
604 pylab.legend()
605
606 pylab.figure('cost')
607 cost_samples = numpy.arange(len(cost))
608 pylab.plot(cost_samples, cost_to_go, label='cost to go', marker='o')
609 pylab.plot(cost_samples, cost_to_come, label='cost to come', marker='o')
610 pylab.plot(cost_samples, cost, label='cost', marker='o')
611 pylab.legend()
612
613 pylab.ioff()
614 pylab.show()
615
616if __name__ == '__main__':
617 dt = 0.05
618 #arm_dynamics = ArmDynamics(dt=dt)
619 #elqr = ELQR(arm_dynamics, ArmCostFunction(dt=dt, dynamics=arm_dynamics))
620 #x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0], [0.0]])
621 #elqr.Solve(x_hat_initial, 100, 3)
622
623 elqr = ELQR(SkidSteerDynamics(dt=dt), CostFunction(dt=dt))
624 x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0]])
625 elqr.Solve(x_hat_initial, 100, 15)
626 sys.exit(1)