blob: 9a4705b7831a2548621313d5db4c95ba4af79561 [file] [log] [blame]
Austin Schuh085eab92020-11-26 13:54:51 -08001#!/usr/bin/python3
Austin Schuh434c8372018-01-21 16:30:06 -08002# This is an initial, hacky implementation of the extended LQR paper. It's just
3# a proof of concept, so don't trust it too much.
4
5import numpy
6import scipy.optimize
7from matplotlib import pylab
8import sys
9
10from frc971.control_loops.python import controls
11
12
13class ArmDynamics(object):
Austin Schuh434c8372018-01-21 16:30:06 -080014
Ravago Jones5127ccc2022-07-31 16:32:45 -070015 def __init__(self, dt):
16 self.dt = dt
Austin Schuh434c8372018-01-21 16:30:06 -080017
Ravago Jones5127ccc2022-07-31 16:32:45 -070018 self.l1 = 1.0
19 self.l2 = 0.8
20 self.num_states = 4
21 self.num_inputs = 2
22
23 def dynamics(self, X, U):
24 """Calculates the dynamics for a double jointed arm.
Austin Schuh434c8372018-01-21 16:30:06 -080025
26 Args:
27 X, numpy.matrix(4, 1), The state. [theta1, omega1, theta2, omega2]
28 U, numpy.matrix(2, 1), The input. [torque1, torque2]
29
30 Returns:
31 numpy.matrix(4, 1), The derivative of the dynamics.
32 """
Ravago Jones5127ccc2022-07-31 16:32:45 -070033 return numpy.matrix([[X[1, 0]], [U[0, 0]], [X[3, 0]], [U[1, 0]]])
Austin Schuh434c8372018-01-21 16:30:06 -080034
Ravago Jones5127ccc2022-07-31 16:32:45 -070035 def discrete_dynamics(self, X, U):
36 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
Austin Schuh434c8372018-01-21 16:30:06 -080037
Ravago Jones5127ccc2022-07-31 16:32:45 -070038 def inverse_discrete_dynamics(self, X, U):
39 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X,
40 dt)
41
Austin Schuh434c8372018-01-21 16:30:06 -080042
43# Simple implementation for a quadratic cost function.
44class ArmCostFunction:
Austin Schuh434c8372018-01-21 16:30:06 -080045
Ravago Jones5127ccc2022-07-31 16:32:45 -070046 def __init__(self, dt, dynamics):
47 self.num_states = 4
48 self.num_inputs = 2
49 self.dt = dt
50 self.dynamics = dynamics
Austin Schuh434c8372018-01-21 16:30:06 -080051
Ravago Jones5127ccc2022-07-31 16:32:45 -070052 q_pos = 0.5
53 q_vel = 1.65
54 self.Q = numpy.matrix(
55 numpy.diag([
56 1.0 / (q_pos**2.0), 1.0 / (q_vel**2.0), 1.0 / (q_pos**2.0),
57 1.0 / (q_vel**2.0)
58 ]))
Austin Schuh434c8372018-01-21 16:30:06 -080059
Ravago Jones5127ccc2022-07-31 16:32:45 -070060 self.R = numpy.matrix(
61 numpy.diag([1.0 / (12.0**2.0), 1.0 / (12.0**2.0)]))
Austin Schuh434c8372018-01-21 16:30:06 -080062
Ravago Jones5127ccc2022-07-31 16:32:45 -070063 final_A = numerical_jacobian_x(self.dynamics.discrete_dynamics,
64 numpy.matrix(numpy.zeros((4, 1))),
65 numpy.matrix(numpy.zeros((2, 1))))
66 final_B = numerical_jacobian_u(self.dynamics.discrete_dynamics,
67 numpy.matrix(numpy.zeros((4, 1))),
68 numpy.matrix(numpy.zeros((2, 1))))
69 print 'Final A', final_A
70 print 'Final B', final_B
71 K, self.S = controls.dlqr(final_A,
72 final_B,
73 self.Q,
74 self.R,
75 optimal_cost_function=True)
76 print 'Final eig:', numpy.linalg.eig(final_A - final_B * K)
77
78 def final_cost(self, X, U):
79 """Computes the final cost of being at X
Austin Schuh434c8372018-01-21 16:30:06 -080080
81 Args:
82 X: numpy.matrix(self.num_states, 1)
83 U: numpy.matrix(self.num_inputs, 1), ignored
84
85 Returns:
86 numpy.matrix(1, 1), The quadratic cost of being at X
87 """
Ravago Jones5127ccc2022-07-31 16:32:45 -070088 return 0.5 * X.T * self.S * X
Austin Schuh434c8372018-01-21 16:30:06 -080089
Ravago Jones5127ccc2022-07-31 16:32:45 -070090 def cost(self, X, U):
91 """Computes the incremental cost given a position and U.
Austin Schuh434c8372018-01-21 16:30:06 -080092
93 Args:
94 X: numpy.matrix(self.num_states, 1)
95 U: numpy.matrix(self.num_inputs, 1)
96
97 Returns:
98 numpy.matrix(1, 1), The quadratic cost of evaluating U.
99 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700100 return U.T * self.R * U + X.T * self.Q * X
Austin Schuh434c8372018-01-21 16:30:06 -0800101
Ravago Jones5127ccc2022-07-31 16:32:45 -0700102 def estimate_Q_final(self, X_hat):
103 """Returns the quadraticized final Q around X_hat.
Austin Schuh434c8372018-01-21 16:30:06 -0800104
105 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
106
107 Args:
108 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
109
110 Result:
111 numpy.matrix(self.num_states, self.num_states)
112 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700113 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
114 print 'S', self.S
115 print 'Q_final', numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
116 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
Austin Schuh434c8372018-01-21 16:30:06 -0800117
Ravago Jones5127ccc2022-07-31 16:32:45 -0700118 def estimate_partial_cost_partial_x_final(self, X_hat):
119 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
Austin Schuh434c8372018-01-21 16:30:06 -0800120
121 Args:
122 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
123
124 Result:
125 numpy.matrix(self.num_states, 1)
126 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700127 return numerical_jacobian_x(
128 self.final_cost, X_hat,
129 numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
Austin Schuh434c8372018-01-21 16:30:06 -0800130
Ravago Jones5127ccc2022-07-31 16:32:45 -0700131 def estimate_q_final(self, X_hat):
132 """Returns q evaluated at X_hat for the final cost function."""
133 return self.estimate_partial_cost_partial_x_final(
134 X_hat) - self.estimate_Q_final(X_hat) * X_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800135
136
137class SkidSteerDynamics(object):
Austin Schuh434c8372018-01-21 16:30:06 -0800138
Ravago Jones5127ccc2022-07-31 16:32:45 -0700139 def __init__(self, dt):
140 self.width = 0.2
141 self.dt = dt
142 self.num_states = 3
143 self.num_inputs = 2
144
145 def dynamics(self, X, U):
146 """Calculates the dynamics for a 2 wheeled robot.
Austin Schuh434c8372018-01-21 16:30:06 -0800147
148 Args:
149 X, numpy.matrix(3, 1), The state. [x, y, theta]
150 U, numpy.matrix(2, 1), The input. [left velocity, right velocity]
151
152 Returns:
153 numpy.matrix(3, 1), The derivative of the dynamics.
154 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700155 #return numpy.matrix([[X[1, 0]],
156 # [X[2, 0]],
157 # [U[0, 0]]])
158 return numpy.matrix([[(U[0, 0] + U[1, 0]) * numpy.cos(X[2, 0]) / 2.0],
159 [(U[0, 0] + U[1, 0]) * numpy.sin(X[2, 0]) / 2.0],
160 [(U[1, 0] - U[0, 0]) / self.width]])
Austin Schuh434c8372018-01-21 16:30:06 -0800161
Ravago Jones5127ccc2022-07-31 16:32:45 -0700162 def discrete_dynamics(self, X, U):
163 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
Austin Schuh434c8372018-01-21 16:30:06 -0800164
Ravago Jones5127ccc2022-07-31 16:32:45 -0700165 def inverse_discrete_dynamics(self, X, U):
166 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X,
167 dt)
Austin Schuh434c8372018-01-21 16:30:06 -0800168
169
170# Simple implementation for a quadratic cost function.
171class CostFunction:
Austin Schuh434c8372018-01-21 16:30:06 -0800172
Ravago Jones5127ccc2022-07-31 16:32:45 -0700173 def __init__(self, dt):
174 self.num_states = 3
175 self.num_inputs = 2
176 self.dt = dt
177 self.Q = numpy.matrix([[0.1, 0, 0], [0, 0.6, 0], [0, 0, 0.1]
178 ]) / self.dt / self.dt
179 self.R = numpy.matrix([[0.40, 0], [0, 0.40]]) / self.dt / self.dt
180
181 def final_cost(self, X, U):
182 """Computes the final cost of being at X
Austin Schuh434c8372018-01-21 16:30:06 -0800183
184 Args:
185 X: numpy.matrix(self.num_states, 1)
186 U: numpy.matrix(self.num_inputs, 1), ignored
187
188 Returns:
189 numpy.matrix(1, 1), The quadratic cost of being at X
190 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700191 return X.T * self.Q * X * 1000
Austin Schuh434c8372018-01-21 16:30:06 -0800192
Ravago Jones5127ccc2022-07-31 16:32:45 -0700193 def cost(self, X, U):
194 """Computes the incremental cost given a position and U.
Austin Schuh434c8372018-01-21 16:30:06 -0800195
196 Args:
197 X: numpy.matrix(self.num_states, 1)
198 U: numpy.matrix(self.num_inputs, 1)
199
200 Returns:
201 numpy.matrix(1, 1), The quadratic cost of evaluating U.
202 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700203 return U.T * self.R * U + X.T * self.Q * X
Austin Schuh434c8372018-01-21 16:30:06 -0800204
Ravago Jones5127ccc2022-07-31 16:32:45 -0700205 def estimate_Q_final(self, X_hat):
206 """Returns the quadraticized final Q around X_hat.
Austin Schuh434c8372018-01-21 16:30:06 -0800207
208 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
209
210 Args:
211 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
212
213 Result:
214 numpy.matrix(self.num_states, self.num_states)
215 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700216 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
217 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
Austin Schuh434c8372018-01-21 16:30:06 -0800218
Ravago Jones5127ccc2022-07-31 16:32:45 -0700219 def estimate_partial_cost_partial_x_final(self, X_hat):
220 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
Austin Schuh434c8372018-01-21 16:30:06 -0800221
222 Args:
223 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
224
225 Result:
226 numpy.matrix(self.num_states, 1)
227 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700228 return numerical_jacobian_x(
229 self.final_cost, X_hat,
230 numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
Austin Schuh434c8372018-01-21 16:30:06 -0800231
Ravago Jones5127ccc2022-07-31 16:32:45 -0700232 def estimate_q_final(self, X_hat):
233 """Returns q evaluated at X_hat for the final cost function."""
234 return self.estimate_partial_cost_partial_x_final(
235 X_hat) - self.estimate_Q_final(X_hat) * X_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800236
237
238def RungeKutta(f, x, dt):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700239 """4th order RungeKutta integration of F starting at X."""
240 a = f(x)
241 b = f(x + dt / 2.0 * a)
242 c = f(x + dt / 2.0 * b)
243 d = f(x + dt * c)
244 return x + dt * (a + 2.0 * b + 2.0 * c + d) / 6.0
245
Austin Schuh434c8372018-01-21 16:30:06 -0800246
247def numerical_jacobian_x(fn, X, U, epsilon=1e-4):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700248 """Numerically estimates the jacobian around X, U in X.
Austin Schuh434c8372018-01-21 16:30:06 -0800249
250 Args:
251 fn: A function of X, U.
252 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
253 around.
254 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
255 around.
256
257 Returns:
258 numpy.matrix(num_states, num_states), The jacobian of fn with X as the
259 variable.
260 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700261 num_states = X.shape[0]
262 nominal = fn(X, U)
263 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_states)))
264 # It's more expensive, but +- epsilon will be more reliable
265 for i in range(0, num_states):
266 dX_plus = X.copy()
267 dX_plus[i] += epsilon
268 dX_minus = X.copy()
269 dX_minus[i] -= epsilon
270 answer[:, i] = (fn(dX_plus, U) - fn(dX_minus, U)) / epsilon / 2.0
271 return answer
272
Austin Schuh434c8372018-01-21 16:30:06 -0800273
274def numerical_jacobian_u(fn, X, U, epsilon=1e-4):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700275 """Numerically estimates the jacobian around X, U in U.
Austin Schuh434c8372018-01-21 16:30:06 -0800276
277 Args:
278 fn: A function of X, U.
279 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
280 around.
281 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
282 around.
283
284 Returns:
285 numpy.matrix(num_states, num_inputs), The jacobian of fn with U as the
286 variable.
287 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700288 num_states = X.shape[0]
289 num_inputs = U.shape[0]
290 nominal = fn(X, U)
291 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_inputs)))
292 for i in range(0, num_inputs):
293 dU_plus = U.copy()
294 dU_plus[i] += epsilon
295 dU_minus = U.copy()
296 dU_minus[i] -= epsilon
297 answer[:, i] = (fn(X, dU_plus) - fn(X, dU_minus)) / epsilon / 2.0
298 return answer
299
Austin Schuh434c8372018-01-21 16:30:06 -0800300
301def numerical_jacobian_x_x(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700302 return numerical_jacobian_x(
303 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T,
304 X, U)
305
Austin Schuh434c8372018-01-21 16:30:06 -0800306
307def numerical_jacobian_x_u(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700308 return numerical_jacobian_x(
309 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T,
310 X, U)
311
Austin Schuh434c8372018-01-21 16:30:06 -0800312
313def numerical_jacobian_u_x(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700314 return numerical_jacobian_u(
315 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T,
316 X, U)
317
Austin Schuh434c8372018-01-21 16:30:06 -0800318
319def numerical_jacobian_u_u(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700320 return numerical_jacobian_u(
321 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T,
322 X, U)
Austin Schuh434c8372018-01-21 16:30:06 -0800323
324
325class ELQR(object):
Austin Schuh434c8372018-01-21 16:30:06 -0800326
Ravago Jones5127ccc2022-07-31 16:32:45 -0700327 def __init__(self, dynamics, cost):
328 self.dynamics = dynamics
329 self.cost = cost
Austin Schuh434c8372018-01-21 16:30:06 -0800330
Ravago Jones5127ccc2022-07-31 16:32:45 -0700331 def Solve(self, x_hat_initial, horizon, iterations):
332 l = horizon
333 num_states = self.dynamics.num_states
334 num_inputs = self.dynamics.num_inputs
335 self.S_bar_t = [
336 numpy.matrix(numpy.zeros((num_states, num_states)))
337 for _ in range(l + 1)
338 ]
339 self.s_bar_t = [
340 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
341 ]
342 self.s_scalar_bar_t = [
343 numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)
344 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800345
Ravago Jones5127ccc2022-07-31 16:32:45 -0700346 self.L_t = [
347 numpy.matrix(numpy.zeros((num_inputs, num_states)))
348 for _ in range(l + 1)
349 ]
350 self.l_t = [
351 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
352 ]
353 self.L_bar_t = [
354 numpy.matrix(numpy.zeros((num_inputs, num_states)))
355 for _ in range(l + 1)
356 ]
357 self.l_bar_t = [
358 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
359 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800360
Ravago Jones5127ccc2022-07-31 16:32:45 -0700361 self.S_t = [
362 numpy.matrix(numpy.zeros((num_states, num_states)))
363 for _ in range(l + 1)
364 ]
365 self.s_t = [
366 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
367 ]
368 self.s_scalar_t = [
369 numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)
370 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800371
Ravago Jones5127ccc2022-07-31 16:32:45 -0700372 self.last_x_hat_t = [
373 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
374 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800375
Ravago Jones5127ccc2022-07-31 16:32:45 -0700376 # Iterate the solver
377 for a in range(iterations):
378 x_hat = x_hat_initial
379 u_t = self.L_t[0] * x_hat + self.l_t[0]
380 self.S_bar_t[0] = numpy.matrix(
381 numpy.zeros((num_states, num_states)))
382 self.s_bar_t[0] = numpy.matrix(numpy.zeros((num_states, 1)))
383 self.s_scalar_bar_t[0] = numpy.matrix([[0]])
Austin Schuh434c8372018-01-21 16:30:06 -0800384
Ravago Jones5127ccc2022-07-31 16:32:45 -0700385 self.last_x_hat_t[0] = x_hat_initial
Austin Schuh434c8372018-01-21 16:30:06 -0800386
Ravago Jones5127ccc2022-07-31 16:32:45 -0700387 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_initial, u_t)
388 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_initial, u_t)
389 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_initial, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800390
Ravago Jones5127ccc2022-07-31 16:32:45 -0700391 q_t = numerical_jacobian_x(self.cost.cost, x_hat_initial, u_t).T \
392 - Q_t * x_hat_initial - P_t.T * u_t
393 r_t = numerical_jacobian_u(self.cost.cost, x_hat_initial, u_t).T \
394 - P_t * x_hat_initial - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800395
Ravago Jones5127ccc2022-07-31 16:32:45 -0700396 q_scalar_t = self.cost.cost(x_hat_initial, u_t) \
397 - 0.5 * (x_hat_initial.T * (Q_t * x_hat_initial + P_t.T * u_t) \
398 + u_t.T * (P_t * x_hat_initial + R_t * u_t)) \
399 - x_hat_initial.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800400
Ravago Jones5127ccc2022-07-31 16:32:45 -0700401 start_A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
402 x_hat_initial, u_t)
403 start_B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
404 x_hat_initial, u_t)
405 x_hat_next = self.dynamics.discrete_dynamics(x_hat_initial, u_t)
406 start_c_t = x_hat_next - start_A_t * x_hat_initial - start_B_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800407
Ravago Jones5127ccc2022-07-31 16:32:45 -0700408 B_svd_u, B_svd_sigma_diag, B_svd_v = numpy.linalg.svd(start_B_t)
409 B_svd_sigma = numpy.matrix(numpy.zeros(start_B_t.shape))
410 B_svd_sigma[0:B_svd_sigma_diag.shape[0], 0:B_svd_sigma_diag.shape[0]] = \
411 numpy.diag(B_svd_sigma_diag)
Austin Schuh434c8372018-01-21 16:30:06 -0800412
Ravago Jones5127ccc2022-07-31 16:32:45 -0700413 B_svd_sigma_inv = numpy.matrix(numpy.zeros(start_B_t.shape)).T
414 B_svd_sigma_inv[0:B_svd_sigma_diag.shape[0],
415 0:B_svd_sigma_diag.shape[0]] = \
416 numpy.linalg.inv(numpy.diag(B_svd_sigma_diag))
417 B_svd_inv = B_svd_v.T * B_svd_sigma_inv * B_svd_u.T
Austin Schuh434c8372018-01-21 16:30:06 -0800418
Ravago Jones5127ccc2022-07-31 16:32:45 -0700419 self.L_bar_t[1] = B_svd_inv
420 self.l_bar_t[1] = -B_svd_inv * (start_A_t * x_hat_initial +
421 start_c_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800422
Ravago Jones5127ccc2022-07-31 16:32:45 -0700423 self.S_bar_t[1] = self.L_bar_t[1].T * R_t * self.L_bar_t[1]
Austin Schuh434c8372018-01-21 16:30:06 -0800424
Ravago Jones5127ccc2022-07-31 16:32:45 -0700425 TotalS_1 = start_B_t.T * self.S_t[1] * start_B_t + R_t
426 Totals_1 = start_B_t.T * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
427 + start_B_t.T * self.s_t[1] + P_t * x_hat_initial + r_t
428 Totals_scalar_1 = 0.5 * (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
429 + self.s_scalar_t[1] + x_hat_initial.T * q_t + q_scalar_t \
430 + 0.5 * x_hat_initial.T * Q_t * x_hat_initial \
431 + (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.s_t[1]
Austin Schuh434c8372018-01-21 16:30:06 -0800432
Ravago Jones5127ccc2022-07-31 16:32:45 -0700433 optimal_u_1 = -numpy.linalg.solve(TotalS_1, Totals_1)
434 optimal_x_1 = start_A_t * x_hat_initial \
435 + start_B_t * optimal_u_1 + start_c_t
Austin Schuh434c8372018-01-21 16:30:06 -0800436
Ravago Jones5127ccc2022-07-31 16:32:45 -0700437 # TODO(austin): Disable this if we are controlable. It should not be needed then.
438 S_bar_1_eigh_eigenvalues, S_bar_1_eigh_eigenvectors = \
439 numpy.linalg.eigh(self.S_bar_t[1])
440 S_bar_1_eigh = numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues))
441 S_bar_1_eigh_eigenvalues_stiff = S_bar_1_eigh_eigenvalues.copy()
442 for i in range(S_bar_1_eigh_eigenvalues_stiff.shape[0]):
443 if abs(S_bar_1_eigh_eigenvalues_stiff[i]) < 1e-8:
444 S_bar_1_eigh_eigenvalues_stiff[i] = max(
445 S_bar_1_eigh_eigenvalues_stiff) * 1.0
Austin Schuh434c8372018-01-21 16:30:06 -0800446
Ravago Jones5127ccc2022-07-31 16:32:45 -0700447 S_bar_stiff = S_bar_1_eigh_eigenvectors * numpy.matrix(
448 numpy.diag(S_bar_1_eigh_eigenvalues_stiff)
449 ) * S_bar_1_eigh_eigenvectors.T
Austin Schuh434c8372018-01-21 16:30:06 -0800450
Ravago Jones5127ccc2022-07-31 16:32:45 -0700451 print 'Min u', -numpy.linalg.solve(TotalS_1, Totals_1)
452 print 'Min x_hat', optimal_x_1
453 self.s_bar_t[1] = -self.s_t[1] - (S_bar_stiff +
454 self.S_t[1]) * optimal_x_1
455 self.s_scalar_bar_t[1] = 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1 \
456 - optimal_x_1.T * (S_bar_stiff + self.S_t[1]) * optimal_x_1) \
457 + optimal_u_1.T * Totals_1 \
458 - optimal_x_1.T * (self.s_bar_t[1] + self.s_t[1]) \
459 - self.s_scalar_t[1] + Totals_scalar_1
Austin Schuh434c8372018-01-21 16:30:06 -0800460
Ravago Jones5127ccc2022-07-31 16:32:45 -0700461 print 'optimal_u_1', optimal_u_1
462 print 'TotalS_1', TotalS_1
463 print 'Totals_1', Totals_1
464 print 'Totals_scalar_1', Totals_scalar_1
465 print 'overall cost 1', 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1) \
466 + optimal_u_1.T * Totals_1 + Totals_scalar_1
467 print 'overall cost 0', 0.5 * (x_hat_initial.T * self.S_t[0] * x_hat_initial) \
468 + x_hat_initial.T * self.s_t[0] + self.s_scalar_t[0]
Austin Schuh434c8372018-01-21 16:30:06 -0800469
Ravago Jones5127ccc2022-07-31 16:32:45 -0700470 print 't forward 0'
471 print 'x_hat_initial[ 0]: %s' % (x_hat_initial)
472 print 'x_hat[%2d]: %s' % (0, x_hat.T)
473 print 'x_hat_next[%2d]: %s' % (0, x_hat_next.T)
474 print 'u[%2d]: %s' % (0, u_t.T)
475 print('L[ 0]: %s' % (self.L_t[0], )).replace('\n', '\n ')
476 print('l[ 0]: %s' % (self.l_t[0], )).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800477
Ravago Jones5127ccc2022-07-31 16:32:45 -0700478 print('A_t[%2d]: %s' % (0, start_A_t)).replace('\n', '\n ')
479 print('B_t[%2d]: %s' % (0, start_B_t)).replace('\n', '\n ')
480 print('c_t[%2d]: %s' % (0, start_c_t)).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800481
Ravago Jones5127ccc2022-07-31 16:32:45 -0700482 # TODO(austin): optimal_x_1 is x_hat
483 x_hat = -numpy.linalg.solve((self.S_t[1] + S_bar_stiff),
484 (self.s_t[1] + self.s_bar_t[1]))
485 print 'new xhat', x_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800486
Ravago Jones5127ccc2022-07-31 16:32:45 -0700487 self.S_bar_t[1] = S_bar_stiff
Austin Schuh434c8372018-01-21 16:30:06 -0800488
Ravago Jones5127ccc2022-07-31 16:32:45 -0700489 self.last_x_hat_t[1] = x_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800490
Ravago Jones5127ccc2022-07-31 16:32:45 -0700491 for t in range(1, l):
492 print 't forward', t
493 u_t = self.L_t[t] * x_hat + self.l_t[t]
Austin Schuh434c8372018-01-21 16:30:06 -0800494
Ravago Jones5127ccc2022-07-31 16:32:45 -0700495 x_hat_next = self.dynamics.discrete_dynamics(x_hat, u_t)
496 A_bar_t = numerical_jacobian_x(
497 self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t)
498 B_bar_t = numerical_jacobian_u(
499 self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t)
500 c_bar_t = x_hat - A_bar_t * x_hat_next - B_bar_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800501
Ravago Jones5127ccc2022-07-31 16:32:45 -0700502 print 'x_hat[%2d]: %s' % (t, x_hat.T)
503 print 'x_hat_next[%2d]: %s' % (t, x_hat_next.T)
504 print('L[%2d]: %s' % (
505 t,
506 self.L_t[t],
507 )).replace('\n', '\n ')
508 print('l[%2d]: %s' % (
509 t,
510 self.l_t[t],
511 )).replace('\n', '\n ')
512 print 'u[%2d]: %s' % (t, u_t.T)
Austin Schuh434c8372018-01-21 16:30:06 -0800513
Ravago Jones5127ccc2022-07-31 16:32:45 -0700514 print('A_bar_t[%2d]: %s' % (t, A_bar_t)).replace(
515 '\n', '\n ')
516 print('B_bar_t[%2d]: %s' % (t, B_bar_t)).replace(
517 '\n', '\n ')
518 print('c_bar_t[%2d]: %s' % (t, c_bar_t)).replace(
519 '\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800520
Ravago Jones5127ccc2022-07-31 16:32:45 -0700521 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat, u_t)
522 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat, u_t)
523 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800524
Ravago Jones5127ccc2022-07-31 16:32:45 -0700525 q_t = numerical_jacobian_x(self.cost.cost, x_hat, u_t).T \
526 - Q_t * x_hat - P_t.T * u_t
527 r_t = numerical_jacobian_u(self.cost.cost, x_hat, u_t).T \
528 - P_t * x_hat - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800529
Ravago Jones5127ccc2022-07-31 16:32:45 -0700530 q_scalar_t = self.cost.cost(x_hat, u_t) \
531 - 0.5 * (x_hat.T * (Q_t * x_hat + P_t.T * u_t) \
532 + u_t.T * (P_t * x_hat + R_t * u_t)) - x_hat.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800533
Ravago Jones5127ccc2022-07-31 16:32:45 -0700534 C_bar_t = B_bar_t.T * (self.S_bar_t[t] +
535 Q_t) * A_bar_t + P_t * A_bar_t
536 D_bar_t = A_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t
537 E_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * B_bar_t + R_t \
538 + P_t * B_bar_t + B_bar_t.T * P_t.T
539 d_bar_t = A_bar_t.T * (self.s_bar_t[t] + q_t) \
540 + A_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
541 e_bar_t = r_t + P_t * c_bar_t + B_bar_t.T * self.s_bar_t[t] \
542 + B_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800543
Ravago Jones5127ccc2022-07-31 16:32:45 -0700544 self.L_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * C_bar_t
545 self.l_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * e_bar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800546
Ravago Jones5127ccc2022-07-31 16:32:45 -0700547 self.S_bar_t[t + 1] = D_bar_t + C_bar_t.T * self.L_bar_t[t + 1]
548 self.s_bar_t[t + 1] = d_bar_t + C_bar_t.T * self.l_bar_t[t + 1]
549 self.s_scalar_bar_t[t + 1] = \
550 -0.5 * e_bar_t.T * numpy.linalg.inv(E_bar_t) * e_bar_t \
551 + 0.5 * c_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t \
552 + c_bar_t.T * self.s_bar_t[t] + c_bar_t.T * q_t \
553 + self.s_scalar_bar_t[t] + q_scalar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800554
Ravago Jones5127ccc2022-07-31 16:32:45 -0700555 x_hat = -numpy.linalg.solve(
556 (self.S_t[t + 1] + self.S_bar_t[t + 1]),
557 (self.s_t[t + 1] + self.s_bar_t[t + 1]))
558 self.S_t[l] = self.cost.estimate_Q_final(x_hat)
559 self.s_t[l] = self.cost.estimate_q_final(x_hat)
560 x_hat = -numpy.linalg.inv(self.S_t[l] + self.S_bar_t[l]) \
561 * (self.s_t[l] + self.s_bar_t[l])
Austin Schuh434c8372018-01-21 16:30:06 -0800562
Ravago Jones5127ccc2022-07-31 16:32:45 -0700563 for t in reversed(range(l)):
564 print 't backward', t
565 # TODO(austin): I don't think we can use L_t like this here.
566 # I think we are off by 1 somewhere...
567 u_t = self.L_bar_t[t + 1] * x_hat + self.l_bar_t[t + 1]
Austin Schuh434c8372018-01-21 16:30:06 -0800568
Ravago Jones5127ccc2022-07-31 16:32:45 -0700569 x_hat_prev = self.dynamics.inverse_discrete_dynamics(
570 x_hat, u_t)
571 print 'x_hat[%2d]: %s' % (t, x_hat.T)
572 print 'x_hat_prev[%2d]: %s' % (t, x_hat_prev.T)
573 print('L_bar[%2d]: %s' % (t + 1, self.L_bar_t[t + 1])).replace(
574 '\n', '\n ')
575 print('l_bar[%2d]: %s' % (t + 1, self.l_bar_t[t + 1])).replace(
576 '\n', '\n ')
577 print 'u[%2d]: %s' % (t, u_t.T)
578 # Now compute the linearized A, B, and C
579 # Start by doing it numerically, and then optimize.
580 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
581 x_hat_prev, u_t)
582 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
583 x_hat_prev, u_t)
584 c_t = x_hat - A_t * x_hat_prev - B_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800585
Ravago Jones5127ccc2022-07-31 16:32:45 -0700586 print('A_t[%2d]: %s' % (t, A_t)).replace('\n', '\n ')
587 print('B_t[%2d]: %s' % (t, B_t)).replace('\n', '\n ')
588 print('c_t[%2d]: %s' % (t, c_t)).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800589
Ravago Jones5127ccc2022-07-31 16:32:45 -0700590 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_prev, u_t)
591 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_prev, u_t)
592 P_T_t = numerical_jacobian_u_x(self.cost.cost, x_hat_prev, u_t)
593 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_prev, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800594
Ravago Jones5127ccc2022-07-31 16:32:45 -0700595 q_t = numerical_jacobian_x(self.cost.cost, x_hat_prev, u_t).T \
596 - Q_t * x_hat_prev - P_T_t * u_t
597 r_t = numerical_jacobian_u(self.cost.cost, x_hat_prev, u_t).T \
598 - P_t * x_hat_prev - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800599
Ravago Jones5127ccc2022-07-31 16:32:45 -0700600 q_scalar_t = self.cost.cost(x_hat_prev, u_t) \
601 - 0.5 * (x_hat_prev.T * (Q_t * x_hat_prev + P_t.T * u_t) \
602 + u_t.T * (P_t * x_hat_prev + R_t * u_t)) \
603 - x_hat_prev.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800604
Ravago Jones5127ccc2022-07-31 16:32:45 -0700605 C_t = P_t + B_t.T * self.S_t[t + 1] * A_t
606 D_t = Q_t + A_t.T * self.S_t[t + 1] * A_t
607 E_t = R_t + B_t.T * self.S_t[t + 1] * B_t
608 d_t = q_t + A_t.T * self.s_t[t + 1] + A_t.T * self.S_t[t +
609 1] * c_t
610 e_t = r_t + B_t.T * self.s_t[t + 1] + B_t.T * self.S_t[t +
611 1] * c_t
612 self.L_t[t] = -numpy.linalg.inv(E_t) * C_t
613 self.l_t[t] = -numpy.linalg.inv(E_t) * e_t
614 self.s_t[t] = d_t + C_t.T * self.l_t[t]
615 self.S_t[t] = D_t + C_t.T * self.L_t[t]
616 self.s_scalar_t[t] = q_scalar_t \
617 - 0.5 * e_t.T * numpy.linalg.inv(E_t) * e_t \
618 + 0.5 * c_t.T * self.S_t[t + 1] * c_t \
619 + c_t.T * self.s_t[t + 1] \
620 + self.s_scalar_t[t + 1]
Austin Schuh434c8372018-01-21 16:30:06 -0800621
Ravago Jones5127ccc2022-07-31 16:32:45 -0700622 x_hat = -numpy.linalg.solve((self.S_t[t] + self.S_bar_t[t]),
623 (self.s_t[t] + self.s_bar_t[t]))
624 if t == 0:
625 self.last_x_hat_t[t] = x_hat_initial
626 else:
627 self.last_x_hat_t[t] = x_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800628
Ravago Jones5127ccc2022-07-31 16:32:45 -0700629 x_hat_t = [x_hat_initial]
Austin Schuh434c8372018-01-21 16:30:06 -0800630
Ravago Jones5127ccc2022-07-31 16:32:45 -0700631 pylab.figure('states %d' % a)
632 pylab.ion()
633 for dim in range(num_states):
634 pylab.plot(
635 numpy.arange(len(self.last_x_hat_t)),
636 [x_hat_loop[dim, 0] for x_hat_loop in self.last_x_hat_t],
637 marker='o',
638 label='Xhat[%d]' % dim)
639 pylab.legend()
640 pylab.draw()
Austin Schuh434c8372018-01-21 16:30:06 -0800641
Ravago Jones5127ccc2022-07-31 16:32:45 -0700642 pylab.figure('xy %d' % a)
643 pylab.ion()
644 pylab.plot([x_hat_loop[0, 0] for x_hat_loop in self.last_x_hat_t],
645 [x_hat_loop[1, 0] for x_hat_loop in self.last_x_hat_t],
646 marker='o',
647 label='trajectory')
648 pylab.legend()
649 pylab.draw()
Austin Schuh434c8372018-01-21 16:30:06 -0800650
Ravago Jones5127ccc2022-07-31 16:32:45 -0700651 final_u_t = [
652 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
653 ]
654 cost_to_go = []
655 cost_to_come = []
656 cost = []
657 for t in range(l):
658 cost_to_go.append(
659 (0.5 * self.last_x_hat_t[t].T * self.S_t[t] * self.last_x_hat_t[t] \
660 + self.last_x_hat_t[t].T * self.s_t[t] + self.s_scalar_t[t])[0, 0])
661 cost_to_come.append(
662 (0.5 * self.last_x_hat_t[t].T * self.S_bar_t[t] * self.last_x_hat_t[t] \
663 + self.last_x_hat_t[t].T * self.s_bar_t[t] + self.s_scalar_bar_t[t])[0, 0])
664 cost.append(cost_to_go[-1] + cost_to_come[-1])
665 final_u_t[t] = self.L_t[t] * self.last_x_hat_t[t] + self.l_t[t]
Austin Schuh434c8372018-01-21 16:30:06 -0800666
Ravago Jones5127ccc2022-07-31 16:32:45 -0700667 for t in range(l):
668 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
669 self.last_x_hat_t[t], final_u_t[t])
670 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
671 self.last_x_hat_t[t], final_u_t[t])
672 c_t = self.dynamics.discrete_dynamics(self.last_x_hat_t[t], final_u_t[t]) \
673 - A_t * self.last_x_hat_t[t] - B_t * final_u_t[t]
674 print("Infeasability at", t, "is",
675 ((A_t * self.last_x_hat_t[t] + B_t * final_u_t[t] + c_t) \
676 - self.last_x_hat_t[t + 1]).T)
Austin Schuh434c8372018-01-21 16:30:06 -0800677
Ravago Jones5127ccc2022-07-31 16:32:45 -0700678 pylab.figure('u')
679 samples = numpy.arange(len(final_u_t))
680 for i in range(num_inputs):
681 pylab.plot(samples, [u[i, 0] for u in final_u_t],
682 label='u[%d]' % i,
683 marker='o')
684 pylab.legend()
685
686 pylab.figure('cost')
687 cost_samples = numpy.arange(len(cost))
688 pylab.plot(cost_samples, cost_to_go, label='cost to go', marker='o')
689 pylab.plot(cost_samples,
690 cost_to_come,
691 label='cost to come',
692 marker='o')
693 pylab.plot(cost_samples, cost, label='cost', marker='o')
694 pylab.legend()
695
696 pylab.ioff()
697 pylab.show()
698
Austin Schuh434c8372018-01-21 16:30:06 -0800699
700if __name__ == '__main__':
Ravago Jones5127ccc2022-07-31 16:32:45 -0700701 dt = 0.05
702 #arm_dynamics = ArmDynamics(dt=dt)
703 #elqr = ELQR(arm_dynamics, ArmCostFunction(dt=dt, dynamics=arm_dynamics))
704 #x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0], [0.0]])
705 #elqr.Solve(x_hat_initial, 100, 3)
Austin Schuh434c8372018-01-21 16:30:06 -0800706
Ravago Jones5127ccc2022-07-31 16:32:45 -0700707 elqr = ELQR(SkidSteerDynamics(dt=dt), CostFunction(dt=dt))
708 x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0]])
709 elqr.Solve(x_hat_initial, 100, 15)
710 sys.exit(1)