blob: a2a31af079b762f844e2ba560c9afdefe1cc7252 [file] [log] [blame]
Austin Schuh085eab92020-11-26 13:54:51 -08001#!/usr/bin/python3
Austin Schuh434c8372018-01-21 16:30:06 -08002# This is an initial, hacky implementation of the extended LQR paper. It's just
3# a proof of concept, so don't trust it too much.
4
5import numpy
6import scipy.optimize
7from matplotlib import pylab
8import sys
9
10from frc971.control_loops.python import controls
11
12
13class ArmDynamics(object):
Austin Schuh434c8372018-01-21 16:30:06 -080014
Ravago Jones5127ccc2022-07-31 16:32:45 -070015 def __init__(self, dt):
16 self.dt = dt
Austin Schuh434c8372018-01-21 16:30:06 -080017
Ravago Jones5127ccc2022-07-31 16:32:45 -070018 self.l1 = 1.0
19 self.l2 = 0.8
20 self.num_states = 4
21 self.num_inputs = 2
22
23 def dynamics(self, X, U):
24 """Calculates the dynamics for a double jointed arm.
Austin Schuh434c8372018-01-21 16:30:06 -080025
26 Args:
27 X, numpy.matrix(4, 1), The state. [theta1, omega1, theta2, omega2]
28 U, numpy.matrix(2, 1), The input. [torque1, torque2]
29
30 Returns:
31 numpy.matrix(4, 1), The derivative of the dynamics.
32 """
Ravago Jones5127ccc2022-07-31 16:32:45 -070033 return numpy.matrix([[X[1, 0]], [U[0, 0]], [X[3, 0]], [U[1, 0]]])
Austin Schuh434c8372018-01-21 16:30:06 -080034
Ravago Jones5127ccc2022-07-31 16:32:45 -070035 def discrete_dynamics(self, X, U):
36 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
Austin Schuh434c8372018-01-21 16:30:06 -080037
Ravago Jones5127ccc2022-07-31 16:32:45 -070038 def inverse_discrete_dynamics(self, X, U):
39 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X,
40 dt)
41
Austin Schuh434c8372018-01-21 16:30:06 -080042
43# Simple implementation for a quadratic cost function.
44class ArmCostFunction:
Austin Schuh434c8372018-01-21 16:30:06 -080045
Ravago Jones5127ccc2022-07-31 16:32:45 -070046 def __init__(self, dt, dynamics):
47 self.num_states = 4
48 self.num_inputs = 2
49 self.dt = dt
50 self.dynamics = dynamics
Austin Schuh434c8372018-01-21 16:30:06 -080051
Ravago Jones5127ccc2022-07-31 16:32:45 -070052 q_pos = 0.5
53 q_vel = 1.65
54 self.Q = numpy.matrix(
55 numpy.diag([
56 1.0 / (q_pos**2.0), 1.0 / (q_vel**2.0), 1.0 / (q_pos**2.0),
57 1.0 / (q_vel**2.0)
58 ]))
Austin Schuh434c8372018-01-21 16:30:06 -080059
Ravago Jones5127ccc2022-07-31 16:32:45 -070060 self.R = numpy.matrix(
61 numpy.diag([1.0 / (12.0**2.0), 1.0 / (12.0**2.0)]))
Austin Schuh434c8372018-01-21 16:30:06 -080062
Ravago Jones5127ccc2022-07-31 16:32:45 -070063 final_A = numerical_jacobian_x(self.dynamics.discrete_dynamics,
64 numpy.matrix(numpy.zeros((4, 1))),
65 numpy.matrix(numpy.zeros((2, 1))))
66 final_B = numerical_jacobian_u(self.dynamics.discrete_dynamics,
67 numpy.matrix(numpy.zeros((4, 1))),
68 numpy.matrix(numpy.zeros((2, 1))))
Austin Schuhea5f0a72024-09-02 15:02:36 -070069 print('Final A', final_A)
70 print('Final B', final_B)
Ravago Jones5127ccc2022-07-31 16:32:45 -070071 K, self.S = controls.dlqr(final_A,
72 final_B,
73 self.Q,
74 self.R,
75 optimal_cost_function=True)
Austin Schuhea5f0a72024-09-02 15:02:36 -070076 print('Final eig:', numpy.linalg.eig(final_A - final_B * K))
Ravago Jones5127ccc2022-07-31 16:32:45 -070077
78 def final_cost(self, X, U):
79 """Computes the final cost of being at X
Austin Schuh434c8372018-01-21 16:30:06 -080080
81 Args:
82 X: numpy.matrix(self.num_states, 1)
83 U: numpy.matrix(self.num_inputs, 1), ignored
84
85 Returns:
86 numpy.matrix(1, 1), The quadratic cost of being at X
87 """
Austin Schuh7227a8b2024-09-21 16:22:42 -070088 print('TODO(austin): S -> X.T * S * X, need to fix if we care')
Ravago Jones5127ccc2022-07-31 16:32:45 -070089 return 0.5 * X.T * self.S * X
Austin Schuh434c8372018-01-21 16:30:06 -080090
Ravago Jones5127ccc2022-07-31 16:32:45 -070091 def cost(self, X, U):
92 """Computes the incremental cost given a position and U.
Austin Schuh434c8372018-01-21 16:30:06 -080093
94 Args:
95 X: numpy.matrix(self.num_states, 1)
96 U: numpy.matrix(self.num_inputs, 1)
97
98 Returns:
99 numpy.matrix(1, 1), The quadratic cost of evaluating U.
100 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700101 return U.T * self.R * U + X.T * self.Q * X
Austin Schuh434c8372018-01-21 16:30:06 -0800102
Ravago Jones5127ccc2022-07-31 16:32:45 -0700103 def estimate_Q_final(self, X_hat):
104 """Returns the quadraticized final Q around X_hat.
Austin Schuh434c8372018-01-21 16:30:06 -0800105
106 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
107
108 Args:
109 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
110
111 Result:
112 numpy.matrix(self.num_states, self.num_states)
113 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700114 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
Austin Schuhea5f0a72024-09-02 15:02:36 -0700115 print('S', self.S)
116 print('Q_final', numerical_jacobian_x_x(self.final_cost, X_hat,
117 zero_U))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700118 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
Austin Schuh434c8372018-01-21 16:30:06 -0800119
Ravago Jones5127ccc2022-07-31 16:32:45 -0700120 def estimate_partial_cost_partial_x_final(self, X_hat):
121 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
Austin Schuh434c8372018-01-21 16:30:06 -0800122
123 Args:
124 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
125
126 Result:
127 numpy.matrix(self.num_states, 1)
128 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700129 return numerical_jacobian_x(
130 self.final_cost, X_hat,
131 numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
Austin Schuh434c8372018-01-21 16:30:06 -0800132
Ravago Jones5127ccc2022-07-31 16:32:45 -0700133 def estimate_q_final(self, X_hat):
134 """Returns q evaluated at X_hat for the final cost function."""
135 return self.estimate_partial_cost_partial_x_final(
136 X_hat) - self.estimate_Q_final(X_hat) * X_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800137
138
139class SkidSteerDynamics(object):
Austin Schuh434c8372018-01-21 16:30:06 -0800140
Ravago Jones5127ccc2022-07-31 16:32:45 -0700141 def __init__(self, dt):
142 self.width = 0.2
143 self.dt = dt
144 self.num_states = 3
145 self.num_inputs = 2
146
147 def dynamics(self, X, U):
148 """Calculates the dynamics for a 2 wheeled robot.
Austin Schuh434c8372018-01-21 16:30:06 -0800149
150 Args:
151 X, numpy.matrix(3, 1), The state. [x, y, theta]
152 U, numpy.matrix(2, 1), The input. [left velocity, right velocity]
153
154 Returns:
155 numpy.matrix(3, 1), The derivative of the dynamics.
156 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700157 #return numpy.matrix([[X[1, 0]],
158 # [X[2, 0]],
159 # [U[0, 0]]])
160 return numpy.matrix([[(U[0, 0] + U[1, 0]) * numpy.cos(X[2, 0]) / 2.0],
161 [(U[0, 0] + U[1, 0]) * numpy.sin(X[2, 0]) / 2.0],
162 [(U[1, 0] - U[0, 0]) / self.width]])
Austin Schuh434c8372018-01-21 16:30:06 -0800163
Ravago Jones5127ccc2022-07-31 16:32:45 -0700164 def discrete_dynamics(self, X, U):
165 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
Austin Schuh434c8372018-01-21 16:30:06 -0800166
Ravago Jones5127ccc2022-07-31 16:32:45 -0700167 def inverse_discrete_dynamics(self, X, U):
168 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X,
169 dt)
Austin Schuh434c8372018-01-21 16:30:06 -0800170
171
172# Simple implementation for a quadratic cost function.
173class CostFunction:
Austin Schuh434c8372018-01-21 16:30:06 -0800174
Ravago Jones5127ccc2022-07-31 16:32:45 -0700175 def __init__(self, dt):
176 self.num_states = 3
177 self.num_inputs = 2
178 self.dt = dt
179 self.Q = numpy.matrix([[0.1, 0, 0], [0, 0.6, 0], [0, 0, 0.1]
180 ]) / self.dt / self.dt
181 self.R = numpy.matrix([[0.40, 0], [0, 0.40]]) / self.dt / self.dt
182
183 def final_cost(self, X, U):
184 """Computes the final cost of being at X
Austin Schuh434c8372018-01-21 16:30:06 -0800185
186 Args:
187 X: numpy.matrix(self.num_states, 1)
188 U: numpy.matrix(self.num_inputs, 1), ignored
189
190 Returns:
191 numpy.matrix(1, 1), The quadratic cost of being at X
192 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700193 return X.T * self.Q * X * 1000
Austin Schuh434c8372018-01-21 16:30:06 -0800194
Ravago Jones5127ccc2022-07-31 16:32:45 -0700195 def cost(self, X, U):
196 """Computes the incremental cost given a position and U.
Austin Schuh434c8372018-01-21 16:30:06 -0800197
198 Args:
199 X: numpy.matrix(self.num_states, 1)
200 U: numpy.matrix(self.num_inputs, 1)
201
202 Returns:
203 numpy.matrix(1, 1), The quadratic cost of evaluating U.
204 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700205 return U.T * self.R * U + X.T * self.Q * X
Austin Schuh434c8372018-01-21 16:30:06 -0800206
Ravago Jones5127ccc2022-07-31 16:32:45 -0700207 def estimate_Q_final(self, X_hat):
208 """Returns the quadraticized final Q around X_hat.
Austin Schuh434c8372018-01-21 16:30:06 -0800209
210 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
211
212 Args:
213 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
214
215 Result:
216 numpy.matrix(self.num_states, self.num_states)
217 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700218 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
219 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
Austin Schuh434c8372018-01-21 16:30:06 -0800220
Ravago Jones5127ccc2022-07-31 16:32:45 -0700221 def estimate_partial_cost_partial_x_final(self, X_hat):
222 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
Austin Schuh434c8372018-01-21 16:30:06 -0800223
224 Args:
225 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
226
227 Result:
228 numpy.matrix(self.num_states, 1)
229 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700230 return numerical_jacobian_x(
231 self.final_cost, X_hat,
232 numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
Austin Schuh434c8372018-01-21 16:30:06 -0800233
Ravago Jones5127ccc2022-07-31 16:32:45 -0700234 def estimate_q_final(self, X_hat):
235 """Returns q evaluated at X_hat for the final cost function."""
236 return self.estimate_partial_cost_partial_x_final(
237 X_hat) - self.estimate_Q_final(X_hat) * X_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800238
239
240def RungeKutta(f, x, dt):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700241 """4th order RungeKutta integration of F starting at X."""
242 a = f(x)
243 b = f(x + dt / 2.0 * a)
244 c = f(x + dt / 2.0 * b)
245 d = f(x + dt * c)
246 return x + dt * (a + 2.0 * b + 2.0 * c + d) / 6.0
247
Austin Schuh434c8372018-01-21 16:30:06 -0800248
249def numerical_jacobian_x(fn, X, U, epsilon=1e-4):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700250 """Numerically estimates the jacobian around X, U in X.
Austin Schuh434c8372018-01-21 16:30:06 -0800251
252 Args:
253 fn: A function of X, U.
254 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
255 around.
256 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
257 around.
258
259 Returns:
260 numpy.matrix(num_states, num_states), The jacobian of fn with X as the
261 variable.
262 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700263 num_states = X.shape[0]
264 nominal = fn(X, U)
265 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_states)))
266 # It's more expensive, but +- epsilon will be more reliable
267 for i in range(0, num_states):
268 dX_plus = X.copy()
269 dX_plus[i] += epsilon
270 dX_minus = X.copy()
271 dX_minus[i] -= epsilon
272 answer[:, i] = (fn(dX_plus, U) - fn(dX_minus, U)) / epsilon / 2.0
273 return answer
274
Austin Schuh434c8372018-01-21 16:30:06 -0800275
276def numerical_jacobian_u(fn, X, U, epsilon=1e-4):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700277 """Numerically estimates the jacobian around X, U in U.
Austin Schuh434c8372018-01-21 16:30:06 -0800278
279 Args:
280 fn: A function of X, U.
281 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
282 around.
283 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
284 around.
285
286 Returns:
287 numpy.matrix(num_states, num_inputs), The jacobian of fn with U as the
288 variable.
289 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700290 num_states = X.shape[0]
291 num_inputs = U.shape[0]
292 nominal = fn(X, U)
293 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_inputs)))
294 for i in range(0, num_inputs):
295 dU_plus = U.copy()
296 dU_plus[i] += epsilon
297 dU_minus = U.copy()
298 dU_minus[i] -= epsilon
299 answer[:, i] = (fn(X, dU_plus) - fn(X, dU_minus)) / epsilon / 2.0
300 return answer
301
Austin Schuh434c8372018-01-21 16:30:06 -0800302
303def numerical_jacobian_x_x(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700304 return numerical_jacobian_x(
305 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T,
306 X, U)
307
Austin Schuh434c8372018-01-21 16:30:06 -0800308
309def numerical_jacobian_x_u(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700310 return numerical_jacobian_x(
311 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T,
312 X, U)
313
Austin Schuh434c8372018-01-21 16:30:06 -0800314
315def numerical_jacobian_u_x(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700316 return numerical_jacobian_u(
317 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T,
318 X, U)
319
Austin Schuh434c8372018-01-21 16:30:06 -0800320
321def numerical_jacobian_u_u(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700322 return numerical_jacobian_u(
323 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T,
324 X, U)
Austin Schuh434c8372018-01-21 16:30:06 -0800325
326
327class ELQR(object):
Austin Schuh434c8372018-01-21 16:30:06 -0800328
Ravago Jones5127ccc2022-07-31 16:32:45 -0700329 def __init__(self, dynamics, cost):
330 self.dynamics = dynamics
331 self.cost = cost
Austin Schuh434c8372018-01-21 16:30:06 -0800332
Ravago Jones5127ccc2022-07-31 16:32:45 -0700333 def Solve(self, x_hat_initial, horizon, iterations):
334 l = horizon
335 num_states = self.dynamics.num_states
336 num_inputs = self.dynamics.num_inputs
337 self.S_bar_t = [
338 numpy.matrix(numpy.zeros((num_states, num_states)))
339 for _ in range(l + 1)
340 ]
341 self.s_bar_t = [
342 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
343 ]
344 self.s_scalar_bar_t = [
345 numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)
346 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800347
Ravago Jones5127ccc2022-07-31 16:32:45 -0700348 self.L_t = [
349 numpy.matrix(numpy.zeros((num_inputs, num_states)))
350 for _ in range(l + 1)
351 ]
352 self.l_t = [
353 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
354 ]
355 self.L_bar_t = [
356 numpy.matrix(numpy.zeros((num_inputs, num_states)))
357 for _ in range(l + 1)
358 ]
359 self.l_bar_t = [
360 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
361 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800362
Ravago Jones5127ccc2022-07-31 16:32:45 -0700363 self.S_t = [
364 numpy.matrix(numpy.zeros((num_states, num_states)))
365 for _ in range(l + 1)
366 ]
367 self.s_t = [
368 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
369 ]
370 self.s_scalar_t = [
371 numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)
372 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800373
Ravago Jones5127ccc2022-07-31 16:32:45 -0700374 self.last_x_hat_t = [
375 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
376 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800377
Ravago Jones5127ccc2022-07-31 16:32:45 -0700378 # Iterate the solver
379 for a in range(iterations):
380 x_hat = x_hat_initial
381 u_t = self.L_t[0] * x_hat + self.l_t[0]
382 self.S_bar_t[0] = numpy.matrix(
383 numpy.zeros((num_states, num_states)))
384 self.s_bar_t[0] = numpy.matrix(numpy.zeros((num_states, 1)))
385 self.s_scalar_bar_t[0] = numpy.matrix([[0]])
Austin Schuh434c8372018-01-21 16:30:06 -0800386
Ravago Jones5127ccc2022-07-31 16:32:45 -0700387 self.last_x_hat_t[0] = x_hat_initial
Austin Schuh434c8372018-01-21 16:30:06 -0800388
Ravago Jones5127ccc2022-07-31 16:32:45 -0700389 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_initial, u_t)
390 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_initial, u_t)
391 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_initial, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800392
Ravago Jones5127ccc2022-07-31 16:32:45 -0700393 q_t = numerical_jacobian_x(self.cost.cost, x_hat_initial, u_t).T \
394 - Q_t * x_hat_initial - P_t.T * u_t
395 r_t = numerical_jacobian_u(self.cost.cost, x_hat_initial, u_t).T \
396 - P_t * x_hat_initial - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800397
Ravago Jones5127ccc2022-07-31 16:32:45 -0700398 q_scalar_t = self.cost.cost(x_hat_initial, u_t) \
399 - 0.5 * (x_hat_initial.T * (Q_t * x_hat_initial + P_t.T * u_t) \
400 + u_t.T * (P_t * x_hat_initial + R_t * u_t)) \
401 - x_hat_initial.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800402
Ravago Jones5127ccc2022-07-31 16:32:45 -0700403 start_A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
404 x_hat_initial, u_t)
405 start_B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
406 x_hat_initial, u_t)
407 x_hat_next = self.dynamics.discrete_dynamics(x_hat_initial, u_t)
408 start_c_t = x_hat_next - start_A_t * x_hat_initial - start_B_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800409
Ravago Jones5127ccc2022-07-31 16:32:45 -0700410 B_svd_u, B_svd_sigma_diag, B_svd_v = numpy.linalg.svd(start_B_t)
411 B_svd_sigma = numpy.matrix(numpy.zeros(start_B_t.shape))
412 B_svd_sigma[0:B_svd_sigma_diag.shape[0], 0:B_svd_sigma_diag.shape[0]] = \
413 numpy.diag(B_svd_sigma_diag)
Austin Schuh434c8372018-01-21 16:30:06 -0800414
Ravago Jones5127ccc2022-07-31 16:32:45 -0700415 B_svd_sigma_inv = numpy.matrix(numpy.zeros(start_B_t.shape)).T
416 B_svd_sigma_inv[0:B_svd_sigma_diag.shape[0],
417 0:B_svd_sigma_diag.shape[0]] = \
418 numpy.linalg.inv(numpy.diag(B_svd_sigma_diag))
419 B_svd_inv = B_svd_v.T * B_svd_sigma_inv * B_svd_u.T
Austin Schuh434c8372018-01-21 16:30:06 -0800420
Ravago Jones5127ccc2022-07-31 16:32:45 -0700421 self.L_bar_t[1] = B_svd_inv
422 self.l_bar_t[1] = -B_svd_inv * (start_A_t * x_hat_initial +
423 start_c_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800424
Ravago Jones5127ccc2022-07-31 16:32:45 -0700425 self.S_bar_t[1] = self.L_bar_t[1].T * R_t * self.L_bar_t[1]
Austin Schuh434c8372018-01-21 16:30:06 -0800426
Ravago Jones5127ccc2022-07-31 16:32:45 -0700427 TotalS_1 = start_B_t.T * self.S_t[1] * start_B_t + R_t
428 Totals_1 = start_B_t.T * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
429 + start_B_t.T * self.s_t[1] + P_t * x_hat_initial + r_t
430 Totals_scalar_1 = 0.5 * (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
431 + self.s_scalar_t[1] + x_hat_initial.T * q_t + q_scalar_t \
432 + 0.5 * x_hat_initial.T * Q_t * x_hat_initial \
433 + (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.s_t[1]
Austin Schuh434c8372018-01-21 16:30:06 -0800434
Ravago Jones5127ccc2022-07-31 16:32:45 -0700435 optimal_u_1 = -numpy.linalg.solve(TotalS_1, Totals_1)
436 optimal_x_1 = start_A_t * x_hat_initial \
437 + start_B_t * optimal_u_1 + start_c_t
Austin Schuh434c8372018-01-21 16:30:06 -0800438
Ravago Jones5127ccc2022-07-31 16:32:45 -0700439 # TODO(austin): Disable this if we are controlable. It should not be needed then.
440 S_bar_1_eigh_eigenvalues, S_bar_1_eigh_eigenvectors = \
441 numpy.linalg.eigh(self.S_bar_t[1])
442 S_bar_1_eigh = numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues))
443 S_bar_1_eigh_eigenvalues_stiff = S_bar_1_eigh_eigenvalues.copy()
444 for i in range(S_bar_1_eigh_eigenvalues_stiff.shape[0]):
445 if abs(S_bar_1_eigh_eigenvalues_stiff[i]) < 1e-8:
446 S_bar_1_eigh_eigenvalues_stiff[i] = max(
447 S_bar_1_eigh_eigenvalues_stiff) * 1.0
Austin Schuh434c8372018-01-21 16:30:06 -0800448
Ravago Jones5127ccc2022-07-31 16:32:45 -0700449 S_bar_stiff = S_bar_1_eigh_eigenvectors * numpy.matrix(
450 numpy.diag(S_bar_1_eigh_eigenvalues_stiff)
451 ) * S_bar_1_eigh_eigenvectors.T
Austin Schuh434c8372018-01-21 16:30:06 -0800452
Austin Schuhea5f0a72024-09-02 15:02:36 -0700453 print('Min u', -numpy.linalg.solve(TotalS_1, Totals_1))
454 print('Min x_hat', optimal_x_1)
Ravago Jones5127ccc2022-07-31 16:32:45 -0700455 self.s_bar_t[1] = -self.s_t[1] - (S_bar_stiff +
456 self.S_t[1]) * optimal_x_1
457 self.s_scalar_bar_t[1] = 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1 \
458 - optimal_x_1.T * (S_bar_stiff + self.S_t[1]) * optimal_x_1) \
459 + optimal_u_1.T * Totals_1 \
460 - optimal_x_1.T * (self.s_bar_t[1] + self.s_t[1]) \
461 - self.s_scalar_t[1] + Totals_scalar_1
Austin Schuh434c8372018-01-21 16:30:06 -0800462
Austin Schuhea5f0a72024-09-02 15:02:36 -0700463 print('optimal_u_1', optimal_u_1)
464 print('TotalS_1', TotalS_1)
465 print('Totals_1', Totals_1)
466 print('Totals_scalar_1', Totals_scalar_1)
467 print('overall cost 1', 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1) \
468 + optimal_u_1.T * Totals_1 + Totals_scalar_1)
469 print('overall cost 0', 0.5 * (x_hat_initial.T * self.S_t[0] * x_hat_initial) \
470 + x_hat_initial.T * self.s_t[0] + self.s_scalar_t[0])
Austin Schuh434c8372018-01-21 16:30:06 -0800471
Austin Schuhea5f0a72024-09-02 15:02:36 -0700472 print('t forward 0')
473 print('x_hat_initial[ 0]: %s' % (x_hat_initial))
474 print('x_hat[%2d]: %s' % (0, x_hat.T))
475 print('x_hat_next[%2d]: %s' % (0, x_hat_next.T))
476 print('u[%2d]: %s' % (0, u_t.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700477 print('L[ 0]: %s' % (self.L_t[0], )).replace('\n', '\n ')
478 print('l[ 0]: %s' % (self.l_t[0], )).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800479
Ravago Jones5127ccc2022-07-31 16:32:45 -0700480 print('A_t[%2d]: %s' % (0, start_A_t)).replace('\n', '\n ')
481 print('B_t[%2d]: %s' % (0, start_B_t)).replace('\n', '\n ')
482 print('c_t[%2d]: %s' % (0, start_c_t)).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800483
Ravago Jones5127ccc2022-07-31 16:32:45 -0700484 # TODO(austin): optimal_x_1 is x_hat
485 x_hat = -numpy.linalg.solve((self.S_t[1] + S_bar_stiff),
486 (self.s_t[1] + self.s_bar_t[1]))
Austin Schuhea5f0a72024-09-02 15:02:36 -0700487 print('new xhat', x_hat)
Austin Schuh434c8372018-01-21 16:30:06 -0800488
Ravago Jones5127ccc2022-07-31 16:32:45 -0700489 self.S_bar_t[1] = S_bar_stiff
Austin Schuh434c8372018-01-21 16:30:06 -0800490
Ravago Jones5127ccc2022-07-31 16:32:45 -0700491 self.last_x_hat_t[1] = x_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800492
Ravago Jones5127ccc2022-07-31 16:32:45 -0700493 for t in range(1, l):
Austin Schuhea5f0a72024-09-02 15:02:36 -0700494 print('t forward', t)
Ravago Jones5127ccc2022-07-31 16:32:45 -0700495 u_t = self.L_t[t] * x_hat + self.l_t[t]
Austin Schuh434c8372018-01-21 16:30:06 -0800496
Ravago Jones5127ccc2022-07-31 16:32:45 -0700497 x_hat_next = self.dynamics.discrete_dynamics(x_hat, u_t)
498 A_bar_t = numerical_jacobian_x(
499 self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t)
500 B_bar_t = numerical_jacobian_u(
501 self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t)
502 c_bar_t = x_hat - A_bar_t * x_hat_next - B_bar_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800503
Austin Schuhea5f0a72024-09-02 15:02:36 -0700504 print('x_hat[%2d]: %s' % (t, x_hat.T))
505 print('x_hat_next[%2d]: %s' % (t, x_hat_next.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700506 print('L[%2d]: %s' % (
507 t,
508 self.L_t[t],
509 )).replace('\n', '\n ')
510 print('l[%2d]: %s' % (
511 t,
512 self.l_t[t],
513 )).replace('\n', '\n ')
Austin Schuhea5f0a72024-09-02 15:02:36 -0700514 print('u[%2d]: %s' % (t, u_t.T))
Austin Schuh434c8372018-01-21 16:30:06 -0800515
Ravago Jones5127ccc2022-07-31 16:32:45 -0700516 print('A_bar_t[%2d]: %s' % (t, A_bar_t)).replace(
517 '\n', '\n ')
518 print('B_bar_t[%2d]: %s' % (t, B_bar_t)).replace(
519 '\n', '\n ')
520 print('c_bar_t[%2d]: %s' % (t, c_bar_t)).replace(
521 '\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800522
Ravago Jones5127ccc2022-07-31 16:32:45 -0700523 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat, u_t)
524 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat, u_t)
525 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800526
Ravago Jones5127ccc2022-07-31 16:32:45 -0700527 q_t = numerical_jacobian_x(self.cost.cost, x_hat, u_t).T \
528 - Q_t * x_hat - P_t.T * u_t
529 r_t = numerical_jacobian_u(self.cost.cost, x_hat, u_t).T \
530 - P_t * x_hat - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800531
Ravago Jones5127ccc2022-07-31 16:32:45 -0700532 q_scalar_t = self.cost.cost(x_hat, u_t) \
533 - 0.5 * (x_hat.T * (Q_t * x_hat + P_t.T * u_t) \
534 + u_t.T * (P_t * x_hat + R_t * u_t)) - x_hat.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800535
Ravago Jones5127ccc2022-07-31 16:32:45 -0700536 C_bar_t = B_bar_t.T * (self.S_bar_t[t] +
537 Q_t) * A_bar_t + P_t * A_bar_t
538 D_bar_t = A_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t
539 E_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * B_bar_t + R_t \
540 + P_t * B_bar_t + B_bar_t.T * P_t.T
541 d_bar_t = A_bar_t.T * (self.s_bar_t[t] + q_t) \
542 + A_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
543 e_bar_t = r_t + P_t * c_bar_t + B_bar_t.T * self.s_bar_t[t] \
544 + B_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800545
Ravago Jones5127ccc2022-07-31 16:32:45 -0700546 self.L_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * C_bar_t
547 self.l_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * e_bar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800548
Ravago Jones5127ccc2022-07-31 16:32:45 -0700549 self.S_bar_t[t + 1] = D_bar_t + C_bar_t.T * self.L_bar_t[t + 1]
550 self.s_bar_t[t + 1] = d_bar_t + C_bar_t.T * self.l_bar_t[t + 1]
551 self.s_scalar_bar_t[t + 1] = \
552 -0.5 * e_bar_t.T * numpy.linalg.inv(E_bar_t) * e_bar_t \
553 + 0.5 * c_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t \
554 + c_bar_t.T * self.s_bar_t[t] + c_bar_t.T * q_t \
555 + self.s_scalar_bar_t[t] + q_scalar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800556
Ravago Jones5127ccc2022-07-31 16:32:45 -0700557 x_hat = -numpy.linalg.solve(
558 (self.S_t[t + 1] + self.S_bar_t[t + 1]),
559 (self.s_t[t + 1] + self.s_bar_t[t + 1]))
560 self.S_t[l] = self.cost.estimate_Q_final(x_hat)
561 self.s_t[l] = self.cost.estimate_q_final(x_hat)
562 x_hat = -numpy.linalg.inv(self.S_t[l] + self.S_bar_t[l]) \
563 * (self.s_t[l] + self.s_bar_t[l])
Austin Schuh434c8372018-01-21 16:30:06 -0800564
Ravago Jones5127ccc2022-07-31 16:32:45 -0700565 for t in reversed(range(l)):
Austin Schuhea5f0a72024-09-02 15:02:36 -0700566 print('t backward', t)
Ravago Jones5127ccc2022-07-31 16:32:45 -0700567 # TODO(austin): I don't think we can use L_t like this here.
568 # I think we are off by 1 somewhere...
569 u_t = self.L_bar_t[t + 1] * x_hat + self.l_bar_t[t + 1]
Austin Schuh434c8372018-01-21 16:30:06 -0800570
Ravago Jones5127ccc2022-07-31 16:32:45 -0700571 x_hat_prev = self.dynamics.inverse_discrete_dynamics(
572 x_hat, u_t)
Austin Schuhea5f0a72024-09-02 15:02:36 -0700573 print('x_hat[%2d]: %s' % (t, x_hat.T))
574 print('x_hat_prev[%2d]: %s' % (t, x_hat_prev.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700575 print('L_bar[%2d]: %s' % (t + 1, self.L_bar_t[t + 1])).replace(
576 '\n', '\n ')
577 print('l_bar[%2d]: %s' % (t + 1, self.l_bar_t[t + 1])).replace(
578 '\n', '\n ')
Austin Schuhea5f0a72024-09-02 15:02:36 -0700579 print('u[%2d]: %s' % (t, u_t.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700580 # Now compute the linearized A, B, and C
581 # Start by doing it numerically, and then optimize.
582 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
583 x_hat_prev, u_t)
584 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
585 x_hat_prev, u_t)
586 c_t = x_hat - A_t * x_hat_prev - B_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800587
Ravago Jones5127ccc2022-07-31 16:32:45 -0700588 print('A_t[%2d]: %s' % (t, A_t)).replace('\n', '\n ')
589 print('B_t[%2d]: %s' % (t, B_t)).replace('\n', '\n ')
590 print('c_t[%2d]: %s' % (t, c_t)).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800591
Ravago Jones5127ccc2022-07-31 16:32:45 -0700592 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_prev, u_t)
593 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_prev, u_t)
594 P_T_t = numerical_jacobian_u_x(self.cost.cost, x_hat_prev, u_t)
595 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_prev, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800596
Ravago Jones5127ccc2022-07-31 16:32:45 -0700597 q_t = numerical_jacobian_x(self.cost.cost, x_hat_prev, u_t).T \
598 - Q_t * x_hat_prev - P_T_t * u_t
599 r_t = numerical_jacobian_u(self.cost.cost, x_hat_prev, u_t).T \
600 - P_t * x_hat_prev - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800601
Ravago Jones5127ccc2022-07-31 16:32:45 -0700602 q_scalar_t = self.cost.cost(x_hat_prev, u_t) \
603 - 0.5 * (x_hat_prev.T * (Q_t * x_hat_prev + P_t.T * u_t) \
604 + u_t.T * (P_t * x_hat_prev + R_t * u_t)) \
605 - x_hat_prev.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800606
Ravago Jones5127ccc2022-07-31 16:32:45 -0700607 C_t = P_t + B_t.T * self.S_t[t + 1] * A_t
608 D_t = Q_t + A_t.T * self.S_t[t + 1] * A_t
609 E_t = R_t + B_t.T * self.S_t[t + 1] * B_t
610 d_t = q_t + A_t.T * self.s_t[t + 1] + A_t.T * self.S_t[t +
611 1] * c_t
612 e_t = r_t + B_t.T * self.s_t[t + 1] + B_t.T * self.S_t[t +
613 1] * c_t
614 self.L_t[t] = -numpy.linalg.inv(E_t) * C_t
615 self.l_t[t] = -numpy.linalg.inv(E_t) * e_t
616 self.s_t[t] = d_t + C_t.T * self.l_t[t]
617 self.S_t[t] = D_t + C_t.T * self.L_t[t]
618 self.s_scalar_t[t] = q_scalar_t \
619 - 0.5 * e_t.T * numpy.linalg.inv(E_t) * e_t \
620 + 0.5 * c_t.T * self.S_t[t + 1] * c_t \
621 + c_t.T * self.s_t[t + 1] \
622 + self.s_scalar_t[t + 1]
Austin Schuh434c8372018-01-21 16:30:06 -0800623
Ravago Jones5127ccc2022-07-31 16:32:45 -0700624 x_hat = -numpy.linalg.solve((self.S_t[t] + self.S_bar_t[t]),
625 (self.s_t[t] + self.s_bar_t[t]))
626 if t == 0:
627 self.last_x_hat_t[t] = x_hat_initial
628 else:
629 self.last_x_hat_t[t] = x_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800630
Ravago Jones5127ccc2022-07-31 16:32:45 -0700631 x_hat_t = [x_hat_initial]
Austin Schuh434c8372018-01-21 16:30:06 -0800632
Ravago Jones5127ccc2022-07-31 16:32:45 -0700633 pylab.figure('states %d' % a)
634 pylab.ion()
635 for dim in range(num_states):
636 pylab.plot(
637 numpy.arange(len(self.last_x_hat_t)),
638 [x_hat_loop[dim, 0] for x_hat_loop in self.last_x_hat_t],
639 marker='o',
640 label='Xhat[%d]' % dim)
641 pylab.legend()
642 pylab.draw()
Austin Schuh434c8372018-01-21 16:30:06 -0800643
Ravago Jones5127ccc2022-07-31 16:32:45 -0700644 pylab.figure('xy %d' % a)
645 pylab.ion()
646 pylab.plot([x_hat_loop[0, 0] for x_hat_loop in self.last_x_hat_t],
647 [x_hat_loop[1, 0] for x_hat_loop in self.last_x_hat_t],
648 marker='o',
649 label='trajectory')
650 pylab.legend()
651 pylab.draw()
Austin Schuh434c8372018-01-21 16:30:06 -0800652
Ravago Jones5127ccc2022-07-31 16:32:45 -0700653 final_u_t = [
654 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
655 ]
656 cost_to_go = []
657 cost_to_come = []
658 cost = []
659 for t in range(l):
660 cost_to_go.append(
661 (0.5 * self.last_x_hat_t[t].T * self.S_t[t] * self.last_x_hat_t[t] \
662 + self.last_x_hat_t[t].T * self.s_t[t] + self.s_scalar_t[t])[0, 0])
663 cost_to_come.append(
664 (0.5 * self.last_x_hat_t[t].T * self.S_bar_t[t] * self.last_x_hat_t[t] \
665 + self.last_x_hat_t[t].T * self.s_bar_t[t] + self.s_scalar_bar_t[t])[0, 0])
666 cost.append(cost_to_go[-1] + cost_to_come[-1])
667 final_u_t[t] = self.L_t[t] * self.last_x_hat_t[t] + self.l_t[t]
Austin Schuh434c8372018-01-21 16:30:06 -0800668
Ravago Jones5127ccc2022-07-31 16:32:45 -0700669 for t in range(l):
670 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
671 self.last_x_hat_t[t], final_u_t[t])
672 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
673 self.last_x_hat_t[t], final_u_t[t])
674 c_t = self.dynamics.discrete_dynamics(self.last_x_hat_t[t], final_u_t[t]) \
675 - A_t * self.last_x_hat_t[t] - B_t * final_u_t[t]
676 print("Infeasability at", t, "is",
677 ((A_t * self.last_x_hat_t[t] + B_t * final_u_t[t] + c_t) \
678 - self.last_x_hat_t[t + 1]).T)
Austin Schuh434c8372018-01-21 16:30:06 -0800679
Ravago Jones5127ccc2022-07-31 16:32:45 -0700680 pylab.figure('u')
681 samples = numpy.arange(len(final_u_t))
682 for i in range(num_inputs):
683 pylab.plot(samples, [u[i, 0] for u in final_u_t],
684 label='u[%d]' % i,
685 marker='o')
686 pylab.legend()
687
688 pylab.figure('cost')
689 cost_samples = numpy.arange(len(cost))
690 pylab.plot(cost_samples, cost_to_go, label='cost to go', marker='o')
691 pylab.plot(cost_samples,
692 cost_to_come,
693 label='cost to come',
694 marker='o')
695 pylab.plot(cost_samples, cost, label='cost', marker='o')
696 pylab.legend()
697
698 pylab.ioff()
699 pylab.show()
700
Austin Schuh434c8372018-01-21 16:30:06 -0800701
702if __name__ == '__main__':
Ravago Jones5127ccc2022-07-31 16:32:45 -0700703 dt = 0.05
704 #arm_dynamics = ArmDynamics(dt=dt)
705 #elqr = ELQR(arm_dynamics, ArmCostFunction(dt=dt, dynamics=arm_dynamics))
706 #x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0], [0.0]])
707 #elqr.Solve(x_hat_initial, 100, 3)
Austin Schuh434c8372018-01-21 16:30:06 -0800708
Ravago Jones5127ccc2022-07-31 16:32:45 -0700709 elqr = ELQR(SkidSteerDynamics(dt=dt), CostFunction(dt=dt))
710 x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0]])
711 elqr.Solve(x_hat_initial, 100, 15)
712 sys.exit(1)