blob: e19ee8f540f4a98810c63dced60609f5a7089708 [file] [log] [blame]
Austin Schuh085eab92020-11-26 13:54:51 -08001#!/usr/bin/python3
Austin Schuh434c8372018-01-21 16:30:06 -08002# This is an initial, hacky implementation of the extended LQR paper. It's just
3# a proof of concept, so don't trust it too much.
4
5import numpy
6import scipy.optimize
7from matplotlib import pylab
8import sys
9
10from frc971.control_loops.python import controls
11
12
13class ArmDynamics(object):
Austin Schuh434c8372018-01-21 16:30:06 -080014
Ravago Jones5127ccc2022-07-31 16:32:45 -070015 def __init__(self, dt):
16 self.dt = dt
Austin Schuh434c8372018-01-21 16:30:06 -080017
Ravago Jones5127ccc2022-07-31 16:32:45 -070018 self.l1 = 1.0
19 self.l2 = 0.8
20 self.num_states = 4
21 self.num_inputs = 2
22
23 def dynamics(self, X, U):
24 """Calculates the dynamics for a double jointed arm.
Austin Schuh434c8372018-01-21 16:30:06 -080025
26 Args:
27 X, numpy.matrix(4, 1), The state. [theta1, omega1, theta2, omega2]
28 U, numpy.matrix(2, 1), The input. [torque1, torque2]
29
30 Returns:
31 numpy.matrix(4, 1), The derivative of the dynamics.
32 """
Ravago Jones5127ccc2022-07-31 16:32:45 -070033 return numpy.matrix([[X[1, 0]], [U[0, 0]], [X[3, 0]], [U[1, 0]]])
Austin Schuh434c8372018-01-21 16:30:06 -080034
Ravago Jones5127ccc2022-07-31 16:32:45 -070035 def discrete_dynamics(self, X, U):
36 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
Austin Schuh434c8372018-01-21 16:30:06 -080037
Ravago Jones5127ccc2022-07-31 16:32:45 -070038 def inverse_discrete_dynamics(self, X, U):
39 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X,
40 dt)
41
Austin Schuh434c8372018-01-21 16:30:06 -080042
43# Simple implementation for a quadratic cost function.
44class ArmCostFunction:
Austin Schuh434c8372018-01-21 16:30:06 -080045
Ravago Jones5127ccc2022-07-31 16:32:45 -070046 def __init__(self, dt, dynamics):
47 self.num_states = 4
48 self.num_inputs = 2
49 self.dt = dt
50 self.dynamics = dynamics
Austin Schuh434c8372018-01-21 16:30:06 -080051
Ravago Jones5127ccc2022-07-31 16:32:45 -070052 q_pos = 0.5
53 q_vel = 1.65
54 self.Q = numpy.matrix(
55 numpy.diag([
56 1.0 / (q_pos**2.0), 1.0 / (q_vel**2.0), 1.0 / (q_pos**2.0),
57 1.0 / (q_vel**2.0)
58 ]))
Austin Schuh434c8372018-01-21 16:30:06 -080059
Ravago Jones5127ccc2022-07-31 16:32:45 -070060 self.R = numpy.matrix(
61 numpy.diag([1.0 / (12.0**2.0), 1.0 / (12.0**2.0)]))
Austin Schuh434c8372018-01-21 16:30:06 -080062
Ravago Jones5127ccc2022-07-31 16:32:45 -070063 final_A = numerical_jacobian_x(self.dynamics.discrete_dynamics,
64 numpy.matrix(numpy.zeros((4, 1))),
65 numpy.matrix(numpy.zeros((2, 1))))
66 final_B = numerical_jacobian_u(self.dynamics.discrete_dynamics,
67 numpy.matrix(numpy.zeros((4, 1))),
68 numpy.matrix(numpy.zeros((2, 1))))
Austin Schuhea5f0a72024-09-02 15:02:36 -070069 print('Final A', final_A)
70 print('Final B', final_B)
Ravago Jones5127ccc2022-07-31 16:32:45 -070071 K, self.S = controls.dlqr(final_A,
72 final_B,
73 self.Q,
74 self.R,
75 optimal_cost_function=True)
Austin Schuhea5f0a72024-09-02 15:02:36 -070076 print('Final eig:', numpy.linalg.eig(final_A - final_B * K))
Ravago Jones5127ccc2022-07-31 16:32:45 -070077
78 def final_cost(self, X, U):
79 """Computes the final cost of being at X
Austin Schuh434c8372018-01-21 16:30:06 -080080
81 Args:
82 X: numpy.matrix(self.num_states, 1)
83 U: numpy.matrix(self.num_inputs, 1), ignored
84
85 Returns:
86 numpy.matrix(1, 1), The quadratic cost of being at X
87 """
Ravago Jones5127ccc2022-07-31 16:32:45 -070088 return 0.5 * X.T * self.S * X
Austin Schuh434c8372018-01-21 16:30:06 -080089
Ravago Jones5127ccc2022-07-31 16:32:45 -070090 def cost(self, X, U):
91 """Computes the incremental cost given a position and U.
Austin Schuh434c8372018-01-21 16:30:06 -080092
93 Args:
94 X: numpy.matrix(self.num_states, 1)
95 U: numpy.matrix(self.num_inputs, 1)
96
97 Returns:
98 numpy.matrix(1, 1), The quadratic cost of evaluating U.
99 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700100 return U.T * self.R * U + X.T * self.Q * X
Austin Schuh434c8372018-01-21 16:30:06 -0800101
Ravago Jones5127ccc2022-07-31 16:32:45 -0700102 def estimate_Q_final(self, X_hat):
103 """Returns the quadraticized final Q around X_hat.
Austin Schuh434c8372018-01-21 16:30:06 -0800104
105 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
106
107 Args:
108 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
109
110 Result:
111 numpy.matrix(self.num_states, self.num_states)
112 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700113 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
Austin Schuhea5f0a72024-09-02 15:02:36 -0700114 print('S', self.S)
115 print('Q_final', numerical_jacobian_x_x(self.final_cost, X_hat,
116 zero_U))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700117 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
Austin Schuh434c8372018-01-21 16:30:06 -0800118
Ravago Jones5127ccc2022-07-31 16:32:45 -0700119 def estimate_partial_cost_partial_x_final(self, X_hat):
120 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
Austin Schuh434c8372018-01-21 16:30:06 -0800121
122 Args:
123 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
124
125 Result:
126 numpy.matrix(self.num_states, 1)
127 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700128 return numerical_jacobian_x(
129 self.final_cost, X_hat,
130 numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
Austin Schuh434c8372018-01-21 16:30:06 -0800131
Ravago Jones5127ccc2022-07-31 16:32:45 -0700132 def estimate_q_final(self, X_hat):
133 """Returns q evaluated at X_hat for the final cost function."""
134 return self.estimate_partial_cost_partial_x_final(
135 X_hat) - self.estimate_Q_final(X_hat) * X_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800136
137
138class SkidSteerDynamics(object):
Austin Schuh434c8372018-01-21 16:30:06 -0800139
Ravago Jones5127ccc2022-07-31 16:32:45 -0700140 def __init__(self, dt):
141 self.width = 0.2
142 self.dt = dt
143 self.num_states = 3
144 self.num_inputs = 2
145
146 def dynamics(self, X, U):
147 """Calculates the dynamics for a 2 wheeled robot.
Austin Schuh434c8372018-01-21 16:30:06 -0800148
149 Args:
150 X, numpy.matrix(3, 1), The state. [x, y, theta]
151 U, numpy.matrix(2, 1), The input. [left velocity, right velocity]
152
153 Returns:
154 numpy.matrix(3, 1), The derivative of the dynamics.
155 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700156 #return numpy.matrix([[X[1, 0]],
157 # [X[2, 0]],
158 # [U[0, 0]]])
159 return numpy.matrix([[(U[0, 0] + U[1, 0]) * numpy.cos(X[2, 0]) / 2.0],
160 [(U[0, 0] + U[1, 0]) * numpy.sin(X[2, 0]) / 2.0],
161 [(U[1, 0] - U[0, 0]) / self.width]])
Austin Schuh434c8372018-01-21 16:30:06 -0800162
Ravago Jones5127ccc2022-07-31 16:32:45 -0700163 def discrete_dynamics(self, X, U):
164 return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
Austin Schuh434c8372018-01-21 16:30:06 -0800165
Ravago Jones5127ccc2022-07-31 16:32:45 -0700166 def inverse_discrete_dynamics(self, X, U):
167 return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X,
168 dt)
Austin Schuh434c8372018-01-21 16:30:06 -0800169
170
171# Simple implementation for a quadratic cost function.
172class CostFunction:
Austin Schuh434c8372018-01-21 16:30:06 -0800173
Ravago Jones5127ccc2022-07-31 16:32:45 -0700174 def __init__(self, dt):
175 self.num_states = 3
176 self.num_inputs = 2
177 self.dt = dt
178 self.Q = numpy.matrix([[0.1, 0, 0], [0, 0.6, 0], [0, 0, 0.1]
179 ]) / self.dt / self.dt
180 self.R = numpy.matrix([[0.40, 0], [0, 0.40]]) / self.dt / self.dt
181
182 def final_cost(self, X, U):
183 """Computes the final cost of being at X
Austin Schuh434c8372018-01-21 16:30:06 -0800184
185 Args:
186 X: numpy.matrix(self.num_states, 1)
187 U: numpy.matrix(self.num_inputs, 1), ignored
188
189 Returns:
190 numpy.matrix(1, 1), The quadratic cost of being at X
191 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700192 return X.T * self.Q * X * 1000
Austin Schuh434c8372018-01-21 16:30:06 -0800193
Ravago Jones5127ccc2022-07-31 16:32:45 -0700194 def cost(self, X, U):
195 """Computes the incremental cost given a position and U.
Austin Schuh434c8372018-01-21 16:30:06 -0800196
197 Args:
198 X: numpy.matrix(self.num_states, 1)
199 U: numpy.matrix(self.num_inputs, 1)
200
201 Returns:
202 numpy.matrix(1, 1), The quadratic cost of evaluating U.
203 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700204 return U.T * self.R * U + X.T * self.Q * X
Austin Schuh434c8372018-01-21 16:30:06 -0800205
Ravago Jones5127ccc2022-07-31 16:32:45 -0700206 def estimate_Q_final(self, X_hat):
207 """Returns the quadraticized final Q around X_hat.
Austin Schuh434c8372018-01-21 16:30:06 -0800208
209 This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
210
211 Args:
212 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
213
214 Result:
215 numpy.matrix(self.num_states, self.num_states)
216 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700217 zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
218 return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
Austin Schuh434c8372018-01-21 16:30:06 -0800219
Ravago Jones5127ccc2022-07-31 16:32:45 -0700220 def estimate_partial_cost_partial_x_final(self, X_hat):
221 """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
Austin Schuh434c8372018-01-21 16:30:06 -0800222
223 Args:
224 X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
225
226 Result:
227 numpy.matrix(self.num_states, 1)
228 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700229 return numerical_jacobian_x(
230 self.final_cost, X_hat,
231 numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
Austin Schuh434c8372018-01-21 16:30:06 -0800232
Ravago Jones5127ccc2022-07-31 16:32:45 -0700233 def estimate_q_final(self, X_hat):
234 """Returns q evaluated at X_hat for the final cost function."""
235 return self.estimate_partial_cost_partial_x_final(
236 X_hat) - self.estimate_Q_final(X_hat) * X_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800237
238
239def RungeKutta(f, x, dt):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700240 """4th order RungeKutta integration of F starting at X."""
241 a = f(x)
242 b = f(x + dt / 2.0 * a)
243 c = f(x + dt / 2.0 * b)
244 d = f(x + dt * c)
245 return x + dt * (a + 2.0 * b + 2.0 * c + d) / 6.0
246
Austin Schuh434c8372018-01-21 16:30:06 -0800247
248def numerical_jacobian_x(fn, X, U, epsilon=1e-4):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700249 """Numerically estimates the jacobian around X, U in X.
Austin Schuh434c8372018-01-21 16:30:06 -0800250
251 Args:
252 fn: A function of X, U.
253 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
254 around.
255 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
256 around.
257
258 Returns:
259 numpy.matrix(num_states, num_states), The jacobian of fn with X as the
260 variable.
261 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700262 num_states = X.shape[0]
263 nominal = fn(X, U)
264 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_states)))
265 # It's more expensive, but +- epsilon will be more reliable
266 for i in range(0, num_states):
267 dX_plus = X.copy()
268 dX_plus[i] += epsilon
269 dX_minus = X.copy()
270 dX_minus[i] -= epsilon
271 answer[:, i] = (fn(dX_plus, U) - fn(dX_minus, U)) / epsilon / 2.0
272 return answer
273
Austin Schuh434c8372018-01-21 16:30:06 -0800274
275def numerical_jacobian_u(fn, X, U, epsilon=1e-4):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700276 """Numerically estimates the jacobian around X, U in U.
Austin Schuh434c8372018-01-21 16:30:06 -0800277
278 Args:
279 fn: A function of X, U.
280 X: numpy.matrix(num_states, 1), The state vector to take the jacobian
281 around.
282 U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
283 around.
284
285 Returns:
286 numpy.matrix(num_states, num_inputs), The jacobian of fn with U as the
287 variable.
288 """
Ravago Jones5127ccc2022-07-31 16:32:45 -0700289 num_states = X.shape[0]
290 num_inputs = U.shape[0]
291 nominal = fn(X, U)
292 answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_inputs)))
293 for i in range(0, num_inputs):
294 dU_plus = U.copy()
295 dU_plus[i] += epsilon
296 dU_minus = U.copy()
297 dU_minus[i] -= epsilon
298 answer[:, i] = (fn(X, dU_plus) - fn(X, dU_minus)) / epsilon / 2.0
299 return answer
300
Austin Schuh434c8372018-01-21 16:30:06 -0800301
302def numerical_jacobian_x_x(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700303 return numerical_jacobian_x(
304 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T,
305 X, U)
306
Austin Schuh434c8372018-01-21 16:30:06 -0800307
308def numerical_jacobian_x_u(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700309 return numerical_jacobian_x(
310 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T,
311 X, U)
312
Austin Schuh434c8372018-01-21 16:30:06 -0800313
314def numerical_jacobian_u_x(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700315 return numerical_jacobian_u(
316 lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T,
317 X, U)
318
Austin Schuh434c8372018-01-21 16:30:06 -0800319
320def numerical_jacobian_u_u(fn, X, U):
Ravago Jones5127ccc2022-07-31 16:32:45 -0700321 return numerical_jacobian_u(
322 lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T,
323 X, U)
Austin Schuh434c8372018-01-21 16:30:06 -0800324
325
326class ELQR(object):
Austin Schuh434c8372018-01-21 16:30:06 -0800327
Ravago Jones5127ccc2022-07-31 16:32:45 -0700328 def __init__(self, dynamics, cost):
329 self.dynamics = dynamics
330 self.cost = cost
Austin Schuh434c8372018-01-21 16:30:06 -0800331
Ravago Jones5127ccc2022-07-31 16:32:45 -0700332 def Solve(self, x_hat_initial, horizon, iterations):
333 l = horizon
334 num_states = self.dynamics.num_states
335 num_inputs = self.dynamics.num_inputs
336 self.S_bar_t = [
337 numpy.matrix(numpy.zeros((num_states, num_states)))
338 for _ in range(l + 1)
339 ]
340 self.s_bar_t = [
341 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
342 ]
343 self.s_scalar_bar_t = [
344 numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)
345 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800346
Ravago Jones5127ccc2022-07-31 16:32:45 -0700347 self.L_t = [
348 numpy.matrix(numpy.zeros((num_inputs, num_states)))
349 for _ in range(l + 1)
350 ]
351 self.l_t = [
352 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
353 ]
354 self.L_bar_t = [
355 numpy.matrix(numpy.zeros((num_inputs, num_states)))
356 for _ in range(l + 1)
357 ]
358 self.l_bar_t = [
359 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
360 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800361
Ravago Jones5127ccc2022-07-31 16:32:45 -0700362 self.S_t = [
363 numpy.matrix(numpy.zeros((num_states, num_states)))
364 for _ in range(l + 1)
365 ]
366 self.s_t = [
367 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
368 ]
369 self.s_scalar_t = [
370 numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)
371 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800372
Ravago Jones5127ccc2022-07-31 16:32:45 -0700373 self.last_x_hat_t = [
374 numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)
375 ]
Austin Schuh434c8372018-01-21 16:30:06 -0800376
Ravago Jones5127ccc2022-07-31 16:32:45 -0700377 # Iterate the solver
378 for a in range(iterations):
379 x_hat = x_hat_initial
380 u_t = self.L_t[0] * x_hat + self.l_t[0]
381 self.S_bar_t[0] = numpy.matrix(
382 numpy.zeros((num_states, num_states)))
383 self.s_bar_t[0] = numpy.matrix(numpy.zeros((num_states, 1)))
384 self.s_scalar_bar_t[0] = numpy.matrix([[0]])
Austin Schuh434c8372018-01-21 16:30:06 -0800385
Ravago Jones5127ccc2022-07-31 16:32:45 -0700386 self.last_x_hat_t[0] = x_hat_initial
Austin Schuh434c8372018-01-21 16:30:06 -0800387
Ravago Jones5127ccc2022-07-31 16:32:45 -0700388 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_initial, u_t)
389 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_initial, u_t)
390 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_initial, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800391
Ravago Jones5127ccc2022-07-31 16:32:45 -0700392 q_t = numerical_jacobian_x(self.cost.cost, x_hat_initial, u_t).T \
393 - Q_t * x_hat_initial - P_t.T * u_t
394 r_t = numerical_jacobian_u(self.cost.cost, x_hat_initial, u_t).T \
395 - P_t * x_hat_initial - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800396
Ravago Jones5127ccc2022-07-31 16:32:45 -0700397 q_scalar_t = self.cost.cost(x_hat_initial, u_t) \
398 - 0.5 * (x_hat_initial.T * (Q_t * x_hat_initial + P_t.T * u_t) \
399 + u_t.T * (P_t * x_hat_initial + R_t * u_t)) \
400 - x_hat_initial.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800401
Ravago Jones5127ccc2022-07-31 16:32:45 -0700402 start_A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
403 x_hat_initial, u_t)
404 start_B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
405 x_hat_initial, u_t)
406 x_hat_next = self.dynamics.discrete_dynamics(x_hat_initial, u_t)
407 start_c_t = x_hat_next - start_A_t * x_hat_initial - start_B_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800408
Ravago Jones5127ccc2022-07-31 16:32:45 -0700409 B_svd_u, B_svd_sigma_diag, B_svd_v = numpy.linalg.svd(start_B_t)
410 B_svd_sigma = numpy.matrix(numpy.zeros(start_B_t.shape))
411 B_svd_sigma[0:B_svd_sigma_diag.shape[0], 0:B_svd_sigma_diag.shape[0]] = \
412 numpy.diag(B_svd_sigma_diag)
Austin Schuh434c8372018-01-21 16:30:06 -0800413
Ravago Jones5127ccc2022-07-31 16:32:45 -0700414 B_svd_sigma_inv = numpy.matrix(numpy.zeros(start_B_t.shape)).T
415 B_svd_sigma_inv[0:B_svd_sigma_diag.shape[0],
416 0:B_svd_sigma_diag.shape[0]] = \
417 numpy.linalg.inv(numpy.diag(B_svd_sigma_diag))
418 B_svd_inv = B_svd_v.T * B_svd_sigma_inv * B_svd_u.T
Austin Schuh434c8372018-01-21 16:30:06 -0800419
Ravago Jones5127ccc2022-07-31 16:32:45 -0700420 self.L_bar_t[1] = B_svd_inv
421 self.l_bar_t[1] = -B_svd_inv * (start_A_t * x_hat_initial +
422 start_c_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800423
Ravago Jones5127ccc2022-07-31 16:32:45 -0700424 self.S_bar_t[1] = self.L_bar_t[1].T * R_t * self.L_bar_t[1]
Austin Schuh434c8372018-01-21 16:30:06 -0800425
Ravago Jones5127ccc2022-07-31 16:32:45 -0700426 TotalS_1 = start_B_t.T * self.S_t[1] * start_B_t + R_t
427 Totals_1 = start_B_t.T * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
428 + start_B_t.T * self.s_t[1] + P_t * x_hat_initial + r_t
429 Totals_scalar_1 = 0.5 * (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
430 + self.s_scalar_t[1] + x_hat_initial.T * q_t + q_scalar_t \
431 + 0.5 * x_hat_initial.T * Q_t * x_hat_initial \
432 + (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.s_t[1]
Austin Schuh434c8372018-01-21 16:30:06 -0800433
Ravago Jones5127ccc2022-07-31 16:32:45 -0700434 optimal_u_1 = -numpy.linalg.solve(TotalS_1, Totals_1)
435 optimal_x_1 = start_A_t * x_hat_initial \
436 + start_B_t * optimal_u_1 + start_c_t
Austin Schuh434c8372018-01-21 16:30:06 -0800437
Ravago Jones5127ccc2022-07-31 16:32:45 -0700438 # TODO(austin): Disable this if we are controlable. It should not be needed then.
439 S_bar_1_eigh_eigenvalues, S_bar_1_eigh_eigenvectors = \
440 numpy.linalg.eigh(self.S_bar_t[1])
441 S_bar_1_eigh = numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues))
442 S_bar_1_eigh_eigenvalues_stiff = S_bar_1_eigh_eigenvalues.copy()
443 for i in range(S_bar_1_eigh_eigenvalues_stiff.shape[0]):
444 if abs(S_bar_1_eigh_eigenvalues_stiff[i]) < 1e-8:
445 S_bar_1_eigh_eigenvalues_stiff[i] = max(
446 S_bar_1_eigh_eigenvalues_stiff) * 1.0
Austin Schuh434c8372018-01-21 16:30:06 -0800447
Ravago Jones5127ccc2022-07-31 16:32:45 -0700448 S_bar_stiff = S_bar_1_eigh_eigenvectors * numpy.matrix(
449 numpy.diag(S_bar_1_eigh_eigenvalues_stiff)
450 ) * S_bar_1_eigh_eigenvectors.T
Austin Schuh434c8372018-01-21 16:30:06 -0800451
Austin Schuhea5f0a72024-09-02 15:02:36 -0700452 print('Min u', -numpy.linalg.solve(TotalS_1, Totals_1))
453 print('Min x_hat', optimal_x_1)
Ravago Jones5127ccc2022-07-31 16:32:45 -0700454 self.s_bar_t[1] = -self.s_t[1] - (S_bar_stiff +
455 self.S_t[1]) * optimal_x_1
456 self.s_scalar_bar_t[1] = 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1 \
457 - optimal_x_1.T * (S_bar_stiff + self.S_t[1]) * optimal_x_1) \
458 + optimal_u_1.T * Totals_1 \
459 - optimal_x_1.T * (self.s_bar_t[1] + self.s_t[1]) \
460 - self.s_scalar_t[1] + Totals_scalar_1
Austin Schuh434c8372018-01-21 16:30:06 -0800461
Austin Schuhea5f0a72024-09-02 15:02:36 -0700462 print('optimal_u_1', optimal_u_1)
463 print('TotalS_1', TotalS_1)
464 print('Totals_1', Totals_1)
465 print('Totals_scalar_1', Totals_scalar_1)
466 print('overall cost 1', 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1) \
467 + optimal_u_1.T * Totals_1 + Totals_scalar_1)
468 print('overall cost 0', 0.5 * (x_hat_initial.T * self.S_t[0] * x_hat_initial) \
469 + x_hat_initial.T * self.s_t[0] + self.s_scalar_t[0])
Austin Schuh434c8372018-01-21 16:30:06 -0800470
Austin Schuhea5f0a72024-09-02 15:02:36 -0700471 print('t forward 0')
472 print('x_hat_initial[ 0]: %s' % (x_hat_initial))
473 print('x_hat[%2d]: %s' % (0, x_hat.T))
474 print('x_hat_next[%2d]: %s' % (0, x_hat_next.T))
475 print('u[%2d]: %s' % (0, u_t.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700476 print('L[ 0]: %s' % (self.L_t[0], )).replace('\n', '\n ')
477 print('l[ 0]: %s' % (self.l_t[0], )).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800478
Ravago Jones5127ccc2022-07-31 16:32:45 -0700479 print('A_t[%2d]: %s' % (0, start_A_t)).replace('\n', '\n ')
480 print('B_t[%2d]: %s' % (0, start_B_t)).replace('\n', '\n ')
481 print('c_t[%2d]: %s' % (0, start_c_t)).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800482
Ravago Jones5127ccc2022-07-31 16:32:45 -0700483 # TODO(austin): optimal_x_1 is x_hat
484 x_hat = -numpy.linalg.solve((self.S_t[1] + S_bar_stiff),
485 (self.s_t[1] + self.s_bar_t[1]))
Austin Schuhea5f0a72024-09-02 15:02:36 -0700486 print('new xhat', x_hat)
Austin Schuh434c8372018-01-21 16:30:06 -0800487
Ravago Jones5127ccc2022-07-31 16:32:45 -0700488 self.S_bar_t[1] = S_bar_stiff
Austin Schuh434c8372018-01-21 16:30:06 -0800489
Ravago Jones5127ccc2022-07-31 16:32:45 -0700490 self.last_x_hat_t[1] = x_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800491
Ravago Jones5127ccc2022-07-31 16:32:45 -0700492 for t in range(1, l):
Austin Schuhea5f0a72024-09-02 15:02:36 -0700493 print('t forward', t)
Ravago Jones5127ccc2022-07-31 16:32:45 -0700494 u_t = self.L_t[t] * x_hat + self.l_t[t]
Austin Schuh434c8372018-01-21 16:30:06 -0800495
Ravago Jones5127ccc2022-07-31 16:32:45 -0700496 x_hat_next = self.dynamics.discrete_dynamics(x_hat, u_t)
497 A_bar_t = numerical_jacobian_x(
498 self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t)
499 B_bar_t = numerical_jacobian_u(
500 self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t)
501 c_bar_t = x_hat - A_bar_t * x_hat_next - B_bar_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800502
Austin Schuhea5f0a72024-09-02 15:02:36 -0700503 print('x_hat[%2d]: %s' % (t, x_hat.T))
504 print('x_hat_next[%2d]: %s' % (t, x_hat_next.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700505 print('L[%2d]: %s' % (
506 t,
507 self.L_t[t],
508 )).replace('\n', '\n ')
509 print('l[%2d]: %s' % (
510 t,
511 self.l_t[t],
512 )).replace('\n', '\n ')
Austin Schuhea5f0a72024-09-02 15:02:36 -0700513 print('u[%2d]: %s' % (t, u_t.T))
Austin Schuh434c8372018-01-21 16:30:06 -0800514
Ravago Jones5127ccc2022-07-31 16:32:45 -0700515 print('A_bar_t[%2d]: %s' % (t, A_bar_t)).replace(
516 '\n', '\n ')
517 print('B_bar_t[%2d]: %s' % (t, B_bar_t)).replace(
518 '\n', '\n ')
519 print('c_bar_t[%2d]: %s' % (t, c_bar_t)).replace(
520 '\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800521
Ravago Jones5127ccc2022-07-31 16:32:45 -0700522 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat, u_t)
523 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat, u_t)
524 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800525
Ravago Jones5127ccc2022-07-31 16:32:45 -0700526 q_t = numerical_jacobian_x(self.cost.cost, x_hat, u_t).T \
527 - Q_t * x_hat - P_t.T * u_t
528 r_t = numerical_jacobian_u(self.cost.cost, x_hat, u_t).T \
529 - P_t * x_hat - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800530
Ravago Jones5127ccc2022-07-31 16:32:45 -0700531 q_scalar_t = self.cost.cost(x_hat, u_t) \
532 - 0.5 * (x_hat.T * (Q_t * x_hat + P_t.T * u_t) \
533 + u_t.T * (P_t * x_hat + R_t * u_t)) - x_hat.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800534
Ravago Jones5127ccc2022-07-31 16:32:45 -0700535 C_bar_t = B_bar_t.T * (self.S_bar_t[t] +
536 Q_t) * A_bar_t + P_t * A_bar_t
537 D_bar_t = A_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t
538 E_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * B_bar_t + R_t \
539 + P_t * B_bar_t + B_bar_t.T * P_t.T
540 d_bar_t = A_bar_t.T * (self.s_bar_t[t] + q_t) \
541 + A_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
542 e_bar_t = r_t + P_t * c_bar_t + B_bar_t.T * self.s_bar_t[t] \
543 + B_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800544
Ravago Jones5127ccc2022-07-31 16:32:45 -0700545 self.L_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * C_bar_t
546 self.l_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * e_bar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800547
Ravago Jones5127ccc2022-07-31 16:32:45 -0700548 self.S_bar_t[t + 1] = D_bar_t + C_bar_t.T * self.L_bar_t[t + 1]
549 self.s_bar_t[t + 1] = d_bar_t + C_bar_t.T * self.l_bar_t[t + 1]
550 self.s_scalar_bar_t[t + 1] = \
551 -0.5 * e_bar_t.T * numpy.linalg.inv(E_bar_t) * e_bar_t \
552 + 0.5 * c_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t \
553 + c_bar_t.T * self.s_bar_t[t] + c_bar_t.T * q_t \
554 + self.s_scalar_bar_t[t] + q_scalar_t
Austin Schuh434c8372018-01-21 16:30:06 -0800555
Ravago Jones5127ccc2022-07-31 16:32:45 -0700556 x_hat = -numpy.linalg.solve(
557 (self.S_t[t + 1] + self.S_bar_t[t + 1]),
558 (self.s_t[t + 1] + self.s_bar_t[t + 1]))
559 self.S_t[l] = self.cost.estimate_Q_final(x_hat)
560 self.s_t[l] = self.cost.estimate_q_final(x_hat)
561 x_hat = -numpy.linalg.inv(self.S_t[l] + self.S_bar_t[l]) \
562 * (self.s_t[l] + self.s_bar_t[l])
Austin Schuh434c8372018-01-21 16:30:06 -0800563
Ravago Jones5127ccc2022-07-31 16:32:45 -0700564 for t in reversed(range(l)):
Austin Schuhea5f0a72024-09-02 15:02:36 -0700565 print('t backward', t)
Ravago Jones5127ccc2022-07-31 16:32:45 -0700566 # TODO(austin): I don't think we can use L_t like this here.
567 # I think we are off by 1 somewhere...
568 u_t = self.L_bar_t[t + 1] * x_hat + self.l_bar_t[t + 1]
Austin Schuh434c8372018-01-21 16:30:06 -0800569
Ravago Jones5127ccc2022-07-31 16:32:45 -0700570 x_hat_prev = self.dynamics.inverse_discrete_dynamics(
571 x_hat, u_t)
Austin Schuhea5f0a72024-09-02 15:02:36 -0700572 print('x_hat[%2d]: %s' % (t, x_hat.T))
573 print('x_hat_prev[%2d]: %s' % (t, x_hat_prev.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700574 print('L_bar[%2d]: %s' % (t + 1, self.L_bar_t[t + 1])).replace(
575 '\n', '\n ')
576 print('l_bar[%2d]: %s' % (t + 1, self.l_bar_t[t + 1])).replace(
577 '\n', '\n ')
Austin Schuhea5f0a72024-09-02 15:02:36 -0700578 print('u[%2d]: %s' % (t, u_t.T))
Ravago Jones5127ccc2022-07-31 16:32:45 -0700579 # Now compute the linearized A, B, and C
580 # Start by doing it numerically, and then optimize.
581 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
582 x_hat_prev, u_t)
583 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
584 x_hat_prev, u_t)
585 c_t = x_hat - A_t * x_hat_prev - B_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800586
Ravago Jones5127ccc2022-07-31 16:32:45 -0700587 print('A_t[%2d]: %s' % (t, A_t)).replace('\n', '\n ')
588 print('B_t[%2d]: %s' % (t, B_t)).replace('\n', '\n ')
589 print('c_t[%2d]: %s' % (t, c_t)).replace('\n', '\n ')
Austin Schuh434c8372018-01-21 16:30:06 -0800590
Ravago Jones5127ccc2022-07-31 16:32:45 -0700591 Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_prev, u_t)
592 P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_prev, u_t)
593 P_T_t = numerical_jacobian_u_x(self.cost.cost, x_hat_prev, u_t)
594 R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_prev, u_t)
Austin Schuh434c8372018-01-21 16:30:06 -0800595
Ravago Jones5127ccc2022-07-31 16:32:45 -0700596 q_t = numerical_jacobian_x(self.cost.cost, x_hat_prev, u_t).T \
597 - Q_t * x_hat_prev - P_T_t * u_t
598 r_t = numerical_jacobian_u(self.cost.cost, x_hat_prev, u_t).T \
599 - P_t * x_hat_prev - R_t * u_t
Austin Schuh434c8372018-01-21 16:30:06 -0800600
Ravago Jones5127ccc2022-07-31 16:32:45 -0700601 q_scalar_t = self.cost.cost(x_hat_prev, u_t) \
602 - 0.5 * (x_hat_prev.T * (Q_t * x_hat_prev + P_t.T * u_t) \
603 + u_t.T * (P_t * x_hat_prev + R_t * u_t)) \
604 - x_hat_prev.T * q_t - u_t.T * r_t
Austin Schuh434c8372018-01-21 16:30:06 -0800605
Ravago Jones5127ccc2022-07-31 16:32:45 -0700606 C_t = P_t + B_t.T * self.S_t[t + 1] * A_t
607 D_t = Q_t + A_t.T * self.S_t[t + 1] * A_t
608 E_t = R_t + B_t.T * self.S_t[t + 1] * B_t
609 d_t = q_t + A_t.T * self.s_t[t + 1] + A_t.T * self.S_t[t +
610 1] * c_t
611 e_t = r_t + B_t.T * self.s_t[t + 1] + B_t.T * self.S_t[t +
612 1] * c_t
613 self.L_t[t] = -numpy.linalg.inv(E_t) * C_t
614 self.l_t[t] = -numpy.linalg.inv(E_t) * e_t
615 self.s_t[t] = d_t + C_t.T * self.l_t[t]
616 self.S_t[t] = D_t + C_t.T * self.L_t[t]
617 self.s_scalar_t[t] = q_scalar_t \
618 - 0.5 * e_t.T * numpy.linalg.inv(E_t) * e_t \
619 + 0.5 * c_t.T * self.S_t[t + 1] * c_t \
620 + c_t.T * self.s_t[t + 1] \
621 + self.s_scalar_t[t + 1]
Austin Schuh434c8372018-01-21 16:30:06 -0800622
Ravago Jones5127ccc2022-07-31 16:32:45 -0700623 x_hat = -numpy.linalg.solve((self.S_t[t] + self.S_bar_t[t]),
624 (self.s_t[t] + self.s_bar_t[t]))
625 if t == 0:
626 self.last_x_hat_t[t] = x_hat_initial
627 else:
628 self.last_x_hat_t[t] = x_hat
Austin Schuh434c8372018-01-21 16:30:06 -0800629
Ravago Jones5127ccc2022-07-31 16:32:45 -0700630 x_hat_t = [x_hat_initial]
Austin Schuh434c8372018-01-21 16:30:06 -0800631
Ravago Jones5127ccc2022-07-31 16:32:45 -0700632 pylab.figure('states %d' % a)
633 pylab.ion()
634 for dim in range(num_states):
635 pylab.plot(
636 numpy.arange(len(self.last_x_hat_t)),
637 [x_hat_loop[dim, 0] for x_hat_loop in self.last_x_hat_t],
638 marker='o',
639 label='Xhat[%d]' % dim)
640 pylab.legend()
641 pylab.draw()
Austin Schuh434c8372018-01-21 16:30:06 -0800642
Ravago Jones5127ccc2022-07-31 16:32:45 -0700643 pylab.figure('xy %d' % a)
644 pylab.ion()
645 pylab.plot([x_hat_loop[0, 0] for x_hat_loop in self.last_x_hat_t],
646 [x_hat_loop[1, 0] for x_hat_loop in self.last_x_hat_t],
647 marker='o',
648 label='trajectory')
649 pylab.legend()
650 pylab.draw()
Austin Schuh434c8372018-01-21 16:30:06 -0800651
Ravago Jones5127ccc2022-07-31 16:32:45 -0700652 final_u_t = [
653 numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)
654 ]
655 cost_to_go = []
656 cost_to_come = []
657 cost = []
658 for t in range(l):
659 cost_to_go.append(
660 (0.5 * self.last_x_hat_t[t].T * self.S_t[t] * self.last_x_hat_t[t] \
661 + self.last_x_hat_t[t].T * self.s_t[t] + self.s_scalar_t[t])[0, 0])
662 cost_to_come.append(
663 (0.5 * self.last_x_hat_t[t].T * self.S_bar_t[t] * self.last_x_hat_t[t] \
664 + self.last_x_hat_t[t].T * self.s_bar_t[t] + self.s_scalar_bar_t[t])[0, 0])
665 cost.append(cost_to_go[-1] + cost_to_come[-1])
666 final_u_t[t] = self.L_t[t] * self.last_x_hat_t[t] + self.l_t[t]
Austin Schuh434c8372018-01-21 16:30:06 -0800667
Ravago Jones5127ccc2022-07-31 16:32:45 -0700668 for t in range(l):
669 A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
670 self.last_x_hat_t[t], final_u_t[t])
671 B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
672 self.last_x_hat_t[t], final_u_t[t])
673 c_t = self.dynamics.discrete_dynamics(self.last_x_hat_t[t], final_u_t[t]) \
674 - A_t * self.last_x_hat_t[t] - B_t * final_u_t[t]
675 print("Infeasability at", t, "is",
676 ((A_t * self.last_x_hat_t[t] + B_t * final_u_t[t] + c_t) \
677 - self.last_x_hat_t[t + 1]).T)
Austin Schuh434c8372018-01-21 16:30:06 -0800678
Ravago Jones5127ccc2022-07-31 16:32:45 -0700679 pylab.figure('u')
680 samples = numpy.arange(len(final_u_t))
681 for i in range(num_inputs):
682 pylab.plot(samples, [u[i, 0] for u in final_u_t],
683 label='u[%d]' % i,
684 marker='o')
685 pylab.legend()
686
687 pylab.figure('cost')
688 cost_samples = numpy.arange(len(cost))
689 pylab.plot(cost_samples, cost_to_go, label='cost to go', marker='o')
690 pylab.plot(cost_samples,
691 cost_to_come,
692 label='cost to come',
693 marker='o')
694 pylab.plot(cost_samples, cost, label='cost', marker='o')
695 pylab.legend()
696
697 pylab.ioff()
698 pylab.show()
699
Austin Schuh434c8372018-01-21 16:30:06 -0800700
701if __name__ == '__main__':
Ravago Jones5127ccc2022-07-31 16:32:45 -0700702 dt = 0.05
703 #arm_dynamics = ArmDynamics(dt=dt)
704 #elqr = ELQR(arm_dynamics, ArmCostFunction(dt=dt, dynamics=arm_dynamics))
705 #x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0], [0.0]])
706 #elqr.Solve(x_hat_initial, 100, 3)
Austin Schuh434c8372018-01-21 16:30:06 -0800707
Ravago Jones5127ccc2022-07-31 16:32:45 -0700708 elqr = ELQR(SkidSteerDynamics(dt=dt), CostFunction(dt=dt))
709 x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0]])
710 elqr.Solve(x_hat_initial, 100, 15)
711 sys.exit(1)