Blame - y2018/control_loops/python/extended_lqr.py - RealtimeRoboticsGroup/test

blob: 4eecee9067e349d9edf6aa273e6e4a07859fc014 [file] [log] [blame]

Austin Schuh	085eab9	2020-11-26 13:54:51 -0800	[diff] [blame^]	1	#!/usr/bin/python3
Austin Schuh	434c837	2018-01-21 16:30:06 -0800	[diff] [blame]	2	# This is an initial, hacky implementation of the extended LQR paper. It's just
				3	# a proof of concept, so don't trust it too much.
				4
				5	import numpy
				6	import scipy.optimize
				7	from matplotlib import pylab
				8	import sys
				9
				10	from frc971.control_loops.python import controls
				11
				12
				13	class ArmDynamics(object):
				14	def __init__(self, dt):
				15	self.dt = dt
				16
				17	self.l1 = 1.0
				18	self.l2 = 0.8
				19	self.num_states = 4
				20	self.num_inputs = 2
				21
				22	def dynamics(self, X, U):
				23	"""Calculates the dynamics for a double jointed arm.
				24
				25	Args:
				26	X, numpy.matrix(4, 1), The state. [theta1, omega1, theta2, omega2]
				27	U, numpy.matrix(2, 1), The input. [torque1, torque2]
				28
				29	Returns:
				30	numpy.matrix(4, 1), The derivative of the dynamics.
				31	"""
				32	return numpy.matrix([[X[1, 0]],
				33	[U[0, 0]],
				34	[X[3, 0]],
				35	[U[1, 0]]])
				36
				37	def discrete_dynamics(self, X, U):
				38	return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
				39
				40	def inverse_discrete_dynamics(self, X, U):
				41	return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X, dt)
				42
				43	# Simple implementation for a quadratic cost function.
				44	class ArmCostFunction:
				45	def __init__(self, dt, dynamics):
				46	self.num_states = 4
				47	self.num_inputs = 2
				48	self.dt = dt
				49	self.dynamics = dynamics
				50
				51	q_pos = 0.5
				52	q_vel = 1.65
				53	self.Q = numpy.matrix(numpy.diag([
				54	1.0 / (q_pos 2.0), 1.0 / (q_vel 2.0),
				55	1.0 / (q_pos 2.0), 1.0 / (q_vel 2.0)]))
				56
				57	self.R = numpy.matrix(numpy.diag([1.0 / (12.0 ** 2.0),
				58	1.0 / (12.0 ** 2.0)]))
				59
				60	final_A = numerical_jacobian_x(self.dynamics.discrete_dynamics,
				61	numpy.matrix(numpy.zeros((4, 1))),
				62	numpy.matrix(numpy.zeros((2, 1))))
				63	final_B = numerical_jacobian_u(self.dynamics.discrete_dynamics,
				64	numpy.matrix(numpy.zeros((4, 1))),
				65	numpy.matrix(numpy.zeros((2, 1))))
				66	print 'Final A', final_A
				67	print 'Final B', final_B
				68	K, self.S = controls.dlqr(
				69	final_A, final_B, self.Q, self.R, optimal_cost_function=True)
				70	print 'Final eig:', numpy.linalg.eig(final_A - final_B * K)
				71
				72	def final_cost(self, X, U):
				73	"""Computes the final cost of being at X
				74
				75	Args:
				76	X: numpy.matrix(self.num_states, 1)
				77	U: numpy.matrix(self.num_inputs, 1), ignored
				78
				79	Returns:
				80	numpy.matrix(1, 1), The quadratic cost of being at X
				81	"""
				82	return 0.5 * X.T * self.S * X
				83
				84	def cost(self, X, U):
				85	"""Computes the incremental cost given a position and U.
				86
				87	Args:
				88	X: numpy.matrix(self.num_states, 1)
				89	U: numpy.matrix(self.num_inputs, 1)
				90
				91	Returns:
				92	numpy.matrix(1, 1), The quadratic cost of evaluating U.
				93	"""
				94	return U.T * self.R * U + X.T * self.Q * X
				95
				96	def estimate_Q_final(self, X_hat):
				97	"""Returns the quadraticized final Q around X_hat.
				98
				99	This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
				100
				101	Args:
				102	X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
				103
				104	Result:
				105	numpy.matrix(self.num_states, self.num_states)
				106	"""
				107	zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
				108	print 'S', self.S
				109	print 'Q_final', numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
				110	return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
				111
				112	def estimate_partial_cost_partial_x_final(self, X_hat):
				113	"""Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
				114
				115	Args:
				116	X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
				117
				118	Result:
				119	numpy.matrix(self.num_states, 1)
				120	"""
				121	return numerical_jacobian_x(self.final_cost, X_hat,
				122	numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
				123
				124	def estimate_q_final(self, X_hat):
				125	"""Returns q evaluated at X_hat for the final cost function."""
				126	return self.estimate_partial_cost_partial_x_final(X_hat) - self.estimate_Q_final(X_hat) * X_hat
				127
				128
				129
				130	class SkidSteerDynamics(object):
				131	def __init__(self, dt):
				132	self.width = 0.2
				133	self.dt = dt
				134	self.num_states = 3
				135	self.num_inputs = 2
				136
				137	def dynamics(self, X, U):
				138	"""Calculates the dynamics for a 2 wheeled robot.
				139
				140	Args:
				141	X, numpy.matrix(3, 1), The state. [x, y, theta]
				142	U, numpy.matrix(2, 1), The input. [left velocity, right velocity]
				143
				144	Returns:
				145	numpy.matrix(3, 1), The derivative of the dynamics.
				146	"""
				147	#return numpy.matrix([[X[1, 0]],
				148	# [X[2, 0]],
				149	# [U[0, 0]]])
				150	return numpy.matrix([[(U[0, 0] + U[1, 0]) * numpy.cos(X[2, 0]) / 2.0],
				151	[(U[0, 0] + U[1, 0]) * numpy.sin(X[2, 0]) / 2.0],
				152	[(U[1, 0] - U[0, 0]) / self.width]])
				153
				154	def discrete_dynamics(self, X, U):
				155	return RungeKutta(lambda startingX: self.dynamics(startingX, U), X, dt)
				156
				157	def inverse_discrete_dynamics(self, X, U):
				158	return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X, dt)
				159
				160
				161	# Simple implementation for a quadratic cost function.
				162	class CostFunction:
				163	def __init__(self, dt):
				164	self.num_states = 3
				165	self.num_inputs = 2
				166	self.dt = dt
				167	self.Q = numpy.matrix([[0.1, 0, 0],
				168	[0, 0.6, 0],
				169	[0, 0, 0.1]]) / self.dt / self.dt
				170	self.R = numpy.matrix([[0.40, 0],
				171	[0, 0.40]]) / self.dt / self.dt
				172
				173	def final_cost(self, X, U):
				174	"""Computes the final cost of being at X
				175
				176	Args:
				177	X: numpy.matrix(self.num_states, 1)
				178	U: numpy.matrix(self.num_inputs, 1), ignored
				179
				180	Returns:
				181	numpy.matrix(1, 1), The quadratic cost of being at X
				182	"""
				183	return X.T * self.Q * X * 1000
				184
				185	def cost(self, X, U):
				186	"""Computes the incremental cost given a position and U.
				187
				188	Args:
				189	X: numpy.matrix(self.num_states, 1)
				190	U: numpy.matrix(self.num_inputs, 1)
				191
				192	Returns:
				193	numpy.matrix(1, 1), The quadratic cost of evaluating U.
				194	"""
				195	return U.T * self.R * U + X.T * self.Q * X
				196
				197	def estimate_Q_final(self, X_hat):
				198	"""Returns the quadraticized final Q around X_hat.
				199
				200	This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X)
				201
				202	Args:
				203	X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
				204
				205	Result:
				206	numpy.matrix(self.num_states, self.num_states)
				207	"""
				208	zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1)))
				209	return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U)
				210
				211	def estimate_partial_cost_partial_x_final(self, X_hat):
				212	"""Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost.
				213
				214	Args:
				215	X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around.
				216
				217	Result:
				218	numpy.matrix(self.num_states, 1)
				219	"""
				220	return numerical_jacobian_x(self.final_cost, X_hat, numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T
				221
				222	def estimate_q_final(self, X_hat):
				223	"""Returns q evaluated at X_hat for the final cost function."""
				224	return self.estimate_partial_cost_partial_x_final(X_hat) - self.estimate_Q_final(X_hat) * X_hat
				225
				226
				227	def RungeKutta(f, x, dt):
				228	"""4th order RungeKutta integration of F starting at X."""
				229	a = f(x)
				230	b = f(x + dt / 2.0 * a)
				231	c = f(x + dt / 2.0 * b)
				232	d = f(x + dt * c)
				233	return x + dt * (a + 2.0 * b + 2.0 * c + d) / 6.0
				234
				235	def numerical_jacobian_x(fn, X, U, epsilon=1e-4):
				236	"""Numerically estimates the jacobian around X, U in X.
				237
				238	Args:
				239	fn: A function of X, U.
				240	X: numpy.matrix(num_states, 1), The state vector to take the jacobian
				241	around.
				242	U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
				243	around.
				244
				245	Returns:
				246	numpy.matrix(num_states, num_states), The jacobian of fn with X as the
				247	variable.
				248	"""
				249	num_states = X.shape[0]
				250	nominal = fn(X, U)
				251	answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_states)))
				252	# It's more expensive, but +- epsilon will be more reliable
				253	for i in range(0, num_states):
				254	dX_plus = X.copy()
				255	dX_plus[i] += epsilon
				256	dX_minus = X.copy()
				257	dX_minus[i] -= epsilon
				258	answer[:, i] = (fn(dX_plus, U) - fn(dX_minus, U)) / epsilon / 2.0
				259	return answer
				260
				261	def numerical_jacobian_u(fn, X, U, epsilon=1e-4):
				262	"""Numerically estimates the jacobian around X, U in U.
				263
				264	Args:
				265	fn: A function of X, U.
				266	X: numpy.matrix(num_states, 1), The state vector to take the jacobian
				267	around.
				268	U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian
				269	around.
				270
				271	Returns:
				272	numpy.matrix(num_states, num_inputs), The jacobian of fn with U as the
				273	variable.
				274	"""
				275	num_states = X.shape[0]
				276	num_inputs = U.shape[0]
				277	nominal = fn(X, U)
				278	answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_inputs)))
				279	for i in range(0, num_inputs):
				280	dU_plus = U.copy()
				281	dU_plus[i] += epsilon
				282	dU_minus = U.copy()
				283	dU_minus[i] -= epsilon
				284	answer[:, i] = (fn(X, dU_plus) - fn(X, dU_minus)) / epsilon / 2.0
				285	return answer
				286
				287	def numerical_jacobian_x_x(fn, X, U):
				288	return numerical_jacobian_x(
				289	lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T, X, U)
				290
				291	def numerical_jacobian_x_u(fn, X, U):
				292	return numerical_jacobian_x(
				293	lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T, X, U)
				294
				295	def numerical_jacobian_u_x(fn, X, U):
				296	return numerical_jacobian_u(
				297	lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T, X, U)
				298
				299	def numerical_jacobian_u_u(fn, X, U):
				300	return numerical_jacobian_u(
				301	lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T, X, U)
				302
				303
				304	class ELQR(object):
				305	def __init__(self, dynamics, cost):
				306	self.dynamics = dynamics
				307	self.cost = cost
				308
				309	def Solve(self, x_hat_initial, horizon, iterations):
				310	l = horizon
				311	num_states = self.dynamics.num_states
				312	num_inputs = self.dynamics.num_inputs
				313	self.S_bar_t = [numpy.matrix(numpy.zeros((num_states, num_states))) for _ in range(l + 1)]
				314	self.s_bar_t = [numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)]
				315	self.s_scalar_bar_t = [numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)]
				316
				317	self.L_t = [numpy.matrix(numpy.zeros((num_inputs, num_states))) for _ in range(l + 1)]
				318	self.l_t = [numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)]
				319	self.L_bar_t = [numpy.matrix(numpy.zeros((num_inputs, num_states))) for _ in range(l + 1)]
				320	self.l_bar_t = [numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)]
				321
				322	self.S_t = [numpy.matrix(numpy.zeros((num_states, num_states))) for _ in range(l + 1)]
				323	self.s_t = [numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)]
				324	self.s_scalar_t = [numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1)]
				325
				326	self.last_x_hat_t = [numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1)]
				327
				328	# Iterate the solver
				329	for a in range(iterations):
				330	x_hat = x_hat_initial
				331	u_t = self.L_t[0] * x_hat + self.l_t[0]
				332	self.S_bar_t[0] = numpy.matrix(numpy.zeros((num_states, num_states)))
				333	self.s_bar_t[0] = numpy.matrix(numpy.zeros((num_states, 1)))
				334	self.s_scalar_bar_t[0] = numpy.matrix([[0]])
				335
				336	self.last_x_hat_t[0] = x_hat_initial
				337
				338	Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_initial, u_t)
				339	P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_initial, u_t)
				340	R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_initial, u_t)
				341
				342	q_t = numerical_jacobian_x(self.cost.cost, x_hat_initial, u_t).T \
				343	- Q_t * x_hat_initial - P_t.T * u_t
				344	r_t = numerical_jacobian_u(self.cost.cost, x_hat_initial, u_t).T \
				345	- P_t * x_hat_initial - R_t * u_t
				346
				347	q_scalar_t = self.cost.cost(x_hat_initial, u_t) \
				348	- 0.5 * (x_hat_initial.T * (Q_t * x_hat_initial + P_t.T * u_t) \
				349	+ u_t.T * (P_t * x_hat_initial + R_t * u_t)) \
				350	- x_hat_initial.T * q_t - u_t.T * r_t
				351
				352	start_A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics, x_hat_initial, u_t)
				353	start_B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics, x_hat_initial, u_t)
				354	x_hat_next = self.dynamics.discrete_dynamics(x_hat_initial, u_t)
				355	start_c_t = x_hat_next - start_A_t * x_hat_initial - start_B_t * u_t
				356
				357	B_svd_u, B_svd_sigma_diag, B_svd_v = numpy.linalg.svd(start_B_t)
				358	B_svd_sigma = numpy.matrix(numpy.zeros(start_B_t.shape))
				359	B_svd_sigma[0:B_svd_sigma_diag.shape[0], 0:B_svd_sigma_diag.shape[0]] = \
				360	numpy.diag(B_svd_sigma_diag)
				361
				362	B_svd_sigma_inv = numpy.matrix(numpy.zeros(start_B_t.shape)).T
				363	B_svd_sigma_inv[0:B_svd_sigma_diag.shape[0],
				364	0:B_svd_sigma_diag.shape[0]] = \
				365	numpy.linalg.inv(numpy.diag(B_svd_sigma_diag))
				366	B_svd_inv = B_svd_v.T * B_svd_sigma_inv * B_svd_u.T
				367
				368	self.L_bar_t[1] = B_svd_inv
				369	self.l_bar_t[1] = -B_svd_inv * (start_A_t * x_hat_initial + start_c_t)
				370
				371	self.S_bar_t[1] = self.L_bar_t[1].T * R_t * self.L_bar_t[1]
				372
				373	TotalS_1 = start_B_t.T * self.S_t[1] * start_B_t + R_t
				374	Totals_1 = start_B_t.T * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
				375	+ start_B_t.T * self.s_t[1] + P_t * x_hat_initial + r_t
				376	Totals_scalar_1 = 0.5 * (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \
				377	+ self.s_scalar_t[1] + x_hat_initial.T * q_t + q_scalar_t \
				378	+ 0.5 * x_hat_initial.T * Q_t * x_hat_initial \
				379	+ (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.s_t[1]
				380
				381	optimal_u_1 = -numpy.linalg.solve(TotalS_1, Totals_1)
				382	optimal_x_1 = start_A_t * x_hat_initial \
				383	+ start_B_t * optimal_u_1 + start_c_t
				384
				385	# TODO(austin): Disable this if we are controlable. It should not be needed then.
				386	S_bar_1_eigh_eigenvalues, S_bar_1_eigh_eigenvectors = \
				387	numpy.linalg.eigh(self.S_bar_t[1])
				388	S_bar_1_eigh = numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues))
				389	S_bar_1_eigh_eigenvalues_stiff = S_bar_1_eigh_eigenvalues.copy()
				390	for i in range(S_bar_1_eigh_eigenvalues_stiff.shape[0]):
				391	if abs(S_bar_1_eigh_eigenvalues_stiff[i]) < 1e-8:
				392	S_bar_1_eigh_eigenvalues_stiff[i] = max(S_bar_1_eigh_eigenvalues_stiff) * 1.0
				393
				394	S_bar_stiff = S_bar_1_eigh_eigenvectors * numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues_stiff)) * S_bar_1_eigh_eigenvectors.T
				395
				396	print 'Min u', -numpy.linalg.solve(TotalS_1, Totals_1)
				397	print 'Min x_hat', optimal_x_1
				398	self.s_bar_t[1] = -self.s_t[1] - (S_bar_stiff + self.S_t[1]) * optimal_x_1
				399	self.s_scalar_bar_t[1] = 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1 \
				400	- optimal_x_1.T * (S_bar_stiff + self.S_t[1]) * optimal_x_1) \
				401	+ optimal_u_1.T * Totals_1 \
				402	- optimal_x_1.T * (self.s_bar_t[1] + self.s_t[1]) \
				403	- self.s_scalar_t[1] + Totals_scalar_1
				404
				405	print 'optimal_u_1', optimal_u_1
				406	print 'TotalS_1', TotalS_1
				407	print 'Totals_1', Totals_1
				408	print 'Totals_scalar_1', Totals_scalar_1
				409	print 'overall cost 1', 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1) \
				410	+ optimal_u_1.T * Totals_1 + Totals_scalar_1
				411	print 'overall cost 0', 0.5 * (x_hat_initial.T * self.S_t[0] * x_hat_initial) \
				412	+ x_hat_initial.T * self.s_t[0] + self.s_scalar_t[0]
				413
				414	print 't forward 0'
				415	print 'x_hat_initial[ 0]: %s' % (x_hat_initial)
				416	print 'x_hat[%2d]: %s' % (0, x_hat.T)
				417	print 'x_hat_next[%2d]: %s' % (0, x_hat_next.T)
				418	print 'u[%2d]: %s' % (0, u_t.T)
				419	print ('L[ 0]: %s' % (self.L_t[0],)).replace('\n', '\n ')
				420	print ('l[ 0]: %s' % (self.l_t[0],)).replace('\n', '\n ')
				421
				422	print ('A_t[%2d]: %s' % (0, start_A_t)).replace('\n', '\n ')
				423	print ('B_t[%2d]: %s' % (0, start_B_t)).replace('\n', '\n ')
				424	print ('c_t[%2d]: %s' % (0, start_c_t)).replace('\n', '\n ')
				425
				426	# TODO(austin): optimal_x_1 is x_hat
				427	x_hat = -numpy.linalg.solve((self.S_t[1] + S_bar_stiff),
				428	(self.s_t[1] + self.s_bar_t[1]))
				429	print 'new xhat', x_hat
				430
				431	self.S_bar_t[1] = S_bar_stiff
				432
				433	self.last_x_hat_t[1] = x_hat
				434
				435	for t in range(1, l):
				436	print 't forward', t
				437	u_t = self.L_t[t] * x_hat + self.l_t[t]
				438
				439	x_hat_next = self.dynamics.discrete_dynamics(x_hat, u_t)
				440	A_bar_t = numerical_jacobian_x(self.dynamics.inverse_discrete_dynamics,
				441	x_hat_next, u_t)
				442	B_bar_t = numerical_jacobian_u(self.dynamics.inverse_discrete_dynamics,
				443	x_hat_next, u_t)
				444	c_bar_t = x_hat - A_bar_t * x_hat_next - B_bar_t * u_t
				445
				446	print 'x_hat[%2d]: %s' % (t, x_hat.T)
				447	print 'x_hat_next[%2d]: %s' % (t, x_hat_next.T)
				448	print ('L[%2d]: %s' % (t, self.L_t[t],)).replace('\n', '\n ')
				449	print ('l[%2d]: %s' % (t, self.l_t[t],)).replace('\n', '\n ')
				450	print 'u[%2d]: %s' % (t, u_t.T)
				451
				452	print ('A_bar_t[%2d]: %s' % (t, A_bar_t)).replace('\n', '\n ')
				453	print ('B_bar_t[%2d]: %s' % (t, B_bar_t)).replace('\n', '\n ')
				454	print ('c_bar_t[%2d]: %s' % (t, c_bar_t)).replace('\n', '\n ')
				455
				456	Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat, u_t)
				457	P_t = numerical_jacobian_x_u(self.cost.cost, x_hat, u_t)
				458	R_t = numerical_jacobian_u_u(self.cost.cost, x_hat, u_t)
				459
				460	q_t = numerical_jacobian_x(self.cost.cost, x_hat, u_t).T \
				461	- Q_t * x_hat - P_t.T * u_t
				462	r_t = numerical_jacobian_u(self.cost.cost, x_hat, u_t).T \
				463	- P_t * x_hat - R_t * u_t
				464
				465	q_scalar_t = self.cost.cost(x_hat, u_t) \
				466	- 0.5 * (x_hat.T * (Q_t * x_hat + P_t.T * u_t) \
				467	+ u_t.T * (P_t * x_hat + R_t * u_t)) - x_hat.T * q_t - u_t.T * r_t
				468
				469	C_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t + P_t * A_bar_t
				470	D_bar_t = A_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t
				471	E_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * B_bar_t + R_t \
				472	+ P_t * B_bar_t + B_bar_t.T * P_t.T
				473	d_bar_t = A_bar_t.T * (self.s_bar_t[t] + q_t) \
				474	+ A_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
				475	e_bar_t = r_t + P_t * c_bar_t + B_bar_t.T * self.s_bar_t[t] \
				476	+ B_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t
				477
				478	self.L_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * C_bar_t
				479	self.l_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * e_bar_t
				480
				481	self.S_bar_t[t + 1] = D_bar_t + C_bar_t.T * self.L_bar_t[t + 1]
				482	self.s_bar_t[t + 1] = d_bar_t + C_bar_t.T * self.l_bar_t[t + 1]
				483	self.s_scalar_bar_t[t + 1] = \
				484	-0.5 * e_bar_t.T * numpy.linalg.inv(E_bar_t) * e_bar_t \
				485	+ 0.5 * c_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t \
				486	+ c_bar_t.T * self.s_bar_t[t] + c_bar_t.T * q_t \
				487	+ self.s_scalar_bar_t[t] + q_scalar_t
				488
				489	x_hat = -numpy.linalg.solve((self.S_t[t + 1] + self.S_bar_t[t + 1]),
				490	(self.s_t[t + 1] + self.s_bar_t[t + 1]))
				491	self.S_t[l] = self.cost.estimate_Q_final(x_hat)
				492	self.s_t[l] = self.cost.estimate_q_final(x_hat)
				493	x_hat = -numpy.linalg.inv(self.S_t[l] + self.S_bar_t[l]) \
				494	* (self.s_t[l] + self.s_bar_t[l])
				495
				496	for t in reversed(range(l)):
				497	print 't backward', t
				498	# TODO(austin): I don't think we can use L_t like this here.
				499	# I think we are off by 1 somewhere...
				500	u_t = self.L_bar_t[t + 1] * x_hat + self.l_bar_t[t + 1]
				501
				502	x_hat_prev = self.dynamics.inverse_discrete_dynamics(x_hat, u_t)
				503	print 'x_hat[%2d]: %s' % (t, x_hat.T)
				504	print 'x_hat_prev[%2d]: %s' % (t, x_hat_prev.T)
				505	print ('L_bar[%2d]: %s' % (t + 1, self.L_bar_t[t + 1])).replace('\n', '\n ')
				506	print ('l_bar[%2d]: %s' % (t + 1, self.l_bar_t[t + 1])).replace('\n', '\n ')
				507	print 'u[%2d]: %s' % (t, u_t.T)
				508	# Now compute the linearized A, B, and C
				509	# Start by doing it numerically, and then optimize.
				510	A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics, x_hat_prev, u_t)
				511	B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics, x_hat_prev, u_t)
				512	c_t = x_hat - A_t * x_hat_prev - B_t * u_t
				513
				514	print ('A_t[%2d]: %s' % (t, A_t)).replace('\n', '\n ')
				515	print ('B_t[%2d]: %s' % (t, B_t)).replace('\n', '\n ')
				516	print ('c_t[%2d]: %s' % (t, c_t)).replace('\n', '\n ')
				517
				518	Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_prev, u_t)
				519	P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_prev, u_t)
				520	P_T_t = numerical_jacobian_u_x(self.cost.cost, x_hat_prev, u_t)
				521	R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_prev, u_t)
				522
				523	q_t = numerical_jacobian_x(self.cost.cost, x_hat_prev, u_t).T \
				524	- Q_t * x_hat_prev - P_T_t * u_t
				525	r_t = numerical_jacobian_u(self.cost.cost, x_hat_prev, u_t).T \
				526	- P_t * x_hat_prev - R_t * u_t
				527
				528	q_scalar_t = self.cost.cost(x_hat_prev, u_t) \
				529	- 0.5 * (x_hat_prev.T * (Q_t * x_hat_prev + P_t.T * u_t) \
				530	+ u_t.T * (P_t * x_hat_prev + R_t * u_t)) \
				531	- x_hat_prev.T * q_t - u_t.T * r_t
				532
				533	C_t = P_t + B_t.T * self.S_t[t + 1] * A_t
				534	D_t = Q_t + A_t.T * self.S_t[t + 1] * A_t
				535	E_t = R_t + B_t.T * self.S_t[t + 1] * B_t
				536	d_t = q_t + A_t.T * self.s_t[t + 1] + A_t.T * self.S_t[t + 1] * c_t
				537	e_t = r_t + B_t.T * self.s_t[t + 1] + B_t.T * self.S_t[t + 1] * c_t
				538	self.L_t[t] = -numpy.linalg.inv(E_t) * C_t
				539	self.l_t[t] = -numpy.linalg.inv(E_t) * e_t
				540	self.s_t[t] = d_t + C_t.T * self.l_t[t]
				541	self.S_t[t] = D_t + C_t.T * self.L_t[t]
				542	self.s_scalar_t[t] = q_scalar_t \
				543	- 0.5 * e_t.T * numpy.linalg.inv(E_t) * e_t \
				544	+ 0.5 * c_t.T * self.S_t[t + 1] * c_t \
				545	+ c_t.T * self.s_t[t + 1] \
				546	+ self.s_scalar_t[t + 1]
				547
				548	x_hat = -numpy.linalg.solve((self.S_t[t] + self.S_bar_t[t]),
				549	(self.s_t[t] + self.s_bar_t[t]))
				550	if t == 0:
				551	self.last_x_hat_t[t] = x_hat_initial
				552	else:
				553	self.last_x_hat_t[t] = x_hat
				554
				555	x_hat_t = [x_hat_initial]
				556
				557	pylab.figure('states %d' % a)
				558	pylab.ion()
				559	for dim in range(num_states):
				560	pylab.plot(numpy.arange(len(self.last_x_hat_t)),
				561	[x_hat_loop[dim, 0] for x_hat_loop in self.last_x_hat_t],
				562	marker='o', label='Xhat[%d]' % dim)
				563	pylab.legend()
				564	pylab.draw()
				565
				566	pylab.figure('xy %d' % a)
				567	pylab.ion()
				568	pylab.plot([x_hat_loop[0, 0] for x_hat_loop in self.last_x_hat_t],
				569	[x_hat_loop[1, 0] for x_hat_loop in self.last_x_hat_t],
				570	marker='o', label='trajectory')
				571	pylab.legend()
				572	pylab.draw()
				573
				574	final_u_t = [numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1)]
				575	cost_to_go = []
				576	cost_to_come = []
				577	cost = []
				578	for t in range(l):
				579	cost_to_go.append(
				580	(0.5 * self.last_x_hat_t[t].T * self.S_t[t] * self.last_x_hat_t[t] \
				581	+ self.last_x_hat_t[t].T * self.s_t[t] + self.s_scalar_t[t])[0, 0])
				582	cost_to_come.append(
				583	(0.5 * self.last_x_hat_t[t].T * self.S_bar_t[t] * self.last_x_hat_t[t] \
				584	+ self.last_x_hat_t[t].T * self.s_bar_t[t] + self.s_scalar_bar_t[t])[0, 0])
				585	cost.append(cost_to_go[-1] + cost_to_come[-1])
				586	final_u_t[t] = self.L_t[t] * self.last_x_hat_t[t] + self.l_t[t]
				587
				588	for t in range(l):
				589	A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics,
				590	self.last_x_hat_t[t], final_u_t[t])
				591	B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics,
				592	self.last_x_hat_t[t], final_u_t[t])
				593	c_t = self.dynamics.discrete_dynamics(self.last_x_hat_t[t], final_u_t[t]) \
				594	- A_t * self.last_x_hat_t[t] - B_t * final_u_t[t]
				595	print("Infeasability at", t, "is",
				596	((A_t * self.last_x_hat_t[t] + B_t * final_u_t[t] + c_t) \
				597	- self.last_x_hat_t[t + 1]).T)
				598
				599	pylab.figure('u')
				600	samples = numpy.arange(len(final_u_t))
				601	for i in range(num_inputs):
				602	pylab.plot(samples, [u[i, 0] for u in final_u_t],
				603	label='u[%d]' % i, marker='o')
				604	pylab.legend()
				605
				606	pylab.figure('cost')
				607	cost_samples = numpy.arange(len(cost))
				608	pylab.plot(cost_samples, cost_to_go, label='cost to go', marker='o')
				609	pylab.plot(cost_samples, cost_to_come, label='cost to come', marker='o')
				610	pylab.plot(cost_samples, cost, label='cost', marker='o')
				611	pylab.legend()
				612
				613	pylab.ioff()
				614	pylab.show()
				615
				616	if __name__ == '__main__':
				617	dt = 0.05
				618	#arm_dynamics = ArmDynamics(dt=dt)
				619	#elqr = ELQR(arm_dynamics, ArmCostFunction(dt=dt, dynamics=arm_dynamics))
				620	#x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0], [0.0]])
				621	#elqr.Solve(x_hat_initial, 100, 3)
				622
				623	elqr = ELQR(SkidSteerDynamics(dt=dt), CostFunction(dt=dt))
				624	x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0]])
				625	elqr.Solve(x_hat_initial, 100, 15)
				626	sys.exit(1)