| #!/usr/bin/python3 |
| # This is an initial, hacky implementation of the extended LQR paper. It's just |
| # a proof of concept, so don't trust it too much. |
| |
| import numpy |
| import scipy.optimize |
| from matplotlib import pylab |
| import sys |
| |
| from frc971.control_loops.python import controls |
| |
| |
| class ArmDynamics(object): |
| |
| def __init__(self, dt): |
| self.dt = dt |
| |
| self.l1 = 1.0 |
| self.l2 = 0.8 |
| self.num_states = 4 |
| self.num_inputs = 2 |
| |
| def dynamics(self, X, U): |
| """Calculates the dynamics for a double jointed arm. |
| |
| Args: |
| X, numpy.matrix(4, 1), The state. [theta1, omega1, theta2, omega2] |
| U, numpy.matrix(2, 1), The input. [torque1, torque2] |
| |
| Returns: |
| numpy.matrix(4, 1), The derivative of the dynamics. |
| """ |
| return numpy.matrix([[X[1, 0]], [U[0, 0]], [X[3, 0]], [U[1, 0]]]) |
| |
| def discrete_dynamics(self, X, U): |
        return RungeKutta(lambda startingX: self.dynamics(startingX, U), X,
                          self.dt)
| |
| def inverse_discrete_dynamics(self, X, U): |
| return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X, |
                          self.dt)
| |
| |
| # Simple implementation for a quadratic cost function. |
| class ArmCostFunction: |
| |
| def __init__(self, dt, dynamics): |
| self.num_states = 4 |
| self.num_inputs = 2 |
| self.dt = dt |
| self.dynamics = dynamics |
| |
| q_pos = 0.5 |
| q_vel = 1.65 |
| self.Q = numpy.matrix( |
| numpy.diag([ |
| 1.0 / (q_pos**2.0), 1.0 / (q_vel**2.0), 1.0 / (q_pos**2.0), |
| 1.0 / (q_vel**2.0) |
| ])) |
| |
| self.R = numpy.matrix( |
| numpy.diag([1.0 / (12.0**2.0), 1.0 / (12.0**2.0)])) |
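        # The state and input weights follow Bryson's rule: penalize by the
        # inverse square of the largest acceptable excursion (12.0 here is
        # presumably the 12V input limit).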
| |
| final_A = numerical_jacobian_x(self.dynamics.discrete_dynamics, |
| numpy.matrix(numpy.zeros((4, 1))), |
| numpy.matrix(numpy.zeros((2, 1)))) |
| final_B = numerical_jacobian_u(self.dynamics.discrete_dynamics, |
| numpy.matrix(numpy.zeros((4, 1))), |
| numpy.matrix(numpy.zeros((2, 1)))) |
| print('Final A', final_A) |
| print('Final B', final_B) |
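        # With optimal_cost_function=True, dlqr also returns the steady-state
        # cost-to-go matrix S; final_cost() below uses it as the quadratic
        # terminal cost.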
| K, self.S = controls.dlqr(final_A, |
| final_B, |
| self.Q, |
| self.R, |
| optimal_cost_function=True) |
| print('Final eig:', numpy.linalg.eig(final_A - final_B * K)) |
| |
| def final_cost(self, X, U): |
| """Computes the final cost of being at X |
| |
| Args: |
| X: numpy.matrix(self.num_states, 1) |
| U: numpy.matrix(self.num_inputs, 1), ignored |
| |
| Returns: |
| numpy.matrix(1, 1), The quadratic cost of being at X |
| """ |
| print('TODO(austin): S -> X.T * S * X, need to fix if we care') |
| return 0.5 * X.T * self.S * X |
| |
| def cost(self, X, U): |
| """Computes the incremental cost given a position and U. |
| |
| Args: |
| X: numpy.matrix(self.num_states, 1) |
| U: numpy.matrix(self.num_inputs, 1) |
| |
| Returns: |
| numpy.matrix(1, 1), The quadratic cost of evaluating U. |
| """ |
| return U.T * self.R * U + X.T * self.Q * X |
| |
| def estimate_Q_final(self, X_hat): |
| """Returns the quadraticized final Q around X_hat. |
| |
| This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X) |
| |
| Args: |
| X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around. |
| |
| Result: |
| numpy.matrix(self.num_states, self.num_states) |
| """ |
| zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1))) |
| print('S', self.S) |
| print('Q_final', numerical_jacobian_x_x(self.final_cost, X_hat, |
| zero_U)) |
| return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U) |
| |
| def estimate_partial_cost_partial_x_final(self, X_hat): |
| """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost. |
| |
| Args: |
| X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around. |
| |
| Result: |
| numpy.matrix(self.num_states, 1) |
| """ |
| return numerical_jacobian_x( |
| self.final_cost, X_hat, |
| numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T |
| |
| def estimate_q_final(self, X_hat): |
| """Returns q evaluated at X_hat for the final cost function.""" |
| return self.estimate_partial_cost_partial_x_final( |
| X_hat) - self.estimate_Q_final(X_hat) * X_hat |
| |
| |
| class SkidSteerDynamics(object): |
| |
| def __init__(self, dt): |
| self.width = 0.2 |
| self.dt = dt |
| self.num_states = 3 |
| self.num_inputs = 2 |
| |
| def dynamics(self, X, U): |
| """Calculates the dynamics for a 2 wheeled robot. |
| |
| Args: |
| X, numpy.matrix(3, 1), The state. [x, y, theta] |
| U, numpy.matrix(2, 1), The input. [left velocity, right velocity] |
| |
| Returns: |
| numpy.matrix(3, 1), The derivative of the dynamics. |
| """ |
| #return numpy.matrix([[X[1, 0]], |
| # [X[2, 0]], |
| # [U[0, 0]]]) |
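        # Differential drive kinematics: the chassis speed is the average of
        # the wheel velocities, and the yaw rate is their difference divided
        # by the track width.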
| return numpy.matrix([[(U[0, 0] + U[1, 0]) * numpy.cos(X[2, 0]) / 2.0], |
| [(U[0, 0] + U[1, 0]) * numpy.sin(X[2, 0]) / 2.0], |
| [(U[1, 0] - U[0, 0]) / self.width]]) |
| |
| def discrete_dynamics(self, X, U): |
        return RungeKutta(lambda startingX: self.dynamics(startingX, U), X,
                          self.dt)
| |
| def inverse_discrete_dynamics(self, X, U): |
| return RungeKutta(lambda startingX: -self.dynamics(startingX, U), X, |
                          self.dt)
| |
| |
| # Simple implementation for a quadratic cost function. |
| class CostFunction: |
| |
| def __init__(self, dt): |
| self.num_states = 3 |
| self.num_inputs = 2 |
| self.dt = dt |
| self.Q = numpy.matrix([[0.1, 0, 0], [0, 0.6, 0], [0, 0, 0.1] |
| ]) / self.dt / self.dt |
| self.R = numpy.matrix([[0.40, 0], [0, 0.40]]) / self.dt / self.dt |
| |
| def final_cost(self, X, U): |
| """Computes the final cost of being at X |
| |
| Args: |
| X: numpy.matrix(self.num_states, 1) |
| U: numpy.matrix(self.num_inputs, 1), ignored |
| |
| Returns: |
| numpy.matrix(1, 1), The quadratic cost of being at X |
| """ |
| return X.T * self.Q * X * 1000 |
| |
| def cost(self, X, U): |
| """Computes the incremental cost given a position and U. |
| |
| Args: |
| X: numpy.matrix(self.num_states, 1) |
| U: numpy.matrix(self.num_inputs, 1) |
| |
| Returns: |
| numpy.matrix(1, 1), The quadratic cost of evaluating U. |
| """ |
| return U.T * self.R * U + X.T * self.Q * X |
| |
| def estimate_Q_final(self, X_hat): |
| """Returns the quadraticized final Q around X_hat. |
| |
| This is calculated by evaluating partial^2 cost(X_hat) / (partial X * partial X) |
| |
| Args: |
| X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around. |
| |
| Result: |
| numpy.matrix(self.num_states, self.num_states) |
| """ |
| zero_U = numpy.matrix(numpy.zeros((self.num_inputs, 1))) |
| return numerical_jacobian_x_x(self.final_cost, X_hat, zero_U) |
| |
| def estimate_partial_cost_partial_x_final(self, X_hat): |
| """Returns \frac{\partial cost}{\partial X}(X_hat) for the final cost. |
| |
| Args: |
| X_hat: numpy.matrix(self.num_states, 1), The state to quadraticize around. |
| |
| Result: |
| numpy.matrix(self.num_states, 1) |
| """ |
| return numerical_jacobian_x( |
| self.final_cost, X_hat, |
| numpy.matrix(numpy.zeros((self.num_inputs, 1)))).T |
| |
| def estimate_q_final(self, X_hat): |
| """Returns q evaluated at X_hat for the final cost function.""" |
| return self.estimate_partial_cost_partial_x_final( |
| X_hat) - self.estimate_Q_final(X_hat) * X_hat |
| |
| |
| def RungeKutta(f, x, dt): |
| """4th order RungeKutta integration of F starting at X.""" |
| a = f(x) |
| b = f(x + dt / 2.0 * a) |
| c = f(x + dt / 2.0 * b) |
| d = f(x + dt * c) |
| return x + dt * (a + 2.0 * b + 2.0 * c + d) / 6.0 |
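

# Example (sketch, not used by the solver): one RK4 step of xdot = -x with
# dt = 0.1 from x = 1 should land near exp(-0.1) ~= 0.905:
#   RungeKutta(lambda x: -x, numpy.matrix([[1.0]]), 0.1)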
| |
| |
| def numerical_jacobian_x(fn, X, U, epsilon=1e-4): |
| """Numerically estimates the jacobian around X, U in X. |
| |
| Args: |
| fn: A function of X, U. |
| X: numpy.matrix(num_states, 1), The state vector to take the jacobian |
| around. |
| U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian |
| around. |
| |
| Returns: |
        numpy.matrix(fn(X, U).shape[0], num_states), The jacobian of fn with
        respect to X.
| """ |
| num_states = X.shape[0] |
| nominal = fn(X, U) |
| answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_states))) |
    # Central differences (+/- epsilon) cost twice the function evaluations of
    # forward differences, but give a noticeably more accurate estimate.
| for i in range(0, num_states): |
| dX_plus = X.copy() |
| dX_plus[i] += epsilon |
| dX_minus = X.copy() |
| dX_minus[i] -= epsilon |
| answer[:, i] = (fn(dX_plus, U) - fn(dX_minus, U)) / epsilon / 2.0 |
| return answer |
| |
| |
| def numerical_jacobian_u(fn, X, U, epsilon=1e-4): |
| """Numerically estimates the jacobian around X, U in U. |
| |
| Args: |
| fn: A function of X, U. |
| X: numpy.matrix(num_states, 1), The state vector to take the jacobian |
| around. |
| U: numpy.matrix(num_inputs, 1), The input vector to take the jacobian |
| around. |
| |
| Returns: |
        numpy.matrix(fn(X, U).shape[0], num_inputs), The jacobian of fn with
        respect to U.
| """ |
| num_states = X.shape[0] |
| num_inputs = U.shape[0] |
| nominal = fn(X, U) |
| answer = numpy.matrix(numpy.zeros((nominal.shape[0], num_inputs))) |
| for i in range(0, num_inputs): |
| dU_plus = U.copy() |
| dU_plus[i] += epsilon |
| dU_minus = U.copy() |
| dU_minus[i] -= epsilon |
| answer[:, i] = (fn(X, dU_plus) - fn(X, dU_minus)) / epsilon / 2.0 |
| return answer |
| |
| |
| def numerical_jacobian_x_x(fn, X, U): |
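    """Numerically estimates the Hessian of fn in X (second derivative in X)."""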
| return numerical_jacobian_x( |
| lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T, |
| X, U) |
| |
| |
| def numerical_jacobian_x_u(fn, X, U): |
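    """Numerically estimates the derivative in X of fn's gradient in U.

    For a scalar fn the result has shape (num_inputs, num_states).
    """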
| return numerical_jacobian_x( |
| lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T, |
| X, U) |
| |
| |
| def numerical_jacobian_u_x(fn, X, U): |
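    """Numerically estimates the derivative in U of fn's gradient in X.

    For a scalar fn the result has shape (num_states, num_inputs).
    """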
| return numerical_jacobian_u( |
| lambda X_inner, U_inner: numerical_jacobian_x(fn, X_inner, U_inner).T, |
| X, U) |
| |
| |
| def numerical_jacobian_u_u(fn, X, U): |
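    """Numerically estimates the Hessian of fn in U (second derivative in U)."""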
| return numerical_jacobian_u( |
| lambda X_inner, U_inner: numerical_jacobian_u(fn, X_inner, U_inner).T, |
| X, U) |
| |
| |
| class ELQR(object): |
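    """Proof-of-concept implementation of the extended LQR algorithm.

    Solve() repeatedly runs a forward pass that builds up a cost-to-come (the
    *_bar_t quantities) followed by a backward pass that builds up a
    cost-to-go (S_t, s_t, s_scalar_t), relinearizing the dynamics and
    requadraticizing the cost around the current trajectory estimate.
    """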
| |
| def __init__(self, dynamics, cost): |
| self.dynamics = dynamics |
| self.cost = cost |
| |
| def Solve(self, x_hat_initial, horizon, iterations): |
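        """Iteratively solves for a trajectory starting at x_hat_initial.

        Args:
            x_hat_initial: numpy.matrix(num_states, 1), the starting state.
            horizon: int, number of timesteps to optimize over.
            iterations: int, number of forward/backward sweeps to run.
        """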
| l = horizon |
| num_states = self.dynamics.num_states |
| num_inputs = self.dynamics.num_inputs |
| self.S_bar_t = [ |
| numpy.matrix(numpy.zeros((num_states, num_states))) |
| for _ in range(l + 1) |
| ] |
| self.s_bar_t = [ |
| numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1) |
| ] |
| self.s_scalar_bar_t = [ |
| numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1) |
| ] |
| |
| self.L_t = [ |
| numpy.matrix(numpy.zeros((num_inputs, num_states))) |
| for _ in range(l + 1) |
| ] |
| self.l_t = [ |
| numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1) |
| ] |
| self.L_bar_t = [ |
| numpy.matrix(numpy.zeros((num_inputs, num_states))) |
| for _ in range(l + 1) |
| ] |
| self.l_bar_t = [ |
| numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1) |
| ] |
| |
| self.S_t = [ |
| numpy.matrix(numpy.zeros((num_states, num_states))) |
| for _ in range(l + 1) |
| ] |
| self.s_t = [ |
| numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1) |
| ] |
| self.s_scalar_t = [ |
| numpy.matrix(numpy.zeros((1, 1))) for _ in range(l + 1) |
| ] |
| |
| self.last_x_hat_t = [ |
| numpy.matrix(numpy.zeros((num_states, 1))) for _ in range(l + 1) |
| ] |
| |
| # Iterate the solver |
| for a in range(iterations): |
| x_hat = x_hat_initial |
| u_t = self.L_t[0] * x_hat + self.l_t[0] |
| self.S_bar_t[0] = numpy.matrix( |
| numpy.zeros((num_states, num_states))) |
| self.s_bar_t[0] = numpy.matrix(numpy.zeros((num_states, 1))) |
| self.s_scalar_bar_t[0] = numpy.matrix([[0]]) |
| |
| self.last_x_hat_t[0] = x_hat_initial |
| |
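            # Quadraticize the cost around (x_hat_initial, u_t) so that
            #   cost(x, u) ~= 0.5 * x.T * Q * x + u.T * P * x + 0.5 * u.T * R * u
            #                 + q.T * x + r.T * u + q_scalar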
| Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_initial, u_t) |
| P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_initial, u_t) |
| R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_initial, u_t) |
| |
| q_t = numerical_jacobian_x(self.cost.cost, x_hat_initial, u_t).T \ |
| - Q_t * x_hat_initial - P_t.T * u_t |
| r_t = numerical_jacobian_u(self.cost.cost, x_hat_initial, u_t).T \ |
| - P_t * x_hat_initial - R_t * u_t |
| |
| q_scalar_t = self.cost.cost(x_hat_initial, u_t) \ |
| - 0.5 * (x_hat_initial.T * (Q_t * x_hat_initial + P_t.T * u_t) \ |
| + u_t.T * (P_t * x_hat_initial + R_t * u_t)) \ |
| - x_hat_initial.T * q_t - u_t.T * r_t |
| |
| start_A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics, |
| x_hat_initial, u_t) |
| start_B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics, |
| x_hat_initial, u_t) |
| x_hat_next = self.dynamics.discrete_dynamics(x_hat_initial, u_t) |
| start_c_t = x_hat_next - start_A_t * x_hat_initial - start_B_t * u_t |
| |
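            # Pseudo-invert start_B_t via its SVD; this seeds the cost-to-come
            # feedback for the first timestep even though B is not square.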
| B_svd_u, B_svd_sigma_diag, B_svd_v = numpy.linalg.svd(start_B_t) |
| B_svd_sigma = numpy.matrix(numpy.zeros(start_B_t.shape)) |
| B_svd_sigma[0:B_svd_sigma_diag.shape[0], 0:B_svd_sigma_diag.shape[0]] = \ |
| numpy.diag(B_svd_sigma_diag) |
| |
| B_svd_sigma_inv = numpy.matrix(numpy.zeros(start_B_t.shape)).T |
| B_svd_sigma_inv[0:B_svd_sigma_diag.shape[0], |
| 0:B_svd_sigma_diag.shape[0]] = \ |
| numpy.linalg.inv(numpy.diag(B_svd_sigma_diag)) |
| B_svd_inv = B_svd_v.T * B_svd_sigma_inv * B_svd_u.T |
| |
| self.L_bar_t[1] = B_svd_inv |
| self.l_bar_t[1] = -B_svd_inv * (start_A_t * x_hat_initial + |
| start_c_t) |
| |
| self.S_bar_t[1] = self.L_bar_t[1].T * R_t * self.L_bar_t[1] |
| |
| TotalS_1 = start_B_t.T * self.S_t[1] * start_B_t + R_t |
| Totals_1 = start_B_t.T * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \ |
| + start_B_t.T * self.s_t[1] + P_t * x_hat_initial + r_t |
| Totals_scalar_1 = 0.5 * (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.S_t[1] * (start_c_t + start_A_t * x_hat_initial) \ |
| + self.s_scalar_t[1] + x_hat_initial.T * q_t + q_scalar_t \ |
| + 0.5 * x_hat_initial.T * Q_t * x_hat_initial \ |
| + (start_c_t.T + x_hat_initial.T * start_A_t.T) * self.s_t[1] |
| |
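            # Unconstrained minimizer of the step-1 quadratic cost; the
            # cost-to-come offset terms below are expressed around it.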
| optimal_u_1 = -numpy.linalg.solve(TotalS_1, Totals_1) |
| optimal_x_1 = start_A_t * x_hat_initial \ |
| + start_B_t * optimal_u_1 + start_c_t |
| |
            # TODO(austin): Disable this if we are controllable. It should not be needed then.
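            # Stiffen any near-zero eigenvalues of S_bar so that
            # (S_t + S_bar_t) stays invertible when solving for x_hat below.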
| S_bar_1_eigh_eigenvalues, S_bar_1_eigh_eigenvectors = \ |
| numpy.linalg.eigh(self.S_bar_t[1]) |
| S_bar_1_eigh = numpy.matrix(numpy.diag(S_bar_1_eigh_eigenvalues)) |
| S_bar_1_eigh_eigenvalues_stiff = S_bar_1_eigh_eigenvalues.copy() |
| for i in range(S_bar_1_eigh_eigenvalues_stiff.shape[0]): |
| if abs(S_bar_1_eigh_eigenvalues_stiff[i]) < 1e-8: |
| S_bar_1_eigh_eigenvalues_stiff[i] = max( |
| S_bar_1_eigh_eigenvalues_stiff) * 1.0 |
| |
| S_bar_stiff = S_bar_1_eigh_eigenvectors * numpy.matrix( |
| numpy.diag(S_bar_1_eigh_eigenvalues_stiff) |
| ) * S_bar_1_eigh_eigenvectors.T |
| |
| print('Min u', -numpy.linalg.solve(TotalS_1, Totals_1)) |
| print('Min x_hat', optimal_x_1) |
| self.s_bar_t[1] = -self.s_t[1] - (S_bar_stiff + |
| self.S_t[1]) * optimal_x_1 |
| self.s_scalar_bar_t[1] = 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1 \ |
| - optimal_x_1.T * (S_bar_stiff + self.S_t[1]) * optimal_x_1) \ |
| + optimal_u_1.T * Totals_1 \ |
| - optimal_x_1.T * (self.s_bar_t[1] + self.s_t[1]) \ |
| - self.s_scalar_t[1] + Totals_scalar_1 |
| |
| print('optimal_u_1', optimal_u_1) |
| print('TotalS_1', TotalS_1) |
| print('Totals_1', Totals_1) |
| print('Totals_scalar_1', Totals_scalar_1) |
| print('overall cost 1', 0.5 * (optimal_u_1.T * TotalS_1 * optimal_u_1) \ |
| + optimal_u_1.T * Totals_1 + Totals_scalar_1) |
| print('overall cost 0', 0.5 * (x_hat_initial.T * self.S_t[0] * x_hat_initial) \ |
| + x_hat_initial.T * self.s_t[0] + self.s_scalar_t[0]) |
| |
| print('t forward 0') |
| print('x_hat_initial[ 0]: %s' % (x_hat_initial)) |
| print('x_hat[%2d]: %s' % (0, x_hat.T)) |
| print('x_hat_next[%2d]: %s' % (0, x_hat_next.T)) |
| print('u[%2d]: %s' % (0, u_t.T)) |
            print(('L[ 0]: %s' % (self.L_t[0], )).replace('\n', '\n '))
            print(('l[ 0]: %s' % (self.l_t[0], )).replace('\n', '\n '))
| |
            print(('A_t[%2d]: %s' % (0, start_A_t)).replace('\n', '\n '))
            print(('B_t[%2d]: %s' % (0, start_B_t)).replace('\n', '\n '))
            print(('c_t[%2d]: %s' % (0, start_c_t)).replace('\n', '\n '))
| |
| # TODO(austin): optimal_x_1 is x_hat |
| x_hat = -numpy.linalg.solve((self.S_t[1] + S_bar_stiff), |
| (self.s_t[1] + self.s_bar_t[1])) |
| print('new xhat', x_hat) |
| |
| self.S_bar_t[1] = S_bar_stiff |
| |
| self.last_x_hat_t[1] = x_hat |
| |
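            # Forward pass: march along the horizon, propagating the
            # cost-to-come quantities S_bar_t, s_bar_t using a linearization
            # of the inverse (backward-in-time) dynamics.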
| for t in range(1, l): |
| print('t forward', t) |
| u_t = self.L_t[t] * x_hat + self.l_t[t] |
| |
| x_hat_next = self.dynamics.discrete_dynamics(x_hat, u_t) |
| A_bar_t = numerical_jacobian_x( |
| self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t) |
| B_bar_t = numerical_jacobian_u( |
| self.dynamics.inverse_discrete_dynamics, x_hat_next, u_t) |
| c_bar_t = x_hat - A_bar_t * x_hat_next - B_bar_t * u_t |
| |
| print('x_hat[%2d]: %s' % (t, x_hat.T)) |
| print('x_hat_next[%2d]: %s' % (t, x_hat_next.T)) |
                print(('L[%2d]: %s' % (t, self.L_t[t])).replace('\n', '\n '))
                print(('l[%2d]: %s' % (t, self.l_t[t])).replace('\n', '\n '))
| print('u[%2d]: %s' % (t, u_t.T)) |
| |
                print(('A_bar_t[%2d]: %s' % (t, A_bar_t)).replace('\n', '\n '))
                print(('B_bar_t[%2d]: %s' % (t, B_bar_t)).replace('\n', '\n '))
                print(('c_bar_t[%2d]: %s' % (t, c_bar_t)).replace('\n', '\n '))
| |
| Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat, u_t) |
| P_t = numerical_jacobian_x_u(self.cost.cost, x_hat, u_t) |
| R_t = numerical_jacobian_u_u(self.cost.cost, x_hat, u_t) |
| |
| q_t = numerical_jacobian_x(self.cost.cost, x_hat, u_t).T \ |
| - Q_t * x_hat - P_t.T * u_t |
| r_t = numerical_jacobian_u(self.cost.cost, x_hat, u_t).T \ |
| - P_t * x_hat - R_t * u_t |
| |
| q_scalar_t = self.cost.cost(x_hat, u_t) \ |
| - 0.5 * (x_hat.T * (Q_t * x_hat + P_t.T * u_t) \ |
| + u_t.T * (P_t * x_hat + R_t * u_t)) - x_hat.T * q_t - u_t.T * r_t |
| |
| C_bar_t = B_bar_t.T * (self.S_bar_t[t] + |
| Q_t) * A_bar_t + P_t * A_bar_t |
| D_bar_t = A_bar_t.T * (self.S_bar_t[t] + Q_t) * A_bar_t |
| E_bar_t = B_bar_t.T * (self.S_bar_t[t] + Q_t) * B_bar_t + R_t \ |
| + P_t * B_bar_t + B_bar_t.T * P_t.T |
| d_bar_t = A_bar_t.T * (self.s_bar_t[t] + q_t) \ |
| + A_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t |
| e_bar_t = r_t + P_t * c_bar_t + B_bar_t.T * self.s_bar_t[t] \ |
| + B_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t |
| |
| self.L_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * C_bar_t |
| self.l_bar_t[t + 1] = -numpy.linalg.inv(E_bar_t) * e_bar_t |
| |
| self.S_bar_t[t + 1] = D_bar_t + C_bar_t.T * self.L_bar_t[t + 1] |
| self.s_bar_t[t + 1] = d_bar_t + C_bar_t.T * self.l_bar_t[t + 1] |
| self.s_scalar_bar_t[t + 1] = \ |
| -0.5 * e_bar_t.T * numpy.linalg.inv(E_bar_t) * e_bar_t \ |
| + 0.5 * c_bar_t.T * (self.S_bar_t[t] + Q_t) * c_bar_t \ |
| + c_bar_t.T * self.s_bar_t[t] + c_bar_t.T * q_t \ |
| + self.s_scalar_bar_t[t] + q_scalar_t |
| |
| x_hat = -numpy.linalg.solve( |
| (self.S_t[t + 1] + self.S_bar_t[t + 1]), |
| (self.s_t[t + 1] + self.s_bar_t[t + 1])) |
| self.S_t[l] = self.cost.estimate_Q_final(x_hat) |
| self.s_t[l] = self.cost.estimate_q_final(x_hat) |
| x_hat = -numpy.linalg.inv(self.S_t[l] + self.S_bar_t[l]) \ |
| * (self.s_t[l] + self.s_bar_t[l]) |
| |
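            # Backward pass: sweep the cost-to-go quantities S_t, s_t back
            # from the horizon, recomputing the time-varying feedback L_t, l_t.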
| for t in reversed(range(l)): |
| print('t backward', t) |
| # TODO(austin): I don't think we can use L_t like this here. |
| # I think we are off by 1 somewhere... |
| u_t = self.L_bar_t[t + 1] * x_hat + self.l_bar_t[t + 1] |
| |
| x_hat_prev = self.dynamics.inverse_discrete_dynamics( |
| x_hat, u_t) |
| print('x_hat[%2d]: %s' % (t, x_hat.T)) |
| print('x_hat_prev[%2d]: %s' % (t, x_hat_prev.T)) |
                print(('L_bar[%2d]: %s' %
                       (t + 1, self.L_bar_t[t + 1])).replace('\n', '\n '))
                print(('l_bar[%2d]: %s' %
                       (t + 1, self.l_bar_t[t + 1])).replace('\n', '\n '))
| print('u[%2d]: %s' % (t, u_t.T)) |
| # Now compute the linearized A, B, and C |
| # Start by doing it numerically, and then optimize. |
| A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics, |
| x_hat_prev, u_t) |
| B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics, |
| x_hat_prev, u_t) |
| c_t = x_hat - A_t * x_hat_prev - B_t * u_t |
| |
                print(('A_t[%2d]: %s' % (t, A_t)).replace('\n', '\n '))
                print(('B_t[%2d]: %s' % (t, B_t)).replace('\n', '\n '))
                print(('c_t[%2d]: %s' % (t, c_t)).replace('\n', '\n '))
| |
| Q_t = numerical_jacobian_x_x(self.cost.cost, x_hat_prev, u_t) |
| P_t = numerical_jacobian_x_u(self.cost.cost, x_hat_prev, u_t) |
| P_T_t = numerical_jacobian_u_x(self.cost.cost, x_hat_prev, u_t) |
| R_t = numerical_jacobian_u_u(self.cost.cost, x_hat_prev, u_t) |
| |
| q_t = numerical_jacobian_x(self.cost.cost, x_hat_prev, u_t).T \ |
| - Q_t * x_hat_prev - P_T_t * u_t |
| r_t = numerical_jacobian_u(self.cost.cost, x_hat_prev, u_t).T \ |
| - P_t * x_hat_prev - R_t * u_t |
| |
| q_scalar_t = self.cost.cost(x_hat_prev, u_t) \ |
| - 0.5 * (x_hat_prev.T * (Q_t * x_hat_prev + P_t.T * u_t) \ |
| + u_t.T * (P_t * x_hat_prev + R_t * u_t)) \ |
| - x_hat_prev.T * q_t - u_t.T * r_t |
| |
| C_t = P_t + B_t.T * self.S_t[t + 1] * A_t |
| D_t = Q_t + A_t.T * self.S_t[t + 1] * A_t |
| E_t = R_t + B_t.T * self.S_t[t + 1] * B_t |
                d_t = q_t + A_t.T * self.s_t[t + 1] \
                    + A_t.T * self.S_t[t + 1] * c_t
                e_t = r_t + B_t.T * self.s_t[t + 1] \
                    + B_t.T * self.S_t[t + 1] * c_t
| self.L_t[t] = -numpy.linalg.inv(E_t) * C_t |
| self.l_t[t] = -numpy.linalg.inv(E_t) * e_t |
| self.s_t[t] = d_t + C_t.T * self.l_t[t] |
| self.S_t[t] = D_t + C_t.T * self.L_t[t] |
| self.s_scalar_t[t] = q_scalar_t \ |
| - 0.5 * e_t.T * numpy.linalg.inv(E_t) * e_t \ |
| + 0.5 * c_t.T * self.S_t[t + 1] * c_t \ |
| + c_t.T * self.s_t[t + 1] \ |
| + self.s_scalar_t[t + 1] |
| |
| x_hat = -numpy.linalg.solve((self.S_t[t] + self.S_bar_t[t]), |
| (self.s_t[t] + self.s_bar_t[t])) |
| if t == 0: |
| self.last_x_hat_t[t] = x_hat_initial |
| else: |
| self.last_x_hat_t[t] = x_hat |
| |
| x_hat_t = [x_hat_initial] |
| |
| pylab.figure('states %d' % a) |
| pylab.ion() |
| for dim in range(num_states): |
| pylab.plot( |
| numpy.arange(len(self.last_x_hat_t)), |
| [x_hat_loop[dim, 0] for x_hat_loop in self.last_x_hat_t], |
| marker='o', |
| label='Xhat[%d]' % dim) |
| pylab.legend() |
| pylab.draw() |
| |
| pylab.figure('xy %d' % a) |
| pylab.ion() |
| pylab.plot([x_hat_loop[0, 0] for x_hat_loop in self.last_x_hat_t], |
| [x_hat_loop[1, 0] for x_hat_loop in self.last_x_hat_t], |
| marker='o', |
| label='trajectory') |
| pylab.legend() |
| pylab.draw() |
| |
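        # Reconstruct the control at each step of the final trajectory and
        # evaluate its cost-to-go, cost-to-come, and total cost.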
| final_u_t = [ |
| numpy.matrix(numpy.zeros((num_inputs, 1))) for _ in range(l + 1) |
| ] |
| cost_to_go = [] |
| cost_to_come = [] |
| cost = [] |
| for t in range(l): |
| cost_to_go.append( |
| (0.5 * self.last_x_hat_t[t].T * self.S_t[t] * self.last_x_hat_t[t] \ |
| + self.last_x_hat_t[t].T * self.s_t[t] + self.s_scalar_t[t])[0, 0]) |
| cost_to_come.append( |
| (0.5 * self.last_x_hat_t[t].T * self.S_bar_t[t] * self.last_x_hat_t[t] \ |
| + self.last_x_hat_t[t].T * self.s_bar_t[t] + self.s_scalar_bar_t[t])[0, 0]) |
| cost.append(cost_to_go[-1] + cost_to_come[-1]) |
| final_u_t[t] = self.L_t[t] * self.last_x_hat_t[t] + self.l_t[t] |
| |
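        # Sanity check: verify that the linearization at each step reproduces
        # the next stored state (any residual here is an infeasibility).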
| for t in range(l): |
| A_t = numerical_jacobian_x(self.dynamics.discrete_dynamics, |
| self.last_x_hat_t[t], final_u_t[t]) |
| B_t = numerical_jacobian_u(self.dynamics.discrete_dynamics, |
| self.last_x_hat_t[t], final_u_t[t]) |
| c_t = self.dynamics.discrete_dynamics(self.last_x_hat_t[t], final_u_t[t]) \ |
| - A_t * self.last_x_hat_t[t] - B_t * final_u_t[t] |
| print("Infeasability at", t, "is", |
| ((A_t * self.last_x_hat_t[t] + B_t * final_u_t[t] + c_t) \ |
| - self.last_x_hat_t[t + 1]).T) |
| |
| pylab.figure('u') |
| samples = numpy.arange(len(final_u_t)) |
| for i in range(num_inputs): |
| pylab.plot(samples, [u[i, 0] for u in final_u_t], |
| label='u[%d]' % i, |
| marker='o') |
| pylab.legend() |
| |
| pylab.figure('cost') |
| cost_samples = numpy.arange(len(cost)) |
| pylab.plot(cost_samples, cost_to_go, label='cost to go', marker='o') |
| pylab.plot(cost_samples, |
| cost_to_come, |
| label='cost to come', |
| marker='o') |
| pylab.plot(cost_samples, cost, label='cost', marker='o') |
| pylab.legend() |
| |
| pylab.ioff() |
| pylab.show() |
| |
| |
| if __name__ == '__main__': |
| dt = 0.05 |
| #arm_dynamics = ArmDynamics(dt=dt) |
| #elqr = ELQR(arm_dynamics, ArmCostFunction(dt=dt, dynamics=arm_dynamics)) |
| #x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0], [0.0]]) |
| #elqr.Solve(x_hat_initial, 100, 3) |
| |
| elqr = ELQR(SkidSteerDynamics(dt=dt), CostFunction(dt=dt)) |
| x_hat_initial = numpy.matrix([[0.10], [1.0], [0.0]]) |
| elqr.Solve(x_hat_initial, 100, 15) |
| sys.exit(1) |