Add flag to scale target entropy.

This adjusts the exploration/exploitation tradeoff: how much the policy
explores versus how often it takes the current best action.  A larger
scalar makes the target entropy more negative, i.e. less entropy, so the
policy explores less.  This helps swerve avoid getting stuck at the
limits.

While here, base the target entropy on the number of outputs (the
action dimension) rather than the number of states, matching the
standard SAC heuristic of target_entropy = -dim(actions).
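
As a sanity check, a minimal sketch of the resulting arithmetic (the
action dimension of 8 here is hypothetical, not swerve's actual value):

    num_outputs = 8  # hypothetical action dimension
    for scalar in (0.5, 1.0, 2.0):
        # target_entropy = -num_outputs * target_entropy_scalar
        print(scalar, -num_outputs * scalar)
    # -> (0.5, -4.0), (1.0, -8.0), (2.0, -16.0): a larger scalar drives
    # the target entropy more negative, shrinking exploration.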

Change-Id: I74b862963632454c64f62d89c15dfc41422acc18
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/frc971/control_loops/swerve/velocity_controller/model.py b/frc971/control_loops/swerve/velocity_controller/model.py
index ebf47f7..9d5c5bc 100644
--- a/frc971/control_loops/swerve/velocity_controller/model.py
+++ b/frc971/control_loops/swerve/velocity_controller/model.py
@@ -61,6 +61,13 @@
     help='Fraction of --pi_learning_rate to reduce by by the end.',
 )
 
+absl.flags.DEFINE_float(
+    'target_entropy_scalar',
+    default=1.0,
+    help=
+    'Target entropy scalar for use when using automatic temperature adjustment.',
+)
+
 absl.flags.DEFINE_integer(
     'replay_size',
     default=2000000,
@@ -389,7 +396,7 @@
             q_opt_state=q_opt_state,
             alpha_tx=alpha_tx,
             alpha_opt_state=alpha_opt_state,
-            target_entropy=-problem.num_states,
+            target_entropy=-problem.num_outputs * FLAGS.target_entropy_scalar,
             mesh=mesh,
             sharding=sharding,
             replicated_sharding=replicated_sharding,
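
For context, a hedged sketch (assumed names, not the repo's actual
training code) of how a SAC-style automatic temperature update consumes
this target_entropy:

    import jax
    import jax.numpy as jnp

    def alpha_loss(log_alpha, log_pi, target_entropy):
        # Standard SAC temperature loss: gradient descent on this
        # raises alpha when the sampled policy entropy (-log_pi) is
        # below the target and lowers it when above.
        return -(jnp.exp(log_alpha) *
                 jax.lax.stop_gradient(log_pi + target_entropy)).mean()

With target_entropy = -num_outputs * FLAGS.target_entropy_scalar, a
larger scalar makes the target more negative, so alpha (and with it the
entropy bonus) is driven down and the policy explores less.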