Add flag to scale target entropy.
This controls how much the policy explores versus how much it takes the
optimal action. A larger scalar means a lower target entropy, i.e., less
exploration. This helps swerve avoid getting stuck at the limits. While
here, base the target entropy on the number of outputs rather than the
number of states.
Change-Id: I74b862963632454c64f62d89c15dfc41422acc18
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/frc971/control_loops/swerve/velocity_controller/model.py b/frc971/control_loops/swerve/velocity_controller/model.py
index ebf47f7..9d5c5bc 100644
--- a/frc971/control_loops/swerve/velocity_controller/model.py
+++ b/frc971/control_loops/swerve/velocity_controller/model.py
@@ -61,6 +61,13 @@
help='Fraction of --pi_learning_rate to reduce by, by the end.',
)
+absl.flags.DEFINE_float(
+ 'target_entropy_scalar',
+ default=1.0,
+ help=
+ 'Scalar applied to the target entropy for automatic temperature adjustment.',
+)
+
absl.flags.DEFINE_integer(
'replay_size',
default=2000000,
@@ -389,7 +396,7 @@
q_opt_state=q_opt_state,
alpha_tx=alpha_tx,
alpha_opt_state=alpha_opt_state,
- target_entropy=-problem.num_states,
+ target_entropy=-problem.num_outputs * FLAGS.target_entropy_scalar,
mesh=mesh,
sharding=sharding,
replicated_sharding=replicated_sharding,