Add flag to scale target entropy.
This controls how much the policy explores versus how much it takes the
optimal action. A larger scalar means a lower target entropy, i.e., less
exploration. This helps swerve avoid getting stuck at the limits. While
here, base the target entropy on the number of outputs rather than the
number of states.
Change-Id: I74b862963632454c64f62d89c15dfc41422acc18
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/frc971/control_loops/swerve/velocity_controller/model.py b/frc971/control_loops/swerve/velocity_controller/model.py
index ebf47f7..9d5c5bc 100644
--- a/frc971/control_loops/swerve/velocity_controller/model.py
+++ b/frc971/control_loops/swerve/velocity_controller/model.py
@@ -61,6 +61,13 @@
help='Fraction of --pi_learning_rate to reduce by, by the end.',
)
+absl.flags.DEFINE_float(
+ 'target_entropy_scalar',
+ default=1.0,
+ help=
+ 'Scalar applied to the target entropy for automatic temperature adjustment.',
+)
+
absl.flags.DEFINE_integer(
'replay_size',
default=2000000,
@@ -389,7 +396,7 @@
q_opt_state=q_opt_state,
alpha_tx=alpha_tx,
alpha_opt_state=alpha_opt_state,
- target_entropy=-problem.num_states,
+ target_entropy=-problem.num_outputs * FLAGS.target_entropy_scalar,
mesh=mesh,
sharding=sharding,
replicated_sharding=replicated_sharding,