Stop zero initializing the SAC networks
This makes gradient descent super hard, and it fails to converge. The
default random initialization is much better.
Change-Id: I96465dad8313e5101a58365fee4458ffe7336442
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/frc971/control_loops/swerve/velocity_controller/model.py b/frc971/control_loops/swerve/velocity_controller/model.py
index 0d8a410..1394463 100644
--- a/frc971/control_loops/swerve/velocity_controller/model.py
+++ b/frc971/control_loops/swerve/velocity_controller/model.py
@@ -113,7 +113,6 @@
mu = nn.Dense(
features=self.action_space,
name='mu',
- kernel_init=nn.initializers.zeros,
)(x)
log_std_layer = nn.Dense(features=self.action_space,
@@ -180,8 +179,10 @@
)(x)
x = self.activation(x)
- x = nn.Dense(name=f'q', features=1,
- kernel_init=nn.initializers.zeros)(x)
+ x = nn.Dense(
+ name=f'q',
+ features=1,
+ )(x)
return x