Switch to silu activation function

The SAC with experience paper says this works better for continuous action
spaces. Let's do it!

Change-Id: I78ddd38b87e5600efb82d50cd2923e8a4fb58fde
Signed-off-by: Austin Schuh <austin.linux@gmail.com>
diff --git a/frc971/control_loops/swerve/velocity_controller/model.py b/frc971/control_loops/swerve/velocity_controller/model.py
index 9d5c5bc..6aa3e47 100644
--- a/frc971/control_loops/swerve/velocity_controller/model.py
+++ b/frc971/control_loops/swerve/velocity_controller/model.py
@@ -409,12 +409,12 @@
 def create_train_state(rng: PRNGKey, problem: Problem, q_learning_rate,
                        pi_learning_rate, alpha_learning_rate):
     """Creates initial `TrainState`."""
-    pi = SquashedGaussianMLPActor(activation=nn.activation.gelu,
+    pi = SquashedGaussianMLPActor(activation=nn.activation.silu,
                                   action_space=problem.num_outputs,
                                   action_limit=problem.action_limit)
     # We want q1 and q2 to have different network architectures so they pick up different things.
-    q1 = MLPQFunction(activation=nn.activation.gelu, hidden_sizes=[128, 256])
-    q2 = MLPQFunction(activation=nn.activation.gelu, hidden_sizes=[256, 128])
+    q1 = MLPQFunction(activation=nn.activation.silu, hidden_sizes=[128, 256])
+    q2 = MLPQFunction(activation=nn.activation.silu, hidden_sizes=[256, 128])
 
     @jax.jit
     def init_params(rng):
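
For reference, silu (the sigmoid-weighted linear unit, also called swish) is
x * sigmoid(x), while gelu weights x by the Gaussian CDF, so the two are close
in shape but silu is cheaper and often trains a bit better. A minimal sketch,
not part of this change, using the same flax.linen import as model.py to
compare the two activations:

    import jax.numpy as jnp
    from flax import linen as nn

    x = jnp.linspace(-3.0, 3.0, 7)
    # silu(x) = x * sigmoid(x); smooth, non-monotonic just below zero.
    print(nn.activation.silu(x))
    # gelu(x) = x * Phi(x); the activation this commit replaces, for comparison.
    print(nn.activation.gelu(x))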