More completely seed UUID::Random()

Generate enuogh randomness with std::random_device to fully seed the
internal state of an std::mt19937. This ensures that even the first
UUID::Random() call after boot has full entropy, so long as
`std::random_device` is non-deterministic.

Also, initialize the random number generator during AOS initialization.

Change-Id: Ie09d85f09f7969bcd47576a577b4415d39186233
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/BUILD b/aos/BUILD
index 6a45949..d7a9b14 100644
--- a/aos/BUILD
+++ b/aos/BUILD
@@ -177,6 +177,7 @@
     visibility = ["//visibility:public"],
     deps = [
         ":realtime",
+        ":uuid",
         "//aos:die",
         "//aos/logging:implementations",
     ],
@@ -209,6 +210,7 @@
     visibility = ["//visibility:public"],
     deps = [
         ":thread_local",
+        ":uuid",
         "@com_github_google_glog//:glog",
     ],
 )
@@ -714,7 +716,9 @@
 
 cc_test(
     name = "uuid_collision_test",
+    timeout = "eternal",
     srcs = ["uuid_collision_test.cc"],
+    shard_count = 2,
     target_compatible_with = ["@platforms//os:linux"],
     deps = [
         ":uuid",
diff --git a/aos/init.cc b/aos/init.cc
index 7d4840c..3ca7314 100644
--- a/aos/init.cc
+++ b/aos/init.cc
@@ -15,6 +15,7 @@
 #include "glog/logging.h"
 
 #include "aos/realtime.h"
+#include "aos/uuid.h"
 
 DEFINE_bool(coredump, false, "If true, write core dumps on failure.");
 
@@ -37,6 +38,10 @@
   }
 
   RegisterMallocHook();
+  // Ensure that the random number generator for the UUID code is initialized
+  // (it does some potentially expensive random number generation).
+  UUID::Random();
+
   initialized = true;
 }
 
diff --git a/aos/realtime.cc b/aos/realtime.cc
index 14849a4..8b34ada 100644
--- a/aos/realtime.cc
+++ b/aos/realtime.cc
@@ -19,6 +19,7 @@
 #include "glog/raw_logging.h"
 
 #include "aos/thread_local.h"
+#include "aos/uuid.h"
 
 DEFINE_bool(
     die_on_malloc, true,
@@ -184,6 +185,10 @@
 }
 
 void SetCurrentThreadRealtimePriority(int priority) {
+  // Ensure that we won't get expensive reads of /dev/random when the realtime
+  // scheduler is running.
+  UUID::Random();
+
   if (FLAGS_skip_realtime_scheduler) {
     LOG(WARNING) << "Ignoring request to switch to the RT scheduler due to "
                     "--skip_realtime_scheduler.";
diff --git a/aos/uuid.cc b/aos/uuid.cc
index 4d3aa26..9da2cbf 100644
--- a/aos/uuid.cc
+++ b/aos/uuid.cc
@@ -64,24 +64,45 @@
   }
 }
 
-uint32_t RandomSeed() {
-  std::random_device rd;
-  return rd();
-}
 }  // namespace
 
+namespace internal {
+std::mt19937 FullySeededRandomGenerator() {
+  // Total bits that the mt19937 has internally that we could plausibly
+  // initialize with.
+  // The internal state ends up being ~1200 bytes, which is significantly more
+  // than the 128 bits we want for UUIDs, but since we should only need to
+  // generate this randomness once, it should be fine.
+  // If the performance cost ends up causing issues, then we can revisit the
+  // need to *fully* seed the twister.
+  constexpr size_t kInternalEntropy =
+      std::mt19937::state_size * sizeof(std::mt19937::result_type);
+  // Number, rounded up, of random values required.
+  constexpr size_t kSeedsRequired =
+      ((kInternalEntropy - 1) / sizeof(std::random_device::result_type)) + 1;
+  std::random_device random_device;
+// Older LLVM libstdc++'s just return 0 for the random device entropy.
+#if !defined(__clang__) || (__clang_major__ > 13)
+  CHECK_EQ(sizeof(std::random_device::result_type) * 8, random_device.entropy())
+      << ": Does your random_device actually support generating entropy?";
+#endif
+  std::array<std::random_device::result_type, kSeedsRequired> random_data;
+  std::generate(std::begin(random_data), std::end(random_data),
+                std::ref(random_device));
+  std::seed_seq seeds(std::begin(random_data), std::end(random_data));
+  return std::mt19937(seeds);
+}
+}  // namespace internal
+
 UUID UUID::Random() {
   // Note: This only provides 32 bits of randomness to each thread. However, by
   // keeping persistent pseudo-random number generators, we can increase the
   // overall randomness of each call to Random() (since, essentially, the
   // randomness is coming from the combination of the initial seed + the number
   // of times that Random() has been called in the given thread).
-  // TODO(james): Seed with a minimum of 128 bits of randomness, or even the
-  // full 624 bits of the internal mersenne twister state (see
-  // https://codereview.stackexchange.com/questions/109260/seed-stdmt19937-from-stdrandom-device/109266#109266).
   //
   // thread_local to guarantee safe use of the generator itself.
-  thread_local std::mt19937 gen(RandomSeed());
+  thread_local std::mt19937 gen(internal::FullySeededRandomGenerator());
 
   std::uniform_int_distribution<> dis(0, 255);
   UUID result;
diff --git a/aos/uuid.h b/aos/uuid.h
index 1b63ac3..371eb6d 100644
--- a/aos/uuid.h
+++ b/aos/uuid.h
@@ -3,6 +3,7 @@
 
 #include <array>
 #include <ostream>
+#include <random>
 #include <string>
 
 #include "absl/types/span.h"
@@ -20,6 +21,9 @@
   static constexpr size_t kDataSize = 16;
 
   // Returns a randomly generated UUID.  This is known as a UUID4.
+  // The first Random() call in a thread will tend to be slightly slower than
+  // the rest so that it can seed the pseudo-random number generator used
+  // internally.
   static UUID Random();
 
   // Returns a uuid with all '0's.
@@ -95,6 +99,13 @@
 
 std::ostream &operator<<(std::ostream &os, const UUID &uuid);
 
+namespace internal {
+// Initializes a mt19937 with as much entropy as it can take (rather than just a
+// 32-bit value from std::random_device).
+// Exposed for testing purposes.
+std::mt19937 FullySeededRandomGenerator();
+}  // namespace internal
+
 }  // namespace aos
 
 #endif  // AOS_EVENTS_LOGGING_UUID_H_
diff --git a/aos/uuid_collision_test.cc b/aos/uuid_collision_test.cc
index f06f6de..05bbccd 100644
--- a/aos/uuid_collision_test.cc
+++ b/aos/uuid_collision_test.cc
@@ -24,5 +24,21 @@
     uuids.insert(uuid);
   }
 }
+
+// Tests that our random seed generation for the mt19937 does not trivially
+// collide.
+TEST(UUIDTest, SeedInitializationTest) {
+  std::uniform_int_distribution<uint64_t> distribution(0);
+  std::set<uint64_t> values;
+  // This test takes significantly longer than the above due to needing to query
+  // std::random_device substantially. However, covering a range of 2 ** 18
+  // should readily catch things if we are accidentally using 32-bit seeds.
+  for (size_t ii = 0; ii < (1UL << 18); ++ii) {
+    std::mt19937 twister = internal::FullySeededRandomGenerator();
+    const uint64_t value = distribution(twister);
+    ASSERT_FALSE(values.count(value) > 0) << ii;
+    values.insert(value);
+  }
+}
 }  // namespace testing
 }  // namespace aos