Statically initialize mersenne twister for UUID::Random()

By re-seeding the pseudo-random number generator on every call to
Random() we were effectively limiting ourselves to at most 2 ** 32
possible UUIDs (because std::random_device only produces 32 bits).

By making the pseudo-random number generator thread_local, subsequent
calls to UUID::Random() in the same thread will cover additional bit
space and get us closer to an "actual" 128 bits of randomness.

This alone is *probably* sufficient to remove any potential for hash
collisions in the foreseeable future, but we probably should more
properly initialize the random number generator.

Change-Id: I24be8e7a750337237a24a178ae5078af6ae3d8d8
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/BUILD b/aos/BUILD
index 396d107..6a45949 100644
--- a/aos/BUILD
+++ b/aos/BUILD
@@ -713,6 +713,16 @@
 )
 
 cc_test(
+    name = "uuid_collision_test",
+    srcs = ["uuid_collision_test.cc"],
+    target_compatible_with = ["@platforms//os:linux"],
+    deps = [
+        ":uuid",
+        "//aos/testing:googletest",
+    ],
+)
+
+cc_test(
     name = "uuid_test",
     srcs = ["uuid_test.cc"],
     target_compatible_with = ["@platforms//os:linux"],
diff --git a/aos/uuid.cc b/aos/uuid.cc
index 67075b6..4d3aa26 100644
--- a/aos/uuid.cc
+++ b/aos/uuid.cc
@@ -64,14 +64,26 @@
   }
 }
 
+uint32_t RandomSeed() {
+  std::random_device rd;
+  return rd();
+}
 }  // namespace
 
 UUID UUID::Random() {
-  std::random_device rd;
-  std::mt19937 gen(rd());
+  // Note: This only provides 32 bits of randomness to each thread. However, by
+  // keeping persistent pseudo-random number generators, we can increase the
+  // overall randomness of each call to Random() (since, essentially, the
+  // randomness is coming from the combination of the initial seed + the number
+  // of times that Random() has been called in the given thread).
+  // TODO(james): Seed with a minimum of 128 bits of randomness, or even the
+  // full 624 bits of the internal mersenne twister state (see
+  // https://codereview.stackexchange.com/questions/109260/seed-stdmt19937-from-stdrandom-device/109266#109266).
+  //
+  // thread_local to guarantee safe use of the generator itself.
+  thread_local std::mt19937 gen(RandomSeed());
 
   std::uniform_int_distribution<> dis(0, 255);
-  std::uniform_int_distribution<> dis2(8, 11);
   UUID result;
   for (size_t i = 0; i < kDataSize; ++i) {
     result.data_[i] = dis(gen);
diff --git a/aos/uuid_collision_test.cc b/aos/uuid_collision_test.cc
new file mode 100644
index 0000000..f06f6de
--- /dev/null
+++ b/aos/uuid_collision_test.cc
@@ -0,0 +1,28 @@
+#include <set>
+#include <unordered_set>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "aos/uuid.h"
+
+namespace aos {
+namespace testing {
+
+// Tests that modest numbers of UUID::Random() calls cannot create UUID
+// collisions (to test that we have not *completely* messed up the random number
+// generation).
+TEST(UUIDTest, CollisionTest) {
+  std::set<UUID> uuids;
+  // When we only had ~32 bits of randomness in our UUIDs, we could generate
+  // issues with only ~sqrt(2 ** 32) (aka 2 ** 16) UUIDs.
+  // Just go up to 2 ** 22, since too much longer just makes this test take
+  // obnoxiously long.
+  for (size_t ii = 0; ii < (1UL << 22); ++ii) {
+    UUID uuid = UUID::Random();
+    ASSERT_FALSE(uuids.count(uuid) > 0) << ii;
+    uuids.insert(uuid);
+  }
+}
+}  // namespace testing
+}  // namespace aos