Statically initialize mersenne twister for UUID::Random()
By re-seeding the pseudo-random number generator on every call to
Random() we were effectively limiting ourselves to at most 2 ** 32
possible UUIDs (because std::random_device only produces 32 bits).
By making the pseudo-random number generator thread_local, subsequent
calls to UUID::Random() in the same thread will cover additional bit
space and get us closer to an "actual" 128 bits of randomness.
This alone is *probably* sufficient to remove any potential for hash
collisions in the foreseeable future, but we probably should more
properly initialize the random number generator.
Change-Id: I24be8e7a750337237a24a178ae5078af6ae3d8d8
Signed-off-by: James Kuszmaul <james.kuszmaul@bluerivertech.com>
diff --git a/aos/BUILD b/aos/BUILD
index 396d107..6a45949 100644
--- a/aos/BUILD
+++ b/aos/BUILD
@@ -713,6 +713,16 @@
)
cc_test(
+ name = "uuid_collision_test",
+ srcs = ["uuid_collision_test.cc"],
+ target_compatible_with = ["@platforms//os:linux"],
+ deps = [
+ ":uuid",
+ "//aos/testing:googletest",
+ ],
+)
+
+cc_test(
name = "uuid_test",
srcs = ["uuid_test.cc"],
target_compatible_with = ["@platforms//os:linux"],
diff --git a/aos/uuid.cc b/aos/uuid.cc
index 67075b6..4d3aa26 100644
--- a/aos/uuid.cc
+++ b/aos/uuid.cc
@@ -64,14 +64,26 @@
}
}
+uint32_t RandomSeed() {
+ std::random_device rd;
+ return rd();
+}
} // namespace
UUID UUID::Random() {
- std::random_device rd;
- std::mt19937 gen(rd());
+ // Note: This only provides 32 bits of randomness to each thread. However, by
+ // keeping persistent pseudo-random number generators, we can increase the
+ // overall randomness of each call to Random() (since, essentially, the
+ // randomness is coming from the combination of the initial seed + the number
+ // of times that Random() has been called in the given thread).
+ // TODO(james): Seed with a minimum of 128 bits of randomness, or even the
+ // full 624 bits of the internal mersenne twister state (see
+ // https://codereview.stackexchange.com/questions/109260/seed-stdmt19937-from-stdrandom-device/109266#109266).
+ //
+ // thread_local to guarantee safe use of the generator itself.
+ thread_local std::mt19937 gen(RandomSeed());
std::uniform_int_distribution<> dis(0, 255);
- std::uniform_int_distribution<> dis2(8, 11);
UUID result;
for (size_t i = 0; i < kDataSize; ++i) {
result.data_[i] = dis(gen);
diff --git a/aos/uuid_collision_test.cc b/aos/uuid_collision_test.cc
new file mode 100644
index 0000000..f06f6de
--- /dev/null
+++ b/aos/uuid_collision_test.cc
@@ -0,0 +1,28 @@
+#include <set>
+#include <unordered_set>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "aos/uuid.h"
+
+namespace aos {
+namespace testing {
+
+// Tests that modest numbers of UUID::Random() calls cannot create UUID
+// collisions (to test that we have not *completely* messed up the random number
+// generation).
+TEST(UUIDTest, CollisionTest) {
+ std::set<UUID> uuids;
+ // When we only had ~32 bits of randomness in our UUIDs, we could generate
+ // issues with only ~sqrt(2 ** 32) (aka 2 ** 16) UUIDs.
+ // Just go up to 2 ** 22, since too much longer just makes this test take
+ // obnoxiously long.
+ for (size_t ii = 0; ii < (1UL << 22); ++ii) {
+ UUID uuid = UUID::Random();
+ ASSERT_FALSE(uuids.count(uuid) > 0) << ii;
+ uuids.insert(uuid);
+ }
+}
+} // namespace testing
+} // namespace aos