Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame] | 1 | // Copyright 2017 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "absl/random/gaussian_distribution.h" |
| 16 | |
| 17 | #include <algorithm> |
| 18 | #include <cmath> |
| 19 | #include <cstddef> |
| 20 | #include <ios> |
| 21 | #include <iterator> |
| 22 | #include <random> |
| 23 | #include <string> |
| 24 | #include <vector> |
| 25 | |
| 26 | #include "gmock/gmock.h" |
| 27 | #include "gtest/gtest.h" |
| 28 | #include "absl/base/internal/raw_logging.h" |
| 29 | #include "absl/base/macros.h" |
| 30 | #include "absl/random/internal/chi_square.h" |
| 31 | #include "absl/random/internal/distribution_test_util.h" |
| 32 | #include "absl/random/internal/sequence_urbg.h" |
| 33 | #include "absl/random/random.h" |
| 34 | #include "absl/strings/str_cat.h" |
| 35 | #include "absl/strings/str_format.h" |
| 36 | #include "absl/strings/str_replace.h" |
| 37 | #include "absl/strings/strip.h" |
| 38 | |
| 39 | namespace { |
| 40 | |
| 41 | using absl::random_internal::kChiSquared; |
| 42 | |
| 43 | template <typename RealType> |
| 44 | class GaussianDistributionInterfaceTest : public ::testing::Test {}; |
| 45 | |
| 46 | using RealTypes = ::testing::Types<float, double, long double>; |
| 47 | TYPED_TEST_CASE(GaussianDistributionInterfaceTest, RealTypes); |
| 48 | |
| 49 | TYPED_TEST(GaussianDistributionInterfaceTest, SerializeTest) { |
| 50 | using param_type = |
| 51 | typename absl::gaussian_distribution<TypeParam>::param_type; |
| 52 | |
| 53 | const TypeParam kParams[] = { |
| 54 | // Cases around 1. |
| 55 | 1, // |
| 56 | std::nextafter(TypeParam(1), TypeParam(0)), // 1 - epsilon |
| 57 | std::nextafter(TypeParam(1), TypeParam(2)), // 1 + epsilon |
| 58 | // Arbitrary values. |
| 59 | TypeParam(1e-8), TypeParam(1e-4), TypeParam(2), TypeParam(1e4), |
| 60 | TypeParam(1e8), TypeParam(1e20), TypeParam(2.5), |
| 61 | // Boundary cases. |
| 62 | std::numeric_limits<TypeParam>::infinity(), |
| 63 | std::numeric_limits<TypeParam>::max(), |
| 64 | std::numeric_limits<TypeParam>::epsilon(), |
| 65 | std::nextafter(std::numeric_limits<TypeParam>::min(), |
| 66 | TypeParam(1)), // min + epsilon |
| 67 | std::numeric_limits<TypeParam>::min(), // smallest normal |
| 68 | // There are some errors dealing with denorms on apple platforms. |
| 69 | std::numeric_limits<TypeParam>::denorm_min(), // smallest denorm |
| 70 | std::numeric_limits<TypeParam>::min() / 2, |
| 71 | std::nextafter(std::numeric_limits<TypeParam>::min(), |
| 72 | TypeParam(0)), // denorm_max |
| 73 | }; |
| 74 | |
| 75 | constexpr int kCount = 1000; |
| 76 | absl::InsecureBitGen gen; |
| 77 | |
| 78 | // Use a loop to generate the combinations of {+/-x, +/-y}, and assign x, y to |
| 79 | // all values in kParams, |
| 80 | for (const auto mod : {0, 1, 2, 3}) { |
| 81 | for (const auto x : kParams) { |
| 82 | if (!std::isfinite(x)) continue; |
| 83 | for (const auto y : kParams) { |
| 84 | const TypeParam mean = (mod & 0x1) ? -x : x; |
| 85 | const TypeParam stddev = (mod & 0x2) ? -y : y; |
| 86 | const param_type param(mean, stddev); |
| 87 | |
| 88 | absl::gaussian_distribution<TypeParam> before(mean, stddev); |
| 89 | EXPECT_EQ(before.mean(), param.mean()); |
| 90 | EXPECT_EQ(before.stddev(), param.stddev()); |
| 91 | |
| 92 | { |
| 93 | absl::gaussian_distribution<TypeParam> via_param(param); |
| 94 | EXPECT_EQ(via_param, before); |
| 95 | EXPECT_EQ(via_param.param(), before.param()); |
| 96 | } |
| 97 | |
| 98 | // Smoke test. |
| 99 | auto sample_min = before.max(); |
| 100 | auto sample_max = before.min(); |
| 101 | for (int i = 0; i < kCount; i++) { |
| 102 | auto sample = before(gen); |
| 103 | if (sample > sample_max) sample_max = sample; |
| 104 | if (sample < sample_min) sample_min = sample; |
| 105 | EXPECT_GE(sample, before.min()) << before; |
| 106 | EXPECT_LE(sample, before.max()) << before; |
| 107 | } |
| 108 | if (!std::is_same<TypeParam, long double>::value) { |
| 109 | ABSL_INTERNAL_LOG( |
| 110 | INFO, absl::StrFormat("Range{%f, %f}: %f, %f", mean, stddev, |
| 111 | sample_min, sample_max)); |
| 112 | } |
| 113 | |
| 114 | std::stringstream ss; |
| 115 | ss << before; |
| 116 | |
| 117 | if (!std::isfinite(mean) || !std::isfinite(stddev)) { |
| 118 | // Streams do not parse inf/nan. |
| 119 | continue; |
| 120 | } |
| 121 | |
| 122 | // Validate stream serialization. |
| 123 | absl::gaussian_distribution<TypeParam> after(-0.53f, 2.3456f); |
| 124 | |
| 125 | EXPECT_NE(before.mean(), after.mean()); |
| 126 | EXPECT_NE(before.stddev(), after.stddev()); |
| 127 | EXPECT_NE(before.param(), after.param()); |
| 128 | EXPECT_NE(before, after); |
| 129 | |
| 130 | ss >> after; |
| 131 | |
| 132 | #if defined(__powerpc64__) || defined(__PPC64__) || defined(__powerpc__) || \ |
| 133 | defined(__ppc__) || defined(__PPC__) |
| 134 | if (std::is_same<TypeParam, long double>::value) { |
| 135 | // Roundtripping floating point values requires sufficient precision |
| 136 | // to reconstruct the exact value. It turns out that long double |
| 137 | // has some errors doing this on ppc, particularly for values |
| 138 | // near {1.0 +/- epsilon}. |
| 139 | if (mean <= std::numeric_limits<double>::max() && |
| 140 | mean >= std::numeric_limits<double>::lowest()) { |
| 141 | EXPECT_EQ(static_cast<double>(before.mean()), |
| 142 | static_cast<double>(after.mean())) |
| 143 | << ss.str(); |
| 144 | } |
| 145 | if (stddev <= std::numeric_limits<double>::max() && |
| 146 | stddev >= std::numeric_limits<double>::lowest()) { |
| 147 | EXPECT_EQ(static_cast<double>(before.stddev()), |
| 148 | static_cast<double>(after.stddev())) |
| 149 | << ss.str(); |
| 150 | } |
| 151 | continue; |
| 152 | } |
| 153 | #endif |
| 154 | |
| 155 | EXPECT_EQ(before.mean(), after.mean()); |
| 156 | EXPECT_EQ(before.stddev(), after.stddev()) // |
| 157 | << ss.str() << " " // |
| 158 | << (ss.good() ? "good " : "") // |
| 159 | << (ss.bad() ? "bad " : "") // |
| 160 | << (ss.eof() ? "eof " : "") // |
| 161 | << (ss.fail() ? "fail " : ""); |
| 162 | } |
| 163 | } |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3661.htm |
| 168 | |
| 169 | class GaussianModel { |
| 170 | public: |
| 171 | GaussianModel(double mean, double stddev) : mean_(mean), stddev_(stddev) {} |
| 172 | |
| 173 | double mean() const { return mean_; } |
| 174 | double variance() const { return stddev() * stddev(); } |
| 175 | double stddev() const { return stddev_; } |
| 176 | double skew() const { return 0; } |
| 177 | double kurtosis() const { return 3.0; } |
| 178 | |
| 179 | // The inverse CDF, or PercentPoint function. |
| 180 | double InverseCDF(double p) { |
| 181 | ABSL_ASSERT(p >= 0.0); |
| 182 | ABSL_ASSERT(p < 1.0); |
| 183 | return mean() + stddev() * -absl::random_internal::InverseNormalSurvival(p); |
| 184 | } |
| 185 | |
| 186 | private: |
| 187 | const double mean_; |
| 188 | const double stddev_; |
| 189 | }; |
| 190 | |
// One parameterized test case: the gaussian parameters under test plus the
// z-test configuration. Instances are brace-initialized positionally, so the
// field order must not change.
struct Param {
  double mean;    // Mean handed to the model and the distribution.
  double stddev;  // Standard deviation handed to the model and distribution.
  double p_fail;  // Z-Test probability of failure.
  int trials;     // Z-Test trials.
};
| 197 | |
// GaussianDistributionTests implements a z-test and a chi-squared test for
// the gaussian distribution. The model parameters (mean, stddev) are taken
// from the test parameter when the fixture is constructed.
class GaussianDistributionTests : public testing::TestWithParam<Param>,
                                  public GaussianModel {
 public:
  GaussianDistributionTests()
      : GaussianModel(GetParam().mean, GetParam().stddev) {}

  // SingleZTest provides a basic z-squared test of the mean vs. expected
  // mean for data generated by the gaussian distribution `D`. `samples`
  // values are drawn; returns true when the z-score is within the tolerance
  // derived from `p`.
  template <typename D>
  bool SingleZTest(const double p, const size_t samples);

  // SingleChiSquaredTest provides a basic chi-squared test of the normal
  // distribution. Returns the p-value of the computed chi-squared statistic.
  template <typename D>
  double SingleChiSquaredTest();

  // We use a fixed bit generator for distribution accuracy tests.  This allows
  // these tests to be deterministic, while still testing the quality of the
  // implementation.
  absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6};
};
| 221 | |
| 222 | template <typename D> |
| 223 | bool GaussianDistributionTests::SingleZTest(const double p, |
| 224 | const size_t samples) { |
| 225 | D dis(mean(), stddev()); |
| 226 | |
| 227 | std::vector<double> data; |
| 228 | data.reserve(samples); |
| 229 | for (size_t i = 0; i < samples; i++) { |
| 230 | const double x = dis(rng_); |
| 231 | data.push_back(x); |
| 232 | } |
| 233 | |
| 234 | const double max_err = absl::random_internal::MaxErrorTolerance(p); |
| 235 | const auto m = absl::random_internal::ComputeDistributionMoments(data); |
| 236 | const double z = absl::random_internal::ZScore(mean(), m); |
| 237 | const bool pass = absl::random_internal::Near("z", z, 0.0, max_err); |
| 238 | |
| 239 | // NOTE: Informational statistical test: |
| 240 | // |
| 241 | // Compute the Jarque-Bera test statistic given the excess skewness |
| 242 | // and kurtosis. The statistic is drawn from a chi-square(2) distribution. |
| 243 | // https://en.wikipedia.org/wiki/Jarque%E2%80%93Bera_test |
| 244 | // |
| 245 | // The null-hypothesis (normal distribution) is rejected when |
| 246 | // (p = 0.05 => jb > 5.99) |
| 247 | // (p = 0.01 => jb > 9.21) |
| 248 | // NOTE: JB has a large type-I error rate, so it will reject the |
| 249 | // null-hypothesis even when it is true more often than the z-test. |
| 250 | // |
| 251 | const double jb = |
| 252 | static_cast<double>(m.n) / 6.0 * |
| 253 | (std::pow(m.skewness, 2.0) + std::pow(m.kurtosis - 3.0, 2.0) / 4.0); |
| 254 | |
| 255 | if (!pass || jb > 9.21) { |
| 256 | ABSL_INTERNAL_LOG( |
| 257 | INFO, absl::StrFormat("p=%f max_err=%f\n" |
| 258 | " mean=%f vs. %f\n" |
| 259 | " stddev=%f vs. %f\n" |
| 260 | " skewness=%f vs. %f\n" |
| 261 | " kurtosis=%f vs. %f\n" |
| 262 | " z=%f vs. 0\n" |
| 263 | " jb=%f vs. 9.21", |
| 264 | p, max_err, m.mean, mean(), std::sqrt(m.variance), |
| 265 | stddev(), m.skewness, skew(), m.kurtosis, |
| 266 | kurtosis(), z, jb)); |
| 267 | } |
| 268 | return pass; |
| 269 | } |
| 270 | |
| 271 | template <typename D> |
| 272 | double GaussianDistributionTests::SingleChiSquaredTest() { |
| 273 | const size_t kSamples = 10000; |
| 274 | const int kBuckets = 50; |
| 275 | |
| 276 | // The InverseCDF is the percent point function of the |
| 277 | // distribution, and can be used to assign buckets |
| 278 | // roughly uniformly. |
| 279 | std::vector<double> cutoffs; |
| 280 | const double kInc = 1.0 / static_cast<double>(kBuckets); |
| 281 | for (double p = kInc; p < 1.0; p += kInc) { |
| 282 | cutoffs.push_back(InverseCDF(p)); |
| 283 | } |
| 284 | if (cutoffs.back() != std::numeric_limits<double>::infinity()) { |
| 285 | cutoffs.push_back(std::numeric_limits<double>::infinity()); |
| 286 | } |
| 287 | |
| 288 | D dis(mean(), stddev()); |
| 289 | |
| 290 | std::vector<int32_t> counts(cutoffs.size(), 0); |
| 291 | for (int j = 0; j < kSamples; j++) { |
| 292 | const double x = dis(rng_); |
| 293 | auto it = std::upper_bound(cutoffs.begin(), cutoffs.end(), x); |
| 294 | counts[std::distance(cutoffs.begin(), it)]++; |
| 295 | } |
| 296 | |
| 297 | // Null-hypothesis is that the distribution is a gaussian distribution |
| 298 | // with the provided mean and stddev (not estimated from the data). |
| 299 | const int dof = static_cast<int>(counts.size()) - 1; |
| 300 | |
| 301 | // Our threshold for logging is 1-in-50. |
| 302 | const double threshold = absl::random_internal::ChiSquareValue(dof, 0.98); |
| 303 | |
| 304 | const double expected = |
| 305 | static_cast<double>(kSamples) / static_cast<double>(counts.size()); |
| 306 | |
| 307 | double chi_square = absl::random_internal::ChiSquareWithExpected( |
| 308 | std::begin(counts), std::end(counts), expected); |
| 309 | double p = absl::random_internal::ChiSquarePValue(chi_square, dof); |
| 310 | |
| 311 | // Log if the chi_square value is above the threshold. |
| 312 | if (chi_square > threshold) { |
| 313 | for (int i = 0; i < cutoffs.size(); i++) { |
| 314 | ABSL_INTERNAL_LOG( |
| 315 | INFO, absl::StrFormat("%d : (%f) = %d", i, cutoffs[i], counts[i])); |
| 316 | } |
| 317 | |
| 318 | ABSL_INTERNAL_LOG( |
| 319 | INFO, absl::StrCat("mean=", mean(), " stddev=", stddev(), "\n", // |
| 320 | " expected ", expected, "\n", // |
| 321 | kChiSquared, " ", chi_square, " (", p, ")\n", // |
| 322 | kChiSquared, " @ 0.98 = ", threshold)); |
| 323 | } |
| 324 | return p; |
| 325 | } |
| 326 | |
| 327 | TEST_P(GaussianDistributionTests, ZTest) { |
| 328 | // TODO(absl-team): Run these tests against std::normal_distribution<double> |
| 329 | // to validate outcomes are similar. |
| 330 | const size_t kSamples = 10000; |
| 331 | const auto& param = GetParam(); |
| 332 | const int expected_failures = |
| 333 | std::max(1, static_cast<int>(std::ceil(param.trials * param.p_fail))); |
| 334 | const double p = absl::random_internal::RequiredSuccessProbability( |
| 335 | param.p_fail, param.trials); |
| 336 | |
| 337 | int failures = 0; |
| 338 | for (int i = 0; i < param.trials; i++) { |
| 339 | failures += |
| 340 | SingleZTest<absl::gaussian_distribution<double>>(p, kSamples) ? 0 : 1; |
| 341 | } |
| 342 | EXPECT_LE(failures, expected_failures); |
| 343 | } |
| 344 | |
| 345 | TEST_P(GaussianDistributionTests, ChiSquaredTest) { |
| 346 | const int kTrials = 20; |
| 347 | int failures = 0; |
| 348 | |
| 349 | for (int i = 0; i < kTrials; i++) { |
| 350 | double p_value = |
| 351 | SingleChiSquaredTest<absl::gaussian_distribution<double>>(); |
| 352 | if (p_value < 0.0025) { // 1/400 |
| 353 | failures++; |
| 354 | } |
| 355 | } |
| 356 | // There is a 0.05% chance of producing at least one failure, so raise the |
| 357 | // failure threshold high enough to allow for a flake rate of less than one in |
| 358 | // 10,000. |
| 359 | EXPECT_LE(failures, 4); |
| 360 | } |
| 361 | |
| 362 | std::vector<Param> GenParams() { |
| 363 | return { |
| 364 | // Mean around 0. |
| 365 | Param{0.0, 1.0, 0.01, 100}, |
| 366 | Param{0.0, 1e2, 0.01, 100}, |
| 367 | Param{0.0, 1e4, 0.01, 100}, |
| 368 | Param{0.0, 1e8, 0.01, 100}, |
| 369 | Param{0.0, 1e16, 0.01, 100}, |
| 370 | Param{0.0, 1e-3, 0.01, 100}, |
| 371 | Param{0.0, 1e-5, 0.01, 100}, |
| 372 | Param{0.0, 1e-9, 0.01, 100}, |
| 373 | Param{0.0, 1e-17, 0.01, 100}, |
| 374 | |
| 375 | // Mean around 1. |
| 376 | Param{1.0, 1.0, 0.01, 100}, |
| 377 | Param{1.0, 1e2, 0.01, 100}, |
| 378 | Param{1.0, 1e-2, 0.01, 100}, |
| 379 | |
| 380 | // Mean around 100 / -100 |
| 381 | Param{1e2, 1.0, 0.01, 100}, |
| 382 | Param{-1e2, 1.0, 0.01, 100}, |
| 383 | Param{1e2, 1e6, 0.01, 100}, |
| 384 | Param{-1e2, 1e6, 0.01, 100}, |
| 385 | |
| 386 | // More extreme |
| 387 | Param{1e4, 1e4, 0.01, 100}, |
| 388 | Param{1e8, 1e4, 0.01, 100}, |
| 389 | Param{1e12, 1e4, 0.01, 100}, |
| 390 | }; |
| 391 | } |
| 392 | |
| 393 | std::string ParamName(const ::testing::TestParamInfo<Param>& info) { |
| 394 | const auto& p = info.param; |
| 395 | std::string name = absl::StrCat("mean_", absl::SixDigits(p.mean), "__stddev_", |
| 396 | absl::SixDigits(p.stddev)); |
| 397 | return absl::StrReplaceAll(name, {{"+", "_"}, {"-", "_"}, {".", "_"}}); |
| 398 | } |
| 399 | |
| 400 | INSTANTIATE_TEST_SUITE_P(All, GaussianDistributionTests, |
| 401 | ::testing::ValuesIn(GenParams()), ParamName); |
| 402 | |
| 403 | // NOTE: absl::gaussian_distribution is not guaranteed to be stable. |
| 404 | TEST(GaussianDistributionTest, StabilityTest) { |
| 405 | // absl::gaussian_distribution stability relies on the underlying zignor |
| 406 | // data, absl::random_interna::RandU64ToDouble, std::exp, std::log, and |
| 407 | // std::abs. |
| 408 | absl::random_internal::sequence_urbg urbg( |
| 409 | {0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull, |
| 410 | 0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull, |
| 411 | 0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull, |
| 412 | 0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull}); |
| 413 | |
| 414 | std::vector<int> output(11); |
| 415 | |
| 416 | { |
| 417 | absl::gaussian_distribution<double> dist; |
| 418 | std::generate(std::begin(output), std::end(output), |
| 419 | [&] { return static_cast<int>(10000000.0 * dist(urbg)); }); |
| 420 | |
| 421 | EXPECT_EQ(13, urbg.invocations()); |
| 422 | EXPECT_THAT(output, // |
| 423 | testing::ElementsAre(1494, 25518841, 9991550, 1351856, |
| 424 | -20373238, 3456682, 333530, -6804981, |
| 425 | -15279580, -16459654, 1494)); |
| 426 | } |
| 427 | |
| 428 | urbg.reset(); |
| 429 | { |
| 430 | absl::gaussian_distribution<float> dist; |
| 431 | std::generate(std::begin(output), std::end(output), |
| 432 | [&] { return static_cast<int>(1000000.0f * dist(urbg)); }); |
| 433 | |
| 434 | EXPECT_EQ(13, urbg.invocations()); |
| 435 | EXPECT_THAT( |
| 436 | output, // |
| 437 | testing::ElementsAre(149, 2551884, 999155, 135185, -2037323, 345668, |
| 438 | 33353, -680498, -1527958, -1645965, 149)); |
| 439 | } |
| 440 | } |
| 441 | |
| 442 | // This is an implementation-specific test. If any part of the implementation |
| 443 | // changes, then it is likely that this test will change as well. |
| 444 | // Also, if dependencies of the distribution change, such as RandU64ToDouble, |
| 445 | // then this is also likely to change. |
| 446 | TEST(GaussianDistributionTest, AlgorithmBounds) { |
| 447 | absl::gaussian_distribution<double> dist; |
| 448 | |
| 449 | // In ~95% of cases, a single value is used to generate the output. |
| 450 | // for all inputs where |x| < 0.750461021389 this should be the case. |
| 451 | // |
| 452 | // The exact constraints are based on the ziggurat tables, and any |
| 453 | // changes to the ziggurat tables may require adjusting these bounds. |
| 454 | // |
| 455 | // for i in range(0, len(X)-1): |
| 456 | // print i, X[i+1]/X[i], (X[i+1]/X[i] > 0.984375) |
| 457 | // |
| 458 | // 0.125 <= |values| <= 0.75 |
| 459 | const uint64_t kValues[] = { |
| 460 | 0x1000000000000100ull, 0x2000000000000100ull, 0x3000000000000100ull, |
| 461 | 0x4000000000000100ull, 0x5000000000000100ull, 0x6000000000000100ull, |
| 462 | // negative values |
| 463 | 0x9000000000000100ull, 0xa000000000000100ull, 0xb000000000000100ull, |
| 464 | 0xc000000000000100ull, 0xd000000000000100ull, 0xe000000000000100ull}; |
| 465 | |
| 466 | // 0.875 <= |values| <= 0.984375 |
| 467 | const uint64_t kExtraValues[] = { |
| 468 | 0x7000000000000100ull, 0x7800000000000100ull, // |
| 469 | 0x7c00000000000100ull, 0x7e00000000000100ull, // |
| 470 | // negative values |
| 471 | 0xf000000000000100ull, 0xf800000000000100ull, // |
| 472 | 0xfc00000000000100ull, 0xfe00000000000100ull}; |
| 473 | |
| 474 | auto make_box = [](uint64_t v, uint64_t box) { |
| 475 | return (v & 0xffffffffffffff80ull) | box; |
| 476 | }; |
| 477 | |
| 478 | // The box is the lower 7 bits of the value. When the box == 0, then |
| 479 | // the algorithm uses an escape hatch to select the result for large |
| 480 | // outputs. |
| 481 | for (uint64_t box = 0; box < 0x7f; box++) { |
| 482 | for (const uint64_t v : kValues) { |
| 483 | // Extra values are added to the sequence to attempt to avoid |
| 484 | // infinite loops from rejection sampling on bugs/errors. |
| 485 | absl::random_internal::sequence_urbg urbg( |
| 486 | {make_box(v, box), 0x0003eb76f6f7f755ull, 0x5FCEA50FDB2F953Bull}); |
| 487 | |
| 488 | auto a = dist(urbg); |
| 489 | EXPECT_EQ(1, urbg.invocations()) << box << " " << std::hex << v; |
| 490 | if (v & 0x8000000000000000ull) { |
| 491 | EXPECT_LT(a, 0.0) << box << " " << std::hex << v; |
| 492 | } else { |
| 493 | EXPECT_GT(a, 0.0) << box << " " << std::hex << v; |
| 494 | } |
| 495 | } |
| 496 | if (box > 10 && box < 100) { |
| 497 | // The center boxes use the fast algorithm for more |
| 498 | // than 98.4375% of values. |
| 499 | for (const uint64_t v : kExtraValues) { |
| 500 | absl::random_internal::sequence_urbg urbg( |
| 501 | {make_box(v, box), 0x0003eb76f6f7f755ull, 0x5FCEA50FDB2F953Bull}); |
| 502 | |
| 503 | auto a = dist(urbg); |
| 504 | EXPECT_EQ(1, urbg.invocations()) << box << " " << std::hex << v; |
| 505 | if (v & 0x8000000000000000ull) { |
| 506 | EXPECT_LT(a, 0.0) << box << " " << std::hex << v; |
| 507 | } else { |
| 508 | EXPECT_GT(a, 0.0) << box << " " << std::hex << v; |
| 509 | } |
| 510 | } |
| 511 | } |
| 512 | } |
| 513 | |
| 514 | // When the box == 0, the fallback algorithm uses a ratio of uniforms, |
| 515 | // which consumes 2 additional values from the urbg. |
| 516 | // Fallback also requires that the initial value be > 0.9271586026096681. |
| 517 | auto make_fallback = [](uint64_t v) { return (v & 0xffffffffffffff80ull); }; |
| 518 | |
| 519 | double tail[2]; |
| 520 | { |
| 521 | // 0.9375 |
| 522 | absl::random_internal::sequence_urbg urbg( |
| 523 | {make_fallback(0x7800000000000000ull), 0x13CCA830EB61BD96ull, |
| 524 | 0x00000076f6f7f755ull}); |
| 525 | tail[0] = dist(urbg); |
| 526 | EXPECT_EQ(3, urbg.invocations()); |
| 527 | EXPECT_GT(tail[0], 0); |
| 528 | } |
| 529 | { |
| 530 | // -0.9375 |
| 531 | absl::random_internal::sequence_urbg urbg( |
| 532 | {make_fallback(0xf800000000000000ull), 0x13CCA830EB61BD96ull, |
| 533 | 0x00000076f6f7f755ull}); |
| 534 | tail[1] = dist(urbg); |
| 535 | EXPECT_EQ(3, urbg.invocations()); |
| 536 | EXPECT_LT(tail[1], 0); |
| 537 | } |
| 538 | EXPECT_EQ(tail[0], -tail[1]); |
| 539 | EXPECT_EQ(418610, static_cast<int64_t>(tail[0] * 100000.0)); |
| 540 | |
| 541 | // When the box != 0, the fallback algorithm computes a wedge function. |
| 542 | // Depending on the box, the threshold for varies as high as |
| 543 | // 0.991522480228. |
| 544 | { |
| 545 | // 0.9921875, 0.875 |
| 546 | absl::random_internal::sequence_urbg urbg( |
| 547 | {make_box(0x7f00000000000000ull, 120), 0xe000000000000001ull, |
| 548 | 0x13CCA830EB61BD96ull}); |
| 549 | tail[0] = dist(urbg); |
| 550 | EXPECT_EQ(2, urbg.invocations()); |
| 551 | EXPECT_GT(tail[0], 0); |
| 552 | } |
| 553 | { |
| 554 | // -0.9921875, 0.875 |
| 555 | absl::random_internal::sequence_urbg urbg( |
| 556 | {make_box(0xff00000000000000ull, 120), 0xe000000000000001ull, |
| 557 | 0x13CCA830EB61BD96ull}); |
| 558 | tail[1] = dist(urbg); |
| 559 | EXPECT_EQ(2, urbg.invocations()); |
| 560 | EXPECT_LT(tail[1], 0); |
| 561 | } |
| 562 | EXPECT_EQ(tail[0], -tail[1]); |
| 563 | EXPECT_EQ(61948, static_cast<int64_t>(tail[0] * 100000.0)); |
| 564 | |
| 565 | // Fallback rejected, try again. |
| 566 | { |
| 567 | // -0.9921875, 0.0625 |
| 568 | absl::random_internal::sequence_urbg urbg( |
| 569 | {make_box(0xff00000000000000ull, 120), 0x1000000000000001, |
| 570 | make_box(0x1000000000000100ull, 50), 0x13CCA830EB61BD96ull}); |
| 571 | dist(urbg); |
| 572 | EXPECT_EQ(3, urbg.invocations()); |
| 573 | } |
| 574 | } |
| 575 | |
| 576 | } // namespace |