Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // Copyright 2017 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #ifndef ABSL_RANDOM_INTERNAL_DISTRIBUTION_IMPL_H_ |
| 16 | #define ABSL_RANDOM_INTERNAL_DISTRIBUTION_IMPL_H_ |
| 17 | |
| 18 | // This file contains some implementation details which are used by one or more |
| 19 | // of the absl random number distributions. |
| 20 | |
| 21 | #include <cfloat> |
| 22 | #include <cstddef> |
| 23 | #include <cstdint> |
| 24 | #include <cstring> |
| 25 | #include <limits> |
| 26 | #include <type_traits> |
| 27 | |
| 28 | #if (defined(_WIN32) || defined(_WIN64)) && defined(_M_IA64) |
| 29 | #include <intrin.h> // NOLINT(build/include_order) |
| 30 | #pragma intrinsic(_umul128) |
| 31 | #define ABSL_INTERNAL_USE_UMUL128 1 |
| 32 | #endif |
| 33 | |
| 34 | #include "absl/base/config.h" |
| 35 | #include "absl/base/internal/bits.h" |
| 36 | #include "absl/numeric/int128.h" |
| 37 | #include "absl/random/internal/fastmath.h" |
| 38 | #include "absl/random/internal/traits.h" |
| 39 | |
| 40 | namespace absl { |
| 41 | namespace random_internal { |
| 42 | |
| 43 | // Creates a double from `bits`, with the template fields controlling the |
| 44 | // output. |
| 45 | // |
| 46 | // RandU64To is both more efficient and generates more unique values in the |
| 47 | // result interval than known implementations of std::generate_canonical(). |
| 48 | // |
| 49 | // The `Signed` parameter controls whether positive, negative, or both are |
| 50 | // returned (thus affecting the output interval). |
| 51 | // When Signed == SignedValueT, range is U(-1, 1) |
| 52 | // When Signed == NegativeValueT, range is U(-1, 0) |
| 53 | // When Signed == PositiveValueT, range is U(0, 1) |
| 54 | // |
| 55 | // When the `IncludeZero` parameter is true, the function may return 0 for some |
| 56 | // inputs, otherwise it never returns 0. |
| 57 | // |
| 58 | // The `ExponentBias` parameter determines the scale of the output range by |
| 59 | // adjusting the exponent. |
| 60 | // |
| 61 | // When a value in U(0,1) is required, use: |
| 62 | // RandU64ToDouble<PositiveValueT, true, 0>(); |
| 63 | // |
| 64 | // When a value in U(-1,1) is required, use: |
| 65 | // RandU64ToDouble<SignedValueT, false, 0>() => U(-1, 1) |
| 66 | // This generates more distinct values than the mathematically equivalent |
| 67 | // expression `U(0, 1) * 2.0 - 1.0`, and is preferable. |
| 68 | // |
| 69 | // Scaling the result by powers of 2 (and avoiding a multiply) is also possible: |
| 70 | // RandU64ToDouble<PositiveValueT, false, 1>(); => U(0, 2) |
| 71 | // RandU64ToDouble<PositiveValueT, false, -1>(); => U(0, 0.5) |
| 72 | // |
| 73 | |
| 74 | // Tristate types controlling the output. |
| 75 | struct PositiveValueT {}; |
| 76 | struct NegativeValueT {}; |
| 77 | struct SignedValueT {}; |
| 78 | |
| 79 | // RandU64ToDouble is the double-result variant of RandU64To, described above. |
| 80 | template <typename Signed, bool IncludeZero, int ExponentBias = 0> |
| 81 | inline double RandU64ToDouble(uint64_t bits) { |
| 82 | static_assert(std::is_same<Signed, PositiveValueT>::value || |
| 83 | std::is_same<Signed, NegativeValueT>::value || |
| 84 | std::is_same<Signed, SignedValueT>::value, |
| 85 | ""); |
| 86 | |
| 87 | // Maybe use the left-most bit for a sign bit. |
| 88 | uint64_t sign = std::is_same<Signed, NegativeValueT>::value |
| 89 | ? 0x8000000000000000ull |
| 90 | : 0; // Sign bits. |
| 91 | |
| 92 | if (std::is_same<Signed, SignedValueT>::value) { |
| 93 | sign = bits & 0x8000000000000000ull; |
| 94 | bits = bits & 0x7FFFFFFFFFFFFFFFull; |
| 95 | } |
| 96 | if (IncludeZero) { |
| 97 | if (bits == 0u) return 0; |
| 98 | } |
| 99 | |
| 100 | // Number of leading zeros is mapped to the exponent: 2^-clz |
| 101 | int clz = base_internal::CountLeadingZeros64(bits); |
| 102 | // Shift number left to erase leading zeros. |
| 103 | bits <<= IncludeZero ? clz : (clz & 63); |
| 104 | |
| 105 | // Shift number right to remove bits that overflow double mantissa. The |
| 106 | // direction of the shift depends on `clz`. |
| 107 | bits >>= (64 - DBL_MANT_DIG); |
| 108 | |
| 109 | // Compute IEEE 754 double exponent. |
| 110 | // In the Signed case, bits is a 63-bit number with a 0 msb. Adjust the |
| 111 | // exponent to account for that. |
| 112 | const uint64_t exp = |
| 113 | (std::is_same<Signed, SignedValueT>::value ? 1023U : 1022U) + |
| 114 | static_cast<uint64_t>(ExponentBias - clz); |
| 115 | constexpr int kExp = DBL_MANT_DIG - 1; |
| 116 | // Construct IEEE 754 double from exponent and mantissa. |
| 117 | const uint64_t val = sign | (exp << kExp) | (bits & ((1ULL << kExp) - 1U)); |
| 118 | |
| 119 | double res; |
| 120 | static_assert(sizeof(res) == sizeof(val), "double is not 64 bit"); |
| 121 | // Memcpy value from "val" to "res" to avoid aliasing problems. Assumes that |
| 122 | // endian-ness is same for double and uint64_t. |
| 123 | std::memcpy(&res, &val, sizeof(res)); |
| 124 | |
| 125 | return res; |
| 126 | } |
| 127 | |
| 128 | // RandU64ToFloat is the float-result variant of RandU64To, described above. |
| 129 | template <typename Signed, bool IncludeZero, int ExponentBias = 0> |
| 130 | inline float RandU64ToFloat(uint64_t bits) { |
| 131 | static_assert(std::is_same<Signed, PositiveValueT>::value || |
| 132 | std::is_same<Signed, NegativeValueT>::value || |
| 133 | std::is_same<Signed, SignedValueT>::value, |
| 134 | ""); |
| 135 | |
| 136 | // Maybe use the left-most bit for a sign bit. |
| 137 | uint64_t sign = std::is_same<Signed, NegativeValueT>::value |
| 138 | ? 0x80000000ul |
| 139 | : 0; // Sign bits. |
| 140 | |
| 141 | if (std::is_same<Signed, SignedValueT>::value) { |
| 142 | uint64_t a = bits & 0x8000000000000000ull; |
| 143 | sign = static_cast<uint32_t>(a >> 32); |
| 144 | bits = bits & 0x7FFFFFFFFFFFFFFFull; |
| 145 | } |
| 146 | if (IncludeZero) { |
| 147 | if (bits == 0u) return 0; |
| 148 | } |
| 149 | |
| 150 | // Number of leading zeros is mapped to the exponent: 2^-clz |
| 151 | int clz = base_internal::CountLeadingZeros64(bits); |
| 152 | // Shift number left to erase leading zeros. |
| 153 | bits <<= IncludeZero ? clz : (clz & 63); |
| 154 | // Shift number right to remove bits that overflow double mantissa. The |
| 155 | // direction of the shift depends on `clz`. |
| 156 | bits >>= (64 - FLT_MANT_DIG); |
| 157 | |
| 158 | // Construct IEEE 754 float exponent. |
| 159 | // In the Signed case, bits is a 63-bit number with a 0 msb. Adjust the |
| 160 | // exponent to account for that. |
| 161 | const uint32_t exp = |
| 162 | (std::is_same<Signed, SignedValueT>::value ? 127U : 126U) + |
| 163 | static_cast<uint32_t>(ExponentBias - clz); |
| 164 | constexpr int kExp = FLT_MANT_DIG - 1; |
| 165 | const uint32_t val = sign | (exp << kExp) | (bits & ((1U << kExp) - 1U)); |
| 166 | |
| 167 | float res; |
| 168 | static_assert(sizeof(res) == sizeof(val), "float is not 32 bit"); |
| 169 | // Assumes that endian-ness is same for float and uint32_t. |
| 170 | std::memcpy(&res, &val, sizeof(res)); |
| 171 | |
| 172 | return res; |
| 173 | } |
| 174 | |
| 175 | template <typename Result> |
| 176 | struct RandU64ToReal { |
| 177 | template <typename Signed, bool IncludeZero, int ExponentBias = 0> |
| 178 | static inline Result Value(uint64_t bits) { |
| 179 | return RandU64ToDouble<Signed, IncludeZero, ExponentBias>(bits); |
| 180 | } |
| 181 | }; |
| 182 | |
| 183 | template <> |
| 184 | struct RandU64ToReal<float> { |
| 185 | template <typename Signed, bool IncludeZero, int ExponentBias = 0> |
| 186 | static inline float Value(uint64_t bits) { |
| 187 | return RandU64ToFloat<Signed, IncludeZero, ExponentBias>(bits); |
| 188 | } |
| 189 | }; |
| 190 | |
| 191 | inline uint128 MultiplyU64ToU128(uint64_t a, uint64_t b) { |
| 192 | #if defined(ABSL_HAVE_INTRINSIC_INT128) |
| 193 | return uint128(static_cast<__uint128_t>(a) * b); |
| 194 | #elif defined(ABSL_INTERNAL_USE_UMUL128) |
| 195 | // uint64_t * uint64_t => uint128 multiply using imul intrinsic on MSVC. |
| 196 | uint64_t high = 0; |
| 197 | const uint64_t low = _umul128(a, b, &high); |
| 198 | return absl::MakeUint128(high, low); |
| 199 | #else |
| 200 | // uint128(a) * uint128(b) in emulated mode computes a full 128-bit x 128-bit |
| 201 | // multiply. However there are many cases where that is not necessary, and it |
| 202 | // is only necessary to support a 64-bit x 64-bit = 128-bit multiply. This is |
| 203 | // for those cases. |
| 204 | const uint64_t a00 = static_cast<uint32_t>(a); |
| 205 | const uint64_t a32 = a >> 32; |
| 206 | const uint64_t b00 = static_cast<uint32_t>(b); |
| 207 | const uint64_t b32 = b >> 32; |
| 208 | |
| 209 | const uint64_t c00 = a00 * b00; |
| 210 | const uint64_t c32a = a00 * b32; |
| 211 | const uint64_t c32b = a32 * b00; |
| 212 | const uint64_t c64 = a32 * b32; |
| 213 | |
| 214 | const uint32_t carry = |
| 215 | static_cast<uint32_t>(((c00 >> 32) + static_cast<uint32_t>(c32a) + |
| 216 | static_cast<uint32_t>(c32b)) >> |
| 217 | 32); |
| 218 | |
| 219 | return absl::MakeUint128(c64 + (c32a >> 32) + (c32b >> 32) + carry, |
| 220 | c00 + (c32a << 32) + (c32b << 32)); |
| 221 | #endif |
| 222 | } |
| 223 | |
| 224 | // wide_multiply<T> multiplies two N-bit values to a 2N-bit result. |
| 225 | template <typename UIntType> |
| 226 | struct wide_multiply { |
| 227 | static constexpr size_t kN = std::numeric_limits<UIntType>::digits; |
| 228 | using input_type = UIntType; |
| 229 | using result_type = typename random_internal::unsigned_bits<kN * 2>::type; |
| 230 | |
| 231 | static result_type multiply(input_type a, input_type b) { |
| 232 | return static_cast<result_type>(a) * b; |
| 233 | } |
| 234 | |
| 235 | static input_type hi(result_type r) { return r >> kN; } |
| 236 | static input_type lo(result_type r) { return r; } |
| 237 | |
| 238 | static_assert(std::is_unsigned<UIntType>::value, |
| 239 | "Class-template wide_multiply<> argument must be unsigned."); |
| 240 | }; |
| 241 | |
| 242 | #ifndef ABSL_HAVE_INTRINSIC_INT128 |
| 243 | template <> |
| 244 | struct wide_multiply<uint64_t> { |
| 245 | using input_type = uint64_t; |
| 246 | using result_type = uint128; |
| 247 | |
| 248 | static result_type multiply(uint64_t a, uint64_t b) { |
| 249 | return MultiplyU64ToU128(a, b); |
| 250 | } |
| 251 | |
| 252 | static uint64_t hi(result_type r) { return Uint128High64(r); } |
| 253 | static uint64_t lo(result_type r) { return Uint128Low64(r); } |
| 254 | }; |
| 255 | #endif |
| 256 | |
| 257 | } // namespace random_internal |
| 258 | } // namespace absl |
| 259 | |
| 260 | #endif // ABSL_RANDOM_INTERNAL_DISTRIBUTION_IMPL_H_ |