blob: 9b6ffb0fb504be8c1a037a321db3a581e17c9e9d [file] [log] [blame]
Austin Schuh36244a12019-09-21 17:52:38 -07001// Copyright 2017 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef ABSL_RANDOM_INTERNAL_DISTRIBUTION_IMPL_H_
16#define ABSL_RANDOM_INTERNAL_DISTRIBUTION_IMPL_H_
17
18// This file contains some implementation details which are used by one or more
19// of the absl random number distributions.
20
21#include <cfloat>
22#include <cstddef>
23#include <cstdint>
24#include <cstring>
25#include <limits>
26#include <type_traits>
27
28#if (defined(_WIN32) || defined(_WIN64)) && defined(_M_IA64)
29#include <intrin.h> // NOLINT(build/include_order)
30#pragma intrinsic(_umul128)
31#define ABSL_INTERNAL_USE_UMUL128 1
32#endif
33
34#include "absl/base/config.h"
35#include "absl/base/internal/bits.h"
36#include "absl/numeric/int128.h"
37#include "absl/random/internal/fastmath.h"
38#include "absl/random/internal/traits.h"
39
40namespace absl {
41namespace random_internal {
42
43// Creates a double from `bits`, with the template fields controlling the
44// output.
45//
46// RandU64To is both more efficient and generates more unique values in the
47// result interval than known implementations of std::generate_canonical().
48//
49// The `Signed` parameter controls whether positive, negative, or both are
50// returned (thus affecting the output interval).
51// When Signed == SignedValueT, range is U(-1, 1)
52// When Signed == NegativeValueT, range is U(-1, 0)
53// When Signed == PositiveValueT, range is U(0, 1)
54//
55// When the `IncludeZero` parameter is true, the function may return 0 for some
56// inputs, otherwise it never returns 0.
57//
58// The `ExponentBias` parameter determines the scale of the output range by
59// adjusting the exponent.
60//
61// When a value in U(0,1) is required, use:
62// RandU64ToDouble<PositiveValueT, true, 0>();
63//
64// When a value in U(-1,1) is required, use:
65// RandU64ToDouble<SignedValueT, false, 0>() => U(-1, 1)
66// This generates more distinct values than the mathematically equivalent
67// expression `U(0, 1) * 2.0 - 1.0`, and is preferable.
68//
69// Scaling the result by powers of 2 (and avoiding a multiply) is also possible:
70// RandU64ToDouble<PositiveValueT, false, 1>(); => U(0, 2)
71// RandU64ToDouble<PositiveValueT, false, -1>(); => U(0, 0.5)
72//
73
74// Tristate types controlling the output.
75struct PositiveValueT {};
76struct NegativeValueT {};
77struct SignedValueT {};
78
79// RandU64ToDouble is the double-result variant of RandU64To, described above.
80template <typename Signed, bool IncludeZero, int ExponentBias = 0>
81inline double RandU64ToDouble(uint64_t bits) {
82 static_assert(std::is_same<Signed, PositiveValueT>::value ||
83 std::is_same<Signed, NegativeValueT>::value ||
84 std::is_same<Signed, SignedValueT>::value,
85 "");
86
87 // Maybe use the left-most bit for a sign bit.
88 uint64_t sign = std::is_same<Signed, NegativeValueT>::value
89 ? 0x8000000000000000ull
90 : 0; // Sign bits.
91
92 if (std::is_same<Signed, SignedValueT>::value) {
93 sign = bits & 0x8000000000000000ull;
94 bits = bits & 0x7FFFFFFFFFFFFFFFull;
95 }
96 if (IncludeZero) {
97 if (bits == 0u) return 0;
98 }
99
100 // Number of leading zeros is mapped to the exponent: 2^-clz
101 int clz = base_internal::CountLeadingZeros64(bits);
102 // Shift number left to erase leading zeros.
103 bits <<= IncludeZero ? clz : (clz & 63);
104
105 // Shift number right to remove bits that overflow double mantissa. The
106 // direction of the shift depends on `clz`.
107 bits >>= (64 - DBL_MANT_DIG);
108
109 // Compute IEEE 754 double exponent.
110 // In the Signed case, bits is a 63-bit number with a 0 msb. Adjust the
111 // exponent to account for that.
112 const uint64_t exp =
113 (std::is_same<Signed, SignedValueT>::value ? 1023U : 1022U) +
114 static_cast<uint64_t>(ExponentBias - clz);
115 constexpr int kExp = DBL_MANT_DIG - 1;
116 // Construct IEEE 754 double from exponent and mantissa.
117 const uint64_t val = sign | (exp << kExp) | (bits & ((1ULL << kExp) - 1U));
118
119 double res;
120 static_assert(sizeof(res) == sizeof(val), "double is not 64 bit");
121 // Memcpy value from "val" to "res" to avoid aliasing problems. Assumes that
122 // endian-ness is same for double and uint64_t.
123 std::memcpy(&res, &val, sizeof(res));
124
125 return res;
126}
127
128// RandU64ToFloat is the float-result variant of RandU64To, described above.
129template <typename Signed, bool IncludeZero, int ExponentBias = 0>
130inline float RandU64ToFloat(uint64_t bits) {
131 static_assert(std::is_same<Signed, PositiveValueT>::value ||
132 std::is_same<Signed, NegativeValueT>::value ||
133 std::is_same<Signed, SignedValueT>::value,
134 "");
135
136 // Maybe use the left-most bit for a sign bit.
137 uint64_t sign = std::is_same<Signed, NegativeValueT>::value
138 ? 0x80000000ul
139 : 0; // Sign bits.
140
141 if (std::is_same<Signed, SignedValueT>::value) {
142 uint64_t a = bits & 0x8000000000000000ull;
143 sign = static_cast<uint32_t>(a >> 32);
144 bits = bits & 0x7FFFFFFFFFFFFFFFull;
145 }
146 if (IncludeZero) {
147 if (bits == 0u) return 0;
148 }
149
150 // Number of leading zeros is mapped to the exponent: 2^-clz
151 int clz = base_internal::CountLeadingZeros64(bits);
152 // Shift number left to erase leading zeros.
153 bits <<= IncludeZero ? clz : (clz & 63);
154 // Shift number right to remove bits that overflow double mantissa. The
155 // direction of the shift depends on `clz`.
156 bits >>= (64 - FLT_MANT_DIG);
157
158 // Construct IEEE 754 float exponent.
159 // In the Signed case, bits is a 63-bit number with a 0 msb. Adjust the
160 // exponent to account for that.
161 const uint32_t exp =
162 (std::is_same<Signed, SignedValueT>::value ? 127U : 126U) +
163 static_cast<uint32_t>(ExponentBias - clz);
164 constexpr int kExp = FLT_MANT_DIG - 1;
165 const uint32_t val = sign | (exp << kExp) | (bits & ((1U << kExp) - 1U));
166
167 float res;
168 static_assert(sizeof(res) == sizeof(val), "float is not 32 bit");
169 // Assumes that endian-ness is same for float and uint32_t.
170 std::memcpy(&res, &val, sizeof(res));
171
172 return res;
173}
174
175template <typename Result>
176struct RandU64ToReal {
177 template <typename Signed, bool IncludeZero, int ExponentBias = 0>
178 static inline Result Value(uint64_t bits) {
179 return RandU64ToDouble<Signed, IncludeZero, ExponentBias>(bits);
180 }
181};
182
183template <>
184struct RandU64ToReal<float> {
185 template <typename Signed, bool IncludeZero, int ExponentBias = 0>
186 static inline float Value(uint64_t bits) {
187 return RandU64ToFloat<Signed, IncludeZero, ExponentBias>(bits);
188 }
189};
190
191inline uint128 MultiplyU64ToU128(uint64_t a, uint64_t b) {
192#if defined(ABSL_HAVE_INTRINSIC_INT128)
193 return uint128(static_cast<__uint128_t>(a) * b);
194#elif defined(ABSL_INTERNAL_USE_UMUL128)
195 // uint64_t * uint64_t => uint128 multiply using imul intrinsic on MSVC.
196 uint64_t high = 0;
197 const uint64_t low = _umul128(a, b, &high);
198 return absl::MakeUint128(high, low);
199#else
200 // uint128(a) * uint128(b) in emulated mode computes a full 128-bit x 128-bit
201 // multiply. However there are many cases where that is not necessary, and it
202 // is only necessary to support a 64-bit x 64-bit = 128-bit multiply. This is
203 // for those cases.
204 const uint64_t a00 = static_cast<uint32_t>(a);
205 const uint64_t a32 = a >> 32;
206 const uint64_t b00 = static_cast<uint32_t>(b);
207 const uint64_t b32 = b >> 32;
208
209 const uint64_t c00 = a00 * b00;
210 const uint64_t c32a = a00 * b32;
211 const uint64_t c32b = a32 * b00;
212 const uint64_t c64 = a32 * b32;
213
214 const uint32_t carry =
215 static_cast<uint32_t>(((c00 >> 32) + static_cast<uint32_t>(c32a) +
216 static_cast<uint32_t>(c32b)) >>
217 32);
218
219 return absl::MakeUint128(c64 + (c32a >> 32) + (c32b >> 32) + carry,
220 c00 + (c32a << 32) + (c32b << 32));
221#endif
222}
223
224// wide_multiply<T> multiplies two N-bit values to a 2N-bit result.
225template <typename UIntType>
226struct wide_multiply {
227 static constexpr size_t kN = std::numeric_limits<UIntType>::digits;
228 using input_type = UIntType;
229 using result_type = typename random_internal::unsigned_bits<kN * 2>::type;
230
231 static result_type multiply(input_type a, input_type b) {
232 return static_cast<result_type>(a) * b;
233 }
234
235 static input_type hi(result_type r) { return r >> kN; }
236 static input_type lo(result_type r) { return r; }
237
238 static_assert(std::is_unsigned<UIntType>::value,
239 "Class-template wide_multiply<> argument must be unsigned.");
240};
241
242#ifndef ABSL_HAVE_INTRINSIC_INT128
243template <>
244struct wide_multiply<uint64_t> {
245 using input_type = uint64_t;
246 using result_type = uint128;
247
248 static result_type multiply(uint64_t a, uint64_t b) {
249 return MultiplyU64ToU128(a, b);
250 }
251
252 static uint64_t hi(result_type r) { return Uint128High64(r); }
253 static uint64_t lo(result_type r) { return Uint128Low64(r); }
254};
255#endif
256
257} // namespace random_internal
258} // namespace absl
259
260#endif // ABSL_RANDOM_INTERNAL_DISTRIBUTION_IMPL_H_