Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // Copyright 2018 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |
| 16 | #define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |
| 17 | |
| 18 | #include <algorithm> |
| 19 | #include <cstdint> |
| 20 | #include <iostream> |
| 21 | #include <string> |
| 22 | |
| 23 | #include "absl/strings/ascii.h" |
| 24 | #include "absl/strings/internal/charconv_parse.h" |
| 25 | #include "absl/strings/string_view.h" |
| 26 | |
| 27 | namespace absl { |
| 28 | namespace strings_internal { |
| 29 | |
| 30 | // The largest power that 5 that can be raised to, and still fit in a uint32_t. |
| 31 | constexpr int kMaxSmallPowerOfFive = 13; |
| 32 | // The largest power that 10 that can be raised to, and still fit in a uint32_t. |
| 33 | constexpr int kMaxSmallPowerOfTen = 9; |
| 34 | |
| 35 | extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1]; |
| 36 | extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; |
| 37 | |
| 38 | // Large, fixed-width unsigned integer. |
| 39 | // |
| 40 | // Exact rounding for decimal-to-binary floating point conversion requires very |
| 41 | // large integer math, but a design goal of absl::from_chars is to avoid |
| 42 | // allocating memory. The integer precision needed for decimal-to-binary |
| 43 | // conversions is large but bounded, so a huge fixed-width integer class |
| 44 | // suffices. |
| 45 | // |
| 46 | // This is an intentionally limited big integer class. Only needed operations |
| 47 | // are implemented. All storage lives in an array data member, and all |
| 48 | // arithmetic is done in-place, to avoid requiring separate storage for operand |
| 49 | // and result. |
| 50 | // |
| 51 | // This is an internal class. Some methods live in the .cc file, and are |
| 52 | // instantiated only for the values of max_words we need. |
| 53 | template <int max_words> |
| 54 | class BigUnsigned { |
| 55 | public: |
| 56 | static_assert(max_words == 4 || max_words == 84, |
| 57 | "unsupported max_words value"); |
| 58 | |
| 59 | BigUnsigned() : size_(0), words_{} {} |
| 60 | explicit constexpr BigUnsigned(uint64_t v) |
| 61 | : size_((v >> 32) ? 2 : v ? 1 : 0), |
| 62 | words_{static_cast<uint32_t>(v & 0xffffffffu), |
| 63 | static_cast<uint32_t>(v >> 32)} {} |
| 64 | |
| 65 | // Constructs a BigUnsigned from the given string_view containing a decimal |
| 66 | // value. If the input std::string is not a decimal integer, constructs a 0 |
| 67 | // instead. |
| 68 | explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { |
| 69 | // Check for valid input, returning a 0 otherwise. This is reasonable |
| 70 | // behavior only because this constructor is for unit tests. |
| 71 | if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || |
| 72 | sv.empty()) { |
| 73 | return; |
| 74 | } |
| 75 | int exponent_adjust = |
| 76 | ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); |
| 77 | if (exponent_adjust > 0) { |
| 78 | MultiplyByTenToTheNth(exponent_adjust); |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | // Loads the mantissa value of a previously-parsed float. |
| 83 | // |
| 84 | // Returns the associated decimal exponent. The value of the parsed float is |
| 85 | // exactly *this * 10**exponent. |
| 86 | int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); |
| 87 | |
| 88 | // Returns the number of decimal digits of precision this type provides. All |
| 89 | // numbers with this many decimal digits or fewer are representable by this |
| 90 | // type. |
| 91 | // |
| 92 | // Analagous to std::numeric_limits<BigUnsigned>::digits10. |
| 93 | static constexpr int Digits10() { |
| 94 | // 9975007/1035508 is very slightly less than log10(2**32). |
| 95 | return static_cast<uint64_t>(max_words) * 9975007 / 1035508; |
| 96 | } |
| 97 | |
| 98 | // Shifts left by the given number of bits. |
| 99 | void ShiftLeft(int count) { |
| 100 | if (count > 0) { |
| 101 | const int word_shift = count / 32; |
| 102 | if (word_shift >= max_words) { |
| 103 | SetToZero(); |
| 104 | return; |
| 105 | } |
| 106 | size_ = (std::min)(size_ + word_shift, max_words); |
| 107 | count %= 32; |
| 108 | if (count == 0) { |
| 109 | std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); |
| 110 | } else { |
| 111 | for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { |
| 112 | words_[i] = (words_[i - word_shift] << count) | |
| 113 | (words_[i - word_shift - 1] >> (32 - count)); |
| 114 | } |
| 115 | words_[word_shift] = words_[0] << count; |
| 116 | // Grow size_ if necessary. |
| 117 | if (size_ < max_words && words_[size_]) { |
| 118 | ++size_; |
| 119 | } |
| 120 | } |
| 121 | std::fill(words_, words_ + word_shift, 0u); |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | |
| 126 | // Multiplies by v in-place. |
| 127 | void MultiplyBy(uint32_t v) { |
| 128 | if (size_ == 0 || v == 1) { |
| 129 | return; |
| 130 | } |
| 131 | if (v == 0) { |
| 132 | SetToZero(); |
| 133 | return; |
| 134 | } |
| 135 | const uint64_t factor = v; |
| 136 | uint64_t window = 0; |
| 137 | for (int i = 0; i < size_; ++i) { |
| 138 | window += factor * words_[i]; |
| 139 | words_[i] = window & 0xffffffff; |
| 140 | window >>= 32; |
| 141 | } |
| 142 | // If carry bits remain and there's space for them, grow size_. |
| 143 | if (window && size_ < max_words) { |
| 144 | words_[size_] = window & 0xffffffff; |
| 145 | ++size_; |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | void MultiplyBy(uint64_t v) { |
| 150 | uint32_t words[2]; |
| 151 | words[0] = static_cast<uint32_t>(v); |
| 152 | words[1] = static_cast<uint32_t>(v >> 32); |
| 153 | if (words[1] == 0) { |
| 154 | MultiplyBy(words[0]); |
| 155 | } else { |
| 156 | MultiplyBy(2, words); |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | // Multiplies in place by 5 to the power of n. n must be non-negative. |
| 161 | void MultiplyByFiveToTheNth(int n) { |
| 162 | while (n >= kMaxSmallPowerOfFive) { |
| 163 | MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); |
| 164 | n -= kMaxSmallPowerOfFive; |
| 165 | } |
| 166 | if (n > 0) { |
| 167 | MultiplyBy(kFiveToNth[n]); |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | // Multiplies in place by 10 to the power of n. n must be non-negative. |
| 172 | void MultiplyByTenToTheNth(int n) { |
| 173 | if (n > kMaxSmallPowerOfTen) { |
| 174 | // For large n, raise to a power of 5, then shift left by the same amount. |
| 175 | // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. |
| 176 | MultiplyByFiveToTheNth(n); |
| 177 | ShiftLeft(n); |
| 178 | } else if (n > 0) { |
| 179 | // We can do this more quickly for very small N by using a single |
| 180 | // multiplication. |
| 181 | MultiplyBy(kTenToNth[n]); |
| 182 | } |
| 183 | } |
| 184 | |
| 185 | // Returns the value of 5**n, for non-negative n. This implementation uses |
| 186 | // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling |
| 187 | // MultiplyByFiveToTheNth(). |
| 188 | static BigUnsigned FiveToTheNth(int n); |
| 189 | |
| 190 | // Multiplies by another BigUnsigned, in-place. |
| 191 | template <int M> |
| 192 | void MultiplyBy(const BigUnsigned<M>& other) { |
| 193 | MultiplyBy(other.size(), other.words()); |
| 194 | } |
| 195 | |
| 196 | void SetToZero() { |
| 197 | std::fill(words_, words_ + size_, 0u); |
| 198 | size_ = 0; |
| 199 | } |
| 200 | |
| 201 | // Returns the value of the nth word of this BigUnsigned. This is |
| 202 | // range-checked, and returns 0 on out-of-bounds accesses. |
| 203 | uint32_t GetWord(int index) const { |
| 204 | if (index < 0 || index >= size_) { |
| 205 | return 0; |
| 206 | } |
| 207 | return words_[index]; |
| 208 | } |
| 209 | |
| 210 | // Returns this integer as a decimal std::string. This is not used in the decimal- |
| 211 | // to-binary conversion; it is intended to aid in testing. |
| 212 | std::string ToString() const; |
| 213 | |
| 214 | int size() const { return size_; } |
| 215 | const uint32_t* words() const { return words_; } |
| 216 | |
| 217 | private: |
| 218 | // Reads the number between [begin, end), possibly containing a decimal point, |
| 219 | // into this BigUnsigned. |
| 220 | // |
| 221 | // Callers are required to ensure [begin, end) contains a valid number, with |
| 222 | // one or more decimal digits and at most one decimal point. This routine |
| 223 | // will behave unpredictably if these preconditions are not met. |
| 224 | // |
| 225 | // Only the first `significant_digits` digits are read. Digits beyond this |
| 226 | // limit are "sticky": If the final significant digit is 0 or 5, and if any |
| 227 | // dropped digit is nonzero, then that final significant digit is adjusted up |
| 228 | // to 1 or 6. This adjustment allows for precise rounding. |
| 229 | // |
| 230 | // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to |
| 231 | // account for the decimal point and for dropped significant digits. After |
| 232 | // this function returns, |
| 233 | // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. |
| 234 | int ReadDigits(const char* begin, const char* end, int significant_digits); |
| 235 | |
| 236 | // Performs a step of big integer multiplication. This computes the full |
| 237 | // (64-bit-wide) values that should be added at the given index (step), and |
| 238 | // adds to that location in-place. |
| 239 | // |
| 240 | // Because our math all occurs in place, we must multiply starting from the |
| 241 | // highest word working downward. (This is a bit more expensive due to the |
| 242 | // extra carries involved.) |
| 243 | // |
| 244 | // This must be called in steps, for each word to be calculated, starting from |
| 245 | // the high end and working down to 0. The first value of `step` should be |
| 246 | // `std::min(original_size + other.size_ - 2, max_words - 1)`. |
| 247 | // The reason for this expression is that multiplying the i'th word from one |
| 248 | // multiplicand and the j'th word of another multiplicand creates a |
| 249 | // two-word-wide value to be stored at the (i+j)'th element. The highest |
| 250 | // word indices we will access are `original_size - 1` from this object, and |
| 251 | // `other.size_ - 1` from our operand. Therefore, |
| 252 | // `original_size + other.size_ - 2` is the first step we should calculate, |
| 253 | // but limited on an upper bound by max_words. |
| 254 | |
| 255 | // Working from high-to-low ensures that we do not overwrite the portions of |
| 256 | // the initial value of *this which are still needed for later steps. |
| 257 | // |
| 258 | // Once called with step == 0, *this contains the result of the |
| 259 | // multiplication. |
| 260 | // |
| 261 | // `original_size` is the size_ of *this before the first call to |
| 262 | // MultiplyStep(). `other_words` and `other_size` are the contents of our |
| 263 | // operand. `step` is the step to perform, as described above. |
| 264 | void MultiplyStep(int original_size, const uint32_t* other_words, |
| 265 | int other_size, int step); |
| 266 | |
| 267 | void MultiplyBy(int other_size, const uint32_t* other_words) { |
| 268 | const int original_size = size_; |
| 269 | const int first_step = |
| 270 | (std::min)(original_size + other_size - 2, max_words - 1); |
| 271 | for (int step = first_step; step >= 0; --step) { |
| 272 | MultiplyStep(original_size, other_words, other_size, step); |
| 273 | } |
| 274 | } |
| 275 | |
| 276 | // Adds a 32-bit value to the index'th word, with carry. |
| 277 | void AddWithCarry(int index, uint32_t value) { |
| 278 | if (value) { |
| 279 | while (index < max_words && value > 0) { |
| 280 | words_[index] += value; |
| 281 | // carry if we overflowed in this word: |
| 282 | if (value > words_[index]) { |
| 283 | value = 1; |
| 284 | ++index; |
| 285 | } else { |
| 286 | value = 0; |
| 287 | } |
| 288 | } |
| 289 | size_ = (std::min)(max_words, (std::max)(index + 1, size_)); |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | void AddWithCarry(int index, uint64_t value) { |
| 294 | if (value && index < max_words) { |
| 295 | uint32_t high = value >> 32; |
| 296 | uint32_t low = value & 0xffffffff; |
| 297 | words_[index] += low; |
| 298 | if (words_[index] < low) { |
| 299 | ++high; |
| 300 | if (high == 0) { |
| 301 | // Carry from the low word caused our high word to overflow. |
| 302 | // Short circuit here to do the right thing. |
| 303 | AddWithCarry(index + 2, static_cast<uint32_t>(1)); |
| 304 | return; |
| 305 | } |
| 306 | } |
| 307 | if (high > 0) { |
| 308 | AddWithCarry(index + 1, high); |
| 309 | } else { |
| 310 | // Normally 32-bit AddWithCarry() sets size_, but since we don't call |
| 311 | // it when `high` is 0, do it ourselves here. |
| 312 | size_ = (std::min)(max_words, (std::max)(index + 1, size_)); |
| 313 | } |
| 314 | } |
| 315 | } |
| 316 | |
| 317 | // Divide this in place by a constant divisor. Returns the remainder of the |
| 318 | // division. |
| 319 | template <uint32_t divisor> |
| 320 | uint32_t DivMod() { |
| 321 | uint64_t accumulator = 0; |
| 322 | for (int i = size_ - 1; i >= 0; --i) { |
| 323 | accumulator <<= 32; |
| 324 | accumulator += words_[i]; |
| 325 | // accumulator / divisor will never overflow an int32_t in this loop |
| 326 | words_[i] = static_cast<uint32_t>(accumulator / divisor); |
| 327 | accumulator = accumulator % divisor; |
| 328 | } |
| 329 | while (size_ > 0 && words_[size_ - 1] == 0) { |
| 330 | --size_; |
| 331 | } |
| 332 | return static_cast<uint32_t>(accumulator); |
| 333 | } |
| 334 | |
| 335 | // The number of elements in words_ that may carry significant values. |
| 336 | // All elements beyond this point are 0. |
| 337 | // |
| 338 | // When size_ is 0, this BigUnsigned stores the value 0. |
| 339 | // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is |
| 340 | // nonzero. This can occur due to overflow truncation. |
| 341 | // In particular, x.size_ != y.size_ does *not* imply x != y. |
| 342 | int size_; |
| 343 | uint32_t words_[max_words]; |
| 344 | }; |
| 345 | |
| 346 | // Compares two big integer instances. |
| 347 | // |
| 348 | // Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. |
| 349 | template <int N, int M> |
| 350 | int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
| 351 | int limit = (std::max)(lhs.size(), rhs.size()); |
| 352 | for (int i = limit - 1; i >= 0; --i) { |
| 353 | const uint32_t lhs_word = lhs.GetWord(i); |
| 354 | const uint32_t rhs_word = rhs.GetWord(i); |
| 355 | if (lhs_word < rhs_word) { |
| 356 | return -1; |
| 357 | } else if (lhs_word > rhs_word) { |
| 358 | return 1; |
| 359 | } |
| 360 | } |
| 361 | return 0; |
| 362 | } |
| 363 | |
| 364 | template <int N, int M> |
| 365 | bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
| 366 | int limit = (std::max)(lhs.size(), rhs.size()); |
| 367 | for (int i = 0; i < limit; ++i) { |
| 368 | if (lhs.GetWord(i) != rhs.GetWord(i)) { |
| 369 | return false; |
| 370 | } |
| 371 | } |
| 372 | return true; |
| 373 | } |
| 374 | |
| 375 | template <int N, int M> |
| 376 | bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
| 377 | return !(lhs == rhs); |
| 378 | } |
| 379 | |
| 380 | template <int N, int M> |
| 381 | bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
| 382 | return Compare(lhs, rhs) == -1; |
| 383 | } |
| 384 | |
| 385 | template <int N, int M> |
| 386 | bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
| 387 | return rhs < lhs; |
| 388 | } |
| 389 | template <int N, int M> |
| 390 | bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
| 391 | return !(rhs < lhs); |
| 392 | } |
| 393 | template <int N, int M> |
| 394 | bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
| 395 | return !(lhs < rhs); |
| 396 | } |
| 397 | |
| 398 | // Output operator for BigUnsigned, for testing purposes only. |
| 399 | template <int N> |
| 400 | std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { |
| 401 | return os << num.ToString(); |
| 402 | } |
| 403 | |
| 404 | // Explicit instantiation declarations for the sizes of BigUnsigned that we |
| 405 | // are using. |
| 406 | // |
| 407 | // For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is |
| 408 | // still bigger than an int128, and 84 is a large value we will want to use |
| 409 | // in the from_chars implementation. |
| 410 | // |
| 411 | // Comments justifying the use of 84 belong in the from_chars implementation, |
| 412 | // and will be added in a follow-up CL. |
| 413 | extern template class BigUnsigned<4>; |
| 414 | extern template class BigUnsigned<84>; |
| 415 | |
| 416 | } // namespace strings_internal |
| 417 | } // namespace absl |
| 418 | |
| 419 | #endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |