Blame - absl/strings/internal/charconv_bigint.h - RealtimeRoboticsGroup/test

blob: 7da9a7e75acec29ab1fd5f9c299fcd4d5406de42 [file] [log] [blame]

Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame^]	1	// Copyright 2018 The Abseil Authors.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// https://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
				16	#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
				17
				18	#include <algorithm>
				19	#include <cstdint>
				20	#include <iostream>
				21	#include <string>
				22
				23	#include "absl/strings/ascii.h"
				24	#include "absl/strings/internal/charconv_parse.h"
				25	#include "absl/strings/string_view.h"
				26
				27	namespace absl {
				28	namespace strings_internal {
				29
				30	// The largest power that 5 that can be raised to, and still fit in a uint32_t.
				31	constexpr int kMaxSmallPowerOfFive = 13;
				32	// The largest power that 10 that can be raised to, and still fit in a uint32_t.
				33	constexpr int kMaxSmallPowerOfTen = 9;
				34
				35	extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1];
				36	extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
				37
				38	// Large, fixed-width unsigned integer.
				39	//
				40	// Exact rounding for decimal-to-binary floating point conversion requires very
				41	// large integer math, but a design goal of absl::from_chars is to avoid
				42	// allocating memory. The integer precision needed for decimal-to-binary
				43	// conversions is large but bounded, so a huge fixed-width integer class
				44	// suffices.
				45	//
				46	// This is an intentionally limited big integer class. Only needed operations
				47	// are implemented. All storage lives in an array data member, and all
				48	// arithmetic is done in-place, to avoid requiring separate storage for operand
				49	// and result.
				50	//
				51	// This is an internal class. Some methods live in the .cc file, and are
				52	// instantiated only for the values of max_words we need.
				53	template <int max_words>
				54	class BigUnsigned {
				55	public:
				56	static_assert(max_words == 4 \|\| max_words == 84,
				57	"unsupported max_words value");
				58
				59	BigUnsigned() : size_(0), words_{} {}
				60	explicit constexpr BigUnsigned(uint64_t v)
				61	: size_((v >> 32) ? 2 : v ? 1 : 0),
				62	words_{static_cast<uint32_t>(v & 0xffffffffu),
				63	static_cast<uint32_t>(v >> 32)} {}
				64
				65	// Constructs a BigUnsigned from the given string_view containing a decimal
				66	// value. If the input std::string is not a decimal integer, constructs a 0
				67	// instead.
				68	explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
				69	// Check for valid input, returning a 0 otherwise. This is reasonable
				70	// behavior only because this constructor is for unit tests.
				71	if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() \|\|
				72	sv.empty()) {
				73	return;
				74	}
				75	int exponent_adjust =
				76	ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
				77	if (exponent_adjust > 0) {
				78	MultiplyByTenToTheNth(exponent_adjust);
				79	}
				80	}
				81
				82	// Loads the mantissa value of a previously-parsed float.
				83	//
				84	// Returns the associated decimal exponent. The value of the parsed float is
				85	// exactly this 10**exponent.
				86	int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
				87
				88	// Returns the number of decimal digits of precision this type provides. All
				89	// numbers with this many decimal digits or fewer are representable by this
				90	// type.
				91	//
				92	// Analagous to std::numeric_limits<BigUnsigned>::digits10.
				93	static constexpr int Digits10() {
				94	// 9975007/1035508 is very slightly less than log10(2**32).
				95	return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
				96	}
				97
				98	// Shifts left by the given number of bits.
				99	void ShiftLeft(int count) {
				100	if (count > 0) {
				101	const int word_shift = count / 32;
				102	if (word_shift >= max_words) {
				103	SetToZero();
				104	return;
				105	}
				106	size_ = (std::min)(size_ + word_shift, max_words);
				107	count %= 32;
				108	if (count == 0) {
				109	std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
				110	} else {
				111	for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
				112	words_[i] = (words_[i - word_shift] << count) \|
				113	(words_[i - word_shift - 1] >> (32 - count));
				114	}
				115	words_[word_shift] = words_[0] << count;
				116	// Grow size_ if necessary.
				117	if (size_ < max_words && words_[size_]) {
				118	++size_;
				119	}
				120	}
				121	std::fill(words_, words_ + word_shift, 0u);
				122	}
				123	}
				124
				125
				126	// Multiplies by v in-place.
				127	void MultiplyBy(uint32_t v) {
				128	if (size_ == 0 \|\| v == 1) {
				129	return;
				130	}
				131	if (v == 0) {
				132	SetToZero();
				133	return;
				134	}
				135	const uint64_t factor = v;
				136	uint64_t window = 0;
				137	for (int i = 0; i < size_; ++i) {
				138	window += factor * words_[i];
				139	words_[i] = window & 0xffffffff;
				140	window >>= 32;
				141	}
				142	// If carry bits remain and there's space for them, grow size_.
				143	if (window && size_ < max_words) {
				144	words_[size_] = window & 0xffffffff;
				145	++size_;
				146	}
				147	}
				148
				149	void MultiplyBy(uint64_t v) {
				150	uint32_t words[2];
				151	words[0] = static_cast<uint32_t>(v);
				152	words[1] = static_cast<uint32_t>(v >> 32);
				153	if (words[1] == 0) {
				154	MultiplyBy(words[0]);
				155	} else {
				156	MultiplyBy(2, words);
				157	}
				158	}
				159
				160	// Multiplies in place by 5 to the power of n. n must be non-negative.
				161	void MultiplyByFiveToTheNth(int n) {
				162	while (n >= kMaxSmallPowerOfFive) {
				163	MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
				164	n -= kMaxSmallPowerOfFive;
				165	}
				166	if (n > 0) {
				167	MultiplyBy(kFiveToNth[n]);
				168	}
				169	}
				170
				171	// Multiplies in place by 10 to the power of n. n must be non-negative.
				172	void MultiplyByTenToTheNth(int n) {
				173	if (n > kMaxSmallPowerOfTen) {
				174	// For large n, raise to a power of 5, then shift left by the same amount.
				175	// (10n == 5n * 2**n.) This requires fewer multiplications overall.
				176	MultiplyByFiveToTheNth(n);
				177	ShiftLeft(n);
				178	} else if (n > 0) {
				179	// We can do this more quickly for very small N by using a single
				180	// multiplication.
				181	MultiplyBy(kTenToNth[n]);
				182	}
				183	}
				184
				185	// Returns the value of 5**n, for non-negative n. This implementation uses
				186	// a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
				187	// MultiplyByFiveToTheNth().
				188	static BigUnsigned FiveToTheNth(int n);
				189
				190	// Multiplies by another BigUnsigned, in-place.
				191	template <int M>
				192	void MultiplyBy(const BigUnsigned<M>& other) {
				193	MultiplyBy(other.size(), other.words());
				194	}
				195
				196	void SetToZero() {
				197	std::fill(words_, words_ + size_, 0u);
				198	size_ = 0;
				199	}
				200
				201	// Returns the value of the nth word of this BigUnsigned. This is
				202	// range-checked, and returns 0 on out-of-bounds accesses.
				203	uint32_t GetWord(int index) const {
				204	if (index < 0 \|\| index >= size_) {
				205	return 0;
				206	}
				207	return words_[index];
				208	}
				209
				210	// Returns this integer as a decimal std::string. This is not used in the decimal-
				211	// to-binary conversion; it is intended to aid in testing.
				212	std::string ToString() const;
				213
				214	int size() const { return size_; }
				215	const uint32_t* words() const { return words_; }
				216
				217	private:
				218	// Reads the number between [begin, end), possibly containing a decimal point,
				219	// into this BigUnsigned.
				220	//
				221	// Callers are required to ensure [begin, end) contains a valid number, with
				222	// one or more decimal digits and at most one decimal point. This routine
				223	// will behave unpredictably if these preconditions are not met.
				224	//
				225	// Only the first `significant_digits` digits are read. Digits beyond this
				226	// limit are "sticky": If the final significant digit is 0 or 5, and if any
				227	// dropped digit is nonzero, then that final significant digit is adjusted up
				228	// to 1 or 6. This adjustment allows for precise rounding.
				229	//
				230	// Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
				231	// account for the decimal point and for dropped significant digits. After
				232	// this function returns,
				233	// actual_value_of_parsed_string ~= this 10**exponent_adjustment.
				234	int ReadDigits(const char* begin, const char* end, int significant_digits);
				235
				236	// Performs a step of big integer multiplication. This computes the full
				237	// (64-bit-wide) values that should be added at the given index (step), and
				238	// adds to that location in-place.
				239	//
				240	// Because our math all occurs in place, we must multiply starting from the
				241	// highest word working downward. (This is a bit more expensive due to the
				242	// extra carries involved.)
				243	//
				244	// This must be called in steps, for each word to be calculated, starting from
				245	// the high end and working down to 0. The first value of `step` should be
				246	// `std::min(original_size + other.size_ - 2, max_words - 1)`.
				247	// The reason for this expression is that multiplying the i'th word from one
				248	// multiplicand and the j'th word of another multiplicand creates a
				249	// two-word-wide value to be stored at the (i+j)'th element. The highest
				250	// word indices we will access are `original_size - 1` from this object, and
				251	// `other.size_ - 1` from our operand. Therefore,
				252	// `original_size + other.size_ - 2` is the first step we should calculate,
				253	// but limited on an upper bound by max_words.
				254
				255	// Working from high-to-low ensures that we do not overwrite the portions of
				256	// the initial value of *this which are still needed for later steps.
				257	//
				258	// Once called with step == 0, *this contains the result of the
				259	// multiplication.
				260	//
				261	// `original_size` is the size_ of *this before the first call to
				262	// MultiplyStep(). `other_words` and `other_size` are the contents of our
				263	// operand. `step` is the step to perform, as described above.
				264	void MultiplyStep(int original_size, const uint32_t* other_words,
				265	int other_size, int step);
				266
				267	void MultiplyBy(int other_size, const uint32_t* other_words) {
				268	const int original_size = size_;
				269	const int first_step =
				270	(std::min)(original_size + other_size - 2, max_words - 1);
				271	for (int step = first_step; step >= 0; --step) {
				272	MultiplyStep(original_size, other_words, other_size, step);
				273	}
				274	}
				275
				276	// Adds a 32-bit value to the index'th word, with carry.
				277	void AddWithCarry(int index, uint32_t value) {
				278	if (value) {
				279	while (index < max_words && value > 0) {
				280	words_[index] += value;
				281	// carry if we overflowed in this word:
				282	if (value > words_[index]) {
				283	value = 1;
				284	++index;
				285	} else {
				286	value = 0;
				287	}
				288	}
				289	size_ = (std::min)(max_words, (std::max)(index + 1, size_));
				290	}
				291	}
				292
				293	void AddWithCarry(int index, uint64_t value) {
				294	if (value && index < max_words) {
				295	uint32_t high = value >> 32;
				296	uint32_t low = value & 0xffffffff;
				297	words_[index] += low;
				298	if (words_[index] < low) {
				299	++high;
				300	if (high == 0) {
				301	// Carry from the low word caused our high word to overflow.
				302	// Short circuit here to do the right thing.
				303	AddWithCarry(index + 2, static_cast<uint32_t>(1));
				304	return;
				305	}
				306	}
				307	if (high > 0) {
				308	AddWithCarry(index + 1, high);
				309	} else {
				310	// Normally 32-bit AddWithCarry() sets size_, but since we don't call
				311	// it when `high` is 0, do it ourselves here.
				312	size_ = (std::min)(max_words, (std::max)(index + 1, size_));
				313	}
				314	}
				315	}
				316
				317	// Divide this in place by a constant divisor. Returns the remainder of the
				318	// division.
				319	template <uint32_t divisor>
				320	uint32_t DivMod() {
				321	uint64_t accumulator = 0;
				322	for (int i = size_ - 1; i >= 0; --i) {
				323	accumulator <<= 32;
				324	accumulator += words_[i];
				325	// accumulator / divisor will never overflow an int32_t in this loop
				326	words_[i] = static_cast<uint32_t>(accumulator / divisor);
				327	accumulator = accumulator % divisor;
				328	}
				329	while (size_ > 0 && words_[size_ - 1] == 0) {
				330	--size_;
				331	}
				332	return static_cast<uint32_t>(accumulator);
				333	}
				334
				335	// The number of elements in words_ that may carry significant values.
				336	// All elements beyond this point are 0.
				337	//
				338	// When size_ is 0, this BigUnsigned stores the value 0.
				339	// When size_ is nonzero, is not guaranteed that words_[size_ - 1] is
				340	// nonzero. This can occur due to overflow truncation.
				341	// In particular, x.size_ != y.size_ does not imply x != y.
				342	int size_;
				343	uint32_t words_[max_words];
				344	};
				345
				346	// Compares two big integer instances.
				347	//
				348	// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
				349	template <int N, int M>
				350	int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
				351	int limit = (std::max)(lhs.size(), rhs.size());
				352	for (int i = limit - 1; i >= 0; --i) {
				353	const uint32_t lhs_word = lhs.GetWord(i);
				354	const uint32_t rhs_word = rhs.GetWord(i);
				355	if (lhs_word < rhs_word) {
				356	return -1;
				357	} else if (lhs_word > rhs_word) {
				358	return 1;
				359	}
				360	}
				361	return 0;
				362	}
				363
				364	template <int N, int M>
				365	bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
				366	int limit = (std::max)(lhs.size(), rhs.size());
				367	for (int i = 0; i < limit; ++i) {
				368	if (lhs.GetWord(i) != rhs.GetWord(i)) {
				369	return false;
				370	}
				371	}
				372	return true;
				373	}
				374
				375	template <int N, int M>
				376	bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
				377	return !(lhs == rhs);
				378	}
				379
				380	template <int N, int M>
				381	bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
				382	return Compare(lhs, rhs) == -1;
				383	}
				384
				385	template <int N, int M>
				386	bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
				387	return rhs < lhs;
				388	}
				389	template <int N, int M>
				390	bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
				391	return !(rhs < lhs);
				392	}
				393	template <int N, int M>
				394	bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
				395	return !(lhs < rhs);
				396	}
				397
				398	// Output operator for BigUnsigned, for testing purposes only.
				399	template <int N>
				400	std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
				401	return os << num.ToString();
				402	}
				403
				404	// Explicit instantiation declarations for the sizes of BigUnsigned that we
				405	// are using.
				406	//
				407	// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
				408	// still bigger than an int128, and 84 is a large value we will want to use
				409	// in the from_chars implementation.
				410	//
				411	// Comments justifying the use of 84 belong in the from_chars implementation,
				412	// and will be added in a follow-up CL.
				413	extern template class BigUnsigned<4>;
				414	extern template class BigUnsigned<84>;
				415
				416	} // namespace strings_internal
				417	} // namespace absl
				418
				419	#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_