diff options
Diffstat (limited to 'absl/strings/internal/charconv_bigint.h')
-rw-r--r-- | absl/strings/internal/charconv_bigint.h | 426 |
1 files changed, 426 insertions, 0 deletions
diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h new file mode 100644 index 00000000..aa70af2c --- /dev/null +++ b/absl/strings/internal/charconv_bigint.h @@ -0,0 +1,426 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ +#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ + +#include <algorithm> +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/strings/ascii.h" +#include "absl/strings/internal/charconv_parse.h" +#include "absl/strings/string_view.h" + +namespace absl { +namespace strings_internal { + +// The largest power that 5 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfFive = 13; +// The largest power that 10 that can be raised to, and still fit in a uint32_t. +constexpr int kMaxSmallPowerOfTen = 9; + +extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1]; +extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; + +// Large, fixed-width unsigned integer. +// +// Exact rounding for decimal-to-binary floating point conversion requires very +// large integer math, but a design goal of absl::from_chars is to avoid +// allocating memory. The integer precision needed for decimal-to-binary +// conversions is large but bounded, so a huge fixed-width integer class +// suffices. +// +// This is an intentionally limited big integer class. Only needed operations +// are implemented. All storage lives in an array data member, and all +// arithmetic is done in-place, to avoid requiring separate storage for operand +// and result. +// +// This is an internal class. Some methods live in the .cc file, and are +// instantiated only for the values of max_words we need. +template <int max_words> +class BigUnsigned { + public: + static_assert(max_words == 4 || max_words == 84, + "unsupported max_words value"); + + BigUnsigned() : size_(0), words_{} {} + explicit BigUnsigned(uint32_t v) : size_(v > 0 ? 1 : 0), words_{v} {} + explicit BigUnsigned(uint64_t v) + : size_(0), + words_{static_cast<uint32_t>(v & 0xffffffff), + static_cast<uint32_t>(v >> 32)} { + if (words_[1]) { + size_ = 2; + } else if (words_[0]) { + size_ = 1; + } + } + + // Constructs a BigUnsigned from the given string_view containing a decimal + // value. If the input std::string is not a decimal integer, constructs a 0 + // instead. + explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { + // Check for valid input, returning a 0 otherwise. This is reasonable + // behavior only because this constructor is for unit tests. + if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || + sv.empty()) { + return; + } + int exponent_adjust = + ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); + if (exponent_adjust > 0) { + MultiplyByTenToTheNth(exponent_adjust); + } + } + + // Loads the mantissa value of a previously-parsed float. + // + // Returns the associated decimal exponent. The value of the parsed float is + // exactly *this * 10**exponent. + int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); + + // Returns the number of decimal digits of precision this type provides. All + // numbers with this many decimal digits or fewer are representable by this + // type. + // + // Analagous to std::numeric_limits<BigUnsigned>::digits10. + static constexpr int Digits10() { + // 9975007/1035508 is very slightly less than log10(2**32). + return static_cast<uint64_t>(max_words) * 9975007 / 1035508; + } + + // Shifts left by the given number of bits. + void ShiftLeft(int count) { + if (count > 0) { + const int word_shift = count / 32; + if (word_shift >= max_words) { + SetToZero(); + return; + } + size_ = std::min(size_ + word_shift, max_words); + count %= 32; + if (count == 0) { + std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); + } else { + for (int i = std::min(size_, max_words - 1); i > word_shift; --i) { + words_[i] = (words_[i - word_shift] << count) | + (words_[i - word_shift - 1] >> (32 - count)); + } + words_[word_shift] = words_[0] << count; + // Grow size_ if necessary. + if (size_ < max_words && words_[size_]) { + ++size_; + } + } + std::fill(words_, words_ + word_shift, 0u); + } + } + + + // Multiplies by v in-place. + void MultiplyBy(uint32_t v) { + if (size_ == 0 || v == 1) { + return; + } + if (v == 0) { + SetToZero(); + return; + } + const uint64_t factor = v; + uint64_t window = 0; + for (int i = 0; i < size_; ++i) { + window += factor * words_[i]; + words_[i] = window & 0xffffffff; + window >>= 32; + } + // If carry bits remain and there's space for them, grow size_. + if (window && size_ < max_words) { + words_[size_] = window & 0xffffffff; + ++size_; + } + } + + void MultiplyBy(uint64_t v) { + uint32_t words[2]; + words[0] = static_cast<uint32_t>(v); + words[1] = static_cast<uint32_t>(v >> 32); + if (words[1] == 0) { + MultiplyBy(words[0]); + } else { + MultiplyBy(2, words); + } + } + + // Multiplies in place by 5 to the power of n. n must be non-negative. + void MultiplyByFiveToTheNth(int n) { + while (n >= kMaxSmallPowerOfFive) { + MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); + n -= kMaxSmallPowerOfFive; + } + if (n > 0) { + MultiplyBy(kFiveToNth[n]); + } + } + + // Multiplies in place by 10 to the power of n. n must be non-negative. + void MultiplyByTenToTheNth(int n) { + if (n > kMaxSmallPowerOfTen) { + // For large n, raise to a power of 5, then shift left by the same amount. + // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. + MultiplyByFiveToTheNth(n); + ShiftLeft(n); + } else if (n > 0) { + // We can do this more quickly for very small N by using a single + // multiplication. + MultiplyBy(kTenToNth[n]); + } + } + + // Returns the value of 5**n, for non-negative n. This implementation uses + // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling + // MultiplyByFiveToTheNth(). + static BigUnsigned FiveToTheNth(int n); + + // Multiplies by another BigUnsigned, in-place. + template <int M> + void MultiplyBy(const BigUnsigned<M>& other) { + MultiplyBy(other.size(), other.words()); + } + + void SetToZero() { + std::fill(words_, words_ + size_, 0u); + size_ = 0; + } + + // Returns the value of the nth word of this BigUnsigned. This is + // range-checked, and returns 0 on out-of-bounds accesses. + uint32_t GetWord(int index) const { + if (index < 0 || index >= size_) { + return 0; + } + return words_[index]; + } + + // Returns this integer as a decimal std::string. This is not used in the decimal- + // to-binary conversion; it is intended to aid in testing. + std::string ToString() const; + + int size() const { return size_; } + const uint32_t* words() const { return words_; } + + private: + // Reads the number between [begin, end), possibly containing a decimal point, + // into this BigUnsigned. + // + // Callers are required to ensure [begin, end) contains a valid number, with + // one or more decimal digits and at most one decimal point. This routine + // will behave unpredictably if these preconditions are not met. + // + // Only the first `significant_digits` digits are read. Digits beyond this + // limit are "sticky": If the final significant digit is 0 or 5, and if any + // dropped digit is nonzero, then that final significant digit is adjusted up + // to 1 or 6. This adjustment allows for precise rounding. + // + // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to + // account for the decimal point and for dropped significant digits. After + // this function returns, + // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. + int ReadDigits(const char* begin, const char* end, int significant_digits); + + // Performs a step of big integer multiplication. This computes the full + // (64-bit-wide) values that should be added at the given index (step), and + // adds to that location in-place. + // + // Because our math all occurs in place, we must multiply starting from the + // highest word working downward. (This is a bit more expensive due to the + // extra carries involved.) + // + // This must be called in steps, for each word to be calculated, starting from + // the high end and working down to 0. The first value of `step` should be + // `std::min(original_size + other.size_ - 2, max_words - 1)`. + // The reason for this expression is that multiplying the i'th word from one + // multiplicand and the j'th word of another multiplicand creates a + // two-word-wide value to be stored at the (i+j)'th element. The highest + // word indices we will access are `original_size - 1` from this object, and + // `other.size_ - 1` from our operand. Therefore, + // `original_size + other.size_ - 2` is the first step we should calculate, + // but limited on an upper bound by max_words. + + // Working from high-to-low ensures that we do not overwrite the portions of + // the initial value of *this which are still needed for later steps. + // + // Once called with step == 0, *this contains the result of the + // multiplication. + // + // `original_size` is the size_ of *this before the first call to + // MultiplyStep(). `other_words` and `other_size` are the contents of our + // operand. `step` is the step to perform, as described above. + void MultiplyStep(int original_size, const uint32_t* other_words, + int other_size, int step); + + void MultiplyBy(int other_size, const uint32_t* other_words) { + const int original_size = size_; + const int first_step = + std::min(original_size + other_size - 2, max_words - 1); + for (int step = first_step; step >= 0; --step) { + MultiplyStep(original_size, other_words, other_size, step); + } + } + + // Adds a 32-bit value to the index'th word, with carry. + void AddWithCarry(int index, uint32_t value) { + if (value) { + while (index < max_words && value > 0) { + words_[index] += value; + // carry if we overflowed in this word: + if (value > words_[index]) { + value = 1; + ++index; + } else { + value = 0; + } + } + size_ = std::min(max_words, std::max(index + 1, size_)); + } + } + + void AddWithCarry(int index, uint64_t value) { + if (value && index < max_words) { + uint32_t high = value >> 32; + uint32_t low = value & 0xffffffff; + words_[index] += low; + if (words_[index] < low) { + ++high; + if (high == 0) { + // Carry from the low word caused our high word to overflow. + // Short circuit here to do the right thing. + AddWithCarry(index + 2, static_cast<uint32_t>(1)); + return; + } + } + if (high > 0) { + AddWithCarry(index + 1, high); + } else { + // Normally 32-bit AddWithCarry() sets size_, but since we don't call + // it when `high` is 0, do it ourselves here. + size_ = std::min(max_words, std::max(index + 1, size_)); + } + } + } + + // Divide this in place by a constant divisor. Returns the remainder of the + // division. + template <uint32_t divisor> + uint32_t DivMod() { + uint64_t accumulator = 0; + for (int i = size_ - 1; i >= 0; --i) { + accumulator <<= 32; + accumulator += words_[i]; + // accumulator / divisor will never overflow an int32_t in this loop + words_[i] = static_cast<uint32_t>(accumulator / divisor); + accumulator = accumulator % divisor; + } + while (size_ > 0 && words_[size_ - 1] == 0) { + --size_; + } + return static_cast<uint32_t>(accumulator); + } + + // The number of elements in words_ that may carry significant values. + // All elements beyond this point are 0. + // + // When size_ is 0, this BigUnsigned stores the value 0. + // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is + // nonzero. This can occur due to overflow truncation. + // In particular, x.size_ != y.size_ does *not* imply x != y. + int size_; + uint32_t words_[max_words]; +}; + +// Compares two big integer instances. +// +// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. +template <int N, int M> +int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = std::max(lhs.size(), rhs.size()); + for (int i = limit - 1; i >= 0; --i) { + const uint32_t lhs_word = lhs.GetWord(i); + const uint32_t rhs_word = rhs.GetWord(i); + if (lhs_word < rhs_word) { + return -1; + } else if (lhs_word > rhs_word) { + return 1; + } + } + return 0; +} + +template <int N, int M> +bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + int limit = std::max(lhs.size(), rhs.size()); + for (int i = 0; i < limit; ++i) { + if (lhs.GetWord(i) != rhs.GetWord(i)) { + return false; + } + } + return true; +} + +template <int N, int M> +bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs == rhs); +} + +template <int N, int M> +bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return Compare(lhs, rhs) == -1; +} + +template <int N, int M> +bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return rhs < lhs; +} +template <int N, int M> +bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(rhs < lhs); +} +template <int N, int M> +bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { + return !(lhs < rhs); +} + +// Output operator for BigUnsigned, for testing purposes only. +template <int N> +std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { + return os << num.ToString(); +} + +// Explicit instantiation declarations for the sizes of BigUnsigned that we +// are using. +// +// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is +// still bigger than an int128, and 84 is a large value we will want to use +// in the from_chars implementation. +// +// Comments justifying the use of 84 belong in the from_chars implementation, +// and will be added in a follow-up CL. +extern template class BigUnsigned<4>; +extern template class BigUnsigned<84>; + +} // namespace strings_internal +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |