From bd40a41cc142b36c73b881099d08a9d83f7f4780 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Mon, 18 Jun 2018 13:18:53 -0700 Subject: -- f28d30df5769bb832dec3ff36d2fcd2bcdf494a3 by Shaindel Schwartz : Internal change PiperOrigin-RevId: 201046831 -- 711715a78b7e53dfaafd4d7f08a74e76db22af88 by Mark Barolak : Internal fix PiperOrigin-RevId: 201043684 -- 64b53edd6bf1fa48f74e7f5d33f00f80d5089147 by Shaindel Schwartz : Remove extra whitespace PiperOrigin-RevId: 201041989 -- 0bdd2a0b33657b688e4a04aeba9ebba47e4dc6ca by Shaindel Schwartz : Whitespace fix. PiperOrigin-RevId: 201034413 -- 3deb0ac296ef1b74c4789e114a8a8bf53253f26b by Shaindel Schwartz : Scrub build tags. No functional changes. PiperOrigin-RevId: 201032927 -- da75d0f8b73baa7e8f4e9a092bba546012ed3b71 by Alex Strelnikov : Internal change. PiperOrigin-RevId: 201026131 -- 6815d80caa19870d0c441b6b9816c68db41393a5 by Tom Manshreck : Add documentation for our LTS snapshot branches PiperOrigin-RevId: 201025191 -- 64c3b02006f39e6a8127bbabf9ec947fb45b6504 by Greg Falcon : Provide absl::from_chars for double and float types. This is a forward-compatible implementation of std::from_chars from C++17. This provides exact "round_to_nearest" conversions, and has some nice properties: * Works with string_view (it can convert numbers from non-NUL-terminated buffers) * Never allocates memory * Faster than the standard library strtod() in our toolchain * Uses integer math in its calculations, so is unaffected by floating point environment * Unaffected by C locale Also change SimpleAtod/SimpleAtoi to use this new API under the hood. PiperOrigin-RevId: 201003324 -- 542869258eb100779497c899103dc96aced52749 by Greg Falcon : Internal change PiperOrigin-RevId: 200999200 -- 3aba192775c7f80e2cd7f221b0a73537823c54ea by Gennadiy Rozental : Internal change PiperOrigin-RevId: 200947470 -- daf9b9feedd748d5364a4c06165b7cb7604d3e1e by Mark Barolak : Add an absl:: qualification to a usage of base_internal::SchedulingMode outside of an absl:: namespace. PiperOrigin-RevId: 200748234 -- a8d265290a22d629f3d9bf9f872c204200bfe8c8 by Mark Barolak : Add a missing namespace closing comment to optional.h. PiperOrigin-RevId: 200739934 -- f05af8ee1c6b864dad2df7c907d424209a3e3202 by Abseil Team : Internal change PiperOrigin-RevId: 200719115 GitOrigin-RevId: f28d30df5769bb832dec3ff36d2fcd2bcdf494a3 Change-Id: Ie4fa601078fd4aa57286372611f1d114fdec82c0 --- absl/strings/internal/charconv_parse_test.cc | 357 +++++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 absl/strings/internal/charconv_parse_test.cc (limited to 'absl/strings/internal/charconv_parse_test.cc') diff --git a/absl/strings/internal/charconv_parse_test.cc b/absl/strings/internal/charconv_parse_test.cc new file mode 100644 index 0000000..1ff8600 --- /dev/null +++ b/absl/strings/internal/charconv_parse_test.cc @@ -0,0 +1,357 @@ +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/charconv_parse.h" + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/str_cat.h" + +using absl::chars_format; +using absl::strings_internal::FloatType; +using absl::strings_internal::ParsedFloat; +using absl::strings_internal::ParseFloat; + +namespace { + +// Check that a given std::string input is parsed to the expected mantissa and +// exponent. +// +// Input std::string `s` must contain a '$' character. It marks the end of the +// characters that should be consumed by the match. It is stripped from the +// input to ParseFloat. +// +// If input std::string `s` contains '[' and ']' characters, these mark the region +// of characters that should be marked as the "subrange". For NaNs, this is +// the location of the extended NaN std::string. For numbers, this is the location +// of the full, over-large mantissa. +template +void ExpectParsedFloat(std::string s, absl::chars_format format_flags, + FloatType expected_type, uint64_t expected_mantissa, + int expected_exponent, + int expected_literal_exponent = -999) { + SCOPED_TRACE(s); + + int begin_subrange = -1; + int end_subrange = -1; + // If s contains '[' and ']', then strip these characters and set the subrange + // indices appropriately. + std::string::size_type open_bracket_pos = s.find('['); + if (open_bracket_pos != std::string::npos) { + begin_subrange = static_cast(open_bracket_pos); + s.replace(open_bracket_pos, 1, ""); + std::string::size_type close_bracket_pos = s.find(']'); + ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos, + "Test input contains [ without matching ]"); + end_subrange = static_cast(close_bracket_pos); + s.replace(close_bracket_pos, 1, ""); + } + const std::string::size_type expected_characters_matched = s.find('$'); + ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, + "Input std::string must contain $"); + s.replace(expected_characters_matched, 1, ""); + + ParsedFloat parsed = + ParseFloat(s.data(), s.data() + s.size(), format_flags); + + EXPECT_NE(parsed.end, nullptr); + if (parsed.end == nullptr) { + return; // The following tests are not useful if we fully failed to parse + } + EXPECT_EQ(parsed.type, expected_type); + if (begin_subrange == -1) { + EXPECT_EQ(parsed.subrange_begin, nullptr); + EXPECT_EQ(parsed.subrange_end, nullptr); + } else { + EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange); + EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange); + } + if (parsed.type == FloatType::kNumber) { + EXPECT_EQ(parsed.mantissa, expected_mantissa); + EXPECT_EQ(parsed.exponent, expected_exponent); + if (expected_literal_exponent != -999) { + EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent); + } + } + auto characters_matched = static_cast(parsed.end - s.data()); + EXPECT_EQ(characters_matched, expected_characters_matched); +} + +// Check that a given std::string input is parsed to the expected mantissa and +// exponent. +// +// Input std::string `s` must contain a '$' character. It marks the end of the +// characters that were consumed by the match. +template +void ExpectNumber(std::string s, absl::chars_format format_flags, + uint64_t expected_mantissa, int expected_exponent, + int expected_literal_exponent = -999) { + ExpectParsedFloat(std::move(s), format_flags, FloatType::kNumber, + expected_mantissa, expected_exponent, + expected_literal_exponent); +} + +// Check that a given std::string input is parsed to the given special value. +// +// This tests against both number bases, since infinities and NaNs have +// identical representations in both modes. +void ExpectSpecial(const std::string& s, absl::chars_format format_flags, + FloatType type) { + ExpectParsedFloat<10>(s, format_flags, type, 0, 0); + ExpectParsedFloat<16>(s, format_flags, type, 0, 0); +} + +// Check that a given input std::string is not matched by Float. +template +void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) { + ParsedFloat parsed = + ParseFloat(s.data(), s.data() + s.size(), format_flags); + EXPECT_EQ(parsed.end, nullptr); +} + +TEST(ParseFloat, SimpleValue) { + // Test that various forms of floating point numbers all parse correctly. + ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3); + ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8); + + // ExpectNumber does not attempt to drop trailing zeroes. + ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900, + -5); + ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general, + 0x1234abcdef000, -20); + + // Ensure non-matching characters after a number are ignored, even when they + // look like potentially matching characters. + ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3); + ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789, + -3); + ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3); + ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3); + + ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general, + 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef, + -8); + ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8); + ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8); + + // Ensure we can read a full resolution mantissa without overflow. + ExpectNumber<10>("9999999999999999999$", chars_format::general, + 9999999999999999999u, 0); + ExpectNumber<16>("fffffffffffffff$", chars_format::general, + 0xfffffffffffffffu, 0); + + // Check that zero is consistently read. + ExpectNumber<10>("0$", chars_format::general, 0, 0); + ExpectNumber<16>("0$", chars_format::general, 0, 0); + ExpectNumber<10>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("000000000000000000000000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<16>("0000000000000000000000.000000000000000000$", + chars_format::general, 0, 0); + ExpectNumber<10>("0.00000000000000000000000000000000e123456$", + chars_format::general, 0, 0); + ExpectNumber<16>("0.00000000000000000000000000000000p123456$", + chars_format::general, 0, 0); +} + +TEST(ParseFloat, LargeDecimalMantissa) { + // After 19 significant decimal digits in the mantissa, ParsedFloat will + // truncate additional digits. We need to test that: + // 1) the truncation to 19 digits happens + // 2) the returned exponent reflects the dropped significant digits + // 3) a correct literal_exponent is set + // + // If and only if a significant digit is found after 19 digits, then the + // entirety of the mantissa in case the exact value is needed to make a + // rounding decision. The [ and ] characters below denote where such a + // subregion was marked by by ParseFloat. They are not part of the input. + + // Mark a capture group only if a dropped digit is significant (nonzero). + ExpectNumber<10>("100000000000000000000000000$", chars_format::general, + 1000000000000000000, + /* adjusted exponent */ 8); + + ExpectNumber<10>("123456789123456789100000000$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8); + + ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + // Leading zeroes should not count towards the 19 significant digit limit + ExpectNumber<10>("[00000000123456789123456789123456789]$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8, + /* literal exponent */ 0); + + ExpectNumber<10>("00000000123456789123456789100000000$", + chars_format::general, 1234567891234567891, + /* adjusted exponent */ 8); + + // Truncated digits after the decimal point should not cause a further + // exponent adjustment. + ExpectNumber<10>("1.234567891234567891e123$", chars_format::general, + 1234567891234567891, 105); + ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general, + 1234567891234567891, + /* adjusted exponent */ 105, + /* literal exponent */ 123); + + // Ensure we truncate, and not round. (The from_chars algorithm we use + // depends on our guess missing low, if it misses, so we need the rounding + // error to be downward.) + ExpectNumber<10>("[1999999999999999999999]$", chars_format::general, + 1999999999999999999, + /* adjusted exponent */ 3, + /* literal exponent */ 0); +} + +TEST(ParseFloat, LargeHexadecimalMantissa) { + // After 15 significant hex digits in the mantissa, ParsedFloat will treat + // additional digits as sticky, We need to test that: + // 1) The truncation to 15 digits happens + // 2) The returned exponent reflects the dropped significant digits + // 3) If a nonzero digit is dropped, the low bit of mantissa is set. + + ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general, + 0x123456789abcdef, 60); + + // Leading zeroes should not count towards the 15 significant digit limit + ExpectNumber<16>("000000123456789abcdef123456789abcdef$", + chars_format::general, 0x123456789abcdef, 60); + + // Truncated digits after the radix point should not cause a further + // exponent adjustment. + ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general, + 0x123456789abcdef, 44); + ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$", + chars_format::general, 0x123456789abcdef, 44); + + // test sticky digit behavior. The low bit should be set iff any dropped + // digit is nonzero. + ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general, + 0x123456789abcdef, 60); + ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general, + 0x123456789abcdee, 60); +} + +TEST(ParseFloat, ScientificVsFixed) { + // In fixed mode, an exponent is never matched (but the remainder of the + // number will be matched.) + ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8); + ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3); + ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36); + ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8); + + // In scientific mode, numbers don't match *unless* they have an exponent. + ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3); + ExpectFailedParse<10>("-123456.789$", chars_format::scientific); + ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef, + -8); + ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific); +} + +TEST(ParseFloat, Infinity) { + ExpectFailedParse<10>("in", chars_format::general); + ExpectFailedParse<16>("in", chars_format::general); + ExpectFailedParse<10>("inx", chars_format::general); + ExpectFailedParse<16>("inx", chars_format::general); + ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity); + ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity); + ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity); +} + +TEST(ParseFloat, NaN) { + ExpectFailedParse<10>("na", chars_format::general); + ExpectFailedParse<16>("na", chars_format::general); + ExpectFailedParse<10>("nah", chars_format::general); + ExpectFailedParse<16>("nah", chars_format::general); + ExpectSpecial("nan$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$", chars_format::general, FloatType::kNan); + ExpectSpecial("nAn$", chars_format::general, FloatType::kNan); + ExpectSpecial("NAN$", chars_format::general, FloatType::kNan); + ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan); + + // A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to + // appear after an NaN. Check that this is allowed, and that the correct + // characters are grouped. + // + // (The characters [ and ] in the pattern below delimit the expected matched + // subgroup; they are not part of the input passed to ParseFloat.) + ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan); + ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan); + // If the subgroup contains illegal characters, don't match it at all. + ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan); + // Also cope with a missing close paren. + ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan); +} + +} // namespace -- cgit v1.2.3