From e4c8d0eb8ef4acb5d7a4252b3b87feb391ef7e41 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Fri, 18 Oct 2019 09:06:29 -0700 Subject: Export of internal Abseil changes -- a9ac6567c0933d786d68c10011e3f3ff9deedf89 by Greg Falcon : Add absl::FunctionRef, a type analogous to the proposed C++23 std::function_ref. Like std::function, FunctionRef can be used to type-erase any callable (invokable) object. However, FunctionRef works by reference: it does not store a copy of the type-erased object. If the wrapped object is destroyed before the FunctionRef, the reference becomes dangling. FunctionRef relates to std::function in much the same way that string_view relates to std::string. Because of these limitations, FunctionRef is best used only as a function argument type, and only where the function will be invoked immediately (rather than saved for later use). When `const std::function<...>&` is used in this way, `absl::FunctionRef<...>` is a better-performing replacement. PiperOrigin-RevId: 275484044 -- 1f7c4df3760f8b93e5a5baf40b070eca1d3f4c98 by Abseil Team : Add FastHexToBufferZeroPad16() function for blazingly fast hex encoding of uint64_t. PiperOrigin-RevId: 275420901 -- 08d48ac004eba57cf2f1ada827181a2995f74807 by Abseil Team : Avoid applying the workaround for MSVC's static initialization problems when using clang-cl. PiperOrigin-RevId: 275366326 -- 40be82bd2b34670b5458c0a72a0475086153c2d6 by Abseil Team : Added comments to SimpleAtof()/SimpleAtod() that clarify that they always use the "C" locale, unlike the standard functions strtod() and strtof() referenced now in the comments. PiperOrigin-RevId: 275355815 -- 086779dacb3f6f2b3ab59947e94e79046bdb1fe1 by Jorg Brown : Move the hex conversion table used by escaping.cc into numbers.h so that other parts of Abseil can more efficiently access it. PiperOrigin-RevId: 275331251 -- 3c4ed1b04e55d96a40cbe70fb70929ffbb0c0432 by Abseil Team : Avoid applying the workaround for MSVC's static initialization problems when using clang-cl. PiperOrigin-RevId: 275323858 -- 56ceb58ab688c3761978308609b09a1ac2739c9a by Derek Mauro : Add script for testing on Alpine Linux (for musl test coverage) PiperOrigin-RevId: 275321244 GitOrigin-RevId: a9ac6567c0933d786d68c10011e3f3ff9deedf89 Change-Id: I39799fa03768ddb44f3166200c860e1da4461807 --- absl/strings/BUILD.bazel | 5 ++++- absl/strings/CMakeLists.txt | 2 ++ absl/strings/escaping.cc | 33 ++++++---------------------- absl/strings/numbers.cc | 29 ++++++++++++++++++++++--- absl/strings/numbers.h | 45 +++++++++++++++++++++++++++++++++++++-- absl/strings/numbers_benchmark.cc | 23 ++++++++++++++++++++ absl/strings/numbers_test.cc | 30 ++++++++++++++++++++++++-- absl/strings/str_cat.cc | 5 +++-- absl/strings/substitute.cc | 5 ++--- 9 files changed, 137 insertions(+), 40 deletions(-) (limited to 'absl/strings') diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 4863ead..e38c8ad 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -413,8 +413,9 @@ cc_test( deps = [ ":pow10_helper", ":strings", - "//absl/base:core_headers", "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", "@com_google_googletest//:gtest_main", ], ) @@ -428,6 +429,8 @@ cc_test( deps = [ ":strings", "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", "@com_github_google_benchmark//:benchmark_main", ], ) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 3f90795..cd52a47 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -277,6 +277,8 @@ absl_cc_test( absl::core_headers absl::pow10_helper absl::raw_logging_internal + absl::random_random + absl::random_distributions gmock_main ) diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 0d336e3..88390fb 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -35,27 +35,6 @@ namespace absl { namespace { -// Digit conversion. -constexpr char kHexChar[] = "0123456789abcdef"; - -constexpr char kHexTable[513] = - "000102030405060708090a0b0c0d0e0f" - "101112131415161718191a1b1c1d1e1f" - "202122232425262728292a2b2c2d2e2f" - "303132333435363738393a3b3c3d3e3f" - "404142434445464748494a4b4c4d4e4f" - "505152535455565758595a5b5c5d5e5f" - "606162636465666768696a6b6c6d6e6f" - "707172737475767778797a7b7c7d7e7f" - "808182838485868788898a8b8c8d8e8f" - "909192939495969798999a9b9c9d9e9f" - "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" - "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" - "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" - "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" - "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" - "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; - // These are used for the leave_nulls_escaped argument to CUnescapeInternal(). constexpr bool kUnescapeNulls = false; @@ -348,14 +327,14 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex, (last_hex_escape && absl::ascii_isxdigit(c)))) { if (use_hex) { dest.append("\\" "x"); - dest.push_back(kHexChar[c / 16]); - dest.push_back(kHexChar[c % 16]); + dest.push_back(numbers_internal::kHexChar[c / 16]); + dest.push_back(numbers_internal::kHexChar[c % 16]); is_hex_escape = true; } else { dest.append("\\"); - dest.push_back(kHexChar[c / 64]); - dest.push_back(kHexChar[(c % 64) / 8]); - dest.push_back(kHexChar[c % 8]); + dest.push_back(numbers_internal::kHexChar[c / 64]); + dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]); + dest.push_back(numbers_internal::kHexChar[c % 8]); } } else { dest.push_back(c); @@ -1019,7 +998,7 @@ template void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { auto dest_ptr = &dest[0]; for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { - const char* hex_p = &kHexTable[*src_ptr * 2]; + const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; std::copy(hex_p, hex_p + 2, dest_ptr); } } diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 38d1486..d7b94fc 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -19,8 +19,8 @@ #include #include -#include // for DBL_DIG and FLT_DIG -#include // for HUGE_VAL +#include // for DBL_DIG and FLT_DIG +#include // for HUGE_VAL #include #include #include @@ -34,6 +34,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/strings/ascii.h" #include "absl/strings/charconv.h" +#include "absl/strings/escaping.h" #include "absl/strings/internal/memutil.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -885,6 +886,28 @@ inline bool safe_uint_internal(absl::string_view text, IntType* value_p, } // anonymous namespace namespace numbers_internal { + +// Digit conversion. +ABSL_CONST_INIT const char kHexChar[] = "0123456789abcdef"; + +ABSL_CONST_INIT const char kHexTable[513] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + bool safe_strto32_base(absl::string_view text, int32_t* value, int base) { return safe_int_internal(text, value, base); } @@ -900,6 +923,6 @@ bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) { bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) { return safe_uint_internal(text, value, base); } -} // namespace numbers_internal +} // namespace numbers_internal } // namespace absl diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index 100839b..745de67 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -24,6 +24,10 @@ #ifndef ABSL_STRINGS_NUMBERS_H_ #define ABSL_STRINGS_NUMBERS_H_ +#ifdef __SSE4_2__ +#include +#endif + #include #include #include @@ -32,6 +36,8 @@ #include #include +#include "absl/base/internal/bits.h" +#include "absl/base/internal/endian.h" #include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/numeric/int128.h" @@ -54,7 +60,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out); // Converts the given string (optionally followed or preceded by ASCII // whitespace) into a float, which may be rounded on overflow or underflow. // See https://en.cppreference.com/w/c/string/byte/strtof for details about the -// allowed formats for `str`. If any errors are encountered, this function +// allowed formats for `str`, except SimpleAtof() is locale-indepdent and will +// always use the "C" locale. If any errors are encountered, this function // returns `false`, leaving `out` in an unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out); @@ -63,7 +70,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out); // Converts the given string (optionally followed or preceded by ASCII // whitespace) into a double, which may be rounded on overflow or underflow. // See https://en.cppreference.com/w/c/string/byte/strtof for details about the -// allowed formats for `str`. If any errors are encountered, this function +// allowed formats for `str`, except SimpleAtod is locale-independent and will +// always use the "C" locale. If any errors are encountered, this function // returns `false`, leaving `out` in an unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); @@ -84,6 +92,10 @@ ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out); namespace absl { namespace numbers_internal { +// Digit conversion. +extern const char kHexChar[17]; // 0123456789abcdef +extern const char kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011... + // safe_strto?() functions for implementing SimpleAtoi() bool safe_strto32_base(absl::string_view text, int32_t* value, int base); bool safe_strto64_base(absl::string_view text, int64_t* value, int base); @@ -170,6 +182,35 @@ ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out, return parsed; } +// FastHexToBufferZeroPad16() +// +// Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but +// without the terminating null character. Thus `out` must be of length >= 16. +// Returns the number of non-pad digits of the output (it can never be zero +// since 0 has one digit). +inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { + uint64_t be = absl::big_endian::FromHost64(val); +#ifdef __SSE4_2__ + const auto kNibbleMask = _mm_set1_epi8(0xf); + const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'); + auto v = _mm_loadu_si64(reinterpret_cast<__m128i*>(&be)); // load lo dword + auto v4 = _mm_srli_epi64(v, 4); // shift 4 right + auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes + auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles + auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars); +#else + for (int i = 0; i < 8; ++i) { + auto byte = (be >> (8 * i)) & 0xFF; + auto* hex = &absl::numbers_internal::kHexTable[byte * 2]; + std::memcpy(out + 2 * i, hex, 2); + } +#endif + // | 0x1 so that even 0 has 1 digit. + return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4; +} + } // namespace numbers_internal // SimpleAtoi() diff --git a/absl/strings/numbers_benchmark.cc b/absl/strings/numbers_benchmark.cc index 54dbedd..6e79b3e 100644 --- a/absl/strings/numbers_benchmark.cc +++ b/absl/strings/numbers_benchmark.cc @@ -20,6 +20,8 @@ #include "benchmark/benchmark.h" #include "absl/base/internal/raw_logging.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" #include "absl/strings/numbers.h" namespace { @@ -260,4 +262,25 @@ BENCHMARK_TEMPLATE(BM_SimpleAtod, std::string) ->ArgPair(10, 4) ->ArgPair(10, 8); +void BM_FastHexToBufferZeroPad16(benchmark::State& state) { + absl::BitGen rng; + std::vector nums; + nums.resize(1000); + auto min = std::numeric_limits::min(); + auto max = std::numeric_limits::max(); + for (auto& num : nums) { + num = absl::LogUniform(rng, min, max); + } + + char buf[16]; + while (state.KeepRunningBatch(nums.size())) { + for (auto num : nums) { + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(num, buf); + benchmark::DoNotOptimize(digits); + benchmark::DoNotOptimize(buf); + } + } +} +BENCHMARK(BM_FastHexToBufferZeroPad16); + } // namespace diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index d964e56..b92b9a8 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -17,6 +17,7 @@ #include "absl/strings/numbers.h" #include + #include // NOLINT(build/c++11) #include #include @@ -36,10 +37,11 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/internal/raw_logging.h" -#include "absl/strings/str_cat.h" - +#include "absl/random/distributions.h" +#include "absl/random/random.h" #include "absl/strings/internal/numbers_test_common.h" #include "absl/strings/internal/pow10_helper.h" +#include "absl/strings/str_cat.h" namespace { @@ -1187,4 +1189,28 @@ TEST(StrToUint64Base, PrefixOnly) { } } +void TestFastHexToBufferZeroPad16(uint64_t v) { + char buf[16]; + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(v, buf); + absl::string_view res(buf, 16); + char buf2[17]; + snprintf(buf2, sizeof(buf2), "%016" PRIx64, v); + EXPECT_EQ(res, buf2) << v; + size_t expected_digits = snprintf(buf2, sizeof(buf2), "%" PRIx64, v); + EXPECT_EQ(digits, expected_digits) << v; +} + +TEST(FastHexToBufferZeroPad16, Smoke) { + TestFastHexToBufferZeroPad16(std::numeric_limits::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits::max()); + TestFastHexToBufferZeroPad16(std::numeric_limits::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits::max()); + absl::BitGen rng; + for (int i = 0; i < 100000; ++i) { + TestFastHexToBufferZeroPad16( + absl::LogUniform(rng, std::numeric_limits::min(), + std::numeric_limits::max())); + } +} + } // namespace diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index 9961944..d587789 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -15,12 +15,14 @@ #include "absl/strings/str_cat.h" #include + #include #include #include #include "absl/strings/ascii.h" #include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/numbers.h" namespace absl { @@ -28,9 +30,8 @@ AlphaNum::AlphaNum(Hex hex) { char* const end = &digits_[numbers_internal::kFastToBufferSize]; char* writer = end; uint64_t value = hex.value; - static const char hexdigits[] = "0123456789abcdef"; do { - *--writer = hexdigits[value & 0xF]; + *--writer = absl::numbers_internal::kHexChar[value & 0xF]; value >>= 4; } while (value != 0); diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc index bc17695..36dbfe7 100644 --- a/absl/strings/substitute.cc +++ b/absl/strings/substitute.cc @@ -94,7 +94,6 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, assert(target == output->data() + output->size()); } -static const char kHexDigits[] = "0123456789abcdef"; Arg::Arg(const void* value) { static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2, "fix sizeof(scratch_)"); @@ -104,7 +103,7 @@ Arg::Arg(const void* value) { char* ptr = scratch_ + sizeof(scratch_); uintptr_t num = reinterpret_cast(value); do { - *--ptr = kHexDigits[num & 0xf]; + *--ptr = absl::numbers_internal::kHexChar[num & 0xf]; num >>= 4; } while (num != 0); *--ptr = 'x'; @@ -119,7 +118,7 @@ Arg::Arg(Hex hex) { char* writer = end; uint64_t value = hex.value; do { - *--writer = kHexDigits[value & 0xF]; + *--writer = absl::numbers_internal::kHexChar[value & 0xF]; value >>= 4; } while (value != 0); -- cgit v1.2.3