diff options
author | Abseil Team <absl-team@google.com> | 2019-10-18 09:06:29 -0700 |
---|---|---|
committer | Andy Soffer <asoffer@google.com> | 2019-10-18 12:40:18 -0400 |
commit | e4c8d0eb8ef4acb5d7a4252b3b87feb391ef7e41 (patch) | |
tree | de2b90294a678bb218a0533784bb9ae532ae869a /absl/strings | |
parent | a15364ce4d88534ae2295127e5d8e32aefb6b446 (diff) |
Export of internal Abseil changes
--
a9ac6567c0933d786d68c10011e3f3ff9deedf89 by Greg Falcon <gfalcon@google.com>:
Add absl::FunctionRef, a type analogous to the proposed C++23 std::function_ref.
Like std::function, FunctionRef can be used to type-erase any callable (invokable) object. However, FunctionRef works by reference: it does not store a copy of the type-erased object. If the wrapped object is destroyed before the FunctionRef, the reference becomes dangling.
FunctionRef relates to std::function in much the same way that string_view relates to std::string.
Because of these limitations, FunctionRef is best used only as a function argument type, and only where the function will be invoked immediately (rather than saved for later use). When `const std::function<...>&` is used in this way, `absl::FunctionRef<...>` is a better-performing replacement.
PiperOrigin-RevId: 275484044
--
1f7c4df3760f8b93e5a5baf40b070eca1d3f4c98 by Abseil Team <absl-team@google.com>:
Add FastHexToBufferZeroPad16() function for blazingly fast hex encoding of uint64_t.
PiperOrigin-RevId: 275420901
--
08d48ac004eba57cf2f1ada827181a2995f74807 by Abseil Team <absl-team@google.com>:
Avoid applying the workaround for MSVC's static initialization problems when using clang-cl.
PiperOrigin-RevId: 275366326
--
40be82bd2b34670b5458c0a72a0475086153c2d6 by Abseil Team <absl-team@google.com>:
Added comments to SimpleAtof()/SimpleAtod() that clarify that they
always use the "C" locale, unlike the standard functions strtod()
and strtof() referenced now in the comments.
PiperOrigin-RevId: 275355815
--
086779dacb3f6f2b3ab59947e94e79046bdb1fe1 by Jorg Brown <jorg@google.com>:
Move the hex conversion table used by escaping.cc into numbers.h so
that other parts of Abseil can more efficiently access it.
PiperOrigin-RevId: 275331251
--
3c4ed1b04e55d96a40cbe70fb70929ffbb0c0432 by Abseil Team <absl-team@google.com>:
Avoid applying the workaround for MSVC's static initialization problems when using clang-cl.
PiperOrigin-RevId: 275323858
--
56ceb58ab688c3761978308609b09a1ac2739c9a by Derek Mauro <dmauro@google.com>:
Add script for testing on Alpine Linux (for musl test coverage)
PiperOrigin-RevId: 275321244
GitOrigin-RevId: a9ac6567c0933d786d68c10011e3f3ff9deedf89
Change-Id: I39799fa03768ddb44f3166200c860e1da4461807
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/BUILD.bazel | 5 | ||||
-rw-r--r-- | absl/strings/CMakeLists.txt | 2 | ||||
-rw-r--r-- | absl/strings/escaping.cc | 33 | ||||
-rw-r--r-- | absl/strings/numbers.cc | 29 | ||||
-rw-r--r-- | absl/strings/numbers.h | 45 | ||||
-rw-r--r-- | absl/strings/numbers_benchmark.cc | 23 | ||||
-rw-r--r-- | absl/strings/numbers_test.cc | 30 | ||||
-rw-r--r-- | absl/strings/str_cat.cc | 5 | ||||
-rw-r--r-- | absl/strings/substitute.cc | 5 |
9 files changed, 137 insertions, 40 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 4863ead2..e38c8ad6 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -413,8 +413,9 @@ cc_test( deps = [ ":pow10_helper", ":strings", - "//absl/base:core_headers", "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", "@com_google_googletest//:gtest_main", ], ) @@ -428,6 +429,8 @@ cc_test( deps = [ ":strings", "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", "@com_github_google_benchmark//:benchmark_main", ], ) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 3f907957..cd52a472 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -277,6 +277,8 @@ absl_cc_test( absl::core_headers absl::pow10_helper absl::raw_logging_internal + absl::random_random + absl::random_distributions gmock_main ) diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 0d336e3f..88390fbf 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -35,27 +35,6 @@ namespace absl { namespace { -// Digit conversion. -constexpr char kHexChar[] = "0123456789abcdef"; - -constexpr char kHexTable[513] = - "000102030405060708090a0b0c0d0e0f" - "101112131415161718191a1b1c1d1e1f" - "202122232425262728292a2b2c2d2e2f" - "303132333435363738393a3b3c3d3e3f" - "404142434445464748494a4b4c4d4e4f" - "505152535455565758595a5b5c5d5e5f" - "606162636465666768696a6b6c6d6e6f" - "707172737475767778797a7b7c7d7e7f" - "808182838485868788898a8b8c8d8e8f" - "909192939495969798999a9b9c9d9e9f" - "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" - "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" - "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" - "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" - "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" - "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; - // These are used for the leave_nulls_escaped argument to CUnescapeInternal(). 
constexpr bool kUnescapeNulls = false; @@ -348,14 +327,14 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex, (last_hex_escape && absl::ascii_isxdigit(c)))) { if (use_hex) { dest.append("\\" "x"); - dest.push_back(kHexChar[c / 16]); - dest.push_back(kHexChar[c % 16]); + dest.push_back(numbers_internal::kHexChar[c / 16]); + dest.push_back(numbers_internal::kHexChar[c % 16]); is_hex_escape = true; } else { dest.append("\\"); - dest.push_back(kHexChar[c / 64]); - dest.push_back(kHexChar[(c % 64) / 8]); - dest.push_back(kHexChar[c % 8]); + dest.push_back(numbers_internal::kHexChar[c / 64]); + dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]); + dest.push_back(numbers_internal::kHexChar[c % 8]); } } else { dest.push_back(c); @@ -1019,7 +998,7 @@ template <typename T> void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { auto dest_ptr = &dest[0]; for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { - const char* hex_p = &kHexTable[*src_ptr * 2]; + const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; std::copy(hex_p, hex_p + 2, dest_ptr); } } diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 38d14869..d7b94fc1 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -19,8 +19,8 @@ #include <algorithm> #include <cassert> -#include <cfloat> // for DBL_DIG and FLT_DIG -#include <cmath> // for HUGE_VAL +#include <cfloat> // for DBL_DIG and FLT_DIG +#include <cmath> // for HUGE_VAL #include <cstdint> #include <cstdio> #include <cstdlib> @@ -34,6 +34,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/strings/ascii.h" #include "absl/strings/charconv.h" +#include "absl/strings/escaping.h" #include "absl/strings/internal/memutil.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" @@ -885,6 +886,28 @@ inline bool safe_uint_internal(absl::string_view text, IntType* value_p, } // anonymous namespace namespace numbers_internal { + +// Digit 
conversion. +ABSL_CONST_INIT const char kHexChar[] = "0123456789abcdef"; + +ABSL_CONST_INIT const char kHexTable[513] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + bool safe_strto32_base(absl::string_view text, int32_t* value, int base) { return safe_int_internal<int32_t>(text, value, base); } @@ -900,6 +923,6 @@ bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) { bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) { return safe_uint_internal<uint64_t>(text, value, base); } -} // namespace numbers_internal +} // namespace numbers_internal } // namespace absl diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index 100839b0..745de67a 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -24,6 +24,10 @@ #ifndef ABSL_STRINGS_NUMBERS_H_ #define ABSL_STRINGS_NUMBERS_H_ +#ifdef __SSE4_2__ +#include <x86intrin.h> +#endif + #include <cstddef> #include <cstdlib> #include <cstring> @@ -32,6 +36,8 @@ #include <string> #include <type_traits> +#include "absl/base/internal/bits.h" +#include "absl/base/internal/endian.h" #include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/numeric/int128.h" @@ -54,7 +60,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out); // Converts the given string (optionally followed or preceded by ASCII // whitespace) into a float, which may be rounded on overflow or underflow. 
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the -// allowed formats for `str`. If any errors are encountered, this function +// allowed formats for `str`, except SimpleAtof() is locale-independent and will +// always use the "C" locale. If any errors are encountered, this function // returns `false`, leaving `out` in an unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out); @@ -63,7 +70,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out); // Converts the given string (optionally followed or preceded by ASCII // whitespace) into a double, which may be rounded on overflow or underflow. // See https://en.cppreference.com/w/c/string/byte/strtof for details about the -// allowed formats for `str`. If any errors are encountered, this function +// allowed formats for `str`, except SimpleAtod is locale-independent and will +// always use the "C" locale. If any errors are encountered, this function // returns `false`, leaving `out` in an unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); @@ -84,6 +92,10 @@ ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out); namespace absl { namespace numbers_internal { +// Digit conversion. +extern const char kHexChar[17]; // 0123456789abcdef +extern const char kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011... + // safe_strto?() functions for implementing SimpleAtoi() bool safe_strto32_base(absl::string_view text, int32_t* value, int base); bool safe_strto64_base(absl::string_view text, int64_t* value, int base); @@ -170,6 +182,35 @@ ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out, return parsed; } +// FastHexToBufferZeroPad16() +// +// Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but +// without the terminating null character. Thus `out` must be of length >= 16. 
+// Returns the number of non-pad digits of the output (it can never be zero +// since 0 has one digit). +inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { + uint64_t be = absl::big_endian::FromHost64(val); +#ifdef __SSE4_2__ + const auto kNibbleMask = _mm_set1_epi8(0xf); + const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'); + auto v = _mm_loadu_si64(reinterpret_cast<__m128i*>(&be)); // load lo dword + auto v4 = _mm_srli_epi64(v, 4); // shift 4 right + auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes + auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles + auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars); +#else + for (int i = 0; i < 8; ++i) { + auto byte = (be >> (8 * i)) & 0xFF; + auto* hex = &absl::numbers_internal::kHexTable[byte * 2]; + std::memcpy(out + 2 * i, hex, 2); + } +#endif + // | 0x1 so that even 0 has 1 digit. 
+ return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4; +} + } // namespace numbers_internal // SimpleAtoi() diff --git a/absl/strings/numbers_benchmark.cc b/absl/strings/numbers_benchmark.cc index 54dbedd3..6e79b3e8 100644 --- a/absl/strings/numbers_benchmark.cc +++ b/absl/strings/numbers_benchmark.cc @@ -20,6 +20,8 @@ #include "benchmark/benchmark.h" #include "absl/base/internal/raw_logging.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" #include "absl/strings/numbers.h" namespace { @@ -260,4 +262,25 @@ BENCHMARK_TEMPLATE(BM_SimpleAtod, std::string) ->ArgPair(10, 4) ->ArgPair(10, 8); +void BM_FastHexToBufferZeroPad16(benchmark::State& state) { + absl::BitGen rng; + std::vector<uint64_t> nums; + nums.resize(1000); + auto min = std::numeric_limits<uint64_t>::min(); + auto max = std::numeric_limits<uint64_t>::max(); + for (auto& num : nums) { + num = absl::LogUniform(rng, min, max); + } + + char buf[16]; + while (state.KeepRunningBatch(nums.size())) { + for (auto num : nums) { + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(num, buf); + benchmark::DoNotOptimize(digits); + benchmark::DoNotOptimize(buf); + } + } +} +BENCHMARK(BM_FastHexToBufferZeroPad16); + } // namespace diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index d964e562..b92b9a8c 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -17,6 +17,7 @@ #include "absl/strings/numbers.h" #include <sys/types.h> + #include <cfenv> // NOLINT(build/c++11) #include <cinttypes> #include <climits> @@ -36,10 +37,11 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/internal/raw_logging.h" -#include "absl/strings/str_cat.h" - +#include "absl/random/distributions.h" +#include "absl/random/random.h" #include "absl/strings/internal/numbers_test_common.h" #include "absl/strings/internal/pow10_helper.h" +#include "absl/strings/str_cat.h" namespace { @@ -1187,4 +1189,28 @@ TEST(StrToUint64Base, 
PrefixOnly) { } } +void TestFastHexToBufferZeroPad16(uint64_t v) { + char buf[16]; + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(v, buf); + absl::string_view res(buf, 16); + char buf2[17]; + snprintf(buf2, sizeof(buf2), "%016" PRIx64, v); + EXPECT_EQ(res, buf2) << v; + size_t expected_digits = snprintf(buf2, sizeof(buf2), "%" PRIx64, v); + EXPECT_EQ(digits, expected_digits) << v; +} + +TEST(FastHexToBufferZeroPad16, Smoke) { + TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::max()); + TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::max()); + absl::BitGen rng; + for (int i = 0; i < 100000; ++i) { + TestFastHexToBufferZeroPad16( + absl::LogUniform(rng, std::numeric_limits<uint64_t>::min(), + std::numeric_limits<uint64_t>::max())); + } +} + } // namespace diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index 99619445..d5877899 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -15,12 +15,14 @@ #include "absl/strings/str_cat.h" #include <assert.h> + #include <algorithm> #include <cstdint> #include <cstring> #include "absl/strings/ascii.h" #include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/numbers.h" namespace absl { @@ -28,9 +30,8 @@ AlphaNum::AlphaNum(Hex hex) { char* const end = &digits_[numbers_internal::kFastToBufferSize]; char* writer = end; uint64_t value = hex.value; - static const char hexdigits[] = "0123456789abcdef"; do { - *--writer = hexdigits[value & 0xF]; + *--writer = absl::numbers_internal::kHexChar[value & 0xF]; value >>= 4; } while (value != 0); diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc index bc176950..36dbfe7d 100644 --- a/absl/strings/substitute.cc +++ b/absl/strings/substitute.cc @@ -94,7 +94,6 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, 
assert(target == output->data() + output->size()); } -static const char kHexDigits[] = "0123456789abcdef"; Arg::Arg(const void* value) { static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2, "fix sizeof(scratch_)"); @@ -104,7 +103,7 @@ Arg::Arg(const void* value) { char* ptr = scratch_ + sizeof(scratch_); uintptr_t num = reinterpret_cast<uintptr_t>(value); do { - *--ptr = kHexDigits[num & 0xf]; + *--ptr = absl::numbers_internal::kHexChar[num & 0xf]; num >>= 4; } while (num != 0); *--ptr = 'x'; @@ -119,7 +118,7 @@ Arg::Arg(Hex hex) { char* writer = end; uint64_t value = hex.value; do { - *--writer = kHexDigits[value & 0xF]; + *--writer = absl::numbers_internal::kHexChar[value & 0xF]; value >>= 4; } while (value != 0); |