summaryrefslogtreecommitdiff
path: root/absl/strings
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2019-10-18 09:06:29 -0700
committerGravatar Andy Soffer <asoffer@google.com>2019-10-18 12:40:18 -0400
commite4c8d0eb8ef4acb5d7a4252b3b87feb391ef7e41 (patch)
treede2b90294a678bb218a0533784bb9ae532ae869a /absl/strings
parenta15364ce4d88534ae2295127e5d8e32aefb6b446 (diff)
Export of internal Abseil changes
-- a9ac6567c0933d786d68c10011e3f3ff9deedf89 by Greg Falcon <gfalcon@google.com>: Add absl::FunctionRef, a type analogous to the proposed C++23 std::function_ref. Like std::function, FunctionRef can be used to type-erase any callable (invokable) object. However, FunctionRef works by reference: it does not store a copy of the type-erased object. If the wrapped object is destroyed before the FunctionRef, the reference becomes dangling. FunctionRef relates to std::function in much the same way that string_view relates to std::string. Because of these limitations, FunctionRef is best used only as a function argument type, and only where the function will be invoked immediately (rather than saved for later use). When `const std::function<...>&` is used in this way, `absl::FunctionRef<...>` is a better-performing replacement. PiperOrigin-RevId: 275484044 -- 1f7c4df3760f8b93e5a5baf40b070eca1d3f4c98 by Abseil Team <absl-team@google.com>: Add FastHexToBufferZeroPad16() function for blazingly fast hex encoding of uint64_t. PiperOrigin-RevId: 275420901 -- 08d48ac004eba57cf2f1ada827181a2995f74807 by Abseil Team <absl-team@google.com>: Avoid applying the workaround for MSVC's static initialization problems when using clang-cl. PiperOrigin-RevId: 275366326 -- 40be82bd2b34670b5458c0a72a0475086153c2d6 by Abseil Team <absl-team@google.com>: Added comments to SimpleAtof()/SimpleAtod() that clarify that they always use the "C" locale, unlike the standard functions strtod() and strtof() referenced now in the comments. PiperOrigin-RevId: 275355815 -- 086779dacb3f6f2b3ab59947e94e79046bdb1fe1 by Jorg Brown <jorg@google.com>: Move the hex conversion table used by escaping.cc into numbers.h so that other parts of Abseil can more efficiently access it. PiperOrigin-RevId: 275331251 -- 3c4ed1b04e55d96a40cbe70fb70929ffbb0c0432 by Abseil Team <absl-team@google.com>: Avoid applying the workaround for MSVC's static initialization problems when using clang-cl. PiperOrigin-RevId: 275323858 -- 56ceb58ab688c3761978308609b09a1ac2739c9a by Derek Mauro <dmauro@google.com>: Add script for testing on Alpine Linux (for musl test coverage) PiperOrigin-RevId: 275321244 GitOrigin-RevId: a9ac6567c0933d786d68c10011e3f3ff9deedf89 Change-Id: I39799fa03768ddb44f3166200c860e1da4461807
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/BUILD.bazel5
-rw-r--r--absl/strings/CMakeLists.txt2
-rw-r--r--absl/strings/escaping.cc33
-rw-r--r--absl/strings/numbers.cc29
-rw-r--r--absl/strings/numbers.h45
-rw-r--r--absl/strings/numbers_benchmark.cc23
-rw-r--r--absl/strings/numbers_test.cc30
-rw-r--r--absl/strings/str_cat.cc5
-rw-r--r--absl/strings/substitute.cc5
9 files changed, 137 insertions, 40 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index 4863ead2..e38c8ad6 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -413,8 +413,9 @@ cc_test(
deps = [
":pow10_helper",
":strings",
- "//absl/base:core_headers",
"//absl/base:raw_logging_internal",
+ "//absl/random",
+ "//absl/random:distributions",
"@com_google_googletest//:gtest_main",
],
)
@@ -428,6 +429,8 @@ cc_test(
deps = [
":strings",
"//absl/base:raw_logging_internal",
+ "//absl/random",
+ "//absl/random:distributions",
"@com_github_google_benchmark//:benchmark_main",
],
)
diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt
index 3f907957..cd52a472 100644
--- a/absl/strings/CMakeLists.txt
+++ b/absl/strings/CMakeLists.txt
@@ -277,6 +277,8 @@ absl_cc_test(
absl::core_headers
absl::pow10_helper
absl::raw_logging_internal
+ absl::random_random
+ absl::random_distributions
gmock_main
)
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 0d336e3f..88390fbf 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -35,27 +35,6 @@
namespace absl {
namespace {
-// Digit conversion.
-constexpr char kHexChar[] = "0123456789abcdef";
-
-constexpr char kHexTable[513] =
- "000102030405060708090a0b0c0d0e0f"
- "101112131415161718191a1b1c1d1e1f"
- "202122232425262728292a2b2c2d2e2f"
- "303132333435363738393a3b3c3d3e3f"
- "404142434445464748494a4b4c4d4e4f"
- "505152535455565758595a5b5c5d5e5f"
- "606162636465666768696a6b6c6d6e6f"
- "707172737475767778797a7b7c7d7e7f"
- "808182838485868788898a8b8c8d8e8f"
- "909192939495969798999a9b9c9d9e9f"
- "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
- "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
- "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
- "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
- "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
- "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
-
// These are used for the leave_nulls_escaped argument to CUnescapeInternal().
constexpr bool kUnescapeNulls = false;
@@ -348,14 +327,14 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex,
(last_hex_escape && absl::ascii_isxdigit(c)))) {
if (use_hex) {
dest.append("\\" "x");
- dest.push_back(kHexChar[c / 16]);
- dest.push_back(kHexChar[c % 16]);
+ dest.push_back(numbers_internal::kHexChar[c / 16]);
+ dest.push_back(numbers_internal::kHexChar[c % 16]);
is_hex_escape = true;
} else {
dest.append("\\");
- dest.push_back(kHexChar[c / 64]);
- dest.push_back(kHexChar[(c % 64) / 8]);
- dest.push_back(kHexChar[c % 8]);
+ dest.push_back(numbers_internal::kHexChar[c / 64]);
+ dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]);
+ dest.push_back(numbers_internal::kHexChar[c % 8]);
}
} else {
dest.push_back(c);
@@ -1019,7 +998,7 @@ template <typename T>
void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) {
auto dest_ptr = &dest[0];
for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
- const char* hex_p = &kHexTable[*src_ptr * 2];
+ const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
std::copy(hex_p, hex_p + 2, dest_ptr);
}
}
diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc
index 38d14869..d7b94fc1 100644
--- a/absl/strings/numbers.cc
+++ b/absl/strings/numbers.cc
@@ -19,8 +19,8 @@
#include <algorithm>
#include <cassert>
-#include <cfloat> // for DBL_DIG and FLT_DIG
-#include <cmath> // for HUGE_VAL
+#include <cfloat> // for DBL_DIG and FLT_DIG
+#include <cmath> // for HUGE_VAL
#include <cstdint>
#include <cstdio>
#include <cstdlib>
@@ -34,6 +34,7 @@
#include "absl/base/internal/raw_logging.h"
#include "absl/strings/ascii.h"
#include "absl/strings/charconv.h"
+#include "absl/strings/escaping.h"
#include "absl/strings/internal/memutil.h"
#include "absl/strings/match.h"
#include "absl/strings/str_cat.h"
@@ -885,6 +886,28 @@ inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
} // anonymous namespace
namespace numbers_internal {
+
+// Digit conversion.
+ABSL_CONST_INIT const char kHexChar[] = "0123456789abcdef";
+
+ABSL_CONST_INIT const char kHexTable[513] =
+ "000102030405060708090a0b0c0d0e0f"
+ "101112131415161718191a1b1c1d1e1f"
+ "202122232425262728292a2b2c2d2e2f"
+ "303132333435363738393a3b3c3d3e3f"
+ "404142434445464748494a4b4c4d4e4f"
+ "505152535455565758595a5b5c5d5e5f"
+ "606162636465666768696a6b6c6d6e6f"
+ "707172737475767778797a7b7c7d7e7f"
+ "808182838485868788898a8b8c8d8e8f"
+ "909192939495969798999a9b9c9d9e9f"
+ "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
+ "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
+ "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
+ "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
+ "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
+ "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
+
bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
return safe_int_internal<int32_t>(text, value, base);
}
@@ -900,6 +923,6 @@ bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
return safe_uint_internal<uint64_t>(text, value, base);
}
-} // namespace numbers_internal
+} // namespace numbers_internal
} // namespace absl
diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h
index 100839b0..745de67a 100644
--- a/absl/strings/numbers.h
+++ b/absl/strings/numbers.h
@@ -24,6 +24,10 @@
#ifndef ABSL_STRINGS_NUMBERS_H_
#define ABSL_STRINGS_NUMBERS_H_
+#ifdef __SSE4_2__
+#include <x86intrin.h>
+#endif
+
#include <cstddef>
#include <cstdlib>
#include <cstring>
@@ -32,6 +36,8 @@
#include <string>
#include <type_traits>
+#include "absl/base/internal/bits.h"
+#include "absl/base/internal/endian.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/numeric/int128.h"
@@ -54,7 +60,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out);
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a float, which may be rounded on overflow or underflow.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
-// allowed formats for `str`. If any errors are encountered, this function
+// allowed formats for `str`, except SimpleAtof() is locale-indepdent and will
+// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);
@@ -63,7 +70,8 @@ ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out);
// Converts the given string (optionally followed or preceded by ASCII
// whitespace) into a double, which may be rounded on overflow or underflow.
// See https://en.cppreference.com/w/c/string/byte/strtof for details about the
-// allowed formats for `str`. If any errors are encountered, this function
+// allowed formats for `str`, except SimpleAtod is locale-independent and will
+// always use the "C" locale. If any errors are encountered, this function
// returns `false`, leaving `out` in an unspecified state.
ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out);
@@ -84,6 +92,10 @@ ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out);
namespace absl {
namespace numbers_internal {
+// Digit conversion.
+extern const char kHexChar[17]; // 0123456789abcdef
+extern const char kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011...
+
// safe_strto?() functions for implementing SimpleAtoi()
bool safe_strto32_base(absl::string_view text, int32_t* value, int base);
bool safe_strto64_base(absl::string_view text, int64_t* value, int base);
@@ -170,6 +182,35 @@ ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out,
return parsed;
}
+// FastHexToBufferZeroPad16()
+//
+// Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but
+// without the terminating null character. Thus `out` must be of length >= 16.
+// Returns the number of non-pad digits of the output (it can never be zero
+// since 0 has one digit).
+inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) {
+ uint64_t be = absl::big_endian::FromHost64(val);
+#ifdef __SSE4_2__
+ const auto kNibbleMask = _mm_set1_epi8(0xf);
+ const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', 'a', 'b', 'c', 'd', 'e', 'f');
+ auto v = _mm_loadu_si64(reinterpret_cast<__m128i*>(&be)); // load lo dword
+ auto v4 = _mm_srli_epi64(v, 4); // shift 4 right
+ auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes
+ auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles
+ auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars);
+#else
+ for (int i = 0; i < 8; ++i) {
+ auto byte = (be >> (8 * i)) & 0xFF;
+ auto* hex = &absl::numbers_internal::kHexTable[byte * 2];
+ std::memcpy(out + 2 * i, hex, 2);
+ }
+#endif
+ // | 0x1 so that even 0 has 1 digit.
+ return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4;
+}
+
} // namespace numbers_internal
// SimpleAtoi()
diff --git a/absl/strings/numbers_benchmark.cc b/absl/strings/numbers_benchmark.cc
index 54dbedd3..6e79b3e8 100644
--- a/absl/strings/numbers_benchmark.cc
+++ b/absl/strings/numbers_benchmark.cc
@@ -20,6 +20,8 @@
#include "benchmark/benchmark.h"
#include "absl/base/internal/raw_logging.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
#include "absl/strings/numbers.h"
namespace {
@@ -260,4 +262,25 @@ BENCHMARK_TEMPLATE(BM_SimpleAtod, std::string)
->ArgPair(10, 4)
->ArgPair(10, 8);
+void BM_FastHexToBufferZeroPad16(benchmark::State& state) {
+ absl::BitGen rng;
+ std::vector<uint64_t> nums;
+ nums.resize(1000);
+ auto min = std::numeric_limits<uint64_t>::min();
+ auto max = std::numeric_limits<uint64_t>::max();
+ for (auto& num : nums) {
+ num = absl::LogUniform(rng, min, max);
+ }
+
+ char buf[16];
+ while (state.KeepRunningBatch(nums.size())) {
+ for (auto num : nums) {
+ auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(num, buf);
+ benchmark::DoNotOptimize(digits);
+ benchmark::DoNotOptimize(buf);
+ }
+ }
+}
+BENCHMARK(BM_FastHexToBufferZeroPad16);
+
} // namespace
diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc
index d964e562..b92b9a8c 100644
--- a/absl/strings/numbers_test.cc
+++ b/absl/strings/numbers_test.cc
@@ -17,6 +17,7 @@
#include "absl/strings/numbers.h"
#include <sys/types.h>
+
#include <cfenv> // NOLINT(build/c++11)
#include <cinttypes>
#include <climits>
@@ -36,10 +37,11 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/internal/raw_logging.h"
-#include "absl/strings/str_cat.h"
-
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
#include "absl/strings/internal/numbers_test_common.h"
#include "absl/strings/internal/pow10_helper.h"
+#include "absl/strings/str_cat.h"
namespace {
@@ -1187,4 +1189,28 @@ TEST(StrToUint64Base, PrefixOnly) {
}
}
+void TestFastHexToBufferZeroPad16(uint64_t v) {
+ char buf[16];
+ auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(v, buf);
+ absl::string_view res(buf, 16);
+ char buf2[17];
+ snprintf(buf2, sizeof(buf2), "%016" PRIx64, v);
+ EXPECT_EQ(res, buf2) << v;
+ size_t expected_digits = snprintf(buf2, sizeof(buf2), "%" PRIx64, v);
+ EXPECT_EQ(digits, expected_digits) << v;
+}
+
+TEST(FastHexToBufferZeroPad16, Smoke) {
+ TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::min());
+ TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::max());
+ TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::min());
+ TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::max());
+ absl::BitGen rng;
+ for (int i = 0; i < 100000; ++i) {
+ TestFastHexToBufferZeroPad16(
+ absl::LogUniform(rng, std::numeric_limits<uint64_t>::min(),
+ std::numeric_limits<uint64_t>::max()));
+ }
+}
+
} // namespace
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
index 99619445..d5877899 100644
--- a/absl/strings/str_cat.cc
+++ b/absl/strings/str_cat.cc
@@ -15,12 +15,14 @@
#include "absl/strings/str_cat.h"
#include <assert.h>
+
#include <algorithm>
#include <cstdint>
#include <cstring>
#include "absl/strings/ascii.h"
#include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/numbers.h"
namespace absl {
@@ -28,9 +30,8 @@ AlphaNum::AlphaNum(Hex hex) {
char* const end = &digits_[numbers_internal::kFastToBufferSize];
char* writer = end;
uint64_t value = hex.value;
- static const char hexdigits[] = "0123456789abcdef";
do {
- *--writer = hexdigits[value & 0xF];
+ *--writer = absl::numbers_internal::kHexChar[value & 0xF];
value >>= 4;
} while (value != 0);
diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc
index bc176950..36dbfe7d 100644
--- a/absl/strings/substitute.cc
+++ b/absl/strings/substitute.cc
@@ -94,7 +94,6 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format,
assert(target == output->data() + output->size());
}
-static const char kHexDigits[] = "0123456789abcdef";
Arg::Arg(const void* value) {
static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2,
"fix sizeof(scratch_)");
@@ -104,7 +103,7 @@ Arg::Arg(const void* value) {
char* ptr = scratch_ + sizeof(scratch_);
uintptr_t num = reinterpret_cast<uintptr_t>(value);
do {
- *--ptr = kHexDigits[num & 0xf];
+ *--ptr = absl::numbers_internal::kHexChar[num & 0xf];
num >>= 4;
} while (num != 0);
*--ptr = 'x';
@@ -119,7 +118,7 @@ Arg::Arg(Hex hex) {
char* writer = end;
uint64_t value = hex.value;
do {
- *--writer = kHexDigits[value & 0xF];
+ *--writer = absl::numbers_internal::kHexChar[value & 0xF];
value >>= 4;
} while (value != 0);