diff options
author | Abseil Team <absl-team@google.com> | 2017-09-27 10:50:48 -0700 |
---|---|---|
committer | Derek Mauro <dmauro@google.com> | 2017-09-27 16:03:52 -0400 |
commit | cdf20caa491f59c0a35a8d8fbec0d948e4bc7e4c (patch) | |
tree | 1d3dc27123440a6745c85bc6ae8265da0891a56b /absl/strings | |
parent | 7a64d73e1ef61b15971f8cc90876a5128729b0d4 (diff) |
Changes imported from Abseil "staging" branch:
- 43853019b439efb32c79d5d50e24508588e1bbe0 Undo the not applying qualifications to absl types in enc... by Derek Mauro <dmauro@google.com>
- 06d62a10621c9864279ee57097069cfe3cb7b42a fix capitalization by Abseil Team <absl-team@google.com>
- 22adbfee340bb452ba38b68975ade6f072859c4a Fix indices in str_split.h comments. by Derek Mauro <dmauro@google.com>
- ae5143a559ad8633a78cd76620e30a781006d088 Fix the inconsistent licenses directives in the BUILD fil... by Derek Mauro <dmauro@google.com>
- 0a76a3653b2ecfdad433d3e2f5b651c4ecdcf74b Remove strip.cc, fastmem.h, and fastmem_test.cc from the ... by Derek Mauro <dmauro@google.com>
- 77908cfce5927aabca1f8d62481106f22cfc1936 Internal change. by Derek Mauro <dmauro@google.com>
- d3277b4171f37e22ab346becb5e295c36c7a0219 Be consistent in (not) applying qualifications for enclos... by Abseil Team <absl-team@google.com>
- 9ec7f8164e7d6a5f64288a7360a346628393cc50 Add std:: qualification to isnan and isinf in duration_te... by Derek Mauro <dmauro@google.com>
- 9f7c87d7764ddba05286fabca1f4f15285f3250a Fix typos in string_view comments. by Abseil Team <absl-team@google.com>
- 281860804f8053143d969b99876e3dbc6deb1236 Fix typo in container.h docs. by Abseil Team <absl-team@google.com>
- 0b0a9388c7a9d7f72349d44b5b46132f45bde56c Add bazel-* symlinks to gitignore. by Michael Pratt <mpratt@google.com>
GitOrigin-RevId: 43853019b439efb32c79d5d50e24508588e1bbe0
Change-Id: I9e74a5430816a34ecf1acb86486ed3b0bd12a1d6
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/BUILD.bazel | 1 | ||||
-rw-r--r-- | absl/strings/internal/fastmem.h | 215 | ||||
-rw-r--r-- | absl/strings/internal/fastmem_test.cc | 453 | ||||
-rw-r--r-- | absl/strings/str_split.h | 8 | ||||
-rw-r--r-- | absl/strings/string_view.h | 4 | ||||
-rw-r--r-- | absl/strings/strip.cc | 268 |
6 files changed, 6 insertions, 943 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index c28f89e1..b2610663 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -86,7 +86,6 @@ cc_library( ], hdrs = [ "internal/char_map.h", - "internal/fastmem.h", "internal/ostringstream.h", "internal/resize_uninitialized.h", "internal/utf8.h", diff --git a/absl/strings/internal/fastmem.h b/absl/strings/internal/fastmem.h deleted file mode 100644 index 9989b12e..00000000 --- a/absl/strings/internal/fastmem.h +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Fast memory copying and comparison routines. -// strings::fastmemcmp_inlined() replaces memcmp() -// strings::memcpy_inlined() replaces memcpy() -// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0 -// -// strings::*_inlined() routines are inline versions of the -// routines exported by this module. Sometimes using the inlined -// versions is faster. Measure before using the inlined versions. -// - -#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_ -#define ABSL_STRINGS_INTERNAL_FASTMEM_H_ - -#ifdef __SSE4_1__ -#include <immintrin.h> -#endif -#include <cstddef> -#include <cstdint> -#include <cstdio> -#include <cstring> - -#include "absl/base/internal/unaligned_access.h" -#include "absl/base/macros.h" -#include "absl/base/port.h" - -namespace absl { -namespace strings_internal { - -// Return true if the n bytes at a equal the n bytes at b. -// The regions are allowed to overlap. -// -// The performance is similar to the performance of memcmp(), but faster for -// moderately-sized inputs, or inputs that share a common prefix and differ -// somewhere in their last 8 bytes. Further optimizations can be added later -// if it makes sense to do so. Alternatively, if the compiler & runtime improve -// to eliminate the need for this, we can remove it. -inline bool memeq(const char* a, const char* b, size_t n) { - size_t n_rounded_down = n & ~static_cast<size_t>(7); - if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 - return memcmp(a, b, n) == 0; - } - // n >= 8 - { - uint64_t u = - ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b); - uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^ - ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8); - if ((u | v) != 0) { // The first or last 8 bytes differ. - return false; - } - } - // The next line forces n to be a multiple of 8. - n = n_rounded_down; - if (n >= 80) { - // In 2013 or later, this should be fast on long strings. - return memcmp(a, b, n) == 0; - } - // Now force n to be a multiple of 16. Arguably, a "switch" would be smart - // here, but there's a difficult-to-evaluate code size vs. speed issue. The - // current approach often re-compares some bytes (worst case is if n initially - // was 16, 32, 48, or 64), but is fairly short. - size_t e = n & 8; - a += e; - b += e; - n -= e; - // n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks. - while (n > 0) { -#ifdef __SSE4_1__ - __m128i u = - _mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)), - _mm_loadu_si128(reinterpret_cast<const __m128i*>(b))); - if (!_mm_test_all_zeros(u, u)) { - return false; - } -#else - uint64_t x = - ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b); - uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^ - ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8); - if ((x | y) != 0) { - return false; - } -#endif - a += 16; - b += 16; - n -= 16; - } - return true; -} - -inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) { - const unsigned char* pa = static_cast<const unsigned char*>(va); - const unsigned char* pb = static_cast<const unsigned char*>(vb); - switch (n) { - default: - return memcmp(va, vb, n); - case 7: - if (*pa != *pb) return *pa < *pb ? -1 : +1; - ++pa; - ++pb; - ABSL_FALLTHROUGH_INTENDED; - case 6: - if (*pa != *pb) return *pa < *pb ? -1 : +1; - ++pa; - ++pb; - ABSL_FALLTHROUGH_INTENDED; - case 5: - if (*pa != *pb) return *pa < *pb ? -1 : +1; - ++pa; - ++pb; - ABSL_FALLTHROUGH_INTENDED; - case 4: - if (*pa != *pb) return *pa < *pb ? -1 : +1; - ++pa; - ++pb; - ABSL_FALLTHROUGH_INTENDED; - case 3: - if (*pa != *pb) return *pa < *pb ? -1 : +1; - ++pa; - ++pb; - ABSL_FALLTHROUGH_INTENDED; - case 2: - if (*pa != *pb) return *pa < *pb ? -1 : +1; - ++pa; - ++pb; - ABSL_FALLTHROUGH_INTENDED; - case 1: - if (*pa != *pb) return *pa < *pb ? -1 : +1; - ABSL_FALLTHROUGH_INTENDED; - case 0: - break; - } - return 0; -} - -// The standard memcpy operation is slow for variable small sizes. -// This implementation inlines the optimal realization for sizes 1 to 16. -// To avoid code bloat don't use it in case of not performance-critical spots, -// nor when you don't expect very frequent values of size <= 16. -inline void memcpy_inlined(char* dst, const char* src, size_t size) { - // Compiler inlines code with minimal amount of data movement when third - // parameter of memcpy is a constant. - switch (size) { - case 1: - memcpy(dst, src, 1); - break; - case 2: - memcpy(dst, src, 2); - break; - case 3: - memcpy(dst, src, 3); - break; - case 4: - memcpy(dst, src, 4); - break; - case 5: - memcpy(dst, src, 5); - break; - case 6: - memcpy(dst, src, 6); - break; - case 7: - memcpy(dst, src, 7); - break; - case 8: - memcpy(dst, src, 8); - break; - case 9: - memcpy(dst, src, 9); - break; - case 10: - memcpy(dst, src, 10); - break; - case 11: - memcpy(dst, src, 11); - break; - case 12: - memcpy(dst, src, 12); - break; - case 13: - memcpy(dst, src, 13); - break; - case 14: - memcpy(dst, src, 14); - break; - case 15: - memcpy(dst, src, 15); - break; - case 16: - memcpy(dst, src, 16); - break; - default: - memcpy(dst, src, size); - break; - } -} - -} // namespace strings_internal -} // namespace absl - -#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_ diff --git a/absl/strings/internal/fastmem_test.cc b/absl/strings/internal/fastmem_test.cc deleted file mode 100644 index 7c670f96..00000000 --- a/absl/strings/internal/fastmem_test.cc +++ /dev/null @@ -1,453 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/strings/internal/fastmem.h" - -#include <memory> -#include <random> -#include <string> - -#include "base/init_google.h" -#include "base/logging.h" -#include "testing/base/public/benchmark.h" -#include "gtest/gtest.h" - -namespace { - -using RandomEngine = std::minstd_rand0; - -void VerifyResults(const int r1, const int r2, const std::string& a, - const std::string& b) { - CHECK_EQ(a.size(), b.size()); - if (r1 == 0) { - EXPECT_EQ(r2, 0) << a << " " << b; - } else if (r1 > 0) { - EXPECT_GT(r2, 0) << a << " " << b; - } else { - EXPECT_LT(r2, 0) << a << " " << b; - } - if ((r1 == 0) == (r2 == 0)) { - EXPECT_EQ(r1 == 0, - absl::strings_internal::memeq(a.data(), b.data(), a.size())) - << r1 << " " << a << " " << b; - } -} - -// Check correctness against glibc's memcmp implementation -void CheckSingle(const std::string& a, const std::string& b) { - CHECK_EQ(a.size(), b.size()); - const int r1 = memcmp(a.data(), b.data(), a.size()); - const int r2 = - absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), a.size()); - VerifyResults(r1, r2, a, b); -} - -void GenerateString(size_t len, std::string* s) { - s->clear(); - for (int i = 0; i < len; i++) { - *s += ('a' + (i % 26)); - } -} - -void CheckCompare(const std::string& a, const std::string& b) { - CheckSingle(a, b); - for (int common = 0; common <= 32; common++) { - std::string extra; - GenerateString(common, &extra); - CheckSingle(extra + a, extra + b); - CheckSingle(a + extra, b + extra); - for (char c1 = 'a'; c1 <= 'c'; c1++) { - for (char c2 = 'a'; c2 <= 'c'; c2++) { - CheckSingle(extra + c1 + a, extra + c2 + b); - } - } - } -} - -TEST(FastCompare, Misc) { - CheckCompare("", ""); - - CheckCompare("a", "a"); - CheckCompare("ab", "ab"); - CheckCompare("abc", "abc"); - CheckCompare("abcd", "abcd"); - CheckCompare("abcde", "abcde"); - - CheckCompare("a", "x"); - CheckCompare("ab", "xb"); - CheckCompare("abc", "xbc"); - CheckCompare("abcd", "xbcd"); - CheckCompare("abcde", "xbcde"); - - CheckCompare("x", "a"); - CheckCompare("xb", "ab"); - CheckCompare("xbc", "abc"); - CheckCompare("xbcd", "abcd"); - CheckCompare("xbcde", "abcde"); - - CheckCompare("a", "x"); - CheckCompare("ab", "ax"); - CheckCompare("abc", "abx"); - CheckCompare("abcd", "abcx"); - CheckCompare("abcde", "abcdx"); - - CheckCompare("x", "a"); - CheckCompare("ax", "ab"); - CheckCompare("abx", "abc"); - CheckCompare("abcx", "abcd"); - CheckCompare("abcdx", "abcde"); - - for (int len = 0; len < 1000; len++) { - std::string p(len, 'z'); - CheckCompare(p + "x", p + "a"); - CheckCompare(p + "ax", p + "ab"); - CheckCompare(p + "abx", p + "abc"); - CheckCompare(p + "abcx", p + "abcd"); - CheckCompare(p + "abcdx", p + "abcde"); - } -} - -TEST(FastCompare, TrailingByte) { - for (int i = 0; i < 256; i++) { - for (int j = 0; j < 256; j++) { - std::string a(1, i); - std::string b(1, j); - CheckSingle(a, b); - } - } -} - -// Check correctness of memcpy_inlined. -void CheckSingleMemcpyInlined(const std::string& a) { - std::unique_ptr<char[]> destination(new char[a.size() + 2]); - destination[0] = 'x'; - destination[a.size() + 1] = 'x'; - absl::strings_internal::memcpy_inlined(destination.get() + 1, a.data(), - a.size()); - CHECK_EQ('x', destination[0]); - CHECK_EQ('x', destination[a.size() + 1]); - CHECK_EQ(0, memcmp(a.data(), destination.get() + 1, a.size())); -} - -TEST(MemCpyInlined, Misc) { - CheckSingleMemcpyInlined(""); - CheckSingleMemcpyInlined("0"); - CheckSingleMemcpyInlined("012"); - CheckSingleMemcpyInlined("0123"); - CheckSingleMemcpyInlined("01234"); - CheckSingleMemcpyInlined("012345"); - CheckSingleMemcpyInlined("0123456"); - CheckSingleMemcpyInlined("01234567"); - CheckSingleMemcpyInlined("012345678"); - CheckSingleMemcpyInlined("0123456789"); - CheckSingleMemcpyInlined("0123456789a"); - CheckSingleMemcpyInlined("0123456789ab"); - CheckSingleMemcpyInlined("0123456789abc"); - CheckSingleMemcpyInlined("0123456789abcd"); - CheckSingleMemcpyInlined("0123456789abcde"); - CheckSingleMemcpyInlined("0123456789abcdef"); - CheckSingleMemcpyInlined("0123456789abcdefg"); -} - -template <typename Function> -inline void CopyLoop(benchmark::State& state, int size, Function func) { - char* src = new char[size]; - char* dst = new char[size]; - memset(src, 'x', size); - memset(dst, 'y', size); - for (auto _ : state) { - func(dst, src, size); - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); - CHECK_EQ(dst[0], 'x'); - delete[] src; - delete[] dst; -} - -void BM_memcpy(benchmark::State& state) { - CopyLoop(state, state.range(0), memcpy); -} -BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20); - -void BM_memcpy_inlined(benchmark::State& state) { - CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined); -} -BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20); - -// unaligned memcpy -void BM_unaligned_memcpy(benchmark::State& state) { - const int n = state.range(0); - const int kMaxOffset = 32; - char* src = new char[n + kMaxOffset]; - char* dst = new char[n + kMaxOffset]; - memset(src, 'x', n + kMaxOffset); - int r = 0, i = 0; - for (auto _ : state) { - memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n); - r += dst[0]; - ++i; - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); - delete[] src; - delete[] dst; - benchmark::DoNotOptimize(r); -} -BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20); - -// memmove worst case: heavy overlap, but not always by the same amount. -// Also, the source and destination will often be unaligned. -void BM_memmove_worst_case(benchmark::State& state) { - const int n = state.range(0); - const int32_t kDeterministicSeed = 301; - const int kMaxOffset = 32; - char* src = new char[n + kMaxOffset]; - memset(src, 'x', n + kMaxOffset); - size_t offsets[64]; - RandomEngine rng(kDeterministicSeed); - std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset); - for (size_t& offset : offsets) { - offset = random_to_max_offset(rng); - } - int r = 0, i = 0; - for (auto _ : state) { - memmove(src + offsets[i], src + offsets[i + 1], n); - r += src[0]; - i = (i + 2) % arraysize(offsets); - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); - delete[] src; - benchmark::DoNotOptimize(r); -} -BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20); - -// memmove cache-friendly: aligned and overlapping with 4k -// between the source and destination addresses. -void BM_memmove_cache_friendly(benchmark::State& state) { - const int n = state.range(0); - char* src = new char[n + 4096]; - memset(src, 'x', n); - int r = 0; - while (state.KeepRunningBatch(2)) { // count each memmove as an iteration - memmove(src + 4096, src, n); - memmove(src, src + 4096, n); - r += src[0]; - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); - delete[] src; - benchmark::DoNotOptimize(r); -} -BENCHMARK(BM_memmove_cache_friendly) - ->Arg(5 * 1024) - ->Arg(10 * 1024) - ->Range(16 << 10, 8 << 20); - -// memmove best(?) case: aligned and non-overlapping. -void BM_memmove_aligned_non_overlapping(benchmark::State& state) { - CopyLoop(state, state.range(0), memmove); -} -BENCHMARK(BM_memmove_aligned_non_overlapping) - ->DenseRange(1, 18) - ->Range(32, 8 << 20); - -// memset speed -void BM_memset(benchmark::State& state) { - const int n = state.range(0); - char* dst = new char[n]; - int r = 0; - for (auto _ : state) { - memset(dst, 'x', n); - r += dst[0]; - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); - delete[] dst; - benchmark::DoNotOptimize(r); -} -BENCHMARK(BM_memset)->Range(8, 4096 << 10); - -// Bandwidth (vectorization?) test: the ideal generated code will be limited -// by memory bandwidth. Even so-so generated code will max out memory bandwidth -// on some machines. -void BM_membandwidth(benchmark::State& state) { - const int n = state.range(0); - CHECK_EQ(n % 32, 0); // We will read 32 bytes per iter. - char* dst = new char[n]; - int r = 0; - for (auto _ : state) { - const uint32_t* p = reinterpret_cast<uint32_t*>(dst); - const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n); - uint32_t x = 0; - while (p < limit) { - x += p[0]; - x += p[1]; - x += p[2]; - x += p[3]; - x += p[4]; - x += p[5]; - x += p[6]; - x += p[7]; - p += 8; - } - r += x; - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n); - delete[] dst; - benchmark::DoNotOptimize(r); -} -BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10); - -// Helper for benchmarks. Repeatedly compares two strings that are -// either equal or different only in one character. If test_equal_strings -// is false then position_to_modify determines where the difference will be. -template <typename Function> -ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop( - benchmark::State& state, bool test_equal_strings, - std::string::size_type position_to_modify, int size, Function func) { - const int kIterMult = 4; // Iteration multiplier for better timing resolution - CHECK_GT(size, 0); - const bool position_to_modify_is_valid = - position_to_modify != std::string::npos && position_to_modify < size; - CHECK_NE(position_to_modify_is_valid, test_equal_strings); - if (!position_to_modify_is_valid) { - position_to_modify = 0; - } - std::string sa(size, 'a'); - std::string sb = sa; - char last = sa[size - 1]; - int num = 0; - for (auto _ : state) { - for (int i = 0; i < kIterMult; ++i) { - sb[position_to_modify] = test_equal_strings ? last : last ^ 1; - num += func(sa, sb); - } - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); - benchmark::DoNotOptimize(num); -} - -// Helper for benchmarks. Repeatedly compares two memory regions that are -// either equal or different only in their final character. -template <typename Function> -ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state, - bool test_equal_strings, - int size, Function func) { - const int kIterMult = 4; // Iteration multiplier for better timing resolution - CHECK_GT(size, 0); - char* data = static_cast<char*>(malloc(size * 2)); - memset(data, 'a', size * 2); - char* a = data; - char* b = data + size; - char last = a[size - 1]; - int num = 0; - for (auto _ : state) { - for (int i = 0; i < kIterMult; ++i) { - b[size - 1] = test_equal_strings ? last : last ^ 1; - num += func(a, b, size); - } - } - state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size); - benchmark::DoNotOptimize(num); - free(data); -} - -void BM_memcmp(benchmark::State& state) { - CompareLoop(state, false, state.range(0), memcmp); -} -BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20); - -void BM_fastmemcmp_inlined(benchmark::State& state) { - CompareLoop(state, false, state.range(0), - absl::strings_internal::fastmemcmp_inlined); -} -BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20); - -void BM_memeq(benchmark::State& state) { - CompareLoop(state, false, state.range(0), absl::strings_internal::memeq); -} -BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20); - -void BM_memeq_equal(benchmark::State& state) { - CompareLoop(state, true, state.range(0), absl::strings_internal::memeq); -} -BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20); - -bool StringLess(const std::string& x, const std::string& y) { return x < y; } -bool StringEqual(const std::string& x, const std::string& y) { return x == y; } -bool StdEqual(const std::string& x, const std::string& y) { - return x.size() == y.size() && - std::equal(x.data(), x.data() + x.size(), y.data()); -} - -// Benchmark for x < y, where x and y are strings that differ in only their -// final char. That should be more-or-less the worst case for <. -void BM_string_less(benchmark::State& state) { - StringCompareLoop(state, false, state.range(0) - 1, state.range(0), - StringLess); -} -BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20); - -// Benchmark for x < y, where x and y are strings that differ in only their -// first char. That should be more-or-less the best case for <. -void BM_string_less_easy(benchmark::State& state) { - StringCompareLoop(state, false, 0, state.range(0), StringLess); -} -BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20); - -void BM_string_equal(benchmark::State& state) { - StringCompareLoop(state, false, state.range(0) - 1, state.range(0), - StringEqual); -} -BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20); - -void BM_string_equal_equal(benchmark::State& state) { - StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual); -} -BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20); - -void BM_std_equal(benchmark::State& state) { - StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual); -} -BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20); - -void BM_std_equal_equal(benchmark::State& state) { - StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual); -} -BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20); - -void BM_string_equal_unequal_lengths(benchmark::State& state) { - const int size = state.range(0); - std::string a(size, 'a'); - std::string b(size + 1, 'a'); - int count = 0; - for (auto _ : state) { - b[size - 1] = 'a'; - count += (a == b); - } - benchmark::DoNotOptimize(count); -} -BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20); - -void BM_stdstring_equal_unequal_lengths(benchmark::State& state) { - const int size = state.range(0); - std::string a(size, 'a'); - std::string b(size + 1, 'a'); - int count = 0; - for (auto _ : state) { - b[size - 1] = 'a'; - count += (a == b); - } - benchmark::DoNotOptimize(count); -} -BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20); - -} // namespace diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index a7b48b18..5b3d6a8a 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -118,7 +118,7 @@ namespace absl { // using absl::ByString; // std::vector<std::string> v2 = absl::StrSplit("a, b, c", // ByString(", ")); -// // v[0] == "a", v[1] == "b", v[3] == "c" +// // v[0] == "a", v[1] == "b", v[2] == "c" class ByString { public: explicit ByString(absl::string_view sp); @@ -141,7 +141,7 @@ class ByString { // std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); // using absl::ByChar; // std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(',')); -// // v[0] == "a", v[1] == "b", v[3] == "c" +// // v[0] == "a", v[1] == "b", v[2] == "c" // // `ByChar` is also the default delimiter if a single character is given // as the delimiter to `StrSplit()`. For example, the following calls are @@ -173,7 +173,7 @@ class ByChar { // // using absl::ByAnyChar; // std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); -// // v[0] == "a", v[1] == "b", v[3] == "c" +// // v[0] == "a", v[1] == "b", v[2] == "c" // // If `ByAnyChar` is given the empty std::string, it behaves exactly like // `ByString` and matches each individual character in the input std::string. @@ -390,7 +390,7 @@ struct SkipWhitespace { // // using absl::ByAnyChar; // std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); -// // v[0] == "a", v[1] == "b", v[3] == "c" +// // v[0] == "a", v[1] == "b", v[2] == "c" // // See above for more information on delimiters. // diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 8e37acb8..5dd08b3a 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -339,8 +339,8 @@ class string_view { // string_view::substr() // - // Returns a "substring" of the `string_view` (at offset `post` and length - // `n`) as another std::string views. This function throws `std::out_of_bounds` if + // Returns a "substring" of the `string_view` (at offset `pos` and length + // `n`) as another string_view. This function throws `std::out_of_bounds` if // `pos > size'. string_view substr(size_type pos, size_type n = npos) const { if (ABSL_PREDICT_FALSE(pos > length_)) diff --git a/absl/strings/strip.cc b/absl/strings/strip.cc deleted file mode 100644 index adc219f1..00000000 --- a/absl/strings/strip.cc +++ /dev/null @@ -1,268 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file contains functions that remove a defined part from the std::string, -// i.e., strip the std::string. - -#include "absl/strings/strip.h" - -#include <algorithm> -#include <cstring> -#include <string> - -#include "absl/strings/ascii.h" -#include "absl/strings/string_view.h" - -// ---------------------------------------------------------------------- -// ReplaceCharacters -// Replaces any occurrence of the character 'remove' (or the characters -// in 'remove') with the character 'replace_with'. -// ---------------------------------------------------------------------- -void ReplaceCharacters(char* str, size_t len, absl::string_view remove, - char replace_with) { - for (char* end = str + len; str != end; ++str) { - if (remove.find(*str) != absl::string_view::npos) { - *str = replace_with; - } - } -} - -void ReplaceCharacters(std::string* s, absl::string_view remove, char replace_with) { - for (char& ch : *s) { - if (remove.find(ch) != absl::string_view::npos) { - ch = replace_with; - } - } -} - -bool StripTrailingNewline(std::string* s) { - if (!s->empty() && (*s)[s->size() - 1] == '\n') { - if (s->size() > 1 && (*s)[s->size() - 2] == '\r') - s->resize(s->size() - 2); - else - s->resize(s->size() - 1); - return true; - } - return false; -} - -// ---------------------------------------------------------------------- -// Misc. stripping routines -// ---------------------------------------------------------------------- -void StripCurlyBraces(std::string* s) { - return StripBrackets('{', '}', s); -} - -void StripBrackets(char left, char right, std::string* s) { - std::string::iterator opencurly = std::find(s->begin(), s->end(), left); - while (opencurly != s->end()) { - std::string::iterator closecurly = std::find(opencurly, s->end(), right); - if (closecurly == s->end()) return; - opencurly = s->erase(opencurly, closecurly + 1); - opencurly = std::find(opencurly, s->end(), left); - } -} - -void StripMarkupTags(std::string* s) { - std::string::iterator output = std::find(s->begin(), s->end(), '<'); - std::string::iterator input = output; - while (input != s->end()) { - if (*input == '<') { - input = std::find(input, s->end(), '>'); - if (input == s->end()) break; - ++input; - } else { - *output++ = *input++; - } - } - s->resize(output - s->begin()); -} - -std::string OutputWithMarkupTagsStripped(const std::string& s) { - std::string result(s); - StripMarkupTags(&result); - return result; -} - -ptrdiff_t TrimStringLeft(std::string* s, absl::string_view remove) { - size_t i = 0; - while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { - ++i; - } - if (i > 0) s->erase(0, i); - return i; -} - -ptrdiff_t TrimStringRight(std::string* s, absl::string_view remove) { - size_t i = s->size(), trimmed = 0; - while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { - --i; - } - if (i < s->size()) { - trimmed = s->size() - i; - s->erase(i); - } - return trimmed; -} - -// Unfortunately, absl::string_view does not have erase, so we've to replicate -// the implementation with remove_prefix()/remove_suffix() -ptrdiff_t TrimStringLeft(absl::string_view* s, absl::string_view remove) { - size_t i = 0; - while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) { - ++i; - } - if (i > 0) s->remove_prefix(i); - return i; -} - -ptrdiff_t TrimStringRight(absl::string_view* s, absl::string_view remove) { - size_t i = s->size(), trimmed = 0; - while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) { - --i; - } - if (i < s->size()) { - trimmed = s->size() - i; - s->remove_suffix(trimmed); - } - return trimmed; -} - -// ---------------------------------------------------------------------- -// Various removal routines -// ---------------------------------------------------------------------- -ptrdiff_t strrm(char* str, char c) { - char* src; - char* dest; - for (src = dest = str; *src != '\0'; ++src) - if (*src != c) *(dest++) = *src; - *dest = '\0'; - return dest - str; -} - -ptrdiff_t memrm(char* str, ptrdiff_t strlen, char c) { - char* src; - char* dest; - for (src = dest = str; strlen-- > 0; ++src) - if (*src != c) *(dest++) = *src; - return dest - str; -} - -ptrdiff_t strrmm(char* str, const char* chars) { - char* src; - char* dest; - for (src = dest = str; *src != '\0'; ++src) { - bool skip = false; - for (const char* c = chars; *c != '\0'; c++) { - if (*src == *c) { - skip = true; - break; - } - } - if (!skip) *(dest++) = *src; - } - *dest = '\0'; - return dest - str; -} - -ptrdiff_t strrmm(std::string* str, const std::string& chars) { - size_t str_len = str->length(); - size_t in_index = str->find_first_of(chars); - if (in_index == std::string::npos) return str_len; - - size_t out_index = in_index++; - - while (in_index < str_len) { - char c = (*str)[in_index++]; - if (chars.find(c) == std::string::npos) (*str)[out_index++] = c; - } - - str->resize(out_index); - return out_index; -} - -// ---------------------------------------------------------------------- -// StripDupCharacters -// Replaces any repeated occurrence of the character 'dup_char' -// with single occurrence. e.g., -// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d" -// Return the number of characters removed -// ---------------------------------------------------------------------- -ptrdiff_t StripDupCharacters(std::string* s, char dup_char, ptrdiff_t start_pos) { - if (start_pos < 0) start_pos = 0; - - // remove dups by compaction in-place - ptrdiff_t input_pos = start_pos; // current reader position - ptrdiff_t output_pos = start_pos; // current writer position - const ptrdiff_t input_end = s->size(); - while (input_pos < input_end) { - // keep current character - const char curr_char = (*s)[input_pos]; - if (output_pos != input_pos) // must copy - (*s)[output_pos] = curr_char; - ++input_pos; - ++output_pos; - - if (curr_char == dup_char) { // skip subsequent dups - while ((input_pos < input_end) && ((*s)[input_pos] == dup_char)) - ++input_pos; - } - } - const ptrdiff_t num_deleted = input_pos - output_pos; - s->resize(s->size() - num_deleted); - return num_deleted; -} - -// ---------------------------------------------------------------------- -// TrimRunsInString -// Removes leading and trailing runs, and collapses middle -// runs of a set of characters into a single character (the -// first one specified in 'remove'). Useful for collapsing -// runs of repeated delimiters, whitespace, etc. E.g., -// TrimRunsInString(&s, " :,()") removes leading and trailing -// delimiter chars and collapses and converts internal runs -// of delimiters to single ' ' characters, so, for example, -// " a:(b):c " -> "a b c" -// "first,last::(area)phone, ::zip" -> "first last area phone zip" -// ---------------------------------------------------------------------- -void TrimRunsInString(std::string* s, absl::string_view remove) { - std::string::iterator dest = s->begin(); - std::string::iterator src_end = s->end(); - for (std::string::iterator src = s->begin(); src != src_end;) { - if (remove.find(*src) == absl::string_view::npos) { - *(dest++) = *(src++); - } else { - // Skip to the end of this run of chars that are in 'remove'. - for (++src; src != src_end; ++src) { - if (remove.find(*src) == absl::string_view::npos) { - if (dest != s->begin()) { - // This is an internal run; collapse it. - *(dest++) = remove[0]; - } - *(dest++) = *(src++); - break; - } - } - } - } - s->erase(dest, src_end); -} - -// ---------------------------------------------------------------------- -// RemoveNullsInString -// Removes any internal \0 characters from the std::string. -// ---------------------------------------------------------------------- -void RemoveNullsInString(std::string* s) { - s->erase(std::remove(s->begin(), s->end(), '\0'), s->end()); -} |