summary | refs | log | tree | commit | diff
path: root/absl/strings
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings')
-rw-r--r-- absl/strings/BUILD.bazel 1
-rw-r--r-- absl/strings/internal/fastmem.h 215
-rw-r--r-- absl/strings/internal/fastmem_test.cc 453
-rw-r--r-- absl/strings/str_split.h 8
-rw-r--r-- absl/strings/string_view.h 4
-rw-r--r-- absl/strings/strip.cc 268
6 files changed, 6 insertions, 943 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
index c28f89e1..b2610663 100644
--- a/absl/strings/BUILD.bazel
+++ b/absl/strings/BUILD.bazel
@@ -86,7 +86,6 @@ cc_library(
],
hdrs = [
"internal/char_map.h",
- "internal/fastmem.h",
"internal/ostringstream.h",
"internal/resize_uninitialized.h",
"internal/utf8.h",
diff --git a/absl/strings/internal/fastmem.h b/absl/strings/internal/fastmem.h
deleted file mode 100644
index 9989b12e..00000000
--- a/absl/strings/internal/fastmem.h
+++ /dev/null
@@ -1,215 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Fast memory copying and comparison routines.
-// strings::fastmemcmp_inlined() replaces memcmp()
-// strings::memcpy_inlined() replaces memcpy()
-// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0
-//
-// strings::*_inlined() routines are inline versions of the
-// routines exported by this module. Sometimes using the inlined
-// versions is faster. Measure before using the inlined versions.
-//
-
-#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_
-#define ABSL_STRINGS_INTERNAL_FASTMEM_H_
-
-#ifdef __SSE4_1__
-#include <immintrin.h>
-#endif
-#include <cstddef>
-#include <cstdint>
-#include <cstdio>
-#include <cstring>
-
-#include "absl/base/internal/unaligned_access.h"
-#include "absl/base/macros.h"
-#include "absl/base/port.h"
-
-namespace absl {
-namespace strings_internal {
-
-// Return true if the n bytes at a equal the n bytes at b.
-// The regions are allowed to overlap.
-//
-// The performance is similar to the performance of memcmp(), but faster for
-// moderately-sized inputs, or inputs that share a common prefix and differ
-// somewhere in their last 8 bytes. Further optimizations can be added later
-// if it makes sense to do so. Alternatively, if the compiler & runtime improve
-// to eliminate the need for this, we can remove it.
-inline bool memeq(const char* a, const char* b, size_t n) {
- size_t n_rounded_down = n & ~static_cast<size_t>(7);
- if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7
- return memcmp(a, b, n) == 0;
- }
- // n >= 8
- {
- uint64_t u =
- ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
- uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^
- ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8);
- if ((u | v) != 0) { // The first or last 8 bytes differ.
- return false;
- }
- }
- // The next line forces n to be a multiple of 8.
- n = n_rounded_down;
- if (n >= 80) {
- // In 2013 or later, this should be fast on long strings.
- return memcmp(a, b, n) == 0;
- }
- // Now force n to be a multiple of 16. Arguably, a "switch" would be smart
- // here, but there's a difficult-to-evaluate code size vs. speed issue. The
- // current approach often re-compares some bytes (worst case is if n initially
- // was 16, 32, 48, or 64), but is fairly short.
- size_t e = n & 8;
- a += e;
- b += e;
- n -= e;
- // n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks.
- while (n > 0) {
-#ifdef __SSE4_1__
- __m128i u =
- _mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
- _mm_loadu_si128(reinterpret_cast<const __m128i*>(b)));
- if (!_mm_test_all_zeros(u, u)) {
- return false;
- }
-#else
- uint64_t x =
- ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
- uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^
- ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8);
- if ((x | y) != 0) {
- return false;
- }
-#endif
- a += 16;
- b += 16;
- n -= 16;
- }
- return true;
-}
-
-inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) {
- const unsigned char* pa = static_cast<const unsigned char*>(va);
- const unsigned char* pb = static_cast<const unsigned char*>(vb);
- switch (n) {
- default:
- return memcmp(va, vb, n);
- case 7:
- if (*pa != *pb) return *pa < *pb ? -1 : +1;
- ++pa;
- ++pb;
- ABSL_FALLTHROUGH_INTENDED;
- case 6:
- if (*pa != *pb) return *pa < *pb ? -1 : +1;
- ++pa;
- ++pb;
- ABSL_FALLTHROUGH_INTENDED;
- case 5:
- if (*pa != *pb) return *pa < *pb ? -1 : +1;
- ++pa;
- ++pb;
- ABSL_FALLTHROUGH_INTENDED;
- case 4:
- if (*pa != *pb) return *pa < *pb ? -1 : +1;
- ++pa;
- ++pb;
- ABSL_FALLTHROUGH_INTENDED;
- case 3:
- if (*pa != *pb) return *pa < *pb ? -1 : +1;
- ++pa;
- ++pb;
- ABSL_FALLTHROUGH_INTENDED;
- case 2:
- if (*pa != *pb) return *pa < *pb ? -1 : +1;
- ++pa;
- ++pb;
- ABSL_FALLTHROUGH_INTENDED;
- case 1:
- if (*pa != *pb) return *pa < *pb ? -1 : +1;
- ABSL_FALLTHROUGH_INTENDED;
- case 0:
- break;
- }
- return 0;
-}
-
-// The standard memcpy operation is slow for variable small sizes.
-// This implementation inlines the optimal realization for sizes 1 to 16.
-// To avoid code bloat don't use it in case of not performance-critical spots,
-// nor when you don't expect very frequent values of size <= 16.
-inline void memcpy_inlined(char* dst, const char* src, size_t size) {
- // Compiler inlines code with minimal amount of data movement when third
- // parameter of memcpy is a constant.
- switch (size) {
- case 1:
- memcpy(dst, src, 1);
- break;
- case 2:
- memcpy(dst, src, 2);
- break;
- case 3:
- memcpy(dst, src, 3);
- break;
- case 4:
- memcpy(dst, src, 4);
- break;
- case 5:
- memcpy(dst, src, 5);
- break;
- case 6:
- memcpy(dst, src, 6);
- break;
- case 7:
- memcpy(dst, src, 7);
- break;
- case 8:
- memcpy(dst, src, 8);
- break;
- case 9:
- memcpy(dst, src, 9);
- break;
- case 10:
- memcpy(dst, src, 10);
- break;
- case 11:
- memcpy(dst, src, 11);
- break;
- case 12:
- memcpy(dst, src, 12);
- break;
- case 13:
- memcpy(dst, src, 13);
- break;
- case 14:
- memcpy(dst, src, 14);
- break;
- case 15:
- memcpy(dst, src, 15);
- break;
- case 16:
- memcpy(dst, src, 16);
- break;
- default:
- memcpy(dst, src, size);
- break;
- }
-}
-
-} // namespace strings_internal
-} // namespace absl
-
-#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_
diff --git a/absl/strings/internal/fastmem_test.cc b/absl/strings/internal/fastmem_test.cc
deleted file mode 100644
index 7c670f96..00000000
--- a/absl/strings/internal/fastmem_test.cc
+++ /dev/null
@@ -1,453 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "absl/strings/internal/fastmem.h"
-
-#include <memory>
-#include <random>
-#include <string>
-
-#include "base/init_google.h"
-#include "base/logging.h"
-#include "testing/base/public/benchmark.h"
-#include "gtest/gtest.h"
-
-namespace {
-
-using RandomEngine = std::minstd_rand0;
-
-void VerifyResults(const int r1, const int r2, const std::string& a,
- const std::string& b) {
- CHECK_EQ(a.size(), b.size());
- if (r1 == 0) {
- EXPECT_EQ(r2, 0) << a << " " << b;
- } else if (r1 > 0) {
- EXPECT_GT(r2, 0) << a << " " << b;
- } else {
- EXPECT_LT(r2, 0) << a << " " << b;
- }
- if ((r1 == 0) == (r2 == 0)) {
- EXPECT_EQ(r1 == 0,
- absl::strings_internal::memeq(a.data(), b.data(), a.size()))
- << r1 << " " << a << " " << b;
- }
-}
-
-// Check correctness against glibc's memcmp implementation
-void CheckSingle(const std::string& a, const std::string& b) {
- CHECK_EQ(a.size(), b.size());
- const int r1 = memcmp(a.data(), b.data(), a.size());
- const int r2 =
- absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), a.size());
- VerifyResults(r1, r2, a, b);
-}
-
-void GenerateString(size_t len, std::string* s) {
- s->clear();
- for (int i = 0; i < len; i++) {
- *s += ('a' + (i % 26));
- }
-}
-
-void CheckCompare(const std::string& a, const std::string& b) {
- CheckSingle(a, b);
- for (int common = 0; common <= 32; common++) {
- std::string extra;
- GenerateString(common, &extra);
- CheckSingle(extra + a, extra + b);
- CheckSingle(a + extra, b + extra);
- for (char c1 = 'a'; c1 <= 'c'; c1++) {
- for (char c2 = 'a'; c2 <= 'c'; c2++) {
- CheckSingle(extra + c1 + a, extra + c2 + b);
- }
- }
- }
-}
-
-TEST(FastCompare, Misc) {
- CheckCompare("", "");
-
- CheckCompare("a", "a");
- CheckCompare("ab", "ab");
- CheckCompare("abc", "abc");
- CheckCompare("abcd", "abcd");
- CheckCompare("abcde", "abcde");
-
- CheckCompare("a", "x");
- CheckCompare("ab", "xb");
- CheckCompare("abc", "xbc");
- CheckCompare("abcd", "xbcd");
- CheckCompare("abcde", "xbcde");
-
- CheckCompare("x", "a");
- CheckCompare("xb", "ab");
- CheckCompare("xbc", "abc");
- CheckCompare("xbcd", "abcd");
- CheckCompare("xbcde", "abcde");
-
- CheckCompare("a", "x");
- CheckCompare("ab", "ax");
- CheckCompare("abc", "abx");
- CheckCompare("abcd", "abcx");
- CheckCompare("abcde", "abcdx");
-
- CheckCompare("x", "a");
- CheckCompare("ax", "ab");
- CheckCompare("abx", "abc");
- CheckCompare("abcx", "abcd");
- CheckCompare("abcdx", "abcde");
-
- for (int len = 0; len < 1000; len++) {
- std::string p(len, 'z');
- CheckCompare(p + "x", p + "a");
- CheckCompare(p + "ax", p + "ab");
- CheckCompare(p + "abx", p + "abc");
- CheckCompare(p + "abcx", p + "abcd");
- CheckCompare(p + "abcdx", p + "abcde");
- }
-}
-
-TEST(FastCompare, TrailingByte) {
- for (int i = 0; i < 256; i++) {
- for (int j = 0; j < 256; j++) {
- std::string a(1, i);
- std::string b(1, j);
- CheckSingle(a, b);
- }
- }
-}
-
-// Check correctness of memcpy_inlined.
-void CheckSingleMemcpyInlined(const std::string& a) {
- std::unique_ptr<char[]> destination(new char[a.size() + 2]);
- destination[0] = 'x';
- destination[a.size() + 1] = 'x';
- absl::strings_internal::memcpy_inlined(destination.get() + 1, a.data(),
- a.size());
- CHECK_EQ('x', destination[0]);
- CHECK_EQ('x', destination[a.size() + 1]);
- CHECK_EQ(0, memcmp(a.data(), destination.get() + 1, a.size()));
-}
-
-TEST(MemCpyInlined, Misc) {
- CheckSingleMemcpyInlined("");
- CheckSingleMemcpyInlined("0");
- CheckSingleMemcpyInlined("012");
- CheckSingleMemcpyInlined("0123");
- CheckSingleMemcpyInlined("01234");
- CheckSingleMemcpyInlined("012345");
- CheckSingleMemcpyInlined("0123456");
- CheckSingleMemcpyInlined("01234567");
- CheckSingleMemcpyInlined("012345678");
- CheckSingleMemcpyInlined("0123456789");
- CheckSingleMemcpyInlined("0123456789a");
- CheckSingleMemcpyInlined("0123456789ab");
- CheckSingleMemcpyInlined("0123456789abc");
- CheckSingleMemcpyInlined("0123456789abcd");
- CheckSingleMemcpyInlined("0123456789abcde");
- CheckSingleMemcpyInlined("0123456789abcdef");
- CheckSingleMemcpyInlined("0123456789abcdefg");
-}
-
-template <typename Function>
-inline void CopyLoop(benchmark::State& state, int size, Function func) {
- char* src = new char[size];
- char* dst = new char[size];
- memset(src, 'x', size);
- memset(dst, 'y', size);
- for (auto _ : state) {
- func(dst, src, size);
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
- CHECK_EQ(dst[0], 'x');
- delete[] src;
- delete[] dst;
-}
-
-void BM_memcpy(benchmark::State& state) {
- CopyLoop(state, state.range(0), memcpy);
-}
-BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
-
-void BM_memcpy_inlined(benchmark::State& state) {
- CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined);
-}
-BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20);
-
-// unaligned memcpy
-void BM_unaligned_memcpy(benchmark::State& state) {
- const int n = state.range(0);
- const int kMaxOffset = 32;
- char* src = new char[n + kMaxOffset];
- char* dst = new char[n + kMaxOffset];
- memset(src, 'x', n + kMaxOffset);
- int r = 0, i = 0;
- for (auto _ : state) {
- memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n);
- r += dst[0];
- ++i;
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
- delete[] src;
- delete[] dst;
- benchmark::DoNotOptimize(r);
-}
-BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
-
-// memmove worst case: heavy overlap, but not always by the same amount.
-// Also, the source and destination will often be unaligned.
-void BM_memmove_worst_case(benchmark::State& state) {
- const int n = state.range(0);
- const int32_t kDeterministicSeed = 301;
- const int kMaxOffset = 32;
- char* src = new char[n + kMaxOffset];
- memset(src, 'x', n + kMaxOffset);
- size_t offsets[64];
- RandomEngine rng(kDeterministicSeed);
- std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset);
- for (size_t& offset : offsets) {
- offset = random_to_max_offset(rng);
- }
- int r = 0, i = 0;
- for (auto _ : state) {
- memmove(src + offsets[i], src + offsets[i + 1], n);
- r += src[0];
- i = (i + 2) % arraysize(offsets);
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
- delete[] src;
- benchmark::DoNotOptimize(r);
-}
-BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20);
-
-// memmove cache-friendly: aligned and overlapping with 4k
-// between the source and destination addresses.
-void BM_memmove_cache_friendly(benchmark::State& state) {
- const int n = state.range(0);
- char* src = new char[n + 4096];
- memset(src, 'x', n);
- int r = 0;
- while (state.KeepRunningBatch(2)) { // count each memmove as an iteration
- memmove(src + 4096, src, n);
- memmove(src, src + 4096, n);
- r += src[0];
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
- delete[] src;
- benchmark::DoNotOptimize(r);
-}
-BENCHMARK(BM_memmove_cache_friendly)
- ->Arg(5 * 1024)
- ->Arg(10 * 1024)
- ->Range(16 << 10, 8 << 20);
-
-// memmove best(?) case: aligned and non-overlapping.
-void BM_memmove_aligned_non_overlapping(benchmark::State& state) {
- CopyLoop(state, state.range(0), memmove);
-}
-BENCHMARK(BM_memmove_aligned_non_overlapping)
- ->DenseRange(1, 18)
- ->Range(32, 8 << 20);
-
-// memset speed
-void BM_memset(benchmark::State& state) {
- const int n = state.range(0);
- char* dst = new char[n];
- int r = 0;
- for (auto _ : state) {
- memset(dst, 'x', n);
- r += dst[0];
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
- delete[] dst;
- benchmark::DoNotOptimize(r);
-}
-BENCHMARK(BM_memset)->Range(8, 4096 << 10);
-
-// Bandwidth (vectorization?) test: the ideal generated code will be limited
-// by memory bandwidth. Even so-so generated code will max out memory bandwidth
-// on some machines.
-void BM_membandwidth(benchmark::State& state) {
- const int n = state.range(0);
- CHECK_EQ(n % 32, 0); // We will read 32 bytes per iter.
- char* dst = new char[n];
- int r = 0;
- for (auto _ : state) {
- const uint32_t* p = reinterpret_cast<uint32_t*>(dst);
- const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n);
- uint32_t x = 0;
- while (p < limit) {
- x += p[0];
- x += p[1];
- x += p[2];
- x += p[3];
- x += p[4];
- x += p[5];
- x += p[6];
- x += p[7];
- p += 8;
- }
- r += x;
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
- delete[] dst;
- benchmark::DoNotOptimize(r);
-}
-BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10);
-
-// Helper for benchmarks. Repeatedly compares two strings that are
-// either equal or different only in one character. If test_equal_strings
-// is false then position_to_modify determines where the difference will be.
-template <typename Function>
-ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop(
- benchmark::State& state, bool test_equal_strings,
- std::string::size_type position_to_modify, int size, Function func) {
- const int kIterMult = 4; // Iteration multiplier for better timing resolution
- CHECK_GT(size, 0);
- const bool position_to_modify_is_valid =
- position_to_modify != std::string::npos && position_to_modify < size;
- CHECK_NE(position_to_modify_is_valid, test_equal_strings);
- if (!position_to_modify_is_valid) {
- position_to_modify = 0;
- }
- std::string sa(size, 'a');
- std::string sb = sa;
- char last = sa[size - 1];
- int num = 0;
- for (auto _ : state) {
- for (int i = 0; i < kIterMult; ++i) {
- sb[position_to_modify] = test_equal_strings ? last : last ^ 1;
- num += func(sa, sb);
- }
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
- benchmark::DoNotOptimize(num);
-}
-
-// Helper for benchmarks. Repeatedly compares two memory regions that are
-// either equal or different only in their final character.
-template <typename Function>
-ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state,
- bool test_equal_strings,
- int size, Function func) {
- const int kIterMult = 4; // Iteration multiplier for better timing resolution
- CHECK_GT(size, 0);
- char* data = static_cast<char*>(malloc(size * 2));
- memset(data, 'a', size * 2);
- char* a = data;
- char* b = data + size;
- char last = a[size - 1];
- int num = 0;
- for (auto _ : state) {
- for (int i = 0; i < kIterMult; ++i) {
- b[size - 1] = test_equal_strings ? last : last ^ 1;
- num += func(a, b, size);
- }
- }
- state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
- benchmark::DoNotOptimize(num);
- free(data);
-}
-
-void BM_memcmp(benchmark::State& state) {
- CompareLoop(state, false, state.range(0), memcmp);
-}
-BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20);
-
-void BM_fastmemcmp_inlined(benchmark::State& state) {
- CompareLoop(state, false, state.range(0),
- absl::strings_internal::fastmemcmp_inlined);
-}
-BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20);
-
-void BM_memeq(benchmark::State& state) {
- CompareLoop(state, false, state.range(0), absl::strings_internal::memeq);
-}
-BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20);
-
-void BM_memeq_equal(benchmark::State& state) {
- CompareLoop(state, true, state.range(0), absl::strings_internal::memeq);
-}
-BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20);
-
-bool StringLess(const std::string& x, const std::string& y) { return x < y; }
-bool StringEqual(const std::string& x, const std::string& y) { return x == y; }
-bool StdEqual(const std::string& x, const std::string& y) {
- return x.size() == y.size() &&
- std::equal(x.data(), x.data() + x.size(), y.data());
-}
-
-// Benchmark for x < y, where x and y are strings that differ in only their
-// final char. That should be more-or-less the worst case for <.
-void BM_string_less(benchmark::State& state) {
- StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
- StringLess);
-}
-BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20);
-
-// Benchmark for x < y, where x and y are strings that differ in only their
-// first char. That should be more-or-less the best case for <.
-void BM_string_less_easy(benchmark::State& state) {
- StringCompareLoop(state, false, 0, state.range(0), StringLess);
-}
-BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20);
-
-void BM_string_equal(benchmark::State& state) {
- StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
- StringEqual);
-}
-BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
-
-void BM_string_equal_equal(benchmark::State& state) {
- StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual);
-}
-BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
-
-void BM_std_equal(benchmark::State& state) {
- StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual);
-}
-BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
-
-void BM_std_equal_equal(benchmark::State& state) {
- StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual);
-}
-BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
-
-void BM_string_equal_unequal_lengths(benchmark::State& state) {
- const int size = state.range(0);
- std::string a(size, 'a');
- std::string b(size + 1, 'a');
- int count = 0;
- for (auto _ : state) {
- b[size - 1] = 'a';
- count += (a == b);
- }
- benchmark::DoNotOptimize(count);
-}
-BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
-
-void BM_stdstring_equal_unequal_lengths(benchmark::State& state) {
- const int size = state.range(0);
- std::string a(size, 'a');
- std::string b(size + 1, 'a');
- int count = 0;
- for (auto _ : state) {
- b[size - 1] = 'a';
- count += (a == b);
- }
- benchmark::DoNotOptimize(count);
-}
-BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
-
-} // namespace
diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h
index a7b48b18..5b3d6a8a 100644
--- a/absl/strings/str_split.h
+++ b/absl/strings/str_split.h
@@ -118,7 +118,7 @@ namespace absl {
// using absl::ByString;
// std::vector<std::string> v2 = absl::StrSplit("a, b, c",
// ByString(", "));
-// // v[0] == "a", v[1] == "b", v[3] == "c"
+// // v[0] == "a", v[1] == "b", v[2] == "c"
class ByString {
public:
explicit ByString(absl::string_view sp);
@@ -141,7 +141,7 @@ class ByString {
// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
// using absl::ByChar;
// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
-// // v[0] == "a", v[1] == "b", v[3] == "c"
+// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// `ByChar` is also the default delimiter if a single character is given
// as the delimiter to `StrSplit()`. For example, the following calls are
@@ -173,7 +173,7 @@ class ByChar {
//
// using absl::ByAnyChar;
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
-// // v[0] == "a", v[1] == "b", v[3] == "c"
+// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// If `ByAnyChar` is given the empty std::string, it behaves exactly like
// `ByString` and matches each individual character in the input std::string.
@@ -390,7 +390,7 @@ struct SkipWhitespace {
//
// using absl::ByAnyChar;
// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
-// // v[0] == "a", v[1] == "b", v[3] == "c"
+// // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See above for more information on delimiters.
//
diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h
index 8e37acb8..5dd08b3a 100644
--- a/absl/strings/string_view.h
+++ b/absl/strings/string_view.h
@@ -339,8 +339,8 @@ class string_view {
// string_view::substr()
//
- // Returns a "substring" of the `string_view` (at offset `post` and length
- // `n`) as another std::string views. This function throws `std::out_of_bounds` if
+ // Returns a "substring" of the `string_view` (at offset `pos` and length
+ // `n`) as another string_view. This function throws `std::out_of_bounds` if
// `pos > size'.
string_view substr(size_type pos, size_type n = npos) const {
if (ABSL_PREDICT_FALSE(pos > length_))
diff --git a/absl/strings/strip.cc b/absl/strings/strip.cc
deleted file mode 100644
index adc219f1..00000000
--- a/absl/strings/strip.cc
+++ /dev/null
@@ -1,268 +0,0 @@
-// Copyright 2017 The Abseil Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// This file contains functions that remove a defined part from the std::string,
-// i.e., strip the std::string.
-
-#include "absl/strings/strip.h"
-
-#include <algorithm>
-#include <cstring>
-#include <string>
-
-#include "absl/strings/ascii.h"
-#include "absl/strings/string_view.h"
-
-// ----------------------------------------------------------------------
-// ReplaceCharacters
-// Replaces any occurrence of the character 'remove' (or the characters
-// in 'remove') with the character 'replace_with'.
-// ----------------------------------------------------------------------
-void ReplaceCharacters(char* str, size_t len, absl::string_view remove,
- char replace_with) {
- for (char* end = str + len; str != end; ++str) {
- if (remove.find(*str) != absl::string_view::npos) {
- *str = replace_with;
- }
- }
-}
-
-void ReplaceCharacters(std::string* s, absl::string_view remove, char replace_with) {
- for (char& ch : *s) {
- if (remove.find(ch) != absl::string_view::npos) {
- ch = replace_with;
- }
- }
-}
-
-bool StripTrailingNewline(std::string* s) {
- if (!s->empty() && (*s)[s->size() - 1] == '\n') {
- if (s->size() > 1 && (*s)[s->size() - 2] == '\r')
- s->resize(s->size() - 2);
- else
- s->resize(s->size() - 1);
- return true;
- }
- return false;
-}
-
-// ----------------------------------------------------------------------
-// Misc. stripping routines
-// ----------------------------------------------------------------------
-void StripCurlyBraces(std::string* s) {
- return StripBrackets('{', '}', s);
-}
-
-void StripBrackets(char left, char right, std::string* s) {
- std::string::iterator opencurly = std::find(s->begin(), s->end(), left);
- while (opencurly != s->end()) {
- std::string::iterator closecurly = std::find(opencurly, s->end(), right);
- if (closecurly == s->end()) return;
- opencurly = s->erase(opencurly, closecurly + 1);
- opencurly = std::find(opencurly, s->end(), left);
- }
-}
-
-void StripMarkupTags(std::string* s) {
- std::string::iterator output = std::find(s->begin(), s->end(), '<');
- std::string::iterator input = output;
- while (input != s->end()) {
- if (*input == '<') {
- input = std::find(input, s->end(), '>');
- if (input == s->end()) break;
- ++input;
- } else {
- *output++ = *input++;
- }
- }
- s->resize(output - s->begin());
-}
-
-std::string OutputWithMarkupTagsStripped(const std::string& s) {
- std::string result(s);
- StripMarkupTags(&result);
- return result;
-}
-
-ptrdiff_t TrimStringLeft(std::string* s, absl::string_view remove) {
- size_t i = 0;
- while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
- ++i;
- }
- if (i > 0) s->erase(0, i);
- return i;
-}
-
-ptrdiff_t TrimStringRight(std::string* s, absl::string_view remove) {
- size_t i = s->size(), trimmed = 0;
- while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) {
- --i;
- }
- if (i < s->size()) {
- trimmed = s->size() - i;
- s->erase(i);
- }
- return trimmed;
-}
-
-// Unfortunately, absl::string_view does not have erase, so we've to replicate
-// the implementation with remove_prefix()/remove_suffix()
-ptrdiff_t TrimStringLeft(absl::string_view* s, absl::string_view remove) {
- size_t i = 0;
- while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
- ++i;
- }
- if (i > 0) s->remove_prefix(i);
- return i;
-}
-
-ptrdiff_t TrimStringRight(absl::string_view* s, absl::string_view remove) {
- size_t i = s->size(), trimmed = 0;
- while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) {
- --i;
- }
- if (i < s->size()) {
- trimmed = s->size() - i;
- s->remove_suffix(trimmed);
- }
- return trimmed;
-}
-
-// ----------------------------------------------------------------------
-// Various removal routines
-// ----------------------------------------------------------------------
-ptrdiff_t strrm(char* str, char c) {
- char* src;
- char* dest;
- for (src = dest = str; *src != '\0'; ++src)
- if (*src != c) *(dest++) = *src;
- *dest = '\0';
- return dest - str;
-}
-
-ptrdiff_t memrm(char* str, ptrdiff_t strlen, char c) {
- char* src;
- char* dest;
- for (src = dest = str; strlen-- > 0; ++src)
- if (*src != c) *(dest++) = *src;
- return dest - str;
-}
-
-ptrdiff_t strrmm(char* str, const char* chars) {
- char* src;
- char* dest;
- for (src = dest = str; *src != '\0'; ++src) {
- bool skip = false;
- for (const char* c = chars; *c != '\0'; c++) {
- if (*src == *c) {
- skip = true;
- break;
- }
- }
- if (!skip) *(dest++) = *src;
- }
- *dest = '\0';
- return dest - str;
-}
-
-ptrdiff_t strrmm(std::string* str, const std::string& chars) {
- size_t str_len = str->length();
- size_t in_index = str->find_first_of(chars);
- if (in_index == std::string::npos) return str_len;
-
- size_t out_index = in_index++;
-
- while (in_index < str_len) {
- char c = (*str)[in_index++];
- if (chars.find(c) == std::string::npos) (*str)[out_index++] = c;
- }
-
- str->resize(out_index);
- return out_index;
-}
-
-// ----------------------------------------------------------------------
-// StripDupCharacters
-// Replaces any repeated occurrence of the character 'dup_char'
-// with single occurrence. e.g.,
-// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
-// Return the number of characters removed
-// ----------------------------------------------------------------------
-ptrdiff_t StripDupCharacters(std::string* s, char dup_char, ptrdiff_t start_pos) {
- if (start_pos < 0) start_pos = 0;
-
- // remove dups by compaction in-place
- ptrdiff_t input_pos = start_pos; // current reader position
- ptrdiff_t output_pos = start_pos; // current writer position
- const ptrdiff_t input_end = s->size();
- while (input_pos < input_end) {
- // keep current character
- const char curr_char = (*s)[input_pos];
- if (output_pos != input_pos) // must copy
- (*s)[output_pos] = curr_char;
- ++input_pos;
- ++output_pos;
-
- if (curr_char == dup_char) { // skip subsequent dups
- while ((input_pos < input_end) && ((*s)[input_pos] == dup_char))
- ++input_pos;
- }
- }
- const ptrdiff_t num_deleted = input_pos - output_pos;
- s->resize(s->size() - num_deleted);
- return num_deleted;
-}
-
-// ----------------------------------------------------------------------
-// TrimRunsInString
-// Removes leading and trailing runs, and collapses middle
-// runs of a set of characters into a single character (the
-// first one specified in 'remove'). Useful for collapsing
-// runs of repeated delimiters, whitespace, etc. E.g.,
-// TrimRunsInString(&s, " :,()") removes leading and trailing
-// delimiter chars and collapses and converts internal runs
-// of delimiters to single ' ' characters, so, for example,
-// " a:(b):c " -> "a b c"
-// "first,last::(area)phone, ::zip" -> "first last area phone zip"
-// ----------------------------------------------------------------------
-void TrimRunsInString(std::string* s, absl::string_view remove) {
- std::string::iterator dest = s->begin();
- std::string::iterator src_end = s->end();
- for (std::string::iterator src = s->begin(); src != src_end;) {
- if (remove.find(*src) == absl::string_view::npos) {
- *(dest++) = *(src++);
- } else {
- // Skip to the end of this run of chars that are in 'remove'.
- for (++src; src != src_end; ++src) {
- if (remove.find(*src) == absl::string_view::npos) {
- if (dest != s->begin()) {
- // This is an internal run; collapse it.
- *(dest++) = remove[0];
- }
- *(dest++) = *(src++);
- break;
- }
- }
- }
- }
- s->erase(dest, src_end);
-}
-
-// ----------------------------------------------------------------------
-// RemoveNullsInString
-// Removes any internal \0 characters from the std::string.
-// ----------------------------------------------------------------------
-void RemoveNullsInString(std::string* s) {
- s->erase(std::remove(s->begin(), s->end(), '\0'), s->end());
-}