From b2a6c1bca7e177cf033ebe2361e8f2d99fabf9a9 Mon Sep 17 00:00:00 2001 From: Derek Mauro Date: Tue, 11 Jul 2023 10:57:18 -0700 Subject: Cleanup `//absl/strings/internal/memutil.h` `memmatch()` is only used in `string_view.cc`, so move it there. The moving of `memmatch()` to `string_view.cc` decouples `string_view` from `memutil`, which will allow us to move `string_view` into its own target in a followup. The only other function that is used is `memcasecmp()`, so delete all other functions. PiperOrigin-RevId: 547238386 Change-Id: Id6fad47dd24191c8e8f26dd923fffa1007c8db4a --- absl/strings/internal/memutil.cc | 79 +----------- absl/strings/internal/memutil.h | 116 +---------------- absl/strings/internal/memutil_benchmark.cc | 195 ----------------------------- absl/strings/internal/memutil_test.cc | 142 +-------------------- absl/strings/string_view.cc | 30 ++++- 5 files changed, 34 insertions(+), 528 deletions(-) diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc index 44996a75..e2e7347c 100644 --- a/absl/strings/internal/memutil.cc +++ b/absl/strings/internal/memutil.cc @@ -16,6 +16,8 @@ #include +#include "absl/strings/ascii.h" + namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { @@ -33,83 +35,6 @@ int memcasecmp(const char* s1, const char* s2, size_t len) { return 0; } -char* memdup(const char* s, size_t slen) { - void* copy; - if ((copy = malloc(slen)) == nullptr) return nullptr; - memcpy(copy, s, slen); - return reinterpret_cast(copy); -} - -char* memrchr(const char* s, int c, size_t slen) { - for (const char* e = s + slen - 1; e >= s; e--) { - if (*e == c) return const_cast(e); - } - return nullptr; -} - -size_t memspn(const char* s, size_t slen, const char* accept) { - const char* p = s; - const char* spanp; - char c, sc; - -cont: - c = *p++; - if (slen-- == 0) - return static_cast(p - 1 - s); - for (spanp = accept; (sc = *spanp++) != '\0';) - if (sc == c) goto cont; - return static_cast(p - 1 - s); -} - -size_t memcspn(const char* s, size_t slen, const char* reject) { - const char* p = s; - const char* spanp; - char c, sc; - - while (slen-- != 0) { - c = *p++; - for (spanp = reject; (sc = *spanp++) != '\0';) - if (sc == c) - return static_cast(p - 1 - s); - } - return static_cast(p - s); -} - -char* mempbrk(const char* s, size_t slen, const char* accept) { - const char* scanp; - int sc; - - for (; slen; ++s, --slen) { - for (scanp = accept; (sc = *scanp++) != '\0';) - if (sc == *s) return const_cast(s); - } - return nullptr; -} - -// This is significantly faster for case-sensitive matches with very -// few possible matches. See unit test for benchmarks. -const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, - size_t neelen) { - if (0 == neelen) { - return phaystack; // even if haylen is 0 - } - if (haylen < neelen) return nullptr; - - const char* match; - const char* hayend = phaystack + haylen - neelen + 1; - // A static cast is used here to work around the fact that memchr returns - // a void* on Posix-compliant systems and const void* on Windows. - while ( - (match = static_cast(memchr( - phaystack, pneedle[0], static_cast(hayend - phaystack))))) { - if (memcmp(match, pneedle, neelen) == 0) - return match; - else - phaystack = match + 1; - } - return nullptr; -} - } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h index 9ad05358..b5911a01 100644 --- a/absl/strings/internal/memutil.h +++ b/absl/strings/internal/memutil.h @@ -14,51 +14,6 @@ // limitations under the License. // -// These routines provide mem versions of standard C string routines, -// such as strpbrk. They function exactly the same as the str versions, -// so if you wonder what they are, replace the word "mem" by -// "str" and check out the man page. I could return void*, as the -// strutil.h mem*() routines tend to do, but I return char* instead -// since this is by far the most common way these functions are called. -// -// The difference between the mem and str versions is the mem version -// takes a pointer and a length, rather than a '\0'-terminated string. -// The memcase* routines defined here assume the locale is "C" -// (they use absl::ascii_tolower instead of tolower). -// -// These routines are based on the BSD library. -// -// Here's a list of routines from string.h, and their mem analogues. -// Functions in lowercase are defined in string.h; those in UPPERCASE -// are defined here: -// -// strlen -- -// strcat strncat MEMCAT -// strcpy strncpy memcpy -// -- memccpy (very cool function, btw) -// -- memmove -// -- memset -// strcmp strncmp memcmp -// strcasecmp strncasecmp MEMCASECMP -// strchr memchr -// strcoll -- -// strxfrm -- -// strdup strndup MEMDUP -// strrchr MEMRCHR -// strspn MEMSPN -// strcspn MEMCSPN -// strpbrk MEMPBRK -// strstr MEMSTR MEMMEM -// (g)strcasestr MEMCASESTR MEMCASEMEM -// strtok -- -// strprefix MEMPREFIX (strprefix is from strutil.h) -// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h) -// strsuffix MEMSUFFIX (strsuffix is from strutil.h) -// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h) -// -- MEMIS -// -- MEMCASEIS -// strcount MEMCOUNT (strcount is from strutil.h) - #ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ #define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ @@ -72,74 +27,11 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { -inline char* memcat(char* dest, size_t destlen, const char* src, - size_t srclen) { - return reinterpret_cast(memcpy(dest + destlen, src, srclen)); -} - +// Performs a byte-by-byte comparison of `len` bytes of the strings `s1` and +// `s2`, ignoring the case of the characters. It returns an integer less than, +// equal to, or greater than zero if `s1` is found, respectively, to be less +// than, to match, or be greater than `s2`. int memcasecmp(const char* s1, const char* s2, size_t len); -char* memdup(const char* s, size_t slen); -char* memrchr(const char* s, int c, size_t slen); -size_t memspn(const char* s, size_t slen, const char* accept); -size_t memcspn(const char* s, size_t slen, const char* reject); -char* mempbrk(const char* s, size_t slen, const char* accept); - -// This is for internal use only. Don't call this directly -template -const char* int_memmatch(const char* haystack, size_t haylen, - const char* needle, size_t neelen) { - if (0 == neelen) { - return haystack; // even if haylen is 0 - } - const char* hayend = haystack + haylen; - const char* needlestart = needle; - const char* needleend = needlestart + neelen; - - for (; haystack < hayend; ++haystack) { - char hay = case_sensitive - ? *haystack - : absl::ascii_tolower(static_cast(*haystack)); - char nee = case_sensitive - ? *needle - : absl::ascii_tolower(static_cast(*needle)); - if (hay == nee) { - if (++needle == needleend) { - return haystack + 1 - neelen; - } - } else if (needle != needlestart) { - // must back up haystack in case a prefix matched (find "aab" in "aaab") - haystack -= needle - needlestart; // for loop will advance one more - needle = needlestart; - } - } - return nullptr; -} - -// These are the guys you can call directly -inline const char* memstr(const char* phaystack, size_t haylen, - const char* pneedle) { - return int_memmatch(phaystack, haylen, pneedle, strlen(pneedle)); -} - -inline const char* memcasestr(const char* phaystack, size_t haylen, - const char* pneedle) { - return int_memmatch(phaystack, haylen, pneedle, strlen(pneedle)); -} - -inline const char* memmem(const char* phaystack, size_t haylen, - const char* pneedle, size_t needlelen) { - return int_memmatch(phaystack, haylen, pneedle, needlelen); -} - -inline const char* memcasemem(const char* phaystack, size_t haylen, - const char* pneedle, size_t needlelen) { - return int_memmatch(phaystack, haylen, pneedle, needlelen); -} - -// This is significantly faster for case-sensitive matches with very -// few possible matches. See unit test for benchmarks. -const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, - size_t neelen); } // namespace strings_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/memutil_benchmark.cc b/absl/strings/internal/memutil_benchmark.cc index dc95c3e5..61e323a4 100644 --- a/absl/strings/internal/memutil_benchmark.cc +++ b/absl/strings/internal/memutil_benchmark.cc @@ -25,62 +25,6 @@ // - an easy search: 'b' // - a medium search: 'ab'. That means every letter is a possible match. // - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack) -// We benchmark case-sensitive and case-insensitive versions of -// three memmem implementations: -// - memmem() from memutil.h -// - search() from STL -// - memmatch(), a custom implementation using memchr and memcmp. -// Here are sample results: -// -// Run on (12 X 3800 MHz CPU s) -// CPU Caches: -// L1 Data 32K (x6) -// L1 Instruction 32K (x6) -// L2 Unified 256K (x6) -// L3 Unified 15360K (x1) -// ---------------------------------------------------------------- -// Benchmark Time CPU Iterations -// ---------------------------------------------------------------- -// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s -// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s -// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s -// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s -// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s -// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s -// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s -// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s -// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s -// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s -// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s -// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s -// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s -// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s -// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s -// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s -// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s -// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s -// BM_MemmemStartup 5 ns 5 ns 150821500 -// BM_SearchStartup 5 ns 5 ns 150644203 -// BM_MemmatchStartup 7 ns 7 ns 97068802 -// -// Conclusions: -// -// The following recommendations are based on the sample results above. However, -// we have found that the performance of STL search can vary significantly -// depending on compiler and standard library implementation. We recommend you -// run the benchmarks for yourself on relevant platforms. -// -// If you need case-insensitive, STL search is slightly better than memmem for -// all cases. -// -// Case-sensitive is more subtle: -// Custom memmatch is _very_ fast at scanning, so if you have very few possible -// matches in your haystack, that's the way to go. Performance drops -// significantly with more matches. -// -// STL search is slightly faster than memmem in the medium and pathological -// benchmarks. However, the performance of memmem is currently more dependable -// across platforms and build configurations. namespace { @@ -94,96 +38,10 @@ const char* MakeHaystack() { } const char* const kHaystack = MakeHaystack(); -void BM_Memmem(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Memmem); - -void BM_MemmemMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmemMedium); - -void BM_MemmemPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmem( - kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, - kHaystackSize - kHaystackSize / 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmemPathological); - -void BM_Memcasemem(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Memcasemem); - -void BM_MemcasememMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemcasememMedium); - -void BM_MemcasememPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memcasemem( - kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, - kHaystackSize - kHaystackSize / 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemcasememPathological); - bool case_eq(const char a, const char b) { return absl::ascii_tolower(a) == absl::ascii_tolower(b); } -void BM_Search(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, - kHaystack + kHaystackSize - 1, - kHaystack + kHaystackSize)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Search); - -void BM_SearchMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, - kHaystack + kHaystackSize - 2, - kHaystack + kHaystackSize)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_SearchMedium); - -void BM_SearchPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, - kHaystack + kHaystackSize / 2, - kHaystack + kHaystackSize)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_SearchPathological); - void BM_Searchcase(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, @@ -241,34 +99,6 @@ const char* memcasematch(const char* phaystack, size_t haylen, return nullptr; } -void BM_Memmatch(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_Memmatch); - -void BM_MemmatchMedium(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmatchMedium); - -void BM_MemmatchPathological(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmatch( - kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, - kHaystackSize - kHaystackSize / 2)); - } - state.SetBytesProcessed(kHaystackSize64 * state.iterations()); -} -BENCHMARK(BM_MemmatchPathological); - void BM_Memcasematch(benchmark::State& state) { for (auto _ : state) { benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1)); @@ -295,29 +125,4 @@ void BM_MemcasematchPathological(benchmark::State& state) { } BENCHMARK(BM_MemcasematchPathological); -void BM_MemmemStartup(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmem( - kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); - } -} -BENCHMARK(BM_MemmemStartup); - -void BM_SearchStartup(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize( - std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize, - kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize)); - } -} -BENCHMARK(BM_SearchStartup); - -void BM_MemmatchStartup(benchmark::State& state) { - for (auto _ : state) { - benchmark::DoNotOptimize(absl::strings_internal::memmatch( - kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); - } -} -BENCHMARK(BM_MemmatchStartup); - } // namespace diff --git a/absl/strings/internal/memutil_test.cc b/absl/strings/internal/memutil_test.cc index d8681ddf..277be2c4 100644 --- a/absl/strings/internal/memutil_test.cc +++ b/absl/strings/internal/memutil_test.cc @@ -19,42 +19,12 @@ #include #include "gtest/gtest.h" -#include "absl/strings/ascii.h" namespace { -static char* memcasechr(const char* s, int c, size_t slen) { - c = absl::ascii_tolower(c); - for (; slen; ++s, --slen) { - if (absl::ascii_tolower(*s) == c) return const_cast(s); - } - return nullptr; -} - -static const char* memcasematch(const char* phaystack, size_t haylen, - const char* pneedle, size_t neelen) { - if (0 == neelen) { - return phaystack; // even if haylen is 0 - } - if (haylen < neelen) return nullptr; - - const char* match; - const char* hayend = phaystack + haylen - neelen + 1; - while ((match = static_cast( - memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { - if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) - return match; - else - phaystack = match + 1; - } - return nullptr; -} - -TEST(MemUtilTest, AllTests) { +TEST(MemUtil, memcasecmp) { // check memutil functions - char a[1000]; - absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1); - absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1); + const char a[] = "hello there"; EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there", sizeof("hello there") - 1), @@ -66,114 +36,6 @@ TEST(MemUtilTest, AllTests) { sizeof("hello there") - 2), 0); EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0); - - char* p = absl::strings_internal::memdup("hello", 5); - free(p); - - p = absl::strings_internal::memrchr("hello there", 'e', - sizeof("hello there") - 1); - EXPECT_TRUE(p && p[-1] == 'r'); - p = absl::strings_internal::memrchr("hello there", 'e', - sizeof("hello there") - 2); - EXPECT_TRUE(p && p[-1] == 'h'); - p = absl::strings_internal::memrchr("hello there", 'u', - sizeof("hello there") - 1); - EXPECT_TRUE(p == nullptr); - - int len = absl::strings_internal::memspn("hello there", - sizeof("hello there") - 1, "hole"); - EXPECT_EQ(len, sizeof("hello") - 1); - len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, - "u"); - EXPECT_EQ(len, 0); - len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, - ""); - EXPECT_EQ(len, 0); - len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1, - "trole h"); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memspn("hello there!", - sizeof("hello there!") - 1, "trole h"); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memspn("hello there!", - sizeof("hello there!") - 2, "trole h!"); - EXPECT_EQ(len, sizeof("hello there!") - 2); - - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, "leho"); - EXPECT_EQ(len, 0); - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, "u"); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, ""); - EXPECT_EQ(len, sizeof("hello there") - 1); - len = absl::strings_internal::memcspn("hello there", - sizeof("hello there") - 1, " "); - EXPECT_EQ(len, 5); - - p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, - "leho"); - EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l'); - p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, - "nu"); - EXPECT_TRUE(p == nullptr); - p = absl::strings_internal::mempbrk("hello there!", - sizeof("hello there!") - 2, "!"); - EXPECT_TRUE(p == nullptr); - p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1, - " t "); - EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't'); - - { - const char kHaystack[] = "0123456789"; - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3), - kHaystack + 7); - EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1), - kHaystack + 9); - EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) == - nullptr); - EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) == - nullptr); - } - { - const char kHaystack[] = "aBcDeFgHiJ"; - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0), - kHaystack); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3), - kHaystack); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1), - kHaystack); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3), - kHaystack + 7); - EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1), - kHaystack + 9); - EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) == - nullptr); - EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) == - nullptr); - } - { - const char kHaystack[] = "0123456789"; - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1), - kHaystack); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3), - kHaystack + 7); - EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1), - kHaystack + 9); - EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) == - nullptr); - EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) == - nullptr); - } } } // namespace diff --git a/absl/strings/string_view.cc b/absl/strings/string_view.cc index e2261625..f20ff530 100644 --- a/absl/strings/string_view.cc +++ b/absl/strings/string_view.cc @@ -21,12 +21,35 @@ #include #include -#include "absl/strings/internal/memutil.h" - namespace absl { ABSL_NAMESPACE_BEGIN namespace { + +// This is significantly faster for case-sensitive matches with very +// few possible matches. +const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, + size_t neelen) { + if (0 == neelen) { + return phaystack; // even if haylen is 0 + } + if (haylen < neelen) return nullptr; + + const char* match; + const char* hayend = phaystack + haylen - neelen + 1; + // A static cast is used here to work around the fact that memchr returns + // a void* on Posix-compliant systems and const void* on Windows. + while ( + (match = static_cast(memchr( + phaystack, pneedle[0], static_cast(hayend - phaystack))))) { + if (memcmp(match, pneedle, neelen) == 0) + return match; + else + phaystack = match + 1; + } + return nullptr; +} + void WritePadding(std::ostream& o, size_t pad) { char fill_buf[32]; memset(fill_buf, o.fill(), sizeof(fill_buf)); @@ -84,8 +107,7 @@ string_view::size_type string_view::find(string_view s, if (empty() && pos == 0 && s.empty()) return 0; return npos; } - const char* result = - strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); + const char* result = memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); return result ? static_cast(result - ptr_) : npos; } -- cgit v1.2.3