Functions added: FindLongestCommonSuffix, FindLongestCommonPrefix.

PiperOrigin-RevId: 539784770 Change-Id: Ie224afa04af023bbddc89b967e8c8440f9e8a887
author: Tsige Solomon <tsige@google.com> 2023-06-12 15:56:24 -0700
committer: Copybara-Service <copybara-worker@google.com> 2023-06-12 15:57:58 -0700
commit: 77111e3d5b40df6019fedc85198f7376c120bffb (patch)
tree: 35a9976fce72969966300ae4968bbbbabed36fcc /absl
parent: dfc7f46d9c8425933ffe2c275f982f956d33d06f (diff)
3 files changed, 193 insertions, 1 deletions
diff --git a/absl/strings/match.cc b/absl/strings/match.cc
index b65cbc67..3b81b2c0 100644
--- a/absl/strings/match.cc
+++ b/absl/strings/match.cc
@@ -13,8 +13,13 @@
 // limitations under the License.
 
 #include "absl/strings/match.h"
-#include "absl/strings/ascii.h"
 
+#include <algorithm>
+#include <cstdint>
+
+#include "absl/base/internal/endian.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/ascii.h"
 #include "absl/strings/internal/memutil.h"
 
 namespace absl {
@@ -61,5 +66,65 @@ bool EndsWithIgnoreCase(absl::string_view text,
          EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix);
 }
 
+absl::string_view FindLongestCommonPrefix(absl::string_view a,
+                                          absl::string_view b) {
+  const absl::string_view::size_type limit = std::min(a.size(), b.size());
+  const char* const pa = a.data();
+  const char* const pb = b.data();
+  absl::string_view::size_type count = (unsigned) 0;
+
+  if (ABSL_PREDICT_FALSE(limit < 8)) {
+    while (ABSL_PREDICT_TRUE(count + 2 <= limit)) {
+      uint16_t xor_bytes = absl::little_endian::Load16(pa + count) ^
+                           absl::little_endian::Load16(pb + count);
+      if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
+        if (ABSL_PREDICT_TRUE((xor_bytes & 0xff) == 0)) ++count;
+        return absl::string_view(pa, count);
+      }
+      count += 2;
+    }
+    if (ABSL_PREDICT_TRUE(count != limit)) {
+      if (ABSL_PREDICT_TRUE(pa[count] == pb[count])) ++count;
+    }
+    return absl::string_view(pa, count);
+  }
+
+  do {
+    uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
+                         absl::little_endian::Load64(pb + count);
+    if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
+      count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
+      return absl::string_view(pa, count);
+    }
+    count += 8;
+  } while (ABSL_PREDICT_TRUE(count + 8 < limit));
+
+  count = limit - 8;
+  uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
+                       absl::little_endian::Load64(pb + count);
+  if (ABSL_PREDICT_TRUE(xor_bytes != 0)) {
+    count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
+    return absl::string_view(pa, count);
+  }
+  return absl::string_view(pa, limit);
+}
+
+absl::string_view FindLongestCommonSuffix(absl::string_view a,
+                                          absl::string_view b) {
+  const absl::string_view::size_type limit = std::min(a.size(), b.size());
+  if (limit == 0) return absl::string_view();
+
+  const char* pa = a.data() + a.size() - 1;
+  const char* pb = b.data() + b.size() - 1;
+  absl::string_view::size_type count = (unsigned) 0;
+  while (count < limit && *pa == *pb) {
+    --pa;
+    --pb;
+    ++count;
+  }
+
+  return absl::string_view(++pa, count);
+}
+
 ABSL_NAMESPACE_END
 }  // namespace absl
diff --git a/absl/strings/match.h b/absl/strings/match.h
index 1dc0beaf..1eeafbbf 100644
--- a/absl/strings/match.h
+++ b/absl/strings/match.h
@@ -103,6 +103,16 @@ bool StartsWithIgnoreCase(absl::string_view text,
 bool EndsWithIgnoreCase(absl::string_view text,
                         absl::string_view suffix) noexcept;
 
+// Yields the longest prefix in common between both input strings.
+// Pointer-wise, the returned result is a subset of input "a".
+absl::string_view FindLongestCommonPrefix(absl::string_view a,
+                                          absl::string_view b);
+
+// Yields the longest suffix in common between both input strings.
+// Pointer-wise, the returned result is a subset of input "a".
+absl::string_view FindLongestCommonSuffix(absl::string_view a,
+                                          absl::string_view b);
+
 ABSL_NAMESPACE_END
 }  // namespace absl
 
diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc
index f063b4ea..71618f71 100644
--- a/absl/strings/match_test.cc
+++ b/absl/strings/match_test.cc
@@ -168,4 +168,121 @@ TEST(MatchTest, ContainsCharIgnoreCase) {
   EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0'));
 }
 
+TEST(MatchTest, FindLongestCommonPrefix) {
+  EXPECT_EQ(absl::FindLongestCommonPrefix("", ""), "");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("", "abc"), "");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("abc", ""), "");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("ab", "abc"), "ab");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "ab"), "ab");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abd"), "ab");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abcd"), "abc");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "abcd"), "abcd");
+  EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "efgh"), "");
+
+  // "abcde" v. "abc" but in the middle of other data
+  EXPECT_EQ(absl::FindLongestCommonPrefix(
+                absl::string_view("1234 abcdef").substr(5, 5),
+                absl::string_view("5678 abcdef").substr(5, 3)),
+            "abc");
+}
+
+// Since the little-endian implementation involves a bit of if-else and various
+// return paths, the following tests aims to provide full test coverage of the
+// implementation.
+TEST(MatchTest, FindLongestCommonPrefixLoad16Mismatch) {
+  const std::string x1 = "abcdefgh";
+  const std::string x2 = "abcde_";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesNoLast) {
+  const std::string x1 = "abcdef";
+  const std::string x2 = "abcdef";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastCharMismatches) {
+  const std::string x1 = "abcdefg";
+  const std::string x2 = "abcdef_h";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastMatches) {
+  const std::string x1 = "abcde";
+  const std::string x2 = "abcdefgh";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize8Load64Mismatches) {
+  const std::string x1 = "abcdefghijk";
+  const std::string x2 = "abcde_g_";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize8Load64Matches) {
+  const std::string x1 = "abcdefgh";
+  const std::string x2 = "abcdefgh";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdefgh");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdefgh");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize15Load64Mismatches) {
+  const std::string x1 = "012345670123456";
+  const std::string x2 = "0123456701_34_6";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize15Load64Matches) {
+  const std::string x1 = "012345670123456";
+  const std::string x2 = "0123456701234567";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "012345670123456");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "012345670123456");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSizeFirstByteOfLast8BytesMismatch) {
+  const std::string x1 = "012345670123456701234567";
+  const std::string x2 = "0123456701234567_1234567";
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701234567");
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701234567");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLargeLastCharMismatches) {
+  const std::string x1(300, 'x');
+  std::string x2 = x1;
+  x2.back() = '#';
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(299, 'x'));
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(299, 'x'));
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLargeFullMatch) {
+  const std::string x1(300, 'x');
+  const std::string x2 = x1;
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(300, 'x'));
+  EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(300, 'x'));
+}
+
+TEST(MatchTest, FindLongestCommonSuffix) {
+  EXPECT_EQ(absl::FindLongestCommonSuffix("", ""), "");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("", "abc"), "");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("abc", ""), "");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("bc", "abc"), "bc");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "bc"), "bc");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "dbc"), "bc");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("bcd", "abcd"), "bcd");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "abcd"), "abcd");
+  EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "efgh"), "");
+
+  // "abcde" v. "cde" but in the middle of other data
+  EXPECT_EQ(absl::FindLongestCommonSuffix(
+                absl::string_view("1234 abcdef").substr(5, 5),
+                absl::string_view("5678 abcdef").substr(7, 3)),
+            "cde");
+}
+
 }  // namespace
author	Tsige Solomon <tsige@google.com>	2023-06-12 15:56:24 -0700
committer	Copybara-Service <copybara-worker@google.com>	2023-06-12 15:57:58 -0700
commit	77111e3d5b40df6019fedc85198f7376c120bffb (patch)
tree	35a9976fce72969966300ae4968bbbbabed36fcc /absl
parent	dfc7f46d9c8425933ffe2c275f982f956d33d06f (diff)