summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Tsige Solomon <tsige@google.com>2023-06-12 15:56:24 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2023-06-12 15:57:58 -0700
commit77111e3d5b40df6019fedc85198f7376c120bffb (patch)
tree35a9976fce72969966300ae4968bbbbabed36fcc
parentdfc7f46d9c8425933ffe2c275f982f956d33d06f (diff)
Functions added: FindLongestCommonSuffix, FindLongestCommonPrefix.
PiperOrigin-RevId: 539784770 Change-Id: Ie224afa04af023bbddc89b967e8c8440f9e8a887
-rw-r--r--absl/strings/match.cc67
-rw-r--r--absl/strings/match.h10
-rw-r--r--absl/strings/match_test.cc117
3 files changed, 193 insertions, 1 deletions
diff --git a/absl/strings/match.cc b/absl/strings/match.cc
index b65cbc67..3b81b2c0 100644
--- a/absl/strings/match.cc
+++ b/absl/strings/match.cc
@@ -13,8 +13,13 @@
// limitations under the License.
#include "absl/strings/match.h"
-#include "absl/strings/ascii.h"
+#include <algorithm>
+#include <cstdint>
+
+#include "absl/base/internal/endian.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/ascii.h"
#include "absl/strings/internal/memutil.h"
namespace absl {
@@ -61,5 +66,65 @@ bool EndsWithIgnoreCase(absl::string_view text,
EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix);
}
+absl::string_view FindLongestCommonPrefix(absl::string_view a,
+ absl::string_view b) {
+ const absl::string_view::size_type limit = std::min(a.size(), b.size());
+ const char* const pa = a.data();
+ const char* const pb = b.data();
+ absl::string_view::size_type count = (unsigned) 0;
+
+ if (ABSL_PREDICT_FALSE(limit < 8)) {
+ while (ABSL_PREDICT_TRUE(count + 2 <= limit)) {
+ uint16_t xor_bytes = absl::little_endian::Load16(pa + count) ^
+ absl::little_endian::Load16(pb + count);
+ if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
+ if (ABSL_PREDICT_TRUE((xor_bytes & 0xff) == 0)) ++count;
+ return absl::string_view(pa, count);
+ }
+ count += 2;
+ }
+ if (ABSL_PREDICT_TRUE(count != limit)) {
+ if (ABSL_PREDICT_TRUE(pa[count] == pb[count])) ++count;
+ }
+ return absl::string_view(pa, count);
+ }
+
+ do {
+ uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
+ absl::little_endian::Load64(pb + count);
+ if (ABSL_PREDICT_FALSE(xor_bytes != 0)) {
+ count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
+ return absl::string_view(pa, count);
+ }
+ count += 8;
+ } while (ABSL_PREDICT_TRUE(count + 8 < limit));
+
+ count = limit - 8;
+ uint64_t xor_bytes = absl::little_endian::Load64(pa + count) ^
+ absl::little_endian::Load64(pb + count);
+ if (ABSL_PREDICT_TRUE(xor_bytes != 0)) {
+ count += static_cast<uint64_t>(absl::countr_zero(xor_bytes) >> 3);
+ return absl::string_view(pa, count);
+ }
+ return absl::string_view(pa, limit);
+}
+
+absl::string_view FindLongestCommonSuffix(absl::string_view a,
+ absl::string_view b) {
+ const absl::string_view::size_type limit = std::min(a.size(), b.size());
+ if (limit == 0) return absl::string_view();
+
+ const char* pa = a.data() + a.size() - 1;
+ const char* pb = b.data() + b.size() - 1;
+ absl::string_view::size_type count = (unsigned) 0;
+ while (count < limit && *pa == *pb) {
+ --pa;
+ --pb;
+ ++count;
+ }
+
+ return absl::string_view(++pa, count);
+}
+
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/match.h b/absl/strings/match.h
index 1dc0beaf..1eeafbbf 100644
--- a/absl/strings/match.h
+++ b/absl/strings/match.h
@@ -103,6 +103,16 @@ bool StartsWithIgnoreCase(absl::string_view text,
bool EndsWithIgnoreCase(absl::string_view text,
absl::string_view suffix) noexcept;
+// Yields the longest prefix in common between both input strings.
+// Pointer-wise, the returned result is a subset of input "a".
+absl::string_view FindLongestCommonPrefix(absl::string_view a,
+ absl::string_view b);
+
+// Yields the longest suffix in common between both input strings.
+// Pointer-wise, the returned result is a subset of input "a".
+absl::string_view FindLongestCommonSuffix(absl::string_view a,
+ absl::string_view b);
+
ABSL_NAMESPACE_END
} // namespace absl
diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc
index f063b4ea..71618f71 100644
--- a/absl/strings/match_test.cc
+++ b/absl/strings/match_test.cc
@@ -168,4 +168,121 @@ TEST(MatchTest, ContainsCharIgnoreCase) {
EXPECT_FALSE(absl::StrContainsIgnoreCase("", '0'));
}
+TEST(MatchTest, FindLongestCommonPrefix) {
+ EXPECT_EQ(absl::FindLongestCommonPrefix("", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("", "abc"), "");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("ab", "abc"), "ab");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "ab"), "ab");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abd"), "ab");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abc", "abcd"), "abc");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "abcd"), "abcd");
+ EXPECT_EQ(absl::FindLongestCommonPrefix("abcd", "efgh"), "");
+
+ // "abcde" v. "abc" but in the middle of other data
+ EXPECT_EQ(absl::FindLongestCommonPrefix(
+ absl::string_view("1234 abcdef").substr(5, 5),
+ absl::string_view("5678 abcdef").substr(5, 3)),
+ "abc");
+}
+
+// Since the little-endian implementation involves a bit of if-else and various
+// return paths, the following tests aims to provide full test coverage of the
+// implementation.
+TEST(MatchTest, FindLongestCommonPrefixLoad16Mismatch) {
+ const std::string x1 = "abcdefgh";
+ const std::string x2 = "abcde_";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesNoLast) {
+ const std::string x1 = "abcdef";
+ const std::string x2 = "abcdef";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastCharMismatches) {
+ const std::string x1 = "abcdefg";
+ const std::string x2 = "abcdef_h";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdef");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdef");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLoad16MatchesLastMatches) {
+ const std::string x1 = "abcde";
+ const std::string x2 = "abcdefgh";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize8Load64Mismatches) {
+ const std::string x1 = "abcdefghijk";
+ const std::string x2 = "abcde_g_";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcde");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcde");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize8Load64Matches) {
+ const std::string x1 = "abcdefgh";
+ const std::string x2 = "abcdefgh";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "abcdefgh");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "abcdefgh");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize15Load64Mismatches) {
+ const std::string x1 = "012345670123456";
+ const std::string x2 = "0123456701_34_6";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSize15Load64Matches) {
+ const std::string x1 = "012345670123456";
+ const std::string x2 = "0123456701234567";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "012345670123456");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "012345670123456");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixSizeFirstByteOfLast8BytesMismatch) {
+ const std::string x1 = "012345670123456701234567";
+ const std::string x2 = "0123456701234567_1234567";
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), "0123456701234567");
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), "0123456701234567");
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLargeLastCharMismatches) {
+ const std::string x1(300, 'x');
+ std::string x2 = x1;
+ x2.back() = '#';
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(299, 'x'));
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(299, 'x'));
+}
+
+TEST(MatchTest, FindLongestCommonPrefixLargeFullMatch) {
+ const std::string x1(300, 'x');
+ const std::string x2 = x1;
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x1, x2), std::string(300, 'x'));
+ EXPECT_EQ(absl::FindLongestCommonPrefix(x2, x1), std::string(300, 'x'));
+}
+
+TEST(MatchTest, FindLongestCommonSuffix) {
+ EXPECT_EQ(absl::FindLongestCommonSuffix("", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("", "abc"), "");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abc", ""), "");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("bc", "abc"), "bc");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "bc"), "bc");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abc", "dbc"), "bc");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("bcd", "abcd"), "bcd");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "abcd"), "abcd");
+ EXPECT_EQ(absl::FindLongestCommonSuffix("abcd", "efgh"), "");
+
+ // "abcde" v. "cde" but in the middle of other data
+ EXPECT_EQ(absl::FindLongestCommonSuffix(
+ absl::string_view("1234 abcdef").substr(5, 5),
+ absl::string_view("5678 abcdef").substr(7, 3)),
+ "cde");
+}
+
} // namespace