summaryrefslogtreecommitdiff
path: root/absl/strings/internal
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings/internal')
-rw-r--r--absl/strings/internal/char_map.h154
-rw-r--r--absl/strings/internal/char_map_test.cc172
-rw-r--r--absl/strings/internal/escaping_test_common.inc113
-rw-r--r--absl/strings/internal/fastmem.h215
-rw-r--r--absl/strings/internal/fastmem_test.cc453
-rw-r--r--absl/strings/internal/memutil.cc110
-rw-r--r--absl/strings/internal/memutil.h146
-rw-r--r--absl/strings/internal/memutil_test.cc180
-rw-r--r--absl/strings/internal/numbers_test_common.inc166
-rw-r--r--absl/strings/internal/ostringstream.h97
-rw-r--r--absl/strings/internal/ostringstream_test.cc103
-rw-r--r--absl/strings/internal/resize_uninitialized.h69
-rw-r--r--absl/strings/internal/resize_uninitialized_test.cc68
-rw-r--r--absl/strings/internal/str_join_internal.h314
-rw-r--r--absl/strings/internal/str_split_internal.h439
-rw-r--r--absl/strings/internal/utf8.cc51
-rw-r--r--absl/strings/internal/utf8.h52
-rw-r--r--absl/strings/internal/utf8_test.cc58
18 files changed, 2960 insertions, 0 deletions
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h
new file mode 100644
index 00000000..8d92963a
--- /dev/null
+++ b/absl/strings/internal/char_map.h
@@ -0,0 +1,154 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Character Map Class
+//
+// A fast, bit-vector map for 8-bit unsigned characters.
+// This class is useful for non-character purposes as well.
+
+#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
+#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+
+namespace absl {
+namespace strings_internal {
+
+class Charmap {
+ public:
+ constexpr Charmap() : m_() {}
+
+ // Initializes with a given char*. Note that NUL is not treated as
+ // a terminator, but rather a char to be flicked.
+ Charmap(const char* str, int len) : m_() {
+ while (len--) SetChar(*str++);
+ }
+
+ // Initializes with a given char*. NUL is treated as a terminator
+ // and will not be in the charmap.
+ explicit Charmap(const char* str) : m_() {
+ while (*str) SetChar(*str++);
+ }
+
+ constexpr bool contains(unsigned char c) const {
+ return (m_[c / 64] >> (c % 64)) & 0x1;
+ }
+
+ // Returns true if and only if a character exists in both maps.
+ bool IntersectsWith(const Charmap& c) const {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) {
+ if ((m_[i] & c.m_[i]) != 0) return true;
+ }
+ return false;
+ }
+
+ bool IsZero() const {
+ for (uint64_t c : m_) {
+ if (c != 0) return false;
+ }
+ return true;
+ }
+
+ // Containing only a single specified char.
+ static constexpr Charmap Char(char x) {
+ return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
+ CharMaskForWord(x, 2), CharMaskForWord(x, 3));
+ }
+
+ // Containing all the chars in the C-std::string 's'.
+ // Note that this is expensively recursive because of the C++11 constexpr
+ // formulation. Use only in constexpr initializers.
+ static constexpr Charmap FromString(const char* s) {
+ return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1));
+ }
+
+ // Containing all the chars in the closed interval [lo,hi].
+ static constexpr Charmap Range(char lo, char hi) {
+ return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
+ RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
+ }
+
+ friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
+ return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
+ a.m_[3] & b.m_[3]);
+ }
+
+ friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
+ return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
+ a.m_[3] | b.m_[3]);
+ }
+
+ friend constexpr Charmap operator~(const Charmap& a) {
+ return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
+ }
+
+ private:
+ constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
+ : m_{b0, b1, b2, b3} {}
+
+ static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi,
+ uint64_t word) {
+ return OpenRangeFromZeroForWord(hi + 1, word) &
+ ~OpenRangeFromZeroForWord(lo, word);
+ }
+
+ // All the chars in the specified word of the range [0, upper).
+ static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
+ uint64_t word) {
+ return (upper <= 64 * word)
+ ? 0
+ : (upper >= 64 * (word + 1))
+ ? ~static_cast<uint64_t>(0)
+ : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
+ }
+
+ static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) {
+ return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0;
+ }
+
+ private:
+ void SetChar(unsigned char c) {
+ m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
+ }
+
+ uint64_t m_[4];
+};
+
+// Mirror the char-classifying predicates in <cctype>
+constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }
+constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }
+constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }
+constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); }
+constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); }
+constexpr Charmap XDigitCharmap() {
+ return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f');
+}
+constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }
+constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }
+constexpr Charmap CntrlCharmap() {
+ return Charmap::Range(0, 0x7f) & ~PrintCharmap();
+}
+constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }
+constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); }
+constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); }
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc
new file mode 100644
index 00000000..2167be97
--- /dev/null
+++ b/absl/strings/internal/char_map_test.cc
@@ -0,0 +1,172 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/char_map.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cctype>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+constexpr absl::strings_internal::Charmap everything_map =
+ ~absl::strings_internal::Charmap();
+constexpr absl::strings_internal::Charmap nothing_map{};
+
+TEST(Charmap, AllTests) {
+ const absl::strings_internal::Charmap also_nothing_map("", 0);
+ ASSERT_TRUE(everything_map.contains('\0'));
+ ASSERT_TRUE(!nothing_map.contains('\0'));
+ ASSERT_TRUE(!also_nothing_map.contains('\0'));
+ for (unsigned char ch = 1; ch != 0; ++ch) {
+ ASSERT_TRUE(everything_map.contains(ch));
+ ASSERT_TRUE(!nothing_map.contains(ch));
+ ASSERT_TRUE(!also_nothing_map.contains(ch));
+ }
+
+ const absl::strings_internal::Charmap symbols("&@#@^!@?", 5);
+ ASSERT_TRUE(symbols.contains('&'));
+ ASSERT_TRUE(symbols.contains('@'));
+ ASSERT_TRUE(symbols.contains('#'));
+ ASSERT_TRUE(symbols.contains('^'));
+ ASSERT_TRUE(!symbols.contains('!'));
+ ASSERT_TRUE(!symbols.contains('?'));
+ int cnt = 0;
+ for (unsigned char ch = 1; ch != 0; ++ch)
+ cnt += symbols.contains(ch);
+ ASSERT_EQ(cnt, 4);
+
+ const absl::strings_internal::Charmap lets("^abcde", 3);
+ const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10);
+ const absl::strings_internal::Charmap lets3("fghij\0klmnop");
+ ASSERT_TRUE(lets2.contains('k'));
+ ASSERT_TRUE(!lets3.contains('k'));
+
+ ASSERT_TRUE(symbols.IntersectsWith(lets));
+ ASSERT_TRUE(!lets2.IntersectsWith(lets));
+ ASSERT_TRUE(lets.IntersectsWith(symbols));
+ ASSERT_TRUE(!lets.IntersectsWith(lets2));
+
+ ASSERT_TRUE(nothing_map.IsZero());
+ ASSERT_TRUE(!lets.IsZero());
+}
+
+namespace {
+std::string Members(const absl::strings_internal::Charmap& m) {
+ std::string r;
+ for (size_t i = 0; i < 256; ++i)
+ if (m.contains(i)) r.push_back(i);
+ return r;
+}
+
+std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
+ // Don't depend on lo<hi. Just increment until lo==hi.
+ std::string s;
+ while (true) {
+ s.push_back(lo);
+ if (lo == hi) break;
+ ++lo;
+ }
+ return s;
+}
+
+} // namespace
+
+TEST(Charmap, Constexpr) {
+ constexpr absl::strings_internal::Charmap kEmpty = nothing_map;
+ EXPECT_THAT(Members(kEmpty), "");
+ constexpr absl::strings_internal::Charmap kA =
+ absl::strings_internal::Charmap::Char('A');
+ EXPECT_THAT(Members(kA), "A");
+ constexpr absl::strings_internal::Charmap kAZ =
+ absl::strings_internal::Charmap::Range('A', 'Z');
+ EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+ constexpr absl::strings_internal::Charmap kIdentifier =
+ absl::strings_internal::Charmap::Range('0', '9') |
+ absl::strings_internal::Charmap::Range('A', 'Z') |
+ absl::strings_internal::Charmap::Range('a', 'z') |
+ absl::strings_internal::Charmap::Char('_');
+ EXPECT_THAT(Members(kIdentifier),
+ "0123456789"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "_"
+ "abcdefghijklmnopqrstuvwxyz");
+ constexpr absl::strings_internal::Charmap kAll = everything_map;
+ for (size_t i = 0; i < 256; ++i) {
+ EXPECT_TRUE(kAll.contains(i)) << i;
+ }
+ constexpr absl::strings_internal::Charmap kHello =
+ absl::strings_internal::Charmap::FromString("Hello, world!");
+ EXPECT_THAT(Members(kHello), " !,Hdelorw");
+
+ // test negation and intersection
+ constexpr absl::strings_internal::Charmap kABC =
+ absl::strings_internal::Charmap::Range('A', 'Z') &
+ ~absl::strings_internal::Charmap::Range('D', 'Z');
+ EXPECT_THAT(Members(kABC), "ABC");
+}
+
+TEST(Charmap, Range) {
+ // Exhaustive testing takes too long, so test some of the boundaries that
+ // are perhaps going to cause trouble.
+ std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
+ 16, 17, 30, 31, 32, 33, 63, 64, 65,
+ 127, 128, 129, 223, 224, 225, 254, 255};
+ for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
+ SCOPED_TRACE(*lo);
+ for (auto hi = lo; hi != poi.end(); ++hi) {
+ SCOPED_TRACE(*hi);
+ EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)),
+ ClosedRangeString(*lo, *hi));
+ }
+ }
+}
+
+bool AsBool(int x) { return static_cast<bool>(x); }
+
+TEST(CharmapCtype, Match) {
+ for (int c = 0; c < 256; ++c) {
+ SCOPED_TRACE(c);
+ SCOPED_TRACE(static_cast<char>(c));
+ EXPECT_EQ(AsBool(std::isupper(c)),
+ absl::strings_internal::UpperCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::islower(c)),
+ absl::strings_internal::LowerCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isdigit(c)),
+ absl::strings_internal::DigitCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isalpha(c)),
+ absl::strings_internal::AlphaCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isalnum(c)),
+ absl::strings_internal::AlnumCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isxdigit(c)),
+ absl::strings_internal::XDigitCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isprint(c)),
+ absl::strings_internal::PrintCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isspace(c)),
+ absl::strings_internal::SpaceCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::iscntrl(c)),
+ absl::strings_internal::CntrlCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isblank(c)),
+ absl::strings_internal::BlankCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isgraph(c)),
+ absl::strings_internal::GraphCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::ispunct(c)),
+ absl::strings_internal::PunctCharmap().contains(c));
+ }
+}
+
+} // namespace
diff --git a/absl/strings/internal/escaping_test_common.inc b/absl/strings/internal/escaping_test_common.inc
new file mode 100644
index 00000000..6f29140e
--- /dev/null
+++ b/absl/strings/internal/escaping_test_common.inc
@@ -0,0 +1,113 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This test contains common things needed by both escaping_test.cc and
+// escaping_benchmark.cc.
+
+namespace {
+
+struct {
+ absl::string_view plaintext;
+ absl::string_view cyphertext;
+} const base64_strings[] = {
+ // Some google quotes
+ // Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
+ // (Note that we're testing the websafe encoding, though, so if
+ // you add messages, be sure to run "tr -- '+/' '-_'" on the output)
+ { "I was always good at math and science, and I never realized "
+ "that was unusual or somehow undesirable. So one of the things "
+ "I care a lot about is helping to remove that stigma, "
+ "to show girls that you can be feminine, you can like the things "
+ "that girls like, but you can also be really good at technology. "
+ "You can be really good at building things."
+ " - Marissa Meyer, Newsweek, 2010-12-22" "\n",
+
+ "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
+ "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
+ "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
+ "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
+ "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
+ "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
+ "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
+ "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
+ "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },
+
+ { "Typical first year for a new cluster: "
+ "~0.5 overheating "
+ "~1 PDU failure "
+ "~1 rack-move "
+ "~1 network rewiring "
+ "~20 rack failures "
+ "~5 racks go wonky "
+ "~8 network maintenances "
+ "~12 router reloads "
+ "~3 router failures "
+ "~dozens of minor 30-second blips for dns "
+ "~1000 individual machine failures "
+ "~thousands of hard drive failures "
+ "slow disks, bad memory, misconfigured machines, flaky machines, etc."
+ " - Jeff Dean, The Joys of Real Hardware" "\n",
+
+ "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
+ "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
+ "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
+ "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
+ "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
+ "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
+ "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
+ "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
+ "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
+ "ZWFsIEhhcmR3YXJlCg" },
+
+ { "I'm the head of the webspam team at Google. "
+ "That means that if you type your name into Google and get porn back, "
+ "it's my fault. Unless you're a porn star, in which case porn is a "
+ "completely reasonable response."
+ " - Matt Cutts, Google Plus" "\n",
+
+ "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
+ "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
+ "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
+ "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
+ "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
+ "IEdvb2dsZSBQbHVzCg" },
+
+ { "It will still be a long time before machines approach human intelligence. "
+ "But luckily, machines don't actually have to be intelligent; "
+ "they just have to fake it. Access to a wealth of information, "
+ "combined with a rudimentary decision-making capacity, "
+ "can often be almost as useful. Of course, the results are better yet "
+ "when coupled with intelligence. A reference librarian with access to "
+ "a good search engine is a formidable tool."
+ " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n",
+
+ "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
+ "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
+ "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
+ "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
+ "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
+ "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
+ "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
+ "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
+ "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
+ "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
+ "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
+ "NAo" },
+
+ // Degenerate edge case
+ { "",
+ "" },
+};
+
+} // namespace
diff --git a/absl/strings/internal/fastmem.h b/absl/strings/internal/fastmem.h
new file mode 100644
index 00000000..9989b12e
--- /dev/null
+++ b/absl/strings/internal/fastmem.h
@@ -0,0 +1,215 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Fast memory copying and comparison routines.
+// strings::fastmemcmp_inlined() replaces memcmp()
+// strings::memcpy_inlined() replaces memcpy()
+// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0
+//
+// strings::*_inlined() routines are inline versions of the
+// routines exported by this module. Sometimes using the inlined
+// versions is faster. Measure before using the inlined versions.
+//
+
+#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_
+#define ABSL_STRINGS_INTERNAL_FASTMEM_H_
+
+#ifdef __SSE4_1__
+#include <immintrin.h>
+#endif
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+#include "absl/base/internal/unaligned_access.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+
+namespace absl {
+namespace strings_internal {
+
+// Return true if the n bytes at a equal the n bytes at b.
+// The regions are allowed to overlap.
+//
+// The performance is similar to the performance of memcmp(), but faster for
+// moderately-sized inputs, or inputs that share a common prefix and differ
+// somewhere in their last 8 bytes. Further optimizations can be added later
+// if it makes sense to do so. Alternatively, if the compiler & runtime improve
+// to eliminate the need for this, we can remove it.
+inline bool memeq(const char* a, const char* b, size_t n) {
+ size_t n_rounded_down = n & ~static_cast<size_t>(7);
+ if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7
+ return memcmp(a, b, n) == 0;
+ }
+ // n >= 8
+ {
+ uint64_t u =
+ ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
+ uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^
+ ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8);
+ if ((u | v) != 0) { // The first or last 8 bytes differ.
+ return false;
+ }
+ }
+ // The next line forces n to be a multiple of 8.
+ n = n_rounded_down;
+ if (n >= 80) {
+ // In 2013 or later, this should be fast on long strings.
+ return memcmp(a, b, n) == 0;
+ }
+ // Now force n to be a multiple of 16. Arguably, a "switch" would be smart
+ // here, but there's a difficult-to-evaluate code size vs. speed issue. The
+ // current approach often re-compares some bytes (worst case is if n initially
+ // was 16, 32, 48, or 64), but is fairly short.
+ size_t e = n & 8;
+ a += e;
+ b += e;
+ n -= e;
+ // n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks.
+ while (n > 0) {
+#ifdef __SSE4_1__
+ __m128i u =
+ _mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
+ _mm_loadu_si128(reinterpret_cast<const __m128i*>(b)));
+ if (!_mm_test_all_zeros(u, u)) {
+ return false;
+ }
+#else
+ uint64_t x =
+ ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
+ uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^
+ ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8);
+ if ((x | y) != 0) {
+ return false;
+ }
+#endif
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return true;
+}
+
+inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) {
+ const unsigned char* pa = static_cast<const unsigned char*>(va);
+ const unsigned char* pb = static_cast<const unsigned char*>(vb);
+ switch (n) {
+ default:
+ return memcmp(va, vb, n);
+ case 7:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 6:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 5:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 4:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 3:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 2:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 1:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 0:
+ break;
+ }
+ return 0;
+}
+
+// The standard memcpy operation is slow for variable small sizes.
+// This implementation inlines the optimal realization for sizes 1 to 16.
+// To avoid code bloat don't use it in case of not performance-critical spots,
+// nor when you don't expect very frequent values of size <= 16.
+inline void memcpy_inlined(char* dst, const char* src, size_t size) {
+ // Compiler inlines code with minimal amount of data movement when third
+ // parameter of memcpy is a constant.
+ switch (size) {
+ case 1:
+ memcpy(dst, src, 1);
+ break;
+ case 2:
+ memcpy(dst, src, 2);
+ break;
+ case 3:
+ memcpy(dst, src, 3);
+ break;
+ case 4:
+ memcpy(dst, src, 4);
+ break;
+ case 5:
+ memcpy(dst, src, 5);
+ break;
+ case 6:
+ memcpy(dst, src, 6);
+ break;
+ case 7:
+ memcpy(dst, src, 7);
+ break;
+ case 8:
+ memcpy(dst, src, 8);
+ break;
+ case 9:
+ memcpy(dst, src, 9);
+ break;
+ case 10:
+ memcpy(dst, src, 10);
+ break;
+ case 11:
+ memcpy(dst, src, 11);
+ break;
+ case 12:
+ memcpy(dst, src, 12);
+ break;
+ case 13:
+ memcpy(dst, src, 13);
+ break;
+ case 14:
+ memcpy(dst, src, 14);
+ break;
+ case 15:
+ memcpy(dst, src, 15);
+ break;
+ case 16:
+ memcpy(dst, src, 16);
+ break;
+ default:
+ memcpy(dst, src, size);
+ break;
+ }
+}
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_
diff --git a/absl/strings/internal/fastmem_test.cc b/absl/strings/internal/fastmem_test.cc
new file mode 100644
index 00000000..7c670f96
--- /dev/null
+++ b/absl/strings/internal/fastmem_test.cc
@@ -0,0 +1,453 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/fastmem.h"
+
+#include <memory>
+#include <random>
+#include <string>
+
+#include "base/init_google.h"
+#include "base/logging.h"
+#include "testing/base/public/benchmark.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+using RandomEngine = std::minstd_rand0;
+
+void VerifyResults(const int r1, const int r2, const std::string& a,
+ const std::string& b) {
+ CHECK_EQ(a.size(), b.size());
+ if (r1 == 0) {
+ EXPECT_EQ(r2, 0) << a << " " << b;
+ } else if (r1 > 0) {
+ EXPECT_GT(r2, 0) << a << " " << b;
+ } else {
+ EXPECT_LT(r2, 0) << a << " " << b;
+ }
+ if ((r1 == 0) == (r2 == 0)) {
+ EXPECT_EQ(r1 == 0,
+ absl::strings_internal::memeq(a.data(), b.data(), a.size()))
+ << r1 << " " << a << " " << b;
+ }
+}
+
+// Check correctness against glibc's memcmp implementation
+void CheckSingle(const std::string& a, const std::string& b) {
+ CHECK_EQ(a.size(), b.size());
+ const int r1 = memcmp(a.data(), b.data(), a.size());
+ const int r2 =
+ absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), a.size());
+ VerifyResults(r1, r2, a, b);
+}
+
+void GenerateString(size_t len, std::string* s) {
+ s->clear();
+ for (int i = 0; i < len; i++) {
+ *s += ('a' + (i % 26));
+ }
+}
+
+void CheckCompare(const std::string& a, const std::string& b) {
+ CheckSingle(a, b);
+ for (int common = 0; common <= 32; common++) {
+ std::string extra;
+ GenerateString(common, &extra);
+ CheckSingle(extra + a, extra + b);
+ CheckSingle(a + extra, b + extra);
+ for (char c1 = 'a'; c1 <= 'c'; c1++) {
+ for (char c2 = 'a'; c2 <= 'c'; c2++) {
+ CheckSingle(extra + c1 + a, extra + c2 + b);
+ }
+ }
+ }
+}
+
+TEST(FastCompare, Misc) {
+ CheckCompare("", "");
+
+ CheckCompare("a", "a");
+ CheckCompare("ab", "ab");
+ CheckCompare("abc", "abc");
+ CheckCompare("abcd", "abcd");
+ CheckCompare("abcde", "abcde");
+
+ CheckCompare("a", "x");
+ CheckCompare("ab", "xb");
+ CheckCompare("abc", "xbc");
+ CheckCompare("abcd", "xbcd");
+ CheckCompare("abcde", "xbcde");
+
+ CheckCompare("x", "a");
+ CheckCompare("xb", "ab");
+ CheckCompare("xbc", "abc");
+ CheckCompare("xbcd", "abcd");
+ CheckCompare("xbcde", "abcde");
+
+ CheckCompare("a", "x");
+ CheckCompare("ab", "ax");
+ CheckCompare("abc", "abx");
+ CheckCompare("abcd", "abcx");
+ CheckCompare("abcde", "abcdx");
+
+ CheckCompare("x", "a");
+ CheckCompare("ax", "ab");
+ CheckCompare("abx", "abc");
+ CheckCompare("abcx", "abcd");
+ CheckCompare("abcdx", "abcde");
+
+ for (int len = 0; len < 1000; len++) {
+ std::string p(len, 'z');
+ CheckCompare(p + "x", p + "a");
+ CheckCompare(p + "ax", p + "ab");
+ CheckCompare(p + "abx", p + "abc");
+ CheckCompare(p + "abcx", p + "abcd");
+ CheckCompare(p + "abcdx", p + "abcde");
+ }
+}
+
+TEST(FastCompare, TrailingByte) {
+ for (int i = 0; i < 256; i++) {
+ for (int j = 0; j < 256; j++) {
+ std::string a(1, i);
+ std::string b(1, j);
+ CheckSingle(a, b);
+ }
+ }
+}
+
+// Check correctness of memcpy_inlined.
+void CheckSingleMemcpyInlined(const std::string& a) {
+ std::unique_ptr<char[]> destination(new char[a.size() + 2]);
+ destination[0] = 'x';
+ destination[a.size() + 1] = 'x';
+ absl::strings_internal::memcpy_inlined(destination.get() + 1, a.data(),
+ a.size());
+ CHECK_EQ('x', destination[0]);
+ CHECK_EQ('x', destination[a.size() + 1]);
+ CHECK_EQ(0, memcmp(a.data(), destination.get() + 1, a.size()));
+}
+
+TEST(MemCpyInlined, Misc) {
+ CheckSingleMemcpyInlined("");
+ CheckSingleMemcpyInlined("0");
+ CheckSingleMemcpyInlined("012");
+ CheckSingleMemcpyInlined("0123");
+ CheckSingleMemcpyInlined("01234");
+ CheckSingleMemcpyInlined("012345");
+ CheckSingleMemcpyInlined("0123456");
+ CheckSingleMemcpyInlined("01234567");
+ CheckSingleMemcpyInlined("012345678");
+ CheckSingleMemcpyInlined("0123456789");
+ CheckSingleMemcpyInlined("0123456789a");
+ CheckSingleMemcpyInlined("0123456789ab");
+ CheckSingleMemcpyInlined("0123456789abc");
+ CheckSingleMemcpyInlined("0123456789abcd");
+ CheckSingleMemcpyInlined("0123456789abcde");
+ CheckSingleMemcpyInlined("0123456789abcdef");
+ CheckSingleMemcpyInlined("0123456789abcdefg");
+}
+
+template <typename Function>
+inline void CopyLoop(benchmark::State& state, int size, Function func) {
+ char* src = new char[size];
+ char* dst = new char[size];
+ memset(src, 'x', size);
+ memset(dst, 'y', size);
+ for (auto _ : state) {
+ func(dst, src, size);
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
+ CHECK_EQ(dst[0], 'x');
+ delete[] src;
+ delete[] dst;
+}
+
+void BM_memcpy(benchmark::State& state) {
+ CopyLoop(state, state.range(0), memcpy);
+}
+BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+void BM_memcpy_inlined(benchmark::State& state) {
+ CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined);
+}
+BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+// unaligned memcpy
+void BM_unaligned_memcpy(benchmark::State& state) {
+ const int n = state.range(0);
+ const int kMaxOffset = 32;
+ char* src = new char[n + kMaxOffset];
+ char* dst = new char[n + kMaxOffset];
+ memset(src, 'x', n + kMaxOffset);
+ int r = 0, i = 0;
+ for (auto _ : state) {
+ memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n);
+ r += dst[0];
+ ++i;
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] src;
+ delete[] dst;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+// memmove worst case: heavy overlap, but not always by the same amount.
+// Also, the source and destination will often be unaligned.
+void BM_memmove_worst_case(benchmark::State& state) {
+ const int n = state.range(0);
+ const int32_t kDeterministicSeed = 301;
+ const int kMaxOffset = 32;
+ char* src = new char[n + kMaxOffset];
+ memset(src, 'x', n + kMaxOffset);
+ size_t offsets[64];
+ RandomEngine rng(kDeterministicSeed);
+ std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset);
+ for (size_t& offset : offsets) {
+ offset = random_to_max_offset(rng);
+ }
+ int r = 0, i = 0;
+ for (auto _ : state) {
+ memmove(src + offsets[i], src + offsets[i + 1], n);
+ r += src[0];
+ i = (i + 2) % arraysize(offsets);
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] src;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+// memmove cache-friendly: aligned and overlapping with 4k
+// between the source and destination addresses.
+void BM_memmove_cache_friendly(benchmark::State& state) {
+ const int n = state.range(0);
+ char* src = new char[n + 4096];
+ memset(src, 'x', n);
+ int r = 0;
+ while (state.KeepRunningBatch(2)) { // count each memmove as an iteration
+ memmove(src + 4096, src, n);
+ memmove(src, src + 4096, n);
+ r += src[0];
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] src;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_memmove_cache_friendly)
+ ->Arg(5 * 1024)
+ ->Arg(10 * 1024)
+ ->Range(16 << 10, 8 << 20);
+
+// memmove best(?) case: aligned and non-overlapping.
+void BM_memmove_aligned_non_overlapping(benchmark::State& state) {
+ CopyLoop(state, state.range(0), memmove);
+}
+BENCHMARK(BM_memmove_aligned_non_overlapping)
+ ->DenseRange(1, 18)
+ ->Range(32, 8 << 20);
+
+// memset speed
+void BM_memset(benchmark::State& state) {
+ const int n = state.range(0);
+ char* dst = new char[n];
+ int r = 0;
+ for (auto _ : state) {
+ memset(dst, 'x', n);
+ r += dst[0];
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] dst;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_memset)->Range(8, 4096 << 10);
+
+// Bandwidth (vectorization?) test: the ideal generated code will be limited
+// by memory bandwidth. Even so-so generated code will max out memory bandwidth
+// on some machines.
+void BM_membandwidth(benchmark::State& state) {
+ const int n = state.range(0);
+ CHECK_EQ(n % 32, 0); // We will read 32 bytes per iter.
+ char* dst = new char[n];
+ int r = 0;
+ for (auto _ : state) {
+ const uint32_t* p = reinterpret_cast<uint32_t*>(dst);
+ const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n);
+ uint32_t x = 0;
+ while (p < limit) {
+ x += p[0];
+ x += p[1];
+ x += p[2];
+ x += p[3];
+ x += p[4];
+ x += p[5];
+ x += p[6];
+ x += p[7];
+ p += 8;
+ }
+ r += x;
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] dst;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10);
+
+// Helper for benchmarks. Repeatedly compares two strings that are
+// either equal or different only in one character. If test_equal_strings
+// is false then position_to_modify determines where the difference will be.
+template <typename Function>
+ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop(
+ benchmark::State& state, bool test_equal_strings,
+ std::string::size_type position_to_modify, int size, Function func) {
+ const int kIterMult = 4; // Iteration multiplier for better timing resolution
+ CHECK_GT(size, 0);
+ const bool position_to_modify_is_valid =
+ position_to_modify != std::string::npos && position_to_modify < size;
+ CHECK_NE(position_to_modify_is_valid, test_equal_strings);
+ if (!position_to_modify_is_valid) {
+ position_to_modify = 0;
+ }
+ std::string sa(size, 'a');
+ std::string sb = sa;
+ char last = sa[size - 1];
+ int num = 0;
+ for (auto _ : state) {
+ for (int i = 0; i < kIterMult; ++i) {
+ sb[position_to_modify] = test_equal_strings ? last : last ^ 1;
+ num += func(sa, sb);
+ }
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
+ benchmark::DoNotOptimize(num);
+}
+
+// Helper for benchmarks. Repeatedly compares two memory regions that are
+// either equal or different only in their final character.
+template <typename Function>
+ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state,
+ bool test_equal_strings,
+ int size, Function func) {
+ const int kIterMult = 4; // Iteration multiplier for better timing resolution
+ CHECK_GT(size, 0);
+ char* data = static_cast<char*>(malloc(size * 2));
+ memset(data, 'a', size * 2);
+ char* a = data;
+ char* b = data + size;
+ char last = a[size - 1];
+ int num = 0;
+ for (auto _ : state) {
+ for (int i = 0; i < kIterMult; ++i) {
+ b[size - 1] = test_equal_strings ? last : last ^ 1;
+ num += func(a, b, size);
+ }
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
+ benchmark::DoNotOptimize(num);
+ free(data);
+}
+
+void BM_memcmp(benchmark::State& state) {
+ CompareLoop(state, false, state.range(0), memcmp);
+}
+BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+void BM_fastmemcmp_inlined(benchmark::State& state) {
+ CompareLoop(state, false, state.range(0),
+ absl::strings_internal::fastmemcmp_inlined);
+}
+BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+void BM_memeq(benchmark::State& state) {
+ CompareLoop(state, false, state.range(0), absl::strings_internal::memeq);
+}
+BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+void BM_memeq_equal(benchmark::State& state) {
+ CompareLoop(state, true, state.range(0), absl::strings_internal::memeq);
+}
+BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+bool StringLess(const std::string& x, const std::string& y) { return x < y; }
+bool StringEqual(const std::string& x, const std::string& y) { return x == y; }
+bool StdEqual(const std::string& x, const std::string& y) {
+ return x.size() == y.size() &&
+ std::equal(x.data(), x.data() + x.size(), y.data());
+}
+
+// Benchmark for x < y, where x and y are strings that differ in only their
+// final char. That should be more-or-less the worst case for <.
+void BM_string_less(benchmark::State& state) {
+ StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
+ StringLess);
+}
+BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+// Benchmark for x < y, where x and y are strings that differ in only their
+// first char. That should be more-or-less the best case for <.
+void BM_string_less_easy(benchmark::State& state) {
+ StringCompareLoop(state, false, 0, state.range(0), StringLess);
+}
+BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_string_equal(benchmark::State& state) {
+ StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
+ StringEqual);
+}
+BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_string_equal_equal(benchmark::State& state) {
+ StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual);
+}
+BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_std_equal(benchmark::State& state) {
+ StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual);
+}
+BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_std_equal_equal(benchmark::State& state) {
+ StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual);
+}
+BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_string_equal_unequal_lengths(benchmark::State& state) {
+ const int size = state.range(0);
+ std::string a(size, 'a');
+ std::string b(size + 1, 'a');
+ int count = 0;
+ for (auto _ : state) {
+ b[size - 1] = 'a';
+ count += (a == b);
+ }
+ benchmark::DoNotOptimize(count);
+}
+BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
+
+void BM_stdstring_equal_unequal_lengths(benchmark::State& state) {
+ const int size = state.range(0);
+ std::string a(size, 'a');
+ std::string b(size + 1, 'a');
+ int count = 0;
+ for (auto _ : state) {
+ b[size - 1] = 'a';
+ count += (a == b);
+ }
+ benchmark::DoNotOptimize(count);
+}
+BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
+
+} // namespace
diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc
new file mode 100644
index 00000000..a0de70df
--- /dev/null
+++ b/absl/strings/internal/memutil.cc
@@ -0,0 +1,110 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/memutil.h"
+
+#include <cstdlib>
+
+namespace absl {
+namespace strings_internal {
+
+int memcasecmp(const char* s1, const char* s2, size_t len) {
+ const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1);
+ const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2);
+
+ for (size_t i = 0; i < len; i++) {
+ const int diff =
+ int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} -
+ int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))};
+ if (diff != 0) return diff;
+ }
+ return 0;
+}
+
+char* memdup(const char* s, size_t slen) {
+ void* copy;
+ if ((copy = malloc(slen)) == nullptr) return nullptr;
+ memcpy(copy, s, slen);
+ return reinterpret_cast<char*>(copy);
+}
+
+char* memrchr(const char* s, int c, size_t slen) {
+ for (const char* e = s + slen - 1; e >= s; e--) {
+ if (*e == c) return const_cast<char*>(e);
+ }
+ return nullptr;
+}
+
+size_t memspn(const char* s, size_t slen, const char* accept) {
+ const char* p = s;
+ const char* spanp;
+ char c, sc;
+
+cont:
+ c = *p++;
+ if (slen-- == 0) return p - 1 - s;
+ for (spanp = accept; (sc = *spanp++) != '\0';)
+ if (sc == c) goto cont;
+ return p - 1 - s;
+}
+
+size_t memcspn(const char* s, size_t slen, const char* reject) {
+ const char* p = s;
+ const char* spanp;
+ char c, sc;
+
+ while (slen-- != 0) {
+ c = *p++;
+ for (spanp = reject; (sc = *spanp++) != '\0';)
+ if (sc == c) return p - 1 - s;
+ }
+ return p - s;
+}
+
+char* mempbrk(const char* s, size_t slen, const char* accept) {
+ const char* scanp;
+ int sc;
+
+ for (; slen; ++s, --slen) {
+ for (scanp = accept; (sc = *scanp++) != '\0';)
+ if (sc == *s) return const_cast<char*>(s);
+ }
+ return nullptr;
+}
+
+// This is significantly faster for case-sensitive matches with very
+// few possible matches. See unit test for benchmarks.
+const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
+ size_t neelen) {
+ if (0 == neelen) {
+ return phaystack; // even if haylen is 0
+ }
+ if (haylen < neelen) return nullptr;
+
+ const char* match;
+ const char* hayend = phaystack + haylen - neelen + 1;
+ // A static cast is used here to work around the fact that memchr returns
+ // a void* on Posix-compliant systems and const void* on Windows.
+ while ((match = static_cast<const char*>(
+ memchr(phaystack, pneedle[0], hayend - phaystack)))) {
+ if (memcmp(match, pneedle, neelen) == 0)
+ return match;
+ else
+ phaystack = match + 1;
+ }
+ return nullptr;
+}
+
+} // namespace strings_internal
+} // namespace absl
diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h
new file mode 100644
index 00000000..a6f1c691
--- /dev/null
+++ b/absl/strings/internal/memutil.h
@@ -0,0 +1,146 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// These routines provide mem versions of standard C std::string routines,
+// such as strpbrk. They function exactly the same as the str versions,
+// so if you wonder what they are, replace the word "mem" by
+// "str" and check out the man page. I could return void*, as the
+// strutil.h mem*() routines tend to do, but I return char* instead
+// since this is by far the most common way these functions are called.
+//
+// The difference between the mem and str versions is the mem version
+// takes a pointer and a length, rather than a '\0'-terminated std::string.
+// The memcase* routines defined here assume the locale is "C"
+// (they use absl::ascii_tolower instead of tolower).
+//
+// These routines are based on the BSD library.
+//
+// Here's a list of routines from std::string.h, and their mem analogues.
+// Functions in lowercase are defined in std::string.h; those in UPPERCASE
+// are defined here:
+//
+// strlen --
+// strcat strncat MEMCAT
+// strcpy strncpy memcpy
+// -- memccpy (very cool function, btw)
+// -- memmove
+// -- memset
+// strcmp strncmp memcmp
+// strcasecmp strncasecmp MEMCASECMP
+// strchr memchr
+// strcoll --
+// strxfrm --
+// strdup strndup MEMDUP
+// strrchr MEMRCHR
+// strspn MEMSPN
+// strcspn MEMCSPN
+// strpbrk MEMPBRK
+// strstr MEMSTR MEMMEM
+// (g)strcasestr MEMCASESTR MEMCASEMEM
+// strtok --
+// strprefix MEMPREFIX (strprefix is from strutil.h)
+// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
+// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
+// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
+// -- MEMIS
+// -- MEMCASEIS
+// strcount MEMCOUNT (strcount is from strutil.h)
+
+#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
+#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
+
+#include <cstddef>
+#include <cstring>
+
+#include "absl/base/port.h" // disable some warnings on Windows
+#include "absl/strings/ascii.h" // for absl::ascii_tolower
+
+namespace absl {
+namespace strings_internal {
+
+inline char* memcat(char* dest, size_t destlen, const char* src,
+ size_t srclen) {
+ return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen));
+}
+
+int memcasecmp(const char* s1, const char* s2, size_t len);
+char* memdup(const char* s, size_t slen);
+char* memrchr(const char* s, int c, size_t slen);
+size_t memspn(const char* s, size_t slen, const char* accept);
+size_t memcspn(const char* s, size_t slen, const char* reject);
+char* mempbrk(const char* s, size_t slen, const char* accept);
+
+// This is for internal use only. Don't call this directly
+template <bool case_sensitive>
+const char* int_memmatch(const char* haystack, size_t haylen,
+ const char* needle, size_t neelen) {
+ if (0 == neelen) {
+ return haystack; // even if haylen is 0
+ }
+ const char* hayend = haystack + haylen;
+ const char* needlestart = needle;
+ const char* needleend = needlestart + neelen;
+
+ for (; haystack < hayend; ++haystack) {
+ char hay = case_sensitive
+ ? *haystack
+ : absl::ascii_tolower(static_cast<unsigned char>(*haystack));
+ char nee = case_sensitive
+ ? *needle
+ : absl::ascii_tolower(static_cast<unsigned char>(*needle));
+ if (hay == nee) {
+ if (++needle == needleend) {
+ return haystack + 1 - neelen;
+ }
+ } else if (needle != needlestart) {
+ // must back up haystack in case a prefix matched (find "aab" in "aaab")
+ haystack -= needle - needlestart; // for loop will advance one more
+ needle = needlestart;
+ }
+ }
+ return nullptr;
+}
+
+// These are the guys you can call directly
+inline const char* memstr(const char* phaystack, size_t haylen,
+ const char* pneedle) {
+ return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle));
+}
+
+inline const char* memcasestr(const char* phaystack, size_t haylen,
+ const char* pneedle) {
+ return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle));
+}
+
+inline const char* memmem(const char* phaystack, size_t haylen,
+ const char* pneedle, size_t needlelen) {
+ return int_memmatch<true>(phaystack, haylen, pneedle, needlelen);
+}
+
+inline const char* memcasemem(const char* phaystack, size_t haylen,
+ const char* pneedle, size_t needlelen) {
+ return int_memmatch<false>(phaystack, haylen, pneedle, needlelen);
+}
+
+// This is significantly faster for case-sensitive matches with very
+// few possible matches. See unit test for benchmarks.
+const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
+ size_t neelen);
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_
diff --git a/absl/strings/internal/memutil_test.cc b/absl/strings/internal/memutil_test.cc
new file mode 100644
index 00000000..1ff60f20
--- /dev/null
+++ b/absl/strings/internal/memutil_test.cc
@@ -0,0 +1,180 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Unit test for memutil.cc
+
+#include "absl/strings/internal/memutil.h"
+
+#include <algorithm>
+#include <cstdlib>
+
+#include "gtest/gtest.h"
+#include "absl/strings/ascii.h"
+
+namespace {
+
+static char* memcasechr(const char* s, int c, size_t slen) {
+ c = absl::ascii_tolower(c);
+ for (; slen; ++s, --slen) {
+ if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
+ }
+ return nullptr;
+}
+
+static const char* memcasematch(const char* phaystack, size_t haylen,
+ const char* pneedle, size_t neelen) {
+ if (0 == neelen) {
+ return phaystack; // even if haylen is 0
+ }
+ if (haylen < neelen) return nullptr;
+
+ const char* match;
+ const char* hayend = phaystack + haylen - neelen + 1;
+ while ((match = static_cast<char*>(
+ memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
+ if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
+ return match;
+ else
+ phaystack = match + 1;
+ }
+ return nullptr;
+}
+
+TEST(MemUtilTest, AllTests) {
+ // check memutil functions
+ char a[1000];
+ absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
+ absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
+
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
+ sizeof("hello there") - 1),
+ 0);
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
+ sizeof("hello there") - 1),
+ -1);
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
+ sizeof("hello there") - 2),
+ 0);
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
+
+ char* p = absl::strings_internal::memdup("hello", 5);
+ free(p);
+
+ p = absl::strings_internal::memrchr("hello there", 'e',
+ sizeof("hello there") - 1);
+ EXPECT_TRUE(p && p[-1] == 'r');
+ p = absl::strings_internal::memrchr("hello there", 'e',
+ sizeof("hello there") - 2);
+ EXPECT_TRUE(p && p[-1] == 'h');
+ p = absl::strings_internal::memrchr("hello there", 'u',
+ sizeof("hello there") - 1);
+ EXPECT_TRUE(p == nullptr);
+
+ int len = absl::strings_internal::memspn("hello there",
+ sizeof("hello there") - 1, "hole");
+ EXPECT_EQ(len, sizeof("hello") - 1);
+ len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
+ "u");
+ EXPECT_EQ(len, 0);
+ len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
+ "");
+ EXPECT_EQ(len, 0);
+ len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
+ "trole h");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memspn("hello there!",
+ sizeof("hello there!") - 1, "trole h");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memspn("hello there!",
+ sizeof("hello there!") - 2, "trole h!");
+ EXPECT_EQ(len, sizeof("hello there!") - 2);
+
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, "leho");
+ EXPECT_EQ(len, 0);
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, "u");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, "");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, " ");
+ EXPECT_EQ(len, 5);
+
+ p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
+ "leho");
+ EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
+ p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
+ "nu");
+ EXPECT_TRUE(p == nullptr);
+ p = absl::strings_internal::mempbrk("hello there!",
+ sizeof("hello there!") - 2, "!");
+ EXPECT_TRUE(p == nullptr);
+ p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
+ " t ");
+ EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
+
+ {
+ const char kHaystack[] = "0123456789";
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
+ kHaystack + 7);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
+ kHaystack + 9);
+ EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
+ nullptr);
+ EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
+ nullptr);
+ }
+ {
+ const char kHaystack[] = "aBcDeFgHiJ";
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
+ kHaystack + 7);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
+ kHaystack + 9);
+ EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
+ nullptr);
+ EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
+ nullptr);
+ }
+ {
+ const char kHaystack[] = "0123456789";
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
+ kHaystack + 7);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
+ kHaystack + 9);
+ EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
+ nullptr);
+ EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
+ nullptr);
+ }
+}
+
+} // namespace
diff --git a/absl/strings/internal/numbers_test_common.inc b/absl/strings/internal/numbers_test_common.inc
new file mode 100644
index 00000000..e165b3be
--- /dev/null
+++ b/absl/strings/internal/numbers_test_common.inc
@@ -0,0 +1,166 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file contains common things needed by numbers_test.cc,
+// numbers_legacy_test.cc and numbers_benchmark.cc.
+
+namespace {
+
+// Previously documented minimum buffer sizes for Fast*ToBuffer functions.
+// NOTE(edk): These should be deleted and uses replaced with kFastToBufferSize
+// once existing code has been fixed to use kFastToBufferSize.
+enum {
+ kFastInt32ToBufferSize = 12,
+ kFastInt64ToBufferSize = 22,
+ kFastUInt32ToBufferSize = 12,
+ kFastUInt64ToBufferSize = 22
+};
+
+template <typename IntType>
+bool Itoa(IntType value, int base, std::string* destination) {
+ destination->clear();
+ if (base <= 1 || base > 36) {
+ return false;
+ }
+
+ if (value == 0) {
+ destination->push_back('0');
+ return true;
+ }
+
+ bool negative = value < 0;
+ while (value != 0) {
+ const IntType next_value = value / base;
+ // Can't use std::abs here because of problems when IntType is unsigned.
+ int remainder = value > next_value * base ? value - next_value * base
+ : next_value * base - value;
+ char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10;
+ destination->insert(0, 1, c);
+ value = next_value;
+ }
+
+ if (negative) {
+ destination->insert(0, 1, '-');
+ }
+ return true;
+}
+
+struct uint32_test_case {
+ const char* str;
+ bool expect_ok;
+ int base; // base to pass to the conversion function
+ uint32_t expected;
+} const strtouint32_test_cases[] = {
+ {"0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
+ {"0x34234324", true, 16, 0x34234324},
+ {"34234324", true, 16, 0x34234324},
+ {"0", true, 16, 0},
+ {" \t\n 0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
+ {" \f\v 46", true, 10, 46}, // must accept weird whitespace
+ {" \t\n 72717222", true, 8, 072717222},
+ {" \t\n 072717222", true, 8, 072717222},
+ {" \t\n 072717228", false, 8, 07271722},
+ {"0", true, 0, 0},
+
+ // Base-10 version.
+ {"34234324", true, 0, 34234324},
+ {"4294967295", true, 0, std::numeric_limits<uint32_t>::max()},
+ {"34234324 \n\t", true, 10, 34234324},
+
+ // Unusual base
+ {"0", true, 3, 0},
+ {"2", true, 3, 2},
+ {"11", true, 3, 4},
+
+ // Invalid uints.
+ {"", false, 0, 0},
+ {" ", false, 0, 0},
+ {"abc", false, 0, 0}, // would be valid hex, but prefix is missing
+ {"34234324a", false, 0, 34234324},
+ {"34234.3", false, 0, 34234},
+ {"-1", false, 0, 0},
+ {" -123", false, 0, 0},
+ {" \t\n -123", false, 0, 0},
+
+ // Out of bounds.
+ {"4294967296", false, 0, std::numeric_limits<uint32_t>::max()},
+ {"0x100000000", false, 0, std::numeric_limits<uint32_t>::max()},
+ {nullptr, false, 0, 0},
+};
+
+struct uint64_test_case {
+ const char* str;
+ bool expect_ok;
+ int base;
+ uint64_t expected;
+} const strtouint64_test_cases[] = {
+ {"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
+ {"3423432448783446", true, 16, int64_t{0x3423432448783446}},
+
+ {"0", true, 16, 0},
+ {"000", true, 0, 0},
+ {"0", true, 0, 0},
+ {" \t\n 0xffffffffffffffff", true, 16,
+ std::numeric_limits<uint64_t>::max()},
+
+ {"012345670123456701234", true, 8, int64_t{012345670123456701234}},
+ {"12345670123456701234", true, 8, int64_t{012345670123456701234}},
+
+ {"12845670123456701234", false, 8, 0},
+
+ // Base-10 version.
+ {"34234324487834466", true, 0, int64_t{34234324487834466}},
+
+ {" \t\n 18446744073709551615", true, 0,
+ std::numeric_limits<uint64_t>::max()},
+
+ {"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
+
+ {" \f\v 46", true, 10, 46}, // must accept weird whitespace
+
+ // Unusual base
+ {"0", true, 3, 0},
+ {"2", true, 3, 2},
+ {"11", true, 3, 4},
+
+ {"0", true, 0, 0},
+
+ // Invalid uints.
+ {"", false, 0, 0},
+ {" ", false, 0, 0},
+ {"abc", false, 0, 0},
+ {"34234324487834466a", false, 0, 0},
+ {"34234487834466.3", false, 0, 0},
+ {"-1", false, 0, 0},
+ {" -123", false, 0, 0},
+ {" \t\n -123", false, 0, 0},
+
+ // Out of bounds.
+ {"18446744073709551616", false, 10, 0},
+ {"18446744073709551616", false, 0, 0},
+ {"0x10000000000000000", false, 16, std::numeric_limits<uint64_t>::max()},
+ {"0X10000000000000000", false, 16,
+ std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
+ {"0x10000000000000000", false, 0, std::numeric_limits<uint64_t>::max()},
+ {"0X10000000000000000", false, 0,
+ std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
+
+ {"0x1234", true, 16, 0x1234},
+
+ // Base-10 std::string version.
+ {"1234", true, 0, 1234},
+ {nullptr, false, 0, 0},
+};
+
+} // namespace
diff --git a/absl/strings/internal/ostringstream.h b/absl/strings/internal/ostringstream.h
new file mode 100644
index 00000000..017632a9
--- /dev/null
+++ b/absl/strings/internal/ostringstream.h
@@ -0,0 +1,97 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
+#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
+
+#include <cassert>
+#include <ostream>
+#include <streambuf>
+#include <string>
+
+#include "absl/base/port.h"
+
+namespace absl {
+namespace strings_internal {
+
+// The same as std::ostringstream but appends to a user-specified std::string,
+// and is faster. It is ~70% faster to create, ~50% faster to write to, and
+// completely free to extract the result std::string.
+//
+// std::string s;
+// OStringStream strm(&s);
+// strm << 42 << ' ' << 3.14; // appends to `s`
+//
+// The stream object doesn't have to be named. Starting from C++11 operator<<
+// works with rvalues of std::ostream.
+//
+// std::string s;
+// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s`
+//
+// OStringStream is faster to create than std::ostringstream but it's still
+// relatively slow. Avoid creating multiple streams where a single stream will
+// do.
+//
+// Creates unnecessary instances of OStringStream: slow.
+//
+// std::string s;
+// OStringStream(&s) << 42;
+// OStringStream(&s) << ' ';
+// OStringStream(&s) << 3.14;
+//
+// Creates a single instance of OStringStream and reuses it: fast.
+//
+// std::string s;
+// OStringStream strm(&s);
+// strm << 42;
+// strm << ' ';
+// strm << 3.14;
+//
+// Note: flush() has no effect. No reason to call it.
+class OStringStream : private std::basic_streambuf<char>, public std::ostream {
+ public:
+ // The argument can be null, in which case you'll need to call str(p) with a
+ // non-null argument before you can write to the stream.
+ //
+ // The destructor of OStringStream doesn't use the std::string. It's OK to destroy
+ // the std::string before the stream.
+ explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {}
+
+ std::string* str() { return s_; }
+ const std::string* str() const { return s_; }
+ void str(std::string* s) { s_ = s; }
+
+ private:
+ using Buf = std::basic_streambuf<char>;
+
+ Buf::int_type overflow(int c = Buf::traits_type::eof()) override {
+ assert(s_);
+ if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof()))
+ s_->push_back(static_cast<char>(c));
+ return 1;
+ }
+
+ std::streamsize xsputn(const char* s, std::streamsize n) override {
+ assert(s_);
+ s_->append(s, n);
+ return n;
+ }
+
+ std::string* s_;
+};
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
diff --git a/absl/strings/internal/ostringstream_test.cc b/absl/strings/internal/ostringstream_test.cc
new file mode 100644
index 00000000..0047ec82
--- /dev/null
+++ b/absl/strings/internal/ostringstream_test.cc
@@ -0,0 +1,103 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/ostringstream.h"
+
+#include <memory>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <type_traits>
+
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(OStringStream, IsOStream) {
+ static_assert(
+ std::is_base_of<std::ostream, absl::strings_internal::OStringStream>(),
+ "");
+}
+
+TEST(OStringStream, ConstructDestroy) {
+ {
+ absl::strings_internal::OStringStream strm(nullptr);
+ EXPECT_EQ(nullptr, strm.str());
+ }
+ {
+ std::string s = "abc";
+ {
+ absl::strings_internal::OStringStream strm(&s);
+ EXPECT_EQ(&s, strm.str());
+ }
+ EXPECT_EQ("abc", s);
+ }
+ {
+ std::unique_ptr<std::string> s(new std::string);
+ absl::strings_internal::OStringStream strm(s.get());
+ s.reset();
+ }
+}
+
+TEST(OStringStream, Str) {
+ std::string s1;
+ absl::strings_internal::OStringStream strm(&s1);
+ const absl::strings_internal::OStringStream& c_strm(strm);
+
+ static_assert(std::is_same<decltype(strm.str()), std::string*>(), "");
+ static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), "");
+
+ EXPECT_EQ(&s1, strm.str());
+ EXPECT_EQ(&s1, c_strm.str());
+
+ strm.str(&s1);
+ EXPECT_EQ(&s1, strm.str());
+ EXPECT_EQ(&s1, c_strm.str());
+
+ std::string s2;
+ strm.str(&s2);
+ EXPECT_EQ(&s2, strm.str());
+ EXPECT_EQ(&s2, c_strm.str());
+
+ strm.str(nullptr);
+ EXPECT_EQ(nullptr, strm.str());
+ EXPECT_EQ(nullptr, c_strm.str());
+}
+
+TEST(OStreamStream, WriteToLValue) {
+ std::string s = "abc";
+ {
+ absl::strings_internal::OStringStream strm(&s);
+ EXPECT_EQ("abc", s);
+ strm << "";
+ EXPECT_EQ("abc", s);
+ strm << 42;
+ EXPECT_EQ("abc42", s);
+ strm << 'x' << 'y';
+ EXPECT_EQ("abc42xy", s);
+ }
+ EXPECT_EQ("abc42xy", s);
+}
+
+TEST(OStreamStream, WriteToRValue) {
+ std::string s = "abc";
+ absl::strings_internal::OStringStream(&s) << "";
+ EXPECT_EQ("abc", s);
+ absl::strings_internal::OStringStream(&s) << 42;
+ EXPECT_EQ("abc42", s);
+ absl::strings_internal::OStringStream(&s) << 'x' << 'y';
+ EXPECT_EQ("abc42xy", s);
+}
+
+} // namespace
diff --git a/absl/strings/internal/resize_uninitialized.h b/absl/strings/internal/resize_uninitialized.h
new file mode 100644
index 00000000..0157ca02
--- /dev/null
+++ b/absl/strings/internal/resize_uninitialized.h
@@ -0,0 +1,69 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
+#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
+
+#include <string>
+#include <utility>
+
+#include "absl/base/port.h"
+#include "absl/meta/type_traits.h" // for void_t
+
+namespace absl {
+namespace strings_internal {
+
+// Is a subclass of true_type or false_type, depending on whether or not
+// T has a resize_uninitialized member.
+template <typename T, typename = void>
+struct HasResizeUninitialized : std::false_type {};
+template <typename T>
+struct HasResizeUninitialized<
+ T, absl::void_t<decltype(std::declval<T>().resize_uninitialized(237))>>
+ : std::true_type {};
+
+template <typename string_type>
+void ResizeUninit(string_type* s, size_t new_size, std::true_type) {
+ s->resize_uninitialized(new_size);
+}
+template <typename string_type>
+void ResizeUninit(string_type* s, size_t new_size, std::false_type) {
+ s->resize(new_size);
+}
+
+// Returns true if the std::string implementation supports a resize where
+// the new characters added to the std::string are left untouched.
+//
+// (A better name might be "STLStringSupportsUninitializedResize", alluding to
+// the previous function.)
+template <typename string_type>
+inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
+ return HasResizeUninitialized<string_type>();
+}
+
+// Like str->resize(new_size), except any new characters added to "*str" as a
+// result of resizing may be left uninitialized, rather than being filled with
+// '0' bytes. Typically used when code is then going to overwrite the backing
+// store of the std::string with known data. Uses a Google extension to std::string.
+template <typename string_type, typename = void>
+inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
+ ResizeUninit(s, new_size, HasResizeUninitialized<string_type>());
+}
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
diff --git a/absl/strings/internal/resize_uninitialized_test.cc b/absl/strings/internal/resize_uninitialized_test.cc
new file mode 100644
index 00000000..ad282efc
--- /dev/null
+++ b/absl/strings/internal/resize_uninitialized_test.cc
@@ -0,0 +1,68 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/resize_uninitialized.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+int resize_call_count = 0;
+
+struct resizable_string {
+ void resize(size_t) { resize_call_count += 1; }
+};
+
+int resize_uninitialized_call_count = 0;
+
+struct resize_uninitializable_string {
+ void resize(size_t) { resize_call_count += 1; }
+ void resize_uninitialized(size_t) { resize_uninitialized_call_count += 1; }
+};
+
+TEST(ResizeUninit, WithAndWithout) {
+ resize_call_count = 0;
+ resize_uninitialized_call_count = 0;
+ {
+ resizable_string rs;
+
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ EXPECT_FALSE(
+ absl::strings_internal::STLStringSupportsNontrashingResize(&rs));
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ absl::strings_internal::STLStringResizeUninitialized(&rs, 237);
+ EXPECT_EQ(resize_call_count, 1);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ }
+
+ resize_call_count = 0;
+ resize_uninitialized_call_count = 0;
+ {
+ resize_uninitializable_string rus;
+
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ EXPECT_TRUE(
+ absl::strings_internal::STLStringSupportsNontrashingResize(&rus));
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ absl::strings_internal::STLStringResizeUninitialized(&rus, 237);
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 1);
+ }
+}
+
+} // namespace
diff --git a/absl/strings/internal/str_join_internal.h b/absl/strings/internal/str_join_internal.h
new file mode 100644
index 00000000..e73f1dde
--- /dev/null
+++ b/absl/strings/internal/str_join_internal.h
@@ -0,0 +1,314 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// This file declares INTERNAL parts of the Join API that are inlined/templated
+// or otherwise need to be available at compile time. The main abstractions
+// defined in this file are:
+//
+// - A handful of default Formatters
+// - JoinAlgorithm() overloads
+// - JoinRange() overloads
+// - JoinTuple()
+//
+// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
+// absl/strings/str_join.h
+//
+// IWYU pragma: private, include "absl/strings/str_join.h"
+
+#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
+#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
+
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/strings/internal/ostringstream.h"
+#include "absl/strings/str_cat.h"
+
+namespace absl {
+namespace strings_internal {
+
+//
+// Formatter objects
+//
+// The following are implementation classes for standard Formatter objects. The
+// factory functions that users will call to create and use these formatters are
+// defined and documented in strings/join.h.
+//
+
+// The default formatter. Converts alpha-numeric types to strings.
+struct AlphaNumFormatterImpl {
+ // This template is needed in order to support passing in a dereferenced
+ // vector<bool>::iterator
+ template <typename T>
+ void operator()(std::string* out, const T& t) const {
+ StrAppend(out, AlphaNum(t));
+ }
+
+ void operator()(std::string* out, const AlphaNum& t) const {
+ StrAppend(out, t);
+ }
+};
+
+// A type that's used to overload the JoinAlgorithm() function (defined below)
+// for ranges that do not require additional formatting (e.g., a range of
+// strings).
+
+struct NoFormatter : public AlphaNumFormatterImpl {};
+
+// Formats types to strings using the << operator.
+class StreamFormatterImpl {
+ public:
+ // The method isn't const because it mutates state. Making it const will
+ // render StreamFormatterImpl thread-hostile.
+ template <typename T>
+ void operator()(std::string* out, const T& t) {
+ // The stream is created lazily to avoid paying the relatively high cost
+ // of its construction when joining an empty range.
+ if (strm_) {
+ strm_->clear(); // clear the bad, fail and eof bits in case they were set
+ strm_->str(out);
+ } else {
+ strm_.reset(new strings_internal::OStringStream(out));
+ }
+ *strm_ << t;
+ }
+
+ private:
+ std::unique_ptr<strings_internal::OStringStream> strm_;
+};
+
+// Formats a std::pair<>. The 'first' member is formatted using f1_ and the
+// 'second' member is formatted using f2_. sep_ is the separator.
+template <typename F1, typename F2>
+class PairFormatterImpl {
+ public:
+ PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2)
+ : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}
+
+ template <typename T>
+ void operator()(std::string* out, const T& p) {
+ f1_(out, p.first);
+ out->append(sep_);
+ f2_(out, p.second);
+ }
+
+ template <typename T>
+ void operator()(std::string* out, const T& p) const {
+ f1_(out, p.first);
+ out->append(sep_);
+ f2_(out, p.second);
+ }
+
+ private:
+ F1 f1_;
+ std::string sep_;
+ F2 f2_;
+};
+
+// Wraps another formatter and dereferences the argument to operator() then
+// passes the dereferenced argument to the wrapped formatter. This can be
+// useful, for example, to join a std::vector<int*>.
+template <typename Formatter>
+class DereferenceFormatterImpl {
+ public:
+ DereferenceFormatterImpl() : f_() {}
+ explicit DereferenceFormatterImpl(Formatter&& f)
+ : f_(std::forward<Formatter>(f)) {}
+
+ template <typename T>
+ void operator()(std::string* out, const T& t) {
+ f_(out, *t);
+ }
+
+ template <typename T>
+ void operator()(std::string* out, const T& t) const {
+ f_(out, *t);
+ }
+
+ private:
+ Formatter f_;
+};
+
+// DefaultFormatter<T> is a traits class that selects a default Formatter to use
+// for the given type T. The ::Type member names the Formatter to use. This is
+// used by the strings::Join() functions that do NOT take a Formatter argument,
+// in which case a default Formatter must be chosen.
+//
+// AlphaNumFormatterImpl is the default in the base template, followed by
+// specializations for other types.
+template <typename ValueType>
+struct DefaultFormatter {
+ typedef AlphaNumFormatterImpl Type;
+};
+template <>
+struct DefaultFormatter<const char*> {
+ typedef AlphaNumFormatterImpl Type;
+};
+template <>
+struct DefaultFormatter<char*> {
+ typedef AlphaNumFormatterImpl Type;
+};
+template <>
+struct DefaultFormatter<std::string> {
+ typedef NoFormatter Type;
+};
+template <>
+struct DefaultFormatter<absl::string_view> {
+ typedef NoFormatter Type;
+};
+template <typename ValueType>
+struct DefaultFormatter<ValueType*> {
+ typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>
+ Type;
+};
+
+template <typename ValueType>
+struct DefaultFormatter<std::unique_ptr<ValueType>>
+ : public DefaultFormatter<ValueType*> {};
+
+//
+// JoinAlgorithm() functions
+//
+
+// The main joining algorithm. This simply joins the elements in the given
+// iterator range, each separated by the given separator, into an output std::string,
+// and formats each element using the provided Formatter object.
+template <typename Iterator, typename Formatter>
+std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
+ Formatter&& f) {
+ std::string result;
+ absl::string_view sep("");
+ for (Iterator it = start; it != end; ++it) {
+ result.append(sep.data(), sep.size());
+ f(&result, *it);
+ sep = s;
+ }
+ return result;
+}
+
+// No-op placeholder for input iterators which can not be iterated over.
+template <typename Iterator>
+size_t GetResultSize(Iterator, Iterator, size_t, std::input_iterator_tag) {
+ return 0;
+}
+
+// Calculates space to reserve, if the iterator supports multiple passes.
+template <typename Iterator>
+size_t GetResultSize(Iterator it, Iterator end, size_t separator_size,
+ std::forward_iterator_tag) {
+ assert(it != end);
+ size_t length = it->size();
+ while (++it != end) {
+ length += separator_size;
+ length += it->size();
+ }
+ return length;
+}
+
+// A joining algorithm that's optimized for an iterator range of std::string-like
+// objects that do not need any additional formatting. This is to optimize the
+// common case of joining, say, a std::vector<std::string> or a
+// std::vector<absl::string_view>.
+//
+// This is an overload of the previous JoinAlgorithm() function. Here the
+// Formatter argument is of type NoFormatter. Since NoFormatter is an internal
+// type, this overload is only invoked when strings::Join() is called with a
+// range of std::string-like objects (e.g., std::string, absl::string_view), and an
+// explicit Formatter argument was NOT specified.
+//
+// The optimization is that the needed space will be reserved in the output
+// std::string to avoid the need to resize while appending. To do this, the iterator
+// range will be traversed twice: once to calculate the total needed size, and
+// then again to copy the elements and delimiters to the output std::string.
+template <typename Iterator>
+std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
+ NoFormatter) {
+ std::string result;
+ if (start != end) {
+ typename std::iterator_traits<Iterator>::iterator_category iterator_tag;
+ result.reserve(GetResultSize(start, end, s.size(), iterator_tag));
+
+ // Joins strings
+ absl::string_view sep("", 0);
+ for (Iterator it = start; it != end; ++it) {
+ result.append(sep.data(), sep.size());
+ result.append(it->data(), it->size());
+ sep = s;
+ }
+ }
+
+ return result;
+}
+
+// JoinTupleLoop implements a loop over the elements of a std::tuple, which
+// are heterogeneous. The primary template matches the tuple interior case. It
+// continues the iteration after appending a separator (for nonzero indices)
+// and formatting an element of the tuple. The specialization for the I=N case
+// matches the end-of-tuple, and terminates the iteration.
+template <size_t I, size_t N>
+struct JoinTupleLoop {
+ template <typename Tup, typename Formatter>
+ void operator()(std::string* out, const Tup& tup, absl::string_view sep,
+ Formatter&& fmt) {
+ if (I > 0) out->append(sep.data(), sep.size());
+ fmt(out, std::get<I>(tup));
+ JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt);
+ }
+};
+template <size_t N>
+struct JoinTupleLoop<N, N> {
+ template <typename Tup, typename Formatter>
+ void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {}
+};
+
+template <typename... T, typename Formatter>
+std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep,
+ Formatter&& fmt) {
+ std::string result;
+ JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
+ return result;
+}
+
+template <typename Iterator>
+std::string JoinRange(Iterator first, Iterator last, absl::string_view separator) {
+ // No formatter was explicitly given, so a default must be chosen.
+ typedef typename std::iterator_traits<Iterator>::value_type ValueType;
+ typedef typename DefaultFormatter<ValueType>::Type Formatter;
+ return JoinAlgorithm(first, last, separator, Formatter());
+}
+
+template <typename Range, typename Formatter>
+std::string JoinRange(const Range& range, absl::string_view separator,
+ Formatter&& fmt) {
+ using std::begin;
+ using std::end;
+ return JoinAlgorithm(begin(range), end(range), separator, fmt);
+}
+
+template <typename Range>
+std::string JoinRange(const Range& range, absl::string_view separator) {
+ using std::begin;
+ using std::end;
+ return JoinRange(begin(range), end(range), separator);
+}
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h
new file mode 100644
index 00000000..dc31a8ef
--- /dev/null
+++ b/absl/strings/internal/str_split_internal.h
@@ -0,0 +1,439 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// This file declares INTERNAL parts of the Split API that are inline/templated
+// or otherwise need to be available at compile time. The main abstractions
+// defined in here are
+//
+// - ConvertibleToStringView
+// - SplitIterator<>
+// - Splitter<>
+//
+// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
+// absl/strings/str_split.h.
+//
+// IWYU pragma: private, include "absl/strings/str_split.h"
+
+#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
+#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
+
+#ifdef _GLIBCXX_DEBUG
+#include <glibcxx_debug_traits.h>
+#endif // _GLIBCXX_DEBUG
+
+#include <array>
+#include <initializer_list>
+#include <iterator>
+#include <map>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/meta/type_traits.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+namespace strings_internal {
+
+#ifdef _GLIBCXX_DEBUG
+using ::glibcxx_debug_traits::IsStrictlyDebugWrapperBase;
+#else // _GLIBCXX_DEBUG
+template <typename T> struct IsStrictlyDebugWrapperBase : std::false_type {};
+#endif // _GLIBCXX_DEBUG
+
+// This class is implicitly constructible from everything that absl::string_view
+// is implicitly constructible from. If it's constructed from a temporary
+// std::string, the data is moved into a data member so its lifetime matches that of
+// the ConvertibleToStringView instance.
+class ConvertibleToStringView {
+ public:
+ ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
+ : value_(s) {}
+ ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
+ ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
+ : value_(s) {}
+ ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
+ : value_(s) {}
+
+ // Matches rvalue strings and moves their data to a member.
+ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit)
+ : copy_(std::move(s)), value_(copy_) {}
+
+ ConvertibleToStringView(const ConvertibleToStringView& other)
+ : copy_(other.copy_),
+ value_(other.IsSelfReferential() ? copy_ : other.value_) {}
+
+ ConvertibleToStringView(ConvertibleToStringView&& other) {
+ StealMembers(std::move(other));
+ }
+
+ ConvertibleToStringView& operator=(ConvertibleToStringView other) {
+ StealMembers(std::move(other));
+ return *this;
+ }
+
+ absl::string_view value() const { return value_; }
+
+ private:
+ // Returns true if ctsp's value refers to its internal copy_ member.
+ bool IsSelfReferential() const { return value_.data() == copy_.data(); }
+
+ void StealMembers(ConvertibleToStringView&& other) {
+ if (other.IsSelfReferential()) {
+ copy_ = std::move(other.copy_);
+ value_ = copy_;
+ other.value_ = other.copy_;
+ } else {
+ value_ = other.value_;
+ }
+ }
+
+ // Holds the data moved from temporary std::string arguments. Declared first so
+ // that 'value' can refer to 'copy_'.
+ std::string copy_;
+ absl::string_view value_;
+};
+
+// An iterator that enumerates the parts of a std::string from a Splitter. The text
+// to be split, the Delimiter, and the Predicate are all taken from the given
+// Splitter object. Iterators may only be compared if they refer to the same
+// Splitter instance.
+//
+// This class is NOT part of the public splitting API.
+template <typename Splitter>
+class SplitIterator {
+ public:
+ using iterator_category = std::input_iterator_tag;
+ using value_type = absl::string_view;
+ using difference_type = ptrdiff_t;
+ using pointer = const value_type*;
+ using reference = const value_type&;
+
+ enum State { kInitState, kLastState, kEndState };
+ SplitIterator(State state, const Splitter* splitter)
+ : pos_(0),
+ state_(state),
+ splitter_(splitter),
+ delimiter_(splitter->delimiter()),
+ predicate_(splitter->predicate()) {
+ // Hack to maintain backward compatibility. This one block makes it so an
+ // empty absl::string_view whose .data() happens to be nullptr behaves
+ // *differently* from an otherwise empty absl::string_view whose .data() is
+ // not nullptr. This is an undesirable difference in general, but this
+ // behavior is maintained to avoid breaking existing code that happens to
+ // depend on this old behavior/bug. Perhaps it will be fixed one day. The
+ // difference in behavior is as follows:
+ // Split(absl::string_view(""), '-'); // {""}
+ // Split(absl::string_view(), '-'); // {}
+ if (splitter_->text().data() == nullptr) {
+ state_ = kEndState;
+ pos_ = splitter_->text().size();
+ return;
+ }
+
+ if (state_ == kEndState) {
+ pos_ = splitter_->text().size();
+ } else {
+ ++(*this);
+ }
+ }
+
+ bool at_end() const { return state_ == kEndState; }
+
+ reference operator*() const { return curr_; }
+ pointer operator->() const { return &curr_; }
+
+ SplitIterator& operator++() {
+ do {
+ if (state_ == kLastState) {
+ state_ = kEndState;
+ return *this;
+ }
+ const absl::string_view text = splitter_->text();
+ const absl::string_view d = delimiter_.Find(text, pos_);
+ if (d.data() == text.end()) state_ = kLastState;
+ curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
+ pos_ += curr_.size() + d.size();
+ } while (!predicate_(curr_));
+ return *this;
+ }
+
+ SplitIterator operator++(int) {
+ SplitIterator old(*this);
+ ++(*this);
+ return old;
+ }
+
+ friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
+ return a.state_ == b.state_ && a.pos_ == b.pos_;
+ }
+
+ friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
+ return !(a == b);
+ }
+
+ private:
+ size_t pos_;
+ State state_;
+ absl::string_view curr_;
+ const Splitter* splitter_;
+ typename Splitter::DelimiterType delimiter_;
+ typename Splitter::PredicateType predicate_;
+};
+
+// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
+template <typename T, typename = void>
+struct HasMappedType : std::false_type {};
+template <typename T>
+struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
+ : std::true_type {};
+
+// HasValueType<T>::value is true iff there exists a type T::value_type.
+template <typename T, typename = void>
+struct HasValueType : std::false_type {};
+template <typename T>
+struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
+};
+
+// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
+template <typename T, typename = void>
+struct HasConstIterator : std::false_type {};
+template <typename T>
+struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
+ : std::true_type {};
+
+// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
+// details below in Splitter<> where this is used.
+std::false_type IsInitializerListDispatch(...); // default: No
+template <typename T>
+std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
+template <typename T>
+struct IsInitializerList
+ : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
+
+// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
+// is true for type 'C'.
+//
+// Restricts conversion to container-like types (by testing for the presence of
+// a const_iterator member type) and also to disable conversion to an
+// std::initializer_list (which also has a const_iterator). Otherwise, code
+// compiled in C++11 will get an error due to ambiguous conversion paths (in
+// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
+// or an std::initializer_list<T>).
+template <typename C>
+struct SplitterIsConvertibleTo
+ : std::enable_if<
+ !IsStrictlyDebugWrapperBase<C>::value &&
+ !IsInitializerList<C>::value &&
+ HasValueType<C>::value &&
+ HasConstIterator<C>::value> {};
+
+// This class implements the range that is returned by absl::StrSplit(). This
+// class has templated conversion operators that allow it to be implicitly
+// converted to a variety of types that the caller may have specified on the
+// left-hand side of an assignment.
+//
+// The main interface for interacting with this class is through its implicit
+// conversion operators. However, this class may also be used like a container
+// in that it has .begin() and .end() member functions. It may also be used
+// within a range-for loop.
+//
+// Output containers can be collections of any type that is constructible from
+// an absl::string_view.
+//
+// An Predicate functor may be supplied. This predicate will be used to filter
+// the split strings: only strings for which the predicate returns true will be
+// kept. A Predicate object is any unary functor that takes an absl::string_view
+// and returns bool.
+template <typename Delimiter, typename Predicate>
+class Splitter {
+ public:
+ using DelimiterType = Delimiter;
+ using PredicateType = Predicate;
+ using const_iterator = strings_internal::SplitIterator<Splitter>;
+ using value_type = typename std::iterator_traits<const_iterator>::value_type;
+
+ Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
+ : text_(std::move(input_text)),
+ delimiter_(std::move(d)),
+ predicate_(std::move(p)) {}
+
+ absl::string_view text() const { return text_.value(); }
+ const Delimiter& delimiter() const { return delimiter_; }
+ const Predicate& predicate() const { return predicate_; }
+
+ // Range functions that iterate the split substrings as absl::string_view
+ // objects. These methods enable a Splitter to be used in a range-based for
+ // loop.
+ const_iterator begin() const { return {const_iterator::kInitState, this}; }
+ const_iterator end() const { return {const_iterator::kEndState, this}; }
+
+ // An implicit conversion operator that is restricted to only those containers
+ // that the splitter is convertible to.
+ template <typename Container,
+ typename OnlyIf = typename SplitterIsConvertibleTo<Container>::type>
+ operator Container() const { // NOLINT(runtime/explicit)
+ return ConvertToContainer<Container, typename Container::value_type,
+ HasMappedType<Container>::value>()(*this);
+ }
+
+ // Returns a pair with its .first and .second members set to the first two
+ // strings returned by the begin() iterator. Either/both of .first and .second
+ // will be constructed with empty strings if the iterator doesn't have a
+ // corresponding value.
+ template <typename First, typename Second>
+ operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
+ absl::string_view first, second;
+ auto it = begin();
+ if (it != end()) {
+ first = *it;
+ if (++it != end()) {
+ second = *it;
+ }
+ }
+ return {First(first), Second(second)};
+ }
+
+ private:
+ // ConvertToContainer is a functor converting a Splitter to the requested
+ // Container of ValueType. It is specialized below to optimize splitting to
+ // certain combinations of Container and ValueType.
+ //
+ // This base template handles the generic case of storing the split results in
+ // the requested non-map-like container and converting the split substrings to
+ // the requested type.
+ template <typename Container, typename ValueType, bool is_map = false>
+ struct ConvertToContainer {
+ Container operator()(const Splitter& splitter) const {
+ Container c;
+ auto it = std::inserter(c, c.end());
+ for (const auto sp : splitter) {
+ *it++ = ValueType(sp);
+ }
+ return c;
+ }
+ };
+
+ // Partial specialization for a std::vector<absl::string_view>.
+ //
+ // Optimized for the common case of splitting to a
+ // std::vector<absl::string_view>. In this case we first split the results to
+ // a small array of absl::string_view on the stack, to reduce reallocations.
+ template <typename A>
+ struct ConvertToContainer<std::vector<absl::string_view, A>,
+ absl::string_view, false> {
+ std::vector<absl::string_view, A> operator()(
+ const Splitter& splitter) const {
+ struct raw_view {
+ const char* data;
+ size_t size;
+ operator absl::string_view() const { // NOLINT(runtime/explicit)
+ return {data, size};
+ }
+ };
+ std::vector<absl::string_view, A> v;
+ std::array<raw_view, 16> ar;
+ for (auto it = splitter.begin(); !it.at_end();) {
+ size_t index = 0;
+ do {
+ ar[index].data = it->data();
+ ar[index].size = it->size();
+ ++it;
+ } while (++index != ar.size() && !it.at_end());
+ v.insert(v.end(), ar.begin(), ar.begin() + index);
+ }
+ return v;
+ }
+ };
+
+ // Partial specialization for a std::vector<std::string>.
+ //
+ // Optimized for the common case of splitting to a std::vector<std::string>. In
+ // this case we first split the results to a std::vector<absl::string_view> so
+ // the returned std::vector<std::string> can have space reserved to avoid std::string
+ // moves.
+ template <typename A>
+ struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
+ std::vector<std::string, A> operator()(const Splitter& splitter) const {
+ const std::vector<absl::string_view> v = splitter;
+ return std::vector<std::string, A>(v.begin(), v.end());
+ }
+ };
+
+ // Partial specialization for containers of pairs (e.g., maps).
+ //
+ // The algorithm is to insert a new pair into the map for each even-numbered
+ // item, with the even-numbered item as the key with a default-constructed
+ // value. Each odd-numbered item will then be assigned to the last pair's
+ // value.
+ template <typename Container, typename First, typename Second>
+ struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
+ Container operator()(const Splitter& splitter) const {
+ Container m;
+ typename Container::iterator it;
+ bool insert = true;
+ for (const auto sp : splitter) {
+ if (insert) {
+ it = Inserter<Container>::Insert(&m, First(sp), Second());
+ } else {
+ it->second = Second(sp);
+ }
+ insert = !insert;
+ }
+ return m;
+ }
+
+ // Inserts the key and value into the given map, returning an iterator to
+ // the inserted item. Specialized for std::map and std::multimap to use
+ // emplace() and adapt emplace()'s return value.
+ template <typename Map>
+ struct Inserter {
+ using M = Map;
+ template <typename... Args>
+ static typename M::iterator Insert(M* m, Args&&... args) {
+ return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
+ }
+ };
+
+ template <typename... Ts>
+ struct Inserter<std::map<Ts...>> {
+ using M = std::map<Ts...>;
+ template <typename... Args>
+ static typename M::iterator Insert(M* m, Args&&... args) {
+ return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
+ }
+ };
+
+ template <typename... Ts>
+ struct Inserter<std::multimap<Ts...>> {
+ using M = std::multimap<Ts...>;
+ template <typename... Args>
+ static typename M::iterator Insert(M* m, Args&&... args) {
+ return m->emplace(std::make_pair(std::forward<Args>(args)...));
+ }
+ };
+ };
+
+ ConvertibleToStringView text_;
+ Delimiter delimiter_;
+ Predicate predicate_;
+};
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc
new file mode 100644
index 00000000..2415c2cc
--- /dev/null
+++ b/absl/strings/internal/utf8.cc
@@ -0,0 +1,51 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// UTF8 utilities, implemented to reduce dependencies.
+
+#include "absl/strings/internal/utf8.h"
+
+namespace absl {
+namespace strings_internal {
+
+size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) {
+ if (utf8_char <= 0x7F) {
+ *buffer = static_cast<char>(utf8_char);
+ return 1;
+ } else if (utf8_char <= 0x7FF) {
+ buffer[1] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[0] = 0xC0 | utf8_char;
+ return 2;
+ } else if (utf8_char <= 0xFFFF) {
+ buffer[2] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[1] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[0] = 0xE0 | utf8_char;
+ return 3;
+ } else {
+ buffer[3] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[2] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[1] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[0] = 0xF0 | utf8_char;
+ return 4;
+ }
+}
+
+} // namespace strings_internal
+} // namespace absl
diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h
new file mode 100644
index 00000000..705eea7f
--- /dev/null
+++ b/absl/strings/internal/utf8.h
@@ -0,0 +1,52 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// UTF8 utilities, implemented to reduce dependencies.
+//
+// If you need Unicode specific processing (for example being aware of
+// Unicode character boundaries, or knowledge of Unicode casing rules,
+// or various forms of equivalence and normalization), take a look at
+// files in i18n/utf8.
+
+#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_
+#define ABSL_STRINGS_INTERNAL_UTF8_H_
+
+#include <cstddef>
+#include <cstdint>
+
+
+namespace absl {
+namespace strings_internal {
+
+// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes
+// out the UTF-8 encoding into buffer, and returns the number of chars
+// it wrote.
+//
+// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings
+// are:
+// 00 - 7F : 0xxxxxxx
+// 80 - 7FF : 110xxxxx 10xxxxxx
+// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx
+// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+//
+// Values greater than 0x10FFFF are not supported and may or may not write
+// characters into buffer, however never will more than kMaxEncodedUTF8Size
+// bytes be written, regardless of the value of utf8_char.
+enum { kMaxEncodedUTF8Size = 4 };
+size_t EncodeUTF8Char(char *buffer, char32_t utf8_char);
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_UTF8_H_
diff --git a/absl/strings/internal/utf8_test.cc b/absl/strings/internal/utf8_test.cc
new file mode 100644
index 00000000..4d437427
--- /dev/null
+++ b/absl/strings/internal/utf8_test.cc
@@ -0,0 +1,58 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/utf8.h"
+
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <cstdint>
+
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(EncodeUTF8Char, BasicFunction) {
+ std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"},
+ {0x00A3, u8"\u00A3"},
+ {0x00010000, u8"\U00010000"},
+ {0x0000FFFF, u8"\U0000FFFF"},
+ {0x0010FFFD, u8"\U0010FFFD"}};
+ for (auto &test : tests) {
+ char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'};
+ char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'};
+ char *buf0_written =
+ &buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)];
+ char *buf1_written =
+ &buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)];
+ int apparent_length = 7;
+ while (buf0[apparent_length - 1] == '\x00' &&
+ buf1[apparent_length - 1] == '\xFF') {
+ if (--apparent_length == 0) break;
+ }
+ EXPECT_EQ(apparent_length, buf0_written - buf0);
+ EXPECT_EQ(apparent_length, buf1_written - buf1);
+ EXPECT_EQ(apparent_length, test.second.length());
+ EXPECT_EQ(std::string(buf0, apparent_length), test.second);
+ EXPECT_EQ(std::string(buf1, apparent_length), test.second);
+ }
+ char buf[32] = "Don't Tread On Me";
+ EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000),
+ absl::strings_internal::kMaxEncodedUTF8Size);
+ char buf2[32] = "Negative is invalid but sane";
+ EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1),
+ absl::strings_internal::kMaxEncodedUTF8Size);
+}
+
+} // namespace