diff options
Diffstat (limited to 'absl/strings/str_split_test.cc')
-rw-r--r-- | absl/strings/str_split_test.cc | 896 |
1 files changed, 896 insertions, 0 deletions
diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc new file mode 100644 index 00000000..a95a0fbd --- /dev/null +++ b/absl/strings/str_split_test.cc @@ -0,0 +1,896 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/str_split.h" + +#include <climits> +#include <cstdlib> +#include <cstring> +#include <deque> +#include <limits> +#include <list> +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/dynamic_annotations.h" // for RunningOnValgrind +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/strings/numbers.h" + +namespace { + +using ::testing::ElementsAre; +using ::testing::Pair; +using ::testing::UnorderedElementsAre; + +// This tests the overall split API, which is made up of the absl::StrSplit() +// function and the Delimiter objects in the absl:: namespace. +// This TEST macro is outside of any namespace to require full specification of +// namespaces just like callers will need to use. +TEST(Split, APIExamples) { + { + // Passes std::string delimiter. Assumes the default of Literal. + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByString; + v = absl::StrSplit("a,b,c", ByString(",")); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")), + ElementsAre("a", "b", "c")); + } + + { + // Same as above, but using a single character as the delimiter. + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByChar; + v = absl::StrSplit("a,b,c", ByChar(',')); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Same as above, but using std::string + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + + // Equivalent to... + using absl::ByChar; + v = absl::StrSplit("a,b,c", ByChar(',')); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses the Literal std::string "=>" as the delimiter. + const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>"); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // The substrings are returned as string_views, eliminating copying. + std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Leading and trailing empty substrings. + std::vector<std::string> v = absl::StrSplit(",a,b,c,", ','); + EXPECT_THAT(v, ElementsAre("", "a", "b", "c", "")); + } + + { + // Splits on a delimiter that is not found. + std::vector<std::string> v = absl::StrSplit("abc", ','); + EXPECT_THAT(v, ElementsAre("abc")); + } + + { + // Splits the input std::string into individual characters by using an empty + // std::string as the delimiter. + std::vector<std::string> v = absl::StrSplit("abc", ""); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Splits std::string data with embedded NUL characters, using NUL as the + // delimiter. A simple delimiter of "\0" doesn't work because strlen() will + // say that's the empty std::string when constructing the absl::string_view + // delimiter. Instead, a non-empty std::string containing NUL can be used as the + // delimiter. + std::string embedded_nulls("a\0b\0c", 5); + std::string null_delim("\0", 1); + std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Stores first two split strings as the members in a std::pair. + std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + // "c" is omitted because std::pair can hold only two elements. + } + + { + // Results stored in std::set<std::string> + std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses a non-const char* delimiter. + char a[] = ","; + char* d = a + 0; + std::vector<std::string> v = absl::StrSplit("a,b,c", d); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Results split using either of , or ; + using absl::ByAnyChar; + std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;")); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Uses the SkipWhitespace predicate. + using absl::SkipWhitespace; + std::vector<std::string> v = absl::StrSplit("a, ,,b,", ',', SkipWhitespace()); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + // Uses the ByLength delimiter. + using absl::ByLength; + std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3)); + EXPECT_THAT(v, ElementsAre("abc", "def", "g")); + } + + { + // Results stored in a std::map. + std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ','); + EXPECT_EQ(2, m.size()); + EXPECT_EQ("3", m["a"]); + EXPECT_EQ("2", m["b"]); + } + + { + // Results stored in a std::multimap. + std::multimap<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ','); + EXPECT_EQ(3, m.size()); + auto it = m.find("a"); + EXPECT_EQ("1", it->second); + ++it; + EXPECT_EQ("3", it->second); + it = m.find("b"); + EXPECT_EQ("2", it->second); + } + + { + // Demonstrates use in a range-based for loop in C++11. + std::string s = "x,x,x,x,x,x,x"; + for (absl::string_view sp : absl::StrSplit(s, ',')) { + EXPECT_EQ("x", sp); + } + } + + { + // Demonstrates use with a Predicate in a range-based for loop. + using absl::SkipWhitespace; + std::string s = " ,x,,x,,x,x,x,,"; + for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) { + EXPECT_EQ("x", sp); + } + } + + { + // Demonstrates a "smart" split to std::map using two separate calls to + // absl::StrSplit. One call to split the records, and another call to split + // the keys and values. This also uses the Limit delimiter so that the + // std::string "a=b=c" will split to "a" -> "b=c". + std::map<std::string, std::string> m; + for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { + m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); + } + EXPECT_EQ("b=c", m.find("a")->second); + EXPECT_EQ("e", m.find("d")->second); + EXPECT_EQ("", m.find("f")->second); + EXPECT_EQ("", m.find("g")->second); + } +} + +// +// Tests for SplitIterator +// + +TEST(SplitIterator, Basics) { + auto splitter = absl::StrSplit("a,b", ','); + auto it = splitter.begin(); + auto end = splitter.end(); + + EXPECT_NE(it, end); + EXPECT_EQ("a", *it); // tests dereference + ++it; // tests preincrement + EXPECT_NE(it, end); + EXPECT_EQ("b", std::string(it->data(), it->size())); // tests dereference as ptr + it++; // tests postincrement + EXPECT_EQ(it, end); +} + +// Simple Predicate to skip a particular std::string. +class Skip { + public: + explicit Skip(const std::string& s) : s_(s) {} + bool operator()(absl::string_view sp) { return sp != s_; } + + private: + std::string s_; +}; + +TEST(SplitIterator, Predicate) { + auto splitter = absl::StrSplit("a,b,c", ',', Skip("b")); + auto it = splitter.begin(); + auto end = splitter.end(); + + EXPECT_NE(it, end); + EXPECT_EQ("a", *it); // tests dereference + ++it; // tests preincrement -- "b" should be skipped here. + EXPECT_NE(it, end); + EXPECT_EQ("c", std::string(it->data(), it->size())); // tests dereference as ptr + it++; // tests postincrement + EXPECT_EQ(it, end); +} + +TEST(SplitIterator, EdgeCases) { + // Expected input and output, assuming a delimiter of ',' + struct { + std::string in; + std::vector<std::string> expect; + } specs[] = { + {"", {""}}, + {"foo", {"foo"}}, + {",", {"", ""}}, + {",foo", {"", "foo"}}, + {"foo,", {"foo", ""}}, + {",foo,", {"", "foo", ""}}, + {"foo,bar", {"foo", "bar"}}, + }; + + for (const auto& spec : specs) { + SCOPED_TRACE(spec.in); + auto splitter = absl::StrSplit(spec.in, ','); + auto it = splitter.begin(); + auto end = splitter.end(); + for (const auto& expected : spec.expect) { + EXPECT_NE(it, end); + EXPECT_EQ(expected, *it++); + } + EXPECT_EQ(it, end); + } +} + +TEST(Splitter, Const) { + const auto splitter = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(splitter, ElementsAre("a", "b", "c")); +} + +TEST(Split, EmptyAndNull) { + // Attention: Splitting a null absl::string_view is different than splitting + // an empty absl::string_view even though both string_views are considered + // equal. This behavior is likely surprising and undesirable. However, to + // maintain backward compatibility, there is a small "hack" in + // str_split_internal.h that preserves this behavior. If that behavior is ever + // changed/fixed, this test will need to be updated. + EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre("")); + EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre()); +} + +TEST(SplitIterator, EqualityAsEndCondition) { + auto splitter = absl::StrSplit("a,b,c", ','); + auto it = splitter.begin(); + auto it2 = it; + + // Increments it2 twice to point to "c" in the input text. + ++it2; + ++it2; + EXPECT_EQ("c", *it2); + + // This test uses a non-end SplitIterator as the terminating condition in a + // for loop. This relies on SplitIterator equality for non-end SplitIterators + // working correctly. At this point it2 points to "c", and we use that as the + // "end" condition in this test. + std::vector<absl::string_view> v; + for (; it != it2; ++it) { + v.push_back(*it); + } + EXPECT_THAT(v, ElementsAre("a", "b")); +} + +// +// Tests for Splitter +// + +TEST(Splitter, RangeIterators) { + auto splitter = absl::StrSplit("a,b,c", ','); + std::vector<absl::string_view> output; + for (const absl::string_view p : splitter) { + output.push_back(p); + } + EXPECT_THAT(output, ElementsAre("a", "b", "c")); +} + +// Some template functions for use in testing conversion operators +template <typename ContainerType, typename Splitter> +void TestConversionOperator(const Splitter& splitter) { + ContainerType output = splitter; + EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d")); +} + +template <typename MapType, typename Splitter> +void TestMapConversionOperator(const Splitter& splitter) { + MapType m = splitter; + EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d"))); +} + +template <typename FirstType, typename SecondType, typename Splitter> +void TestPairConversionOperator(const Splitter& splitter) { + std::pair<FirstType, SecondType> p = splitter; + EXPECT_EQ(p, (std::pair<FirstType, SecondType>("a", "b"))); +} + +TEST(Splitter, ConversionOperator) { + auto splitter = absl::StrSplit("a,b,c,d", ','); + + TestConversionOperator<std::vector<absl::string_view>>(splitter); + TestConversionOperator<std::vector<std::string>>(splitter); + TestConversionOperator<std::list<absl::string_view>>(splitter); + TestConversionOperator<std::list<std::string>>(splitter); + TestConversionOperator<std::deque<absl::string_view>>(splitter); + TestConversionOperator<std::deque<std::string>>(splitter); + TestConversionOperator<std::set<absl::string_view>>(splitter); + TestConversionOperator<std::set<std::string>>(splitter); + TestConversionOperator<std::multiset<absl::string_view>>(splitter); + TestConversionOperator<std::multiset<std::string>>(splitter); + TestConversionOperator<std::unordered_set<std::string>>(splitter); + + // Tests conversion to map-like objects. + + TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>( + splitter); + TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter); + TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter); + TestMapConversionOperator<std::map<std::string, std::string>>(splitter); + TestMapConversionOperator< + std::multimap<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(splitter); + TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(splitter); + TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); + TestMapConversionOperator<std::unordered_map<std::string, std::string>>(splitter); + + // Tests conversion to std::pair + + TestPairConversionOperator<absl::string_view, absl::string_view>(splitter); + TestPairConversionOperator<absl::string_view, std::string>(splitter); + TestPairConversionOperator<std::string, absl::string_view>(splitter); + TestPairConversionOperator<std::string, std::string>(splitter); +} + +// A few additional tests for conversion to std::pair. This conversion is +// different from others because a std::pair always has exactly two elements: +// .first and .second. The split has to work even when the split has +// less-than, equal-to, and more-than 2 strings. +TEST(Splitter, ToPair) { + { + // Empty std::string + std::pair<std::string, std::string> p = absl::StrSplit("", ','); + EXPECT_EQ("", p.first); + EXPECT_EQ("", p.second); + } + + { + // Only first + std::pair<std::string, std::string> p = absl::StrSplit("a", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("", p.second); + } + + { + // Only second + std::pair<std::string, std::string> p = absl::StrSplit(",b", ','); + EXPECT_EQ("", p.first); + EXPECT_EQ("b", p.second); + } + + { + // First and second. + std::pair<std::string, std::string> p = absl::StrSplit("a,b", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + } + + { + // First and second and then more stuff that will be ignored. + std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); + EXPECT_EQ("a", p.first); + EXPECT_EQ("b", p.second); + // "c" is omitted. + } +} + +TEST(Splitter, Predicates) { + static const char kTestChars[] = ",a, ,b,"; + using absl::AllowEmpty; + using absl::SkipEmpty; + using absl::SkipWhitespace; + + { + // No predicate. Does not skip empties. + auto splitter = absl::StrSplit(kTestChars, ','); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("", "a", " ", "b", "")); + } + + { + // Allows empty strings. Same behavior as no predicate at all. + auto splitter = absl::StrSplit(kTestChars, ',', AllowEmpty()); + std::vector<std::string> v_allowempty = splitter; + EXPECT_THAT(v_allowempty, ElementsAre("", "a", " ", "b", "")); + + // Ensures AllowEmpty equals the behavior with no predicate. + auto splitter_nopredicate = absl::StrSplit(kTestChars, ','); + std::vector<std::string> v_nopredicate = splitter_nopredicate; + EXPECT_EQ(v_allowempty, v_nopredicate); + } + + { + // Skips empty strings. + auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty()); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("a", " ", "b")); + } + + { + // Skips empty and all-whitespace strings. + auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace()); + std::vector<std::string> v = splitter; + EXPECT_THAT(v, ElementsAre("a", "b")); + } +} + +// +// Tests for StrSplit() +// + +TEST(Split, Basics) { + { + // Doesn't really do anything useful because the return value is ignored, + // but it should work. + absl::StrSplit("a,b,c", ','); + } + + { + std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + std::vector<std::string> v = absl::StrSplit("a,b,c", ','); + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + } + + { + // Ensures that assignment works. This requires a little extra work with + // C++11 because of overloads with initializer_list. + std::vector<std::string> v; + v = absl::StrSplit("a,b,c", ','); + + EXPECT_THAT(v, ElementsAre("a", "b", "c")); + std::map<std::string, std::string> m; + m = absl::StrSplit("a,b,c", ','); + EXPECT_EQ(2, m.size()); + std::unordered_map<std::string, std::string> hm; + hm = absl::StrSplit("a,b,c", ','); + EXPECT_EQ(2, hm.size()); + } +} + +absl::string_view ReturnStringView() { return "Hello World"; } +const char* ReturnConstCharP() { return "Hello World"; } +char* ReturnCharP() { return const_cast<char*>("Hello World"); } + +TEST(Split, AcceptsCertainTemporaries) { + std::vector<std::string> v; + v = absl::StrSplit(ReturnStringView(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); + v = absl::StrSplit(ReturnConstCharP(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); + v = absl::StrSplit(ReturnCharP(), ' '); + EXPECT_THAT(v, ElementsAre("Hello", "World")); +} + +TEST(Split, Temporary) { + // Use a std::string longer than the small-std::string-optimization length, so that when + // the temporary is destroyed, if the splitter keeps a reference to the + // std::string's contents, it'll reference freed memory instead of just dead + // on-stack memory. + const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u"; + EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input)) + << "Input should be larger than fits on the stack."; + + // This happens more often in C++11 as part of a range-based for loop. + auto splitter = absl::StrSplit(std::string(input), ','); + std::string expected = "a"; + for (absl::string_view letter : splitter) { + EXPECT_EQ(expected, letter); + ++expected[0]; + } + EXPECT_EQ("v", expected); + + // This happens more often in C++11 as part of a range-based for loop. + auto std_splitter = absl::StrSplit(std::string(input), ','); + expected = "a"; + for (absl::string_view letter : std_splitter) { + EXPECT_EQ(expected, letter); + ++expected[0]; + } + EXPECT_EQ("v", expected); +} + +template <typename T> +static std::unique_ptr<T> CopyToHeap(const T& value) { + return std::unique_ptr<T>(new T(value)); +} + +TEST(Split, LvalueCaptureIsCopyable) { + std::string input = "a,b"; + auto heap_splitter = CopyToHeap(absl::StrSplit(input, ',')); + auto stack_splitter = *heap_splitter; + heap_splitter.reset(); + std::vector<std::string> result = stack_splitter; + EXPECT_THAT(result, testing::ElementsAre("a", "b")); +} + +TEST(Split, TemporaryCaptureIsCopyable) { + auto heap_splitter = CopyToHeap(absl::StrSplit(std::string("a,b"), ',')); + auto stack_splitter = *heap_splitter; + heap_splitter.reset(); + std::vector<std::string> result = stack_splitter; + EXPECT_THAT(result, testing::ElementsAre("a", "b")); +} + +TEST(Split, SplitterIsCopyableAndMoveable) { + auto a = absl::StrSplit("foo", '-'); + + // Ensures that the following expressions compile. + auto b = a; // Copy construct + auto c = std::move(a); // Move construct + b = c; // Copy assign + c = std::move(b); // Move assign + + EXPECT_THAT(c, ElementsAre("foo")); +} + +TEST(Split, StringDelimiter) { + { + std::vector<absl::string_view> v = absl::StrSplit("a,b", ','); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<absl::string_view> v = absl::StrSplit("a,b", std::string(",")); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<absl::string_view> v = + absl::StrSplit("a,b", absl::string_view(",")); + EXPECT_THAT(v, ElementsAre("a", "b")); + } +} + +TEST(Split, UTF8) { + // Tests splitting utf8 strings and utf8 delimiters. + { + // A utf8 input std::string with an ascii delimiter. + std::vector<absl::string_view> v = absl::StrSplit("a,κόσμε", ','); + EXPECT_THAT(v, ElementsAre("a", "κόσμε")); + } + + { + // A utf8 input std::string and a utf8 delimiter. + std::vector<absl::string_view> v = absl::StrSplit("a,κόσμε,b", ",κόσμε,"); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + // A utf8 input std::string and ByAnyChar with ascii chars. + std::vector<absl::string_view> v = + absl::StrSplit("Foo hällo th丞re", absl::ByAnyChar(" \t")); + EXPECT_THAT(v, ElementsAre("Foo", "hällo", "th丞re")); + } +} + +TEST(Split, EmptyStringDelimiter) { + { + std::vector<std::string> v = absl::StrSplit("", ""); + EXPECT_THAT(v, ElementsAre("")); + } + + { + std::vector<std::string> v = absl::StrSplit("a", ""); + EXPECT_THAT(v, ElementsAre("a")); + } + + { + std::vector<std::string> v = absl::StrSplit("ab", ""); + EXPECT_THAT(v, ElementsAre("a", "b")); + } + + { + std::vector<std::string> v = absl::StrSplit("a b", ""); + EXPECT_THAT(v, ElementsAre("a", " ", "b")); + } +} + +TEST(Split, SubstrDelimiter) { + std::vector<absl::string_view> results; + absl::string_view delim("//"); + + results = absl::StrSplit("", delim); + EXPECT_THAT(results, ElementsAre("")); + + results = absl::StrSplit("//", delim); + EXPECT_THAT(results, ElementsAre("", "")); + + results = absl::StrSplit("ab", delim); + EXPECT_THAT(results, ElementsAre("ab")); + + results = absl::StrSplit("ab//", delim); + EXPECT_THAT(results, ElementsAre("ab", "")); + + results = absl::StrSplit("ab/", delim); + EXPECT_THAT(results, ElementsAre("ab/")); + + results = absl::StrSplit("a/b", delim); + EXPECT_THAT(results, ElementsAre("a/b")); + + results = absl::StrSplit("a//b", delim); + EXPECT_THAT(results, ElementsAre("a", "b")); + + results = absl::StrSplit("a///b", delim); + EXPECT_THAT(results, ElementsAre("a", "/b")); + + results = absl::StrSplit("a////b", delim); + EXPECT_THAT(results, ElementsAre("a", "", "b")); +} + +TEST(Split, EmptyResults) { + std::vector<absl::string_view> results; + + results = absl::StrSplit("", '#'); + EXPECT_THAT(results, ElementsAre("")); + + results = absl::StrSplit("#", '#'); + EXPECT_THAT(results, ElementsAre("", "")); + + results = absl::StrSplit("#cd", '#'); + EXPECT_THAT(results, ElementsAre("", "cd")); + + results = absl::StrSplit("ab#cd#", '#'); + EXPECT_THAT(results, ElementsAre("ab", "cd", "")); + + results = absl::StrSplit("ab##cd", '#'); + EXPECT_THAT(results, ElementsAre("ab", "", "cd")); + + results = absl::StrSplit("ab##", '#'); + EXPECT_THAT(results, ElementsAre("ab", "", "")); + + results = absl::StrSplit("ab#ab#", '#'); + EXPECT_THAT(results, ElementsAre("ab", "ab", "")); + + results = absl::StrSplit("aaaa", 'a'); + EXPECT_THAT(results, ElementsAre("", "", "", "", "")); + + results = absl::StrSplit("", '#', absl::SkipEmpty()); + EXPECT_THAT(results, ElementsAre()); +} + +template <typename Delimiter> +static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d, + size_t starting_pos, int expected_pos) { + absl::string_view found = d.Find(text, starting_pos); + return found.data() != text.end() && + expected_pos == found.data() - text.data(); +} + +// Helper function for testing Delimiter objects. Returns true if the given +// Delimiter is found in the given std::string at the given position. This function +// tests two cases: +// 1. The actual text given, staring at position 0 +// 2. The text given with leading padding that should be ignored +template <typename Delimiter> +static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) { + const std::string leading_text = ",x,y,z,"; + return IsFoundAtStartingPos(text, d, 0, expected_pos) && + IsFoundAtStartingPos(leading_text + std::string(text), d, + leading_text.length(), + expected_pos + leading_text.length()); +} + +// +// Tests for Literal +// + +// Tests using any delimiter that represents a single comma. +template <typename Delimiter> +void TestComma(Delimiter d) { + EXPECT_TRUE(IsFoundAt(",", d, 0)); + EXPECT_TRUE(IsFoundAt("a,", d, 1)); + EXPECT_TRUE(IsFoundAt(",b", d, 0)); + EXPECT_TRUE(IsFoundAt("a,b", d, 1)); + EXPECT_TRUE(IsFoundAt("a,b,", d, 1)); + EXPECT_TRUE(IsFoundAt("a,b,c", d, 1)); + EXPECT_FALSE(IsFoundAt("", d, -1)); + EXPECT_FALSE(IsFoundAt(" ", d, -1)); + EXPECT_FALSE(IsFoundAt("a", d, -1)); + EXPECT_FALSE(IsFoundAt("a b c", d, -1)); + EXPECT_FALSE(IsFoundAt("a;b;c", d, -1)); + EXPECT_FALSE(IsFoundAt(";", d, -1)); +} + +TEST(Delimiter, Literal) { + using absl::ByString; + TestComma(ByString(",")); + + // Works as named variable. + ByString comma_string(","); + TestComma(comma_string); + + // The first occurrence of empty std::string ("") in a std::string is at position 0. + // There is a test below that demonstrates this for absl::string_view::find(). + // If the ByString delimiter returned position 0 for this, there would + // be an infinite loop in the SplitIterator code. To avoid this, empty std::string + // is a special case in that it always returns the item at position 1. + absl::string_view abc("abc"); + EXPECT_EQ(0, abc.find("")); // "" is found at position 0 + ByString empty(""); + EXPECT_FALSE(IsFoundAt("", empty, 0)); + EXPECT_FALSE(IsFoundAt("a", empty, 0)); + EXPECT_TRUE(IsFoundAt("ab", empty, 1)); + EXPECT_TRUE(IsFoundAt("abc", empty, 1)); +} + +TEST(Split, ByChar) { + using absl::ByChar; + TestComma(ByChar(',')); + + // Works as named variable. + ByChar comma_char(','); + TestComma(comma_char); +} + +// +// Tests for ByAnyChar +// + +TEST(Delimiter, ByAnyChar) { + using absl::ByAnyChar; + ByAnyChar one_delim(","); + // Found + EXPECT_TRUE(IsFoundAt(",", one_delim, 0)); + EXPECT_TRUE(IsFoundAt("a,", one_delim, 1)); + EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1)); + EXPECT_TRUE(IsFoundAt(",b", one_delim, 0)); + // Not found + EXPECT_FALSE(IsFoundAt("", one_delim, -1)); + EXPECT_FALSE(IsFoundAt(" ", one_delim, -1)); + EXPECT_FALSE(IsFoundAt("a", one_delim, -1)); + EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1)); + EXPECT_FALSE(IsFoundAt(";", one_delim, -1)); + + ByAnyChar two_delims(",;"); + // Found + EXPECT_TRUE(IsFoundAt(",", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(",;", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";,", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0)); + EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0)); + EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1)); + EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1)); + // Not found + EXPECT_FALSE(IsFoundAt("", two_delims, -1)); + EXPECT_FALSE(IsFoundAt(" ", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("a", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1)); + EXPECT_FALSE(IsFoundAt("=", two_delims, -1)); + + // ByAnyChar behaves just like ByString when given a delimiter of empty + // std::string. That is, it always returns a zero-length absl::string_view + // referring to the item at position 1, not position 0. + ByAnyChar empty(""); + EXPECT_FALSE(IsFoundAt("", empty, 0)); + EXPECT_FALSE(IsFoundAt("a", empty, 0)); + EXPECT_TRUE(IsFoundAt("ab", empty, 1)); + EXPECT_TRUE(IsFoundAt("abc", empty, 1)); +} + +// +// Tests for ByLength +// + +TEST(Delimiter, ByLength) { + using absl::ByLength; + + ByLength four_char_delim(4); + + // Found + EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4)); + EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4)); + EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4)); + // Not found + EXPECT_FALSE(IsFoundAt("", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0)); + EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0)); +} + +// Allocates too much memory for TSan and MSan. +#if !defined(THREAD_SANITIZER) && !defined(MEMORY_SANITIZER) +TEST(Split, WorksWithLargeStrings) { + if (sizeof(size_t) > 4 && !RunningOnValgrind()) { + std::string s(1ULL << 31, 'x'); + s.push_back('-'); // 2G + 1 byte + std::vector<absl::string_view> v = absl::StrSplit(s, '-'); + EXPECT_EQ(2, v.size()); + // The first element will contain 2G of 'x's. + // testing::StartsWith is too slow with a 2G std::string. + EXPECT_EQ('x', v[0][0]); + EXPECT_EQ('x', v[0][1]); + EXPECT_EQ('x', v[0][3]); + EXPECT_EQ("", v[1]); + } +} +#endif // THREAD_SANITIZER + +TEST(SplitInternalTest, TypeTraits) { + EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasMappedType<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasValueType<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value); + EXPECT_TRUE( + (absl::strings_internal::HasConstIterator<std::map<int, int>>::value)); + EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value); + EXPECT_TRUE((absl::strings_internal::IsInitializerList< + std::initializer_list<int>>::value)); +} + +} // namespace |