summaryrefslogtreecommitdiff
path: root/absl/strings
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings')
-rw-r--r--absl/strings/BUILD.bazel293
-rw-r--r--absl/strings/README.md87
-rw-r--r--absl/strings/ascii.cc198
-rw-r--r--absl/strings/ascii.h239
-rw-r--r--absl/strings/ascii_ctype.h66
-rw-r--r--absl/strings/ascii_test.cc354
-rw-r--r--absl/strings/escaping.cc1093
-rw-r--r--absl/strings/escaping.h158
-rw-r--r--absl/strings/escaping_test.cc638
-rw-r--r--absl/strings/internal/char_map.h154
-rw-r--r--absl/strings/internal/char_map_test.cc172
-rw-r--r--absl/strings/internal/escaping_test_common.inc113
-rw-r--r--absl/strings/internal/fastmem.h215
-rw-r--r--absl/strings/internal/fastmem_test.cc453
-rw-r--r--absl/strings/internal/memutil.cc110
-rw-r--r--absl/strings/internal/memutil.h146
-rw-r--r--absl/strings/internal/memutil_test.cc180
-rw-r--r--absl/strings/internal/numbers_test_common.inc166
-rw-r--r--absl/strings/internal/ostringstream.h97
-rw-r--r--absl/strings/internal/ostringstream_test.cc103
-rw-r--r--absl/strings/internal/resize_uninitialized.h69
-rw-r--r--absl/strings/internal/resize_uninitialized_test.cc68
-rw-r--r--absl/strings/internal/str_join_internal.h314
-rw-r--r--absl/strings/internal/str_split_internal.h439
-rw-r--r--absl/strings/internal/utf8.cc51
-rw-r--r--absl/strings/internal/utf8.h52
-rw-r--r--absl/strings/internal/utf8_test.cc58
-rw-r--r--absl/strings/match.cc40
-rw-r--r--absl/strings/match.h81
-rw-r--r--absl/strings/match_test.cc99
-rw-r--r--absl/strings/numbers.cc1288
-rw-r--r--absl/strings/numbers.h173
-rw-r--r--absl/strings/numbers_test.cc1186
-rw-r--r--absl/strings/str_cat.cc208
-rw-r--r--absl/strings/str_cat.h348
-rw-r--r--absl/strings/str_cat_test.cc462
-rw-r--r--absl/strings/str_join.h288
-rw-r--r--absl/strings/str_join_test.cc474
-rw-r--r--absl/strings/str_replace.cc79
-rw-r--r--absl/strings/str_replace.h213
-rw-r--r--absl/strings/str_replace_test.cc340
-rw-r--r--absl/strings/str_split.cc133
-rw-r--r--absl/strings/str_split.h511
-rw-r--r--absl/strings/str_split_test.cc896
-rw-r--r--absl/strings/string_view.cc248
-rw-r--r--absl/strings/string_view.h572
-rw-r--r--absl/strings/string_view_test.cc1097
-rw-r--r--absl/strings/strip.cc269
-rw-r--r--absl/strings/strip.h89
-rw-r--r--absl/strings/strip_test.cc119
-rw-r--r--absl/strings/substitute.cc117
-rw-r--r--absl/strings/substitute.h674
-rw-r--r--absl/strings/substitute_test.cc168
-rw-r--r--absl/strings/testdata/getline-1.txt3
-rw-r--r--absl/strings/testdata/getline-2.txt1
55 files changed, 16262 insertions, 0 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel
new file mode 100644
index 00000000..070721cc
--- /dev/null
+++ b/absl/strings/BUILD.bazel
@@ -0,0 +1,293 @@
+#
+# Copyright 2017 The Abseil Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# -*- mode: python; -*-
+# Libraries in this low-level package may not depend on libraries in packages
+# that are not low level. For more information, including how to submit
+# changes to this file, see http://www/eng/howto/build-monitors.html
+
+load(
+ "//absl:test_dependencies.bzl",
+ "GUNIT_MAIN_DEPS_SELECTOR",
+ "GUNIT_DEPS_SELECTOR",
+)
+load(
+ "//absl:copts.bzl",
+ "ABSL_DEFAULT_COPTS",
+ "ABSL_TEST_COPTS",
+ "ABSL_EXCEPTIONS_FLAG",
+)
+
+package(
+ default_visibility = ["//visibility:public"],
+ features = [
+ "parse_headers",
+ "header_modules",
+ ],
+)
+
+licenses(["notice"]) # Apache 2.0
+
+cc_library(
+ name = "strings",
+ srcs = [
+ "ascii.cc",
+ "escaping.cc",
+ "internal/memutil.cc",
+ "internal/memutil.h",
+ "internal/str_join_internal.h",
+ "internal/str_split_internal.h",
+ "match.cc",
+ "numbers.cc",
+ "str_cat.cc",
+ "str_replace.cc",
+ "str_split.cc",
+ "string_view.cc",
+ "substitute.cc",
+ ],
+ hdrs = [
+ "ascii.h",
+ "escaping.h",
+ "match.h",
+ "numbers.h",
+ "str_cat.h",
+ "str_join.h",
+ "str_replace.h",
+ "str_split.h",
+ "string_view.h",
+ "strip.h",
+ "substitute.h",
+ ],
+ copts = ABSL_DEFAULT_COPTS,
+ deps = [
+ ":internal",
+ "//absl/base",
+ "//absl/base:config",
+ "//absl/base:core_headers",
+ "//absl/base:endian",
+ "//absl/base:throw_delegate",
+ "//absl/memory",
+ "//absl/meta:type_traits",
+ "//absl/numeric:int128",
+ ],
+)
+
+cc_library(
+ name = "internal",
+ srcs = [
+ "internal/utf8.cc",
+ ],
+ hdrs = [
+ "internal/char_map.h",
+ "internal/fastmem.h",
+ "internal/ostringstream.h",
+ "internal/resize_uninitialized.h",
+ "internal/utf8.h",
+ ],
+ copts = ABSL_DEFAULT_COPTS,
+ deps = [
+ "//absl/base:core_headers",
+ "//absl/base:endian",
+ "//absl/meta:type_traits",
+ ],
+)
+
+cc_test(
+ name = "match_test",
+ size = "small",
+ srcs = ["match_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [":strings"] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "escaping_test",
+ size = "small",
+ srcs = [
+ "escaping_test.cc",
+ "internal/escaping_test_common.inc",
+ ],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ "//absl/container:fixed_array",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "ascii_test",
+ size = "small",
+ srcs = ["ascii_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "memutil_test",
+ size = "small",
+ srcs = [
+ "internal/memutil.h",
+ "internal/memutil_test.cc",
+ ],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "utf8_test",
+ size = "small",
+ srcs = [
+ "internal/utf8_test.cc",
+ ],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ ":internal",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "string_view_test",
+ size = "small",
+ srcs = ["string_view_test.cc"],
+ copts = ABSL_TEST_COPTS + ABSL_EXCEPTIONS_FLAG,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ "//absl/base:config",
+ "//absl/base:dynamic_annotations",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "substitute_test",
+ size = "small",
+ srcs = ["substitute_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "str_replace_test",
+ size = "small",
+ srcs = ["str_replace_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "str_split_test",
+ srcs = ["str_split_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ "//absl/base:dynamic_annotations",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "ostringstream_test",
+ size = "small",
+ srcs = ["internal/ostringstream_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":internal",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "resize_uninitialized_test",
+ size = "small",
+ srcs = [
+ "internal/resize_uninitialized.h",
+ "internal/resize_uninitialized_test.cc",
+ ],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ "//absl/base:core_headers",
+ "//absl/meta:type_traits",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "str_join_test",
+ size = "small",
+ srcs = ["str_join_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ "//absl/memory",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "str_cat_test",
+ size = "small",
+ srcs = ["str_cat_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":strings",
+ "//absl/base:core_headers",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "numbers_test",
+ size = "small",
+ srcs = [
+ "internal/numbers_test_common.inc",
+ "numbers_test.cc",
+ ],
+ copts = ABSL_TEST_COPTS,
+ tags = [
+ "no_test_loonix",
+ ],
+ deps = [
+ ":strings",
+ "//absl/base",
+ "//absl/base:core_headers",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "strip_test",
+ size = "small",
+ srcs = ["strip_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [":strings"] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
+
+cc_test(
+ name = "char_map_test",
+ srcs = ["internal/char_map_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ deps = [
+ ":internal",
+ ] + select(GUNIT_MAIN_DEPS_SELECTOR),
+)
diff --git a/absl/strings/README.md b/absl/strings/README.md
new file mode 100644
index 00000000..d5320eb0
--- /dev/null
+++ b/absl/strings/README.md
@@ -0,0 +1,87 @@
+# ABSL Strings
+
+This directory contains packages related to std::string operations and std::string
+alternatives (such as character-agnostic byte manipulation packages).
+
+## Library Listing
+
+Two library targets are available within this directory:
+
+* **strings** (`//absl/strings:strings`) provides classes and
+ utility functions for manipulating and comparing strings, converting other
+ types (such as integers) into strings, or evaluating strings for other usages
+ (such as tokenization).
+
+* **cord** (`//absl/strings:cord`) provides classes and utility
+ functions for manipulating `Cord` elements. A `Cord` is a sequence of
+ characters that internally uses a tree structure to store their data,
+ avoiding the need for long regions of contiguous memory, and allows memory
+ sharing, sub-std::string copy-on-write, and a host of other advanced std::string
+ features.
+
+## Strings Library File Listing
+
+The following header files are directly included within the
+`absl::strings` library.
+
+## Alternate std::string-like Classes
+
+* `bytestream.h`
+ <br/>Abstraction of std::string for I/O
+* `string_view.h`
+ <br/>Pointer to part or all of another std::string
+
+## Formatting and Parsing
+
+* `numbers.h`
+ <br/>Converter between strings and numbers. Prefer `str_cat.h` for numbers
+ to strings
+
+## Operations on Characters
+
+* `ascii_ctype.h`
+ <br/>Char classifiers like &lt;ctype.h&gt; but faster
+* `charset.h`
+ <br/>Bitmap from unsigned char -&gt; bool
+
+## Operations on Strings
+
+* `case.h`
+ <br/>Case-changers
+* `escaping.h`
+ <br/>Escapers and unescapers
+* `str_join.h`
+ <br/>Joiner functions using a delimiter
+* `str_split.h`
+ <br/>Split functions
+* `str_cat.h`
+ <br/>Concatenators and appenders
+* `string_view_utils.h`
+ <br>Utility functions for strings
+* `strip.h`
+ <br/>Character removal functions
+* `substitute.h`
+ <br/>Printf-like typesafe formatter
+
+## Miscellaneous
+
+* `util.h`
+ <br/>Grab bag of useful std::string functions
+
+
+## Cord Library File Listing
+
+The following header files are directly included within the
+`absl::strings::cord` library:
+
+## The `Cord` Class
+
+* `cord.h`
+ <br/>A std::string built from a tree of shareable nodes
+
+## Operations on Cords
+
+* `cord_cat.h`
+ <br/>Concatenator functions for cords
+* `cord_util.h`
+ <br/>Utility functions for cords
diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc
new file mode 100644
index 00000000..c9481e88
--- /dev/null
+++ b/absl/strings/ascii.cc
@@ -0,0 +1,198 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/ascii.h"
+
+namespace absl {
+namespace ascii_internal {
+
+// # Table generated by this Python code (bit 0x02 is currently unused):
+// TODO(mbar) Move Python code for generation of table to BUILD and link here.
+
+// NOTE: The kAsciiPropertyBits table used within this code was generated by
+// Python code of the following form. (Bit 0x02 is currently unused and
+// available.)
+//
+// def Hex2(n):
+// return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
+// def IsPunct(ch):
+// return (ord(ch) >= 32 and ord(ch) < 127 and
+// not ch.isspace() and not ch.isalnum())
+// def IsBlank(ch):
+// return ch in ' \t'
+// def IsCntrl(ch):
+// return ord(ch) < 32 or ord(ch) == 127
+// def IsXDigit(ch):
+// return ch.isdigit() or ch.lower() in 'abcdef'
+// for i in range(128):
+// ch = chr(i)
+// mask = ((ch.isalpha() and 0x01 or 0) |
+// (ch.isalnum() and 0x04 or 0) |
+// (ch.isspace() and 0x08 or 0) |
+// (IsPunct(ch) and 0x10 or 0) |
+// (IsBlank(ch) and 0x20 or 0) |
+// (IsCntrl(ch) and 0x40 or 0) |
+// (IsXDigit(ch) and 0x80 or 0))
+// print Hex2(mask) + ',',
+// if i % 16 == 7:
+// print ' //', Hex2(i & 0x78)
+// elif i % 16 == 15:
+// print
+
+// clang-format off
+// Array of bitfields holding character information. Each bit value corresponds
+// to a particular character feature. For readability, and because the value
+// of these bits is tightly coupled to this implementation, the individual bits
+// are not named. Note that bitfields for all characters above ASCII 127 are
+// zero-initialized.
+const unsigned char kPropertyBits[256] = {
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00
+ 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10
+ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
+ 0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, // 0x20
+ 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, // 0x30
+ 0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x40
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x50
+ 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,
+ 0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05, // 0x60
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, // 0x70
+ 0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,
+};
+
+// Array of characters for the ascii_tolower() function. For values 'A'
+// through 'Z', return the lower-case character; otherwise, return the
+// identity of the passed character.
+const char kToLower[256] = {
+ '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+ '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+ '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+ '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+ '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+ '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+ '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+ '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+ '\x40', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
+ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
+ 'x', 'y', 'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+ '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
+ '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
+ '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
+ '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
+ '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
+ '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
+ '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
+ '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
+ '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
+ '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
+ '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
+ '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
+ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
+ '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
+ '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
+ '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
+ '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
+ '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
+ '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
+ '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
+};
+
+// Array of characters for the ascii_toupper() function. For values 'a'
+// through 'z', return the upper-case character; otherwise, return the
+// identity of the passed character.
+const char kToUpper[256] = {
+ '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+ '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+ '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+ '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+ '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+ '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+ '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+ '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+ '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
+ '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
+ '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
+ '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+ '\x60', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
+ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
+ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
+ 'X', 'Y', 'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
+ '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
+ '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
+ '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
+ '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
+ '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
+ '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
+ '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
+ '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
+ '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
+ '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
+ '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
+ '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
+ '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
+ '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
+ '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
+ '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
+};
+// clang-format on
+
+} // namespace ascii_internal
+
+void AsciiStrToLower(std::string* s) {
+ for (auto& ch : *s) {
+ ch = absl::ascii_tolower(ch);
+ }
+}
+
+void AsciiStrToUpper(std::string* s) {
+ for (auto& ch : *s) {
+ ch = absl::ascii_toupper(ch);
+ }
+}
+
+void RemoveExtraAsciiWhitespace(std::string* str) {
+ auto stripped = StripAsciiWhitespace(*str);
+
+ if (stripped.empty()) {
+ str->clear();
+ return;
+ }
+
+ auto input_it = stripped.begin();
+ auto input_end = stripped.end();
+ auto output_it = &(*str)[0];
+ bool is_ws = false;
+
+ for (; input_it < input_end; ++input_it) {
+ if (is_ws) {
+ // Consecutive whitespace? Keep only the last.
+ is_ws = absl::ascii_isspace(*input_it);
+ if (is_ws) --output_it;
+ } else {
+ is_ws = absl::ascii_isspace(*input_it);
+ }
+
+ *output_it = *input_it;
+ ++output_it;
+ }
+
+ str->erase(output_it - &(*str)[0]);
+}
+
+} // namespace absl
diff --git a/absl/strings/ascii.h b/absl/strings/ascii.h
new file mode 100644
index 00000000..fc2bb33e
--- /dev/null
+++ b/absl/strings/ascii.h
@@ -0,0 +1,239 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: ascii.h
+// -----------------------------------------------------------------------------
+//
+// This package contains functions operating on characters and strings
+// restricted to standard ASCII. These include character classification
+// functions analogous to those found in the ANSI C Standard Library <ctype.h>
+// header file.
+//
+// C++ implementations provide <ctype.h> functionality based on their
+// C environment locale. In general, reliance on such a locale is not ideal, as
+// the locale standard is problematic (and may not return invariant information
+// for the same character set, for example). These `ascii_*()` functions are
+// hard-wired for standard ASCII, much faster, and guaranteed to behave
+// consistently. They will never be overloaded, nor will their function
+// signature change.
+//
+// `ascii_isalnum()`, `ascii_isalpha()`, `ascii_isascii()`, `ascii_isblank()`,
+// `ascii_iscntrl()`, `ascii_isdigit()`, `ascii_isgraph()`, `ascii_islower()`,
+// `ascii_isprint()`, `ascii_ispunct()`, `ascii_isspace()`, `ascii_isupper()`,
+// `ascii_isxdigit()`
+// Analagous to the <ctype.h> functions with similar names, these
+// functions take an unsigned char and return a bool, based on whether the
+// character matches the condition specified.
+//
+// If the input character has a numerical value greater than 127, these
+// functions return `false`.
+//
+// `ascii_tolower()`, `ascii_toupper()`
+// Analagous to the <ctype.h> functions with similar names, these functions
+// take an unsigned char and return a char.
+//
+// If the input character is not an ASCII {lower,upper}-case letter (including
+// numerical values greater than 127) then the functions return the same value
+// as the input character.
+
+#ifndef ABSL_STRINGS_ASCII_H_
+#define ABSL_STRINGS_ASCII_H_
+
+#include <algorithm>
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+namespace ascii_internal {
+
+// Declaration for an array of bitfields holding character information.
+extern const unsigned char kPropertyBits[256];
+
+// Declaration for the array of characters to upper-case characters.
+extern const char kToUpper[256];
+
+// Declaration for the array of characters to lower-case characters.
+extern const char kToLower[256];
+
+} // namespace ascii_internal
+
+// ascii_isalpha()
+//
+// Determines whether the given character is an alphabetic character.
+inline bool ascii_isalpha(unsigned char c) {
+ return (ascii_internal::kPropertyBits[c] & 0x01) != 0;
+}
+
+// ascii_isalnum()
+//
+// Determines whether the given character is an alphanumeric character.
+inline bool ascii_isalnum(unsigned char c) {
+ return (ascii_internal::kPropertyBits[c] & 0x04) != 0;
+}
+
+// ascii_isspace()
+//
+// Determines whether the given character is a whitespace character (space,
+// tab, vertical tab, formfeed, linefeed, or carriage return).
+inline bool ascii_isspace(unsigned char c) {
+ return (ascii_internal::kPropertyBits[c] & 0x08) != 0;
+}
+
+// ascii_ispunct()
+//
+// Determines whether the given character is a punctuation character.
+inline bool ascii_ispunct(unsigned char c) {
+ return (ascii_internal::kPropertyBits[c] & 0x10) != 0;
+}
+
+// ascii_isblank()
+//
+// Determines whether the given character is a blank character (tab or space).
+inline bool ascii_isblank(unsigned char c) {
+ return (ascii_internal::kPropertyBits[c] & 0x20) != 0;
+}
+
+// ascii_iscntrl()
+//
+// Determines whether the given character is a control character.
+inline bool ascii_iscntrl(unsigned char c) {
+ return (ascii_internal::kPropertyBits[c] & 0x40) != 0;
+}
+
+// ascii_isxdigit()
+//
+// Determines whether the given character can be represented as a hexadecimal
+// digit character (i.e. {0-9} or {A-F}).
+inline bool ascii_isxdigit(unsigned char c) {
+ return (ascii_internal::kPropertyBits[c] & 0x80) != 0;
+}
+
+// ascii_isdigit()
+//
+// Determines whether the given character can be represented as a decimal
+// digit character (i.e. {0-9}).
+inline bool ascii_isdigit(unsigned char c) { return c >= '0' && c <= '9'; }
+
+// ascii_isprint()
+//
+// Determines whether the given character is printable, including whitespace.
+inline bool ascii_isprint(unsigned char c) { return c >= 32 && c < 127; }
+
+// ascii_isgraph()
+//
+// Determines whether the given character has a graphical representation.
+inline bool ascii_isgraph(unsigned char c) { return c > 32 && c < 127; }
+
+// ascii_isupper()
+//
+// Determines whether the given character is uppercase.
+inline bool ascii_isupper(unsigned char c) { return c >= 'A' && c <= 'Z'; }
+
+// ascii_islower()
+//
+// Determines whether the given character is lowercase.
+inline bool ascii_islower(unsigned char c) { return c >= 'a' && c <= 'z'; }
+
+// ascii_isascii()
+//
+// Determines whether the given character is ASCII.
+inline bool ascii_isascii(unsigned char c) { return c < 128; }
+
+// ascii_tolower()
+//
+// Returns an ASCII character, converting to lowercase if uppercase is
+// passed. Note that character values > 127 are simply returned.
+inline char ascii_tolower(unsigned char c) {
+ return ascii_internal::kToLower[c];
+}
+
+// Converts the characters in `s` to lowercase, changing the contents of `s`.
+void AsciiStrToLower(std::string* s);
+
+// Creates a lowercase std::string from a given absl::string_view.
+ABSL_MUST_USE_RESULT inline std::string AsciiStrToLower(absl::string_view s) {
+ std::string result(s);
+ absl::AsciiStrToLower(&result);
+ return result;
+}
+
+// ascii_toupper()
+//
+// Returns the ASCII character, converting to upper-case if lower-case is
+// passed. Note that characters values > 127 are simply returned.
+inline char ascii_toupper(unsigned char c) {
+ return ascii_internal::kToUpper[c];
+}
+
+// Converts the characters in `s` to uppercase, changing the contents of `s`.
+void AsciiStrToUpper(std::string* s);
+
+// Creates an uppercase std::string from a given absl::string_view.
+ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) {
+ std::string result(s);
+ absl::AsciiStrToUpper(&result);
+ return result;
+}
+
+// Returns absl::string_view with whitespace stripped from the beginning of the
+// given string_view.
+ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace(
+ absl::string_view str) {
+ auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace);
+ return absl::string_view(it, str.end() - it);
+}
+
+// Strips in place whitespace from the beginning of the given std::string.
+inline void StripLeadingAsciiWhitespace(std::string* str) {
+ auto it = std::find_if_not(str->begin(), str->end(), absl::ascii_isspace);
+ str->erase(str->begin(), it);
+}
+
+// Returns absl::string_view with whitespace stripped from the end of the given
+// string_view.
+ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace(
+ absl::string_view str) {
+ auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace);
+ return absl::string_view(str.begin(), str.rend() - it);
+}
+
+// Strips in place whitespace from the end of the given std::string
+inline void StripTrailingAsciiWhitespace(std::string* str) {
+ auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace);
+ str->erase(str->rend() - it);
+}
+
+// Returns absl::string_view with whitespace stripped from both ends of the
+// given string_view.
+ABSL_MUST_USE_RESULT inline absl::string_view StripAsciiWhitespace(
+ absl::string_view str) {
+ return StripTrailingAsciiWhitespace(StripLeadingAsciiWhitespace(str));
+}
+
+// Strips in place whitespace from both ends of the given std::string
+inline void StripAsciiWhitespace(std::string* str) {
+ StripTrailingAsciiWhitespace(str);
+ StripLeadingAsciiWhitespace(str);
+}
+
+// Removes leading, trailing, and consecutive internal whitespace.
+void RemoveExtraAsciiWhitespace(std::string*);
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_ASCII_H_
diff --git a/absl/strings/ascii_ctype.h b/absl/strings/ascii_ctype.h
new file mode 100644
index 00000000..e1ba9e24
--- /dev/null
+++ b/absl/strings/ascii_ctype.h
@@ -0,0 +1,66 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_ASCII_CTYPE_H_
+#define ABSL_STRINGS_ASCII_CTYPE_H_
+
+#include "absl/strings/ascii.h"
+
+inline bool ascii_isalpha(unsigned char c) {
+ return absl::ascii_isalpha(c);
+}
+inline bool ascii_isalnum(unsigned char c) {
+ return absl::ascii_isalnum(c);
+}
+inline bool ascii_isspace(unsigned char c) {
+ return absl::ascii_isspace(c);
+}
+inline bool ascii_ispunct(unsigned char c) {
+ return absl::ascii_ispunct(c);
+}
+inline bool ascii_isblank(unsigned char c) {
+ return absl::ascii_isblank(c);
+}
+inline bool ascii_iscntrl(unsigned char c) {
+ return absl::ascii_iscntrl(c);
+}
+inline bool ascii_isxdigit(unsigned char c) {
+ return absl::ascii_isxdigit(c);
+}
+inline bool ascii_isdigit(unsigned char c) {
+ return absl::ascii_isdigit(c);
+}
+inline bool ascii_isprint(unsigned char c) {
+ return absl::ascii_isprint(c);
+}
+inline bool ascii_isgraph(unsigned char c) {
+ return absl::ascii_isgraph(c);
+}
+inline bool ascii_isupper(unsigned char c) {
+ return absl::ascii_isupper(c);
+}
+inline bool ascii_islower(unsigned char c) {
+ return absl::ascii_islower(c);
+}
+inline bool ascii_isascii(unsigned char c) {
+ return absl::ascii_isascii(c);
+}
+inline char ascii_tolower(unsigned char c) {
+ return absl::ascii_tolower(c);
+}
+inline char ascii_toupper(unsigned char c) {
+ return absl::ascii_toupper(c);
+}
+
+#endif // ABSL_STRINGS_ASCII_CTYPE_H_
diff --git a/absl/strings/ascii_test.cc b/absl/strings/ascii_test.cc
new file mode 100644
index 00000000..97f36013
--- /dev/null
+++ b/absl/strings/ascii_test.cc
@@ -0,0 +1,354 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/ascii.h"
+
+#include <cctype>
+#include <clocale>
+#include <cstring>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+
+namespace {
+
+TEST(AsciiIsFoo, All) {
+ for (int i = 0; i < 256; i++) {
+ if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'))
+ EXPECT_TRUE(absl::ascii_isalpha(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isalpha(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if ((i >= '0' && i <= '9'))
+ EXPECT_TRUE(absl::ascii_isdigit(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isdigit(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (absl::ascii_isalpha(i) || absl::ascii_isdigit(i))
+ EXPECT_TRUE(absl::ascii_isalnum(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isalnum(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (i != '\0' && strchr(" \r\n\t\v\f", i))
+ EXPECT_TRUE(absl::ascii_isspace(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isspace(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (i >= 32 && i < 127)
+ EXPECT_TRUE(absl::ascii_isprint(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isprint(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (absl::ascii_isprint(i) && !absl::ascii_isspace(i) &&
+ !absl::ascii_isalnum(i))
+ EXPECT_TRUE(absl::ascii_ispunct(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_ispunct(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (i == ' ' || i == '\t')
+ EXPECT_TRUE(absl::ascii_isblank(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isblank(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (i < 32 || i == 127)
+ EXPECT_TRUE(absl::ascii_iscntrl(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_iscntrl(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (absl::ascii_isdigit(i) || (i >= 'A' && i <= 'F') ||
+ (i >= 'a' && i <= 'f'))
+ EXPECT_TRUE(absl::ascii_isxdigit(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isxdigit(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (i > 32 && i < 127)
+ EXPECT_TRUE(absl::ascii_isgraph(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isgraph(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (i >= 'A' && i <= 'Z')
+ EXPECT_TRUE(absl::ascii_isupper(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_isupper(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 256; i++) {
+ if (i >= 'a' && i <= 'z')
+ EXPECT_TRUE(absl::ascii_islower(i)) << ": failed on " << i;
+ else
+ EXPECT_TRUE(!absl::ascii_islower(i)) << ": failed on " << i;
+ }
+ for (int i = 0; i < 128; i++) {
+ EXPECT_TRUE(absl::ascii_isascii(i)) << ": failed on " << i;
+ }
+ for (int i = 128; i < 256; i++) {
+ EXPECT_TRUE(!absl::ascii_isascii(i)) << ": failed on " << i;
+ }
+
+ // The official is* functions don't accept negative signed chars, but
+ // our absl::ascii_is* functions do.
+ for (int i = 0; i < 256; i++) {
+ signed char sc = static_cast<signed char>(static_cast<unsigned char>(i));
+ EXPECT_EQ(absl::ascii_isalpha(i), absl::ascii_isalpha(sc)) << i;
+ EXPECT_EQ(absl::ascii_isdigit(i), absl::ascii_isdigit(sc)) << i;
+ EXPECT_EQ(absl::ascii_isalnum(i), absl::ascii_isalnum(sc)) << i;
+ EXPECT_EQ(absl::ascii_isspace(i), absl::ascii_isspace(sc)) << i;
+ EXPECT_EQ(absl::ascii_ispunct(i), absl::ascii_ispunct(sc)) << i;
+ EXPECT_EQ(absl::ascii_isblank(i), absl::ascii_isblank(sc)) << i;
+ EXPECT_EQ(absl::ascii_iscntrl(i), absl::ascii_iscntrl(sc)) << i;
+ EXPECT_EQ(absl::ascii_isxdigit(i), absl::ascii_isxdigit(sc)) << i;
+ EXPECT_EQ(absl::ascii_isprint(i), absl::ascii_isprint(sc)) << i;
+ EXPECT_EQ(absl::ascii_isgraph(i), absl::ascii_isgraph(sc)) << i;
+ EXPECT_EQ(absl::ascii_isupper(i), absl::ascii_isupper(sc)) << i;
+ EXPECT_EQ(absl::ascii_islower(i), absl::ascii_islower(sc)) << i;
+ EXPECT_EQ(absl::ascii_isascii(i), absl::ascii_isascii(sc)) << i;
+ }
+}
+
+// Checks that absl::ascii_isfoo returns the same value as isfoo in the C
+// locale.
+TEST(AsciiIsFoo, SameAsIsFoo) {
+ // temporarily change locale to C. It should already be C, but just for safety
+ std::string old_locale = setlocale(LC_CTYPE, nullptr);
+ ASSERT_TRUE(setlocale(LC_CTYPE, "C"));
+
+ for (int i = 0; i < 256; i++) {
+ EXPECT_EQ(isalpha(i) != 0, absl::ascii_isalpha(i)) << i;
+ EXPECT_EQ(isdigit(i) != 0, absl::ascii_isdigit(i)) << i;
+ EXPECT_EQ(isalnum(i) != 0, absl::ascii_isalnum(i)) << i;
+ EXPECT_EQ(isspace(i) != 0, absl::ascii_isspace(i)) << i;
+ EXPECT_EQ(ispunct(i) != 0, absl::ascii_ispunct(i)) << i;
+ EXPECT_EQ(isblank(i) != 0, absl::ascii_isblank(i)) << i;
+ EXPECT_EQ(iscntrl(i) != 0, absl::ascii_iscntrl(i)) << i;
+ EXPECT_EQ(isxdigit(i) != 0, absl::ascii_isxdigit(i)) << i;
+ EXPECT_EQ(isprint(i) != 0, absl::ascii_isprint(i)) << i;
+ EXPECT_EQ(isgraph(i) != 0, absl::ascii_isgraph(i)) << i;
+ EXPECT_EQ(isupper(i) != 0, absl::ascii_isupper(i)) << i;
+ EXPECT_EQ(islower(i) != 0, absl::ascii_islower(i)) << i;
+ EXPECT_EQ(isascii(i) != 0, absl::ascii_isascii(i)) << i;
+ }
+
+ // restore the old locale.
+ ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str()));
+}
+
+TEST(AsciiToFoo, All) {
+ // temporarily change locale to C. It should already be C, but just for safety
+ std::string old_locale = setlocale(LC_CTYPE, nullptr);
+ ASSERT_TRUE(setlocale(LC_CTYPE, "C"));
+
+ for (int i = 0; i < 256; i++) {
+ if (absl::ascii_islower(i))
+ EXPECT_EQ(absl::ascii_toupper(i), 'A' + (i - 'a')) << i;
+ else
+ EXPECT_EQ(absl::ascii_toupper(i), static_cast<char>(i)) << i;
+
+ if (absl::ascii_isupper(i))
+ EXPECT_EQ(absl::ascii_tolower(i), 'a' + (i - 'A')) << i;
+ else
+ EXPECT_EQ(absl::ascii_tolower(i), static_cast<char>(i)) << i;
+
+ // These CHECKs only hold in a C locale.
+ EXPECT_EQ(static_cast<char>(tolower(i)), absl::ascii_tolower(i)) << i;
+ EXPECT_EQ(static_cast<char>(toupper(i)), absl::ascii_toupper(i)) << i;
+
+ // The official to* functions don't accept negative signed chars, but
+ // our absl::ascii_to* functions do.
+ signed char sc = static_cast<signed char>(static_cast<unsigned char>(i));
+ EXPECT_EQ(absl::ascii_tolower(i), absl::ascii_tolower(sc)) << i;
+ EXPECT_EQ(absl::ascii_toupper(i), absl::ascii_toupper(sc)) << i;
+ }
+
+ // restore the old locale.
+ ASSERT_TRUE(setlocale(LC_CTYPE, old_locale.c_str()));
+}
+
+TEST(AsciiStrTo, Lower) {
+ const char buf[] = "ABCDEF";
+ const std::string str("GHIJKL");
+ const std::string str2("MNOPQR");
+ const absl::string_view sp(str2);
+
+ EXPECT_EQ("abcdef", absl::AsciiStrToLower(buf));
+ EXPECT_EQ("ghijkl", absl::AsciiStrToLower(str));
+ EXPECT_EQ("mnopqr", absl::AsciiStrToLower(sp));
+
+ char mutable_buf[] = "Mutable";
+ std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
+ mutable_buf, absl::ascii_tolower);
+ EXPECT_STREQ("mutable", mutable_buf);
+}
+
+TEST(AsciiStrTo, Upper) {
+ const char buf[] = "abcdef";
+ const std::string str("ghijkl");
+ const std::string str2("mnopqr");
+ const absl::string_view sp(str2);
+
+ EXPECT_EQ("ABCDEF", absl::AsciiStrToUpper(buf));
+ EXPECT_EQ("GHIJKL", absl::AsciiStrToUpper(str));
+ EXPECT_EQ("MNOPQR", absl::AsciiStrToUpper(sp));
+
+ char mutable_buf[] = "Mutable";
+ std::transform(mutable_buf, mutable_buf + strlen(mutable_buf),
+ mutable_buf, absl::ascii_toupper);
+ EXPECT_STREQ("MUTABLE", mutable_buf);
+}
+
+TEST(StripLeadingAsciiWhitespace, FromStringView) {
+ EXPECT_EQ(absl::string_view{},
+ absl::StripLeadingAsciiWhitespace(absl::string_view{}));
+ EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"foo"}));
+ EXPECT_EQ("foo", absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo"}));
+ EXPECT_EQ("foo foo\n ",
+ absl::StripLeadingAsciiWhitespace({"\t \n\f\r\n\vfoo foo\n "}));
+ EXPECT_EQ(absl::string_view{}, absl::StripLeadingAsciiWhitespace(
+ {"\t \n\f\r\v\n\t \n\f\r\v\n"}));
+}
+
+TEST(StripLeadingAsciiWhitespace, InPlace) {
+ std::string str;
+
+ absl::StripLeadingAsciiWhitespace(&str);
+ EXPECT_EQ("", str);
+
+ str = "foo";
+ absl::StripLeadingAsciiWhitespace(&str);
+ EXPECT_EQ("foo", str);
+
+ str = "\t \n\f\r\n\vfoo";
+ absl::StripLeadingAsciiWhitespace(&str);
+ EXPECT_EQ("foo", str);
+
+ str = "\t \n\f\r\n\vfoo foo\n ";
+ absl::StripLeadingAsciiWhitespace(&str);
+ EXPECT_EQ("foo foo\n ", str);
+
+ str = "\t \n\f\r\v\n\t \n\f\r\v\n";
+ absl::StripLeadingAsciiWhitespace(&str);
+ EXPECT_EQ(absl::string_view{}, str);
+}
+
+TEST(StripTrailingAsciiWhitespace, FromStringView) {
+ EXPECT_EQ(absl::string_view{},
+ absl::StripTrailingAsciiWhitespace(absl::string_view{}));
+ EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo"}));
+ EXPECT_EQ("foo", absl::StripTrailingAsciiWhitespace({"foo\t \n\f\r\n\v"}));
+ EXPECT_EQ(" \nfoo foo",
+ absl::StripTrailingAsciiWhitespace({" \nfoo foo\t \n\f\r\n\v"}));
+ EXPECT_EQ(absl::string_view{}, absl::StripTrailingAsciiWhitespace(
+ {"\t \n\f\r\v\n\t \n\f\r\v\n"}));
+}
+
+TEST(StripTrailingAsciiWhitespace, InPlace) {
+ std::string str;
+
+ absl::StripTrailingAsciiWhitespace(&str);
+ EXPECT_EQ("", str);
+
+ str = "foo";
+ absl::StripTrailingAsciiWhitespace(&str);
+ EXPECT_EQ("foo", str);
+
+ str = "foo\t \n\f\r\n\v";
+ absl::StripTrailingAsciiWhitespace(&str);
+ EXPECT_EQ("foo", str);
+
+ str = " \nfoo foo\t \n\f\r\n\v";
+ absl::StripTrailingAsciiWhitespace(&str);
+ EXPECT_EQ(" \nfoo foo", str);
+
+ str = "\t \n\f\r\v\n\t \n\f\r\v\n";
+ absl::StripTrailingAsciiWhitespace(&str);
+ EXPECT_EQ(absl::string_view{}, str);
+}
+
+TEST(StripAsciiWhitespace, FromStringView) {
+ EXPECT_EQ(absl::string_view{},
+ absl::StripAsciiWhitespace(absl::string_view{}));
+ EXPECT_EQ("foo", absl::StripAsciiWhitespace({"foo"}));
+ EXPECT_EQ("foo",
+ absl::StripAsciiWhitespace({"\t \n\f\r\n\vfoo\t \n\f\r\n\v"}));
+ EXPECT_EQ("foo foo", absl::StripAsciiWhitespace(
+ {"\t \n\f\r\n\vfoo foo\t \n\f\r\n\v"}));
+ EXPECT_EQ(absl::string_view{},
+ absl::StripAsciiWhitespace({"\t \n\f\r\v\n\t \n\f\r\v\n"}));
+}
+
+TEST(StripAsciiWhitespace, InPlace) {
+ std::string str;
+
+ absl::StripAsciiWhitespace(&str);
+ EXPECT_EQ("", str);
+
+ str = "foo";
+ absl::StripAsciiWhitespace(&str);
+ EXPECT_EQ("foo", str);
+
+ str = "\t \n\f\r\n\vfoo\t \n\f\r\n\v";
+ absl::StripAsciiWhitespace(&str);
+ EXPECT_EQ("foo", str);
+
+ str = "\t \n\f\r\n\vfoo foo\t \n\f\r\n\v";
+ absl::StripAsciiWhitespace(&str);
+ EXPECT_EQ("foo foo", str);
+
+ str = "\t \n\f\r\v\n\t \n\f\r\v\n";
+ absl::StripAsciiWhitespace(&str);
+ EXPECT_EQ(absl::string_view{}, str);
+}
+
+TEST(RemoveExtraAsciiWhitespace, InPlace) {
+ const char* inputs[] = {"No extra space",
+ " Leading whitespace",
+ "Trailing whitespace ",
+ " Leading and trailing ",
+ " Whitespace \t in\v middle ",
+ "'Eeeeep! \n Newlines!\n",
+ "nospaces",
+ "",
+ "\n\t a\t\n\nb \t\n"};
+
+ const char* outputs[] = {
+ "No extra space",
+ "Leading whitespace",
+ "Trailing whitespace",
+ "Leading and trailing",
+ "Whitespace in middle",
+ "'Eeeeep! Newlines!",
+ "nospaces",
+ "",
+ "a\nb",
+ };
+ const int NUM_TESTS = ABSL_ARRAYSIZE(inputs);
+
+ for (int i = 0; i < NUM_TESTS; i++) {
+ std::string s(inputs[i]);
+ absl::RemoveExtraAsciiWhitespace(&s);
+ EXPECT_EQ(outputs[i], s);
+ }
+}
+
+} // namespace
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
new file mode 100644
index 00000000..f1576057
--- /dev/null
+++ b/absl/strings/escaping.cc
@@ -0,0 +1,1093 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/escaping.h"
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "absl/base/internal/endian.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/base/internal/unaligned_access.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/strings/internal/char_map.h"
+#include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/internal/utf8.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+namespace {
+
+// Digit conversion.
+constexpr char kHexChar[] = "0123456789abcdef";
+
+constexpr char kHexTable[513] =
+ "000102030405060708090a0b0c0d0e0f"
+ "101112131415161718191a1b1c1d1e1f"
+ "202122232425262728292a2b2c2d2e2f"
+ "303132333435363738393a3b3c3d3e3f"
+ "404142434445464748494a4b4c4d4e4f"
+ "505152535455565758595a5b5c5d5e5f"
+ "606162636465666768696a6b6c6d6e6f"
+ "707172737475767778797a7b7c7d7e7f"
+ "808182838485868788898a8b8c8d8e8f"
+ "909192939495969798999a9b9c9d9e9f"
+ "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
+ "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
+ "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
+ "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
+ "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
+ "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
+
+// These are used for the leave_nulls_escaped argument to CUnescapeInternal().
+constexpr bool kUnescapeNulls = false;
+
+inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); }
+
+inline int hex_digit_to_int(char c) {
+ static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
+ "Character set must be ASCII.");
+ assert(absl::ascii_isxdigit(c));
+ int x = static_cast<unsigned char>(c);
+ if (x > '9') {
+ x += 9;
+ }
+ return x & 0xf;
+}
+
+// ----------------------------------------------------------------------
+// CUnescapeInternal()
+// Implements both CUnescape() and CUnescapeForNullTerminatedString().
+//
+// Unescapes C escape sequences and is the reverse of CEscape().
+//
+// If 'source' is valid, stores the unescaped std::string and its size in
+// 'dest' and 'dest_len' respectively, and returns true. Otherwise
+// returns false and optionally stores the error description in
+// 'error'. Set 'error' to nullptr to disable error reporting.
+//
+// 'dest' should point to a buffer that is at least as big as 'source'.
+// 'source' and 'dest' may be the same.
+//
+// NOTE: any changes to this function must also be reflected in the older
+// UnescapeCEscapeSequences().
+// ----------------------------------------------------------------------
+bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
+ char* dest, ptrdiff_t* dest_len, std::string* error) {
+ char* d = dest;
+ const char* p = source.data();
+ const char* end = source.end();
+ const char* last_byte = end - 1;
+
+ // Small optimization for case where source = dest and there's no escaping
+ while (p == d && p < end && *p != '\\') p++, d++;
+
+ while (p < end) {
+ if (*p != '\\') {
+ *d++ = *p++;
+ } else {
+ if (++p > last_byte) { // skip past the '\\'
+ if (error) *error = "String cannot end with \\";
+ return false;
+ }
+ switch (*p) {
+ case 'a': *d++ = '\a'; break;
+ case 'b': *d++ = '\b'; break;
+ case 'f': *d++ = '\f'; break;
+ case 'n': *d++ = '\n'; break;
+ case 'r': *d++ = '\r'; break;
+ case 't': *d++ = '\t'; break;
+ case 'v': *d++ = '\v'; break;
+ case '\\': *d++ = '\\'; break;
+ case '?': *d++ = '\?'; break; // \? Who knew?
+ case '\'': *d++ = '\''; break;
+ case '"': *d++ = '\"'; break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ // octal digit: 1 to 3 digits
+ const char* octal_start = p;
+ unsigned int ch = *p - '0';
+ if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
+ if (p < last_byte && is_octal_digit(p[1]))
+ ch = ch * 8 + *++p - '0'; // now points at last digit
+ if (ch > 0xff) {
+ if (error) {
+ *error = "Value of \\" +
+ std::string(octal_start, p + 1 - octal_start) +
+ " exceeds 0xff";
+ }
+ return false;
+ }
+ if ((ch == 0) && leave_nulls_escaped) {
+ // Copy the escape sequence for the null character
+ const ptrdiff_t octal_size = p + 1 - octal_start;
+ *d++ = '\\';
+ memcpy(d, octal_start, octal_size);
+ d += octal_size;
+ break;
+ }
+ *d++ = ch;
+ break;
+ }
+ case 'x':
+ case 'X': {
+ if (p >= last_byte) {
+ if (error) *error = "String cannot end with \\x";
+ return false;
+ } else if (!absl::ascii_isxdigit(p[1])) {
+ if (error) *error = "\\x cannot be followed by a non-hex digit";
+ return false;
+ }
+ unsigned int ch = 0;
+ const char* hex_start = p;
+ while (p < last_byte && absl::ascii_isxdigit(p[1]))
+ // Arbitrarily many hex digits
+ ch = (ch << 4) + hex_digit_to_int(*++p);
+ if (ch > 0xFF) {
+ if (error) {
+ *error = "Value of \\" + std::string(hex_start, p + 1 - hex_start) +
+ " exceeds 0xff";
+ }
+ return false;
+ }
+ if ((ch == 0) && leave_nulls_escaped) {
+ // Copy the escape sequence for the null character
+ const ptrdiff_t hex_size = p + 1 - hex_start;
+ *d++ = '\\';
+ memcpy(d, hex_start, hex_size);
+ d += hex_size;
+ break;
+ }
+ *d++ = ch;
+ break;
+ }
+ case 'u': {
+ // \uhhhh => convert 4 hex digits to UTF-8
+ char32_t rune = 0;
+ const char* hex_start = p;
+ if (p + 4 >= end) {
+ if (error) {
+ *error = "\\u must be followed by 4 hex digits: \\" +
+ std::string(hex_start, p + 1 - hex_start);
+ }
+ return false;
+ }
+ for (int i = 0; i < 4; ++i) {
+ // Look one char ahead.
+ if (absl::ascii_isxdigit(p[1])) {
+ rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
+ } else {
+ if (error) {
+ *error = "\\u must be followed by 4 hex digits: \\" +
+ std::string(hex_start, p + 1 - hex_start);
+ }
+ return false;
+ }
+ }
+ if ((rune == 0) && leave_nulls_escaped) {
+ // Copy the escape sequence for the null character
+ *d++ = '\\';
+ memcpy(d, hex_start, 5); // u0000
+ d += 5;
+ break;
+ }
+ d += strings_internal::EncodeUTF8Char(d, rune);
+ break;
+ }
+ case 'U': {
+ // \Uhhhhhhhh => convert 8 hex digits to UTF-8
+ char32_t rune = 0;
+ const char* hex_start = p;
+ if (p + 8 >= end) {
+ if (error) {
+ *error = "\\U must be followed by 8 hex digits: \\" +
+ std::string(hex_start, p + 1 - hex_start);
+ }
+ return false;
+ }
+ for (int i = 0; i < 8; ++i) {
+ // Look one char ahead.
+ if (absl::ascii_isxdigit(p[1])) {
+ // Don't change rune until we're sure this
+ // is within the Unicode limit, but do advance p.
+ uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
+ if (newrune > 0x10FFFF) {
+ if (error) {
+ *error = "Value of \\" +
+ std::string(hex_start, p + 1 - hex_start) +
+ " exceeds Unicode limit (0x10FFFF)";
+ }
+ return false;
+ } else {
+ rune = newrune;
+ }
+ } else {
+ if (error) {
+ *error = "\\U must be followed by 8 hex digits: \\" +
+ std::string(hex_start, p + 1 - hex_start);
+ }
+ return false;
+ }
+ }
+ if ((rune == 0) && leave_nulls_escaped) {
+ // Copy the escape sequence for the null character
+ *d++ = '\\';
+ memcpy(d, hex_start, 9); // U00000000
+ d += 9;
+ break;
+ }
+ d += strings_internal::EncodeUTF8Char(d, rune);
+ break;
+ }
+ default: {
+ if (error) *error = std::string("Unknown escape sequence: \\") + *p;
+ return false;
+ }
+ }
+ p++; // read past letter we escaped
+ }
+ }
+ *dest_len = d - dest;
+ return true;
+}
+
+// ----------------------------------------------------------------------
+// CUnescapeInternal()
+//
+// Same as above but uses a C++ std::string for output. 'source' and 'dest'
+// may be the same.
+// ----------------------------------------------------------------------
+bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
+ std::string* dest, std::string* error) {
+ strings_internal::STLStringResizeUninitialized(dest, source.size());
+
+ ptrdiff_t dest_size;
+ if (!CUnescapeInternal(source,
+ leave_nulls_escaped,
+ const_cast<char*>(dest->data()),
+ &dest_size,
+ error)) {
+ return false;
+ }
+ dest->erase(dest_size);
+ return true;
+}
+
+// ----------------------------------------------------------------------
+// CEscape()
+// CHexEscape()
+// Utf8SafeCEscape()
+// Utf8SafeCHexEscape()
+// Escapes 'src' using C-style escape sequences. This is useful for
+// preparing query flags. The 'Hex' version uses hexadecimal rather than
+// octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes.
+//
+// Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
+// ----------------------------------------------------------------------
+std::string CEscapeInternal(absl::string_view src, bool use_hex, bool utf8_safe) {
+ std::string dest;
+ bool last_hex_escape = false; // true if last output char was \xNN.
+
+ for (unsigned char c : src) {
+ bool is_hex_escape = false;
+ switch (c) {
+ case '\n': dest.append("\\" "n"); break;
+ case '\r': dest.append("\\" "r"); break;
+ case '\t': dest.append("\\" "t"); break;
+ case '\"': dest.append("\\" "\""); break;
+ case '\'': dest.append("\\" "'"); break;
+ case '\\': dest.append("\\" "\\"); break;
+ default:
+ // Note that if we emit \xNN and the src character after that is a hex
+ // digit then that digit must be escaped too to prevent it being
+ // interpreted as part of the character code by C.
+ if ((!utf8_safe || c < 0x80) &&
+ (!absl::ascii_isprint(c) ||
+ (last_hex_escape && absl::ascii_isxdigit(c)))) {
+ if (use_hex) {
+ dest.append("\\" "x");
+ dest.push_back(kHexChar[c / 16]);
+ dest.push_back(kHexChar[c % 16]);
+ is_hex_escape = true;
+ } else {
+ dest.append("\\");
+ dest.push_back(kHexChar[c / 64]);
+ dest.push_back(kHexChar[(c % 64) / 8]);
+ dest.push_back(kHexChar[c % 8]);
+ }
+ } else {
+ dest.push_back(c);
+ break;
+ }
+ }
+ last_hex_escape = is_hex_escape;
+ }
+
+ return dest;
+}
+
+// Calculates the length of the C-style escaped version of 'src'.
+// Assumes that non-printable characters are escaped using octal sequences, and
+// that UTF-8 bytes are not handled specially.
+inline size_t CEscapedLength(absl::string_view src) {
+ /* clang-format off */
+ constexpr char c_escaped_len[256] = {
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", '
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '0'..'9'
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A'..'O'
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, // 'P'..'Z', '\'
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a'..'o'
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, // 'p'..'z', DEL
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ };
+ /* clang-format on */
+
+ size_t escaped_len = 0;
+ for (unsigned char c : src) escaped_len += c_escaped_len[c];
+ return escaped_len;
+}
+
+void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
+ size_t escaped_len = CEscapedLength(src);
+ if (escaped_len == src.size()) {
+ dest->append(src.data(), src.size());
+ return;
+ }
+
+ size_t cur_dest_len = dest->size();
+ strings_internal::STLStringResizeUninitialized(dest,
+ cur_dest_len + escaped_len);
+ char* append_ptr = &(*dest)[cur_dest_len];
+
+ for (unsigned char c : src) {
+ switch (c) {
+ case '\n':
+ *append_ptr++ = '\\';
+ *append_ptr++ = 'n';
+ break;
+ case '\r':
+ *append_ptr++ = '\\';
+ *append_ptr++ = 'r';
+ break;
+ case '\t':
+ *append_ptr++ = '\\';
+ *append_ptr++ = 't';
+ break;
+ case '\"':
+ *append_ptr++ = '\\';
+ *append_ptr++ = '\"';
+ break;
+ case '\'':
+ *append_ptr++ = '\\';
+ *append_ptr++ = '\'';
+ break;
+ case '\\':
+ *append_ptr++ = '\\';
+ *append_ptr++ = '\\';
+ break;
+ default:
+ if (!absl::ascii_isprint(c)) {
+ *append_ptr++ = '\\';
+ *append_ptr++ = '0' + c / 64;
+ *append_ptr++ = '0' + (c % 64) / 8;
+ *append_ptr++ = '0' + c % 8;
+ } else {
+ *append_ptr++ = c;
+ }
+ break;
+ }
+ }
+}
+
+bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
+ size_t szdest, const signed char* unbase64,
+ size_t* len) {
+ static const char kPad64Equals = '=';
+ static const char kPad64Dot = '.';
+
+ size_t destidx = 0;
+ int decode = 0;
+ int state = 0;
+ unsigned int ch = 0;
+ unsigned int temp = 0;
+
+ // If "char" is signed by default, using *src as an array index results in
+ // accessing negative array elements. Treat the input as a pointer to
+ // unsigned char to avoid this.
+ const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
+
+ // The GET_INPUT macro gets the next input character, skipping
+ // over any whitespace, and stopping when we reach the end of the
+ // std::string or when we read any non-data character. The arguments are
+ // an arbitrary identifier (used as a label for goto) and the number
+ // of data bytes that must remain in the input to avoid aborting the
+ // loop.
+#define GET_INPUT(label, remain) \
+ label: \
+ --szsrc; \
+ ch = *src++; \
+ decode = unbase64[ch]; \
+ if (decode < 0) { \
+ if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
+ state = 4 - remain; \
+ break; \
+ }
+
+ // if dest is null, we're just checking to see if it's legal input
+ // rather than producing output. (I suspect this could just be done
+ // with a regexp...). We duplicate the loop so this test can be
+ // outside it instead of in every iteration.
+
+ if (dest) {
+ // This loop consumes 4 input bytes and produces 3 output bytes
+ // per iteration. We can't know at the start that there is enough
+ // data left in the std::string for a full iteration, so the loop may
+ // break out in the middle; if so 'state' will be set to the
+ // number of input bytes read.
+
+ while (szsrc >= 4) {
+ // We'll start by optimistically assuming that the next four
+ // bytes of the std::string (src[0..3]) are four good data bytes
+ // (that is, no nulls, whitespace, padding chars, or illegal
+ // chars). We need to test src[0..2] for nulls individually
+ // before constructing temp to preserve the property that we
+ // never read past a null in the std::string (no matter how long
+ // szsrc claims the std::string is).
+
+ if (!src[0] || !src[1] || !src[2] ||
+ ((temp = ((unsigned(unbase64[src[0]]) << 18) |
+ (unsigned(unbase64[src[1]]) << 12) |
+ (unsigned(unbase64[src[2]]) << 6) |
+ (unsigned(unbase64[src[3]])))) &
+ 0x80000000)) {
+ // Iff any of those four characters was bad (null, illegal,
+ // whitespace, padding), then temp's high bit will be set
+ // (because unbase64[] is -1 for all bad characters).
+ //
+ // We'll back up and resort to the slower decoder, which knows
+ // how to handle those cases.
+
+ GET_INPUT(first, 4);
+ temp = decode;
+ GET_INPUT(second, 3);
+ temp = (temp << 6) | decode;
+ GET_INPUT(third, 2);
+ temp = (temp << 6) | decode;
+ GET_INPUT(fourth, 1);
+ temp = (temp << 6) | decode;
+ } else {
+ // We really did have four good data bytes, so advance four
+ // characters in the std::string.
+
+ szsrc -= 4;
+ src += 4;
+ }
+
+ // temp has 24 bits of input, so write that out as three bytes.
+
+ if (destidx + 3 > szdest) return false;
+ dest[destidx + 2] = temp;
+ temp >>= 8;
+ dest[destidx + 1] = temp;
+ temp >>= 8;
+ dest[destidx] = temp;
+ destidx += 3;
+ }
+ } else {
+ while (szsrc >= 4) {
+ if (!src[0] || !src[1] || !src[2] ||
+ ((temp = ((unsigned(unbase64[src[0]]) << 18) |
+ (unsigned(unbase64[src[1]]) << 12) |
+ (unsigned(unbase64[src[2]]) << 6) |
+ (unsigned(unbase64[src[3]])))) &
+ 0x80000000)) {
+ GET_INPUT(first_no_dest, 4);
+ GET_INPUT(second_no_dest, 3);
+ GET_INPUT(third_no_dest, 2);
+ GET_INPUT(fourth_no_dest, 1);
+ } else {
+ szsrc -= 4;
+ src += 4;
+ }
+ destidx += 3;
+ }
+ }
+
+#undef GET_INPUT
+
+ // if the loop terminated because we read a bad character, return
+ // now.
+ if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
+ !absl::ascii_isspace(ch))
+ return false;
+
+ if (ch == kPad64Equals || ch == kPad64Dot) {
+ // if we stopped by hitting an '=' or '.', un-read that character -- we'll
+ // look at it again when we count to check for the proper number of
+ // equals signs at the end.
+ ++szsrc;
+ --src;
+ } else {
+ // This loop consumes 1 input byte per iteration. It's used to
+ // clean up the 0-3 input bytes remaining when the first, faster
+ // loop finishes. 'temp' contains the data from 'state' input
+ // characters read by the first loop.
+ while (szsrc > 0) {
+ --szsrc;
+ ch = *src++;
+ decode = unbase64[ch];
+ if (decode < 0) {
+ if (absl::ascii_isspace(ch)) {
+ continue;
+ } else if (ch == kPad64Equals || ch == kPad64Dot) {
+ // back up one character; we'll read it again when we check
+ // for the correct number of pad characters at the end.
+ ++szsrc;
+ --src;
+ break;
+ } else {
+ return false;
+ }
+ }
+
+ // Each input character gives us six bits of output.
+ temp = (temp << 6) | decode;
+ ++state;
+ if (state == 4) {
+ // If we've accumulated 24 bits of output, write that out as
+ // three bytes.
+ if (dest) {
+ if (destidx + 3 > szdest) return false;
+ dest[destidx + 2] = temp;
+ temp >>= 8;
+ dest[destidx + 1] = temp;
+ temp >>= 8;
+ dest[destidx] = temp;
+ }
+ destidx += 3;
+ state = 0;
+ temp = 0;
+ }
+ }
+ }
+
+ // Process the leftover data contained in 'temp' at the end of the input.
+ int expected_equals = 0;
+ switch (state) {
+ case 0:
+ // Nothing left over; output is a multiple of 3 bytes.
+ break;
+
+ case 1:
+ // Bad input; we have 6 bits left over.
+ return false;
+
+ case 2:
+ // Produce one more output byte from the 12 input bits we have left.
+ if (dest) {
+ if (destidx + 1 > szdest) return false;
+ temp >>= 4;
+ dest[destidx] = temp;
+ }
+ ++destidx;
+ expected_equals = 2;
+ break;
+
+ case 3:
+ // Produce two more output bytes from the 18 input bits we have left.
+ if (dest) {
+ if (destidx + 2 > szdest) return false;
+ temp >>= 2;
+ dest[destidx + 1] = temp;
+ temp >>= 8;
+ dest[destidx] = temp;
+ }
+ destidx += 2;
+ expected_equals = 1;
+ break;
+
+ default:
+ // state should have no other values at this point.
+ ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
+ state);
+ }
+
+ // The remainder of the std::string should be all whitespace, mixed with
+ // exactly 0 equals signs, or exactly 'expected_equals' equals
+ // signs. (Always accepting 0 equals signs is an Abseil extension
+ // not covered in the RFC, as is accepting dot as the pad character.)
+
+ int equals = 0;
+ while (szsrc > 0) {
+ if (*src == kPad64Equals || *src == kPad64Dot)
+ ++equals;
+ else if (!absl::ascii_isspace(*src))
+ return false;
+ --szsrc;
+ ++src;
+ }
+
+ const bool ok = (equals == 0 || equals == expected_equals);
+ if (ok) *len = destidx;
+ return ok;
+}
+
+// The arrays below were generated by the following code
+// #include <sys/time.h>
+// #include <stdlib.h>
+// #include <std::string.h>
+// main()
+// {
+// static const char Base64[] =
+// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+// char* pos;
+// int idx, i, j;
+// printf(" ");
+// for (i = 0; i < 255; i += 8) {
+// for (j = i; j < i + 8; j++) {
+// pos = strchr(Base64, j);
+// if ((pos == nullptr) || (j == 0))
+// idx = -1;
+// else
+// idx = pos - Base64;
+// if (idx == -1)
+// printf(" %2d, ", idx);
+// else
+// printf(" %2d/*%c*/,", idx, j);
+// }
+// printf("\n ");
+// }
+// }
+//
+// where the value of "Base64[]" was replaced by one of the base-64 conversion
+// tables from the functions below.
+/* clang-format off */
+constexpr signed char kUnBase64[] = {
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */,
+ 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
+ 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
+ -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
+ 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
+ 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
+ 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1,
+ -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
+ 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
+ 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
+ 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1
+};
+
+constexpr signed char kUnWebSafeBase64[] = {
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 62/*-*/, -1, -1,
+ 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
+ 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
+ -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
+ 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
+ 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
+ 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/,
+ -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
+ 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
+ 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
+ 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1
+};
+/* clang-format on */
+
+size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
+ // Base64 encodes three bytes of input at a time. If the input is not
+ // divisible by three, we pad as appropriate.
+ //
+ // (from http://tools.ietf.org/html/rfc3548)
+ // Special processing is performed if fewer than 24 bits are available
+ // at the end of the data being encoded. A full encoding quantum is
+ // always completed at the end of a quantity. When fewer than 24 input
+ // bits are available in an input group, zero bits are added (on the
+ // right) to form an integral number of 6-bit groups. Padding at the
+ // end of the data is performed using the '=' character. Since all base
+ // 64 input is an integral number of octets, only the following cases
+ // can arise:
+
+ // Base64 encodes each three bytes of input into four bytes of output.
+ size_t len = (input_len / 3) * 4;
+
+ if (input_len % 3 == 0) {
+ // (from http://tools.ietf.org/html/rfc3548)
+ // (1) the final quantum of encoding input is an integral multiple of 24
+ // bits; here, the final unit of encoded output will be an integral
+ // multiple of 4 characters with no "=" padding,
+ } else if (input_len % 3 == 1) {
+ // (from http://tools.ietf.org/html/rfc3548)
+ // (2) the final quantum of encoding input is exactly 8 bits; here, the
+ // final unit of encoded output will be two characters followed by two
+ // "=" padding characters, or
+ len += 2;
+ if (do_padding) {
+ len += 2;
+ }
+ } else { // (input_len % 3 == 2)
+ // (from http://tools.ietf.org/html/rfc3548)
+ // (3) the final quantum of encoding input is exactly 16 bits; here, the
+ // final unit of encoded output will be three characters followed by one
+ // "=" padding character.
+ len += 3;
+ if (do_padding) {
+ len += 1;
+ }
+ }
+
+ assert(len >= input_len); // make sure we didn't overflow
+ return len;
+}
+
+size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
+ size_t szdest, const char* base64,
+ bool do_padding) {
+ static const char kPad64 = '=';
+
+ if (szsrc * 4 > szdest * 3) return 0;
+
+ char* cur_dest = dest;
+ const unsigned char* cur_src = src;
+
+ char* const limit_dest = dest + szdest;
+ const unsigned char* const limit_src = src + szsrc;
+
+ // Three bytes of data encodes to four characters of cyphertext.
+ // So we can pump through three-byte chunks atomically.
+ if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3
+ while (cur_src < limit_src - 3) { // as long as we have >= 32 bits
+ uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
+
+ cur_dest[0] = base64[in >> 18];
+ in &= 0x3FFFF;
+ cur_dest[1] = base64[in >> 12];
+ in &= 0xFFF;
+ cur_dest[2] = base64[in >> 6];
+ in &= 0x3F;
+ cur_dest[3] = base64[in];
+
+ cur_dest += 4;
+ cur_src += 3;
+ }
+ }
+ // To save time, we didn't update szdest or szsrc in the loop. So do it now.
+ szdest = limit_dest - cur_dest;
+ szsrc = limit_src - cur_src;
+
+ /* now deal with the tail (<=3 bytes) */
+ switch (szsrc) {
+ case 0:
+ // Nothing left; nothing more to do.
+ break;
+ case 1: {
+ // One byte left: this encodes to two characters, and (optionally)
+ // two pad characters to round out the four-character cypherblock.
+ if (szdest < 2) return 0;
+ uint32_t in = cur_src[0];
+ cur_dest[0] = base64[in >> 2];
+ in &= 0x3;
+ cur_dest[1] = base64[in << 4];
+ cur_dest += 2;
+ szdest -= 2;
+ if (do_padding) {
+ if (szdest < 2) return 0;
+ cur_dest[0] = kPad64;
+ cur_dest[1] = kPad64;
+ cur_dest += 2;
+ szdest -= 2;
+ }
+ break;
+ }
+ case 2: {
+ // Two bytes left: this encodes to three characters, and (optionally)
+ // one pad character to round out the four-character cypherblock.
+ if (szdest < 3) return 0;
+ uint32_t in = absl::big_endian::Load16(cur_src);
+ cur_dest[0] = base64[in >> 10];
+ in &= 0x3FF;
+ cur_dest[1] = base64[in >> 4];
+ in &= 0x00F;
+ cur_dest[2] = base64[in << 2];
+ cur_dest += 3;
+ szdest -= 3;
+ if (do_padding) {
+ if (szdest < 1) return 0;
+ cur_dest[0] = kPad64;
+ cur_dest += 1;
+ szdest -= 1;
+ }
+ break;
+ }
+ case 3: {
+ // Three bytes left: same as in the big loop above. We can't do this in
+ // the loop because the loop above always reads 4 bytes, and the fourth
+ // byte is past the end of the input.
+ if (szdest < 4) return 0;
+ uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
+ cur_dest[0] = base64[in >> 18];
+ in &= 0x3FFFF;
+ cur_dest[1] = base64[in >> 12];
+ in &= 0xFFF;
+ cur_dest[2] = base64[in >> 6];
+ in &= 0x3F;
+ cur_dest[3] = base64[in];
+ cur_dest += 4;
+ szdest -= 4;
+ break;
+ }
+ default:
+ // Should not be reached: blocks of 4 bytes are handled
+ // in the while loop before this switch statement.
+ ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
+ break;
+ }
+ return (cur_dest - dest);
+}
+
+constexpr char kBase64Chars[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+constexpr char kWebSafeBase64Chars[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+void Base64EscapeInternal(const unsigned char* src, size_t szsrc, std::string* dest,
+ bool do_padding, const char* base64_chars) {
+ const size_t calc_escaped_size =
+ CalculateBase64EscapedLenInternal(szsrc, do_padding);
+ strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size);
+
+ const size_t escaped_len = Base64EscapeInternal(
+ src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
+ assert(calc_escaped_size == escaped_len);
+ dest->erase(escaped_len);
+}
+
+bool Base64UnescapeInternal(const char* src, size_t slen, std::string* dest,
+ const signed char* unbase64) {
+ // Determine the size of the output std::string. Base64 encodes every 3 bytes into
+ // 4 characters. any leftover chars are added directly for good measure.
+ // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
+ const size_t dest_len = 3 * (slen / 4) + (slen % 4);
+
+ strings_internal::STLStringResizeUninitialized(dest, dest_len);
+
+ // We are getting the destination buffer by getting the beginning of the
+ // std::string and converting it into a char *.
+ size_t len;
+ const bool ok =
+ Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
+ if (!ok) {
+ dest->clear();
+ return false;
+ }
+
+ // could be shorter if there was padding
+ assert(len <= dest_len);
+ dest->erase(len);
+
+ return true;
+}
+
+/* clang-format off */
+constexpr char kHexValue[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // '0'..'9'
+ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A'..'F'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a'..'f'
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* clang-format on */
+
+// This is a templated function so that T can be either a char*
+// or a std::string. This works because we use the [] operator to access
+// individual characters at a time.
+template <typename T>
+void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) {
+ for (int i = 0; i < num; i++) {
+ to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) +
+ (kHexValue[from[i * 2 + 1] & 0xFF]);
+ }
+}
+
+// This is a templated function so that T can be either a char* or a std::string.
+template <typename T>
+void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) {
+ auto dest_ptr = &dest[0];
+ for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
+ const char* hex_p = &kHexTable[*src_ptr * 2];
+ std::copy(hex_p, hex_p + 2, dest_ptr);
+ }
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// CUnescape()
+//
+// See CUnescapeInternal() for implementation details.
+// ----------------------------------------------------------------------
+bool CUnescape(absl::string_view source, std::string* dest, std::string* error) {
+ return CUnescapeInternal(source, kUnescapeNulls, dest, error);
+}
+
+std::string CEscape(absl::string_view src) {
+ std::string dest;
+ CEscapeAndAppendInternal(src, &dest);
+ return dest;
+}
+
+std::string CHexEscape(absl::string_view src) {
+ return CEscapeInternal(src, true, false);
+}
+
+std::string Utf8SafeCEscape(absl::string_view src) {
+ return CEscapeInternal(src, false, true);
+}
+
+std::string Utf8SafeCHexEscape(absl::string_view src) {
+ return CEscapeInternal(src, true, true);
+}
+
+// ----------------------------------------------------------------------
+// ptrdiff_t Base64Unescape() - base64 decoder
+// ptrdiff_t Base64Escape() - base64 encoder
+// ptrdiff_t WebSafeBase64Unescape() - Google's variation of base64 decoder
+// ptrdiff_t WebSafeBase64Escape() - Google's variation of base64 encoder
+//
+// Check out
+// http://tools.ietf.org/html/rfc2045 for formal description, but what we
+// care about is that...
+// Take the encoded stuff in groups of 4 characters and turn each
+// character into a code 0 to 63 thus:
+// A-Z map to 0 to 25
+// a-z map to 26 to 51
+// 0-9 map to 52 to 61
+// +(- for WebSafe) maps to 62
+// /(_ for WebSafe) maps to 63
+// There will be four numbers, all less than 64 which can be represented
+// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
+// Arrange the 6 digit binary numbers into three bytes as such:
+// aaaaaabb bbbbcccc ccdddddd
+// Equals signs (one or two) are used at the end of the encoded block to
+// indicate that the text was not an integer multiple of three bytes long.
+// ----------------------------------------------------------------------
+
+bool Base64Unescape(absl::string_view src, std::string* dest) {
+ return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
+}
+
+bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) {
+ return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
+}
+
+void Base64Escape(absl::string_view src, std::string* dest) {
+ Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
+ src.size(), dest, true, kBase64Chars);
+}
+
+void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
+ Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
+ src.size(), dest, false, kWebSafeBase64Chars);
+}
+
+std::string HexStringToBytes(absl::string_view from) {
+ std::string result;
+ const auto num = from.size() / 2;
+ strings_internal::STLStringResizeUninitialized(&result, num);
+ absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
+ return result;
+}
+
+std::string BytesToHexString(absl::string_view from) {
+ std::string result;
+ strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
+ absl::BytesToHexStringInternal<std::string&>(
+ reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
+ return result;
+}
+
+} // namespace absl
diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h
new file mode 100644
index 00000000..05327e7c
--- /dev/null
+++ b/absl/strings/escaping.h
@@ -0,0 +1,158 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: escaping.h
+// -----------------------------------------------------------------------------
+//
+// This header file contains std::string utilities involved in escaping and
+// unescaping strings in various ways.
+//
+
+#ifndef ABSL_STRINGS_ESCAPING_H_
+#define ABSL_STRINGS_ESCAPING_H_
+
+#include <cstddef>
+#include <string>
+#include <vector>
+
+#include "absl/base/macros.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+// CUnescape()
+//
+// Unescapes a `source` std::string and copies it into `dest`, rewriting C-style
+// escape sequences (http://en.cppreference.com/w/cpp/language/escape) into
+// their proper code point equivalents, returning `true` if successful.
+//
+// The following unescape sequences can be handled:
+//
+// * ASCII escape sequences ('\n','\r','\\', etc.) to their ASCII equivalents
+// * Octal escape sequences ('\nnn') to byte nnn. The unescaped value must
+// resolve to a single byte or an error will occur. E.g. values greater than
+// 0xff will produce an error.
+// * Hexadecimal escape sequences ('\xnn') to byte nn. While an arbitrary
+// number of following digits are allowed, the unescaped value must resolve
+// to a single byte or an error will occur. E.g. '\x0045' is equivalent to
+// '\x45', but '\x1234' will produce an error.
+// * Unicode escape sequences ('\unnnn' for exactly four hex digits or
+// '\Unnnnnnnn' for exactly eight hex digits, which will be encoded in
+// UTF-8. (E.g., `\u2019` unescapes to the three bytes 0xE2, 0x80, and
+// 0x99).
+//
+//
+// If any errors are encountered, this function returns `false` and stores the
+// first encountered error in `error`. To disable error reporting, set `error`
+// to `nullptr` or use the overload with no error reporting below.
+//
+// Example:
+//
+// std::string s = "foo\\rbar\\nbaz\\t";
+// std::string unescaped_s = absl::CUnescape(s);
+// EXPECT_EQ(unescaped_s, "foo\rbar\nbaz\t");
+bool CUnescape(absl::string_view source, std::string* dest, std::string* error);
+
+// Overload of `CUnescape()` with no error reporting.
+inline bool CUnescape(absl::string_view source, std::string* dest) {
+ return CUnescape(source, dest, nullptr);
+}
+
+// CEscape()
+//
+// Escapes a 'src' std::string using C-style escapes sequences
+// (http://en.cppreference.com/w/cpp/language/escape), escaping other
+// non-printable/non-whitespace bytes as octal sequences (e.g. "\377").
+//
+// Example:
+//
+// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
+// std::string escaped_s = absl::CEscape(s);
+// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\010\\t\\n\\013\\014\\r\\n");
+std::string CEscape(absl::string_view src);
+
+// CHexEscape()
+//
+// Escapes a 'src' std::string using C-style escape sequences, escaping
+// other non-printable/non-whitespace bytes as hexadecimal sequences (e.g.
+// "\xFF").
+//
+// Example:
+//
+// std::string s = "foo\rbar\tbaz\010\011\012\013\014\x0d\n";
+// std::string escaped_s = absl::CHexEscape(s);
+// EXPECT_EQ(escaped_s, "foo\\rbar\\tbaz\\x08\\t\\n\\x0b\\x0c\\r\\n");
+std::string CHexEscape(absl::string_view src);
+
+// Utf8SafeCEscape()
+//
+// Escapes a 'src' std::string using C-style escape sequences, escaping bytes as
+// octal sequences, and passing through UTF-8 characters without conversion.
+// I.e., when encountering any bytes with their high bit set, this function
+// will not escape those values, whether or not they are valid UTF-8.
+std::string Utf8SafeCEscape(absl::string_view src);
+
+// Utf8SafeCHexEscape()
+//
+// Escapes a 'src' std::string using C-style escape sequences, escaping bytes as
+// hexidecimal sequences, and passing through UTF-8 characters without
+// conversion.
+std::string Utf8SafeCHexEscape(absl::string_view src);
+
+// Base64Unescape()
+//
+// Converts a `src` std::string encoded in Base64 to its binary equivalent, writing
+// it to a `dest` buffer, returning `true` on success. If `src` contains invalid
+// characters, `dest` is cleared and returns `false`.
+bool Base64Unescape(absl::string_view src, std::string* dest);
+
+// WebSafeBase64Unescape(absl::string_view, std::string*)
+//
+// Converts a `src` std::string encoded in Base64 to its binary equivalent, writing
+// it to a `dest` buffer, but using '-' instead of '+', and '_' instead of '/'.
+// If `src` contains invalid characters, `dest` is cleared and returns `false`.
+bool WebSafeBase64Unescape(absl::string_view src, std::string* dest);
+
+// Base64Escape()
+//
+// Encodes a `src` std::string into a `dest` buffer using base64 encoding, with
+// padding characters. This function conforms with RFC 4648 section 4 (base64).
+void Base64Escape(absl::string_view src, std::string* dest);
+
+// WebSafeBase64Escape()
+//
+// Encodes a `src` std::string into a `dest` buffer using uses '-' instead of '+' and
+// '_' instead of '/', and without padding. This function conforms with RFC 4648
+// section 5 (base64url).
+void WebSafeBase64Escape(absl::string_view src, std::string* dest);
+
+// HexStringToBytes()
+//
+// Converts an ASCII hex std::string into bytes, returning binary data of length
+// `from.size()/2`.
+std::string HexStringToBytes(absl::string_view from);
+
+// BytesToHexString()
+//
+// Converts binary data into an ASCII text std::string, returing a std::string of size
+// `2*from.size()`.
+std::string BytesToHexString(absl::string_view from);
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_ESCAPING_H_
diff --git a/absl/strings/escaping_test.cc b/absl/strings/escaping_test.cc
new file mode 100644
index 00000000..d464051d
--- /dev/null
+++ b/absl/strings/escaping_test.cc
@@ -0,0 +1,638 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/escaping.h"
+
+#include <array>
+#include <cstdio>
+#include <cstring>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/container/fixed_array.h"
+#include "absl/strings/str_cat.h"
+
+#include "absl/strings/internal/escaping_test_common.inc"
+
+namespace {
+
+struct epair {
+ std::string escaped;
+ std::string unescaped;
+};
+
+TEST(CEscape, EscapeAndUnescape) {
+ const std::string inputs[] = {
+ std::string("foo\nxx\r\b\0023"),
+ std::string(""),
+ std::string("abc"),
+ std::string("\1chad_rules"),
+ std::string("\1arnar_drools"),
+ std::string("xxxx\r\t'\"\\"),
+ std::string("\0xx\0", 4),
+ std::string("\x01\x31"),
+ std::string("abc\xb\x42\141bc"),
+ std::string("123\1\x31\x32\x33"),
+ std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
+ std::string("\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
+ };
+ // Do this twice, once for octal escapes and once for hex escapes.
+ for (int kind = 0; kind < 4; kind++) {
+ for (const std::string& original : inputs) {
+ std::string escaped;
+ switch (kind) {
+ case 0:
+ escaped = absl::CEscape(original);
+ break;
+ case 1:
+ escaped = absl::CHexEscape(original);
+ break;
+ case 2:
+ escaped = absl::Utf8SafeCEscape(original);
+ break;
+ case 3:
+ escaped = absl::Utf8SafeCHexEscape(original);
+ break;
+ }
+ std::string unescaped_str;
+ EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str));
+ EXPECT_EQ(unescaped_str, original);
+
+ // Check in-place unescaping
+ std::string s = escaped;
+ EXPECT_TRUE(absl::CUnescape(s, &s));
+ ASSERT_EQ(s, original);
+ }
+ }
+ // Check that all possible two character strings can be escaped then
+ // unescaped successfully.
+ for (int char0 = 0; char0 < 256; char0++) {
+ for (int char1 = 0; char1 < 256; char1++) {
+ char chars[2];
+ chars[0] = char0;
+ chars[1] = char1;
+ std::string s(chars, 2);
+ std::string escaped = absl::CHexEscape(s);
+ std::string unescaped;
+ EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
+ EXPECT_EQ(s, unescaped);
+ }
+ }
+}
+
+TEST(CEscape, BasicEscaping) {
+ epair oct_values[] = {
+ {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
+ {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
+ "'full of \"sound\" and \"fury\"'"},
+ {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
+ {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"}
+ };
+ epair hex_values[] = {
+ {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
+ {"I\\\'ve just seen a \\\"face\\\"",
+ "I've just seen a \"face\""},
+ {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
+ {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"}
+ };
+ epair utf8_oct_values[] = {
+ {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
+ "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
+ {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
+ "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
+ {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
+ "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
+ {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
+ "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
+ };
+ epair utf8_hex_values[] = {
+ {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
+ "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
+ {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
+ "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
+ {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
+ "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
+ {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
+ "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
+ };
+
+ for (const epair& val : oct_values) {
+ std::string escaped = absl::CEscape(val.unescaped);
+ EXPECT_EQ(escaped, val.escaped);
+ }
+ for (const epair& val : hex_values) {
+ std::string escaped = absl::CHexEscape(val.unescaped);
+ EXPECT_EQ(escaped, val.escaped);
+ }
+ for (const epair& val : utf8_oct_values) {
+ std::string escaped = absl::Utf8SafeCEscape(val.unescaped);
+ EXPECT_EQ(escaped, val.escaped);
+ }
+ for (const epair& val : utf8_hex_values) {
+ std::string escaped = absl::Utf8SafeCHexEscape(val.unescaped);
+ EXPECT_EQ(escaped, val.escaped);
+ }
+}
+
+TEST(Unescape, BasicFunction) {
+ epair tests[] =
+ {{"\\u0030", "0"},
+ {"\\u00A3", "\xC2\xA3"},
+ {"\\u22FD", "\xE2\x8B\xBD"},
+ {"\\U00010000", "\xF0\x90\x80\x80"},
+ {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"}};
+ for (const epair& val : tests) {
+ std::string out;
+ EXPECT_TRUE(absl::CUnescape(val.escaped, &out));
+ EXPECT_EQ(out, val.unescaped);
+ }
+ std::string bad[] =
+ {"\\u1", // too short
+ "\\U1", // too short
+ "\\Uffffff",
+ "\\777", // exceeds 0xff
+ "\\xABCD"}; // exceeds 0xff
+ for (const std::string& e : bad) {
+ std::string error;
+ std::string out;
+ EXPECT_FALSE(absl::CUnescape(e, &out, &error));
+ EXPECT_FALSE(error.empty());
+ }
+}
+
+class CUnescapeTest : public testing::Test {
+ protected:
+ static const char kStringWithMultipleOctalNulls[];
+ static const char kStringWithMultipleHexNulls[];
+ static const char kStringWithMultipleUnicodeNulls[];
+
+ std::string result_string_;
+};
+
+const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
+ "\\0\\n" // null escape \0 plus newline
+ "0\\n" // just a number 0 (not a null escape) plus newline
+ "\\00\\12" // null escape \00 plus octal newline code
+ "\\000"; // null escape \000
+
+// This has the same ingredients as kStringWithMultipleOctalNulls
+// but with \x hex escapes instead of octal escapes.
+const char CUnescapeTest::kStringWithMultipleHexNulls[] =
+ "\\x0\\n"
+ "0\\n"
+ "\\x00\\xa"
+ "\\x000";
+
+const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] =
+ "\\u0000\\n" // short-form (4-digit) null escape plus newline
+ "0\\n" // just a number 0 (not a null escape) plus newline
+ "\\U00000000"; // long-form (8-digit) null escape
+
+TEST_F(CUnescapeTest, Unescapes1CharOctalNull) {
+ std::string original_string = "\\0";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, Unescapes2CharOctalNull) {
+ std::string original_string = "\\00";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, Unescapes3CharOctalNull) {
+ std::string original_string = "\\000";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, Unescapes1CharHexNull) {
+ std::string original_string = "\\x0";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, Unescapes2CharHexNull) {
+ std::string original_string = "\\x00";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, Unescapes3CharHexNull) {
+ std::string original_string = "\\x000";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) {
+ std::string original_string = "\\u0000";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) {
+ std::string original_string = "\\U00000000";
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0", 1), result_string_);
+}
+
+TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) {
+ std::string original_string(kStringWithMultipleOctalNulls);
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ // All escapes, including newlines and null escapes, should have been
+ // converted to the equivalent characters.
+ EXPECT_EQ(std::string("\0\n"
+ "0\n"
+ "\0\n"
+ "\0", 7), result_string_);
+}
+
+
+TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) {
+ std::string original_string(kStringWithMultipleHexNulls);
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0\n"
+ "0\n"
+ "\0\n"
+ "\0", 7), result_string_);
+}
+
+TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) {
+ std::string original_string(kStringWithMultipleUnicodeNulls);
+ EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
+ EXPECT_EQ(std::string("\0\n"
+ "0\n"
+ "\0", 5), result_string_);
+}
+
+static struct {
+ absl::string_view plaintext;
+ absl::string_view cyphertext;
+} const base64_tests[] = {
+ // Empty std::string.
+ {{"", 0}, {"", 0}},
+ {{nullptr, 0},
+ {"", 0}}, // if length is zero, plaintext ptr must be ignored!
+
+ // Basic bit patterns;
+ // values obtained with "echo -n '...' | uuencode -m test"
+
+ {{"\000", 1}, "AA=="},
+ {{"\001", 1}, "AQ=="},
+ {{"\002", 1}, "Ag=="},
+ {{"\004", 1}, "BA=="},
+ {{"\010", 1}, "CA=="},
+ {{"\020", 1}, "EA=="},
+ {{"\040", 1}, "IA=="},
+ {{"\100", 1}, "QA=="},
+ {{"\200", 1}, "gA=="},
+
+ {{"\377", 1}, "/w=="},
+ {{"\376", 1}, "/g=="},
+ {{"\375", 1}, "/Q=="},
+ {{"\373", 1}, "+w=="},
+ {{"\367", 1}, "9w=="},
+ {{"\357", 1}, "7w=="},
+ {{"\337", 1}, "3w=="},
+ {{"\277", 1}, "vw=="},
+ {{"\177", 1}, "fw=="},
+ {{"\000\000", 2}, "AAA="},
+ {{"\000\001", 2}, "AAE="},
+ {{"\000\002", 2}, "AAI="},
+ {{"\000\004", 2}, "AAQ="},
+ {{"\000\010", 2}, "AAg="},
+ {{"\000\020", 2}, "ABA="},
+ {{"\000\040", 2}, "ACA="},
+ {{"\000\100", 2}, "AEA="},
+ {{"\000\200", 2}, "AIA="},
+ {{"\001\000", 2}, "AQA="},
+ {{"\002\000", 2}, "AgA="},
+ {{"\004\000", 2}, "BAA="},
+ {{"\010\000", 2}, "CAA="},
+ {{"\020\000", 2}, "EAA="},
+ {{"\040\000", 2}, "IAA="},
+ {{"\100\000", 2}, "QAA="},
+ {{"\200\000", 2}, "gAA="},
+
+ {{"\377\377", 2}, "//8="},
+ {{"\377\376", 2}, "//4="},
+ {{"\377\375", 2}, "//0="},
+ {{"\377\373", 2}, "//s="},
+ {{"\377\367", 2}, "//c="},
+ {{"\377\357", 2}, "/+8="},
+ {{"\377\337", 2}, "/98="},
+ {{"\377\277", 2}, "/78="},
+ {{"\377\177", 2}, "/38="},
+ {{"\376\377", 2}, "/v8="},
+ {{"\375\377", 2}, "/f8="},
+ {{"\373\377", 2}, "+/8="},
+ {{"\367\377", 2}, "9/8="},
+ {{"\357\377", 2}, "7/8="},
+ {{"\337\377", 2}, "3/8="},
+ {{"\277\377", 2}, "v/8="},
+ {{"\177\377", 2}, "f/8="},
+
+ {{"\000\000\000", 3}, "AAAA"},
+ {{"\000\000\001", 3}, "AAAB"},
+ {{"\000\000\002", 3}, "AAAC"},
+ {{"\000\000\004", 3}, "AAAE"},
+ {{"\000\000\010", 3}, "AAAI"},
+ {{"\000\000\020", 3}, "AAAQ"},
+ {{"\000\000\040", 3}, "AAAg"},
+ {{"\000\000\100", 3}, "AABA"},
+ {{"\000\000\200", 3}, "AACA"},
+ {{"\000\001\000", 3}, "AAEA"},
+ {{"\000\002\000", 3}, "AAIA"},
+ {{"\000\004\000", 3}, "AAQA"},
+ {{"\000\010\000", 3}, "AAgA"},
+ {{"\000\020\000", 3}, "ABAA"},
+ {{"\000\040\000", 3}, "ACAA"},
+ {{"\000\100\000", 3}, "AEAA"},
+ {{"\000\200\000", 3}, "AIAA"},
+ {{"\001\000\000", 3}, "AQAA"},
+ {{"\002\000\000", 3}, "AgAA"},
+ {{"\004\000\000", 3}, "BAAA"},
+ {{"\010\000\000", 3}, "CAAA"},
+ {{"\020\000\000", 3}, "EAAA"},
+ {{"\040\000\000", 3}, "IAAA"},
+ {{"\100\000\000", 3}, "QAAA"},
+ {{"\200\000\000", 3}, "gAAA"},
+
+ {{"\377\377\377", 3}, "////"},
+ {{"\377\377\376", 3}, "///+"},
+ {{"\377\377\375", 3}, "///9"},
+ {{"\377\377\373", 3}, "///7"},
+ {{"\377\377\367", 3}, "///3"},
+ {{"\377\377\357", 3}, "///v"},
+ {{"\377\377\337", 3}, "///f"},
+ {{"\377\377\277", 3}, "//+/"},
+ {{"\377\377\177", 3}, "//9/"},
+ {{"\377\376\377", 3}, "//7/"},
+ {{"\377\375\377", 3}, "//3/"},
+ {{"\377\373\377", 3}, "//v/"},
+ {{"\377\367\377", 3}, "//f/"},
+ {{"\377\357\377", 3}, "/+//"},
+ {{"\377\337\377", 3}, "/9//"},
+ {{"\377\277\377", 3}, "/7//"},
+ {{"\377\177\377", 3}, "/3//"},
+ {{"\376\377\377", 3}, "/v//"},
+ {{"\375\377\377", 3}, "/f//"},
+ {{"\373\377\377", 3}, "+///"},
+ {{"\367\377\377", 3}, "9///"},
+ {{"\357\377\377", 3}, "7///"},
+ {{"\337\377\377", 3}, "3///"},
+ {{"\277\377\377", 3}, "v///"},
+ {{"\177\377\377", 3}, "f///"},
+
+ // Random numbers: values obtained with
+ //
+ // #! /bin/bash
+ // dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
+ // od -N $1 -t o1 /tmp/bar.random
+ // uuencode -m test < /tmp/bar.random
+ //
+ // where $1 is the number of bytes (2, 3)
+
+ {{"\243\361", 2}, "o/E="},
+ {{"\024\167", 2}, "FHc="},
+ {{"\313\252", 2}, "y6o="},
+ {{"\046\041", 2}, "JiE="},
+ {{"\145\236", 2}, "ZZ4="},
+ {{"\254\325", 2}, "rNU="},
+ {{"\061\330", 2}, "Mdg="},
+ {{"\245\032", 2}, "pRo="},
+ {{"\006\000", 2}, "BgA="},
+ {{"\375\131", 2}, "/Vk="},
+ {{"\303\210", 2}, "w4g="},
+ {{"\040\037", 2}, "IB8="},
+ {{"\261\372", 2}, "sfo="},
+ {{"\335\014", 2}, "3Qw="},
+ {{"\233\217", 2}, "m48="},
+ {{"\373\056", 2}, "+y4="},
+ {{"\247\232", 2}, "p5o="},
+ {{"\107\053", 2}, "Rys="},
+ {{"\204\077", 2}, "hD8="},
+ {{"\276\211", 2}, "vok="},
+ {{"\313\110", 2}, "y0g="},
+ {{"\363\376", 2}, "8/4="},
+ {{"\251\234", 2}, "qZw="},
+ {{"\103\262", 2}, "Q7I="},
+ {{"\142\312", 2}, "Yso="},
+ {{"\067\211", 2}, "N4k="},
+ {{"\220\001", 2}, "kAE="},
+ {{"\152\240", 2}, "aqA="},
+ {{"\367\061", 2}, "9zE="},
+ {{"\133\255", 2}, "W60="},
+ {{"\176\035", 2}, "fh0="},
+ {{"\032\231", 2}, "Gpk="},
+
+ {{"\013\007\144", 3}, "Cwdk"},
+ {{"\030\112\106", 3}, "GEpG"},
+ {{"\047\325\046", 3}, "J9Um"},
+ {{"\310\160\022", 3}, "yHAS"},
+ {{"\131\100\237", 3}, "WUCf"},
+ {{"\064\342\134", 3}, "NOJc"},
+ {{"\010\177\004", 3}, "CH8E"},
+ {{"\345\147\205", 3}, "5WeF"},
+ {{"\300\343\360", 3}, "wOPw"},
+ {{"\061\240\201", 3}, "MaCB"},
+ {{"\225\333\044", 3}, "ldsk"},
+ {{"\215\137\352", 3}, "jV/q"},
+ {{"\371\147\160", 3}, "+Wdw"},
+ {{"\030\320\051", 3}, "GNAp"},
+ {{"\044\174\241", 3}, "JHyh"},
+ {{"\260\127\037", 3}, "sFcf"},
+ {{"\111\045\033", 3}, "SSUb"},
+ {{"\202\114\107", 3}, "gkxH"},
+ {{"\057\371\042", 3}, "L/ki"},
+ {{"\223\247\244", 3}, "k6ek"},
+ {{"\047\216\144", 3}, "J45k"},
+ {{"\203\070\327", 3}, "gzjX"},
+ {{"\247\140\072", 3}, "p2A6"},
+ {{"\124\115\116", 3}, "VE1O"},
+ {{"\157\162\050", 3}, "b3Io"},
+ {{"\357\223\004", 3}, "75ME"},
+ {{"\052\117\156", 3}, "Kk9u"},
+ {{"\347\154\000", 3}, "52wA"},
+ {{"\303\012\142", 3}, "wwpi"},
+ {{"\060\035\362", 3}, "MB3y"},
+ {{"\130\226\361", 3}, "WJbx"},
+ {{"\173\013\071", 3}, "ews5"},
+ {{"\336\004\027", 3}, "3gQX"},
+ {{"\357\366\234", 3}, "7/ac"},
+ {{"\353\304\111", 3}, "68RJ"},
+ {{"\024\264\131", 3}, "FLRZ"},
+ {{"\075\114\251", 3}, "PUyp"},
+ {{"\315\031\225", 3}, "zRmV"},
+ {{"\154\201\276", 3}, "bIG+"},
+ {{"\200\066\072", 3}, "gDY6"},
+ {{"\142\350\267", 3}, "Yui3"},
+ {{"\033\000\166", 3}, "GwB2"},
+ {{"\210\055\077", 3}, "iC0/"},
+ {{"\341\037\124", 3}, "4R9U"},
+ {{"\161\103\152", 3}, "cUNq"},
+ {{"\270\142\131", 3}, "uGJZ"},
+ {{"\337\076\074", 3}, "3z48"},
+ {{"\375\106\362", 3}, "/Uby"},
+ {{"\227\301\127", 3}, "l8FX"},
+ {{"\340\002\234", 3}, "4AKc"},
+ {{"\121\064\033", 3}, "UTQb"},
+ {{"\157\134\143", 3}, "b1xj"},
+ {{"\247\055\327", 3}, "py3X"},
+ {{"\340\142\005", 3}, "4GIF"},
+ {{"\060\260\143", 3}, "MLBj"},
+ {{"\075\203\170", 3}, "PYN4"},
+ {{"\143\160\016", 3}, "Y3AO"},
+ {{"\313\013\063", 3}, "ywsz"},
+ {{"\174\236\135", 3}, "fJ5d"},
+ {{"\103\047\026", 3}, "QycW"},
+ {{"\365\005\343", 3}, "9QXj"},
+ {{"\271\160\223", 3}, "uXCT"},
+ {{"\362\255\172", 3}, "8q16"},
+ {{"\113\012\015", 3}, "SwoN"},
+
+ // various lengths, generated by this python script:
+ //
+ // from std::string import lowercase as lc
+ // for i in range(27):
+ // print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
+ // lc[:i].encode('base64').strip())
+
+ {{"", 0}, {"", 0}},
+ {"a", "YQ=="},
+ {"ab", "YWI="},
+ {"abc", "YWJj"},
+ {"abcd", "YWJjZA=="},
+ {"abcde", "YWJjZGU="},
+ {"abcdef", "YWJjZGVm"},
+ {"abcdefg", "YWJjZGVmZw=="},
+ {"abcdefgh", "YWJjZGVmZ2g="},
+ {"abcdefghi", "YWJjZGVmZ2hp"},
+ {"abcdefghij", "YWJjZGVmZ2hpag=="},
+ {"abcdefghijk", "YWJjZGVmZ2hpams="},
+ {"abcdefghijkl", "YWJjZGVmZ2hpamts"},
+ {"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
+ {"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
+ {"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
+ {"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
+ {"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
+ {"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
+ {"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
+ {"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
+ {"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
+ {"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
+ {"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
+ {"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
+ {"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
+ {"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
+};
+
+TEST(Base64, EscapeAndUnescape) {
+ // Check the short strings; this tests the math (and boundaries)
+ for (const auto& tc : base64_tests) {
+ std::string encoded("this junk should be ignored");
+ absl::Base64Escape(tc.plaintext, &encoded);
+ EXPECT_EQ(encoded, tc.cyphertext);
+
+ std::string decoded("this junk should be ignored");
+ EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
+ EXPECT_EQ(decoded, tc.plaintext);
+
+ std::string websafe(tc.cyphertext);
+ for (int c = 0; c < websafe.size(); ++c) {
+ if ('+' == websafe[c]) websafe[c] = '-';
+ if ('/' == websafe[c]) websafe[c] = '_';
+ if ('=' == websafe[c]) {
+ websafe.resize(c);
+ break;
+ }
+ }
+
+ encoded = "this junk should be ignored";
+ absl::WebSafeBase64Escape(tc.plaintext, &encoded);
+ EXPECT_EQ(encoded, websafe);
+
+ // Let's try the std::string version of the decoder
+ decoded = "this junk should be ignored";
+ EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
+ EXPECT_EQ(decoded, tc.plaintext);
+ }
+
+ // Now try the long strings, this tests the streaming
+ for (const auto& tc : base64_strings) {
+ std::string buffer;
+ absl::WebSafeBase64Escape(tc.plaintext, &buffer);
+ EXPECT_EQ(tc.cyphertext, buffer);
+ }
+
+ // Verify the behavior when decoding bad data
+ {
+ absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4),
+ absl::string_view("abc.\0", 5)};
+ for (absl::string_view bad_data : data_set) {
+ std::string buf;
+ EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf));
+ EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf));
+ EXPECT_TRUE(buf.empty());
+ }
+ }
+}
+
+TEST(Base64, DISABLED_HugeData) {
+ const size_t kSize = size_t(3) * 1000 * 1000 * 1000;
+ static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
+ const std::string huge(kSize, 'x');
+
+ std::string escaped;
+ absl::Base64Escape(huge, &escaped);
+
+ // Generates the std::string that should match a base64 encoded "xxx..." std::string.
+ // "xxx" in base64 is "eHh4".
+ std::string expected_encoding;
+ expected_encoding.reserve(kSize / 3 * 4);
+ for (size_t i = 0; i < kSize / 3; ++i) {
+ expected_encoding.append("eHh4");
+ }
+ EXPECT_EQ(expected_encoding, escaped);
+
+ std::string unescaped;
+ EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped));
+ EXPECT_EQ(huge, unescaped);
+}
+
+TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) {
+ std::string hex_mixed = "0123456789abcdefABCDEF";
+ std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF";
+ std::string hex_only_lower = "0123456789abcdefabcdef";
+
+ std::string bytes_result = absl::HexStringToBytes(hex_mixed);
+ EXPECT_EQ(bytes_expected, bytes_result);
+
+ std::string prefix_valid = hex_mixed + "?";
+ std::string prefix_valid_result = absl::HexStringToBytes(
+ absl::string_view(prefix_valid.data(), prefix_valid.size() - 1));
+ EXPECT_EQ(bytes_expected, prefix_valid_result);
+
+ std::string infix_valid = "?" + hex_mixed + "???";
+ std::string infix_valid_result = absl::HexStringToBytes(
+ absl::string_view(infix_valid.data() + 1, hex_mixed.size()));
+ EXPECT_EQ(bytes_expected, infix_valid_result);
+
+ std::string hex_result = absl::BytesToHexString(bytes_expected);
+ EXPECT_EQ(hex_only_lower, hex_result);
+}
+
+} // namespace
diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h
new file mode 100644
index 00000000..8d92963a
--- /dev/null
+++ b/absl/strings/internal/char_map.h
@@ -0,0 +1,154 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Character Map Class
+//
+// A fast, bit-vector map for 8-bit unsigned characters.
+// This class is useful for non-character purposes as well.
+
+#ifndef ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
+#define ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+
+namespace absl {
+namespace strings_internal {
+
+class Charmap {
+ public:
+ constexpr Charmap() : m_() {}
+
+ // Initializes with a given char*. Note that NUL is not treated as
+ // a terminator, but rather a char to be flicked.
+ Charmap(const char* str, int len) : m_() {
+ while (len--) SetChar(*str++);
+ }
+
+ // Initializes with a given char*. NUL is treated as a terminator
+ // and will not be in the charmap.
+ explicit Charmap(const char* str) : m_() {
+ while (*str) SetChar(*str++);
+ }
+
+ constexpr bool contains(unsigned char c) const {
+ return (m_[c / 64] >> (c % 64)) & 0x1;
+ }
+
+ // Returns true if and only if a character exists in both maps.
+ bool IntersectsWith(const Charmap& c) const {
+ for (size_t i = 0; i < ABSL_ARRAYSIZE(m_); ++i) {
+ if ((m_[i] & c.m_[i]) != 0) return true;
+ }
+ return false;
+ }
+
+ bool IsZero() const {
+ for (uint64_t c : m_) {
+ if (c != 0) return false;
+ }
+ return true;
+ }
+
+ // Containing only a single specified char.
+ static constexpr Charmap Char(char x) {
+ return Charmap(CharMaskForWord(x, 0), CharMaskForWord(x, 1),
+ CharMaskForWord(x, 2), CharMaskForWord(x, 3));
+ }
+
+ // Containing all the chars in the C-std::string 's'.
+ // Note that this is expensively recursive because of the C++11 constexpr
+ // formulation. Use only in constexpr initializers.
+ static constexpr Charmap FromString(const char* s) {
+ return *s == 0 ? Charmap() : (Char(*s) | FromString(s + 1));
+ }
+
+ // Containing all the chars in the closed interval [lo,hi].
+ static constexpr Charmap Range(char lo, char hi) {
+ return Charmap(RangeForWord(lo, hi, 0), RangeForWord(lo, hi, 1),
+ RangeForWord(lo, hi, 2), RangeForWord(lo, hi, 3));
+ }
+
+ friend constexpr Charmap operator&(const Charmap& a, const Charmap& b) {
+ return Charmap(a.m_[0] & b.m_[0], a.m_[1] & b.m_[1], a.m_[2] & b.m_[2],
+ a.m_[3] & b.m_[3]);
+ }
+
+ friend constexpr Charmap operator|(const Charmap& a, const Charmap& b) {
+ return Charmap(a.m_[0] | b.m_[0], a.m_[1] | b.m_[1], a.m_[2] | b.m_[2],
+ a.m_[3] | b.m_[3]);
+ }
+
+ friend constexpr Charmap operator~(const Charmap& a) {
+ return Charmap(~a.m_[0], ~a.m_[1], ~a.m_[2], ~a.m_[3]);
+ }
+
+ private:
+ constexpr Charmap(uint64_t b0, uint64_t b1, uint64_t b2, uint64_t b3)
+ : m_{b0, b1, b2, b3} {}
+
+ static constexpr uint64_t RangeForWord(unsigned char lo, unsigned char hi,
+ uint64_t word) {
+ return OpenRangeFromZeroForWord(hi + 1, word) &
+ ~OpenRangeFromZeroForWord(lo, word);
+ }
+
+ // All the chars in the specified word of the range [0, upper).
+ static constexpr uint64_t OpenRangeFromZeroForWord(uint64_t upper,
+ uint64_t word) {
+ return (upper <= 64 * word)
+ ? 0
+ : (upper >= 64 * (word + 1))
+ ? ~static_cast<uint64_t>(0)
+ : (~static_cast<uint64_t>(0) >> (64 - upper % 64));
+ }
+
+ static constexpr uint64_t CharMaskForWord(unsigned char x, uint64_t word) {
+ return (x / 64 == word) ? (static_cast<uint64_t>(1) << (x % 64)) : 0;
+ }
+
+ private:
+ void SetChar(unsigned char c) {
+ m_[c / 64] |= static_cast<uint64_t>(1) << (c % 64);
+ }
+
+ uint64_t m_[4];
+};
+
+// Mirror the char-classifying predicates in <cctype>
+constexpr Charmap UpperCharmap() { return Charmap::Range('A', 'Z'); }
+constexpr Charmap LowerCharmap() { return Charmap::Range('a', 'z'); }
+constexpr Charmap DigitCharmap() { return Charmap::Range('0', '9'); }
+constexpr Charmap AlphaCharmap() { return LowerCharmap() | UpperCharmap(); }
+constexpr Charmap AlnumCharmap() { return DigitCharmap() | AlphaCharmap(); }
+constexpr Charmap XDigitCharmap() {
+ return DigitCharmap() | Charmap::Range('A', 'F') | Charmap::Range('a', 'f');
+}
+constexpr Charmap PrintCharmap() { return Charmap::Range(0x20, 0x7e); }
+constexpr Charmap SpaceCharmap() { return Charmap::FromString("\t\n\v\f\r "); }
+constexpr Charmap CntrlCharmap() {
+ return Charmap::Range(0, 0x7f) & ~PrintCharmap();
+}
+constexpr Charmap BlankCharmap() { return Charmap::FromString("\t "); }
+constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); }
+constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); }
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_
diff --git a/absl/strings/internal/char_map_test.cc b/absl/strings/internal/char_map_test.cc
new file mode 100644
index 00000000..2167be97
--- /dev/null
+++ b/absl/strings/internal/char_map_test.cc
@@ -0,0 +1,172 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/char_map.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cctype>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+constexpr absl::strings_internal::Charmap everything_map =
+ ~absl::strings_internal::Charmap();
+constexpr absl::strings_internal::Charmap nothing_map{};
+
+TEST(Charmap, AllTests) {
+ const absl::strings_internal::Charmap also_nothing_map("", 0);
+ ASSERT_TRUE(everything_map.contains('\0'));
+ ASSERT_TRUE(!nothing_map.contains('\0'));
+ ASSERT_TRUE(!also_nothing_map.contains('\0'));
+ for (unsigned char ch = 1; ch != 0; ++ch) {
+ ASSERT_TRUE(everything_map.contains(ch));
+ ASSERT_TRUE(!nothing_map.contains(ch));
+ ASSERT_TRUE(!also_nothing_map.contains(ch));
+ }
+
+ const absl::strings_internal::Charmap symbols("&@#@^!@?", 5);
+ ASSERT_TRUE(symbols.contains('&'));
+ ASSERT_TRUE(symbols.contains('@'));
+ ASSERT_TRUE(symbols.contains('#'));
+ ASSERT_TRUE(symbols.contains('^'));
+ ASSERT_TRUE(!symbols.contains('!'));
+ ASSERT_TRUE(!symbols.contains('?'));
+ int cnt = 0;
+ for (unsigned char ch = 1; ch != 0; ++ch)
+ cnt += symbols.contains(ch);
+ ASSERT_EQ(cnt, 4);
+
+ const absl::strings_internal::Charmap lets("^abcde", 3);
+ const absl::strings_internal::Charmap lets2("fghij\0klmnop", 10);
+ const absl::strings_internal::Charmap lets3("fghij\0klmnop");
+ ASSERT_TRUE(lets2.contains('k'));
+ ASSERT_TRUE(!lets3.contains('k'));
+
+ ASSERT_TRUE(symbols.IntersectsWith(lets));
+ ASSERT_TRUE(!lets2.IntersectsWith(lets));
+ ASSERT_TRUE(lets.IntersectsWith(symbols));
+ ASSERT_TRUE(!lets.IntersectsWith(lets2));
+
+ ASSERT_TRUE(nothing_map.IsZero());
+ ASSERT_TRUE(!lets.IsZero());
+}
+
+namespace {
+std::string Members(const absl::strings_internal::Charmap& m) {
+ std::string r;
+ for (size_t i = 0; i < 256; ++i)
+ if (m.contains(i)) r.push_back(i);
+ return r;
+}
+
+std::string ClosedRangeString(unsigned char lo, unsigned char hi) {
+ // Don't depend on lo<hi. Just increment until lo==hi.
+ std::string s;
+ while (true) {
+ s.push_back(lo);
+ if (lo == hi) break;
+ ++lo;
+ }
+ return s;
+}
+
+} // namespace
+
+TEST(Charmap, Constexpr) {
+ constexpr absl::strings_internal::Charmap kEmpty = nothing_map;
+ EXPECT_THAT(Members(kEmpty), "");
+ constexpr absl::strings_internal::Charmap kA =
+ absl::strings_internal::Charmap::Char('A');
+ EXPECT_THAT(Members(kA), "A");
+ constexpr absl::strings_internal::Charmap kAZ =
+ absl::strings_internal::Charmap::Range('A', 'Z');
+ EXPECT_THAT(Members(kAZ), "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+ constexpr absl::strings_internal::Charmap kIdentifier =
+ absl::strings_internal::Charmap::Range('0', '9') |
+ absl::strings_internal::Charmap::Range('A', 'Z') |
+ absl::strings_internal::Charmap::Range('a', 'z') |
+ absl::strings_internal::Charmap::Char('_');
+ EXPECT_THAT(Members(kIdentifier),
+ "0123456789"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "_"
+ "abcdefghijklmnopqrstuvwxyz");
+ constexpr absl::strings_internal::Charmap kAll = everything_map;
+ for (size_t i = 0; i < 256; ++i) {
+ EXPECT_TRUE(kAll.contains(i)) << i;
+ }
+ constexpr absl::strings_internal::Charmap kHello =
+ absl::strings_internal::Charmap::FromString("Hello, world!");
+ EXPECT_THAT(Members(kHello), " !,Hdelorw");
+
+ // test negation and intersection
+ constexpr absl::strings_internal::Charmap kABC =
+ absl::strings_internal::Charmap::Range('A', 'Z') &
+ ~absl::strings_internal::Charmap::Range('D', 'Z');
+ EXPECT_THAT(Members(kABC), "ABC");
+}
+
+TEST(Charmap, Range) {
+ // Exhaustive testing takes too long, so test some of the boundaries that
+ // are perhaps going to cause trouble.
+ std::vector<size_t> poi = {0, 1, 2, 3, 4, 7, 8, 9, 15,
+ 16, 17, 30, 31, 32, 33, 63, 64, 65,
+ 127, 128, 129, 223, 224, 225, 254, 255};
+ for (auto lo = poi.begin(); lo != poi.end(); ++lo) {
+ SCOPED_TRACE(*lo);
+ for (auto hi = lo; hi != poi.end(); ++hi) {
+ SCOPED_TRACE(*hi);
+ EXPECT_THAT(Members(absl::strings_internal::Charmap::Range(*lo, *hi)),
+ ClosedRangeString(*lo, *hi));
+ }
+ }
+}
+
+bool AsBool(int x) { return static_cast<bool>(x); }
+
+TEST(CharmapCtype, Match) {
+ for (int c = 0; c < 256; ++c) {
+ SCOPED_TRACE(c);
+ SCOPED_TRACE(static_cast<char>(c));
+ EXPECT_EQ(AsBool(std::isupper(c)),
+ absl::strings_internal::UpperCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::islower(c)),
+ absl::strings_internal::LowerCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isdigit(c)),
+ absl::strings_internal::DigitCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isalpha(c)),
+ absl::strings_internal::AlphaCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isalnum(c)),
+ absl::strings_internal::AlnumCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isxdigit(c)),
+ absl::strings_internal::XDigitCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isprint(c)),
+ absl::strings_internal::PrintCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isspace(c)),
+ absl::strings_internal::SpaceCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::iscntrl(c)),
+ absl::strings_internal::CntrlCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isblank(c)),
+ absl::strings_internal::BlankCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::isgraph(c)),
+ absl::strings_internal::GraphCharmap().contains(c));
+ EXPECT_EQ(AsBool(std::ispunct(c)),
+ absl::strings_internal::PunctCharmap().contains(c));
+ }
+}
+
+} // namespace
diff --git a/absl/strings/internal/escaping_test_common.inc b/absl/strings/internal/escaping_test_common.inc
new file mode 100644
index 00000000..6f29140e
--- /dev/null
+++ b/absl/strings/internal/escaping_test_common.inc
@@ -0,0 +1,113 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This test contains common things needed by both escaping_test.cc and
+// escaping_benchmark.cc.
+
+namespace {
+
+struct {
+ absl::string_view plaintext;
+ absl::string_view cyphertext;
+} const base64_strings[] = {
+ // Some google quotes
+ // Cyphertext created with "uuencode (GNU sharutils) 4.6.3"
+ // (Note that we're testing the websafe encoding, though, so if
+ // you add messages, be sure to run "tr -- '+/' '-_'" on the output)
+ { "I was always good at math and science, and I never realized "
+ "that was unusual or somehow undesirable. So one of the things "
+ "I care a lot about is helping to remove that stigma, "
+ "to show girls that you can be feminine, you can like the things "
+ "that girls like, but you can also be really good at technology. "
+ "You can be really good at building things."
+ " - Marissa Meyer, Newsweek, 2010-12-22" "\n",
+
+ "SSB3YXMgYWx3YXlzIGdvb2QgYXQgbWF0aCBhbmQgc2NpZW5jZSwgYW5kIEkg"
+ "bmV2ZXIgcmVhbGl6ZWQgdGhhdCB3YXMgdW51c3VhbCBvciBzb21laG93IHVu"
+ "ZGVzaXJhYmxlLiBTbyBvbmUgb2YgdGhlIHRoaW5ncyBJIGNhcmUgYSBsb3Qg"
+ "YWJvdXQgaXMgaGVscGluZyB0byByZW1vdmUgdGhhdCBzdGlnbWEsIHRvIHNo"
+ "b3cgZ2lybHMgdGhhdCB5b3UgY2FuIGJlIGZlbWluaW5lLCB5b3UgY2FuIGxp"
+ "a2UgdGhlIHRoaW5ncyB0aGF0IGdpcmxzIGxpa2UsIGJ1dCB5b3UgY2FuIGFs"
+ "c28gYmUgcmVhbGx5IGdvb2QgYXQgdGVjaG5vbG9neS4gWW91IGNhbiBiZSBy"
+ "ZWFsbHkgZ29vZCBhdCBidWlsZGluZyB0aGluZ3MuIC0gTWFyaXNzYSBNZXll"
+ "ciwgTmV3c3dlZWssIDIwMTAtMTItMjIK" },
+
+ { "Typical first year for a new cluster: "
+ "~0.5 overheating "
+ "~1 PDU failure "
+ "~1 rack-move "
+ "~1 network rewiring "
+ "~20 rack failures "
+ "~5 racks go wonky "
+ "~8 network maintenances "
+ "~12 router reloads "
+ "~3 router failures "
+ "~dozens of minor 30-second blips for dns "
+ "~1000 individual machine failures "
+ "~thousands of hard drive failures "
+ "slow disks, bad memory, misconfigured machines, flaky machines, etc."
+ " - Jeff Dean, The Joys of Real Hardware" "\n",
+
+ "VHlwaWNhbCBmaXJzdCB5ZWFyIGZvciBhIG5ldyBjbHVzdGVyOiB-MC41IG92"
+ "ZXJoZWF0aW5nIH4xIFBEVSBmYWlsdXJlIH4xIHJhY2stbW92ZSB-MSBuZXR3"
+ "b3JrIHJld2lyaW5nIH4yMCByYWNrIGZhaWx1cmVzIH41IHJhY2tzIGdvIHdv"
+ "bmt5IH44IG5ldHdvcmsgbWFpbnRlbmFuY2VzIH4xMiByb3V0ZXIgcmVsb2Fk"
+ "cyB-MyByb3V0ZXIgZmFpbHVyZXMgfmRvemVucyBvZiBtaW5vciAzMC1zZWNv"
+ "bmQgYmxpcHMgZm9yIGRucyB-MTAwMCBpbmRpdmlkdWFsIG1hY2hpbmUgZmFp"
+ "bHVyZXMgfnRob3VzYW5kcyBvZiBoYXJkIGRyaXZlIGZhaWx1cmVzIHNsb3cg"
+ "ZGlza3MsIGJhZCBtZW1vcnksIG1pc2NvbmZpZ3VyZWQgbWFjaGluZXMsIGZs"
+ "YWt5IG1hY2hpbmVzLCBldGMuIC0gSmVmZiBEZWFuLCBUaGUgSm95cyBvZiBS"
+ "ZWFsIEhhcmR3YXJlCg" },
+
+ { "I'm the head of the webspam team at Google. "
+ "That means that if you type your name into Google and get porn back, "
+ "it's my fault. Unless you're a porn star, in which case porn is a "
+ "completely reasonable response."
+ " - Matt Cutts, Google Plus" "\n",
+
+ "SSdtIHRoZSBoZWFkIG9mIHRoZSB3ZWJzcGFtIHRlYW0gYXQgR29vZ2xlLiAg"
+ "VGhhdCBtZWFucyB0aGF0IGlmIHlvdSB0eXBlIHlvdXIgbmFtZSBpbnRvIEdv"
+ "b2dsZSBhbmQgZ2V0IHBvcm4gYmFjaywgaXQncyBteSBmYXVsdC4gVW5sZXNz"
+ "IHlvdSdyZSBhIHBvcm4gc3RhciwgaW4gd2hpY2ggY2FzZSBwb3JuIGlzIGEg"
+ "Y29tcGxldGVseSByZWFzb25hYmxlIHJlc3BvbnNlLiAtIE1hdHQgQ3V0dHMs"
+ "IEdvb2dsZSBQbHVzCg" },
+
+ { "It will still be a long time before machines approach human intelligence. "
+ "But luckily, machines don't actually have to be intelligent; "
+ "they just have to fake it. Access to a wealth of information, "
+ "combined with a rudimentary decision-making capacity, "
+ "can often be almost as useful. Of course, the results are better yet "
+ "when coupled with intelligence. A reference librarian with access to "
+ "a good search engine is a formidable tool."
+ " - Craig Silverstein, Siemens Pictures of the Future, Spring 2004" "\n",
+
+ "SXQgd2lsbCBzdGlsbCBiZSBhIGxvbmcgdGltZSBiZWZvcmUgbWFjaGluZXMg"
+ "YXBwcm9hY2ggaHVtYW4gaW50ZWxsaWdlbmNlLiBCdXQgbHVja2lseSwgbWFj"
+ "aGluZXMgZG9uJ3QgYWN0dWFsbHkgaGF2ZSB0byBiZSBpbnRlbGxpZ2VudDsg"
+ "dGhleSBqdXN0IGhhdmUgdG8gZmFrZSBpdC4gQWNjZXNzIHRvIGEgd2VhbHRo"
+ "IG9mIGluZm9ybWF0aW9uLCBjb21iaW5lZCB3aXRoIGEgcnVkaW1lbnRhcnkg"
+ "ZGVjaXNpb24tbWFraW5nIGNhcGFjaXR5LCBjYW4gb2Z0ZW4gYmUgYWxtb3N0"
+ "IGFzIHVzZWZ1bC4gT2YgY291cnNlLCB0aGUgcmVzdWx0cyBhcmUgYmV0dGVy"
+ "IHlldCB3aGVuIGNvdXBsZWQgd2l0aCBpbnRlbGxpZ2VuY2UuIEEgcmVmZXJl"
+ "bmNlIGxpYnJhcmlhbiB3aXRoIGFjY2VzcyB0byBhIGdvb2Qgc2VhcmNoIGVu"
+ "Z2luZSBpcyBhIGZvcm1pZGFibGUgdG9vbC4gLSBDcmFpZyBTaWx2ZXJzdGVp"
+ "biwgU2llbWVucyBQaWN0dXJlcyBvZiB0aGUgRnV0dXJlLCBTcHJpbmcgMjAw"
+ "NAo" },
+
+ // Degenerate edge case
+ { "",
+ "" },
+};
+
+} // namespace
diff --git a/absl/strings/internal/fastmem.h b/absl/strings/internal/fastmem.h
new file mode 100644
index 00000000..9989b12e
--- /dev/null
+++ b/absl/strings/internal/fastmem.h
@@ -0,0 +1,215 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Fast memory copying and comparison routines.
+// strings::fastmemcmp_inlined() replaces memcmp()
+// strings::memcpy_inlined() replaces memcpy()
+// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0
+//
+// strings::*_inlined() routines are inline versions of the
+// routines exported by this module. Sometimes using the inlined
+// versions is faster. Measure before using the inlined versions.
+//
+
+#ifndef ABSL_STRINGS_INTERNAL_FASTMEM_H_
+#define ABSL_STRINGS_INTERNAL_FASTMEM_H_
+
+#ifdef __SSE4_1__
+#include <immintrin.h>
+#endif
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+#include "absl/base/internal/unaligned_access.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+
+namespace absl {
+namespace strings_internal {
+
+// Return true if the n bytes at a equal the n bytes at b.
+// The regions are allowed to overlap.
+//
+// The performance is similar to the performance of memcmp(), but faster for
+// moderately-sized inputs, or inputs that share a common prefix and differ
+// somewhere in their last 8 bytes. Further optimizations can be added later
+// if it makes sense to do so. Alternatively, if the compiler & runtime improve
+// to eliminate the need for this, we can remove it.
+inline bool memeq(const char* a, const char* b, size_t n) {
+ size_t n_rounded_down = n & ~static_cast<size_t>(7);
+ if (ABSL_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7
+ return memcmp(a, b, n) == 0;
+ }
+ // n >= 8
+ {
+ uint64_t u =
+ ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
+ uint64_t v = ABSL_INTERNAL_UNALIGNED_LOAD64(a + n - 8) ^
+ ABSL_INTERNAL_UNALIGNED_LOAD64(b + n - 8);
+ if ((u | v) != 0) { // The first or last 8 bytes differ.
+ return false;
+ }
+ }
+ // The next line forces n to be a multiple of 8.
+ n = n_rounded_down;
+ if (n >= 80) {
+ // In 2013 or later, this should be fast on long strings.
+ return memcmp(a, b, n) == 0;
+ }
+ // Now force n to be a multiple of 16. Arguably, a "switch" would be smart
+ // here, but there's a difficult-to-evaluate code size vs. speed issue. The
+ // current approach often re-compares some bytes (worst case is if n initially
+ // was 16, 32, 48, or 64), but is fairly short.
+ size_t e = n & 8;
+ a += e;
+ b += e;
+ n -= e;
+ // n is now in {0, 16, 32, ...}. Process 0 or more 16-byte chunks.
+ while (n > 0) {
+#ifdef __SSE4_1__
+ __m128i u =
+ _mm_xor_si128(_mm_loadu_si128(reinterpret_cast<const __m128i*>(a)),
+ _mm_loadu_si128(reinterpret_cast<const __m128i*>(b)));
+ if (!_mm_test_all_zeros(u, u)) {
+ return false;
+ }
+#else
+ uint64_t x =
+ ABSL_INTERNAL_UNALIGNED_LOAD64(a) ^ ABSL_INTERNAL_UNALIGNED_LOAD64(b);
+ uint64_t y = ABSL_INTERNAL_UNALIGNED_LOAD64(a + 8) ^
+ ABSL_INTERNAL_UNALIGNED_LOAD64(b + 8);
+ if ((x | y) != 0) {
+ return false;
+ }
+#endif
+ a += 16;
+ b += 16;
+ n -= 16;
+ }
+ return true;
+}
+
+inline int fastmemcmp_inlined(const void* va, const void* vb, size_t n) {
+ const unsigned char* pa = static_cast<const unsigned char*>(va);
+ const unsigned char* pb = static_cast<const unsigned char*>(vb);
+ switch (n) {
+ default:
+ return memcmp(va, vb, n);
+ case 7:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 6:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 5:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 4:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 3:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 2:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ++pa;
+ ++pb;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 1:
+ if (*pa != *pb) return *pa < *pb ? -1 : +1;
+ ABSL_FALLTHROUGH_INTENDED;
+ case 0:
+ break;
+ }
+ return 0;
+}
+
+// The standard memcpy operation is slow for variable small sizes.
+// This implementation inlines the optimal realization for sizes 1 to 16.
+// To avoid code bloat don't use it in case of not performance-critical spots,
+// nor when you don't expect very frequent values of size <= 16.
+inline void memcpy_inlined(char* dst, const char* src, size_t size) {
+ // Compiler inlines code with minimal amount of data movement when third
+ // parameter of memcpy is a constant.
+ switch (size) {
+ case 1:
+ memcpy(dst, src, 1);
+ break;
+ case 2:
+ memcpy(dst, src, 2);
+ break;
+ case 3:
+ memcpy(dst, src, 3);
+ break;
+ case 4:
+ memcpy(dst, src, 4);
+ break;
+ case 5:
+ memcpy(dst, src, 5);
+ break;
+ case 6:
+ memcpy(dst, src, 6);
+ break;
+ case 7:
+ memcpy(dst, src, 7);
+ break;
+ case 8:
+ memcpy(dst, src, 8);
+ break;
+ case 9:
+ memcpy(dst, src, 9);
+ break;
+ case 10:
+ memcpy(dst, src, 10);
+ break;
+ case 11:
+ memcpy(dst, src, 11);
+ break;
+ case 12:
+ memcpy(dst, src, 12);
+ break;
+ case 13:
+ memcpy(dst, src, 13);
+ break;
+ case 14:
+ memcpy(dst, src, 14);
+ break;
+ case 15:
+ memcpy(dst, src, 15);
+ break;
+ case 16:
+ memcpy(dst, src, 16);
+ break;
+ default:
+ memcpy(dst, src, size);
+ break;
+ }
+}
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_FASTMEM_H_
diff --git a/absl/strings/internal/fastmem_test.cc b/absl/strings/internal/fastmem_test.cc
new file mode 100644
index 00000000..7c670f96
--- /dev/null
+++ b/absl/strings/internal/fastmem_test.cc
@@ -0,0 +1,453 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/fastmem.h"
+
+#include <memory>
+#include <random>
+#include <string>
+
+#include "base/init_google.h"
+#include "base/logging.h"
+#include "testing/base/public/benchmark.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+using RandomEngine = std::minstd_rand0;
+
+void VerifyResults(const int r1, const int r2, const std::string& a,
+ const std::string& b) {
+ CHECK_EQ(a.size(), b.size());
+ if (r1 == 0) {
+ EXPECT_EQ(r2, 0) << a << " " << b;
+ } else if (r1 > 0) {
+ EXPECT_GT(r2, 0) << a << " " << b;
+ } else {
+ EXPECT_LT(r2, 0) << a << " " << b;
+ }
+ if ((r1 == 0) == (r2 == 0)) {
+ EXPECT_EQ(r1 == 0,
+ absl::strings_internal::memeq(a.data(), b.data(), a.size()))
+ << r1 << " " << a << " " << b;
+ }
+}
+
+// Check correctness against glibc's memcmp implementation
+void CheckSingle(const std::string& a, const std::string& b) {
+ CHECK_EQ(a.size(), b.size());
+ const int r1 = memcmp(a.data(), b.data(), a.size());
+ const int r2 =
+ absl::strings_internal::fastmemcmp_inlined(a.data(), b.data(), a.size());
+ VerifyResults(r1, r2, a, b);
+}
+
+void GenerateString(size_t len, std::string* s) {
+ s->clear();
+ for (int i = 0; i < len; i++) {
+ *s += ('a' + (i % 26));
+ }
+}
+
+void CheckCompare(const std::string& a, const std::string& b) {
+ CheckSingle(a, b);
+ for (int common = 0; common <= 32; common++) {
+ std::string extra;
+ GenerateString(common, &extra);
+ CheckSingle(extra + a, extra + b);
+ CheckSingle(a + extra, b + extra);
+ for (char c1 = 'a'; c1 <= 'c'; c1++) {
+ for (char c2 = 'a'; c2 <= 'c'; c2++) {
+ CheckSingle(extra + c1 + a, extra + c2 + b);
+ }
+ }
+ }
+}
+
+TEST(FastCompare, Misc) {
+ CheckCompare("", "");
+
+ CheckCompare("a", "a");
+ CheckCompare("ab", "ab");
+ CheckCompare("abc", "abc");
+ CheckCompare("abcd", "abcd");
+ CheckCompare("abcde", "abcde");
+
+ CheckCompare("a", "x");
+ CheckCompare("ab", "xb");
+ CheckCompare("abc", "xbc");
+ CheckCompare("abcd", "xbcd");
+ CheckCompare("abcde", "xbcde");
+
+ CheckCompare("x", "a");
+ CheckCompare("xb", "ab");
+ CheckCompare("xbc", "abc");
+ CheckCompare("xbcd", "abcd");
+ CheckCompare("xbcde", "abcde");
+
+ CheckCompare("a", "x");
+ CheckCompare("ab", "ax");
+ CheckCompare("abc", "abx");
+ CheckCompare("abcd", "abcx");
+ CheckCompare("abcde", "abcdx");
+
+ CheckCompare("x", "a");
+ CheckCompare("ax", "ab");
+ CheckCompare("abx", "abc");
+ CheckCompare("abcx", "abcd");
+ CheckCompare("abcdx", "abcde");
+
+ for (int len = 0; len < 1000; len++) {
+ std::string p(len, 'z');
+ CheckCompare(p + "x", p + "a");
+ CheckCompare(p + "ax", p + "ab");
+ CheckCompare(p + "abx", p + "abc");
+ CheckCompare(p + "abcx", p + "abcd");
+ CheckCompare(p + "abcdx", p + "abcde");
+ }
+}
+
+TEST(FastCompare, TrailingByte) {
+ for (int i = 0; i < 256; i++) {
+ for (int j = 0; j < 256; j++) {
+ std::string a(1, i);
+ std::string b(1, j);
+ CheckSingle(a, b);
+ }
+ }
+}
+
+// Check correctness of memcpy_inlined.
+void CheckSingleMemcpyInlined(const std::string& a) {
+ std::unique_ptr<char[]> destination(new char[a.size() + 2]);
+ destination[0] = 'x';
+ destination[a.size() + 1] = 'x';
+ absl::strings_internal::memcpy_inlined(destination.get() + 1, a.data(),
+ a.size());
+ CHECK_EQ('x', destination[0]);
+ CHECK_EQ('x', destination[a.size() + 1]);
+ CHECK_EQ(0, memcmp(a.data(), destination.get() + 1, a.size()));
+}
+
+TEST(MemCpyInlined, Misc) {
+ CheckSingleMemcpyInlined("");
+ CheckSingleMemcpyInlined("0");
+ CheckSingleMemcpyInlined("012");
+ CheckSingleMemcpyInlined("0123");
+ CheckSingleMemcpyInlined("01234");
+ CheckSingleMemcpyInlined("012345");
+ CheckSingleMemcpyInlined("0123456");
+ CheckSingleMemcpyInlined("01234567");
+ CheckSingleMemcpyInlined("012345678");
+ CheckSingleMemcpyInlined("0123456789");
+ CheckSingleMemcpyInlined("0123456789a");
+ CheckSingleMemcpyInlined("0123456789ab");
+ CheckSingleMemcpyInlined("0123456789abc");
+ CheckSingleMemcpyInlined("0123456789abcd");
+ CheckSingleMemcpyInlined("0123456789abcde");
+ CheckSingleMemcpyInlined("0123456789abcdef");
+ CheckSingleMemcpyInlined("0123456789abcdefg");
+}
+
+template <typename Function>
+inline void CopyLoop(benchmark::State& state, int size, Function func) {
+ char* src = new char[size];
+ char* dst = new char[size];
+ memset(src, 'x', size);
+ memset(dst, 'y', size);
+ for (auto _ : state) {
+ func(dst, src, size);
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
+ CHECK_EQ(dst[0], 'x');
+ delete[] src;
+ delete[] dst;
+}
+
+void BM_memcpy(benchmark::State& state) {
+ CopyLoop(state, state.range(0), memcpy);
+}
+BENCHMARK(BM_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+void BM_memcpy_inlined(benchmark::State& state) {
+ CopyLoop(state, state.range(0), absl::strings_internal::memcpy_inlined);
+}
+BENCHMARK(BM_memcpy_inlined)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+// unaligned memcpy
+void BM_unaligned_memcpy(benchmark::State& state) {
+ const int n = state.range(0);
+ const int kMaxOffset = 32;
+ char* src = new char[n + kMaxOffset];
+ char* dst = new char[n + kMaxOffset];
+ memset(src, 'x', n + kMaxOffset);
+ int r = 0, i = 0;
+ for (auto _ : state) {
+ memcpy(dst + (i % kMaxOffset), src + ((i + 5) % kMaxOffset), n);
+ r += dst[0];
+ ++i;
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] src;
+ delete[] dst;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_unaligned_memcpy)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+// memmove worst case: heavy overlap, but not always by the same amount.
+// Also, the source and destination will often be unaligned.
+void BM_memmove_worst_case(benchmark::State& state) {
+ const int n = state.range(0);
+ const int32_t kDeterministicSeed = 301;
+ const int kMaxOffset = 32;
+ char* src = new char[n + kMaxOffset];
+ memset(src, 'x', n + kMaxOffset);
+ size_t offsets[64];
+ RandomEngine rng(kDeterministicSeed);
+ std::uniform_int_distribution<size_t> random_to_max_offset(0, kMaxOffset);
+ for (size_t& offset : offsets) {
+ offset = random_to_max_offset(rng);
+ }
+ int r = 0, i = 0;
+ for (auto _ : state) {
+ memmove(src + offsets[i], src + offsets[i + 1], n);
+ r += src[0];
+ i = (i + 2) % arraysize(offsets);
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] src;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_memmove_worst_case)->DenseRange(1, 18)->Range(32, 8 << 20);
+
+// memmove cache-friendly: aligned and overlapping with 4k
+// between the source and destination addresses.
+void BM_memmove_cache_friendly(benchmark::State& state) {
+ const int n = state.range(0);
+ char* src = new char[n + 4096];
+ memset(src, 'x', n);
+ int r = 0;
+ while (state.KeepRunningBatch(2)) { // count each memmove as an iteration
+ memmove(src + 4096, src, n);
+ memmove(src, src + 4096, n);
+ r += src[0];
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] src;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_memmove_cache_friendly)
+ ->Arg(5 * 1024)
+ ->Arg(10 * 1024)
+ ->Range(16 << 10, 8 << 20);
+
+// memmove best(?) case: aligned and non-overlapping.
+void BM_memmove_aligned_non_overlapping(benchmark::State& state) {
+ CopyLoop(state, state.range(0), memmove);
+}
+BENCHMARK(BM_memmove_aligned_non_overlapping)
+ ->DenseRange(1, 18)
+ ->Range(32, 8 << 20);
+
+// memset speed
+void BM_memset(benchmark::State& state) {
+ const int n = state.range(0);
+ char* dst = new char[n];
+ int r = 0;
+ for (auto _ : state) {
+ memset(dst, 'x', n);
+ r += dst[0];
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] dst;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_memset)->Range(8, 4096 << 10);
+
+// Bandwidth (vectorization?) test: the ideal generated code will be limited
+// by memory bandwidth. Even so-so generated code will max out memory bandwidth
+// on some machines.
+void BM_membandwidth(benchmark::State& state) {
+ const int n = state.range(0);
+ CHECK_EQ(n % 32, 0); // We will read 32 bytes per iter.
+ char* dst = new char[n];
+ int r = 0;
+ for (auto _ : state) {
+ const uint32_t* p = reinterpret_cast<uint32_t*>(dst);
+ const uint32_t* limit = reinterpret_cast<uint32_t*>(dst + n);
+ uint32_t x = 0;
+ while (p < limit) {
+ x += p[0];
+ x += p[1];
+ x += p[2];
+ x += p[3];
+ x += p[4];
+ x += p[5];
+ x += p[6];
+ x += p[7];
+ p += 8;
+ }
+ r += x;
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * n);
+ delete[] dst;
+ benchmark::DoNotOptimize(r);
+}
+BENCHMARK(BM_membandwidth)->Range(32, 16384 << 10);
+
+// Helper for benchmarks. Repeatedly compares two strings that are
+// either equal or different only in one character. If test_equal_strings
+// is false then position_to_modify determines where the difference will be.
+template <typename Function>
+ABSL_ATTRIBUTE_ALWAYS_INLINE inline void StringCompareLoop(
+ benchmark::State& state, bool test_equal_strings,
+ std::string::size_type position_to_modify, int size, Function func) {
+ const int kIterMult = 4; // Iteration multiplier for better timing resolution
+ CHECK_GT(size, 0);
+ const bool position_to_modify_is_valid =
+ position_to_modify != std::string::npos && position_to_modify < size;
+ CHECK_NE(position_to_modify_is_valid, test_equal_strings);
+ if (!position_to_modify_is_valid) {
+ position_to_modify = 0;
+ }
+ std::string sa(size, 'a');
+ std::string sb = sa;
+ char last = sa[size - 1];
+ int num = 0;
+ for (auto _ : state) {
+ for (int i = 0; i < kIterMult; ++i) {
+ sb[position_to_modify] = test_equal_strings ? last : last ^ 1;
+ num += func(sa, sb);
+ }
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
+ benchmark::DoNotOptimize(num);
+}
+
+// Helper for benchmarks. Repeatedly compares two memory regions that are
+// either equal or different only in their final character.
+template <typename Function>
+ABSL_ATTRIBUTE_ALWAYS_INLINE inline void CompareLoop(benchmark::State& state,
+ bool test_equal_strings,
+ int size, Function func) {
+ const int kIterMult = 4; // Iteration multiplier for better timing resolution
+ CHECK_GT(size, 0);
+ char* data = static_cast<char*>(malloc(size * 2));
+ memset(data, 'a', size * 2);
+ char* a = data;
+ char* b = data + size;
+ char last = a[size - 1];
+ int num = 0;
+ for (auto _ : state) {
+ for (int i = 0; i < kIterMult; ++i) {
+ b[size - 1] = test_equal_strings ? last : last ^ 1;
+ num += func(a, b, size);
+ }
+ }
+ state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * size);
+ benchmark::DoNotOptimize(num);
+ free(data);
+}
+
+void BM_memcmp(benchmark::State& state) {
+ CompareLoop(state, false, state.range(0), memcmp);
+}
+BENCHMARK(BM_memcmp)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+void BM_fastmemcmp_inlined(benchmark::State& state) {
+ CompareLoop(state, false, state.range(0),
+ absl::strings_internal::fastmemcmp_inlined);
+}
+BENCHMARK(BM_fastmemcmp_inlined)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+void BM_memeq(benchmark::State& state) {
+ CompareLoop(state, false, state.range(0), absl::strings_internal::memeq);
+}
+BENCHMARK(BM_memeq)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+void BM_memeq_equal(benchmark::State& state) {
+ CompareLoop(state, true, state.range(0), absl::strings_internal::memeq);
+}
+BENCHMARK(BM_memeq_equal)->DenseRange(1, 9)->Range(32, 8 << 20);
+
+bool StringLess(const std::string& x, const std::string& y) { return x < y; }
+bool StringEqual(const std::string& x, const std::string& y) { return x == y; }
+bool StdEqual(const std::string& x, const std::string& y) {
+ return x.size() == y.size() &&
+ std::equal(x.data(), x.data() + x.size(), y.data());
+}
+
+// Benchmark for x < y, where x and y are strings that differ in only their
+// final char. That should be more-or-less the worst case for <.
+void BM_string_less(benchmark::State& state) {
+ StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
+ StringLess);
+}
+BENCHMARK(BM_string_less)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+// Benchmark for x < y, where x and y are strings that differ in only their
+// first char. That should be more-or-less the best case for <.
+void BM_string_less_easy(benchmark::State& state) {
+ StringCompareLoop(state, false, 0, state.range(0), StringLess);
+}
+BENCHMARK(BM_string_less_easy)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_string_equal(benchmark::State& state) {
+ StringCompareLoop(state, false, state.range(0) - 1, state.range(0),
+ StringEqual);
+}
+BENCHMARK(BM_string_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_string_equal_equal(benchmark::State& state) {
+ StringCompareLoop(state, true, std::string::npos, state.range(0), StringEqual);
+}
+BENCHMARK(BM_string_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_std_equal(benchmark::State& state) {
+ StringCompareLoop(state, false, state.range(0) - 1, state.range(0), StdEqual);
+}
+BENCHMARK(BM_std_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_std_equal_equal(benchmark::State& state) {
+ StringCompareLoop(state, true, std::string::npos, state.range(0), StdEqual);
+}
+BENCHMARK(BM_std_equal_equal)->DenseRange(1, 9)->Range(32, 1 << 20);
+
+void BM_string_equal_unequal_lengths(benchmark::State& state) {
+ const int size = state.range(0);
+ std::string a(size, 'a');
+ std::string b(size + 1, 'a');
+ int count = 0;
+ for (auto _ : state) {
+ b[size - 1] = 'a';
+ count += (a == b);
+ }
+ benchmark::DoNotOptimize(count);
+}
+BENCHMARK(BM_string_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
+
+void BM_stdstring_equal_unequal_lengths(benchmark::State& state) {
+ const int size = state.range(0);
+ std::string a(size, 'a');
+ std::string b(size + 1, 'a');
+ int count = 0;
+ for (auto _ : state) {
+ b[size - 1] = 'a';
+ count += (a == b);
+ }
+ benchmark::DoNotOptimize(count);
+}
+BENCHMARK(BM_stdstring_equal_unequal_lengths)->Arg(1)->Arg(1 << 20);
+
+} // namespace
diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc
new file mode 100644
index 00000000..a0de70df
--- /dev/null
+++ b/absl/strings/internal/memutil.cc
@@ -0,0 +1,110 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/memutil.h"
+
+#include <cstdlib>
+
+namespace absl {
+namespace strings_internal {
+
+int memcasecmp(const char* s1, const char* s2, size_t len) {
+ const unsigned char* us1 = reinterpret_cast<const unsigned char*>(s1);
+ const unsigned char* us2 = reinterpret_cast<const unsigned char*>(s2);
+
+ for (size_t i = 0; i < len; i++) {
+ const int diff =
+ int{static_cast<unsigned char>(absl::ascii_tolower(us1[i]))} -
+ int{static_cast<unsigned char>(absl::ascii_tolower(us2[i]))};
+ if (diff != 0) return diff;
+ }
+ return 0;
+}
+
+char* memdup(const char* s, size_t slen) {
+ void* copy;
+ if ((copy = malloc(slen)) == nullptr) return nullptr;
+ memcpy(copy, s, slen);
+ return reinterpret_cast<char*>(copy);
+}
+
+char* memrchr(const char* s, int c, size_t slen) {
+ for (const char* e = s + slen - 1; e >= s; e--) {
+ if (*e == c) return const_cast<char*>(e);
+ }
+ return nullptr;
+}
+
+size_t memspn(const char* s, size_t slen, const char* accept) {
+ const char* p = s;
+ const char* spanp;
+ char c, sc;
+
+cont:
+ c = *p++;
+ if (slen-- == 0) return p - 1 - s;
+ for (spanp = accept; (sc = *spanp++) != '\0';)
+ if (sc == c) goto cont;
+ return p - 1 - s;
+}
+
+size_t memcspn(const char* s, size_t slen, const char* reject) {
+ const char* p = s;
+ const char* spanp;
+ char c, sc;
+
+ while (slen-- != 0) {
+ c = *p++;
+ for (spanp = reject; (sc = *spanp++) != '\0';)
+ if (sc == c) return p - 1 - s;
+ }
+ return p - s;
+}
+
+char* mempbrk(const char* s, size_t slen, const char* accept) {
+ const char* scanp;
+ int sc;
+
+ for (; slen; ++s, --slen) {
+ for (scanp = accept; (sc = *scanp++) != '\0';)
+ if (sc == *s) return const_cast<char*>(s);
+ }
+ return nullptr;
+}
+
+// This is significantly faster for case-sensitive matches with very
+// few possible matches. See unit test for benchmarks.
+const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
+ size_t neelen) {
+ if (0 == neelen) {
+ return phaystack; // even if haylen is 0
+ }
+ if (haylen < neelen) return nullptr;
+
+ const char* match;
+ const char* hayend = phaystack + haylen - neelen + 1;
+ // A static cast is used here to work around the fact that memchr returns
+ // a void* on Posix-compliant systems and const void* on Windows.
+ while ((match = static_cast<const char*>(
+ memchr(phaystack, pneedle[0], hayend - phaystack)))) {
+ if (memcmp(match, pneedle, neelen) == 0)
+ return match;
+ else
+ phaystack = match + 1;
+ }
+ return nullptr;
+}
+
+} // namespace strings_internal
+} // namespace absl
diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h
new file mode 100644
index 00000000..a6f1c691
--- /dev/null
+++ b/absl/strings/internal/memutil.h
@@ -0,0 +1,146 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// These routines provide mem versions of standard C std::string routines,
+// such as strpbrk. They function exactly the same as the str versions,
+// so if you wonder what they are, replace the word "mem" by
+// "str" and check out the man page. I could return void*, as the
+// strutil.h mem*() routines tend to do, but I return char* instead
+// since this is by far the most common way these functions are called.
+//
+// The difference between the mem and str versions is the mem version
+// takes a pointer and a length, rather than a '\0'-terminated std::string.
+// The memcase* routines defined here assume the locale is "C"
+// (they use absl::ascii_tolower instead of tolower).
+//
+// These routines are based on the BSD library.
+//
+// Here's a list of routines from std::string.h, and their mem analogues.
+// Functions in lowercase are defined in std::string.h; those in UPPERCASE
+// are defined here:
+//
+// strlen --
+// strcat strncat MEMCAT
+// strcpy strncpy memcpy
+// -- memccpy (very cool function, btw)
+// -- memmove
+// -- memset
+// strcmp strncmp memcmp
+// strcasecmp strncasecmp MEMCASECMP
+// strchr memchr
+// strcoll --
+// strxfrm --
+// strdup strndup MEMDUP
+// strrchr MEMRCHR
+// strspn MEMSPN
+// strcspn MEMCSPN
+// strpbrk MEMPBRK
+// strstr MEMSTR MEMMEM
+// (g)strcasestr MEMCASESTR MEMCASEMEM
+// strtok --
+// strprefix MEMPREFIX (strprefix is from strutil.h)
+// strcaseprefix MEMCASEPREFIX (strcaseprefix is from strutil.h)
+// strsuffix MEMSUFFIX (strsuffix is from strutil.h)
+// strcasesuffix MEMCASESUFFIX (strcasesuffix is from strutil.h)
+// -- MEMIS
+// -- MEMCASEIS
+// strcount MEMCOUNT (strcount is from strutil.h)
+
+#ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_
+#define ABSL_STRINGS_INTERNAL_MEMUTIL_H_
+
+#include <cstddef>
+#include <cstring>
+
+#include "absl/base/port.h" // disable some warnings on Windows
+#include "absl/strings/ascii.h" // for absl::ascii_tolower
+
+namespace absl {
+namespace strings_internal {
+
+inline char* memcat(char* dest, size_t destlen, const char* src,
+ size_t srclen) {
+ return reinterpret_cast<char*>(memcpy(dest + destlen, src, srclen));
+}
+
+int memcasecmp(const char* s1, const char* s2, size_t len);
+char* memdup(const char* s, size_t slen);
+char* memrchr(const char* s, int c, size_t slen);
+size_t memspn(const char* s, size_t slen, const char* accept);
+size_t memcspn(const char* s, size_t slen, const char* reject);
+char* mempbrk(const char* s, size_t slen, const char* accept);
+
+// This is for internal use only. Don't call this directly
+template <bool case_sensitive>
+const char* int_memmatch(const char* haystack, size_t haylen,
+ const char* needle, size_t neelen) {
+ if (0 == neelen) {
+ return haystack; // even if haylen is 0
+ }
+ const char* hayend = haystack + haylen;
+ const char* needlestart = needle;
+ const char* needleend = needlestart + neelen;
+
+ for (; haystack < hayend; ++haystack) {
+ char hay = case_sensitive
+ ? *haystack
+ : absl::ascii_tolower(static_cast<unsigned char>(*haystack));
+ char nee = case_sensitive
+ ? *needle
+ : absl::ascii_tolower(static_cast<unsigned char>(*needle));
+ if (hay == nee) {
+ if (++needle == needleend) {
+ return haystack + 1 - neelen;
+ }
+ } else if (needle != needlestart) {
+ // must back up haystack in case a prefix matched (find "aab" in "aaab")
+ haystack -= needle - needlestart; // for loop will advance one more
+ needle = needlestart;
+ }
+ }
+ return nullptr;
+}
+
+// These are the guys you can call directly
+inline const char* memstr(const char* phaystack, size_t haylen,
+ const char* pneedle) {
+ return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle));
+}
+
+inline const char* memcasestr(const char* phaystack, size_t haylen,
+ const char* pneedle) {
+ return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle));
+}
+
+inline const char* memmem(const char* phaystack, size_t haylen,
+ const char* pneedle, size_t needlelen) {
+ return int_memmatch<true>(phaystack, haylen, pneedle, needlelen);
+}
+
+inline const char* memcasemem(const char* phaystack, size_t haylen,
+ const char* pneedle, size_t needlelen) {
+ return int_memmatch<false>(phaystack, haylen, pneedle, needlelen);
+}
+
+// This is significantly faster for case-sensitive matches with very
+// few possible matches. See unit test for benchmarks.
+const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle,
+ size_t neelen);
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_
diff --git a/absl/strings/internal/memutil_test.cc b/absl/strings/internal/memutil_test.cc
new file mode 100644
index 00000000..1ff60f20
--- /dev/null
+++ b/absl/strings/internal/memutil_test.cc
@@ -0,0 +1,180 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Unit test for memutil.cc
+
+#include "absl/strings/internal/memutil.h"
+
+#include <algorithm>
+#include <cstdlib>
+
+#include "gtest/gtest.h"
+#include "absl/strings/ascii.h"
+
+namespace {
+
+static char* memcasechr(const char* s, int c, size_t slen) {
+ c = absl::ascii_tolower(c);
+ for (; slen; ++s, --slen) {
+ if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
+ }
+ return nullptr;
+}
+
+static const char* memcasematch(const char* phaystack, size_t haylen,
+ const char* pneedle, size_t neelen) {
+ if (0 == neelen) {
+ return phaystack; // even if haylen is 0
+ }
+ if (haylen < neelen) return nullptr;
+
+ const char* match;
+ const char* hayend = phaystack + haylen - neelen + 1;
+ while ((match = static_cast<char*>(
+ memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
+ if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
+ return match;
+ else
+ phaystack = match + 1;
+ }
+ return nullptr;
+}
+
+TEST(MemUtilTest, AllTests) {
+ // check memutil functions
+ char a[1000];
+ absl::strings_internal::memcat(a, 0, "hello", sizeof("hello") - 1);
+ absl::strings_internal::memcat(a, 5, " there", sizeof(" there") - 1);
+
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO there",
+ sizeof("hello there") - 1),
+ 0);
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
+ sizeof("hello there") - 1),
+ -1);
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "heLLO therf",
+ sizeof("hello there") - 2),
+ 0);
+ EXPECT_EQ(absl::strings_internal::memcasecmp(a, "whatever", 0), 0);
+
+ char* p = absl::strings_internal::memdup("hello", 5);
+ free(p);
+
+ p = absl::strings_internal::memrchr("hello there", 'e',
+ sizeof("hello there") - 1);
+ EXPECT_TRUE(p && p[-1] == 'r');
+ p = absl::strings_internal::memrchr("hello there", 'e',
+ sizeof("hello there") - 2);
+ EXPECT_TRUE(p && p[-1] == 'h');
+ p = absl::strings_internal::memrchr("hello there", 'u',
+ sizeof("hello there") - 1);
+ EXPECT_TRUE(p == nullptr);
+
+ int len = absl::strings_internal::memspn("hello there",
+ sizeof("hello there") - 1, "hole");
+ EXPECT_EQ(len, sizeof("hello") - 1);
+ len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
+ "u");
+ EXPECT_EQ(len, 0);
+ len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
+ "");
+ EXPECT_EQ(len, 0);
+ len = absl::strings_internal::memspn("hello there", sizeof("hello there") - 1,
+ "trole h");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memspn("hello there!",
+ sizeof("hello there!") - 1, "trole h");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memspn("hello there!",
+ sizeof("hello there!") - 2, "trole h!");
+ EXPECT_EQ(len, sizeof("hello there!") - 2);
+
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, "leho");
+ EXPECT_EQ(len, 0);
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, "u");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, "");
+ EXPECT_EQ(len, sizeof("hello there") - 1);
+ len = absl::strings_internal::memcspn("hello there",
+ sizeof("hello there") - 1, " ");
+ EXPECT_EQ(len, 5);
+
+ p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
+ "leho");
+ EXPECT_TRUE(p && p[1] == 'e' && p[2] == 'l');
+ p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
+ "nu");
+ EXPECT_TRUE(p == nullptr);
+ p = absl::strings_internal::mempbrk("hello there!",
+ sizeof("hello there!") - 2, "!");
+ EXPECT_TRUE(p == nullptr);
+ p = absl::strings_internal::mempbrk("hello there", sizeof("hello there") - 1,
+ " t ");
+ EXPECT_TRUE(p && p[-1] == 'o' && p[1] == 't');
+
+ {
+ const char kHaystack[] = "0123456789";
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 0, "", 0), kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "012", 3),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "0xx", 1),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "789", 3),
+ kHaystack + 7);
+ EXPECT_EQ(absl::strings_internal::memmem(kHaystack, 10, "9xx", 1),
+ kHaystack + 9);
+ EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "9xx", 3) ==
+ nullptr);
+ EXPECT_TRUE(absl::strings_internal::memmem(kHaystack, 10, "xxx", 1) ==
+ nullptr);
+ }
+ {
+ const char kHaystack[] = "aBcDeFgHiJ";
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 0, "", 0),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Abc", 3),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "Axx", 1),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "hIj", 3),
+ kHaystack + 7);
+ EXPECT_EQ(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 1),
+ kHaystack + 9);
+ EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "jxx", 3) ==
+ nullptr);
+ EXPECT_TRUE(absl::strings_internal::memcasemem(kHaystack, 10, "xxx", 1) ==
+ nullptr);
+ }
+ {
+ const char kHaystack[] = "0123456789";
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 0, "", 0), kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "012", 3),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "0xx", 1),
+ kHaystack);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "789", 3),
+ kHaystack + 7);
+ EXPECT_EQ(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 1),
+ kHaystack + 9);
+ EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "9xx", 3) ==
+ nullptr);
+ EXPECT_TRUE(absl::strings_internal::memmatch(kHaystack, 10, "xxx", 1) ==
+ nullptr);
+ }
+}
+
+} // namespace
diff --git a/absl/strings/internal/numbers_test_common.inc b/absl/strings/internal/numbers_test_common.inc
new file mode 100644
index 00000000..e165b3be
--- /dev/null
+++ b/absl/strings/internal/numbers_test_common.inc
@@ -0,0 +1,166 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file contains common things needed by numbers_test.cc,
+// numbers_legacy_test.cc and numbers_benchmark.cc.
+
+namespace {
+
+// Previously documented minimum buffer sizes for Fast*ToBuffer functions.
+// NOTE(edk): These should be deleted and uses replaced with kFastToBufferSize
+// once existing code has been fixed to use kFastToBufferSize.
+enum {
+ kFastInt32ToBufferSize = 12,
+ kFastInt64ToBufferSize = 22,
+ kFastUInt32ToBufferSize = 12,
+ kFastUInt64ToBufferSize = 22
+};
+
+template <typename IntType>
+bool Itoa(IntType value, int base, std::string* destination) {
+ destination->clear();
+ if (base <= 1 || base > 36) {
+ return false;
+ }
+
+ if (value == 0) {
+ destination->push_back('0');
+ return true;
+ }
+
+ bool negative = value < 0;
+ while (value != 0) {
+ const IntType next_value = value / base;
+ // Can't use std::abs here because of problems when IntType is unsigned.
+ int remainder = value > next_value * base ? value - next_value * base
+ : next_value * base - value;
+ char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10;
+ destination->insert(0, 1, c);
+ value = next_value;
+ }
+
+ if (negative) {
+ destination->insert(0, 1, '-');
+ }
+ return true;
+}
+
+struct uint32_test_case {
+ const char* str;
+ bool expect_ok;
+ int base; // base to pass to the conversion function
+ uint32_t expected;
+} const strtouint32_test_cases[] = {
+ {"0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
+ {"0x34234324", true, 16, 0x34234324},
+ {"34234324", true, 16, 0x34234324},
+ {"0", true, 16, 0},
+ {" \t\n 0xffffffff", true, 16, std::numeric_limits<uint32_t>::max()},
+ {" \f\v 46", true, 10, 46}, // must accept weird whitespace
+ {" \t\n 72717222", true, 8, 072717222},
+ {" \t\n 072717222", true, 8, 072717222},
+ {" \t\n 072717228", false, 8, 07271722},
+ {"0", true, 0, 0},
+
+ // Base-10 version.
+ {"34234324", true, 0, 34234324},
+ {"4294967295", true, 0, std::numeric_limits<uint32_t>::max()},
+ {"34234324 \n\t", true, 10, 34234324},
+
+ // Unusual base
+ {"0", true, 3, 0},
+ {"2", true, 3, 2},
+ {"11", true, 3, 4},
+
+ // Invalid uints.
+ {"", false, 0, 0},
+ {" ", false, 0, 0},
+ {"abc", false, 0, 0}, // would be valid hex, but prefix is missing
+ {"34234324a", false, 0, 34234324},
+ {"34234.3", false, 0, 34234},
+ {"-1", false, 0, 0},
+ {" -123", false, 0, 0},
+ {" \t\n -123", false, 0, 0},
+
+ // Out of bounds.
+ {"4294967296", false, 0, std::numeric_limits<uint32_t>::max()},
+ {"0x100000000", false, 0, std::numeric_limits<uint32_t>::max()},
+ {nullptr, false, 0, 0},
+};
+
+struct uint64_test_case {
+ const char* str;
+ bool expect_ok;
+ int base;
+ uint64_t expected;
+} const strtouint64_test_cases[] = {
+ {"0x3423432448783446", true, 16, int64_t{0x3423432448783446}},
+ {"3423432448783446", true, 16, int64_t{0x3423432448783446}},
+
+ {"0", true, 16, 0},
+ {"000", true, 0, 0},
+ {"0", true, 0, 0},
+ {" \t\n 0xffffffffffffffff", true, 16,
+ std::numeric_limits<uint64_t>::max()},
+
+ {"012345670123456701234", true, 8, int64_t{012345670123456701234}},
+ {"12345670123456701234", true, 8, int64_t{012345670123456701234}},
+
+ {"12845670123456701234", false, 8, 0},
+
+ // Base-10 version.
+ {"34234324487834466", true, 0, int64_t{34234324487834466}},
+
+ {" \t\n 18446744073709551615", true, 0,
+ std::numeric_limits<uint64_t>::max()},
+
+ {"34234324487834466 \n\t ", true, 0, int64_t{34234324487834466}},
+
+ {" \f\v 46", true, 10, 46}, // must accept weird whitespace
+
+ // Unusual base
+ {"0", true, 3, 0},
+ {"2", true, 3, 2},
+ {"11", true, 3, 4},
+
+ {"0", true, 0, 0},
+
+ // Invalid uints.
+ {"", false, 0, 0},
+ {" ", false, 0, 0},
+ {"abc", false, 0, 0},
+ {"34234324487834466a", false, 0, 0},
+ {"34234487834466.3", false, 0, 0},
+ {"-1", false, 0, 0},
+ {" -123", false, 0, 0},
+ {" \t\n -123", false, 0, 0},
+
+ // Out of bounds.
+ {"18446744073709551616", false, 10, 0},
+ {"18446744073709551616", false, 0, 0},
+ {"0x10000000000000000", false, 16, std::numeric_limits<uint64_t>::max()},
+ {"0X10000000000000000", false, 16,
+ std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
+ {"0x10000000000000000", false, 0, std::numeric_limits<uint64_t>::max()},
+ {"0X10000000000000000", false, 0,
+ std::numeric_limits<uint64_t>::max()}, // 0X versus 0x.
+
+ {"0x1234", true, 16, 0x1234},
+
+ // Base-10 std::string version.
+ {"1234", true, 0, 1234},
+ {nullptr, false, 0, 0},
+};
+
+} // namespace
diff --git a/absl/strings/internal/ostringstream.h b/absl/strings/internal/ostringstream.h
new file mode 100644
index 00000000..017632a9
--- /dev/null
+++ b/absl/strings/internal/ostringstream.h
@@ -0,0 +1,97 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
+#define ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
+
+#include <cassert>
+#include <ostream>
+#include <streambuf>
+#include <string>
+
+#include "absl/base/port.h"
+
+namespace absl {
+namespace strings_internal {
+
+// The same as std::ostringstream but appends to a user-specified std::string,
+// and is faster. It is ~70% faster to create, ~50% faster to write to, and
+// completely free to extract the result std::string.
+//
+// std::string s;
+// OStringStream strm(&s);
+// strm << 42 << ' ' << 3.14; // appends to `s`
+//
+// The stream object doesn't have to be named. Starting from C++11 operator<<
+// works with rvalues of std::ostream.
+//
+// std::string s;
+// OStringStream(&s) << 42 << ' ' << 3.14; // appends to `s`
+//
+// OStringStream is faster to create than std::ostringstream but it's still
+// relatively slow. Avoid creating multiple streams where a single stream will
+// do.
+//
+// Creates unnecessary instances of OStringStream: slow.
+//
+// std::string s;
+// OStringStream(&s) << 42;
+// OStringStream(&s) << ' ';
+// OStringStream(&s) << 3.14;
+//
+// Creates a single instance of OStringStream and reuses it: fast.
+//
+// std::string s;
+// OStringStream strm(&s);
+// strm << 42;
+// strm << ' ';
+// strm << 3.14;
+//
+// Note: flush() has no effect. No reason to call it.
+class OStringStream : private std::basic_streambuf<char>, public std::ostream {
+ public:
+ // The argument can be null, in which case you'll need to call str(p) with a
+ // non-null argument before you can write to the stream.
+ //
+ // The destructor of OStringStream doesn't use the std::string. It's OK to destroy
+ // the std::string before the stream.
+ explicit OStringStream(std::string* s) : std::ostream(this), s_(s) {}
+
+ std::string* str() { return s_; }
+ const std::string* str() const { return s_; }
+ void str(std::string* s) { s_ = s; }
+
+ private:
+ using Buf = std::basic_streambuf<char>;
+
+ Buf::int_type overflow(int c = Buf::traits_type::eof()) override {
+ assert(s_);
+ if (!Buf::traits_type::eq_int_type(c, Buf::traits_type::eof()))
+ s_->push_back(static_cast<char>(c));
+ return 1;
+ }
+
+ std::streamsize xsputn(const char* s, std::streamsize n) override {
+ assert(s_);
+ s_->append(s, n);
+ return n;
+ }
+
+ std::string* s_;
+};
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_
diff --git a/absl/strings/internal/ostringstream_test.cc b/absl/strings/internal/ostringstream_test.cc
new file mode 100644
index 00000000..0047ec82
--- /dev/null
+++ b/absl/strings/internal/ostringstream_test.cc
@@ -0,0 +1,103 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/ostringstream.h"
+
+#include <memory>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <type_traits>
+
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(OStringStream, IsOStream) {
+ static_assert(
+ std::is_base_of<std::ostream, absl::strings_internal::OStringStream>(),
+ "");
+}
+
+TEST(OStringStream, ConstructDestroy) {
+ {
+ absl::strings_internal::OStringStream strm(nullptr);
+ EXPECT_EQ(nullptr, strm.str());
+ }
+ {
+ std::string s = "abc";
+ {
+ absl::strings_internal::OStringStream strm(&s);
+ EXPECT_EQ(&s, strm.str());
+ }
+ EXPECT_EQ("abc", s);
+ }
+ {
+ std::unique_ptr<std::string> s(new std::string);
+ absl::strings_internal::OStringStream strm(s.get());
+ s.reset();
+ }
+}
+
+TEST(OStringStream, Str) {
+ std::string s1;
+ absl::strings_internal::OStringStream strm(&s1);
+ const absl::strings_internal::OStringStream& c_strm(strm);
+
+ static_assert(std::is_same<decltype(strm.str()), std::string*>(), "");
+ static_assert(std::is_same<decltype(c_strm.str()), const std::string*>(), "");
+
+ EXPECT_EQ(&s1, strm.str());
+ EXPECT_EQ(&s1, c_strm.str());
+
+ strm.str(&s1);
+ EXPECT_EQ(&s1, strm.str());
+ EXPECT_EQ(&s1, c_strm.str());
+
+ std::string s2;
+ strm.str(&s2);
+ EXPECT_EQ(&s2, strm.str());
+ EXPECT_EQ(&s2, c_strm.str());
+
+ strm.str(nullptr);
+ EXPECT_EQ(nullptr, strm.str());
+ EXPECT_EQ(nullptr, c_strm.str());
+}
+
+TEST(OStreamStream, WriteToLValue) {
+ std::string s = "abc";
+ {
+ absl::strings_internal::OStringStream strm(&s);
+ EXPECT_EQ("abc", s);
+ strm << "";
+ EXPECT_EQ("abc", s);
+ strm << 42;
+ EXPECT_EQ("abc42", s);
+ strm << 'x' << 'y';
+ EXPECT_EQ("abc42xy", s);
+ }
+ EXPECT_EQ("abc42xy", s);
+}
+
+TEST(OStreamStream, WriteToRValue) {
+ std::string s = "abc";
+ absl::strings_internal::OStringStream(&s) << "";
+ EXPECT_EQ("abc", s);
+ absl::strings_internal::OStringStream(&s) << 42;
+ EXPECT_EQ("abc42", s);
+ absl::strings_internal::OStringStream(&s) << 'x' << 'y';
+ EXPECT_EQ("abc42xy", s);
+}
+
+} // namespace
diff --git a/absl/strings/internal/resize_uninitialized.h b/absl/strings/internal/resize_uninitialized.h
new file mode 100644
index 00000000..0157ca02
--- /dev/null
+++ b/absl/strings/internal/resize_uninitialized.h
@@ -0,0 +1,69 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
+#define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
+
+#include <string>
+#include <utility>
+
+#include "absl/base/port.h"
+#include "absl/meta/type_traits.h" // for void_t
+
+namespace absl {
+namespace strings_internal {
+
+// Is a subclass of true_type or false_type, depending on whether or not
+// T has a resize_uninitialized member.
+template <typename T, typename = void>
+struct HasResizeUninitialized : std::false_type {};
+template <typename T>
+struct HasResizeUninitialized<
+ T, absl::void_t<decltype(std::declval<T>().resize_uninitialized(237))>>
+ : std::true_type {};
+
+template <typename string_type>
+void ResizeUninit(string_type* s, size_t new_size, std::true_type) {
+ s->resize_uninitialized(new_size);
+}
+template <typename string_type>
+void ResizeUninit(string_type* s, size_t new_size, std::false_type) {
+ s->resize(new_size);
+}
+
+// Returns true if the std::string implementation supports a resize where
+// the new characters added to the std::string are left untouched.
+//
+// (A better name might be "STLStringSupportsUninitializedResize", alluding to
+// the previous function.)
+template <typename string_type>
+inline constexpr bool STLStringSupportsNontrashingResize(string_type*) {
+ return HasResizeUninitialized<string_type>();
+}
+
+// Like str->resize(new_size), except any new characters added to "*str" as a
+// result of resizing may be left uninitialized, rather than being filled with
+// '0' bytes. Typically used when code is then going to overwrite the backing
+// store of the std::string with known data. Uses a Google extension to std::string.
+template <typename string_type, typename = void>
+inline void STLStringResizeUninitialized(string_type* s, size_t new_size) {
+ ResizeUninit(s, new_size, HasResizeUninitialized<string_type>());
+}
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_
diff --git a/absl/strings/internal/resize_uninitialized_test.cc b/absl/strings/internal/resize_uninitialized_test.cc
new file mode 100644
index 00000000..ad282efc
--- /dev/null
+++ b/absl/strings/internal/resize_uninitialized_test.cc
@@ -0,0 +1,68 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/resize_uninitialized.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+int resize_call_count = 0;
+
+struct resizable_string {
+ void resize(size_t) { resize_call_count += 1; }
+};
+
+int resize_uninitialized_call_count = 0;
+
+struct resize_uninitializable_string {
+ void resize(size_t) { resize_call_count += 1; }
+ void resize_uninitialized(size_t) { resize_uninitialized_call_count += 1; }
+};
+
+TEST(ResizeUninit, WithAndWithout) {
+ resize_call_count = 0;
+ resize_uninitialized_call_count = 0;
+ {
+ resizable_string rs;
+
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ EXPECT_FALSE(
+ absl::strings_internal::STLStringSupportsNontrashingResize(&rs));
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ absl::strings_internal::STLStringResizeUninitialized(&rs, 237);
+ EXPECT_EQ(resize_call_count, 1);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ }
+
+ resize_call_count = 0;
+ resize_uninitialized_call_count = 0;
+ {
+ resize_uninitializable_string rus;
+
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ EXPECT_TRUE(
+ absl::strings_internal::STLStringSupportsNontrashingResize(&rus));
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 0);
+ absl::strings_internal::STLStringResizeUninitialized(&rus, 237);
+ EXPECT_EQ(resize_call_count, 0);
+ EXPECT_EQ(resize_uninitialized_call_count, 1);
+ }
+}
+
+} // namespace
diff --git a/absl/strings/internal/str_join_internal.h b/absl/strings/internal/str_join_internal.h
new file mode 100644
index 00000000..e73f1dde
--- /dev/null
+++ b/absl/strings/internal/str_join_internal.h
@@ -0,0 +1,314 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// This file declares INTERNAL parts of the Join API that are inlined/templated
+// or otherwise need to be available at compile time. The main abstractions
+// defined in this file are:
+//
+// - A handful of default Formatters
+// - JoinAlgorithm() overloads
+// - JoinRange() overloads
+// - JoinTuple()
+//
+// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
+// absl/strings/str_join.h
+//
+// IWYU pragma: private, include "absl/strings/str_join.h"
+
+#ifndef ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
+#define ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
+
+#include <cassert>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "absl/strings/internal/ostringstream.h"
+#include "absl/strings/str_cat.h"
+
+namespace absl {
+namespace strings_internal {
+
+//
+// Formatter objects
+//
+// The following are implementation classes for standard Formatter objects. The
+// factory functions that users will call to create and use these formatters are
+// defined and documented in strings/join.h.
+//
+
+// The default formatter. Converts alpha-numeric types to strings.
+struct AlphaNumFormatterImpl {
+ // This template is needed in order to support passing in a dereferenced
+ // vector<bool>::iterator
+ template <typename T>
+ void operator()(std::string* out, const T& t) const {
+ StrAppend(out, AlphaNum(t));
+ }
+
+ void operator()(std::string* out, const AlphaNum& t) const {
+ StrAppend(out, t);
+ }
+};
+
+// A type that's used to overload the JoinAlgorithm() function (defined below)
+// for ranges that do not require additional formatting (e.g., a range of
+// strings).
+
+struct NoFormatter : public AlphaNumFormatterImpl {};
+
+// Formats types to strings using the << operator.
+class StreamFormatterImpl {
+ public:
+ // The method isn't const because it mutates state. Making it const will
+ // render StreamFormatterImpl thread-hostile.
+ template <typename T>
+ void operator()(std::string* out, const T& t) {
+ // The stream is created lazily to avoid paying the relatively high cost
+ // of its construction when joining an empty range.
+ if (strm_) {
+ strm_->clear(); // clear the bad, fail and eof bits in case they were set
+ strm_->str(out);
+ } else {
+ strm_.reset(new strings_internal::OStringStream(out));
+ }
+ *strm_ << t;
+ }
+
+ private:
+ std::unique_ptr<strings_internal::OStringStream> strm_;
+};
+
+// Formats a std::pair<>. The 'first' member is formatted using f1_ and the
+// 'second' member is formatted using f2_. sep_ is the separator.
+template <typename F1, typename F2>
+class PairFormatterImpl {
+ public:
+ PairFormatterImpl(F1 f1, absl::string_view sep, F2 f2)
+ : f1_(std::move(f1)), sep_(sep), f2_(std::move(f2)) {}
+
+ template <typename T>
+ void operator()(std::string* out, const T& p) {
+ f1_(out, p.first);
+ out->append(sep_);
+ f2_(out, p.second);
+ }
+
+ template <typename T>
+ void operator()(std::string* out, const T& p) const {
+ f1_(out, p.first);
+ out->append(sep_);
+ f2_(out, p.second);
+ }
+
+ private:
+ F1 f1_;
+ std::string sep_;
+ F2 f2_;
+};
+
+// Wraps another formatter and dereferences the argument to operator() then
+// passes the dereferenced argument to the wrapped formatter. This can be
+// useful, for example, to join a std::vector<int*>.
+template <typename Formatter>
+class DereferenceFormatterImpl {
+ public:
+ DereferenceFormatterImpl() : f_() {}
+ explicit DereferenceFormatterImpl(Formatter&& f)
+ : f_(std::forward<Formatter>(f)) {}
+
+ template <typename T>
+ void operator()(std::string* out, const T& t) {
+ f_(out, *t);
+ }
+
+ template <typename T>
+ void operator()(std::string* out, const T& t) const {
+ f_(out, *t);
+ }
+
+ private:
+ Formatter f_;
+};
+
+// DefaultFormatter<T> is a traits class that selects a default Formatter to use
+// for the given type T. The ::Type member names the Formatter to use. This is
+// used by the strings::Join() functions that do NOT take a Formatter argument,
+// in which case a default Formatter must be chosen.
+//
+// AlphaNumFormatterImpl is the default in the base template, followed by
+// specializations for other types.
+template <typename ValueType>
+struct DefaultFormatter {
+ typedef AlphaNumFormatterImpl Type;
+};
+template <>
+struct DefaultFormatter<const char*> {
+ typedef AlphaNumFormatterImpl Type;
+};
+template <>
+struct DefaultFormatter<char*> {
+ typedef AlphaNumFormatterImpl Type;
+};
+template <>
+struct DefaultFormatter<std::string> {
+ typedef NoFormatter Type;
+};
+template <>
+struct DefaultFormatter<absl::string_view> {
+ typedef NoFormatter Type;
+};
+template <typename ValueType>
+struct DefaultFormatter<ValueType*> {
+ typedef DereferenceFormatterImpl<typename DefaultFormatter<ValueType>::Type>
+ Type;
+};
+
+template <typename ValueType>
+struct DefaultFormatter<std::unique_ptr<ValueType>>
+ : public DefaultFormatter<ValueType*> {};
+
+//
+// JoinAlgorithm() functions
+//
+
+// The main joining algorithm. This simply joins the elements in the given
+// iterator range, each separated by the given separator, into an output std::string,
+// and formats each element using the provided Formatter object.
+template <typename Iterator, typename Formatter>
+std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
+ Formatter&& f) {
+ std::string result;
+ absl::string_view sep("");
+ for (Iterator it = start; it != end; ++it) {
+ result.append(sep.data(), sep.size());
+ f(&result, *it);
+ sep = s;
+ }
+ return result;
+}
+
+// No-op placeholder for input iterators which can not be iterated over.
+template <typename Iterator>
+size_t GetResultSize(Iterator, Iterator, size_t, std::input_iterator_tag) {
+ return 0;
+}
+
+// Calculates space to reserve, if the iterator supports multiple passes.
+template <typename Iterator>
+size_t GetResultSize(Iterator it, Iterator end, size_t separator_size,
+ std::forward_iterator_tag) {
+ assert(it != end);
+ size_t length = it->size();
+ while (++it != end) {
+ length += separator_size;
+ length += it->size();
+ }
+ return length;
+}
+
+// A joining algorithm that's optimized for an iterator range of std::string-like
+// objects that do not need any additional formatting. This is to optimize the
+// common case of joining, say, a std::vector<std::string> or a
+// std::vector<absl::string_view>.
+//
+// This is an overload of the previous JoinAlgorithm() function. Here the
+// Formatter argument is of type NoFormatter. Since NoFormatter is an internal
+// type, this overload is only invoked when strings::Join() is called with a
+// range of std::string-like objects (e.g., std::string, absl::string_view), and an
+// explicit Formatter argument was NOT specified.
+//
+// The optimization is that the needed space will be reserved in the output
+// std::string to avoid the need to resize while appending. To do this, the iterator
+// range will be traversed twice: once to calculate the total needed size, and
+// then again to copy the elements and delimiters to the output std::string.
+template <typename Iterator>
+std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s,
+ NoFormatter) {
+ std::string result;
+ if (start != end) {
+ typename std::iterator_traits<Iterator>::iterator_category iterator_tag;
+ result.reserve(GetResultSize(start, end, s.size(), iterator_tag));
+
+ // Joins strings
+ absl::string_view sep("", 0);
+ for (Iterator it = start; it != end; ++it) {
+ result.append(sep.data(), sep.size());
+ result.append(it->data(), it->size());
+ sep = s;
+ }
+ }
+
+ return result;
+}
+
+// JoinTupleLoop implements a loop over the elements of a std::tuple, which
+// are heterogeneous. The primary template matches the tuple interior case. It
+// continues the iteration after appending a separator (for nonzero indices)
+// and formatting an element of the tuple. The specialization for the I=N case
+// matches the end-of-tuple, and terminates the iteration.
+template <size_t I, size_t N>
+struct JoinTupleLoop {
+ template <typename Tup, typename Formatter>
+ void operator()(std::string* out, const Tup& tup, absl::string_view sep,
+ Formatter&& fmt) {
+ if (I > 0) out->append(sep.data(), sep.size());
+ fmt(out, std::get<I>(tup));
+ JoinTupleLoop<I + 1, N>()(out, tup, sep, fmt);
+ }
+};
+template <size_t N>
+struct JoinTupleLoop<N, N> {
+ template <typename Tup, typename Formatter>
+ void operator()(std::string*, const Tup&, absl::string_view, Formatter&&) {}
+};
+
+template <typename... T, typename Formatter>
+std::string JoinAlgorithm(const std::tuple<T...>& tup, absl::string_view sep,
+ Formatter&& fmt) {
+ std::string result;
+ JoinTupleLoop<0, sizeof...(T)>()(&result, tup, sep, fmt);
+ return result;
+}
+
+template <typename Iterator>
+std::string JoinRange(Iterator first, Iterator last, absl::string_view separator) {
+ // No formatter was explicitly given, so a default must be chosen.
+ typedef typename std::iterator_traits<Iterator>::value_type ValueType;
+ typedef typename DefaultFormatter<ValueType>::Type Formatter;
+ return JoinAlgorithm(first, last, separator, Formatter());
+}
+
+template <typename Range, typename Formatter>
+std::string JoinRange(const Range& range, absl::string_view separator,
+ Formatter&& fmt) {
+ using std::begin;
+ using std::end;
+ return JoinAlgorithm(begin(range), end(range), separator, fmt);
+}
+
+template <typename Range>
+std::string JoinRange(const Range& range, absl::string_view separator) {
+ using std::begin;
+ using std::end;
+ return JoinRange(begin(range), end(range), separator);
+}
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_
diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h
new file mode 100644
index 00000000..dc31a8ef
--- /dev/null
+++ b/absl/strings/internal/str_split_internal.h
@@ -0,0 +1,439 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// This file declares INTERNAL parts of the Split API that are inline/templated
+// or otherwise need to be available at compile time. The main abstractions
+// defined in here are
+//
+// - ConvertibleToStringView
+// - SplitIterator<>
+// - Splitter<>
+//
+// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
+// absl/strings/str_split.h.
+//
+// IWYU pragma: private, include "absl/strings/str_split.h"
+
+#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
+#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
+
+#ifdef _GLIBCXX_DEBUG
+#include <glibcxx_debug_traits.h>
+#endif // _GLIBCXX_DEBUG
+
+#include <array>
+#include <initializer_list>
+#include <iterator>
+#include <map>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/meta/type_traits.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+namespace strings_internal {
+
+#ifdef _GLIBCXX_DEBUG
+using ::glibcxx_debug_traits::IsStrictlyDebugWrapperBase;
+#else // _GLIBCXX_DEBUG
+template <typename T> struct IsStrictlyDebugWrapperBase : std::false_type {};
+#endif // _GLIBCXX_DEBUG
+
+// This class is implicitly constructible from everything that absl::string_view
+// is implicitly constructible from. If it's constructed from a temporary
+// std::string, the data is moved into a data member so its lifetime matches that of
+// the ConvertibleToStringView instance.
+class ConvertibleToStringView {
+ public:
+ ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
+ : value_(s) {}
+ ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
+ ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
+ : value_(s) {}
+ ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
+ : value_(s) {}
+
+ // Matches rvalue strings and moves their data to a member.
+ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit)
+ : copy_(std::move(s)), value_(copy_) {}
+
+ ConvertibleToStringView(const ConvertibleToStringView& other)
+ : copy_(other.copy_),
+ value_(other.IsSelfReferential() ? copy_ : other.value_) {}
+
+ ConvertibleToStringView(ConvertibleToStringView&& other) {
+ StealMembers(std::move(other));
+ }
+
+ ConvertibleToStringView& operator=(ConvertibleToStringView other) {
+ StealMembers(std::move(other));
+ return *this;
+ }
+
+ absl::string_view value() const { return value_; }
+
+ private:
+ // Returns true if ctsp's value refers to its internal copy_ member.
+ bool IsSelfReferential() const { return value_.data() == copy_.data(); }
+
+ void StealMembers(ConvertibleToStringView&& other) {
+ if (other.IsSelfReferential()) {
+ copy_ = std::move(other.copy_);
+ value_ = copy_;
+ other.value_ = other.copy_;
+ } else {
+ value_ = other.value_;
+ }
+ }
+
+ // Holds the data moved from temporary std::string arguments. Declared first so
+ // that 'value' can refer to 'copy_'.
+ std::string copy_;
+ absl::string_view value_;
+};
+
+// An iterator that enumerates the parts of a std::string from a Splitter. The text
+// to be split, the Delimiter, and the Predicate are all taken from the given
+// Splitter object. Iterators may only be compared if they refer to the same
+// Splitter instance.
+//
+// This class is NOT part of the public splitting API.
+template <typename Splitter>
+class SplitIterator {
+ public:
+ using iterator_category = std::input_iterator_tag;
+ using value_type = absl::string_view;
+ using difference_type = ptrdiff_t;
+ using pointer = const value_type*;
+ using reference = const value_type&;
+
+ enum State { kInitState, kLastState, kEndState };
+ SplitIterator(State state, const Splitter* splitter)
+ : pos_(0),
+ state_(state),
+ splitter_(splitter),
+ delimiter_(splitter->delimiter()),
+ predicate_(splitter->predicate()) {
+ // Hack to maintain backward compatibility. This one block makes it so an
+ // empty absl::string_view whose .data() happens to be nullptr behaves
+ // *differently* from an otherwise empty absl::string_view whose .data() is
+ // not nullptr. This is an undesirable difference in general, but this
+ // behavior is maintained to avoid breaking existing code that happens to
+ // depend on this old behavior/bug. Perhaps it will be fixed one day. The
+ // difference in behavior is as follows:
+ // Split(absl::string_view(""), '-'); // {""}
+ // Split(absl::string_view(), '-'); // {}
+ if (splitter_->text().data() == nullptr) {
+ state_ = kEndState;
+ pos_ = splitter_->text().size();
+ return;
+ }
+
+ if (state_ == kEndState) {
+ pos_ = splitter_->text().size();
+ } else {
+ ++(*this);
+ }
+ }
+
+ bool at_end() const { return state_ == kEndState; }
+
+ reference operator*() const { return curr_; }
+ pointer operator->() const { return &curr_; }
+
+ SplitIterator& operator++() {
+ do {
+ if (state_ == kLastState) {
+ state_ = kEndState;
+ return *this;
+ }
+ const absl::string_view text = splitter_->text();
+ const absl::string_view d = delimiter_.Find(text, pos_);
+ if (d.data() == text.end()) state_ = kLastState;
+ curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
+ pos_ += curr_.size() + d.size();
+ } while (!predicate_(curr_));
+ return *this;
+ }
+
+ SplitIterator operator++(int) {
+ SplitIterator old(*this);
+ ++(*this);
+ return old;
+ }
+
+ friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
+ return a.state_ == b.state_ && a.pos_ == b.pos_;
+ }
+
+ friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
+ return !(a == b);
+ }
+
+ private:
+ size_t pos_;
+ State state_;
+ absl::string_view curr_;
+ const Splitter* splitter_;
+ typename Splitter::DelimiterType delimiter_;
+ typename Splitter::PredicateType predicate_;
+};
+
+// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
+template <typename T, typename = void>
+struct HasMappedType : std::false_type {};
+template <typename T>
+struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
+ : std::true_type {};
+
+// HasValueType<T>::value is true iff there exists a type T::value_type.
+template <typename T, typename = void>
+struct HasValueType : std::false_type {};
+template <typename T>
+struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
+};
+
+// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
+template <typename T, typename = void>
+struct HasConstIterator : std::false_type {};
+template <typename T>
+struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
+ : std::true_type {};
+
+// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
+// details below in Splitter<> where this is used.
+std::false_type IsInitializerListDispatch(...); // default: No
+template <typename T>
+std::true_type IsInitializerListDispatch(std::initializer_list<T>*);
+template <typename T>
+struct IsInitializerList
+ : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
+
+// A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition
+// is true for type 'C'.
+//
+// Restricts conversion to container-like types (by testing for the presence of
+// a const_iterator member type) and also to disable conversion to an
+// std::initializer_list (which also has a const_iterator). Otherwise, code
+// compiled in C++11 will get an error due to ambiguous conversion paths (in
+// C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T>
+// or an std::initializer_list<T>).
+template <typename C>
+struct SplitterIsConvertibleTo
+ : std::enable_if<
+ !IsStrictlyDebugWrapperBase<C>::value &&
+ !IsInitializerList<C>::value &&
+ HasValueType<C>::value &&
+ HasConstIterator<C>::value> {};
+
+// This class implements the range that is returned by absl::StrSplit(). This
+// class has templated conversion operators that allow it to be implicitly
+// converted to a variety of types that the caller may have specified on the
+// left-hand side of an assignment.
+//
+// The main interface for interacting with this class is through its implicit
+// conversion operators. However, this class may also be used like a container
+// in that it has .begin() and .end() member functions. It may also be used
+// within a range-for loop.
+//
+// Output containers can be collections of any type that is constructible from
+// an absl::string_view.
+//
+// An Predicate functor may be supplied. This predicate will be used to filter
+// the split strings: only strings for which the predicate returns true will be
+// kept. A Predicate object is any unary functor that takes an absl::string_view
+// and returns bool.
+template <typename Delimiter, typename Predicate>
+class Splitter {
+ public:
+ using DelimiterType = Delimiter;
+ using PredicateType = Predicate;
+ using const_iterator = strings_internal::SplitIterator<Splitter>;
+ using value_type = typename std::iterator_traits<const_iterator>::value_type;
+
+ Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p)
+ : text_(std::move(input_text)),
+ delimiter_(std::move(d)),
+ predicate_(std::move(p)) {}
+
+ absl::string_view text() const { return text_.value(); }
+ const Delimiter& delimiter() const { return delimiter_; }
+ const Predicate& predicate() const { return predicate_; }
+
+ // Range functions that iterate the split substrings as absl::string_view
+ // objects. These methods enable a Splitter to be used in a range-based for
+ // loop.
+ const_iterator begin() const { return {const_iterator::kInitState, this}; }
+ const_iterator end() const { return {const_iterator::kEndState, this}; }
+
+ // An implicit conversion operator that is restricted to only those containers
+ // that the splitter is convertible to.
+ template <typename Container,
+ typename OnlyIf = typename SplitterIsConvertibleTo<Container>::type>
+ operator Container() const { // NOLINT(runtime/explicit)
+ return ConvertToContainer<Container, typename Container::value_type,
+ HasMappedType<Container>::value>()(*this);
+ }
+
+ // Returns a pair with its .first and .second members set to the first two
+ // strings returned by the begin() iterator. Either/both of .first and .second
+ // will be constructed with empty strings if the iterator doesn't have a
+ // corresponding value.
+ template <typename First, typename Second>
+ operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
+ absl::string_view first, second;
+ auto it = begin();
+ if (it != end()) {
+ first = *it;
+ if (++it != end()) {
+ second = *it;
+ }
+ }
+ return {First(first), Second(second)};
+ }
+
+ private:
+ // ConvertToContainer is a functor converting a Splitter to the requested
+ // Container of ValueType. It is specialized below to optimize splitting to
+ // certain combinations of Container and ValueType.
+ //
+ // This base template handles the generic case of storing the split results in
+ // the requested non-map-like container and converting the split substrings to
+ // the requested type.
+ template <typename Container, typename ValueType, bool is_map = false>
+ struct ConvertToContainer {
+ Container operator()(const Splitter& splitter) const {
+ Container c;
+ auto it = std::inserter(c, c.end());
+ for (const auto sp : splitter) {
+ *it++ = ValueType(sp);
+ }
+ return c;
+ }
+ };
+
+ // Partial specialization for a std::vector<absl::string_view>.
+ //
+ // Optimized for the common case of splitting to a
+ // std::vector<absl::string_view>. In this case we first split the results to
+ // a small array of absl::string_view on the stack, to reduce reallocations.
+ template <typename A>
+ struct ConvertToContainer<std::vector<absl::string_view, A>,
+ absl::string_view, false> {
+ std::vector<absl::string_view, A> operator()(
+ const Splitter& splitter) const {
+ struct raw_view {
+ const char* data;
+ size_t size;
+ operator absl::string_view() const { // NOLINT(runtime/explicit)
+ return {data, size};
+ }
+ };
+ std::vector<absl::string_view, A> v;
+ std::array<raw_view, 16> ar;
+ for (auto it = splitter.begin(); !it.at_end();) {
+ size_t index = 0;
+ do {
+ ar[index].data = it->data();
+ ar[index].size = it->size();
+ ++it;
+ } while (++index != ar.size() && !it.at_end());
+ v.insert(v.end(), ar.begin(), ar.begin() + index);
+ }
+ return v;
+ }
+ };
+
+ // Partial specialization for a std::vector<std::string>.
+ //
+ // Optimized for the common case of splitting to a std::vector<std::string>. In
+ // this case we first split the results to a std::vector<absl::string_view> so
+ // the returned std::vector<std::string> can have space reserved to avoid std::string
+ // moves.
+ template <typename A>
+ struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
+ std::vector<std::string, A> operator()(const Splitter& splitter) const {
+ const std::vector<absl::string_view> v = splitter;
+ return std::vector<std::string, A>(v.begin(), v.end());
+ }
+ };
+
+ // Partial specialization for containers of pairs (e.g., maps).
+ //
+ // The algorithm is to insert a new pair into the map for each even-numbered
+ // item, with the even-numbered item as the key with a default-constructed
+ // value. Each odd-numbered item will then be assigned to the last pair's
+ // value.
+ template <typename Container, typename First, typename Second>
+ struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
+ Container operator()(const Splitter& splitter) const {
+ Container m;
+ typename Container::iterator it;
+ bool insert = true;
+ for (const auto sp : splitter) {
+ if (insert) {
+ it = Inserter<Container>::Insert(&m, First(sp), Second());
+ } else {
+ it->second = Second(sp);
+ }
+ insert = !insert;
+ }
+ return m;
+ }
+
+ // Inserts the key and value into the given map, returning an iterator to
+ // the inserted item. Specialized for std::map and std::multimap to use
+ // emplace() and adapt emplace()'s return value.
+ template <typename Map>
+ struct Inserter {
+ using M = Map;
+ template <typename... Args>
+ static typename M::iterator Insert(M* m, Args&&... args) {
+ return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
+ }
+ };
+
+ template <typename... Ts>
+ struct Inserter<std::map<Ts...>> {
+ using M = std::map<Ts...>;
+ template <typename... Args>
+ static typename M::iterator Insert(M* m, Args&&... args) {
+ return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
+ }
+ };
+
+ template <typename... Ts>
+ struct Inserter<std::multimap<Ts...>> {
+ using M = std::multimap<Ts...>;
+ template <typename... Args>
+ static typename M::iterator Insert(M* m, Args&&... args) {
+ return m->emplace(std::make_pair(std::forward<Args>(args)...));
+ }
+ };
+ };
+
+ ConvertibleToStringView text_;
+ Delimiter delimiter_;
+ Predicate predicate_;
+};
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc
new file mode 100644
index 00000000..2415c2cc
--- /dev/null
+++ b/absl/strings/internal/utf8.cc
@@ -0,0 +1,51 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// UTF8 utilities, implemented to reduce dependencies.
+
+#include "absl/strings/internal/utf8.h"
+
+namespace absl {
+namespace strings_internal {
+
+size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) {
+ if (utf8_char <= 0x7F) {
+ *buffer = static_cast<char>(utf8_char);
+ return 1;
+ } else if (utf8_char <= 0x7FF) {
+ buffer[1] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[0] = 0xC0 | utf8_char;
+ return 2;
+ } else if (utf8_char <= 0xFFFF) {
+ buffer[2] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[1] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[0] = 0xE0 | utf8_char;
+ return 3;
+ } else {
+ buffer[3] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[2] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[1] = 0x80 | (utf8_char & 0x3F);
+ utf8_char >>= 6;
+ buffer[0] = 0xF0 | utf8_char;
+ return 4;
+ }
+}
+
+} // namespace strings_internal
+} // namespace absl
diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h
new file mode 100644
index 00000000..705eea7f
--- /dev/null
+++ b/absl/strings/internal/utf8.h
@@ -0,0 +1,52 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// UTF8 utilities, implemented to reduce dependencies.
+//
+// If you need Unicode specific processing (for example being aware of
+// Unicode character boundaries, or knowledge of Unicode casing rules,
+// or various forms of equivalence and normalization), take a look at
+// files in i18n/utf8.
+
+#ifndef ABSL_STRINGS_INTERNAL_UTF8_H_
+#define ABSL_STRINGS_INTERNAL_UTF8_H_
+
+#include <cstddef>
+#include <cstdint>
+
+
+namespace absl {
+namespace strings_internal {
+
+// For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes
+// out the UTF-8 encoding into buffer, and returns the number of chars
+// it wrote.
+//
+// As described in https://tools.ietf.org/html/rfc3629#section-3 , the encodings
+// are:
+// 00 - 7F : 0xxxxxxx
+// 80 - 7FF : 110xxxxx 10xxxxxx
+// 800 - FFFF : 1110xxxx 10xxxxxx 10xxxxxx
+// 10000 - 10FFFF : 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+//
+// Values greater than 0x10FFFF are not supported and may or may not write
+// characters into buffer, however never will more than kMaxEncodedUTF8Size
+// bytes be written, regardless of the value of utf8_char.
+enum { kMaxEncodedUTF8Size = 4 };
+size_t EncodeUTF8Char(char *buffer, char32_t utf8_char);
+
+} // namespace strings_internal
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_UTF8_H_
diff --git a/absl/strings/internal/utf8_test.cc b/absl/strings/internal/utf8_test.cc
new file mode 100644
index 00000000..4d437427
--- /dev/null
+++ b/absl/strings/internal/utf8_test.cc
@@ -0,0 +1,58 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/utf8.h"
+
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <cstdint>
+
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(EncodeUTF8Char, BasicFunction) {
+ std::pair<char32_t, std::string> tests[] = {{0x0030, u8"\u0030"},
+ {0x00A3, u8"\u00A3"},
+ {0x00010000, u8"\U00010000"},
+ {0x0000FFFF, u8"\U0000FFFF"},
+ {0x0010FFFD, u8"\U0010FFFD"}};
+ for (auto &test : tests) {
+ char buf0[7] = {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'};
+ char buf1[7] = {'\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF', '\xFF'};
+ char *buf0_written =
+ &buf0[absl::strings_internal::EncodeUTF8Char(buf0, test.first)];
+ char *buf1_written =
+ &buf1[absl::strings_internal::EncodeUTF8Char(buf1, test.first)];
+ int apparent_length = 7;
+ while (buf0[apparent_length - 1] == '\x00' &&
+ buf1[apparent_length - 1] == '\xFF') {
+ if (--apparent_length == 0) break;
+ }
+ EXPECT_EQ(apparent_length, buf0_written - buf0);
+ EXPECT_EQ(apparent_length, buf1_written - buf1);
+ EXPECT_EQ(apparent_length, test.second.length());
+ EXPECT_EQ(std::string(buf0, apparent_length), test.second);
+ EXPECT_EQ(std::string(buf1, apparent_length), test.second);
+ }
+ char buf[32] = "Don't Tread On Me";
+ EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf, 0x00110000),
+ absl::strings_internal::kMaxEncodedUTF8Size);
+ char buf2[32] = "Negative is invalid but sane";
+ EXPECT_LE(absl::strings_internal::EncodeUTF8Char(buf2, -1),
+ absl::strings_internal::kMaxEncodedUTF8Size);
+}
+
+} // namespace
diff --git a/absl/strings/match.cc b/absl/strings/match.cc
new file mode 100644
index 00000000..53881bdd
--- /dev/null
+++ b/absl/strings/match.cc
@@ -0,0 +1,40 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/match.h"
+
+#include "absl/strings/internal/memutil.h"
+
+namespace absl {
+
+namespace {
+bool CaseEqual(absl::string_view piece1, absl::string_view piece2) {
+ return (piece1.size() == piece2.size() &&
+ 0 == strings_internal::memcasecmp(piece1.data(), piece2.data(),
+ piece1.size()));
+ // memcasecmp uses ascii_tolower().
+}
+} // namespace
+
+bool StartsWithIgnoreCase(absl::string_view text, absl::string_view preffix) {
+ return (text.size() >= preffix.size()) &&
+ CaseEqual(text.substr(0, preffix.size()), preffix);
+}
+
+bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) {
+ return (text.size() >= suffix.size()) &&
+ CaseEqual(text.substr(text.size() - suffix.size()), suffix);
+}
+
+} // namespace absl
diff --git a/absl/strings/match.h b/absl/strings/match.h
new file mode 100644
index 00000000..4a5d1c03
--- /dev/null
+++ b/absl/strings/match.h
@@ -0,0 +1,81 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: match.h
+// -----------------------------------------------------------------------------
+//
+// This file contains simple utilities for performing std::string matching checks.
+// All of these function parameters are specified as `absl::string_view`,
+// meaning that these functions can accept `std::string`, `absl::string_view` or
+// nul-terminated C-style strings.
+//
+// Examples:
+// std::string s = "foo";
+// absl::string_view sv = "f";
+// EXPECT_TRUE(absl::StrContains(s, sv));
+//
+// Note: The order of parameters in these functions is designed to mimic the
+// order an equivalent member function would exhibit;
+// e.g. `s.Contains(x)` ==> `absl::StrContains(s, x).
+#ifndef ABSL_STRINGS_MATCH_H_
+#define ABSL_STRINGS_MATCH_H_
+
+#include <cstring>
+
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+// StrContains()
+//
+// Returns whether a given std::string `s` contains the substring `x`.
+inline bool StrContains(absl::string_view s, absl::string_view x) {
+ return static_cast<absl::string_view::size_type>(s.find(x, 0)) != s.npos;
+}
+
+// StartsWith()
+//
+// Returns whether a given std::string `s` begins with `x`.
+inline bool StartsWith(absl::string_view s, absl::string_view x) {
+ return x.empty() ||
+ (s.size() >= x.size() && memcmp(s.data(), x.data(), x.size()) == 0);
+}
+
+// EndsWith()
+//
+// Returns whether a given std::string `s` ends `x`.
+inline bool EndsWith(absl::string_view s, absl::string_view x) {
+ return x.empty() ||
+ (s.size() >= x.size() &&
+ memcmp(s.data() + (s.size() - x.size()), x.data(), x.size()) == 0);
+}
+
+// StartsWithIgnoreCase()
+//
+// Returns whether a given std::string `text` starts with `starts_with`, ignoring
+// case in the comparison.
+bool StartsWithIgnoreCase(absl::string_view text,
+ absl::string_view starts_with);
+
+// EndsWithIgnoreCase()
+//
+// Returns whether a given std::string `text` ends with `ends_with`, ignoring case
+// in the comparison.
+bool EndsWithIgnoreCase(absl::string_view text, absl::string_view ends_with);
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_MATCH_H_
diff --git a/absl/strings/match_test.cc b/absl/strings/match_test.cc
new file mode 100644
index 00000000..d194f0e6
--- /dev/null
+++ b/absl/strings/match_test.cc
@@ -0,0 +1,99 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/match.h"
+
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(MatchTest, StartsWith) {
+ const std::string s1("123" "\0" "456", 7);
+ const absl::string_view a("foobar");
+ const absl::string_view b(s1);
+ const absl::string_view e;
+ EXPECT_TRUE(absl::StartsWith(a, a));
+ EXPECT_TRUE(absl::StartsWith(a, "foo"));
+ EXPECT_TRUE(absl::StartsWith(a, e));
+ EXPECT_TRUE(absl::StartsWith(b, s1));
+ EXPECT_TRUE(absl::StartsWith(b, b));
+ EXPECT_TRUE(absl::StartsWith(b, e));
+ EXPECT_TRUE(absl::StartsWith(e, ""));
+ EXPECT_FALSE(absl::StartsWith(a, b));
+ EXPECT_FALSE(absl::StartsWith(b, a));
+ EXPECT_FALSE(absl::StartsWith(e, a));
+}
+
+TEST(MatchTest, EndsWith) {
+ const std::string s1("123" "\0" "456", 7);
+ const absl::string_view a("foobar");
+ const absl::string_view b(s1);
+ const absl::string_view e;
+ EXPECT_TRUE(absl::EndsWith(a, a));
+ EXPECT_TRUE(absl::EndsWith(a, "bar"));
+ EXPECT_TRUE(absl::EndsWith(a, e));
+ EXPECT_TRUE(absl::EndsWith(b, s1));
+ EXPECT_TRUE(absl::EndsWith(b, b));
+ EXPECT_TRUE(absl::EndsWith(b, e));
+ EXPECT_TRUE(absl::EndsWith(e, ""));
+ EXPECT_FALSE(absl::EndsWith(a, b));
+ EXPECT_FALSE(absl::EndsWith(b, a));
+ EXPECT_FALSE(absl::EndsWith(e, a));
+}
+
+TEST(MatchTest, Contains) {
+ absl::string_view a("abcdefg");
+ absl::string_view b("abcd");
+ absl::string_view c("efg");
+ absl::string_view d("gh");
+ EXPECT_TRUE(absl::StrContains(a, a));
+ EXPECT_TRUE(absl::StrContains(a, b));
+ EXPECT_TRUE(absl::StrContains(a, c));
+ EXPECT_FALSE(absl::StrContains(a, d));
+ EXPECT_TRUE(absl::StrContains("", ""));
+ EXPECT_TRUE(absl::StrContains("abc", ""));
+ EXPECT_FALSE(absl::StrContains("", "a"));
+}
+
+TEST(MatchTest, ContainsNull) {
+ const std::string s = "foo";
+ const char* cs = "foo";
+ const absl::string_view sv("foo");
+ const absl::string_view sv2("foo\0bar", 4);
+ EXPECT_EQ(s, "foo");
+ EXPECT_EQ(sv, "foo");
+ EXPECT_NE(sv2, "foo");
+ EXPECT_TRUE(absl::EndsWith(s, sv));
+ EXPECT_TRUE(absl::StartsWith(cs, sv));
+ EXPECT_TRUE(absl::StrContains(cs, sv));
+ EXPECT_FALSE(absl::StrContains(cs, sv2));
+}
+
+TEST(MatchTest, StartsWithIgnoreCase) {
+ EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "foo"));
+ EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", "Fo"));
+ EXPECT_TRUE(absl::StartsWithIgnoreCase("foo", ""));
+ EXPECT_FALSE(absl::StartsWithIgnoreCase("foo", "fooo"));
+ EXPECT_FALSE(absl::StartsWithIgnoreCase("", "fo"));
+}
+
+TEST(MatchTest, EndsWithIgnoreCase) {
+ EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "foo"));
+ EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", "Oo"));
+ EXPECT_TRUE(absl::EndsWithIgnoreCase("foo", ""));
+ EXPECT_FALSE(absl::EndsWithIgnoreCase("foo", "fooo"));
+ EXPECT_FALSE(absl::EndsWithIgnoreCase("", "fo"));
+}
+
+} // namespace
diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc
new file mode 100644
index 00000000..3b093b98
--- /dev/null
+++ b/absl/strings/numbers.cc
@@ -0,0 +1,1288 @@
+// This file contains std::string processing functions related to
+// numeric values.
+
+#include "absl/strings/numbers.h"
+
+#include <cassert>
+#include <cctype>
+#include <cfloat> // for DBL_DIG and FLT_DIG
+#include <cmath> // for HUGE_VAL
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "absl/base/internal/raw_logging.h"
+#include "absl/numeric/int128.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/internal/memutil.h"
+#include "absl/strings/str_cat.h"
+
+namespace absl {
+
+bool SimpleAtof(absl::string_view str, float* value) {
+ *value = 0.0;
+ if (str.empty()) return false;
+ char buf[32];
+ std::unique_ptr<char[]> bigbuf;
+ char* ptr = buf;
+ if (str.size() > sizeof(buf) - 1) {
+ bigbuf.reset(new char[str.size() + 1]);
+ ptr = bigbuf.get();
+ }
+ memcpy(ptr, str.data(), str.size());
+ ptr[str.size()] = '\0';
+
+ char* endptr;
+ *value = strtof(ptr, &endptr);
+ if (endptr != ptr) {
+ while (absl::ascii_isspace(*endptr)) ++endptr;
+ }
+ // Ignore range errors from strtod/strtof.
+ // The values it returns on underflow and
+ // overflow are the right fallback in a
+ // robust setting.
+ return *ptr != '\0' && *endptr == '\0';
+}
+
+bool SimpleAtod(absl::string_view str, double* value) {
+ *value = 0.0;
+ if (str.empty()) return false;
+ char buf[32];
+ std::unique_ptr<char[]> bigbuf;
+ char* ptr = buf;
+ if (str.size() > sizeof(buf) - 1) {
+ bigbuf.reset(new char[str.size() + 1]);
+ ptr = bigbuf.get();
+ }
+ memcpy(ptr, str.data(), str.size());
+ ptr[str.size()] = '\0';
+
+ char* endptr;
+ *value = strtod(ptr, &endptr);
+ if (endptr != ptr) {
+ while (absl::ascii_isspace(*endptr)) ++endptr;
+ }
+ // Ignore range errors from strtod. The values it
+ // returns on underflow and overflow are the right
+ // fallback in a robust setting.
+ return *ptr != '\0' && *endptr == '\0';
+}
+
+namespace {
+
+// TODO(rogeeff): replace with the real released thing once we figure out what
+// it is.
+inline bool CaseEqual(absl::string_view piece1, absl::string_view piece2) {
+ return (piece1.size() == piece2.size() &&
+ 0 == strings_internal::memcasecmp(piece1.data(), piece2.data(),
+ piece1.size()));
+}
+
+// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the
+// range 0 <= i < 100, and buf must have space for two characters. Example:
+// char buf[2];
+// PutTwoDigits(42, buf);
+// // buf[0] == '4'
+// // buf[1] == '2'
+inline void PutTwoDigits(size_t i, char* buf) {
+ static const char two_ASCII_digits[100][2] = {
+ {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'},
+ {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'},
+ {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'},
+ {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'},
+ {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'},
+ {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'},
+ {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'},
+ {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'},
+ {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'},
+ {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'},
+ {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'},
+ {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'},
+ {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'},
+ {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'},
+ {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'},
+ {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'},
+ {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'},
+ {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'},
+ {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'},
+ {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}
+ };
+ assert(i < 100);
+ memcpy(buf, two_ASCII_digits[i], 2);
+}
+
+} // namespace
+
+bool SimpleAtob(absl::string_view str, bool* value) {
+ ABSL_RAW_CHECK(value != nullptr, "Output pointer must not be nullptr.");
+ if (CaseEqual(str, "true") || CaseEqual(str, "t") ||
+ CaseEqual(str, "yes") || CaseEqual(str, "y") ||
+ CaseEqual(str, "1")) {
+ *value = true;
+ return true;
+ }
+ if (CaseEqual(str, "false") || CaseEqual(str, "f") ||
+ CaseEqual(str, "no") || CaseEqual(str, "n") ||
+ CaseEqual(str, "0")) {
+ *value = false;
+ return true;
+ }
+ return false;
+}
+
+// ----------------------------------------------------------------------
+// FastInt32ToBuffer()
+// FastUInt32ToBuffer()
+// FastInt64ToBuffer()
+// FastUInt64ToBuffer()
+//
+// Like the Fast*ToBuffer() functions above, these are intended for speed.
+// Unlike the Fast*ToBuffer() functions, however, these functions write
+// their output to the beginning of the buffer (hence the name, as the
+// output is left-aligned). The caller is responsible for ensuring that
+// the buffer has enough space to hold the output.
+//
+// Returns a pointer to the end of the std::string (i.e. the null character
+// terminating the std::string).
+// ----------------------------------------------------------------------
+
+namespace {
+
+// Used to optimize printing a decimal number's final digit.
+const char one_ASCII_final_digits[10][2] {
+ {'0', 0}, {'1', 0}, {'2', 0}, {'3', 0}, {'4', 0},
+ {'5', 0}, {'6', 0}, {'7', 0}, {'8', 0}, {'9', 0},
+};
+
+} // namespace
+
+char* numbers_internal::FastUInt32ToBuffer(uint32_t i, char* buffer) {
+ uint32_t digits;
+ // The idea of this implementation is to trim the number of divides to as few
+ // as possible, and also reducing memory stores and branches, by going in
+ // steps of two digits at a time rather than one whenever possible.
+ // The huge-number case is first, in the hopes that the compiler will output
+ // that case in one branch-free block of code, and only output conditional
+ // branches into it from below.
+ if (i >= 1000000000) { // >= 1,000,000,000
+ digits = i / 100000000; // 100,000,000
+ i -= digits * 100000000;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ lt100_000_000:
+ digits = i / 1000000; // 1,000,000
+ i -= digits * 1000000;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ lt1_000_000:
+ digits = i / 10000; // 10,000
+ i -= digits * 10000;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ lt10_000:
+ digits = i / 100;
+ i -= digits * 100;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ lt100:
+ digits = i;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ *buffer = 0;
+ return buffer;
+ }
+
+ if (i < 100) {
+ digits = i;
+ if (i >= 10) goto lt100;
+ memcpy(buffer, one_ASCII_final_digits[i], 2);
+ return buffer + 1;
+ }
+ if (i < 10000) { // 10,000
+ if (i >= 1000) goto lt10_000;
+ digits = i / 100;
+ i -= digits * 100;
+ *buffer++ = '0' + digits;
+ goto lt100;
+ }
+ if (i < 1000000) { // 1,000,000
+ if (i >= 100000) goto lt1_000_000;
+ digits = i / 10000; // 10,000
+ i -= digits * 10000;
+ *buffer++ = '0' + digits;
+ goto lt10_000;
+ }
+ if (i < 100000000) { // 100,000,000
+ if (i >= 10000000) goto lt100_000_000;
+ digits = i / 1000000; // 1,000,000
+ i -= digits * 1000000;
+ *buffer++ = '0' + digits;
+ goto lt1_000_000;
+ }
+ // we already know that i < 1,000,000,000
+ digits = i / 100000000; // 100,000,000
+ i -= digits * 100000000;
+ *buffer++ = '0' + digits;
+ goto lt100_000_000;
+}
+
+char* numbers_internal::FastInt32ToBuffer(int32_t i, char* buffer) {
+ uint32_t u = i;
+ if (i < 0) {
+ *buffer++ = '-';
+ // We need to do the negation in modular (i.e., "unsigned")
+ // arithmetic; MSVC++ apprently warns for plain "-u", so
+ // we write the equivalent expression "0 - u" instead.
+ u = 0 - u;
+ }
+ return numbers_internal::FastUInt32ToBuffer(u, buffer);
+}
+
+char* numbers_internal::FastUInt64ToBuffer(uint64_t i, char* buffer) {
+ uint32_t u32 = static_cast<uint32_t>(i);
+ if (u32 == i) return numbers_internal::FastUInt32ToBuffer(u32, buffer);
+
+ // Here we know i has at least 10 decimal digits.
+ uint64_t top_1to11 = i / 1000000000;
+ u32 = static_cast<uint32_t>(i - top_1to11 * 1000000000);
+ uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
+
+ if (top_1to11_32 == top_1to11) {
+ buffer = numbers_internal::FastUInt32ToBuffer(top_1to11_32, buffer);
+ } else {
+ // top_1to11 has more than 32 bits too; print it in two steps.
+ uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / 100);
+ uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * 100);
+ buffer = numbers_internal::FastUInt32ToBuffer(top_8to9, buffer);
+ PutTwoDigits(mid_2, buffer);
+ buffer += 2;
+ }
+
+ // We have only 9 digits now, again the maximum uint32_t can handle fully.
+ uint32_t digits = u32 / 10000000; // 10,000,000
+ u32 -= digits * 10000000;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ digits = u32 / 100000; // 100,000
+ u32 -= digits * 100000;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ digits = u32 / 1000; // 1,000
+ u32 -= digits * 1000;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ digits = u32 / 10;
+ u32 -= digits * 10;
+ PutTwoDigits(digits, buffer);
+ buffer += 2;
+ memcpy(buffer, one_ASCII_final_digits[u32], 2);
+ return buffer + 1;
+}
+
+char* numbers_internal::FastInt64ToBuffer(int64_t i, char* buffer) {
+ uint64_t u = i;
+ if (i < 0) {
+ *buffer++ = '-';
+ u = 0 - u;
+ }
+ return numbers_internal::FastUInt64ToBuffer(u, buffer);
+}
+
+// Although DBL_DIG is typically 15, DBL_MAX is normally represented with 17
+// digits of precision. When converted to a std::string value with fewer digits
+// of precision using strtod(), the result can be bigger than DBL_MAX due to
+// a rounding error. Converting this value back to a double will produce an
+// Inf which will trigger a SIGFPE if FP exceptions are enabled. We skip
+// the precision check for sufficiently large values to avoid the SIGFPE.
+static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001;
+
+char* numbers_internal::RoundTripDoubleToBuffer(double d, char* buffer) {
+ // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
+ // platforms these days. Just in case some system exists where DBL_DIG
+ // is significantly larger -- and risks overflowing our buffer -- we have
+ // this assert.
+ static_assert(DBL_DIG < 20, "DBL_DIG is too big");
+
+ bool full_precision_needed = true;
+ if (std::abs(d) <= kDoublePrecisionCheckMax) {
+ int snprintf_result = snprintf(buffer, numbers_internal::kFastToBufferSize,
+ "%.*g", DBL_DIG, d);
+
+ // The snprintf should never overflow because the buffer is significantly
+ // larger than the precision we asked for.
+ assert(snprintf_result > 0 &&
+ snprintf_result < numbers_internal::kFastToBufferSize);
+ (void)snprintf_result;
+
+ full_precision_needed = strtod(buffer, nullptr) != d;
+ }
+
+ if (full_precision_needed) {
+ int snprintf_result = snprintf(buffer, numbers_internal::kFastToBufferSize,
+ "%.*g", DBL_DIG + 2, d);
+
+ // Should never overflow; see above.
+ assert(snprintf_result > 0 &&
+ snprintf_result < numbers_internal::kFastToBufferSize);
+ (void)snprintf_result;
+ }
+ return buffer;
+}
+// This table is used to quickly calculate the base-ten exponent of a given
+// float, and then to provide a multiplier to bring that number into the
+// range 1-999,999,999, that is, into uint32_t range. Finally, the exp
+// std::string is made available so there is one less int-to-std::string conversion
+// to be done.
+
+struct Spec {
+ double min_range;
+ double multiplier;
+ const char expstr[5];
+};
+const Spec neg_exp_table[] = {
+ {1.4e-45f, 1e+55, "e-45"}, //
+ {1e-44f, 1e+54, "e-44"}, //
+ {1e-43f, 1e+53, "e-43"}, //
+ {1e-42f, 1e+52, "e-42"}, //
+ {1e-41f, 1e+51, "e-41"}, //
+ {1e-40f, 1e+50, "e-40"}, //
+ {1e-39f, 1e+49, "e-39"}, //
+ {1e-38f, 1e+48, "e-38"}, //
+ {1e-37f, 1e+47, "e-37"}, //
+ {1e-36f, 1e+46, "e-36"}, //
+ {1e-35f, 1e+45, "e-35"}, //
+ {1e-34f, 1e+44, "e-34"}, //
+ {1e-33f, 1e+43, "e-33"}, //
+ {1e-32f, 1e+42, "e-32"}, //
+ {1e-31f, 1e+41, "e-31"}, //
+ {1e-30f, 1e+40, "e-30"}, //
+ {1e-29f, 1e+39, "e-29"}, //
+ {1e-28f, 1e+38, "e-28"}, //
+ {1e-27f, 1e+37, "e-27"}, //
+ {1e-26f, 1e+36, "e-26"}, //
+ {1e-25f, 1e+35, "e-25"}, //
+ {1e-24f, 1e+34, "e-24"}, //
+ {1e-23f, 1e+33, "e-23"}, //
+ {1e-22f, 1e+32, "e-22"}, //
+ {1e-21f, 1e+31, "e-21"}, //
+ {1e-20f, 1e+30, "e-20"}, //
+ {1e-19f, 1e+29, "e-19"}, //
+ {1e-18f, 1e+28, "e-18"}, //
+ {1e-17f, 1e+27, "e-17"}, //
+ {1e-16f, 1e+26, "e-16"}, //
+ {1e-15f, 1e+25, "e-15"}, //
+ {1e-14f, 1e+24, "e-14"}, //
+ {1e-13f, 1e+23, "e-13"}, //
+ {1e-12f, 1e+22, "e-12"}, //
+ {1e-11f, 1e+21, "e-11"}, //
+ {1e-10f, 1e+20, "e-10"}, //
+ {1e-09f, 1e+19, "e-09"}, //
+ {1e-08f, 1e+18, "e-08"}, //
+ {1e-07f, 1e+17, "e-07"}, //
+ {1e-06f, 1e+16, "e-06"}, //
+ {1e-05f, 1e+15, "e-05"}, //
+ {1e-04f, 1e+14, "e-04"}, //
+};
+
+const Spec pos_exp_table[] = {
+ {1e+08f, 1e+02, "e+08"}, //
+ {1e+09f, 1e+01, "e+09"}, //
+ {1e+10f, 1e+00, "e+10"}, //
+ {1e+11f, 1e-01, "e+11"}, //
+ {1e+12f, 1e-02, "e+12"}, //
+ {1e+13f, 1e-03, "e+13"}, //
+ {1e+14f, 1e-04, "e+14"}, //
+ {1e+15f, 1e-05, "e+15"}, //
+ {1e+16f, 1e-06, "e+16"}, //
+ {1e+17f, 1e-07, "e+17"}, //
+ {1e+18f, 1e-08, "e+18"}, //
+ {1e+19f, 1e-09, "e+19"}, //
+ {1e+20f, 1e-10, "e+20"}, //
+ {1e+21f, 1e-11, "e+21"}, //
+ {1e+22f, 1e-12, "e+22"}, //
+ {1e+23f, 1e-13, "e+23"}, //
+ {1e+24f, 1e-14, "e+24"}, //
+ {1e+25f, 1e-15, "e+25"}, //
+ {1e+26f, 1e-16, "e+26"}, //
+ {1e+27f, 1e-17, "e+27"}, //
+ {1e+28f, 1e-18, "e+28"}, //
+ {1e+29f, 1e-19, "e+29"}, //
+ {1e+30f, 1e-20, "e+30"}, //
+ {1e+31f, 1e-21, "e+31"}, //
+ {1e+32f, 1e-22, "e+32"}, //
+ {1e+33f, 1e-23, "e+33"}, //
+ {1e+34f, 1e-24, "e+34"}, //
+ {1e+35f, 1e-25, "e+35"}, //
+ {1e+36f, 1e-26, "e+36"}, //
+ {1e+37f, 1e-27, "e+37"}, //
+ {1e+38f, 1e-28, "e+38"}, //
+ {1e+39, 1e-29, "e+39"}, //
+};
+
+struct ExpCompare {
+ bool operator()(const Spec& spec, double d) const {
+ return spec.min_range < d;
+ }
+};
+
+// Utility routine(s) for RoundTripFloatToBuffer:
+// OutputNecessaryDigits takes two 11-digit numbers, whose integer portion
+// represents the fractional part of a floating-point number, and outputs a
+// number that is in-between them, with the fewest digits possible. For
+// instance, given 12345678900 and 12345876900, it would output "0123457".
+// When there are multiple final digits that would satisfy this requirement,
+// this routine attempts to use a digit that would represent the average of
+// lower_double and upper_double.
+//
+// Although the routine works using integers, all callers use doubles, so
+// for their convenience this routine accepts doubles.
+static char* OutputNecessaryDigits(double lower_double, double upper_double,
+ char* out) {
+ assert(lower_double > 0);
+ assert(lower_double < upper_double - 10);
+ assert(upper_double < 100000000000.0);
+
+ // Narrow the range a bit; without this bias, an input of lower=87654320010.0
+ // and upper=87654320100.0 would produce an output of 876543201
+ //
+ // We do this in three steps: first, we lower the upper bound and truncate it
+ // to an integer. Then, we increase the lower bound by exactly the amount we
+ // just decreased the upper bound by - at that point, the midpoint is exactly
+ // where it used to be. Then we truncate the lower bound.
+
+ uint64_t upper64 = upper_double - (1.0 / 1024);
+ double shrink = upper_double - upper64;
+ uint64_t lower64 = lower_double + shrink;
+
+ // Theory of operation: we convert the lower number to ascii representation,
+ // two digits at a time. As we go, we remove the same digits from the upper
+ // number. When we see the upper number does not share those same digits, we
+ // know we can stop converting. When we stop, the last digit we output is
+ // taken from the average of upper and lower values, rounded up.
+ char buf[2];
+ uint32_t lodigits =
+ static_cast<uint32_t>(lower64 / 1000000000); // 1,000,000,000
+ uint64_t mul64 = lodigits * uint64_t{1000000000};
+
+ PutTwoDigits(lodigits, out);
+ out += 2;
+ if (upper64 - mul64 >= 1000000000) { // digit mismatch!
+ PutTwoDigits(upper64 / 1000000000, buf);
+ if (out[-2] != buf[0]) {
+ out[-2] = '0' + (upper64 + lower64 + 10000000000) / 20000000000;
+ --out;
+ } else {
+ PutTwoDigits((upper64 + lower64 + 1000000000) / 2000000000, out - 2);
+ }
+ *out = '\0';
+ return out;
+ }
+ uint32_t lower = static_cast<uint32_t>(lower64 - mul64);
+ uint32_t upper = static_cast<uint32_t>(upper64 - mul64);
+
+ lodigits = lower / 10000000; // 10,000,000
+ uint32_t mul = lodigits * 10000000;
+ PutTwoDigits(lodigits, out);
+ out += 2;
+ if (upper - mul >= 10000000) { // digit mismatch!
+ PutTwoDigits(upper / 10000000, buf);
+ if (out[-2] != buf[0]) {
+ out[-2] = '0' + (upper + lower + 100000000) / 200000000;
+ --out;
+ } else {
+ PutTwoDigits((upper + lower + 10000000) / 20000000, out - 2);
+ }
+ *out = '\0';
+ return out;
+ }
+ lower -= mul;
+ upper -= mul;
+
+ lodigits = lower / 100000; // 100,000
+ mul = lodigits * 100000;
+ PutTwoDigits(lodigits, out);
+ out += 2;
+ if (upper - mul >= 100000) { // digit mismatch!
+ PutTwoDigits(upper / 100000, buf);
+ if (out[-2] != buf[0]) {
+ out[-2] = '0' + (upper + lower + 1000000) / 2000000;
+ --out;
+ } else {
+ PutTwoDigits((upper + lower + 100000) / 200000, out - 2);
+ }
+ *out = '\0';
+ return out;
+ }
+ lower -= mul;
+ upper -= mul;
+
+ lodigits = lower / 1000;
+ mul = lodigits * 1000;
+ PutTwoDigits(lodigits, out);
+ out += 2;
+ if (upper - mul >= 1000) { // digit mismatch!
+ PutTwoDigits(upper / 1000, buf);
+ if (out[-2] != buf[0]) {
+ out[-2] = '0' + (upper + lower + 10000) / 20000;
+ --out;
+ } else {
+ PutTwoDigits((upper + lower + 1000) / 2000, out - 2);
+ }
+ *out = '\0';
+ return out;
+ }
+ lower -= mul;
+ upper -= mul;
+
+ PutTwoDigits(lower / 10, out);
+ out += 2;
+ PutTwoDigits(upper / 10, buf);
+ if (out[-2] != buf[0]) {
+ out[-2] = '0' + (upper + lower + 100) / 200;
+ --out;
+ } else {
+ PutTwoDigits((upper + lower + 10) / 20, out - 2);
+ }
+ *out = '\0';
+ return out;
+}
+
+// RoundTripFloatToBuffer converts the given float into a std::string which, if
+// passed to strtof, will produce the exact same original float. It does this
+// by computing the range of possible doubles which map to the given float, and
+// then examining the digits of the doubles in that range. If all the doubles
+// in the range start with "2.37", then clearly our float does, too. As soon as
+// they diverge, only one more digit is needed.
+char* numbers_internal::RoundTripFloatToBuffer(float f, char* buffer) {
+ static_assert(std::numeric_limits<float>::is_iec559,
+ "IEEE-754/IEC-559 support only");
+
+ char* out = buffer; // we write data to out, incrementing as we go, but
+ // FloatToBuffer always returns the address of the buffer
+ // passed in.
+
+ if (std::isnan(f)) {
+ strcpy(out, "nan"); // NOLINT(runtime/printf)
+ return buffer;
+ }
+ if (f == 0) { // +0 and -0 are handled here
+ if (std::signbit(f)) {
+ strcpy(out, "-0"); // NOLINT(runtime/printf)
+ } else {
+ strcpy(out, "0"); // NOLINT(runtime/printf)
+ }
+ return buffer;
+ }
+ if (f < 0) {
+ *out++ = '-';
+ f = -f;
+ }
+ if (std::isinf(f)) {
+ strcpy(out, "inf"); // NOLINT(runtime/printf)
+ return buffer;
+ }
+
+ double next_lower = nextafterf(f, 0.0f);
+ // For all doubles in the range lower_bound < f < upper_bound, the
+ // nearest float is f.
+ double lower_bound = (f + next_lower) * 0.5;
+ double upper_bound = f + (f - lower_bound);
+ // Note: because std::nextafter is slow, we calculate upper_bound
+ // assuming that it is the same distance from f as lower_bound is.
+ // For exact powers of two, upper_bound is actually twice as far
+ // from f as lower_bound is, but this turns out not to matter.
+
+ // Most callers pass floats that are either 0 or within the
+ // range 0.0001 through 100,000,000, so handle those first,
+ // since they don't need exponential notation.
+ const Spec* spec = nullptr;
+ if (f < 1.0) {
+ if (f >= 0.0001f) {
+ // for fractional values, we set up the multiplier at the same
+ // time as we output the leading "0." / "0.0" / "0.00" / "0.000"
+ double multiplier = 1e+11;
+ *out++ = '0';
+ *out++ = '.';
+ if (f < 0.1f) {
+ multiplier = 1e+12;
+ *out++ = '0';
+ if (f < 0.01f) {
+ multiplier = 1e+13;
+ *out++ = '0';
+ if (f < 0.001f) {
+ multiplier = 1e+14;
+ *out++ = '0';
+ }
+ }
+ }
+ OutputNecessaryDigits(lower_bound * multiplier, upper_bound * multiplier,
+ out);
+ return buffer;
+ }
+ spec = std::lower_bound(std::begin(neg_exp_table), std::end(neg_exp_table),
+ double{f}, ExpCompare());
+ if (spec == std::end(neg_exp_table)) --spec;
+ } else if (f < 1e8) {
+ // Handling non-exponential format greater than 1.0 is similar to the above,
+ // but instead of 0.0 / 0.00 / 0.000, the prefix is simply the truncated
+ // integer part of f.
+ int32_t as_int = f;
+ out = numbers_internal::FastUInt32ToBuffer(as_int, out);
+ // Easy: if the integer part is within (lower_bound, upper_bound), then we
+ // are already done.
+ if (as_int > lower_bound && as_int < upper_bound) {
+ return buffer;
+ }
+ *out++ = '.';
+ OutputNecessaryDigits((lower_bound - as_int) * 1e11,
+ (upper_bound - as_int) * 1e11, out);
+ return buffer;
+ } else {
+ spec = std::lower_bound(std::begin(pos_exp_table),
+ std::end(pos_exp_table),
+ double{f}, ExpCompare());
+ if (spec == std::end(pos_exp_table)) --spec;
+ }
+ // Exponential notation from here on. "spec" was computed using lower_bound,
+ // which means it's the first spec from the table where min_range is greater
+ // or equal to f.
+ // Unfortunately that's not quite what we want; we want a min_range that is
+ // less or equal. So first thing, if it was greater, back up one entry.
+ if (spec->min_range > f) --spec;
+
+ // The digits might be "237000123", but we want "2.37000123",
+ // so we output the digits one character later, and then move the first
+ // digit back so we can stick the "." in.
+ char* start = out;
+ out = OutputNecessaryDigits(lower_bound * spec->multiplier,
+ upper_bound * spec->multiplier, start + 1);
+ start[0] = start[1];
+ start[1] = '.';
+
+ // If it turns out there was only one digit output, then back up over the '.'
+ if (out == &start[2]) --out;
+
+ // Now add the "e+NN" part.
+ memcpy(out, spec->expstr, 4);
+ out[4] = '\0';
+ return buffer;
+}
+
+// Returns the number of leading 0 bits in a 64-bit value.
+// TODO(jorg): Replace with builtin_clzll if available.
+// Are we shipping util/bits in absl?
+static inline int CountLeadingZeros64(uint64_t n) {
+ int zeroes = 60;
+ if (n >> 32) zeroes -= 32, n >>= 32;
+ if (n >> 16) zeroes -= 16, n >>= 16;
+ if (n >> 8) zeroes -= 8, n >>= 8;
+ if (n >> 4) zeroes -= 4, n >>= 4;
+ return "\4\3\2\2\1\1\1\1\0\0\0\0\0\0\0\0"[n] + zeroes;
+}
+
+// Given a 128-bit number expressed as a pair of uint64_t, high half first,
+// return that number multiplied by the given 32-bit value. If the result is
+// too large to fit in a 128-bit number, divide it by 2 until it fits.
+static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
+ uint32_t mul) {
+ uint64_t bits0_31 = num.second & 0xFFFFFFFF;
+ uint64_t bits32_63 = num.second >> 32;
+ uint64_t bits64_95 = num.first & 0xFFFFFFFF;
+ uint64_t bits96_127 = num.first >> 32;
+
+ // The picture so far: each of these 64-bit values has only the lower 32 bits
+ // filled in.
+ // bits96_127: [ 00000000 xxxxxxxx ]
+ // bits64_95: [ 00000000 xxxxxxxx ]
+ // bits32_63: [ 00000000 xxxxxxxx ]
+ // bits0_31: [ 00000000 xxxxxxxx ]
+
+ bits0_31 *= mul;
+ bits32_63 *= mul;
+ bits64_95 *= mul;
+ bits96_127 *= mul;
+
+ // Now the top halves may also have value, though all 64 of their bits will
+ // never be set at the same time, since they are a result of a 32x32 bit
+ // multiply. This makes the carry calculation slightly easier.
+ // bits96_127: [ mmmmmmmm | mmmmmmmm ]
+ // bits64_95: [ | mmmmmmmm mmmmmmmm | ]
+ // bits32_63: | [ mmmmmmmm | mmmmmmmm ]
+ // bits0_31: | [ | mmmmmmmm mmmmmmmm ]
+ // eventually: [ bits128_up | ...bits64_127.... | ..bits0_63... ]
+
+ uint64_t bits0_63 = bits0_31 + (bits32_63 << 32);
+ uint64_t bits64_127 = bits64_95 + (bits96_127 << 32) + (bits32_63 >> 32) +
+ (bits0_63 < bits0_31);
+ uint64_t bits128_up = (bits96_127 >> 32) + (bits64_127 < bits64_95);
+ if (bits128_up == 0) return {bits64_127, bits0_63};
+
+ int shift = 64 - CountLeadingZeros64(bits128_up);
+ uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (64 - shift));
+ uint64_t hi = (bits64_127 >> shift) + (bits128_up << (64 - shift));
+ return {hi, lo};
+}
+
+// Compute num * 5 ^ expfive, and return the first 128 bits of the result,
+// where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
+// PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
+static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
+ std::pair<uint64_t, uint64_t> result = {num, 0};
+ while (expfive >= 13) {
+ // 5^13 is the highest power of five that will fit in a 32-bit integer.
+ result = Mul32(result, 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5);
+ expfive -= 13;
+ }
+ constexpr int powers_of_five[13] = {
+ 1,
+ 5,
+ 5 * 5,
+ 5 * 5 * 5,
+ 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5,
+ 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5 * 5};
+ result = Mul32(result, powers_of_five[expfive & 15]);
+ int shift = CountLeadingZeros64(result.first);
+ if (shift != 0) {
+ result.first = (result.first << shift) + (result.second >> (64 - shift));
+ result.second = (result.second << shift);
+ }
+ return result;
+}
+
+struct ExpDigits {
+ int32_t exponent;
+ char digits[6];
+};
+
+// SplitToSix converts value, a positive double-precision floating-point number,
+// into a base-10 exponent and 6 ASCII digits, where the first digit is never
+// zero. For example, SplitToSix(1) returns an exponent of zero and a digits
+// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
+// two possible representations, e.g. value = 100000.5, then "round to even" is
+// performed.
+static ExpDigits SplitToSix(const double value) {
+ ExpDigits exp_dig;
+ int exp = 5;
+ double d = value;
+ // First step: calculate a close approximation of the output, where the
+ // value d will be between 100,000 and 999,999, representing the digits
+ // in the output ASCII array, and exp is the base-10 exponent. It would be
+ // faster to use a table here, and to look up the base-2 exponent of value,
+ // however value is an IEEE-754 64-bit number, so the table would have 2,000
+ // entries, which is not cache-friendly.
+ if (d >= 999999.5) {
+ if (d >= 1e+261) exp += 256, d *= 1e-256;
+ if (d >= 1e+133) exp += 128, d *= 1e-128;
+ if (d >= 1e+69) exp += 64, d *= 1e-64;
+ if (d >= 1e+37) exp += 32, d *= 1e-32;
+ if (d >= 1e+21) exp += 16, d *= 1e-16;
+ if (d >= 1e+13) exp += 8, d *= 1e-8;
+ if (d >= 1e+9) exp += 4, d *= 1e-4;
+ if (d >= 1e+7) exp += 2, d *= 1e-2;
+ if (d >= 1e+6) exp += 1, d *= 1e-1;
+ } else {
+ if (d < 1e-250) exp -= 256, d *= 1e256;
+ if (d < 1e-122) exp -= 128, d *= 1e128;
+ if (d < 1e-58) exp -= 64, d *= 1e64;
+ if (d < 1e-26) exp -= 32, d *= 1e32;
+ if (d < 1e-10) exp -= 16, d *= 1e16;
+ if (d < 1e-2) exp -= 8, d *= 1e8;
+ if (d < 1e+2) exp -= 4, d *= 1e4;
+ if (d < 1e+4) exp -= 2, d *= 1e2;
+ if (d < 1e+5) exp -= 1, d *= 1e1;
+ }
+ // At this point, d is in the range [99999.5..999999.5) and exp is in the
+ // range [-324..308]. Since we need to round d up, we want to add a half
+ // and truncate.
+ // However, the technique above may have lost some precision, due to its
+ // repeated multiplication by constants that each may be off by half a bit
+ // of precision. This only matters if we're close to the edge though.
+ // Since we'd like to know if the fractional part of d is close to a half,
+ // we multiply it by 65536 and see if the fractional part is close to 32768.
+ // (The number doesn't have to be a power of two,but powers of two are faster)
+ uint64_t d64k = d * 65536;
+ int dddddd; // A 6-digit decimal integer.
+ if ((d64k % 65536) == 32767 || (d64k % 65536) == 32768) {
+ // OK, it's fairly likely that precision was lost above, which is
+ // not a surprise given only 52 mantissa bits are available. Therefore
+ // redo the calculation using 128-bit numbers. (64 bits are not enough).
+
+ // Start out with digits rounded down; maybe add one below.
+ dddddd = static_cast<int>(d64k / 65536);
+
+ // mantissa is a 64-bit integer representing M.mmm... * 2^63. The actual
+ // value we're representing, of course, is M.mmm... * 2^exp2.
+ int exp2;
+ double m = std::frexp(value, &exp2);
+ uint64_t mantissa = m * (32768.0 * 65536.0 * 65536.0 * 65536.0);
+ // std::frexp returns an m value in the range [0.5, 1.0), however we
+ // can't multiply it by 2^64 and convert to an integer because some FPUs
+ // throw an exception when converting an number higher than 2^63 into an
+ // integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
+ // since m only has 52 significant bits anyway.
+ mantissa <<= 1;
+ exp2 -= 64; // not needed, but nice for debugging
+
+ // OK, we are here to compare:
+ // (dddddd + 0.5) * 10^(exp-5) vs. mantissa * 2^exp2
+ // so we can round up dddddd if appropriate. Those values span the full
+ // range of 600 orders of magnitude of IEE 64-bit floating-point.
+ // Fortunately, we already know they are very close, so we don't need to
+ // track the base-2 exponent of both sides. This greatly simplifies the
+ // the math since the 2^exp2 calculation is unnecessary and the power-of-10
+ // calculation can become a power-of-5 instead.
+
+ std::pair<uint64_t, uint64_t> edge, val;
+ if (exp >= 6) {
+ // Compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa
+ // Since we're tossing powers of two, 2 * dddddd + 1 is the
+ // same as dddddd + 0.5
+ edge = PowFive(2 * dddddd + 1, exp - 5);
+
+ val.first = mantissa;
+ val.second = 0;
+ } else {
+ // We can't compare (dddddd + 0.5) * 5 ^ (exp - 5) to mantissa as we did
+ // above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
+ // mantissa * 5 ^ (5 - exp)
+ edge = PowFive(2 * dddddd + 1, 0);
+
+ val = PowFive(mantissa, 5 - exp);
+ }
+ // printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
+ // val.second, edge.first, edge.second);
+ if (val > edge) {
+ dddddd++;
+ } else if (val == edge) {
+ dddddd += (dddddd & 1);
+ }
+ } else {
+ // Here, we are not close to the edge.
+ dddddd = static_cast<int>((d64k + 32768) / 65536);
+ }
+ if (dddddd == 1000000) {
+ dddddd = 100000;
+ exp += 1;
+ }
+ exp_dig.exponent = exp;
+
+ int two_digits = dddddd / 10000;
+ dddddd -= two_digits * 10000;
+ PutTwoDigits(two_digits, &exp_dig.digits[0]);
+
+ two_digits = dddddd / 100;
+ dddddd -= two_digits * 100;
+ PutTwoDigits(two_digits, &exp_dig.digits[2]);
+
+ PutTwoDigits(dddddd, &exp_dig.digits[4]);
+ return exp_dig;
+}
+
+// Helper function for fast formatting of floating-point.
+// The result is the same as "%g", a.k.a. "%.6g".
+size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
+ static_assert(std::numeric_limits<float>::is_iec559,
+ "IEEE-754/IEC-559 support only");
+
+ char* out = buffer; // we write data to out, incrementing as we go, but
+ // FloatToBuffer always returns the address of the buffer
+ // passed in.
+
+ if (std::isnan(d)) {
+ strcpy(out, "nan"); // NOLINT(runtime/printf)
+ return 3;
+ }
+ if (d == 0) { // +0 and -0 are handled here
+ if (std::signbit(d)) *out++ = '-';
+ *out++ = '0';
+ *out = 0;
+ return out - buffer;
+ }
+ if (d < 0) {
+ *out++ = '-';
+ d = -d;
+ }
+ if (std::isinf(d)) {
+ strcpy(out, "inf"); // NOLINT(runtime/printf)
+ return out + 3 - buffer;
+ }
+
+ auto exp_dig = SplitToSix(d);
+ int exp = exp_dig.exponent;
+ const char* digits = exp_dig.digits;
+ out[0] = '0';
+ out[1] = '.';
+ switch (exp) {
+ case 5:
+ memcpy(out, &digits[0], 6), out += 6;
+ *out = 0;
+ return out - buffer;
+ case 4:
+ memcpy(out, &digits[0], 5), out += 5;
+ if (digits[5] != '0') {
+ *out++ = '.';
+ *out++ = digits[5];
+ }
+ *out = 0;
+ return out - buffer;
+ case 3:
+ memcpy(out, &digits[0], 4), out += 4;
+ if ((digits[5] | digits[4]) != '0') {
+ *out++ = '.';
+ *out++ = digits[4];
+ if (digits[5] != '0') *out++ = digits[5];
+ }
+ *out = 0;
+ return out - buffer;
+ case 2:
+ memcpy(out, &digits[0], 3), out += 3;
+ *out++ = '.';
+ memcpy(out, &digits[3], 3);
+ out += 3;
+ while (out[-1] == '0') --out;
+ if (out[-1] == '.') --out;
+ *out = 0;
+ return out - buffer;
+ case 1:
+ memcpy(out, &digits[0], 2), out += 2;
+ *out++ = '.';
+ memcpy(out, &digits[2], 4);
+ out += 4;
+ while (out[-1] == '0') --out;
+ if (out[-1] == '.') --out;
+ *out = 0;
+ return out - buffer;
+ case 0:
+ memcpy(out, &digits[0], 1), out += 1;
+ *out++ = '.';
+ memcpy(out, &digits[1], 5);
+ out += 5;
+ while (out[-1] == '0') --out;
+ if (out[-1] == '.') --out;
+ *out = 0;
+ return out - buffer;
+ case -4:
+ out[2] = '0';
+ ++out;
+ ABSL_FALLTHROUGH_INTENDED;
+ case -3:
+ out[2] = '0';
+ ++out;
+ ABSL_FALLTHROUGH_INTENDED;
+ case -2:
+ out[2] = '0';
+ ++out;
+ ABSL_FALLTHROUGH_INTENDED;
+ case -1:
+ out += 2;
+ memcpy(out, &digits[0], 6);
+ out += 6;
+ while (out[-1] == '0') --out;
+ *out = 0;
+ return out - buffer;
+ }
+ assert(exp < -4 || exp >= 6);
+ out[0] = digits[0];
+ assert(out[1] == '.');
+ out += 2;
+ memcpy(out, &digits[1], 5), out += 5;
+ while (out[-1] == '0') --out;
+ if (out[-1] == '.') --out;
+ *out++ = 'e';
+ if (exp > 0) {
+ *out++ = '+';
+ } else {
+ *out++ = '-';
+ exp = -exp;
+ }
+ if (exp > 99) {
+ int dig1 = exp / 100;
+ exp -= dig1 * 100;
+ *out++ = '0' + dig1;
+ }
+ PutTwoDigits(exp, out);
+ out += 2;
+ *out = 0;
+ return out - buffer;
+}
+
+namespace {
+// Represents integer values of digits.
+// Uses 36 to indicate an invalid character since we support
+// bases up to 36.
+static const int8_t kAsciiToInt[256] = {
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, // 16 36s.
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9, 36, 36, 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17,
+ 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+ 36, 36, 36, 36, 36, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36,
+ 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, 36};
+
+// Parse the sign and optional hex or oct prefix in text.
+inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/,
+ int* base_ptr /*inout*/,
+ bool* negative_ptr /*output*/) {
+ if (text->data() == nullptr) {
+ return false;
+ }
+
+ const char* start = text->data();
+ const char* end = start + text->size();
+ int base = *base_ptr;
+
+ // Consume whitespace.
+ while (start < end && absl::ascii_isspace(start[0])) {
+ ++start;
+ }
+ while (start < end && absl::ascii_isspace(end[-1])) {
+ --end;
+ }
+ if (start >= end) {
+ return false;
+ }
+
+ // Consume sign.
+ *negative_ptr = (start[0] == '-');
+ if (*negative_ptr || start[0] == '+') {
+ ++start;
+ if (start >= end) {
+ return false;
+ }
+ }
+
+ // Consume base-dependent prefix.
+ // base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
+ // base 16: "0x" -> base 16
+ // Also validate the base.
+ if (base == 0) {
+ if (end - start >= 2 && start[0] == '0' &&
+ (start[1] == 'x' || start[1] == 'X')) {
+ base = 16;
+ start += 2;
+ if (start >= end) {
+ // "0x" with no digits after is invalid.
+ return false;
+ }
+ } else if (end - start >= 1 && start[0] == '0') {
+ base = 8;
+ start += 1;
+ } else {
+ base = 10;
+ }
+ } else if (base == 16) {
+ if (end - start >= 2 && start[0] == '0' &&
+ (start[1] == 'x' || start[1] == 'X')) {
+ start += 2;
+ if (start >= end) {
+ // "0x" with no digits after is invalid.
+ return false;
+ }
+ }
+ } else if (base >= 2 && base <= 36) {
+ // okay
+ } else {
+ return false;
+ }
+ *text = absl::string_view(start, end - start);
+ *base_ptr = base;
+ return true;
+}
+
+// Consume digits.
+//
+// The classic loop:
+//
+// for each digit
+// value = value * base + digit
+// value *= sign
+//
+// The classic loop needs overflow checking. It also fails on the most
+// negative integer, -2147483648 in 32-bit two's complement representation.
+//
+// My improved loop:
+//
+// if (!negative)
+// for each digit
+// value = value * base
+// value = value + digit
+// else
+// for each digit
+// value = value * base
+// value = value - digit
+//
+// Overflow checking becomes simple.
+
+// Lookup tables per IntType:
+// vmax/base and vmin/base are precomputed because division costs at least 8ns.
+// TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
+// struct of arrays) would probably be better in terms of d-cache for the most
+// commonly used bases.
+template <typename IntType>
+struct LookupTables {
+ static const IntType kVmaxOverBase[];
+ static const IntType kVminOverBase[];
+};
+
+// An array initializer macro for X/base where base in [0, 36].
+// However, note that lookups for base in [0, 1] should never happen because
+// base has been validated to be in [2, 36] by safe_parse_sign_and_base().
+#define X_OVER_BASE_INITIALIZER(X) \
+ { \
+ 0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
+ X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \
+ X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \
+ X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \
+ X / 35, X / 36, \
+ }
+
+template <typename IntType>
+const IntType LookupTables<IntType>::kVmaxOverBase[] =
+ X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
+
+template <typename IntType>
+const IntType LookupTables<IntType>::kVminOverBase[] =
+ X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
+
+#undef X_OVER_BASE_INITIALIZER
+
+template <typename IntType>
+inline bool safe_parse_positive_int(absl::string_view text, int base,
+ IntType* value_p) {
+ IntType value = 0;
+ const IntType vmax = std::numeric_limits<IntType>::max();
+ assert(vmax > 0);
+ assert(base >= 0);
+ assert(vmax >= static_cast<IntType>(base));
+ const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
+ const char* start = text.data();
+ const char* end = start + text.size();
+ // loop over digits
+ for (; start < end; ++start) {
+ unsigned char c = static_cast<unsigned char>(start[0]);
+ int digit = kAsciiToInt[c];
+ if (digit >= base) {
+ *value_p = value;
+ return false;
+ }
+ if (value > vmax_over_base) {
+ *value_p = vmax;
+ return false;
+ }
+ value *= base;
+ if (value > vmax - digit) {
+ *value_p = vmax;
+ return false;
+ }
+ value += digit;
+ }
+ *value_p = value;
+ return true;
+}
+
+template <typename IntType>
+inline bool safe_parse_negative_int(absl::string_view text, int base,
+ IntType* value_p) {
+ IntType value = 0;
+ const IntType vmin = std::numeric_limits<IntType>::min();
+ assert(vmin < 0);
+ assert(vmin <= 0 - base);
+ IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
+ // 2003 c++ standard [expr.mul]
+ // "... the sign of the remainder is implementation-defined."
+ // Although (vmin/base)*base + vmin%base is always vmin.
+ // 2011 c++ standard tightens the spec but we cannot rely on it.
+ // TODO(junyer): Handle this in the lookup table generation.
+ if (vmin % base > 0) {
+ vmin_over_base += 1;
+ }
+ const char* start = text.data();
+ const char* end = start + text.size();
+ // loop over digits
+ for (; start < end; ++start) {
+ unsigned char c = static_cast<unsigned char>(start[0]);
+ int digit = kAsciiToInt[c];
+ if (digit >= base) {
+ *value_p = value;
+ return false;
+ }
+ if (value < vmin_over_base) {
+ *value_p = vmin;
+ return false;
+ }
+ value *= base;
+ if (value < vmin + digit) {
+ *value_p = vmin;
+ return false;
+ }
+ value -= digit;
+ }
+ *value_p = value;
+ return true;
+}
+
+// Input format based on POSIX.1-2008 strtol
+// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
+template <typename IntType>
+inline bool safe_int_internal(absl::string_view text, IntType* value_p,
+ int base) {
+ *value_p = 0;
+ bool negative;
+ if (!safe_parse_sign_and_base(&text, &base, &negative)) {
+ return false;
+ }
+ if (!negative) {
+ return safe_parse_positive_int(text, base, value_p);
+ } else {
+ return safe_parse_negative_int(text, base, value_p);
+ }
+}
+
+template <typename IntType>
+inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
+ int base) {
+ *value_p = 0;
+ bool negative;
+ if (!safe_parse_sign_and_base(&text, &base, &negative) || negative) {
+ return false;
+ }
+ return safe_parse_positive_int(text, base, value_p);
+}
+} // anonymous namespace
+
+namespace numbers_internal {
+bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
+ return safe_int_internal<int32_t>(text, value, base);
+}
+
+bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
+ return safe_int_internal<int64_t>(text, value, base);
+}
+
+bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
+ return safe_uint_internal<uint32_t>(text, value, base);
+}
+
+bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
+ return safe_uint_internal<uint64_t>(text, value, base);
+}
+} // namespace numbers_internal
+
+} // namespace absl
diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h
new file mode 100644
index 00000000..f17dc97b
--- /dev/null
+++ b/absl/strings/numbers.h
@@ -0,0 +1,173 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: numbers.h
+// -----------------------------------------------------------------------------
+//
+// This package contains functions for converting strings to numbers. For
+// converting numbers to strings, use `StrCat()` or `StrAppend()` in str_cat.h,
+// which automatically detect and convert most number values appropriately.
+
+#ifndef ABSL_STRINGS_NUMBERS_H_
+#define ABSL_STRINGS_NUMBERS_H_
+
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+#include <limits>
+#include <string>
+#include <type_traits>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/numeric/int128.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+// SimpleAtoi()
+//
+// Converts the given std::string into an integer value, returning `true` if
+// successful. The std::string must reflect a base-10 integer (optionally followed or
+// preceded by ASCII whitespace) whose value falls within the range of the
+// integer type,
+template <typename int_type>
+ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view s, int_type* out);
+
+// SimpleAtof()
+//
+// Converts the given std::string (optionally followed or preceded by ASCII
+// whitespace) into a float, which may be rounded on overflow or underflow.
+ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* value);
+
+// SimpleAtod()
+//
+// Converts the given std::string (optionally followed or preceded by ASCII
+// whitespace) into a double, which may be rounded on overflow or underflow.
+ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* value);
+
+// SimpleAtob()
+//
+// Converts the given std::string into into a boolean, returning `true` if
+// successful. The following case-insensitive strings are interpreted as boolean
+// `true`: "true", "t", "yes", "y", "1". The following case-insensitive strings
+// are interpreted as boolean `false`: "false", "f", "no", "n", "0".
+ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* value);
+
+} // namespace absl
+
+// End of public API. Implementation details follow.
+
+namespace absl {
+namespace numbers_internal {
+
+// safe_strto?() functions for implementing SimpleAtoi()
+bool safe_strto32_base(absl::string_view text, int32_t* value, int base);
+bool safe_strto64_base(absl::string_view text, int64_t* value, int base);
+bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base);
+bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base);
+
+// These functions are intended for speed. All functions take an output buffer
+// as an argument and return a pointer to the last byte they wrote, which is the
+// terminating '\0'. At most `kFastToBufferSize` bytes are written.
+char* FastInt32ToBuffer(int32_t i, char* buffer);
+char* FastUInt32ToBuffer(uint32_t i, char* buffer);
+char* FastInt64ToBuffer(int64_t i, char* buffer);
+char* FastUInt64ToBuffer(uint64_t i, char* buffer);
+
+static const int kFastToBufferSize = 32;
+static const int kSixDigitsToBufferSize = 16;
+
+char* RoundTripDoubleToBuffer(double d, char* buffer);
+char* RoundTripFloatToBuffer(float f, char* buffer);
+
+// Helper function for fast formatting of floating-point values.
+// The result is the same as printf's "%g", a.k.a. "%.6g"; that is, six
+// significant digits are returned, trailing zeros are removed, and numbers
+// outside the range 0.0001-999999 are output using scientific notation
+// (1.23456e+06). This routine is heavily optimized.
+// Required buffer size is `kSixDigitsToBufferSize`.
+size_t SixDigitsToBuffer(double d, char* buffer);
+
+template <typename int_type>
+char* FastIntToBuffer(int_type i, char* buffer) {
+ static_assert(sizeof(i) <= 64 / 8,
+ "FastIntToBuffer works only with 64-bit-or-less integers.");
+ // TODO(jorg): This signed-ness check is used because it works correctly
+ // with enums, and it also serves to check that int_type is not a pointer.
+ // If one day something like std::is_signed<enum E> works, switch to it.
+ if (static_cast<int_type>(1) - 2 < 0) { // Signed
+ if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit
+ return numbers_internal::FastInt64ToBuffer(i, buffer);
+ } else { // 32-bit or less
+ return numbers_internal::FastInt32ToBuffer(i, buffer);
+ }
+ } else { // Unsigned
+ if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit
+ return numbers_internal::FastUInt64ToBuffer(i, buffer);
+ } else { // 32-bit or less
+ return numbers_internal::FastUInt32ToBuffer(i, buffer);
+ }
+ }
+}
+
+} // namespace numbers_internal
+
+// SimpleAtoi()
+//
+// Converts a std::string to an integer, using `safe_strto?()` functions for actual
+// parsing, returning `true` if successful. The `safe_strto?()` functions apply
+// strict checking; the std::string must be a base-10 integer, optionally followed or
+// preceded by ASCII whitespace, with a value in the range of the corresponding
+// integer type.
+template <typename int_type>
+ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view s, int_type* out) {
+ static_assert(sizeof(*out) == 4 || sizeof(*out) == 8,
+ "SimpleAtoi works only with 32-bit or 64-bit integers.");
+ static_assert(!std::is_floating_point<int_type>::value,
+ "Use SimpleAtof or SimpleAtod instead.");
+ bool parsed;
+ // TODO(jorg): This signed-ness check is used because it works correctly
+ // with enums, and it also serves to check that int_type is not a pointer.
+ // If one day something like std::is_signed<enum E> works, switch to it.
+ if (static_cast<int_type>(1) - 2 < 0) { // Signed
+ if (sizeof(*out) == 64 / 8) { // 64-bit
+ int64_t val;
+ parsed = numbers_internal::safe_strto64_base(s, &val, 10);
+ *out = static_cast<int_type>(val);
+ } else { // 32-bit
+ int32_t val;
+ parsed = numbers_internal::safe_strto32_base(s, &val, 10);
+ *out = static_cast<int_type>(val);
+ }
+ } else { // Unsigned
+ if (sizeof(*out) == 64 / 8) { // 64-bit
+ uint64_t val;
+ parsed = numbers_internal::safe_strtou64_base(s, &val, 10);
+ *out = static_cast<int_type>(val);
+ } else { // 32-bit
+ uint32_t val;
+ parsed = numbers_internal::safe_strtou32_base(s, &val, 10);
+ *out = static_cast<int_type>(val);
+ }
+ }
+ return parsed;
+}
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_NUMBERS_H_
diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc
new file mode 100644
index 00000000..9b74d67b
--- /dev/null
+++ b/absl/strings/numbers_test.cc
@@ -0,0 +1,1186 @@
+// This file tests std::string processing functions related to numeric values.
+
+#include "absl/strings/numbers.h"
+
+#include <sys/types.h>
+#include <algorithm>
+#include <cctype>
+#include <cfenv> // NOLINT(build/c++11)
+#include <cfloat>
+#include <cinttypes>
+#include <climits>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <limits>
+#include <numeric>
+#include <random>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/internal/raw_logging.h"
+#include "absl/base/port.h"
+#include "absl/strings/str_cat.h"
+
+#include "absl/strings/internal/numbers_test_common.inc"
+
+namespace {
+
+using absl::numbers_internal::FastInt32ToBuffer;
+using absl::numbers_internal::FastInt64ToBuffer;
+using absl::numbers_internal::FastUInt32ToBuffer;
+using absl::numbers_internal::FastUInt64ToBuffer;
+using absl::numbers_internal::kFastToBufferSize;
+using absl::numbers_internal::kSixDigitsToBufferSize;
+using absl::numbers_internal::safe_strto32_base;
+using absl::numbers_internal::safe_strto64_base;
+using absl::numbers_internal::safe_strtou32_base;
+using absl::numbers_internal::safe_strtou64_base;
+using absl::numbers_internal::RoundTripFloatToBuffer;
+using absl::numbers_internal::SixDigitsToBuffer;
+using absl::SimpleAtoi;
+using testing::Eq;
+using testing::MatchesRegex;
+
+// Number of floats to test with.
+// 10,000,000 is a reasonable default for a test that only takes a few seconds.
+// 1,000,000,000+ triggers checking for all possible mantissa values for
+// double-precision tests. 2,000,000,000+ triggers checking for every possible
+// single-precision float.
+#ifdef _MSC_VER
+// Use a smaller number on MSVC to avoid test time out (1 min)
+const int kFloatNumCases = 5000000;
+#else
+const int kFloatNumCases = 10000000;
+#endif
+
+// This is a slow, brute-force routine to compute the exact base-10
+// representation of a double-precision floating-point number. It
+// is useful for debugging only.
+std::string PerfectDtoa(double d) {
+ if (d == 0) return "0";
+ if (d < 0) return "-" + PerfectDtoa(-d);
+
+ // Basic theory: decompose d into mantissa and exp, where
+ // d = mantissa * 2^exp, and exp is as close to zero as possible.
+ int64_t mantissa, exp = 0;
+ while (d >= 1ULL << 63) ++exp, d *= 0.5;
+ while ((mantissa = d) != d) --exp, d *= 2.0;
+
+ // Then convert mantissa to ASCII, and either double it (if
+ // exp > 0) or halve it (if exp < 0) repeatedly. "halve it"
+ // in this case means multiplying it by five and dividing by 10.
+ constexpr int maxlen = 1100; // worst case is actually 1030 or so.
+ char buf[maxlen + 5];
+ for (int64_t num = mantissa, pos = maxlen; --pos >= 0;) {
+ buf[pos] = '0' + (num % 10);
+ num /= 10;
+ }
+ char* begin = &buf[0];
+ char* end = buf + maxlen;
+ for (int i = 0; i != exp; i += (exp > 0) ? 1 : -1) {
+ int carry = 0;
+ for (char* p = end; --p != begin;) {
+ int dig = *p - '0';
+ dig = dig * (exp > 0 ? 2 : 5) + carry;
+ carry = dig / 10;
+ dig %= 10;
+ *p = '0' + dig;
+ }
+ }
+ if (exp < 0) {
+ // "dividing by 10" above means we have to add the decimal point.
+ memmove(end + 1 + exp, end + exp, 1 - exp);
+ end[exp] = '.';
+ ++end;
+ }
+ while (*begin == '0' && begin[1] != '.') ++begin;
+ return {begin, end};
+}
+
+TEST(ToString, PerfectDtoa) {
+ EXPECT_THAT(PerfectDtoa(1), Eq("1"));
+ EXPECT_THAT(PerfectDtoa(0.1),
+ Eq("0.1000000000000000055511151231257827021181583404541015625"));
+ EXPECT_THAT(PerfectDtoa(1e24), Eq("999999999999999983222784"));
+ EXPECT_THAT(PerfectDtoa(5e-324), MatchesRegex("0.0000.*625"));
+ for (int i = 0; i < 100; ++i) {
+ for (double multiplier :
+ {1e-300, 1e-200, 1e-100, 0.1, 1.0, 10.0, 1e100, 1e300}) {
+ double d = multiplier * i;
+ std::string s = PerfectDtoa(d);
+ EXPECT_EQ(d, strtod(s.c_str(), nullptr));
+ }
+ }
+}
+
+void CheckInt32(int32_t x) {
+ char buffer[kFastInt32ToBufferSize];
+ char* actual = FastInt32ToBuffer(x, buffer);
+ std::string expected = std::to_string(x);
+ ASSERT_TRUE(expected == actual)
+ << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input "
+ << x;
+}
+
+void CheckInt64(int64_t x) {
+ char buffer[kFastInt64ToBufferSize + 3];
+ buffer[0] = '*';
+ buffer[23] = '*';
+ buffer[24] = '*';
+ char* actual = FastInt64ToBuffer(x, &buffer[1]);
+ std::string expected = std::to_string(x);
+ ASSERT_TRUE(expected == actual)
+ << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input "
+ << x;
+ ASSERT_EQ(buffer[0], '*');
+ ASSERT_EQ(buffer[23], '*');
+ ASSERT_EQ(buffer[24], '*');
+}
+
+void CheckUInt32(uint32_t x) {
+ char buffer[kFastUInt64ToBufferSize];
+ char* actual = FastUInt32ToBuffer(x, buffer);
+ std::string expected = std::to_string(x);
+ ASSERT_TRUE(expected == actual)
+ << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input "
+ << x;
+}
+
+void CheckUInt64(uint64_t x) {
+ char buffer[kFastUInt64ToBufferSize + 1];
+ char* actual = FastUInt64ToBuffer(x, &buffer[1]);
+ std::string expected = std::to_string(x);
+ ASSERT_TRUE(expected == actual)
+ << "Expected \"" << expected << "\", Actual \"" << actual << "\", Input "
+ << x;
+}
+
+void CheckHex64(uint64_t v) {
+ char expected[kFastUInt64ToBufferSize];
+ std::string actual = absl::StrCat(absl::Hex(v, absl::kZeroPad16));
+ snprintf(expected, sizeof(expected), "%016" PRIx64, static_cast<uint64_t>(v));
+ ASSERT_TRUE(expected == actual)
+ << "Expected \"" << expected << "\", Actual \"" << actual << "\"";
+}
+
+void TestFastPrints() {
+ for (int i = -100; i <= 100; i++) {
+ CheckInt32(i);
+ CheckInt64(i);
+ }
+ for (int i = 0; i <= 100; i++) {
+ CheckUInt32(i);
+ CheckUInt64(i);
+ }
+ // Test min int to make sure that works
+ CheckInt32(INT_MIN);
+ CheckInt32(INT_MAX);
+ CheckInt64(LONG_MIN);
+ CheckInt64(uint64_t{1000000000});
+ CheckInt64(uint64_t{9999999999});
+ CheckInt64(uint64_t{100000000000000});
+ CheckInt64(uint64_t{999999999999999});
+ CheckInt64(uint64_t{1000000000000000000});
+ CheckInt64(uint64_t{1199999999999999999});
+ CheckInt64(int64_t{-700000000000000000});
+ CheckInt64(LONG_MAX);
+ CheckUInt32(std::numeric_limits<uint32_t>::max());
+ CheckUInt64(uint64_t{1000000000});
+ CheckUInt64(uint64_t{9999999999});
+ CheckUInt64(uint64_t{100000000000000});
+ CheckUInt64(uint64_t{999999999999999});
+ CheckUInt64(uint64_t{1000000000000000000});
+ CheckUInt64(uint64_t{1199999999999999999});
+ CheckUInt64(std::numeric_limits<uint64_t>::max());
+
+ for (int i = 0; i < 10000; i++) {
+ CheckHex64(i);
+ }
+ CheckHex64(uint64_t{0x123456789abcdef0});
+}
+
+template <typename int_type, typename in_val_type>
+void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) {
+ std::string s = absl::StrCat(in_value);
+ int_type x = static_cast<int_type>(~exp_value);
+ EXPECT_TRUE(SimpleAtoi(s, &x))
+ << "in_value=" << in_value << " s=" << s << " x=" << x;
+ EXPECT_EQ(exp_value, x);
+ x = static_cast<int_type>(~exp_value);
+ EXPECT_TRUE(SimpleAtoi(s.c_str(), &x));
+ EXPECT_EQ(exp_value, x);
+}
+
+template <typename int_type, typename in_val_type>
+void VerifySimpleAtoiBad(in_val_type in_value) {
+ std::string s = absl::StrCat(in_value);
+ int_type x;
+ EXPECT_FALSE(SimpleAtoi(s, &x));
+ EXPECT_FALSE(SimpleAtoi(s.c_str(), &x));
+}
+
+TEST(NumbersTest, Atoi) {
+ // SimpleAtoi(absl::string_view, int32_t)
+ VerifySimpleAtoiGood<int32_t>(0, 0);
+ VerifySimpleAtoiGood<int32_t>(42, 42);
+ VerifySimpleAtoiGood<int32_t>(-42, -42);
+
+ VerifySimpleAtoiGood<int32_t>(std::numeric_limits<int32_t>::min(),
+ std::numeric_limits<int32_t>::min());
+ VerifySimpleAtoiGood<int32_t>(std::numeric_limits<int32_t>::max(),
+ std::numeric_limits<int32_t>::max());
+
+ // SimpleAtoi(absl::string_view, uint32_t)
+ VerifySimpleAtoiGood<uint32_t>(0, 0);
+ VerifySimpleAtoiGood<uint32_t>(42, 42);
+ VerifySimpleAtoiBad<uint32_t>(-42);
+
+ VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int32_t>::min());
+ VerifySimpleAtoiGood<uint32_t>(std::numeric_limits<int32_t>::max(),
+ std::numeric_limits<int32_t>::max());
+ VerifySimpleAtoiGood<uint32_t>(std::numeric_limits<uint32_t>::max(),
+ std::numeric_limits<uint32_t>::max());
+ VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int64_t>::min());
+ VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<int64_t>::max());
+ VerifySimpleAtoiBad<uint32_t>(std::numeric_limits<uint64_t>::max());
+
+ // SimpleAtoi(absl::string_view, int64_t)
+ VerifySimpleAtoiGood<int64_t>(0, 0);
+ VerifySimpleAtoiGood<int64_t>(42, 42);
+ VerifySimpleAtoiGood<int64_t>(-42, -42);
+
+ VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int32_t>::min(),
+ std::numeric_limits<int32_t>::min());
+ VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int32_t>::max(),
+ std::numeric_limits<int32_t>::max());
+ VerifySimpleAtoiGood<int64_t>(std::numeric_limits<uint32_t>::max(),
+ std::numeric_limits<uint32_t>::max());
+ VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int64_t>::min(),
+ std::numeric_limits<int64_t>::min());
+ VerifySimpleAtoiGood<int64_t>(std::numeric_limits<int64_t>::max(),
+ std::numeric_limits<int64_t>::max());
+ VerifySimpleAtoiBad<int64_t>(std::numeric_limits<uint64_t>::max());
+
+ // SimpleAtoi(absl::string_view, uint64_t)
+ VerifySimpleAtoiGood<uint64_t>(0, 0);
+ VerifySimpleAtoiGood<uint64_t>(42, 42);
+ VerifySimpleAtoiBad<uint64_t>(-42);
+
+ VerifySimpleAtoiBad<uint64_t>(std::numeric_limits<int32_t>::min());
+ VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<int32_t>::max(),
+ std::numeric_limits<int32_t>::max());
+ VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<uint32_t>::max(),
+ std::numeric_limits<uint32_t>::max());
+ VerifySimpleAtoiBad<uint64_t>(std::numeric_limits<int64_t>::min());
+ VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<int64_t>::max(),
+ std::numeric_limits<int64_t>::max());
+ VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(),
+ std::numeric_limits<uint64_t>::max());
+
+ // Some other types
+ VerifySimpleAtoiGood<int>(-42, -42);
+ VerifySimpleAtoiGood<int32_t>(-42, -42);
+ VerifySimpleAtoiGood<uint32_t>(42, 42);
+ VerifySimpleAtoiGood<unsigned int>(42, 42);
+ VerifySimpleAtoiGood<int64_t>(-42, -42);
+ VerifySimpleAtoiGood<long>(-42, -42); // NOLINT(runtime/int)
+ VerifySimpleAtoiGood<uint64_t>(42, 42);
+ VerifySimpleAtoiGood<size_t>(42, 42);
+ VerifySimpleAtoiGood<std::string::size_type>(42, 42);
+}
+
+TEST(NumbersTest, Atoenum) {
+ enum E01 {
+ E01_zero = 0,
+ E01_one = 1,
+ };
+
+ VerifySimpleAtoiGood<E01>(E01_zero, E01_zero);
+ VerifySimpleAtoiGood<E01>(E01_one, E01_one);
+
+ enum E_101 {
+ E_101_minusone = -1,
+ E_101_zero = 0,
+ E_101_one = 1,
+ };
+
+ VerifySimpleAtoiGood<E_101>(E_101_minusone, E_101_minusone);
+ VerifySimpleAtoiGood<E_101>(E_101_zero, E_101_zero);
+ VerifySimpleAtoiGood<E_101>(E_101_one, E_101_one);
+
+ enum E_bigint {
+ E_bigint_zero = 0,
+ E_bigint_one = 1,
+ E_bigint_max31 = static_cast<int32_t>(0x7FFFFFFF),
+ };
+
+ VerifySimpleAtoiGood<E_bigint>(E_bigint_zero, E_bigint_zero);
+ VerifySimpleAtoiGood<E_bigint>(E_bigint_one, E_bigint_one);
+ VerifySimpleAtoiGood<E_bigint>(E_bigint_max31, E_bigint_max31);
+
+ enum E_fullint {
+ E_fullint_zero = 0,
+ E_fullint_one = 1,
+ E_fullint_max31 = static_cast<int32_t>(0x7FFFFFFF),
+ E_fullint_min32 = INT32_MIN,
+ };
+
+ VerifySimpleAtoiGood<E_fullint>(E_fullint_zero, E_fullint_zero);
+ VerifySimpleAtoiGood<E_fullint>(E_fullint_one, E_fullint_one);
+ VerifySimpleAtoiGood<E_fullint>(E_fullint_max31, E_fullint_max31);
+ VerifySimpleAtoiGood<E_fullint>(E_fullint_min32, E_fullint_min32);
+
+ enum E_biguint {
+ E_biguint_zero = 0,
+ E_biguint_one = 1,
+ E_biguint_max31 = static_cast<uint32_t>(0x7FFFFFFF),
+ E_biguint_max32 = static_cast<uint32_t>(0xFFFFFFFF),
+ };
+
+ VerifySimpleAtoiGood<E_biguint>(E_biguint_zero, E_biguint_zero);
+ VerifySimpleAtoiGood<E_biguint>(E_biguint_one, E_biguint_one);
+ VerifySimpleAtoiGood<E_biguint>(E_biguint_max31, E_biguint_max31);
+ VerifySimpleAtoiGood<E_biguint>(E_biguint_max32, E_biguint_max32);
+}
+
+TEST(stringtest, safe_strto32_base) {
+ int32_t value;
+ EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16));
+ EXPECT_EQ(0x34234324, value);
+
+ EXPECT_TRUE(safe_strto32_base("0X34234324", &value, 16));
+ EXPECT_EQ(0x34234324, value);
+
+ EXPECT_TRUE(safe_strto32_base("34234324", &value, 16));
+ EXPECT_EQ(0x34234324, value);
+
+ EXPECT_TRUE(safe_strto32_base("0", &value, 16));
+ EXPECT_EQ(0, value);
+
+ EXPECT_TRUE(safe_strto32_base(" \t\n -0x34234324", &value, 16));
+ EXPECT_EQ(-0x34234324, value);
+
+ EXPECT_TRUE(safe_strto32_base(" \t\n -34234324", &value, 16));
+ EXPECT_EQ(-0x34234324, value);
+
+ EXPECT_TRUE(safe_strto32_base("7654321", &value, 8));
+ EXPECT_EQ(07654321, value);
+
+ EXPECT_TRUE(safe_strto32_base("-01234", &value, 8));
+ EXPECT_EQ(-01234, value);
+
+ EXPECT_FALSE(safe_strto32_base("1834", &value, 8));
+
+ // Autodetect base.
+ EXPECT_TRUE(safe_strto32_base("0", &value, 0));
+ EXPECT_EQ(0, value);
+
+ EXPECT_TRUE(safe_strto32_base("077", &value, 0));
+ EXPECT_EQ(077, value); // Octal interpretation
+
+ // Leading zero indicates octal, but then followed by invalid digit.
+ EXPECT_FALSE(safe_strto32_base("088", &value, 0));
+
+ // Leading 0x indicated hex, but then followed by invalid digit.
+ EXPECT_FALSE(safe_strto32_base("0xG", &value, 0));
+
+ // Base-10 version.
+ EXPECT_TRUE(safe_strto32_base("34234324", &value, 10));
+ EXPECT_EQ(34234324, value);
+
+ EXPECT_TRUE(safe_strto32_base("0", &value, 10));
+ EXPECT_EQ(0, value);
+
+ EXPECT_TRUE(safe_strto32_base(" \t\n -34234324", &value, 10));
+ EXPECT_EQ(-34234324, value);
+
+ EXPECT_TRUE(safe_strto32_base("34234324 \n\t ", &value, 10));
+ EXPECT_EQ(34234324, value);
+
+ // Invalid ints.
+ EXPECT_FALSE(safe_strto32_base("", &value, 10));
+ EXPECT_FALSE(safe_strto32_base(" ", &value, 10));
+ EXPECT_FALSE(safe_strto32_base("abc", &value, 10));
+ EXPECT_FALSE(safe_strto32_base("34234324a", &value, 10));
+ EXPECT_FALSE(safe_strto32_base("34234.3", &value, 10));
+
+ // Out of bounds.
+ EXPECT_FALSE(safe_strto32_base("2147483648", &value, 10));
+ EXPECT_FALSE(safe_strto32_base("-2147483649", &value, 10));
+
+ // String version.
+ EXPECT_TRUE(safe_strto32_base(std::string("0x1234"), &value, 16));
+ EXPECT_EQ(0x1234, value);
+
+ // Base-10 std::string version.
+ EXPECT_TRUE(safe_strto32_base("1234", &value, 10));
+ EXPECT_EQ(1234, value);
+}
+
+TEST(stringtest, safe_strto32_range) {
+ // These tests verify underflow/overflow behaviour.
+ int32_t value;
+ EXPECT_FALSE(safe_strto32_base("2147483648", &value, 10));
+ EXPECT_EQ(std::numeric_limits<int32_t>::max(), value);
+
+ EXPECT_TRUE(safe_strto32_base("-2147483648", &value, 10));
+ EXPECT_EQ(std::numeric_limits<int32_t>::min(), value);
+
+ EXPECT_FALSE(safe_strto32_base("-2147483649", &value, 10));
+ EXPECT_EQ(std::numeric_limits<int32_t>::min(), value);
+}
+
+TEST(stringtest, safe_strto64_range) {
+ // These tests verify underflow/overflow behaviour.
+ int64_t value;
+ EXPECT_FALSE(safe_strto64_base("9223372036854775808", &value, 10));
+ EXPECT_EQ(std::numeric_limits<int64_t>::max(), value);
+
+ EXPECT_TRUE(safe_strto64_base("-9223372036854775808", &value, 10));
+ EXPECT_EQ(std::numeric_limits<int64_t>::min(), value);
+
+ EXPECT_FALSE(safe_strto64_base("-9223372036854775809", &value, 10));
+ EXPECT_EQ(std::numeric_limits<int64_t>::min(), value);
+}
+
+TEST(stringtest, safe_strto32_leading_substring) {
+ // These tests verify this comment in numbers.h:
+ // On error, returns false, and sets *value to: [...]
+ // conversion of leading substring if available ("123@@@" -> 123)
+ // 0 if no leading substring available
+ int32_t value;
+ EXPECT_FALSE(safe_strto32_base("04069@@@", &value, 10));
+ EXPECT_EQ(4069, value);
+
+ EXPECT_FALSE(safe_strto32_base("04069@@@", &value, 8));
+ EXPECT_EQ(0406, value);
+
+ EXPECT_FALSE(safe_strto32_base("04069balloons", &value, 10));
+ EXPECT_EQ(4069, value);
+
+ EXPECT_FALSE(safe_strto32_base("04069balloons", &value, 16));
+ EXPECT_EQ(0x4069ba, value);
+
+ EXPECT_FALSE(safe_strto32_base("@@@", &value, 10));
+ EXPECT_EQ(0, value); // there was no leading substring
+}
+
+TEST(stringtest, safe_strto64_leading_substring) {
+ // These tests verify this comment in numbers.h:
+ // On error, returns false, and sets *value to: [...]
+ // conversion of leading substring if available ("123@@@" -> 123)
+ // 0 if no leading substring available
+ int64_t value;
+ EXPECT_FALSE(safe_strto64_base("04069@@@", &value, 10));
+ EXPECT_EQ(4069, value);
+
+ EXPECT_FALSE(safe_strto64_base("04069@@@", &value, 8));
+ EXPECT_EQ(0406, value);
+
+ EXPECT_FALSE(safe_strto64_base("04069balloons", &value, 10));
+ EXPECT_EQ(4069, value);
+
+ EXPECT_FALSE(safe_strto64_base("04069balloons", &value, 16));
+ EXPECT_EQ(0x4069ba, value);
+
+ EXPECT_FALSE(safe_strto64_base("@@@", &value, 10));
+ EXPECT_EQ(0, value); // there was no leading substring
+}
+
+TEST(stringtest, safe_strto64_base) {
+ int64_t value;
+ EXPECT_TRUE(safe_strto64_base("0x3423432448783446", &value, 16));
+ EXPECT_EQ(int64_t{0x3423432448783446}, value);
+
+ EXPECT_TRUE(safe_strto64_base("3423432448783446", &value, 16));
+ EXPECT_EQ(int64_t{0x3423432448783446}, value);
+
+ EXPECT_TRUE(safe_strto64_base("0", &value, 16));
+ EXPECT_EQ(0, value);
+
+ EXPECT_TRUE(safe_strto64_base(" \t\n -0x3423432448783446", &value, 16));
+ EXPECT_EQ(int64_t{-0x3423432448783446}, value);
+
+ EXPECT_TRUE(safe_strto64_base(" \t\n -3423432448783446", &value, 16));
+ EXPECT_EQ(int64_t{-0x3423432448783446}, value);
+
+ EXPECT_TRUE(safe_strto64_base("123456701234567012", &value, 8));
+ EXPECT_EQ(int64_t{0123456701234567012}, value);
+
+ EXPECT_TRUE(safe_strto64_base("-017777777777777", &value, 8));
+ EXPECT_EQ(int64_t{-017777777777777}, value);
+
+ EXPECT_FALSE(safe_strto64_base("19777777777777", &value, 8));
+
+ // Autodetect base.
+ EXPECT_TRUE(safe_strto64_base("0", &value, 0));
+ EXPECT_EQ(0, value);
+
+ EXPECT_TRUE(safe_strto64_base("077", &value, 0));
+ EXPECT_EQ(077, value); // Octal interpretation
+
+ // Leading zero indicates octal, but then followed by invalid digit.
+ EXPECT_FALSE(safe_strto64_base("088", &value, 0));
+
+ // Leading 0x indicated hex, but then followed by invalid digit.
+ EXPECT_FALSE(safe_strto64_base("0xG", &value, 0));
+
+ // Base-10 version.
+ EXPECT_TRUE(safe_strto64_base("34234324487834466", &value, 10));
+ EXPECT_EQ(int64_t{34234324487834466}, value);
+
+ EXPECT_TRUE(safe_strto64_base("0", &value, 10));
+ EXPECT_EQ(0, value);
+
+ EXPECT_TRUE(safe_strto64_base(" \t\n -34234324487834466", &value, 10));
+ EXPECT_EQ(int64_t{-34234324487834466}, value);
+
+ EXPECT_TRUE(safe_strto64_base("34234324487834466 \n\t ", &value, 10));
+ EXPECT_EQ(int64_t{34234324487834466}, value);
+
+ // Invalid ints.
+ EXPECT_FALSE(safe_strto64_base("", &value, 10));
+ EXPECT_FALSE(safe_strto64_base(" ", &value, 10));
+ EXPECT_FALSE(safe_strto64_base("abc", &value, 10));
+ EXPECT_FALSE(safe_strto64_base("34234324487834466a", &value, 10));
+ EXPECT_FALSE(safe_strto64_base("34234487834466.3", &value, 10));
+
+ // Out of bounds.
+ EXPECT_FALSE(safe_strto64_base("9223372036854775808", &value, 10));
+ EXPECT_FALSE(safe_strto64_base("-9223372036854775809", &value, 10));
+
+ // String version.
+ EXPECT_TRUE(safe_strto64_base(std::string("0x1234"), &value, 16));
+ EXPECT_EQ(0x1234, value);
+
+ // Base-10 std::string version.
+ EXPECT_TRUE(safe_strto64_base("1234", &value, 10));
+ EXPECT_EQ(1234, value);
+}
+
+const size_t kNumRandomTests = 10000;
+
+template <typename IntType>
+void test_random_integer_parse_base(bool (*parse_func)(absl::string_view,
+ IntType* value,
+ int base)) {
+ using RandomEngine = std::minstd_rand0;
+ std::random_device rd;
+ RandomEngine rng(rd());
+ std::uniform_int_distribution<IntType> random_int(
+ std::numeric_limits<IntType>::min());
+ std::uniform_int_distribution<int> random_base(2, 35);
+ for (size_t i = 0; i < kNumRandomTests; i++) {
+ IntType value = random_int(rng);
+ int base = random_base(rng);
+ std::string str_value;
+ EXPECT_TRUE(Itoa<IntType>(value, base, &str_value));
+ IntType parsed_value;
+
+ // Test successful parse
+ EXPECT_TRUE(parse_func(str_value, &parsed_value, base));
+ EXPECT_EQ(parsed_value, value);
+
+ // Test overflow
+ EXPECT_FALSE(
+ parse_func(absl::StrCat(std::numeric_limits<IntType>::max(), value),
+ &parsed_value, base));
+
+ // Test underflow
+ if (std::numeric_limits<IntType>::min() < 0) {
+ EXPECT_FALSE(
+ parse_func(absl::StrCat(std::numeric_limits<IntType>::min(), value),
+ &parsed_value, base));
+ } else {
+ EXPECT_FALSE(parse_func(absl::StrCat("-", value), &parsed_value, base));
+ }
+ }
+}
+
+TEST(stringtest, safe_strto32_random) {
+ test_random_integer_parse_base<int32_t>(&safe_strto32_base);
+}
+TEST(stringtest, safe_strto64_random) {
+ test_random_integer_parse_base<int64_t>(&safe_strto64_base);
+}
+TEST(stringtest, safe_strtou32_random) {
+ test_random_integer_parse_base<uint32_t>(&safe_strtou32_base);
+}
+TEST(stringtest, safe_strtou64_random) {
+ test_random_integer_parse_base<uint64_t>(&safe_strtou64_base);
+}
+
+TEST(stringtest, safe_strtou32_base) {
+ for (int i = 0; strtouint32_test_cases[i].str != nullptr; ++i) {
+ const auto& e = strtouint32_test_cases[i];
+ uint32_t value;
+ EXPECT_EQ(e.expect_ok, safe_strtou32_base(e.str, &value, e.base))
+ << "str=\"" << e.str << "\" base=" << e.base;
+ if (e.expect_ok) {
+ EXPECT_EQ(e.expected, value) << "i=" << i << " str=\"" << e.str
+ << "\" base=" << e.base;
+ }
+ }
+}
+
+TEST(stringtest, safe_strtou32_base_length_delimited) {
+ for (int i = 0; strtouint32_test_cases[i].str != nullptr; ++i) {
+ const auto& e = strtouint32_test_cases[i];
+ std::string tmp(e.str);
+ tmp.append("12"); // Adds garbage at the end.
+
+ uint32_t value;
+ EXPECT_EQ(e.expect_ok,
+ safe_strtou32_base(absl::string_view(tmp.data(), strlen(e.str)),
+ &value, e.base))
+ << "str=\"" << e.str << "\" base=" << e.base;
+ if (e.expect_ok) {
+ EXPECT_EQ(e.expected, value) << "i=" << i << " str=" << e.str
+ << " base=" << e.base;
+ }
+ }
+}
+
+TEST(stringtest, safe_strtou64_base) {
+ for (int i = 0; strtouint64_test_cases[i].str != nullptr; ++i) {
+ const auto& e = strtouint64_test_cases[i];
+ uint64_t value;
+ EXPECT_EQ(e.expect_ok, safe_strtou64_base(e.str, &value, e.base))
+ << "str=\"" << e.str << "\" base=" << e.base;
+ if (e.expect_ok) {
+ EXPECT_EQ(e.expected, value) << "str=" << e.str << " base=" << e.base;
+ }
+ }
+}
+
+TEST(stringtest, safe_strtou64_base_length_delimited) {
+ for (int i = 0; strtouint64_test_cases[i].str != nullptr; ++i) {
+ const auto& e = strtouint64_test_cases[i];
+ std::string tmp(e.str);
+ tmp.append("12"); // Adds garbage at the end.
+
+ uint64_t value;
+ EXPECT_EQ(e.expect_ok,
+ safe_strtou64_base(absl::string_view(tmp.data(), strlen(e.str)),
+ &value, e.base))
+ << "str=\"" << e.str << "\" base=" << e.base;
+ if (e.expect_ok) {
+ EXPECT_EQ(e.expected, value) << "str=\"" << e.str << "\" base=" << e.base;
+ }
+ }
+}
+
+// feenableexcept() and fedisableexcept() are missing on Mac OS X, MSVC.
+#if defined(_MSC_VER) || defined(__APPLE__)
+#define ABSL_MISSING_FEENABLEEXCEPT 1
+#define ABSL_MISSING_FEDISABLEEXCEPT 1
+#endif
+
+class SimpleDtoaTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ // Store the current floating point env & clear away any pending exceptions.
+ feholdexcept(&fp_env_);
+#ifndef ABSL_MISSING_FEENABLEEXCEPT
+ // Turn on floating point exceptions.
+ feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+ }
+
+ void TearDown() override {
+ // Restore the floating point environment to the original state.
+ // In theory fedisableexcept is unnecessary; fesetenv will also do it.
+ // In practice, our toolchains have subtle bugs.
+#ifndef ABSL_MISSING_FEDISABLEEXCEPT
+ fedisableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
+#endif
+ fesetenv(&fp_env_);
+ }
+
+ std::string ToNineDigits(double value) {
+ char buffer[kFastToBufferSize]; // more than enough for %.9g
+ snprintf(buffer, sizeof(buffer), "%.9g", value);
+ return buffer;
+ }
+
+ fenv_t fp_env_;
+};
+
+// Run the given runnable functor for "cases" test cases, chosen over the
+// available range of float. pi and e and 1/e are seeded, and then all
+// available integer powers of 2 and 10 are multiplied against them. In
+// addition to trying all those values, we try the next higher and next lower
+// float, and then we add additional test cases evenly distributed between them.
+// Each test case is passed to runnable as both a positive and negative value.
+template <typename R>
+void ExhaustiveFloat(uint32_t cases, R&& runnable) {
+ runnable(0.0f);
+ runnable(-0.0f);
+ if (cases >= 2e9) { // more than 2 billion? Might as well run them all.
+ for (float f = 0; f < std::numeric_limits<float>::max(); ) {
+ f = nextafterf(f, std::numeric_limits<float>::max());
+ runnable(-f);
+ runnable(f);
+ }
+ return;
+ }
+ std::set<float> floats = {3.4028234e38f};
+ for (float f : {1.0, 3.14159265, 2.718281828, 1 / 2.718281828}) {
+ for (float testf = f; testf != 0; testf *= 0.1f) floats.insert(testf);
+ for (float testf = f; testf != 0; testf *= 0.5f) floats.insert(testf);
+ for (float testf = f; testf < 3e38f / 2; testf *= 2.0f)
+ floats.insert(testf);
+ for (float testf = f; testf < 3e38f / 10; testf *= 10) floats.insert(testf);
+ }
+
+ float last = *floats.begin();
+
+ runnable(last);
+ runnable(-last);
+ int iters_per_float = cases / floats.size();
+ if (iters_per_float == 0) iters_per_float = 1;
+ for (float f : floats) {
+ if (f == last) continue;
+ float testf = nextafter(last, std::numeric_limits<float>::max());
+ runnable(testf);
+ runnable(-testf);
+ last = testf;
+ if (f == last) continue;
+ double step = (double{f} - last) / iters_per_float;
+ for (double d = last + step; d < f; d += step) {
+ testf = d;
+ if (testf != last) {
+ runnable(testf);
+ runnable(-testf);
+ last = testf;
+ }
+ }
+ testf = nextafter(f, 0.0f);
+ if (testf > last) {
+ runnable(testf);
+ runnable(-testf);
+ last = testf;
+ }
+ if (f != last) {
+ runnable(f);
+ runnable(-f);
+ last = f;
+ }
+ }
+}
+
+TEST_F(SimpleDtoaTest, ExhaustiveFloatToBuffer) {
+ uint64_t test_count = 0;
+ std::vector<float> mismatches;
+ ExhaustiveFloat(kFloatNumCases, [&](float f) {
+ if (f != f) return; // rule out NaNs
+ ++test_count;
+ char fastbuf[kFastToBufferSize];
+ RoundTripFloatToBuffer(f, fastbuf);
+ float round_trip = strtof(fastbuf, nullptr);
+ if (f != round_trip) {
+ mismatches.push_back(f);
+ if (mismatches.size() < 10) {
+ ABSL_RAW_LOG(ERROR, "%s",
+ absl::StrCat("Round-trip failure with float. ", "f=", f,
+ "=", ToNineDigits(f), " fast=", fastbuf,
+ " strtof=", ToNineDigits(round_trip))
+ .c_str());
+ }
+ }
+ });
+ if (!mismatches.empty()) {
+ EXPECT_EQ(mismatches.size(), 0);
+ for (size_t i = 0; i < mismatches.size(); ++i) {
+ if (i > 100) i = mismatches.size() - 1;
+ float f = mismatches[i];
+ std::string msg =
+ absl::StrCat("Mismatch #", i, " f=", f, " (", ToNineDigits(f), ")");
+ char buf[kFastToBufferSize];
+ absl::StrAppend(&msg, " fast='", RoundTripFloatToBuffer(f, buf), "'");
+ float rt = strtof(buf, nullptr);
+ absl::StrAppend(&msg, " rt=", ToNineDigits(rt));
+ ABSL_RAW_LOG(ERROR, "%s", msg.c_str());
+ }
+ }
+}
+
+TEST_F(SimpleDtoaTest, ExhaustiveDoubleToSixDigits) {
+ uint64_t test_count = 0;
+ std::vector<double> mismatches;
+ auto checker = [&](double d) {
+ if (d != d) return; // rule out NaNs
+ ++test_count;
+ char sixdigitsbuf[kSixDigitsToBufferSize] = {0};
+ SixDigitsToBuffer(d, sixdigitsbuf);
+ char snprintfbuf[kSixDigitsToBufferSize] = {0};
+ snprintf(snprintfbuf, kSixDigitsToBufferSize, "%g", d);
+ if (strcmp(sixdigitsbuf, snprintfbuf) != 0) {
+ mismatches.push_back(d);
+ if (mismatches.size() < 10) {
+ ABSL_RAW_LOG(ERROR, "%s",
+ absl::StrCat("Six-digit failure with double. ", "d=", d,
+ "=", d, " sixdigits=", sixdigitsbuf,
+ " printf(%g)=", snprintfbuf)
+ .c_str());
+ }
+ }
+ };
+ // Some quick sanity checks...
+ checker(5e-324);
+ checker(1e-308);
+ checker(1.0);
+ checker(1.000005);
+ checker(1.7976931348623157e308);
+ checker(0.00390625);
+#ifndef _MSC_VER
+ // on MSVC, snprintf() rounds it to 0.00195313. SixDigitsToBuffer() rounds it
+ // to 0.00195312 (round half to even).
+ checker(0.001953125);
+#endif
+ checker(0.005859375);
+ // Some cases where the rounding is very very close
+ checker(1.089095e-15);
+ checker(3.274195e-55);
+ checker(6.534355e-146);
+ checker(2.920845e+234);
+
+ if (mismatches.empty()) {
+ test_count = 0;
+ ExhaustiveFloat(kFloatNumCases, checker);
+
+ test_count = 0;
+ std::vector<int> digit_testcases{
+ 100000, 100001, 100002, 100005, 100010, 100020, 100050, 100100, // misc
+ 195312, 195313, // 1.953125 is a case where we round down, just barely.
+ 200000, 500000, 800000, // misc mid-range cases
+ 585937, 585938, // 5.859375 is a case where we round up, just barely.
+ 900000, 990000, 999000, 999900, 999990, 999996, 999997, 999998, 999999};
+ if (kFloatNumCases >= 1e9) {
+ // If at least 1 billion test cases were requested, user wants an
+ // exhaustive test. So let's test all mantissas, too.
+ constexpr int min_mantissa = 100000, max_mantissa = 999999;
+ digit_testcases.resize(max_mantissa - min_mantissa + 1);
+ std::iota(digit_testcases.begin(), digit_testcases.end(), min_mantissa);
+ }
+
+ for (int exponent = -324; exponent <= 308; ++exponent) {
+ double powten = pow(10.0, exponent);
+ if (powten == 0) powten = 5e-324;
+ if (kFloatNumCases >= 1e9) {
+ // The exhaustive test takes a very long time, so log progress.
+ char buf[kSixDigitsToBufferSize];
+ ABSL_RAW_LOG(
+ INFO, "%s",
+ absl::StrCat("Exp ", exponent, " powten=", powten, "(",
+ powten, ") (",
+ std::string(buf, SixDigitsToBuffer(powten, buf)), ")")
+ .c_str());
+ }
+ for (int digits : digit_testcases) {
+ if (exponent == 308 && digits >= 179769) break; // don't overflow!
+ double digiform = (digits + 0.5) * 0.00001;
+ double testval = digiform * powten;
+ double pretestval = nextafter(testval, 0);
+ double posttestval = nextafter(testval, 1.7976931348623157e308);
+ checker(testval);
+ checker(pretestval);
+ checker(posttestval);
+ }
+ }
+ } else {
+ EXPECT_EQ(mismatches.size(), 0);
+ for (size_t i = 0; i < mismatches.size(); ++i) {
+ if (i > 100) i = mismatches.size() - 1;
+ double d = mismatches[i];
+ char sixdigitsbuf[kSixDigitsToBufferSize] = {0};
+ SixDigitsToBuffer(d, sixdigitsbuf);
+ char snprintfbuf[kSixDigitsToBufferSize] = {0};
+ snprintf(snprintfbuf, kSixDigitsToBufferSize, "%g", d);
+ double before = nextafter(d, 0.0);
+ double after = nextafter(d, 1.7976931348623157e308);
+ char b1[32], b2[kSixDigitsToBufferSize];
+ ABSL_RAW_LOG(
+ ERROR, "%s",
+ absl::StrCat(
+ "Mismatch #", i, " d=", d, " (", ToNineDigits(d), ")",
+ " sixdigits='", sixdigitsbuf, "'", " snprintf='", snprintfbuf,
+ "'", " Before.=", PerfectDtoa(before), " ",
+ (SixDigitsToBuffer(before, b2), b2),
+ " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", before), b1),
+ " Perfect=", PerfectDtoa(d), " ", (SixDigitsToBuffer(d, b2), b2),
+ " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", d), b1),
+ " After.=.", PerfectDtoa(after), " ",
+ (SixDigitsToBuffer(after, b2), b2),
+ " vs snprintf=", (snprintf(b1, sizeof(b1), "%g", after), b1))
+ .c_str());
+ }
+ }
+}
+
+TEST(StrToInt32, Partial) {
+ struct Int32TestLine {
+ std::string input;
+ bool status;
+ int32_t value;
+ };
+ const int32_t int32_min = std::numeric_limits<int32_t>::min();
+ const int32_t int32_max = std::numeric_limits<int32_t>::max();
+ Int32TestLine int32_test_line[] = {
+ {"", false, 0},
+ {" ", false, 0},
+ {"-", false, 0},
+ {"123@@@", false, 123},
+ {absl::StrCat(int32_min, int32_max), false, int32_min},
+ {absl::StrCat(int32_max, int32_max), false, int32_max},
+ };
+
+ for (const Int32TestLine& test_line : int32_test_line) {
+ int32_t value = -2;
+ bool status = safe_strto32_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = -2;
+ status = safe_strto32_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = -2;
+ status = safe_strto32_base(absl::string_view(test_line.input), &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ }
+}
+
+TEST(StrToUint32, Partial) {
+ struct Uint32TestLine {
+ std::string input;
+ bool status;
+ uint32_t value;
+ };
+ const uint32_t uint32_max = std::numeric_limits<uint32_t>::max();
+ Uint32TestLine uint32_test_line[] = {
+ {"", false, 0},
+ {" ", false, 0},
+ {"-", false, 0},
+ {"123@@@", false, 123},
+ {absl::StrCat(uint32_max, uint32_max), false, uint32_max},
+ };
+
+ for (const Uint32TestLine& test_line : uint32_test_line) {
+ uint32_t value = 2;
+ bool status = safe_strtou32_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = 2;
+ status = safe_strtou32_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = 2;
+ status = safe_strtou32_base(absl::string_view(test_line.input), &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ }
+}
+
+TEST(StrToInt64, Partial) {
+ struct Int64TestLine {
+ std::string input;
+ bool status;
+ int64_t value;
+ };
+ const int64_t int64_min = std::numeric_limits<int64_t>::min();
+ const int64_t int64_max = std::numeric_limits<int64_t>::max();
+ Int64TestLine int64_test_line[] = {
+ {"", false, 0},
+ {" ", false, 0},
+ {"-", false, 0},
+ {"123@@@", false, 123},
+ {absl::StrCat(int64_min, int64_max), false, int64_min},
+ {absl::StrCat(int64_max, int64_max), false, int64_max},
+ };
+
+ for (const Int64TestLine& test_line : int64_test_line) {
+ int64_t value = -2;
+ bool status = safe_strto64_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = -2;
+ status = safe_strto64_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = -2;
+ status = safe_strto64_base(absl::string_view(test_line.input), &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ }
+}
+
+TEST(StrToUint64, Partial) {
+ struct Uint64TestLine {
+ std::string input;
+ bool status;
+ uint64_t value;
+ };
+ const uint64_t uint64_max = std::numeric_limits<uint64_t>::max();
+ Uint64TestLine uint64_test_line[] = {
+ {"", false, 0},
+ {" ", false, 0},
+ {"-", false, 0},
+ {"123@@@", false, 123},
+ {absl::StrCat(uint64_max, uint64_max), false, uint64_max},
+ };
+
+ for (const Uint64TestLine& test_line : uint64_test_line) {
+ uint64_t value = 2;
+ bool status = safe_strtou64_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = 2;
+ status = safe_strtou64_base(test_line.input, &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ value = 2;
+ status = safe_strtou64_base(absl::string_view(test_line.input), &value, 10);
+ EXPECT_EQ(test_line.status, status) << test_line.input;
+ EXPECT_EQ(test_line.value, value) << test_line.input;
+ }
+}
+
+TEST(StrToInt32Base, PrefixOnly) {
+ struct Int32TestLine {
+ std::string input;
+ bool status;
+ int32_t value;
+ };
+ Int32TestLine int32_test_line[] = {
+ { "", false, 0 },
+ { "-", false, 0 },
+ { "-0", true, 0 },
+ { "0", true, 0 },
+ { "0x", false, 0 },
+ { "-0x", false, 0 },
+ };
+ const int base_array[] = { 0, 2, 8, 10, 16 };
+
+ for (const Int32TestLine& line : int32_test_line) {
+ for (const int base : base_array) {
+ int32_t value = 2;
+ bool status = safe_strto32_base(line.input.c_str(), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strto32_base(line.input, &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strto32_base(absl::string_view(line.input), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ }
+ }
+}
+
+TEST(StrToUint32Base, PrefixOnly) {
+ struct Uint32TestLine {
+ std::string input;
+ bool status;
+ uint32_t value;
+ };
+ Uint32TestLine uint32_test_line[] = {
+ { "", false, 0 },
+ { "0", true, 0 },
+ { "0x", false, 0 },
+ };
+ const int base_array[] = { 0, 2, 8, 10, 16 };
+
+ for (const Uint32TestLine& line : uint32_test_line) {
+ for (const int base : base_array) {
+ uint32_t value = 2;
+ bool status = safe_strtou32_base(line.input.c_str(), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strtou32_base(line.input, &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strtou32_base(absl::string_view(line.input), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ }
+ }
+}
+
+TEST(StrToInt64Base, PrefixOnly) {
+ struct Int64TestLine {
+ std::string input;
+ bool status;
+ int64_t value;
+ };
+ Int64TestLine int64_test_line[] = {
+ { "", false, 0 },
+ { "-", false, 0 },
+ { "-0", true, 0 },
+ { "0", true, 0 },
+ { "0x", false, 0 },
+ { "-0x", false, 0 },
+ };
+ const int base_array[] = { 0, 2, 8, 10, 16 };
+
+ for (const Int64TestLine& line : int64_test_line) {
+ for (const int base : base_array) {
+ int64_t value = 2;
+ bool status = safe_strto64_base(line.input.c_str(), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strto64_base(line.input, &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strto64_base(absl::string_view(line.input), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ }
+ }
+}
+
+TEST(StrToUint64Base, PrefixOnly) {
+ struct Uint64TestLine {
+ std::string input;
+ bool status;
+ uint64_t value;
+ };
+ Uint64TestLine uint64_test_line[] = {
+ { "", false, 0 },
+ { "0", true, 0 },
+ { "0x", false, 0 },
+ };
+ const int base_array[] = { 0, 2, 8, 10, 16 };
+
+ for (const Uint64TestLine& line : uint64_test_line) {
+ for (const int base : base_array) {
+ uint64_t value = 2;
+ bool status = safe_strtou64_base(line.input.c_str(), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strtou64_base(line.input, &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ value = 2;
+ status = safe_strtou64_base(absl::string_view(line.input), &value, base);
+ EXPECT_EQ(line.status, status) << line.input << " " << base;
+ EXPECT_EQ(line.value, value) << line.input << " " << base;
+ }
+ }
+}
+
+} // namespace
diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc
new file mode 100644
index 00000000..0c75655c
--- /dev/null
+++ b/absl/strings/str_cat.cc
@@ -0,0 +1,208 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/str_cat.h"
+
+#include <cstdarg>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+#include "absl/strings/ascii.h"
+#include "absl/strings/internal/resize_uninitialized.h"
+
+namespace absl {
+
+AlphaNum::AlphaNum(Hex hex) {
+ char* const end = &digits_[numbers_internal::kFastToBufferSize];
+ char* writer = end;
+ uint64_t value = hex.value;
+ static const char hexdigits[] = "0123456789abcdef";
+ do {
+ *--writer = hexdigits[value & 0xF];
+ value >>= 4;
+ } while (value != 0);
+
+ char* beg;
+ if (end - writer < hex.width) {
+ beg = end - hex.width;
+ std::fill_n(beg, writer - beg, hex.fill);
+ } else {
+ beg = writer;
+ }
+
+ piece_ = absl::string_view(beg, end - beg);
+}
+
+// ----------------------------------------------------------------------
+// StrCat()
+// This merges the given strings or integers, with no delimiter. This
+// is designed to be the fastest possible way to construct a std::string out
+// of a mix of raw C strings, StringPieces, strings, and integer values.
+// ----------------------------------------------------------------------
+
+// Append is merely a version of memcpy that returns the address of the byte
+// after the area just overwritten.
+static char* Append(char* out, const AlphaNum& x) {
+ // memcpy is allowed to overwrite arbitrary memory, so doing this after the
+ // call would force an extra fetch of x.size().
+ char* after = out + x.size();
+ memcpy(out, x.data(), x.size());
+ return after;
+}
+
+std::string StrCat(const AlphaNum& a, const AlphaNum& b) {
+ std::string result;
+ absl::strings_internal::STLStringResizeUninitialized(&result,
+ a.size() + b.size());
+ char* const begin = &*result.begin();
+ char* out = begin;
+ out = Append(out, a);
+ out = Append(out, b);
+ assert(out == begin + result.size());
+ return result;
+}
+
+std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) {
+ std::string result;
+ strings_internal::STLStringResizeUninitialized(
+ &result, a.size() + b.size() + c.size());
+ char* const begin = &*result.begin();
+ char* out = begin;
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+ assert(out == begin + result.size());
+ return result;
+}
+
+std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c,
+ const AlphaNum& d) {
+ std::string result;
+ strings_internal::STLStringResizeUninitialized(
+ &result, a.size() + b.size() + c.size() + d.size());
+ char* const begin = &*result.begin();
+ char* out = begin;
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+ out = Append(out, d);
+ assert(out == begin + result.size());
+ return result;
+}
+
+namespace strings_internal {
+
+// Do not call directly - these are not part of the public API.
+std::string CatPieces(std::initializer_list<absl::string_view> pieces) {
+ std::string result;
+ size_t total_size = 0;
+ for (const absl::string_view piece : pieces) total_size += piece.size();
+ strings_internal::STLStringResizeUninitialized(&result, total_size);
+
+ char* const begin = &*result.begin();
+ char* out = begin;
+ for (const absl::string_view piece : pieces) {
+ const size_t this_size = piece.size();
+ memcpy(out, piece.data(), this_size);
+ out += this_size;
+ }
+ assert(out == begin + result.size());
+ return result;
+}
+
+// It's possible to call StrAppend with an absl::string_view that is itself a
+// fragment of the std::string we're appending to. However the results of this are
+// random. Therefore, check for this in debug mode. Use unsigned math so we
+// only have to do one comparison. Note, there's an exception case: appending an
+// empty std::string is always allowed.
+#define ASSERT_NO_OVERLAP(dest, src) \
+ assert(((src).size() == 0) || \
+ (uintptr_t((src).data() - (dest).data()) > uintptr_t((dest).size())))
+
+void AppendPieces(std::string* dest,
+ std::initializer_list<absl::string_view> pieces) {
+ size_t old_size = dest->size();
+ size_t total_size = old_size;
+ for (const absl::string_view piece : pieces) {
+ ASSERT_NO_OVERLAP(*dest, piece);
+ total_size += piece.size();
+ }
+ strings_internal::STLStringResizeUninitialized(dest, total_size);
+
+ char* const begin = &*dest->begin();
+ char* out = begin + old_size;
+ for (const absl::string_view piece : pieces) {
+ const size_t this_size = piece.size();
+ memcpy(out, piece.data(), this_size);
+ out += this_size;
+ }
+ assert(out == begin + dest->size());
+}
+
+} // namespace strings_internal
+
+void StrAppend(std::string* dest, const AlphaNum& a) {
+ ASSERT_NO_OVERLAP(*dest, a);
+ dest->append(a.data(), a.size());
+}
+
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) {
+ ASSERT_NO_OVERLAP(*dest, a);
+ ASSERT_NO_OVERLAP(*dest, b);
+ std::string::size_type old_size = dest->size();
+ strings_internal::STLStringResizeUninitialized(
+ dest, old_size + a.size() + b.size());
+ char* const begin = &*dest->begin();
+ char* out = begin + old_size;
+ out = Append(out, a);
+ out = Append(out, b);
+ assert(out == begin + dest->size());
+}
+
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c) {
+ ASSERT_NO_OVERLAP(*dest, a);
+ ASSERT_NO_OVERLAP(*dest, b);
+ ASSERT_NO_OVERLAP(*dest, c);
+ std::string::size_type old_size = dest->size();
+ strings_internal::STLStringResizeUninitialized(
+ dest, old_size + a.size() + b.size() + c.size());
+ char* const begin = &*dest->begin();
+ char* out = begin + old_size;
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+ assert(out == begin + dest->size());
+}
+
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c, const AlphaNum& d) {
+ ASSERT_NO_OVERLAP(*dest, a);
+ ASSERT_NO_OVERLAP(*dest, b);
+ ASSERT_NO_OVERLAP(*dest, c);
+ ASSERT_NO_OVERLAP(*dest, d);
+ std::string::size_type old_size = dest->size();
+ strings_internal::STLStringResizeUninitialized(
+ dest, old_size + a.size() + b.size() + c.size() + d.size());
+ char* const begin = &*dest->begin();
+ char* out = begin + old_size;
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+ out = Append(out, d);
+ assert(out == begin + dest->size());
+}
+
+} // namespace absl
diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h
new file mode 100644
index 00000000..5b4c9baa
--- /dev/null
+++ b/absl/strings/str_cat.h
@@ -0,0 +1,348 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: str_cat.h
+// -----------------------------------------------------------------------------
+//
+// This package contains functions for efficiently concatenating and appending
+// strings: `StrCat()` and `StrAppend()`. Most of the work within these routines
+// is actually handled through use of a special AlphaNum type, which was
+// designed to be used as a parameter type that efficiently manages conversion
+// to strings and avoids copies in the above operations.
+//
+// Any routine accepting either a std::string or a number may accept `AlphaNum`.
+// The basic idea is that by accepting a `const AlphaNum &` as an argument
+// to your function, your callers will automagically convert bools, integers,
+// and floating point values to strings for you.
+//
+// NOTE: Use of `AlphaNum` outside of the //absl/strings package is unsupported
+// except for the specific case of function parameters of type `AlphaNum` or
+// `const AlphaNum &`. In particular, instantiating `AlphaNum` directly as a
+// stack variable is not supported.
+//
+// Conversion from 8-bit values is not accepted because, if it were, then an
+// attempt to pass ':' instead of ":" might result in a 58 ending up in your
+// result.
+//
+// Bools convert to "0" or "1".
+//
+// Floating point numbers are formatted with six-digit precision, which is
+// the default for "std::cout <<" or printf "%g" (the same as "%.6g").
+//
+//
+// You can convert to hexadecimal output rather than decimal output using the
+// `Hex` type contained here. To do so, pass `Hex(my_int)` as a parameter to
+// `StrCat()` or `StrAppend()`. You may specify a minimum hex field width using
+// a `PadSpec` enum, so the equivalent of `StringPrintf("%04x", my_int)` is
+// `absl::StrCat(absl::Hex(my_int, absl::kZeroPad4))`.
+//
+// -----------------------------------------------------------------------------
+
+#ifndef ABSL_STRINGS_STR_CAT_H_
+#define ABSL_STRINGS_STR_CAT_H_
+
+#include <array>
+#include <cstdint>
+#include <string>
+#include <type_traits>
+
+#include "absl/base/port.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+namespace strings_internal {
+// AlphaNumBuffer allows a way to pass a std::string to StrCat without having to do
+// memory allocation. It is simply a pair of a fixed-size character array, and
+// a size. Please don't use outside of absl, yet.
+template <size_t max_size>
+struct AlphaNumBuffer {
+ std::array<char, max_size> data;
+ size_t size;
+};
+
+} // namespace strings_internal
+
+// Enum that specifies the number of significant digits to return in a `Hex`
+// conversion and fill character to use. A `kZeroPad2` value, for example, would
+// produce hexadecimal strings such as "0A","0F" and 'kSpacePad5' value would
+// produce hexadecimal strings such as " A"," F".
+enum PadSpec {
+ kNoPad = 1,
+ kZeroPad2,
+ kZeroPad3,
+ kZeroPad4,
+ kZeroPad5,
+ kZeroPad6,
+ kZeroPad7,
+ kZeroPad8,
+ kZeroPad9,
+ kZeroPad10,
+ kZeroPad11,
+ kZeroPad12,
+ kZeroPad13,
+ kZeroPad14,
+ kZeroPad15,
+ kZeroPad16,
+
+ kSpacePad2 = kZeroPad2 + 64,
+ kSpacePad3,
+ kSpacePad4,
+ kSpacePad5,
+ kSpacePad6,
+ kSpacePad7,
+ kSpacePad8,
+ kSpacePad9,
+ kSpacePad10,
+ kSpacePad11,
+ kSpacePad12,
+ kSpacePad13,
+ kSpacePad14,
+ kSpacePad15,
+ kSpacePad16,
+};
+
+// -----------------------------------------------------------------------------
+// Hex
+// -----------------------------------------------------------------------------
+//
+// `Hex` stores a set of hexadecimal std::string conversion parameters for use
+// within `AlphaNum` std::string conversions.
+struct Hex {
+ uint64_t value;
+ uint8_t width;
+ char fill;
+
+ template <typename Int>
+ explicit Hex(Int v, PadSpec spec = absl::kNoPad,
+ typename std::enable_if<sizeof(Int) == 1>::type* = nullptr)
+ : Hex(spec, static_cast<uint8_t>(v)) {}
+ template <typename Int>
+ explicit Hex(Int v, PadSpec spec = absl::kNoPad,
+ typename std::enable_if<sizeof(Int) == 2>::type* = nullptr)
+ : Hex(spec, static_cast<uint16_t>(v)) {}
+ template <typename Int>
+ explicit Hex(Int v, PadSpec spec = absl::kNoPad,
+ typename std::enable_if<sizeof(Int) == 4>::type* = nullptr)
+ : Hex(spec, static_cast<uint32_t>(v)) {}
+ template <typename Int>
+ explicit Hex(Int v, PadSpec spec = absl::kNoPad,
+ typename std::enable_if<sizeof(Int) == 8>::type* = nullptr)
+ : Hex(spec, static_cast<uint64_t>(v)) {}
+
+ private:
+ Hex(PadSpec spec, uint64_t v)
+ : value(v),
+ width(spec == absl::kNoPad
+ ? 1
+ : spec >= absl::kSpacePad2 ? spec - absl::kSpacePad2 + 2
+ : spec - absl::kZeroPad2 + 2),
+ fill(spec >= absl::kSpacePad2 ? ' ' : '0') {}
+};
+
+// -----------------------------------------------------------------------------
+// AlphaNum
+// -----------------------------------------------------------------------------
+//
+// The `AlphaNum` class acts as the main parameter type for `StrCat()` and
+// `StrAppend()`, providing efficient conversion of numeric, boolean, and
+// hexadecimal values (through the `Hex` type) into strings.
+
+class AlphaNum {
+ public:
+ // No bool ctor -- bools convert to an integral type.
+ // A bool ctor would also convert incoming pointers (bletch).
+
+ AlphaNum(int x) // NOLINT(runtime/explicit)
+ : piece_(digits_,
+ numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+ AlphaNum(unsigned int x) // NOLINT(runtime/explicit)
+ : piece_(digits_,
+ numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+ AlphaNum(long x) // NOLINT(*)
+ : piece_(digits_,
+ numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+ AlphaNum(unsigned long x) // NOLINT(*)
+ : piece_(digits_,
+ numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+ AlphaNum(long long x) // NOLINT(*)
+ : piece_(digits_,
+ numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+ AlphaNum(unsigned long long x) // NOLINT(*)
+ : piece_(digits_,
+ numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {}
+
+ AlphaNum(float f) // NOLINT(runtime/explicit)
+ : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
+ AlphaNum(double f) // NOLINT(runtime/explicit)
+ : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {}
+
+ AlphaNum(Hex hex); // NOLINT(runtime/explicit)
+
+ template <size_t size>
+ AlphaNum( // NOLINT(runtime/explicit)
+ const strings_internal::AlphaNumBuffer<size>& buf)
+ : piece_(&buf.data[0], buf.size) {}
+
+ AlphaNum(const char* c_str) : piece_(c_str) {} // NOLINT(runtime/explicit)
+ AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit)
+ template <typename Allocator>
+ AlphaNum( // NOLINT(runtime/explicit)
+ const std::basic_string<char, std::char_traits<char>, Allocator>& str)
+ : piece_(str) {}
+
+ // Use std::string literals ":" instead of character literals ':'.
+ AlphaNum(char c) = delete; // NOLINT(runtime/explicit)
+
+ AlphaNum(const AlphaNum&) = delete;
+ AlphaNum& operator=(const AlphaNum&) = delete;
+
+ absl::string_view::size_type size() const { return piece_.size(); }
+ const char* data() const { return piece_.data(); }
+ absl::string_view Piece() const { return piece_; }
+
+ // Normal enums are already handled by the integer formatters.
+ // This overload matches only scoped enums.
+ template <typename T,
+ typename = typename std::enable_if<
+ std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type>
+ AlphaNum(T e) // NOLINT(runtime/explicit)
+ : AlphaNum(static_cast<typename std::underlying_type<T>::type>(e)) {}
+
+ private:
+ absl::string_view piece_;
+ char digits_[numbers_internal::kFastToBufferSize];
+};
+
+// -----------------------------------------------------------------------------
+// StrCat()
+// -----------------------------------------------------------------------------
+//
+// Merges given strings or numbers, using no delimiter(s).
+//
+// `StrCat()` is designed to be the fastest possible way to construct a std::string
+// out of a mix of raw C strings, string_views, strings, bool values,
+// and numeric values.
+//
+// Don't use `StrCat()` for user-visible strings. The localization process
+// works poorly on strings built up out of fragments.
+//
+// For clarity and performance, don't use `StrCat()` when appending to a
+// std::string. Use `StrAppend()` instead. In particular, avoid using any of these
+// (anti-)patterns:
+//
+// str.append(StrCat(...))
+// str += StrCat(...)
+// str = StrCat(str, ...)
+//
+// The last case is the worst, with a potential to change a loop
+// from a linear time operation with O(1) dynamic allocations into a
+// quadratic time operation with O(n) dynamic allocations.
+//
+// See `StrAppend()` below for more information.
+
+namespace strings_internal {
+
+// Do not call directly - this is not part of the public API.
+std::string CatPieces(std::initializer_list<absl::string_view> pieces);
+void AppendPieces(std::string* dest,
+ std::initializer_list<absl::string_view> pieces);
+
+} // namespace strings_internal
+
+ABSL_MUST_USE_RESULT inline std::string StrCat() { return std::string(); }
+
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a) {
+ return std::string(a.data(), a.size());
+}
+
+ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b);
+ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c);
+ABSL_MUST_USE_RESULT std::string StrCat(const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c, const AlphaNum& d);
+
+// Support 5 or more arguments
+template <typename... AV>
+ABSL_MUST_USE_RESULT inline std::string StrCat(const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c, const AlphaNum& d,
+ const AlphaNum& e,
+ const AV&... args) {
+ return strings_internal::CatPieces(
+ {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
+ static_cast<const AlphaNum&>(args).Piece()...});
+}
+
+// -----------------------------------------------------------------------------
+// StrAppend()
+// -----------------------------------------------------------------------------
+//
+// Appends a std::string or set of strings to an existing std::string, in a similar
+// fashion to `StrCat()`.
+//
+// WARNING: `StrAppend(&str, a, b, c, ...)` requires that none of the
+// a, b, c, parameters be a reference into str. For speed, `StrAppend()` does
+// not try to check each of its input arguments to be sure that they are not
+// a subset of the std::string being appended to. That is, while this will work:
+//
+// std::string s = "foo";
+// s += s;
+//
+// This output is undefined:
+//
+// std::string s = "foo";
+// StrAppend(&s, s);
+//
+// This output is undefined as well, since `absl::string_view` does not own its
+// data:
+//
+// std::string s = "foobar";
+// absl::string_view p = s;
+// StrAppend(&s, p);
+
+inline void StrAppend(std::string*) {}
+void StrAppend(std::string* dest, const AlphaNum& a);
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b);
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c);
+void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c, const AlphaNum& d);
+
+// Support 5 or more arguments
+template <typename... AV>
+inline void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b,
+ const AlphaNum& c, const AlphaNum& d, const AlphaNum& e,
+ const AV&... args) {
+ strings_internal::AppendPieces(
+ dest, {a.Piece(), b.Piece(), c.Piece(), d.Piece(), e.Piece(),
+ static_cast<const AlphaNum&>(args).Piece()...});
+}
+
+// Helper function for the future StrCat default floating-point format, %.6g
+// This is fast.
+inline strings_internal::AlphaNumBuffer<
+ numbers_internal::kSixDigitsToBufferSize>
+SixDigits(double d) {
+ strings_internal::AlphaNumBuffer<numbers_internal::kSixDigitsToBufferSize>
+ result;
+ result.size = numbers_internal::SixDigitsToBuffer(d, &result.data[0]);
+ return result;
+}
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_STR_CAT_H_
diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc
new file mode 100644
index 00000000..293d1943
--- /dev/null
+++ b/absl/strings/str_cat_test.cc
@@ -0,0 +1,462 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Unit tests for all str_cat.h functions
+
+#include "absl/strings/str_cat.h"
+
+#include <cstdint>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/strings/substitute.h"
+
+namespace {
+
+// Test absl::StrCat of ints and longs of various sizes and signdedness.
+TEST(StrCat, Ints) {
+ const short s = -1; // NOLINT(runtime/int)
+ const uint16_t us = 2;
+ const int i = -3;
+ const unsigned int ui = 4;
+ const long l = -5; // NOLINT(runtime/int)
+ const unsigned long ul = 6; // NOLINT(runtime/int)
+ const long long ll = -7; // NOLINT(runtime/int)
+ const unsigned long long ull = 8; // NOLINT(runtime/int)
+ const ptrdiff_t ptrdiff = -9;
+ const size_t size = 10;
+ const intptr_t intptr = -12;
+ const uintptr_t uintptr = 13;
+ std::string answer;
+ answer = absl::StrCat(s, us);
+ EXPECT_EQ(answer, "-12");
+ answer = absl::StrCat(i, ui);
+ EXPECT_EQ(answer, "-34");
+ answer = absl::StrCat(l, ul);
+ EXPECT_EQ(answer, "-56");
+ answer = absl::StrCat(ll, ull);
+ EXPECT_EQ(answer, "-78");
+ answer = absl::StrCat(ptrdiff, size);
+ EXPECT_EQ(answer, "-910");
+ answer = absl::StrCat(ptrdiff, intptr);
+ EXPECT_EQ(answer, "-9-12");
+ answer = absl::StrCat(uintptr, 0);
+ EXPECT_EQ(answer, "130");
+}
+
+TEST(StrCat, Enums) {
+ enum SmallNumbers { One = 1, Ten = 10 } e = Ten;
+ EXPECT_EQ("10", absl::StrCat(e));
+ EXPECT_EQ("-5", absl::StrCat(SmallNumbers(-5)));
+
+ enum class Option { Boxers = 1, Briefs = -1 };
+
+ EXPECT_EQ("-1", absl::StrCat(Option::Briefs));
+
+ enum class Airplane : uint64_t {
+ Airbus = 1,
+ Boeing = 1000,
+ Canary = 10000000000 // too big for "int"
+ };
+
+ EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));
+
+ enum class TwoGig : int32_t {
+ TwoToTheZero = 1,
+ TwoToTheSixteenth = 1 << 16,
+ TwoToTheThirtyFirst = INT32_MIN
+ };
+ EXPECT_EQ("65536", absl::StrCat(TwoGig::TwoToTheSixteenth));
+ EXPECT_EQ("-2147483648", absl::StrCat(TwoGig::TwoToTheThirtyFirst));
+ EXPECT_EQ("-1", absl::StrCat(static_cast<TwoGig>(-1)));
+
+ enum class FourGig : uint32_t {
+ TwoToTheZero = 1,
+ TwoToTheSixteenth = 1 << 16,
+ TwoToTheThirtyFirst = 1U << 31 // too big for "int"
+ };
+ EXPECT_EQ("65536", absl::StrCat(FourGig::TwoToTheSixteenth));
+ EXPECT_EQ("2147483648", absl::StrCat(FourGig::TwoToTheThirtyFirst));
+ EXPECT_EQ("4294967295", absl::StrCat(static_cast<FourGig>(-1)));
+
+ EXPECT_EQ("10000000000", absl::StrCat(Airplane::Canary));
+}
+
+TEST(StrCat, Basics) {
+ std::string result;
+
+ std::string strs[] = {
+ "Hello",
+ "Cruel",
+ "World"
+ };
+
+ std::string stdstrs[] = {
+ "std::Hello",
+ "std::Cruel",
+ "std::World"
+ };
+
+ absl::string_view pieces[] = {"Hello", "Cruel", "World"};
+
+ const char* c_strs[] = {
+ "Hello",
+ "Cruel",
+ "World"
+ };
+
+ int32_t i32s[] = {'H', 'C', 'W'};
+ uint64_t ui64s[] = {12345678910LL, 10987654321LL};
+
+ EXPECT_EQ(absl::StrCat(), "");
+
+ result = absl::StrCat(false, true, 2, 3);
+ EXPECT_EQ(result, "0123");
+
+ result = absl::StrCat(-1);
+ EXPECT_EQ(result, "-1");
+
+ result = absl::StrCat(absl::SixDigits(0.5));
+ EXPECT_EQ(result, "0.5");
+
+ result = absl::StrCat(strs[1], pieces[2]);
+ EXPECT_EQ(result, "CruelWorld");
+
+ result = absl::StrCat(stdstrs[1], " ", stdstrs[2]);
+ EXPECT_EQ(result, "std::Cruel std::World");
+
+ result = absl::StrCat(strs[0], ", ", pieces[2]);
+ EXPECT_EQ(result, "Hello, World");
+
+ result = absl::StrCat(strs[0], ", ", strs[1], " ", strs[2], "!");
+ EXPECT_EQ(result, "Hello, Cruel World!");
+
+ result = absl::StrCat(pieces[0], ", ", pieces[1], " ", pieces[2]);
+ EXPECT_EQ(result, "Hello, Cruel World");
+
+ result = absl::StrCat(c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+ EXPECT_EQ(result, "Hello, Cruel World");
+
+ result = absl::StrCat("ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+ EXPECT_EQ(result, "ASCII 72, 67 87!");
+
+ result = absl::StrCat(ui64s[0], ", ", ui64s[1], "!");
+ EXPECT_EQ(result, "12345678910, 10987654321!");
+
+ std::string one = "1"; // Actually, it's the size of this std::string that we want; a
+ // 64-bit build distinguishes between size_t and uint64_t,
+ // even though they're both unsigned 64-bit values.
+ result = absl::StrCat("And a ", one.size(), " and a ",
+ &result[2] - &result[0], " and a ", one, " 2 3 4", "!");
+ EXPECT_EQ(result, "And a 1 and a 2 and a 1 2 3 4!");
+
+ // result = absl::StrCat("Single chars won't compile", '!');
+ // result = absl::StrCat("Neither will nullptrs", nullptr);
+ result =
+ absl::StrCat("To output a char by ASCII/numeric value, use +: ", '!' + 0);
+ EXPECT_EQ(result, "To output a char by ASCII/numeric value, use +: 33");
+
+ float f = 100000.5;
+ result = absl::StrCat("A hundred K and a half is ", absl::SixDigits(f));
+ EXPECT_EQ(result, "A hundred K and a half is 100000");
+
+ f = 100001.5;
+ result =
+ absl::StrCat("A hundred K and one and a half is ", absl::SixDigits(f));
+ EXPECT_EQ(result, "A hundred K and one and a half is 100002");
+
+ double d = 100000.5;
+ d *= d;
+ result =
+ absl::StrCat("A hundred K and a half squared is ", absl::SixDigits(d));
+ EXPECT_EQ(result, "A hundred K and a half squared is 1.00001e+10");
+
+ result = absl::StrCat(1, 2, 333, 4444, 55555, 666666, 7777777, 88888888,
+ 999999999);
+ EXPECT_EQ(result, "12333444455555666666777777788888888999999999");
+}
+
+// A minimal allocator that uses malloc().
+template <typename T>
+struct Mallocator {
+ typedef T value_type;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+
+ size_type max_size() const {
+ return size_t(std::numeric_limits<size_type>::max()) / sizeof(value_type);
+ }
+ template <typename U>
+ struct rebind {
+ typedef Mallocator<U> other;
+ };
+ Mallocator() = default;
+
+ T* allocate(size_t n) { return static_cast<T*>(std::malloc(n * sizeof(T))); }
+ void deallocate(T* p, size_t) { std::free(p); }
+};
+template <typename T, typename U>
+bool operator==(const Mallocator<T>&, const Mallocator<U>&) {
+ return true;
+}
+template <typename T, typename U>
+bool operator!=(const Mallocator<T>&, const Mallocator<U>&) {
+ return false;
+}
+
+TEST(StrCat, CustomAllocator) {
+ using mstring =
+ std::basic_string<char, std::char_traits<char>, Mallocator<char>>;
+ const mstring str1("PARACHUTE OFF A BLIMP INTO MOSCONE!!");
+
+ const mstring str2("Read this book about coffee tables");
+
+ std::string result = absl::StrCat(str1, str2);
+ EXPECT_EQ(result,
+ "PARACHUTE OFF A BLIMP INTO MOSCONE!!"
+ "Read this book about coffee tables");
+}
+
+TEST(StrCat, MaxArgs) {
+ std::string result;
+ // Test 10 up to 26 arguments, the current maximum
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a");
+ EXPECT_EQ(result, "123456789a");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b");
+ EXPECT_EQ(result, "123456789ab");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c");
+ EXPECT_EQ(result, "123456789abc");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d");
+ EXPECT_EQ(result, "123456789abcd");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e");
+ EXPECT_EQ(result, "123456789abcde");
+ result =
+ absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f");
+ EXPECT_EQ(result, "123456789abcdef");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g");
+ EXPECT_EQ(result, "123456789abcdefg");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h");
+ EXPECT_EQ(result, "123456789abcdefgh");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i");
+ EXPECT_EQ(result, "123456789abcdefghi");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j");
+ EXPECT_EQ(result, "123456789abcdefghij");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k");
+ EXPECT_EQ(result, "123456789abcdefghijk");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k", "l");
+ EXPECT_EQ(result, "123456789abcdefghijkl");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k", "l", "m");
+ EXPECT_EQ(result, "123456789abcdefghijklm");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k", "l", "m", "n");
+ EXPECT_EQ(result, "123456789abcdefghijklmn");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k", "l", "m", "n", "o");
+ EXPECT_EQ(result, "123456789abcdefghijklmno");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k", "l", "m", "n", "o", "p");
+ EXPECT_EQ(result, "123456789abcdefghijklmnop");
+ result = absl::StrCat(1, 2, 3, 4, 5, 6, 7, 8, 9, "a", "b", "c", "d", "e", "f",
+ "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q");
+ EXPECT_EQ(result, "123456789abcdefghijklmnopq");
+ // No limit thanks to C++11's variadic templates
+ result = absl::StrCat(
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, "a", "b", "c", "d", "e", "f", "g", "h",
+ "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
+ "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L",
+ "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z");
+ EXPECT_EQ(result,
+ "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
+}
+
+TEST(StrAppend, Basics) {
+ std::string result = "existing text";
+
+ std::string strs[] = {
+ "Hello",
+ "Cruel",
+ "World"
+ };
+
+ absl::string_view pieces[] = {"Hello", "Cruel", "World"};
+
+ const char* c_strs[] = {
+ "Hello",
+ "Cruel",
+ "World"
+ };
+
+ int32_t i32s[] = {'H', 'C', 'W'};
+ uint64_t ui64s[] = {12345678910LL, 10987654321LL};
+
+ std::string::size_type old_size = result.size();
+ absl::StrAppend(&result);
+ EXPECT_EQ(result.size(), old_size);
+
+ old_size = result.size();
+ absl::StrAppend(&result, strs[0]);
+ EXPECT_EQ(result.substr(old_size), "Hello");
+
+ old_size = result.size();
+ absl::StrAppend(&result, strs[1], pieces[2]);
+ EXPECT_EQ(result.substr(old_size), "CruelWorld");
+
+ old_size = result.size();
+ absl::StrAppend(&result, strs[0], ", ", pieces[2]);
+ EXPECT_EQ(result.substr(old_size), "Hello, World");
+
+ old_size = result.size();
+ absl::StrAppend(&result, strs[0], ", ", strs[1], " ", strs[2], "!");
+ EXPECT_EQ(result.substr(old_size), "Hello, Cruel World!");
+
+ old_size = result.size();
+ absl::StrAppend(&result, pieces[0], ", ", pieces[1], " ", pieces[2]);
+ EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
+
+ old_size = result.size();
+ absl::StrAppend(&result, c_strs[0], ", ", c_strs[1], " ", c_strs[2]);
+ EXPECT_EQ(result.substr(old_size), "Hello, Cruel World");
+
+ old_size = result.size();
+ absl::StrAppend(&result, "ASCII ", i32s[0], ", ", i32s[1], " ", i32s[2], "!");
+ EXPECT_EQ(result.substr(old_size), "ASCII 72, 67 87!");
+
+ old_size = result.size();
+ absl::StrAppend(&result, ui64s[0], ", ", ui64s[1], "!");
+ EXPECT_EQ(result.substr(old_size), "12345678910, 10987654321!");
+
+ std::string one = "1"; // Actually, it's the size of this std::string that we want; a
+ // 64-bit build distinguishes between size_t and uint64_t,
+ // even though they're both unsigned 64-bit values.
+ old_size = result.size();
+ absl::StrAppend(&result, "And a ", one.size(), " and a ",
+ &result[2] - &result[0], " and a ", one, " 2 3 4", "!");
+ EXPECT_EQ(result.substr(old_size), "And a 1 and a 2 and a 1 2 3 4!");
+
+ // result = absl::StrCat("Single chars won't compile", '!');
+ // result = absl::StrCat("Neither will nullptrs", nullptr);
+ old_size = result.size();
+ absl::StrAppend(&result,
+ "To output a char by ASCII/numeric value, use +: ", '!' + 0);
+ EXPECT_EQ(result.substr(old_size),
+ "To output a char by ASCII/numeric value, use +: 33");
+
+ // Test 9 arguments, the old maximum
+ old_size = result.size();
+ absl::StrAppend(&result, 1, 22, 333, 4444, 55555, 666666, 7777777, 88888888,
+ 9);
+ EXPECT_EQ(result.substr(old_size), "1223334444555556666667777777888888889");
+
+ // No limit thanks to C++11's variadic templates
+ old_size = result.size();
+ absl::StrAppend(
+ &result, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, //
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", //
+ "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", //
+ "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", //
+ "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", //
+ "No limit thanks to C++11's variadic templates");
+ EXPECT_EQ(result.substr(old_size),
+ "12345678910abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "No limit thanks to C++11's variadic templates");
+}
+
+#ifdef GTEST_HAS_DEATH_TEST
+TEST(StrAppend, Death) {
+ std::string s = "self";
+ // on linux it's "assertion", on mac it's "Assertion",
+ // on chromiumos it's "Assertion ... failed".
+ EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s.c_str() + 1), "ssertion.*failed");
+ EXPECT_DEBUG_DEATH(absl::StrAppend(&s, s), "ssertion.*failed");
+}
+#endif // GTEST_HAS_DEATH_TEST
+
+TEST(StrAppend, EmptyString) {
+ std::string s = "";
+ absl::StrAppend(&s, s);
+ EXPECT_EQ(s, "");
+}
+
+template <typename IntType>
+void CheckHex(IntType v, const char* nopad_format, const char* zeropad_format,
+ const char* spacepad_format) {
+ char expected[256];
+
+ std::string actual = absl::StrCat(absl::Hex(v, absl::kNoPad));
+ snprintf(expected, sizeof(expected), nopad_format, v);
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+
+ for (int spec = absl::kZeroPad2; spec <= absl::kZeroPad16; ++spec) {
+ std::string actual =
+ absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec)));
+ snprintf(expected, sizeof(expected), zeropad_format,
+ spec - absl::kZeroPad2 + 2, v);
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+ }
+
+ for (int spec = absl::kSpacePad2; spec <= absl::kSpacePad16; ++spec) {
+ std::string actual =
+ absl::StrCat(absl::Hex(v, static_cast<absl::PadSpec>(spec)));
+ snprintf(expected, sizeof(expected), spacepad_format,
+ spec - absl::kSpacePad2 + 2, v);
+ EXPECT_EQ(expected, actual) << " decimal value " << v;
+ }
+}
+
+void CheckHex64(uint64_t v) {
+ unsigned long long llv = v; // NOLINT(runtime/int)
+
+ CheckHex(llv, "%llx", "%0*llx", "%*llx");
+}
+
+template <typename Int32Type>
+void CheckHex32(Int32Type v) {
+ CheckHex(v, "%x", "%0*x", "%*x");
+}
+
+void TestFastPrints() {
+ // Test min int to make sure that works
+ for (int i = 0; i < 10000; i++) {
+ CheckHex64(i);
+ CheckHex32(static_cast<uint32_t>(i));
+ CheckHex32(i);
+ CheckHex32(-i);
+ }
+
+ CheckHex64(uint64_t{0x123456789abcdef0});
+ CheckHex32(0x12345678U);
+
+ int8_t minus_one_8bit = -1;
+ EXPECT_EQ("ff", absl::StrCat(absl::Hex(minus_one_8bit)));
+
+ int16_t minus_one_16bit = -1;
+ EXPECT_EQ("ffff", absl::StrCat(absl::Hex(minus_one_16bit)));
+}
+
+TEST(Numbers, TestFunctionsMovedOverFromNumbersMain) {
+ TestFastPrints();
+}
+
+} // namespace
diff --git a/absl/strings/str_join.h b/absl/strings/str_join.h
new file mode 100644
index 00000000..82a3cac2
--- /dev/null
+++ b/absl/strings/str_join.h
@@ -0,0 +1,288 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: str_join.h
+// -----------------------------------------------------------------------------
+//
+// This header file contains functions for joining a range of elements and
+// returning the result as a std::string. StrJoin operations are specified by passing
+// a range, a separator std::string to use between the elements joined, and an
+// optional Formatter responsible for converting each argument in the range to a
+// std::string. If omitted, a default `AlphaNumFormatter()` is called on the elements
+// to be joined, using the same formatting that `absl::StrCat()` uses. This
+// package defines a number of default formatters, and you can define your own
+// implementations.
+//
+// Ranges are specified by passing a container with `std::begin()` and
+// `std::end()` iterators, container-specific `begin()` and `end()` iterators, a
+// brace-initialized `std::initializer_list`, or a `std::tuple` of heterogeneous
+// objects. The separator std::string is specified as an `absl::string_view`.
+//
+// Because the default formatter uses the `absl::AlphaNum` class,
+// `absl::StrJoin()`, like `absl::StrCat()`, will work out-of-the-box on
+// collections of strings, ints, floats, doubles, etc.
+//
+// Example:
+//
+// std::vector<std::string> v = {"foo", "bar", "baz"};
+// std::string s = absl::StrJoin(v, "-");
+// EXPECT_EQ("foo-bar-baz", s);
+//
+// See comments on the `absl::StrJoin()` function for more examples.
+
+#ifndef ABSL_STRINGS_STR_JOIN_H_
+#define ABSL_STRINGS_STR_JOIN_H_
+
+#include <cstdio>
+#include <cstring>
+#include <initializer_list>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+
+#include "absl/base/macros.h"
+#include "absl/strings/internal/str_join_internal.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+// -----------------------------------------------------------------------------
+// Concept: Formatter
+// -----------------------------------------------------------------------------
+//
+// A Formatter is a function object that is responsible for formatting its
+// argument as a std::string and appending it to a given output std::string. Formatters
+// may be implemented as function objects, lambdas, or normal functions. You may
+// provide your own Formatter to enable `absl::StrJoin()` to work with arbitrary
+// types.
+//
+// The following is an example of a custom Formatter that simply uses
+// `std::to_string()` to format an integer as a std::string.
+//
+// struct MyFormatter {
+// void operator()(std::string* out, int i) const {
+// out->append(std::to_string(i));
+// }
+// };
+//
+// You would use the above formatter by passing an instance of it as the final
+// argument to `absl::StrJoin()`:
+//
+// std::vector<int> v = {1, 2, 3, 4};
+// std::string s = absl::StrJoin(v, "-", MyFormatter());
+// EXPECT_EQ("1-2-3-4", s);
+//
+// The following standard formatters are provided within this file:
+//
+// - `AlphaNumFormatter()` (the default)
+// - `StreamFormatter()`
+// - `PairFormatter()`
+// - `DereferenceFormatter()`
+
+// AlphaNumFormatter()
+//
+// Default formatter used if none is specified. Uses `absl::AlphaNum` to convert
+// numeric arguments to strings.
+inline strings_internal::AlphaNumFormatterImpl AlphaNumFormatter() {
+ return strings_internal::AlphaNumFormatterImpl();
+}
+
+// StreamFormatter()
+//
+// Formats its argument using the << operator.
+inline strings_internal::StreamFormatterImpl StreamFormatter() {
+ return strings_internal::StreamFormatterImpl();
+}
+
+// Function Template: PairFormatter(Formatter, absl::string_view, Formatter)
+//
+// Formats a `std::pair` by putting a given separator between the pair's
+// `.first` and `.second` members. This formatter allows you to specify
+// custom Formatters for both the first and second member of each pair.
+template <typename FirstFormatter, typename SecondFormatter>
+inline strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>
+PairFormatter(FirstFormatter f1, absl::string_view sep, SecondFormatter f2) {
+ return strings_internal::PairFormatterImpl<FirstFormatter, SecondFormatter>(
+ std::move(f1), sep, std::move(f2));
+}
+
+// Function overload of PairFormatter() for using a default
+// `AlphaNumFormatter()` for each Formatter in the pair.
+inline strings_internal::PairFormatterImpl<
+ strings_internal::AlphaNumFormatterImpl,
+ strings_internal::AlphaNumFormatterImpl>
+PairFormatter(absl::string_view sep) {
+ return PairFormatter(AlphaNumFormatter(), sep, AlphaNumFormatter());
+}
+
+// Function Template: DereferenceFormatter(Formatter)
+//
+// Formats its argument by dereferencing it and then applying the given
+// formatter. This formatter is useful for formatting a container of
+// pointer-to-T. This pattern often shows up when joining repeated fields in
+// protocol buffers.
+template <typename Formatter>
+strings_internal::DereferenceFormatterImpl<Formatter> DereferenceFormatter(
+ Formatter&& f) {
+ return strings_internal::DereferenceFormatterImpl<Formatter>(
+ std::forward<Formatter>(f));
+}
+
+// Function overload of `DererefenceFormatter()` for using a default
+// `AlphaNumFormatter()`.
+inline strings_internal::DereferenceFormatterImpl<
+ strings_internal::AlphaNumFormatterImpl>
+DereferenceFormatter() {
+ return strings_internal::DereferenceFormatterImpl<
+ strings_internal::AlphaNumFormatterImpl>(AlphaNumFormatter());
+}
+
+// -----------------------------------------------------------------------------
+// StrJoin()
+// -----------------------------------------------------------------------------
+//
+// Joins a range of elements and returns the result as a std::string.
+// `absl::StrJoin()` takes a range, a separator std::string to use between the
+// elements joined, and an optional Formatter responsible for converting each
+// argument in the range to a std::string.
+//
+// If omitted, the default `AlphaNumFormatter()` is called on the elements to be
+// joined.
+//
+// Example 1:
+// // Joins a collection of strings. This pattern also works with a collection
+// // of `asbl::string_view` or even `const char*`.
+// std::vector<std::string> v = {"foo", "bar", "baz"};
+// std::string s = absl::StrJoin(v, "-");
+// EXPECT_EQ("foo-bar-baz", s);
+//
+// Example 2:
+// // Joins the values in the given `std::initializer_list<>` specified using
+// // brace initialization. This pattern also works with an initializer_list
+// // of ints or `absl::string_view` -- any `AlphaNum`-compatible type.
+// std::string s = absl::StrJoin({"foo", "bar", "baz"}, "-");
+// EXPECT_EQ("foo-bar-baz", s);
+//
+// Example 3:
+// // Joins a collection of ints. This pattern also works with floats,
+// // doubles, int64s -- any `StrCat()`-compatible type.
+// std::vector<int> v = {1, 2, 3, -4};
+// std::string s = absl::StrJoin(v, "-");
+// EXPECT_EQ("1-2-3--4", s);
+//
+// Example 4:
+// // Joins a collection of pointer-to-int. By default, pointers are
+// // dereferenced and the pointee is formatted using the default format for
+// // that type; such dereferencing occurs for all levels of indirection, so
+// // this pattern works just as well for `std::vector<int**>` as for
+// // `std::vector<int*>`.
+// int x = 1, y = 2, z = 3;
+// std::vector<int*> v = {&x, &y, &z};
+// std::string s = absl::StrJoin(v, "-");
+// EXPECT_EQ("1-2-3", s);
+//
+// Example 5:
+// // Dereferencing of `std::unique_ptr<>` is also supported:
+// std::vector<std::unique_ptr<int>> v
+// v.emplace_back(new int(1));
+// v.emplace_back(new int(2));
+// v.emplace_back(new int(3));
+// std::string s = absl::StrJoin(v, "-");
+// EXPECT_EQ("1-2-3", s);
+//
+// Example 6:
+// // Joins a `std::map`, with each key-value pair separated by an equals
+// // sign. This pattern would also work with, say, a
+// // `std::vector<std::pair<>>`.
+// std::map<std::string, int> m = {
+// std::make_pair("a", 1),
+// std::make_pair("b", 2),
+// std::make_pair("c", 3)};
+// std::string s = absl::StrJoin(m, ",", strings::PairFormatter("="));
+// EXPECT_EQ("a=1,b=2,c=3", s);
+//
+// Example 7:
+// // These examples show how `absl::StrJoin()` handles a few common edge
+// // cases:
+// std::vector<std::string> v_empty;
+// EXPECT_EQ("", absl::StrJoin(v_empty, "-"));
+//
+// std::vector<std::string> v_one_item = {"foo"};
+// EXPECT_EQ("foo", absl::StrJoin(v_one_item, "-"));
+//
+// std::vector<std::string> v_empty_string = {""};
+// EXPECT_EQ("", absl::StrJoin(v_empty_string, "-"));
+//
+// std::vector<std::string> v_one_item_empty_string = {"a", ""};
+// EXPECT_EQ("a-", absl::StrJoin(v_one_item_empty_string, "-"));
+//
+// std::vector<std::string> v_two_empty_string = {"", ""};
+// EXPECT_EQ("-", absl::StrJoin(v_two_empty_string, "-"));
+//
+// Example 8:
+// // Joins a `std::tuple<T...>` of heterogeneous types, converting each to
+// // a std::string using the `absl::AlphaNum` class.
+// std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-");
+// EXPECT_EQ("123-abc-0.456", s);
+
+template <typename Iterator, typename Formatter>
+std::string StrJoin(Iterator start, Iterator end, absl::string_view sep,
+ Formatter&& fmt) {
+ return strings_internal::JoinAlgorithm(start, end, sep, fmt);
+}
+
+template <typename Range, typename Formatter>
+std::string StrJoin(const Range& range, absl::string_view separator,
+ Formatter&& fmt) {
+ return strings_internal::JoinRange(range, separator, fmt);
+}
+
+template <typename T, typename Formatter>
+std::string StrJoin(std::initializer_list<T> il, absl::string_view separator,
+ Formatter&& fmt) {
+ return strings_internal::JoinRange(il, separator, fmt);
+}
+
+template <typename... T, typename Formatter>
+std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator,
+ Formatter&& fmt) {
+ return strings_internal::JoinAlgorithm(value, separator, fmt);
+}
+
+template <typename Iterator>
+std::string StrJoin(Iterator start, Iterator end, absl::string_view separator) {
+ return strings_internal::JoinRange(start, end, separator);
+}
+
+template <typename Range>
+std::string StrJoin(const Range& range, absl::string_view separator) {
+ return strings_internal::JoinRange(range, separator);
+}
+
+template <typename T>
+std::string StrJoin(std::initializer_list<T> il, absl::string_view separator) {
+ return strings_internal::JoinRange(il, separator);
+}
+
+template <typename... T>
+std::string StrJoin(const std::tuple<T...>& value, absl::string_view separator) {
+ return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter());
+}
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_STR_JOIN_H_
diff --git a/absl/strings/str_join_test.cc b/absl/strings/str_join_test.cc
new file mode 100644
index 00000000..7c2ed09b
--- /dev/null
+++ b/absl/strings/str_join_test.cc
@@ -0,0 +1,474 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Unit tests for all join.h functions
+
+#include "absl/strings/str_join.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <initializer_list>
+#include <map>
+#include <ostream>
+#include <random>
+#include <set>
+#include <tuple>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_split.h"
+
+namespace {
+
+TEST(StrJoin, APIExamples) {
+ {
+ // Collection of strings
+ std::vector<std::string> v = {"foo", "bar", "baz"};
+ EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Collection of absl::string_view
+ std::vector<absl::string_view> v = {"foo", "bar", "baz"};
+ EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Collection of const char*
+ std::vector<const char*> v = {"foo", "bar", "baz"};
+ EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Collection of non-const char*
+ std::string a = "foo", b = "bar", c = "baz";
+ std::vector<char*> v = {&a[0], &b[0], &c[0]};
+ EXPECT_EQ("foo-bar-baz", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Collection of ints
+ std::vector<int> v = {1, 2, 3, -4};
+ EXPECT_EQ("1-2-3--4", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Literals passed as a std::initializer_list
+ std::string s = absl::StrJoin({"a", "b", "c"}, "-");
+ EXPECT_EQ("a-b-c", s);
+ }
+ {
+ // Join a std::tuple<T...>.
+ std::string s = absl::StrJoin(std::make_tuple(123, "abc", 0.456), "-");
+ EXPECT_EQ("123-abc-0.456", s);
+ }
+
+ {
+ // Collection of unique_ptrs
+ std::vector<std::unique_ptr<int>> v;
+ v.emplace_back(new int(1));
+ v.emplace_back(new int(2));
+ v.emplace_back(new int(3));
+ EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Array of ints
+ const int a[] = {1, 2, 3, -4};
+ EXPECT_EQ("1-2-3--4", absl::StrJoin(a, a + ABSL_ARRAYSIZE(a), "-"));
+ }
+
+ {
+ // Collection of pointers
+ int x = 1, y = 2, z = 3;
+ std::vector<int*> v = {&x, &y, &z};
+ EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Collection of pointers to pointers
+ int x = 1, y = 2, z = 3;
+ int *px = &x, *py = &y, *pz = &z;
+ std::vector<int**> v = {&px, &py, &pz};
+ EXPECT_EQ("1-2-3", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // Collection of pointers to std::string
+ std::string a("a"), b("b");
+ std::vector<std::string*> v = {&a, &b};
+ EXPECT_EQ("a-b", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // A std::map, which is a collection of std::pair<>s.
+ std::map<std::string, int> m = { {"a", 1}, {"b", 2}, {"c", 3} };
+ EXPECT_EQ("a=1,b=2,c=3", absl::StrJoin(m, ",", absl::PairFormatter("=")));
+ }
+
+ {
+ // Shows absl::StrSplit and absl::StrJoin working together. This example is
+ // equivalent to s/=/-/g.
+ const std::string s = "a=b=c=d";
+ EXPECT_EQ("a-b-c-d", absl::StrJoin(absl::StrSplit(s, "="), "-"));
+ }
+
+ //
+ // A few examples of edge cases
+ //
+
+ {
+ // Empty range yields an empty std::string.
+ std::vector<std::string> v;
+ EXPECT_EQ("", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // A range of 1 element gives a std::string with that element but no separator.
+ std::vector<std::string> v = {"foo"};
+ EXPECT_EQ("foo", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // A range with a single empty std::string element
+ std::vector<std::string> v = {""};
+ EXPECT_EQ("", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // A range with 2 elements, one of which is an empty std::string
+ std::vector<std::string> v = {"a", ""};
+ EXPECT_EQ("a-", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // A range with 2 empty elements.
+ std::vector<std::string> v = {"", ""};
+ EXPECT_EQ("-", absl::StrJoin(v, "-"));
+ }
+
+ {
+ // A std::vector of bool.
+ std::vector<bool> v = {true, false, true};
+ EXPECT_EQ("1-0-1", absl::StrJoin(v, "-"));
+ }
+}
+
+TEST(StrJoin, CustomFormatter) {
+ std::vector<std::string> v{"One", "Two", "Three"};
+ {
+ std::string joined = absl::StrJoin(v, "", [](std::string* out, const std::string& in) {
+ absl::StrAppend(out, "(", in, ")");
+ });
+ EXPECT_EQ("(One)(Two)(Three)", joined);
+ }
+ {
+ class ImmovableFormatter {
+ public:
+ void operator()(std::string* out, const std::string& in) {
+ absl::StrAppend(out, "(", in, ")");
+ }
+ ImmovableFormatter() {}
+ ImmovableFormatter(const ImmovableFormatter&) = delete;
+ };
+ EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", ImmovableFormatter()));
+ }
+ {
+ class OverloadedFormatter {
+ public:
+ void operator()(std::string* out, const std::string& in) {
+ absl::StrAppend(out, "(", in, ")");
+ }
+ void operator()(std::string* out, const std::string& in) const {
+ absl::StrAppend(out, "[", in, "]");
+ }
+ };
+ EXPECT_EQ("(One)(Two)(Three)", absl::StrJoin(v, "", OverloadedFormatter()));
+ const OverloadedFormatter fmt = {};
+ EXPECT_EQ("[One][Two][Three]", absl::StrJoin(v, "", fmt));
+ }
+}
+
+//
+// Tests the Formatters
+//
+
+TEST(AlphaNumFormatter, FormatterAPI) {
+ // Not an exhaustive test. See strings/strcat_test.h for the exhaustive test
+ // of what AlphaNum can convert.
+ auto f = absl::AlphaNumFormatter();
+ std::string s;
+ f(&s, "Testing: ");
+ f(&s, static_cast<int>(1));
+ f(&s, static_cast<int16_t>(2));
+ f(&s, static_cast<int64_t>(3));
+ f(&s, static_cast<float>(4));
+ f(&s, static_cast<double>(5));
+ f(&s, static_cast<unsigned>(6));
+ f(&s, static_cast<size_t>(7));
+ f(&s, absl::string_view(" OK"));
+ EXPECT_EQ("Testing: 1234567 OK", s);
+}
+
+// Make sure people who are mistakenly using std::vector<bool> even though
+// they're not memory-constrained can use absl::AlphaNumFormatter().
+TEST(AlphaNumFormatter, VectorOfBool) {
+ auto f = absl::AlphaNumFormatter();
+ std::string s;
+ std::vector<bool> v = {true, false, true};
+ f(&s, *v.cbegin());
+ f(&s, *v.begin());
+ f(&s, v[1]);
+ EXPECT_EQ("110", s);
+}
+
+TEST(AlphaNumFormatter, AlphaNum) {
+ auto f = absl::AlphaNumFormatter();
+ std::string s;
+ f(&s, absl::AlphaNum("hello"));
+ EXPECT_EQ("hello", s);
+}
+
+struct StreamableType {
+ std::string contents;
+};
+inline std::ostream& operator<<(std::ostream& os, const StreamableType& t) {
+ os << "Streamable:" << t.contents;
+ return os;
+}
+
+TEST(StreamFormatter, FormatterAPI) {
+ auto f = absl::StreamFormatter();
+ std::string s;
+ f(&s, "Testing: ");
+ f(&s, static_cast<int>(1));
+ f(&s, static_cast<int16_t>(2));
+ f(&s, static_cast<int64_t>(3));
+ f(&s, static_cast<float>(4));
+ f(&s, static_cast<double>(5));
+ f(&s, static_cast<unsigned>(6));
+ f(&s, static_cast<size_t>(7));
+ f(&s, absl::string_view(" OK "));
+ StreamableType streamable = {"object"};
+ f(&s, streamable);
+ EXPECT_EQ("Testing: 1234567 OK Streamable:object", s);
+}
+
+// A dummy formatter that wraps each element in parens. Used in some tests
+// below.
+struct TestingParenFormatter {
+ template <typename T>
+ void operator()(std::string* s, const T& t) {
+ absl::StrAppend(s, "(", t, ")");
+ }
+};
+
+TEST(PairFormatter, FormatterAPI) {
+ {
+ // Tests default PairFormatter(sep) that uses AlphaNumFormatter for the
+ // 'first' and 'second' members.
+ const auto f = absl::PairFormatter("=");
+ std::string s;
+ f(&s, std::make_pair("a", "b"));
+ f(&s, std::make_pair(1, 2));
+ EXPECT_EQ("a=b1=2", s);
+ }
+
+ {
+ // Tests using a custom formatter for the 'first' and 'second' members.
+ auto f = absl::PairFormatter(TestingParenFormatter(), "=",
+ TestingParenFormatter());
+ std::string s;
+ f(&s, std::make_pair("a", "b"));
+ f(&s, std::make_pair(1, 2));
+ EXPECT_EQ("(a)=(b)(1)=(2)", s);
+ }
+}
+
+TEST(DereferenceFormatter, FormatterAPI) {
+ {
+ // Tests wrapping the default AlphaNumFormatter.
+ const absl::strings_internal::DereferenceFormatterImpl<
+ absl::strings_internal::AlphaNumFormatterImpl>
+ f;
+ int x = 1, y = 2, z = 3;
+ std::string s;
+ f(&s, &x);
+ f(&s, &y);
+ f(&s, &z);
+ EXPECT_EQ("123", s);
+ }
+
+ {
+ // Tests wrapping std::string's default formatter.
+ absl::strings_internal::DereferenceFormatterImpl<
+ absl::strings_internal::DefaultFormatter<std::string>::Type>
+ f;
+
+ std::string x = "x";
+ std::string y = "y";
+ std::string z = "z";
+ std::string s;
+ f(&s, &x);
+ f(&s, &y);
+ f(&s, &z);
+ EXPECT_EQ(s, "xyz");
+ }
+
+ {
+ // Tests wrapping a custom formatter.
+ auto f = absl::DereferenceFormatter(TestingParenFormatter());
+ int x = 1, y = 2, z = 3;
+ std::string s;
+ f(&s, &x);
+ f(&s, &y);
+ f(&s, &z);
+ EXPECT_EQ("(1)(2)(3)", s);
+ }
+
+ {
+ absl::strings_internal::DereferenceFormatterImpl<
+ absl::strings_internal::AlphaNumFormatterImpl>
+ f;
+ auto x = std::unique_ptr<int>(new int(1));
+ auto y = std::unique_ptr<int>(new int(2));
+ auto z = std::unique_ptr<int>(new int(3));
+ std::string s;
+ f(&s, x);
+ f(&s, y);
+ f(&s, z);
+ EXPECT_EQ("123", s);
+ }
+}
+
+//
+// Tests the interfaces for the 4 public Join function overloads. The semantics
+// of the algorithm is covered in the above APIExamples test.
+//
+TEST(StrJoin, PublicAPIOverloads) {
+ std::vector<std::string> v = {"a", "b", "c"};
+
+ // Iterators + formatter
+ EXPECT_EQ("a-b-c",
+ absl::StrJoin(v.begin(), v.end(), "-", absl::AlphaNumFormatter()));
+ // Range + formatter
+ EXPECT_EQ("a-b-c", absl::StrJoin(v, "-", absl::AlphaNumFormatter()));
+ // Iterators, no formatter
+ EXPECT_EQ("a-b-c", absl::StrJoin(v.begin(), v.end(), "-"));
+ // Range, no formatter
+ EXPECT_EQ("a-b-c", absl::StrJoin(v, "-"));
+}
+
+TEST(StrJoin, Array) {
+ const absl::string_view a[] = {"a", "b", "c"};
+ EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
+}
+
+TEST(StrJoin, InitializerList) {
+ { EXPECT_EQ("a-b-c", absl::StrJoin({"a", "b", "c"}, "-")); }
+
+ {
+ auto a = {"a", "b", "c"};
+ EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
+ }
+
+ {
+ std::initializer_list<const char*> a = {"a", "b", "c"};
+ EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
+ }
+
+ {
+ std::initializer_list<std::string> a = {"a", "b", "c"};
+ EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
+ }
+
+ {
+ std::initializer_list<absl::string_view> a = {"a", "b", "c"};
+ EXPECT_EQ("a-b-c", absl::StrJoin(a, "-"));
+ }
+
+ {
+ // Tests initializer_list with a non-default formatter
+ auto a = {"a", "b", "c"};
+ TestingParenFormatter f;
+ EXPECT_EQ("(a)-(b)-(c)", absl::StrJoin(a, "-", f));
+ }
+
+ {
+ // initializer_list of ints
+ EXPECT_EQ("1-2-3", absl::StrJoin({1, 2, 3}, "-"));
+ }
+
+ {
+ // Tests initializer_list of ints with a non-default formatter
+ auto a = {1, 2, 3};
+ TestingParenFormatter f;
+ EXPECT_EQ("(1)-(2)-(3)", absl::StrJoin(a, "-", f));
+ }
+}
+
+TEST(StrJoin, Tuple) {
+ EXPECT_EQ("", absl::StrJoin(std::make_tuple(), "-"));
+ EXPECT_EQ("hello", absl::StrJoin(std::make_tuple("hello"), "-"));
+
+ int x(10);
+ std::string y("hello");
+ double z(3.14);
+ EXPECT_EQ("10-hello-3.14", absl::StrJoin(std::make_tuple(x, y, z), "-"));
+
+ // Faster! Faster!!
+ EXPECT_EQ("10-hello-3.14",
+ absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-"));
+
+ struct TestFormatter {
+ char buffer[128];
+ void operator()(std::string* out, int v) {
+ snprintf(buffer, sizeof(buffer), "%#.8x", v);
+ out->append(buffer);
+ }
+ void operator()(std::string* out, double v) {
+ snprintf(buffer, sizeof(buffer), "%#.0f", v);
+ out->append(buffer);
+ }
+ void operator()(std::string* out, const std::string& v) {
+ snprintf(buffer, sizeof(buffer), "%.4s", v.c_str());
+ out->append(buffer);
+ }
+ };
+ EXPECT_EQ("0x0000000a-hell-3.",
+ absl::StrJoin(std::make_tuple(x, y, z), "-", TestFormatter()));
+ EXPECT_EQ(
+ "0x0000000a-hell-3.",
+ absl::StrJoin(std::make_tuple(x, std::cref(y), z), "-", TestFormatter()));
+ EXPECT_EQ("0x0000000a-hell-3.",
+ absl::StrJoin(std::make_tuple(&x, &y, &z), "-",
+ absl::DereferenceFormatter(TestFormatter())));
+ EXPECT_EQ("0x0000000a-hell-3.",
+ absl::StrJoin(std::make_tuple(absl::make_unique<int>(x),
+ absl::make_unique<std::string>(y),
+ absl::make_unique<double>(z)),
+ "-", absl::DereferenceFormatter(TestFormatter())));
+ EXPECT_EQ("0x0000000a-hell-3.",
+ absl::StrJoin(std::make_tuple(absl::make_unique<int>(x), &y, &z),
+ "-", absl::DereferenceFormatter(TestFormatter())));
+}
+
+} // namespace
diff --git a/absl/strings/str_replace.cc b/absl/strings/str_replace.cc
new file mode 100644
index 00000000..69efa357
--- /dev/null
+++ b/absl/strings/str_replace.cc
@@ -0,0 +1,79 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/str_replace.h"
+
+#include "absl/strings/str_cat.h"
+
+namespace absl {
+namespace strings_internal {
+
+using FixedMapping =
+ std::initializer_list<std::pair<absl::string_view, absl::string_view>>;
+
+// Applies the ViableSubstitutions in subs_ptr to the absl::string_view s, and
+// stores the result in *result_ptr. Returns the number of substitutions that
+// occurred.
+int ApplySubstitutions(
+ absl::string_view s,
+ std::vector<strings_internal::ViableSubstitution>* subs_ptr,
+ std::string* result_ptr) {
+ auto& subs = *subs_ptr;
+ int substitutions = 0;
+ size_t pos = 0;
+ while (!subs.empty()) {
+ auto& sub = subs.back();
+ if (sub.offset >= pos) {
+ if (pos <= s.size()) {
+ StrAppend(result_ptr, s.substr(pos, sub.offset - pos), sub.replacement);
+ }
+ pos = sub.offset + sub.old.size();
+ substitutions += 1;
+ }
+ sub.offset = s.find(sub.old, pos);
+ if (sub.offset == s.npos) {
+ subs.pop_back();
+ } else {
+ // Insertion sort to ensure the last ViableSubstitution continues to be
+ // before all the others.
+ size_t index = subs.size();
+ while (--index && subs[index - 1].OccursBefore(subs[index])) {
+ std::swap(subs[index], subs[index - 1]);
+ }
+ }
+ }
+ result_ptr->append(s.data() + pos, s.size() - pos);
+ return substitutions;
+}
+
+} // namespace strings_internal
+
+// We can implement this in terms of the generic StrReplaceAll, but
+// we must specify the template overload because C++ cannot deduce the type
+// of an initializer_list parameter to a function, and also if we don't specify
+// the type, we just call ourselves.
+//
+// Note that we implement them here, rather than in the header, so that they
+// aren't inlined.
+
+std::string StrReplaceAll(absl::string_view s,
+ strings_internal::FixedMapping replacements) {
+ return StrReplaceAll<strings_internal::FixedMapping>(s, replacements);
+}
+
+int StrReplaceAll(strings_internal::FixedMapping replacements, std::string* target) {
+ return StrReplaceAll<strings_internal::FixedMapping>(replacements, target);
+}
+
+} // namespace absl
diff --git a/absl/strings/str_replace.h b/absl/strings/str_replace.h
new file mode 100644
index 00000000..f4d9bb95
--- /dev/null
+++ b/absl/strings/str_replace.h
@@ -0,0 +1,213 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: str_replace.h
+// -----------------------------------------------------------------------------
+//
+// This file defines `absl::StrReplaceAll()`, a general-purpose std::string
+// replacement function designed for large, arbitrary text substitutions,
+// especially on strings which you are receiving from some other system for
+// further processing (e.g. processing regular expressions, escaping HTML
+// entities, etc. `StrReplaceAll` is designed to be efficient even when only
+// one substitution is being performed, or when substitution is rare.
+//
+// If the std::string being modified is known at compile-time, and the substitutions
+// vary, `absl::Substitute()` may be a better choice.
+//
+// Example:
+//
+// std::string html_escaped = absl::StrReplaceAll(user_input, {
+// {"&", "&amp;"},
+// {"<", "&lt;"},
+// {">", "&gt;"},
+// {"\"", "&quot;"},
+// {"'", "&#39;"}});
+#ifndef ABSL_STRINGS_STR_REPLACE_H_
+#define ABSL_STRINGS_STR_REPLACE_H_
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/base/attributes.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+// StrReplaceAll()
+//
+// Replaces character sequences within a given std::string with replacements provided
+// within an initializer list of key/value pairs. Candidate replacements are
+// considered in order as they occur within the std::string, with earlier matches
+// taking precedence, and longer matches taking precedence for candidates
+// starting at the same position in the std::string. Once a substitution is made, the
+// replaced text is not considered for any further substitutions.
+//
+// Example:
+//
+// std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!",
+// {{"$count", absl::StrCat(5)},
+// {"$who", "Bob"},
+// {"#Noun", "Apples"}});
+// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
+ABSL_MUST_USE_RESULT std::string StrReplaceAll(
+ absl::string_view s,
+ std::initializer_list<std::pair<absl::string_view, absl::string_view>>
+ replacements);
+
+// Overload of `StrReplaceAll()` to accept a container of key/value replacement
+// pairs (typically either an associative map or a `std::vector` of `std::pair`
+// elements). A vector of pairs is generally more efficient.
+//
+// Examples:
+//
+// std::map<const absl::string_view, const absl::string_view> replacements;
+// replacements["$who"] = "Bob";
+// replacements["$count"] = "5";
+// replacements["#Noun"] = "Apples";
+// std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!",
+// replacements);
+// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
+//
+// // A std::vector of std::pair elements can be more efficient.
+// std::vector<std::pair<const absl::string_view, std::string>> replacements;
+// replacements.push_back({"&", "&amp;"});
+// replacements.push_back({"<", "&lt;"});
+// replacements.push_back({">", "&gt;"});
+// std::string s = absl::StrReplaceAll("if (ptr < &foo)",
+// replacements);
+// EXPECT_EQ("if (ptr &lt; &amp;foo)", s);
+template <typename StrToStrMapping>
+std::string StrReplaceAll(absl::string_view s, const StrToStrMapping& replacements);
+
+// Overload of `StrReplaceAll()` to replace character sequences within a given
+// output std::string *in place* with replacements provided within an initializer
+// list of key/value pairs, returning the number of substitutions that occurred.
+//
+// Example:
+//
+// std::string s = std::string("$who bought $count #Noun. Thanks $who!");
+// int count;
+// count = absl::StrReplaceAll({{"$count", absl::StrCat(5)},
+// {"$who", "Bob"},
+// {"#Noun", "Apples"}}, &s);
+// EXPECT_EQ(count, 4);
+// EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
+int StrReplaceAll(
+ std::initializer_list<std::pair<absl::string_view, absl::string_view>>
+ replacements,
+ std::string* target);
+
+// Overload of `StrReplaceAll()` to replace patterns within a given output
+// std::string *in place* with replacements provided within a container of key/value
+// pairs.
+//
+// Example:
+//
+// std::string s = std::string("if (ptr < &foo)");
+// int count = absl::StrReplaceAll({{"&", "&amp;"},
+// {"<", "&lt;"},
+// {">", "&gt;"}}, &s);
+// EXPECT_EQ(count, 2);
+// EXPECT_EQ("if (ptr &lt; &amp;foo)", s);
+template <typename StrToStrMapping>
+int StrReplaceAll(const StrToStrMapping& replacements, std::string* target);
+
+// Implementation details only, past this point.
+namespace strings_internal {
+
+struct ViableSubstitution {
+ absl::string_view old;
+ absl::string_view replacement;
+ size_t offset;
+
+ ViableSubstitution(absl::string_view old_str,
+ absl::string_view replacement_str, size_t offset_val)
+ : old(old_str), replacement(replacement_str), offset(offset_val) {}
+
+ // One substitution occurs "before" another (takes priority) if either
+ // it has the lowest offset, or it has the same offset but a larger size.
+ bool OccursBefore(const ViableSubstitution& y) const {
+ if (offset != y.offset) return offset < y.offset;
+ return old.size() > y.old.size();
+ }
+};
+
+// Build a vector of ViableSubstitutions based on the given list of
+// replacements. subs can be implemented as a priority_queue. However, it turns
+// out that most callers have small enough a list of substitutions that the
+// overhead of such a queue isn't worth it.
+template <typename StrToStrMapping>
+std::vector<ViableSubstitution> FindSubstitutions(
+ absl::string_view s, const StrToStrMapping& replacements) {
+ std::vector<ViableSubstitution> subs;
+ subs.reserve(replacements.size());
+
+ for (const auto& rep : replacements) {
+ using std::get;
+ absl::string_view old(get<0>(rep));
+
+ size_t pos = s.find(old);
+ if (pos == s.npos) continue;
+
+ // Ignore attempts to replace "". This condition is almost never true,
+ // but above condition is frequently true. That's why we test for this
+ // now and not before.
+ if (old.empty()) continue;
+
+ subs.emplace_back(old, get<1>(rep), pos);
+
+ // Insertion sort to ensure the last ViableSubstitution comes before
+ // all the others.
+ size_t index = subs.size();
+ while (--index && subs[index - 1].OccursBefore(subs[index])) {
+ std::swap(subs[index], subs[index - 1]);
+ }
+ }
+ return subs;
+}
+
+int ApplySubstitutions(absl::string_view s,
+ std::vector<ViableSubstitution>* subs_ptr,
+ std::string* result_ptr);
+
+} // namespace strings_internal
+
+template <typename StrToStrMapping>
+std::string StrReplaceAll(absl::string_view s, const StrToStrMapping& replacements) {
+ auto subs = strings_internal::FindSubstitutions(s, replacements);
+ std::string result;
+ result.reserve(s.size());
+ strings_internal::ApplySubstitutions(s, &subs, &result);
+ return result;
+}
+
+template <typename StrToStrMapping>
+int StrReplaceAll(const StrToStrMapping& replacements, std::string* target) {
+ auto subs = strings_internal::FindSubstitutions(*target, replacements);
+ if (subs.empty()) return 0;
+
+ std::string result;
+ result.reserve(target->size());
+ int substitutions =
+ strings_internal::ApplySubstitutions(*target, &subs, &result);
+ target->swap(result);
+ return substitutions;
+}
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_STR_REPLACE_H_
diff --git a/absl/strings/str_replace_test.cc b/absl/strings/str_replace_test.cc
new file mode 100644
index 00000000..f49c7e1c
--- /dev/null
+++ b/absl/strings/str_replace_test.cc
@@ -0,0 +1,340 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/str_replace.h"
+
+#include <list>
+#include <tuple>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/str_cat.h"
+
+TEST(StrReplaceAll, OneReplacement) {
+ std::string s;
+
+ // Empty std::string.
+ s = absl::StrReplaceAll(s, {{"", ""}});
+ EXPECT_EQ(s, "");
+ s = absl::StrReplaceAll(s, {{"x", ""}});
+ EXPECT_EQ(s, "");
+ s = absl::StrReplaceAll(s, {{"", "y"}});
+ EXPECT_EQ(s, "");
+ s = absl::StrReplaceAll(s, {{"x", "y"}});
+ EXPECT_EQ(s, "");
+
+ // Empty substring.
+ s = absl::StrReplaceAll("abc", {{"", ""}});
+ EXPECT_EQ(s, "abc");
+ s = absl::StrReplaceAll("abc", {{"", "y"}});
+ EXPECT_EQ(s, "abc");
+ s = absl::StrReplaceAll("abc", {{"x", ""}});
+ EXPECT_EQ(s, "abc");
+
+ // Substring not found.
+ s = absl::StrReplaceAll("abc", {{"xyz", "123"}});
+ EXPECT_EQ(s, "abc");
+
+ // Replace entire std::string.
+ s = absl::StrReplaceAll("abc", {{"abc", "xyz"}});
+ EXPECT_EQ(s, "xyz");
+
+ // Replace once at the start.
+ s = absl::StrReplaceAll("abc", {{"a", "x"}});
+ EXPECT_EQ(s, "xbc");
+
+ // Replace once in the middle.
+ s = absl::StrReplaceAll("abc", {{"b", "x"}});
+ EXPECT_EQ(s, "axc");
+
+ // Replace once at the end.
+ s = absl::StrReplaceAll("abc", {{"c", "x"}});
+ EXPECT_EQ(s, "abx");
+
+ // Replace multiple times with varying lengths of original/replacement.
+ s = absl::StrReplaceAll("ababa", {{"a", "xxx"}});
+ EXPECT_EQ(s, "xxxbxxxbxxx");
+
+ s = absl::StrReplaceAll("ababa", {{"b", "xxx"}});
+ EXPECT_EQ(s, "axxxaxxxa");
+
+ s = absl::StrReplaceAll("aaabaaabaaa", {{"aaa", "x"}});
+ EXPECT_EQ(s, "xbxbx");
+
+ s = absl::StrReplaceAll("abbbabbba", {{"bbb", "x"}});
+ EXPECT_EQ(s, "axaxa");
+
+ // Overlapping matches are replaced greedily.
+ s = absl::StrReplaceAll("aaa", {{"aa", "x"}});
+ EXPECT_EQ(s, "xa");
+
+ // The replacements are not recursive.
+ s = absl::StrReplaceAll("aaa", {{"aa", "a"}});
+ EXPECT_EQ(s, "aa");
+}
+
+TEST(StrReplaceAll, ManyReplacements) {
+ std::string s;
+
+ // Empty std::string.
+ s = absl::StrReplaceAll("", {{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}});
+ EXPECT_EQ(s, "");
+
+ // Empty substring.
+ s = absl::StrReplaceAll("abc", {{"", ""}, {"", "y"}, {"x", ""}});
+ EXPECT_EQ(s, "abc");
+
+ // Replace entire std::string, one char at a time
+ s = absl::StrReplaceAll("abc", {{"a", "x"}, {"b", "y"}, {"c", "z"}});
+ EXPECT_EQ(s, "xyz");
+ s = absl::StrReplaceAll("zxy", {{"z", "x"}, {"x", "y"}, {"y", "z"}});
+ EXPECT_EQ(s, "xyz");
+
+ // Replace once at the start (longer matches take precedence)
+ s = absl::StrReplaceAll("abc", {{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}});
+ EXPECT_EQ(s, "xyz");
+
+ // Replace once in the middle.
+ s = absl::StrReplaceAll(
+ "Abc!", {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}});
+ EXPECT_EQ(s, "Ayz!");
+
+ // Replace once at the end.
+ s = absl::StrReplaceAll(
+ "Abc!",
+ {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}});
+ EXPECT_EQ(s, "Ayz?");
+
+ // Replace multiple times with varying lengths of original/replacement.
+ s = absl::StrReplaceAll("ababa", {{"a", "xxx"}, {"b", "XXXX"}});
+ EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx");
+
+ // Overlapping matches are replaced greedily.
+ s = absl::StrReplaceAll("aaa", {{"aa", "x"}, {"a", "X"}});
+ EXPECT_EQ(s, "xX");
+ s = absl::StrReplaceAll("aaa", {{"a", "X"}, {"aa", "x"}});
+ EXPECT_EQ(s, "xX");
+
+ // Two well-known sentences
+ s = absl::StrReplaceAll("the quick brown fox jumped over the lazy dogs",
+ {
+ {"brown", "box"},
+ {"dogs", "jugs"},
+ {"fox", "with"},
+ {"jumped", "five"},
+ {"over", "dozen"},
+ {"quick", "my"},
+ {"the", "pack"},
+ {"the lazy", "liquor"},
+ });
+ EXPECT_EQ(s, "pack my box with five dozen liquor jugs");
+}
+
+TEST(StrReplaceAll, ManyReplacementsInMap) {
+ std::map<const char *, const char *> replacements;
+ replacements["$who"] = "Bob";
+ replacements["$count"] = "5";
+ replacements["#Noun"] = "Apples";
+ std::string s = absl::StrReplaceAll("$who bought $count #Noun. Thanks $who!",
+ replacements);
+ EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
+}
+
+TEST(StrReplaceAll, ReplacementsInPlace) {
+ std::string s = std::string("$who bought $count #Noun. Thanks $who!");
+ int count;
+ count = absl::StrReplaceAll({{"$count", absl::StrCat(5)},
+ {"$who", "Bob"},
+ {"#Noun", "Apples"}}, &s);
+ EXPECT_EQ(count, 4);
+ EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
+}
+
+TEST(StrReplaceAll, ReplacementsInPlaceInMap) {
+ std::string s = std::string("$who bought $count #Noun. Thanks $who!");
+ std::map<absl::string_view, absl::string_view> replacements;
+ replacements["$who"] = "Bob";
+ replacements["$count"] = "5";
+ replacements["#Noun"] = "Apples";
+ int count;
+ count = absl::StrReplaceAll(replacements, &s);
+ EXPECT_EQ(count, 4);
+ EXPECT_EQ("Bob bought 5 Apples. Thanks Bob!", s);
+}
+
+struct Cont {
+ Cont() {}
+ explicit Cont(absl::string_view src) : data(src) {}
+
+ absl::string_view data;
+};
+
+template <int index>
+absl::string_view get(const Cont& c) {
+ auto splitter = absl::StrSplit(c.data, ':');
+ auto it = splitter.begin();
+ for (int i = 0; i < index; ++i) ++it;
+
+ return *it;
+}
+
+TEST(StrReplaceAll, VariableNumber) {
+ std::string s;
+ {
+ std::vector<std::pair<std::string, std::string>> replacements;
+
+ s = "abc";
+ EXPECT_EQ(0, absl::StrReplaceAll(replacements, &s));
+ EXPECT_EQ("abc", s);
+
+ s = "abc";
+ replacements.push_back({"a", "A"});
+ EXPECT_EQ(1, absl::StrReplaceAll(replacements, &s));
+ EXPECT_EQ("Abc", s);
+
+ s = "abc";
+ replacements.push_back({"b", "B"});
+ EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
+ EXPECT_EQ("ABc", s);
+
+ s = "abc";
+ replacements.push_back({"d", "D"});
+ EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
+ EXPECT_EQ("ABc", s);
+
+ EXPECT_EQ("ABcABc", absl::StrReplaceAll("abcabc", replacements));
+ }
+
+ {
+ std::map<const char*, const char*> replacements;
+ replacements["aa"] = "x";
+ replacements["a"] = "X";
+ s = "aaa";
+ EXPECT_EQ(2, absl::StrReplaceAll(replacements, &s));
+ EXPECT_EQ("xX", s);
+
+ EXPECT_EQ("xxX", absl::StrReplaceAll("aaaaa", replacements));
+ }
+
+ {
+ std::list<std::pair<absl::string_view, absl::string_view>> replacements = {
+ {"a", "x"}, {"b", "y"}, {"c", "z"}};
+
+ std::string s = absl::StrReplaceAll("abc", replacements);
+ EXPECT_EQ(s, "xyz");
+ }
+
+ {
+ using X = std::tuple<absl::string_view, std::string, int>;
+ std::vector<X> replacements(3);
+ replacements[0] = X{"a", "x", 1};
+ replacements[1] = X{"b", "y", 0};
+ replacements[2] = X{"c", "z", -1};
+
+ std::string s = absl::StrReplaceAll("abc", replacements);
+ EXPECT_EQ(s, "xyz");
+ }
+
+ {
+ std::vector<Cont> replacements(3);
+ replacements[0] = Cont{"a:x"};
+ replacements[1] = Cont{"b:y"};
+ replacements[2] = Cont{"c:z"};
+
+ std::string s = absl::StrReplaceAll("abc", replacements);
+ EXPECT_EQ(s, "xyz");
+ }
+}
+
+// Same as above, but using the in-place variant of absl::StrReplaceAll,
+// that returns the # of replacements performed.
+TEST(StrReplaceAll, Inplace) {
+ std::string s;
+ int reps;
+
+ // Empty std::string.
+ s = "";
+ reps = absl::StrReplaceAll({{"", ""}, {"x", ""}, {"", "y"}, {"x", "y"}}, &s);
+ EXPECT_EQ(reps, 0);
+ EXPECT_EQ(s, "");
+
+ // Empty substring.
+ s = "abc";
+ reps = absl::StrReplaceAll({{"", ""}, {"", "y"}, {"x", ""}}, &s);
+ EXPECT_EQ(reps, 0);
+ EXPECT_EQ(s, "abc");
+
+ // Replace entire std::string, one char at a time
+ s = "abc";
+ reps = absl::StrReplaceAll({{"a", "x"}, {"b", "y"}, {"c", "z"}}, &s);
+ EXPECT_EQ(reps, 3);
+ EXPECT_EQ(s, "xyz");
+ s = "zxy";
+ reps = absl::StrReplaceAll({{"z", "x"}, {"x", "y"}, {"y", "z"}}, &s);
+ EXPECT_EQ(reps, 3);
+ EXPECT_EQ(s, "xyz");
+
+ // Replace once at the start (longer matches take precedence)
+ s = "abc";
+ reps = absl::StrReplaceAll({{"a", "x"}, {"ab", "xy"}, {"abc", "xyz"}}, &s);
+ EXPECT_EQ(reps, 1);
+ EXPECT_EQ(s, "xyz");
+
+ // Replace once in the middle.
+ s = "Abc!";
+ reps = absl::StrReplaceAll(
+ {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc", "yz"}, {"c", "z"}}, &s);
+ EXPECT_EQ(reps, 1);
+ EXPECT_EQ(s, "Ayz!");
+
+ // Replace once at the end.
+ s = "Abc!";
+ reps = absl::StrReplaceAll(
+ {{"a", "x"}, {"ab", "xy"}, {"b", "y"}, {"bc!", "yz?"}, {"c!", "z;"}}, &s);
+ EXPECT_EQ(reps, 1);
+ EXPECT_EQ(s, "Ayz?");
+
+ // Replace multiple times with varying lengths of original/replacement.
+ s = "ababa";
+ reps = absl::StrReplaceAll({{"a", "xxx"}, {"b", "XXXX"}}, &s);
+ EXPECT_EQ(reps, 5);
+ EXPECT_EQ(s, "xxxXXXXxxxXXXXxxx");
+
+ // Overlapping matches are replaced greedily.
+ s = "aaa";
+ reps = absl::StrReplaceAll({{"aa", "x"}, {"a", "X"}}, &s);
+ EXPECT_EQ(reps, 2);
+ EXPECT_EQ(s, "xX");
+ s = "aaa";
+ reps = absl::StrReplaceAll({{"a", "X"}, {"aa", "x"}}, &s);
+ EXPECT_EQ(reps, 2);
+ EXPECT_EQ(s, "xX");
+
+ // Two well-known sentences
+ s = "the quick brown fox jumped over the lazy dogs";
+ reps = absl::StrReplaceAll(
+ {
+ {"brown", "box"},
+ {"dogs", "jugs"},
+ {"fox", "with"},
+ {"jumped", "five"},
+ {"over", "dozen"},
+ {"quick", "my"},
+ {"the", "pack"},
+ {"the lazy", "liquor"},
+ },
+ &s);
+ EXPECT_EQ(reps, 8);
+ EXPECT_EQ(s, "pack my box with five dozen liquor jugs");
+}
diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc
new file mode 100644
index 00000000..910a67cf
--- /dev/null
+++ b/absl/strings/str_split.cc
@@ -0,0 +1,133 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/str_split.h"
+
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <limits>
+
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/ascii.h"
+
+namespace absl {
+
+namespace {
+
+// This GenericFind() template function encapsulates the finding algorithm
+// shared between the ByString and ByAnyChar delimiters. The FindPolicy
+// template parameter allows each delimiter to customize the actual find
+// function to use and the length of the found delimiter. For example, the
+// Literal delimiter will ultimately use absl::string_view::find(), and the
+// AnyOf delimiter will use absl::string_view::find_first_of().
+template <typename FindPolicy>
+absl::string_view GenericFind(absl::string_view text,
+ absl::string_view delimiter, size_t pos,
+ FindPolicy find_policy) {
+ if (delimiter.empty() && text.length() > 0) {
+ // Special case for empty std::string delimiters: always return a zero-length
+ // absl::string_view referring to the item at position 1 past pos.
+ return absl::string_view(text.begin() + pos + 1, 0);
+ }
+ size_t found_pos = absl::string_view::npos;
+ absl::string_view found(text.end(), 0); // By default, not found
+ found_pos = find_policy.Find(text, delimiter, pos);
+ if (found_pos != absl::string_view::npos) {
+ found = absl::string_view(text.data() + found_pos,
+ find_policy.Length(delimiter));
+ }
+ return found;
+}
+
+// Finds using absl::string_view::find(), therefore the length of the found
+// delimiter is delimiter.length().
+struct LiteralPolicy {
+ size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
+ return text.find(delimiter, pos);
+ }
+ size_t Length(absl::string_view delimiter) { return delimiter.length(); }
+};
+
+// Finds using absl::string_view::find_first_of(), therefore the length of the
+// found delimiter is 1.
+struct AnyOfPolicy {
+ size_t Find(absl::string_view text, absl::string_view delimiter, size_t pos) {
+ return text.find_first_of(delimiter, pos);
+ }
+ size_t Length(absl::string_view /* delimiter */) { return 1; }
+};
+
+} // namespace
+
+//
+// ByString
+//
+
+ByString::ByString(absl::string_view sp) : delimiter_(sp) {}
+
+absl::string_view ByString::Find(absl::string_view text, size_t pos) const {
+ if (delimiter_.length() == 1) {
+ // Much faster to call find on a single character than on an
+ // absl::string_view.
+ size_t found_pos = text.find(delimiter_[0], pos);
+ if (found_pos == absl::string_view::npos)
+ return absl::string_view(text.end(), 0);
+ return text.substr(found_pos, 1);
+ }
+ return GenericFind(text, delimiter_, pos, LiteralPolicy());
+}
+
+//
+// ByChar
+//
+
+absl::string_view ByChar::Find(absl::string_view text, size_t pos) const {
+ size_t found_pos = text.find(c_, pos);
+ if (found_pos == absl::string_view::npos)
+ return absl::string_view(text.end(), 0);
+ return text.substr(found_pos, 1);
+}
+
+//
+// ByAnyChar
+//
+
+ByAnyChar::ByAnyChar(absl::string_view sp) : delimiters_(sp) {}
+
+absl::string_view ByAnyChar::Find(absl::string_view text, size_t pos) const {
+ return GenericFind(text, delimiters_, pos, AnyOfPolicy());
+}
+
+//
+// ByLength
+//
+ByLength::ByLength(ptrdiff_t length) : length_(length) {
+ ABSL_RAW_CHECK(length > 0, "");
+}
+
+absl::string_view ByLength::Find(absl::string_view text,
+ size_t pos) const {
+ pos = std::min(pos, text.size()); // truncate `pos`
+ absl::string_view substr = text.substr(pos);
+ // If the std::string is shorter than the chunk size we say we
+ // "can't find the delimiter" so this will be the last chunk.
+ if (substr.length() <= static_cast<size_t>(length_))
+ return absl::string_view(text.end(), 0);
+
+ return absl::string_view(substr.begin() + length_, 0);
+}
+
+} // namespace absl
diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h
new file mode 100644
index 00000000..a7b48b18
--- /dev/null
+++ b/absl/strings/str_split.h
@@ -0,0 +1,511 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: str_split.h
+// -----------------------------------------------------------------------------
+//
+// This file contains functions for splitting strings. It defines the main
+// `StrSplit()` function, several delimiters for determining the boundaries on
+// which to split the std::string, and predicates for filtering delimited results.
+// `StrSplit()` adapts the returned collection to the type specified by the
+// caller.
+//
+// Example:
+//
+// // Splits the given std::string on commas. Returns the results in a
+// // vector of strings.
+// std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
+// // Can also use ","
+// // v[0] == "a", v[1] == "b", v[2] == "c"
+//
+// See StrSplit() below for more information.
+#ifndef ABSL_STRINGS_STR_SPLIT_H_
+#define ABSL_STRINGS_STR_SPLIT_H_
+
+#include <algorithm>
+#include <cstddef>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/internal/str_split_internal.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+
+namespace absl {
+
+//------------------------------------------------------------------------------
+// Delimiters
+//------------------------------------------------------------------------------
+//
+// `StrSplit()` uses delimiters to define the boundaries between elements in the
+// provided input. Several `Delimiter` types are defined below. If a std::string
+// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
+// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
+// were passed a `ByString` delimiter.
+//
+// A `Delimiter` is an object with a `Find()` function that knows how to find
+// the first occurrence of itself in a given `absl::string_view`.
+//
+// The following `Delimiter` types are available for use within `StrSplit()`:
+//
+// - `ByString` (default for std::string arguments)
+// - `ByChar` (default for a char argument)
+// - `ByAnyChar`
+// - `ByLength`
+// - `MaxSplits`
+//
+//
+// A Delimiter's Find() member function will be passed the input text that is to
+// be split and the position to begin searching for the next delimiter in the
+// input text. The returned absl::string_view should refer to the next
+// occurrence (after pos) of the represented delimiter; this returned
+// absl::string_view represents the next location where the input std::string should
+// be broken. The returned absl::string_view may be zero-length if the Delimiter
+// does not represent a part of the std::string (e.g., a fixed-length delimiter). If
+// no delimiter is found in the given text, a zero-length absl::string_view
+// referring to text.end() should be returned (e.g.,
+// absl::string_view(text.end(), 0)). It is important that the returned
+// absl::string_view always be within the bounds of input text given as an
+// argument--it must not refer to a std::string that is physically located outside of
+// the given std::string.
+//
+// The following example is a simple Delimiter object that is created with a
+// single char and will look for that char in the text passed to the Find()
+// function:
+//
+// struct SimpleDelimiter {
+// const char c_;
+// explicit SimpleDelimiter(char c) : c_(c) {}
+// absl::string_view Find(absl::string_view text, size_t pos) {
+// auto found = text.find(c_, pos);
+// if (found == absl::string_view::npos)
+// return absl::string_view(text.end(), 0);
+//
+// return absl::string_view(text, found, 1);
+// }
+// };
+
+// ByString
+//
+// A sub-std::string delimiter. If `StrSplit()` is passed a std::string in place of a
+// `Delimiter` object, the std::string will be implicitly converted into a
+// `ByString` delimiter.
+//
+// Example:
+//
+// // Because a std::string literal is converted to an `absl::ByString`,
+// // the following two splits are equivalent.
+//
+// std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
+//
+// using absl::ByString;
+// std::vector<std::string> v2 = absl::StrSplit("a, b, c",
+// ByString(", "));
+// // v[0] == "a", v[1] == "b", v[3] == "c"
+class ByString {
+ public:
+ explicit ByString(absl::string_view sp);
+ absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+ const std::string delimiter_;
+};
+
+// ByChar
+//
+// A single character delimiter. `ByChar` is functionally equivalent to a
+// 1-char std::string within a `ByString` delimiter, but slightly more
+// efficient.
+//
+// Example:
+//
+// // Because a char literal is converted to a absl::ByChar,
+// // the following two splits are equivalent.
+// std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
+// using absl::ByChar;
+// std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
+// // v[0] == "a", v[1] == "b", v[3] == "c"
+//
+// `ByChar` is also the default delimiter if a single character is given
+// as the delimiter to `StrSplit()`. For example, the following calls are
+// equivalent:
+//
+// std::vector<std::string> v = absl::StrSplit("a-b", '-');
+//
+// using absl::ByChar;
+// std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
+//
+class ByChar {
+ public:
+ explicit ByChar(char c) : c_(c) {}
+ absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+ char c_;
+};
+
+// ByAnyChar
+//
+// A delimiter that will match any of the given byte-sized characters within
+// its provided std::string.
+//
+// Note: this delimiter works with single-byte std::string data, but does not work
+// with variable-width encodings, such as UTF-8.
+//
+// Example:
+//
+// using absl::ByAnyChar;
+// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
+// // v[0] == "a", v[1] == "b", v[3] == "c"
+//
+// If `ByAnyChar` is given the empty std::string, it behaves exactly like
+// `ByString` and matches each individual character in the input std::string.
+//
+class ByAnyChar {
+ public:
+ explicit ByAnyChar(absl::string_view sp);
+ absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+ const std::string delimiters_;
+};
+
+// ByLength
+//
+// A delimiter for splitting into equal-length strings. The length argument to
+// the constructor must be greater than 0.
+//
+// Note: this delimiter works with single-byte std::string data, but does not work
+// with variable-width encodings, such as UTF-8.
+//
+// Example:
+//
+// using absl::ByLength;
+// std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
+
+// // v[0] == "123", v[1] == "456", v[2] == "789"
+//
+// Note that the std::string does not have to be a multiple of the fixed split
+// length. In such a case, the last substring will be shorter.
+//
+// using absl::ByLength;
+// std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
+//
+// // v[0] == "12", v[1] == "35", v[2] == "5"
+class ByLength {
+ public:
+ explicit ByLength(ptrdiff_t length);
+ absl::string_view Find(absl::string_view text, size_t pos) const;
+
+ private:
+ const ptrdiff_t length_;
+};
+
+namespace strings_internal {
+
+// A traits-like metafunction for selecting the default Delimiter object type
+// for a particular Delimiter type. The base case simply exposes type Delimiter
+// itself as the delimiter's Type. However, there are specializations for
+// std::string-like objects that map them to the ByString delimiter object.
+// This allows functions like absl::StrSplit() and absl::MaxSplits() to accept
+// std::string-like objects (e.g., ',') as delimiter arguments but they will be
+// treated as if a ByString delimiter was given.
+template <typename Delimiter>
+struct SelectDelimiter {
+ using type = Delimiter;
+};
+
+template <>
+struct SelectDelimiter<char> {
+ using type = ByChar;
+};
+template <>
+struct SelectDelimiter<char*> {
+ using type = ByString;
+};
+template <>
+struct SelectDelimiter<const char*> {
+ using type = ByString;
+};
+template <>
+struct SelectDelimiter<absl::string_view> {
+ using type = ByString;
+};
+template <>
+struct SelectDelimiter<std::string> {
+ using type = ByString;
+};
+
+// Wraps another delimiter and sets a max number of matches for that delimiter.
+template <typename Delimiter>
+class MaxSplitsImpl {
+ public:
+ MaxSplitsImpl(Delimiter delimiter, int limit)
+ : delimiter_(delimiter), limit_(limit), count_(0) {}
+ absl::string_view Find(absl::string_view text, size_t pos) {
+ if (count_++ == limit_) {
+ return absl::string_view(text.end(), 0); // No more matches.
+ }
+ return delimiter_.Find(text, pos);
+ }
+
+ private:
+ Delimiter delimiter_;
+ const int limit_;
+ int count_;
+};
+
+} // namespace strings_internal
+
+// MaxSplits()
+//
+// A delimiter that limits the number of matches which can occur to the passed
+// `limit`. The last element in the returned collection will contain all
+// remaining unsplit pieces, which may contain instances of the delimiter.
+// The collection will contain at most `limit` + 1 elements.
+// Example:
+//
+// using absl::MaxSplits;
+// std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
+//
+// // v[0] == "a", v[1] == "b,c"
+template <typename Delimiter>
+inline strings_internal::MaxSplitsImpl<
+ typename strings_internal::SelectDelimiter<Delimiter>::type>
+MaxSplits(Delimiter delimiter, int limit) {
+ typedef
+ typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
+ return strings_internal::MaxSplitsImpl<DelimiterType>(
+ DelimiterType(delimiter), limit);
+}
+
+//------------------------------------------------------------------------------
+// Predicates
+//------------------------------------------------------------------------------
+//
+// Predicates filter the results of a `StrSplit()` by determining whether or not
+// a resultant element is included in the result set. A predicate may be passed
+// as an optional third argument to the `StrSplit()` function.
+//
+// Predicates are unary functions (or functors) that take a single
+// `absl::string_view` argument and return a bool indicating whether the
+// argument should be included (`true`) or excluded (`false`).
+//
+// Predicates are useful when filtering out empty substrings. By default, empty
+// substrings may be returned by `StrSplit()`, which is similar to the way split
+// functions work in other programming languages.
+
+// AllowEmpty()
+//
+// Always returns `true`, indicating that all strings--including empty
+// strings--should be included in the split output. This predicate is not
+// strictly needed because this is the default behavior of `StrSplit()`;
+// however, it might be useful at some call sites to make the intent explicit.
+//
+// Example:
+//
+// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
+//
+// // v[0] == " a ", v[1] == " ", v[2] == "", v[3] = "b", v[4] == ""
+struct AllowEmpty {
+ bool operator()(absl::string_view) const { return true; }
+};
+
+// SkipEmpty()
+//
+// Returns `false` if the given `absl::string_view` is empty, indicating that
+// `StrSplit()` should omit the empty std::string.
+//
+// Example:
+//
+// std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
+//
+// // v[0] == "a", v[1] == "b"
+//
+// Note: `SkipEmpty()` does not consider a std::string containing only whitespace
+// to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
+// predicate.
+struct SkipEmpty {
+ bool operator()(absl::string_view sp) const { return !sp.empty(); }
+};
+
+// SkipWhitespace()
+//
+// Returns `false` if the given `absl::string_view` is empty *or* contains only
+// whitespace, indicating that `StrSplit()` should omit the std::string.
+//
+// Example:
+//
+// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
+// ',', SkipWhitespace());
+// // v[0] == " a ", v[1] == "b"
+//
+// // SkipEmpty() would return whitespace elements
+// std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
+// // v[0] == " a ", v[1] == " ", v[2] == "b"
+struct SkipWhitespace {
+ bool operator()(absl::string_view sp) const {
+ sp = absl::StripAsciiWhitespace(sp);
+ return !sp.empty();
+ }
+};
+
+//------------------------------------------------------------------------------
+// StrSplit()
+//------------------------------------------------------------------------------
+
+// StrSplit()
+//
+// Splits a given `std::string` based on the provided `Delimiter` object,
+// returning the elements within the type specified by the caller. Optionally,
+// you may also pass a `Predicate` to `StrSplit()` indicating whether to include
+// or exclude the resulting element within the final result set. (See the
+// overviews for Delimiters and Predicates above.)
+//
+// Example:
+//
+// std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
+// // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
+//
+// You can also provide an explicit `Delimiter` object:
+//
+// Example:
+//
+// using absl::ByAnyChar;
+// std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
+// // v[0] == "a", v[1] == "b", v[3] == "c"
+//
+// See above for more information on delimiters.
+//
+// By default, empty strings are included in the result set. You can optionally
+// include a third `Predicate` argument to apply a test for whether the
+// resultant element should be included in the result set:
+//
+// Example:
+//
+// std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
+// ',', SkipWhitespace());
+// // v[0] == "a", v[1] == "b"
+//
+// See above for more information on predicates.
+//
+//------------------------------------------------------------------------------
+// StrSplit() Return Types
+//------------------------------------------------------------------------------
+//
+// The `StrSplit()` function adapts the returned collection to the collection
+// specified by the caller (e.g. `std::vector` above). The returned collections
+// may contain `string`, `absl::string_view` (in which case the original std::string
+// being split must ensure that it outlives the collection), or any object that
+// can be explicitly created from an `absl::string_view`. This behavior works
+// for:
+//
+// 1) All standard STL containers including `std::vector`, `std::list`,
+// `std::deque`, `std::set`,`std::multiset`, 'std::map`, and `std::multimap`
+// 2) `std::pair` (which is not actually a container). See below.
+//
+// Example:
+//
+// // The results are returned as `absl::string_view` objects. Note that we
+// // have to ensure that the input std::string outlives any results.
+// std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
+//
+// // Stores results in a std::set<std::string>, which also performs
+// // de-duplication and orders the elements in ascending order.
+// std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
+// // v[0] == "a", v[1] == "b", v[2] = "c"
+//
+// // `StrSplit()` can be used within a range-based for loop, in which case
+// // each element will be of type `absl::string_view`.
+// std::vector<std::string> v;
+// for (const auto sv : absl::StrSplit("a,b,c", ',')) {
+// if (sv != "b") v.emplace_back(sv);
+// }
+// // v[0] == "a", v[1] == "c"
+//
+// // Stores results in a map. The map implementation assumes that the input
+// // is provided as a series of key/value pairs. For example, the 0th element
+// // resulting from the split will be stored as a key to the 1st element. If
+// // an odd number of elements are resolved, the last element is paired with
+// // a default-constructed value (e.g., empty std::string).
+// std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
+// // m["a"] == "b", m["c"] == "" // last component value equals ""
+//
+// Splitting to `std::pair` is an interesting case because it can hold only two
+// elements and is not a collection type. When splitting to a `std::pair` the
+// first two split strings become the `std::pair` `.first` and `.second`
+// members, respectively. The remaining split substrings are discarded. If there
+// are less than two split substrings, the empty std::string is used for the
+// corresponding
+// `std::pair` member.
+//
+// Example:
+//
+// // Stores first two split strings as the members in a std::pair.
+// std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
+// // p.first == "a", p.second == "b" // "c" is omitted.
+//
+// The `StrSplit()` function can be used multiple times to perform more
+// complicated splitting logic, such as intelligently parsing key-value pairs.
+//
+// Example:
+//
+// // The input std::string "a=b=c,d=e,f=,g" becomes
+// // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
+// std::map<std::string, std::string> m;
+// for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
+// m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
+// }
+// EXPECT_EQ("b=c", m.find("a")->second);
+// EXPECT_EQ("e", m.find("d")->second);
+// EXPECT_EQ("", m.find("f")->second);
+// EXPECT_EQ("", m.find("g")->second);
+//
+// WARNING: Due to a legacy bug that is maintained for backward compatibility,
+// splitting the following empty string_views produces different results:
+//
+// absl::StrSplit(absl::string_view(""), '-'); // {""}
+// absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""}
+//
+// Try not to depend on this distinction because the bug may one day be fixed.
+template <typename Delimiter>
+strings_internal::Splitter<
+ typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty>
+StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
+ using DelimiterType =
+ typename strings_internal::SelectDelimiter<Delimiter>::type;
+ return strings_internal::Splitter<DelimiterType, AllowEmpty>(
+ std::move(text), DelimiterType(d), AllowEmpty());
+}
+
+template <typename Delimiter, typename Predicate>
+strings_internal::Splitter<
+ typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate>
+StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
+ Predicate p) {
+ using DelimiterType =
+ typename strings_internal::SelectDelimiter<Delimiter>::type;
+ return strings_internal::Splitter<DelimiterType, Predicate>(
+ std::move(text), DelimiterType(d), std::move(p));
+}
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_STR_SPLIT_H_
diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc
new file mode 100644
index 00000000..a95a0fbd
--- /dev/null
+++ b/absl/strings/str_split_test.cc
@@ -0,0 +1,896 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/str_split.h"
+
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+#include <deque>
+#include <limits>
+#include <list>
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/dynamic_annotations.h" // for RunningOnValgrind
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/strings/numbers.h"
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Pair;
+using ::testing::UnorderedElementsAre;
+
+// This tests the overall split API, which is made up of the absl::StrSplit()
+// function and the Delimiter objects in the absl:: namespace.
+// This TEST macro is outside of any namespace to require full specification of
+// namespaces just like callers will need to use.
+TEST(Split, APIExamples) {
+ {
+ // Passes std::string delimiter. Assumes the default of Literal.
+ std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+
+ // Equivalent to...
+ using absl::ByString;
+ v = absl::StrSplit("a,b,c", ByString(","));
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+
+ // Equivalent to...
+ EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
+ ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Same as above, but using a single character as the delimiter.
+ std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+
+ // Equivalent to...
+ using absl::ByChar;
+ v = absl::StrSplit("a,b,c", ByChar(','));
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Same as above, but using std::string
+ std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+
+ // Equivalent to...
+ using absl::ByChar;
+ v = absl::StrSplit("a,b,c", ByChar(','));
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Uses the Literal std::string "=>" as the delimiter.
+ const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // The substrings are returned as string_views, eliminating copying.
+ std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Leading and trailing empty substrings.
+ std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
+ EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
+ }
+
+ {
+ // Splits on a delimiter that is not found.
+ std::vector<std::string> v = absl::StrSplit("abc", ',');
+ EXPECT_THAT(v, ElementsAre("abc"));
+ }
+
+ {
+ // Splits the input std::string into individual characters by using an empty
+ // std::string as the delimiter.
+ std::vector<std::string> v = absl::StrSplit("abc", "");
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Splits std::string data with embedded NUL characters, using NUL as the
+ // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
+ // say that's the empty std::string when constructing the absl::string_view
+ // delimiter. Instead, a non-empty std::string containing NUL can be used as the
+ // delimiter.
+ std::string embedded_nulls("a\0b\0c", 5);
+ std::string null_delim("\0", 1);
+ std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Stores first two split strings as the members in a std::pair.
+ std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
+ EXPECT_EQ("a", p.first);
+ EXPECT_EQ("b", p.second);
+ // "c" is omitted because std::pair can hold only two elements.
+ }
+
+ {
+ // Results stored in std::set<std::string>
+ std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Uses a non-const char* delimiter.
+ char a[] = ",";
+ char* d = a + 0;
+ std::vector<std::string> v = absl::StrSplit("a,b,c", d);
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Results split using either of , or ;
+ using absl::ByAnyChar;
+ std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Uses the SkipWhitespace predicate.
+ using absl::SkipWhitespace;
+ std::vector<std::string> v = absl::StrSplit("a, ,,b,", ',', SkipWhitespace());
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+ }
+
+ {
+ // Uses the ByLength delimiter.
+ using absl::ByLength;
+ std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
+ EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
+ }
+
+ {
+ // Results stored in a std::map.
+ std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
+ EXPECT_EQ(2, m.size());
+ EXPECT_EQ("3", m["a"]);
+ EXPECT_EQ("2", m["b"]);
+ }
+
+ {
+ // Results stored in a std::multimap.
+ std::multimap<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
+ EXPECT_EQ(3, m.size());
+ auto it = m.find("a");
+ EXPECT_EQ("1", it->second);
+ ++it;
+ EXPECT_EQ("3", it->second);
+ it = m.find("b");
+ EXPECT_EQ("2", it->second);
+ }
+
+ {
+ // Demonstrates use in a range-based for loop in C++11.
+ std::string s = "x,x,x,x,x,x,x";
+ for (absl::string_view sp : absl::StrSplit(s, ',')) {
+ EXPECT_EQ("x", sp);
+ }
+ }
+
+ {
+ // Demonstrates use with a Predicate in a range-based for loop.
+ using absl::SkipWhitespace;
+ std::string s = " ,x,,x,,x,x,x,,";
+ for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
+ EXPECT_EQ("x", sp);
+ }
+ }
+
+ {
+ // Demonstrates a "smart" split to std::map using two separate calls to
+ // absl::StrSplit. One call to split the records, and another call to split
+ // the keys and values. This also uses the Limit delimiter so that the
+ // std::string "a=b=c" will split to "a" -> "b=c".
+ std::map<std::string, std::string> m;
+ for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
+ m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
+ }
+ EXPECT_EQ("b=c", m.find("a")->second);
+ EXPECT_EQ("e", m.find("d")->second);
+ EXPECT_EQ("", m.find("f")->second);
+ EXPECT_EQ("", m.find("g")->second);
+ }
+}
+
+//
+// Tests for SplitIterator
+//
+
+TEST(SplitIterator, Basics) {
+ auto splitter = absl::StrSplit("a,b", ',');
+ auto it = splitter.begin();
+ auto end = splitter.end();
+
+ EXPECT_NE(it, end);
+ EXPECT_EQ("a", *it); // tests dereference
+ ++it; // tests preincrement
+ EXPECT_NE(it, end);
+ EXPECT_EQ("b", std::string(it->data(), it->size())); // tests dereference as ptr
+ it++; // tests postincrement
+ EXPECT_EQ(it, end);
+}
+
+// Simple Predicate to skip a particular std::string.
+class Skip {
+ public:
+ explicit Skip(const std::string& s) : s_(s) {}
+ bool operator()(absl::string_view sp) { return sp != s_; }
+
+ private:
+ std::string s_;
+};
+
+TEST(SplitIterator, Predicate) {
+ auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
+ auto it = splitter.begin();
+ auto end = splitter.end();
+
+ EXPECT_NE(it, end);
+ EXPECT_EQ("a", *it); // tests dereference
+ ++it; // tests preincrement -- "b" should be skipped here.
+ EXPECT_NE(it, end);
+ EXPECT_EQ("c", std::string(it->data(), it->size())); // tests dereference as ptr
+ it++; // tests postincrement
+ EXPECT_EQ(it, end);
+}
+
+TEST(SplitIterator, EdgeCases) {
+ // Expected input and output, assuming a delimiter of ','
+ struct {
+ std::string in;
+ std::vector<std::string> expect;
+ } specs[] = {
+ {"", {""}},
+ {"foo", {"foo"}},
+ {",", {"", ""}},
+ {",foo", {"", "foo"}},
+ {"foo,", {"foo", ""}},
+ {",foo,", {"", "foo", ""}},
+ {"foo,bar", {"foo", "bar"}},
+ };
+
+ for (const auto& spec : specs) {
+ SCOPED_TRACE(spec.in);
+ auto splitter = absl::StrSplit(spec.in, ',');
+ auto it = splitter.begin();
+ auto end = splitter.end();
+ for (const auto& expected : spec.expect) {
+ EXPECT_NE(it, end);
+ EXPECT_EQ(expected, *it++);
+ }
+ EXPECT_EQ(it, end);
+ }
+}
+
+TEST(Splitter, Const) {
+ const auto splitter = absl::StrSplit("a,b,c", ',');
+ EXPECT_THAT(splitter, ElementsAre("a", "b", "c"));
+}
+
+TEST(Split, EmptyAndNull) {
+ // Attention: Splitting a null absl::string_view is different than splitting
+ // an empty absl::string_view even though both string_views are considered
+ // equal. This behavior is likely surprising and undesirable. However, to
+ // maintain backward compatibility, there is a small "hack" in
+ // str_split_internal.h that preserves this behavior. If that behavior is ever
+ // changed/fixed, this test will need to be updated.
+ EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre(""));
+ EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre());
+}
+
+TEST(SplitIterator, EqualityAsEndCondition) {
+ auto splitter = absl::StrSplit("a,b,c", ',');
+ auto it = splitter.begin();
+ auto it2 = it;
+
+ // Increments it2 twice to point to "c" in the input text.
+ ++it2;
+ ++it2;
+ EXPECT_EQ("c", *it2);
+
+ // This test uses a non-end SplitIterator as the terminating condition in a
+ // for loop. This relies on SplitIterator equality for non-end SplitIterators
+ // working correctly. At this point it2 points to "c", and we use that as the
+ // "end" condition in this test.
+ std::vector<absl::string_view> v;
+ for (; it != it2; ++it) {
+ v.push_back(*it);
+ }
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+}
+
+//
+// Tests for Splitter
+//
+
+TEST(Splitter, RangeIterators) {
+ auto splitter = absl::StrSplit("a,b,c", ',');
+ std::vector<absl::string_view> output;
+ for (const absl::string_view p : splitter) {
+ output.push_back(p);
+ }
+ EXPECT_THAT(output, ElementsAre("a", "b", "c"));
+}
+
+// Some template functions for use in testing conversion operators
+template <typename ContainerType, typename Splitter>
+void TestConversionOperator(const Splitter& splitter) {
+ ContainerType output = splitter;
+ EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
+}
+
+template <typename MapType, typename Splitter>
+void TestMapConversionOperator(const Splitter& splitter) {
+ MapType m = splitter;
+ EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
+}
+
+template <typename FirstType, typename SecondType, typename Splitter>
+void TestPairConversionOperator(const Splitter& splitter) {
+ std::pair<FirstType, SecondType> p = splitter;
+ EXPECT_EQ(p, (std::pair<FirstType, SecondType>("a", "b")));
+}
+
+TEST(Splitter, ConversionOperator) {
+ auto splitter = absl::StrSplit("a,b,c,d", ',');
+
+ TestConversionOperator<std::vector<absl::string_view>>(splitter);
+ TestConversionOperator<std::vector<std::string>>(splitter);
+ TestConversionOperator<std::list<absl::string_view>>(splitter);
+ TestConversionOperator<std::list<std::string>>(splitter);
+ TestConversionOperator<std::deque<absl::string_view>>(splitter);
+ TestConversionOperator<std::deque<std::string>>(splitter);
+ TestConversionOperator<std::set<absl::string_view>>(splitter);
+ TestConversionOperator<std::set<std::string>>(splitter);
+ TestConversionOperator<std::multiset<absl::string_view>>(splitter);
+ TestConversionOperator<std::multiset<std::string>>(splitter);
+ TestConversionOperator<std::unordered_set<std::string>>(splitter);
+
+ // Tests conversion to map-like objects.
+
+ TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>(
+ splitter);
+ TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter);
+ TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter);
+ TestMapConversionOperator<std::map<std::string, std::string>>(splitter);
+ TestMapConversionOperator<
+ std::multimap<absl::string_view, absl::string_view>>(splitter);
+ TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(splitter);
+ TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(splitter);
+ TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter);
+ TestMapConversionOperator<std::unordered_map<std::string, std::string>>(splitter);
+
+ // Tests conversion to std::pair
+
+ TestPairConversionOperator<absl::string_view, absl::string_view>(splitter);
+ TestPairConversionOperator<absl::string_view, std::string>(splitter);
+ TestPairConversionOperator<std::string, absl::string_view>(splitter);
+ TestPairConversionOperator<std::string, std::string>(splitter);
+}
+
+// A few additional tests for conversion to std::pair. This conversion is
+// different from others because a std::pair always has exactly two elements:
+// .first and .second. The split has to work even when the split has
+// less-than, equal-to, and more-than 2 strings.
+TEST(Splitter, ToPair) {
+ {
+ // Empty std::string
+ std::pair<std::string, std::string> p = absl::StrSplit("", ',');
+ EXPECT_EQ("", p.first);
+ EXPECT_EQ("", p.second);
+ }
+
+ {
+ // Only first
+ std::pair<std::string, std::string> p = absl::StrSplit("a", ',');
+ EXPECT_EQ("a", p.first);
+ EXPECT_EQ("", p.second);
+ }
+
+ {
+ // Only second
+ std::pair<std::string, std::string> p = absl::StrSplit(",b", ',');
+ EXPECT_EQ("", p.first);
+ EXPECT_EQ("b", p.second);
+ }
+
+ {
+ // First and second.
+ std::pair<std::string, std::string> p = absl::StrSplit("a,b", ',');
+ EXPECT_EQ("a", p.first);
+ EXPECT_EQ("b", p.second);
+ }
+
+ {
+ // First and second and then more stuff that will be ignored.
+ std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
+ EXPECT_EQ("a", p.first);
+ EXPECT_EQ("b", p.second);
+ // "c" is omitted.
+ }
+}
+
+TEST(Splitter, Predicates) {
+ static const char kTestChars[] = ",a, ,b,";
+ using absl::AllowEmpty;
+ using absl::SkipEmpty;
+ using absl::SkipWhitespace;
+
+ {
+ // No predicate. Does not skip empties.
+ auto splitter = absl::StrSplit(kTestChars, ',');
+ std::vector<std::string> v = splitter;
+ EXPECT_THAT(v, ElementsAre("", "a", " ", "b", ""));
+ }
+
+ {
+ // Allows empty strings. Same behavior as no predicate at all.
+ auto splitter = absl::StrSplit(kTestChars, ',', AllowEmpty());
+ std::vector<std::string> v_allowempty = splitter;
+ EXPECT_THAT(v_allowempty, ElementsAre("", "a", " ", "b", ""));
+
+ // Ensures AllowEmpty equals the behavior with no predicate.
+ auto splitter_nopredicate = absl::StrSplit(kTestChars, ',');
+ std::vector<std::string> v_nopredicate = splitter_nopredicate;
+ EXPECT_EQ(v_allowempty, v_nopredicate);
+ }
+
+ {
+ // Skips empty strings.
+ auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty());
+ std::vector<std::string> v = splitter;
+ EXPECT_THAT(v, ElementsAre("a", " ", "b"));
+ }
+
+ {
+ // Skips empty and all-whitespace strings.
+ auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace());
+ std::vector<std::string> v = splitter;
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+ }
+}
+
+//
+// Tests for StrSplit()
+//
+
+TEST(Split, Basics) {
+ {
+ // Doesn't really do anything useful because the return value is ignored,
+ // but it should work.
+ absl::StrSplit("a,b,c", ',');
+ }
+
+ {
+ std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ }
+
+ {
+ // Ensures that assignment works. This requires a little extra work with
+ // C++11 because of overloads with initializer_list.
+ std::vector<std::string> v;
+ v = absl::StrSplit("a,b,c", ',');
+
+ EXPECT_THAT(v, ElementsAre("a", "b", "c"));
+ std::map<std::string, std::string> m;
+ m = absl::StrSplit("a,b,c", ',');
+ EXPECT_EQ(2, m.size());
+ std::unordered_map<std::string, std::string> hm;
+ hm = absl::StrSplit("a,b,c", ',');
+ EXPECT_EQ(2, hm.size());
+ }
+}
+
+absl::string_view ReturnStringView() { return "Hello World"; }
+const char* ReturnConstCharP() { return "Hello World"; }
+char* ReturnCharP() { return const_cast<char*>("Hello World"); }
+
+TEST(Split, AcceptsCertainTemporaries) {
+ std::vector<std::string> v;
+ v = absl::StrSplit(ReturnStringView(), ' ');
+ EXPECT_THAT(v, ElementsAre("Hello", "World"));
+ v = absl::StrSplit(ReturnConstCharP(), ' ');
+ EXPECT_THAT(v, ElementsAre("Hello", "World"));
+ v = absl::StrSplit(ReturnCharP(), ' ');
+ EXPECT_THAT(v, ElementsAre("Hello", "World"));
+}
+
+TEST(Split, Temporary) {
+ // Use a std::string longer than the small-std::string-optimization length, so that when
+ // the temporary is destroyed, if the splitter keeps a reference to the
+ // std::string's contents, it'll reference freed memory instead of just dead
+ // on-stack memory.
+ const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
+ EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
+ << "Input should be larger than fits on the stack.";
+
+ // This happens more often in C++11 as part of a range-based for loop.
+ auto splitter = absl::StrSplit(std::string(input), ',');
+ std::string expected = "a";
+ for (absl::string_view letter : splitter) {
+ EXPECT_EQ(expected, letter);
+ ++expected[0];
+ }
+ EXPECT_EQ("v", expected);
+
+ // This happens more often in C++11 as part of a range-based for loop.
+ auto std_splitter = absl::StrSplit(std::string(input), ',');
+ expected = "a";
+ for (absl::string_view letter : std_splitter) {
+ EXPECT_EQ(expected, letter);
+ ++expected[0];
+ }
+ EXPECT_EQ("v", expected);
+}
+
+template <typename T>
+static std::unique_ptr<T> CopyToHeap(const T& value) {
+ return std::unique_ptr<T>(new T(value));
+}
+
+TEST(Split, LvalueCaptureIsCopyable) {
+ std::string input = "a,b";
+ auto heap_splitter = CopyToHeap(absl::StrSplit(input, ','));
+ auto stack_splitter = *heap_splitter;
+ heap_splitter.reset();
+ std::vector<std::string> result = stack_splitter;
+ EXPECT_THAT(result, testing::ElementsAre("a", "b"));
+}
+
+TEST(Split, TemporaryCaptureIsCopyable) {
+ auto heap_splitter = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
+ auto stack_splitter = *heap_splitter;
+ heap_splitter.reset();
+ std::vector<std::string> result = stack_splitter;
+ EXPECT_THAT(result, testing::ElementsAre("a", "b"));
+}
+
+TEST(Split, SplitterIsCopyableAndMoveable) {
+ auto a = absl::StrSplit("foo", '-');
+
+ // Ensures that the following expressions compile.
+ auto b = a; // Copy construct
+ auto c = std::move(a); // Move construct
+ b = c; // Copy assign
+ c = std::move(b); // Move assign
+
+ EXPECT_THAT(c, ElementsAre("foo"));
+}
+
+TEST(Split, StringDelimiter) {
+ {
+ std::vector<absl::string_view> v = absl::StrSplit("a,b", ',');
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+ }
+
+ {
+ std::vector<absl::string_view> v = absl::StrSplit("a,b", std::string(","));
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+ }
+
+ {
+ std::vector<absl::string_view> v =
+ absl::StrSplit("a,b", absl::string_view(","));
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+ }
+}
+
+TEST(Split, UTF8) {
+ // Tests splitting utf8 strings and utf8 delimiters.
+ {
+ // A utf8 input std::string with an ascii delimiter.
+ std::vector<absl::string_view> v = absl::StrSplit("a,κόσμε", ',');
+ EXPECT_THAT(v, ElementsAre("a", "κόσμε"));
+ }
+
+ {
+ // A utf8 input std::string and a utf8 delimiter.
+ std::vector<absl::string_view> v = absl::StrSplit("a,κόσμε,b", ",κόσμε,");
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+ }
+
+ {
+ // A utf8 input std::string and ByAnyChar with ascii chars.
+ std::vector<absl::string_view> v =
+ absl::StrSplit("Foo hällo th丞re", absl::ByAnyChar(" \t"));
+ EXPECT_THAT(v, ElementsAre("Foo", "hällo", "th丞re"));
+ }
+}
+
+TEST(Split, EmptyStringDelimiter) {
+ {
+ std::vector<std::string> v = absl::StrSplit("", "");
+ EXPECT_THAT(v, ElementsAre(""));
+ }
+
+ {
+ std::vector<std::string> v = absl::StrSplit("a", "");
+ EXPECT_THAT(v, ElementsAre("a"));
+ }
+
+ {
+ std::vector<std::string> v = absl::StrSplit("ab", "");
+ EXPECT_THAT(v, ElementsAre("a", "b"));
+ }
+
+ {
+ std::vector<std::string> v = absl::StrSplit("a b", "");
+ EXPECT_THAT(v, ElementsAre("a", " ", "b"));
+ }
+}
+
+TEST(Split, SubstrDelimiter) {
+ std::vector<absl::string_view> results;
+ absl::string_view delim("//");
+
+ results = absl::StrSplit("", delim);
+ EXPECT_THAT(results, ElementsAre(""));
+
+ results = absl::StrSplit("//", delim);
+ EXPECT_THAT(results, ElementsAre("", ""));
+
+ results = absl::StrSplit("ab", delim);
+ EXPECT_THAT(results, ElementsAre("ab"));
+
+ results = absl::StrSplit("ab//", delim);
+ EXPECT_THAT(results, ElementsAre("ab", ""));
+
+ results = absl::StrSplit("ab/", delim);
+ EXPECT_THAT(results, ElementsAre("ab/"));
+
+ results = absl::StrSplit("a/b", delim);
+ EXPECT_THAT(results, ElementsAre("a/b"));
+
+ results = absl::StrSplit("a//b", delim);
+ EXPECT_THAT(results, ElementsAre("a", "b"));
+
+ results = absl::StrSplit("a///b", delim);
+ EXPECT_THAT(results, ElementsAre("a", "/b"));
+
+ results = absl::StrSplit("a////b", delim);
+ EXPECT_THAT(results, ElementsAre("a", "", "b"));
+}
+
+TEST(Split, EmptyResults) {
+ std::vector<absl::string_view> results;
+
+ results = absl::StrSplit("", '#');
+ EXPECT_THAT(results, ElementsAre(""));
+
+ results = absl::StrSplit("#", '#');
+ EXPECT_THAT(results, ElementsAre("", ""));
+
+ results = absl::StrSplit("#cd", '#');
+ EXPECT_THAT(results, ElementsAre("", "cd"));
+
+ results = absl::StrSplit("ab#cd#", '#');
+ EXPECT_THAT(results, ElementsAre("ab", "cd", ""));
+
+ results = absl::StrSplit("ab##cd", '#');
+ EXPECT_THAT(results, ElementsAre("ab", "", "cd"));
+
+ results = absl::StrSplit("ab##", '#');
+ EXPECT_THAT(results, ElementsAre("ab", "", ""));
+
+ results = absl::StrSplit("ab#ab#", '#');
+ EXPECT_THAT(results, ElementsAre("ab", "ab", ""));
+
+ results = absl::StrSplit("aaaa", 'a');
+ EXPECT_THAT(results, ElementsAre("", "", "", "", ""));
+
+ results = absl::StrSplit("", '#', absl::SkipEmpty());
+ EXPECT_THAT(results, ElementsAre());
+}
+
+template <typename Delimiter>
+static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
+ size_t starting_pos, int expected_pos) {
+ absl::string_view found = d.Find(text, starting_pos);
+ return found.data() != text.end() &&
+ expected_pos == found.data() - text.data();
+}
+
+// Helper function for testing Delimiter objects. Returns true if the given
+// Delimiter is found in the given std::string at the given position. This function
+// tests two cases:
+// 1. The actual text given, staring at position 0
+// 2. The text given with leading padding that should be ignored
+template <typename Delimiter>
+static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
+ const std::string leading_text = ",x,y,z,";
+ return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
+ IsFoundAtStartingPos(leading_text + std::string(text), d,
+ leading_text.length(),
+ expected_pos + leading_text.length());
+}
+
+//
+// Tests for Literal
+//
+
+// Tests using any delimiter that represents a single comma.
+template <typename Delimiter>
+void TestComma(Delimiter d) {
+ EXPECT_TRUE(IsFoundAt(",", d, 0));
+ EXPECT_TRUE(IsFoundAt("a,", d, 1));
+ EXPECT_TRUE(IsFoundAt(",b", d, 0));
+ EXPECT_TRUE(IsFoundAt("a,b", d, 1));
+ EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
+ EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));
+ EXPECT_FALSE(IsFoundAt("", d, -1));
+ EXPECT_FALSE(IsFoundAt(" ", d, -1));
+ EXPECT_FALSE(IsFoundAt("a", d, -1));
+ EXPECT_FALSE(IsFoundAt("a b c", d, -1));
+ EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
+ EXPECT_FALSE(IsFoundAt(";", d, -1));
+}
+
+TEST(Delimiter, Literal) {
+ using absl::ByString;
+ TestComma(ByString(","));
+
+ // Works as named variable.
+ ByString comma_string(",");
+ TestComma(comma_string);
+
+ // The first occurrence of empty std::string ("") in a std::string is at position 0.
+ // There is a test below that demonstrates this for absl::string_view::find().
+ // If the ByString delimiter returned position 0 for this, there would
+ // be an infinite loop in the SplitIterator code. To avoid this, empty std::string
+ // is a special case in that it always returns the item at position 1.
+ absl::string_view abc("abc");
+ EXPECT_EQ(0, abc.find("")); // "" is found at position 0
+ ByString empty("");
+ EXPECT_FALSE(IsFoundAt("", empty, 0));
+ EXPECT_FALSE(IsFoundAt("a", empty, 0));
+ EXPECT_TRUE(IsFoundAt("ab", empty, 1));
+ EXPECT_TRUE(IsFoundAt("abc", empty, 1));
+}
+
+TEST(Split, ByChar) {
+ using absl::ByChar;
+ TestComma(ByChar(','));
+
+ // Works as named variable.
+ ByChar comma_char(',');
+ TestComma(comma_char);
+}
+
+//
+// Tests for ByAnyChar
+//
+
+TEST(Delimiter, ByAnyChar) {
+ using absl::ByAnyChar;
+ ByAnyChar one_delim(",");
+ // Found
+ EXPECT_TRUE(IsFoundAt(",", one_delim, 0));
+ EXPECT_TRUE(IsFoundAt("a,", one_delim, 1));
+ EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1));
+ EXPECT_TRUE(IsFoundAt(",b", one_delim, 0));
+ // Not found
+ EXPECT_FALSE(IsFoundAt("", one_delim, -1));
+ EXPECT_FALSE(IsFoundAt(" ", one_delim, -1));
+ EXPECT_FALSE(IsFoundAt("a", one_delim, -1));
+ EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1));
+ EXPECT_FALSE(IsFoundAt(";", one_delim, -1));
+
+ ByAnyChar two_delims(",;");
+ // Found
+ EXPECT_TRUE(IsFoundAt(",", two_delims, 0));
+ EXPECT_TRUE(IsFoundAt(";", two_delims, 0));
+ EXPECT_TRUE(IsFoundAt(",;", two_delims, 0));
+ EXPECT_TRUE(IsFoundAt(";,", two_delims, 0));
+ EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0));
+ EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0));
+ EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1));
+ EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1));
+ EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1));
+ EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1));
+ // Not found
+ EXPECT_FALSE(IsFoundAt("", two_delims, -1));
+ EXPECT_FALSE(IsFoundAt(" ", two_delims, -1));
+ EXPECT_FALSE(IsFoundAt("a", two_delims, -1));
+ EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1));
+ EXPECT_FALSE(IsFoundAt("=", two_delims, -1));
+
+ // ByAnyChar behaves just like ByString when given a delimiter of empty
+ // std::string. That is, it always returns a zero-length absl::string_view
+ // referring to the item at position 1, not position 0.
+ ByAnyChar empty("");
+ EXPECT_FALSE(IsFoundAt("", empty, 0));
+ EXPECT_FALSE(IsFoundAt("a", empty, 0));
+ EXPECT_TRUE(IsFoundAt("ab", empty, 1));
+ EXPECT_TRUE(IsFoundAt("abc", empty, 1));
+}
+
+//
+// Tests for ByLength
+//
+
+TEST(Delimiter, ByLength) {
+ using absl::ByLength;
+
+ ByLength four_char_delim(4);
+
+ // Found
+ EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4));
+ EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4));
+ EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4));
+ // Not found
+ EXPECT_FALSE(IsFoundAt("", four_char_delim, 0));
+ EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0));
+ EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0));
+ EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0));
+ EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0));
+}
+
+// Allocates too much memory for TSan and MSan.
+#if !defined(THREAD_SANITIZER) && !defined(MEMORY_SANITIZER)
+TEST(Split, WorksWithLargeStrings) {
+ if (sizeof(size_t) > 4 && !RunningOnValgrind()) {
+ std::string s(1ULL << 31, 'x');
+ s.push_back('-'); // 2G + 1 byte
+ std::vector<absl::string_view> v = absl::StrSplit(s, '-');
+ EXPECT_EQ(2, v.size());
+ // The first element will contain 2G of 'x's.
+ // testing::StartsWith is too slow with a 2G std::string.
+ EXPECT_EQ('x', v[0][0]);
+ EXPECT_EQ('x', v[0][1]);
+ EXPECT_EQ('x', v[0][3]);
+ EXPECT_EQ("", v[1]);
+ }
+}
+#endif // THREAD_SANITIZER
+
+TEST(SplitInternalTest, TypeTraits) {
+ EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value);
+ EXPECT_TRUE(
+ (absl::strings_internal::HasMappedType<std::map<int, int>>::value));
+ EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value);
+ EXPECT_TRUE(
+ (absl::strings_internal::HasValueType<std::map<int, int>>::value));
+ EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value);
+ EXPECT_TRUE(
+ (absl::strings_internal::HasConstIterator<std::map<int, int>>::value));
+ EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value);
+ EXPECT_TRUE((absl::strings_internal::IsInitializerList<
+ std::initializer_list<int>>::value));
+}
+
+} // namespace
diff --git a/absl/strings/string_view.cc b/absl/strings/string_view.cc
new file mode 100644
index 00000000..4d4ba6c1
--- /dev/null
+++ b/absl/strings/string_view.cc
@@ -0,0 +1,248 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/string_view.h"
+
+#ifndef ABSL_HAVE_STD_STRING_VIEW
+
+#include <algorithm>
+#include <climits>
+#include <cstring>
+#include <ostream>
+#include <string>
+
+#include "absl/strings/internal/memutil.h"
+#include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/match.h"
+
+namespace absl {
+
+namespace {
+void WritePadding(std::ostream& o, size_t pad) {
+ char fill_buf[32];
+ memset(fill_buf, o.fill(), sizeof(fill_buf));
+ while (pad) {
+ size_t n = std::min(pad, sizeof(fill_buf));
+ o.write(fill_buf, n);
+ pad -= n;
+ }
+}
+
+class LookupTable {
+ public:
+ // For each character in wanted, sets the index corresponding
+ // to the ASCII code of that character. This is used by
+ // the find_.*_of methods below to tell whether or not a character is in
+ // the lookup table in constant time.
+ explicit LookupTable(string_view wanted) {
+ for (char c : wanted) {
+ table_[Index(c)] = true;
+ }
+ }
+ bool operator[](char c) const { return table_[Index(c)]; }
+
+ private:
+ static unsigned char Index(char c) { return static_cast<unsigned char>(c); }
+ bool table_[UCHAR_MAX + 1] = {};
+};
+
+} // namespace
+
+std::ostream& operator<<(std::ostream& o, string_view piece) {
+ std::ostream::sentry sentry(o);
+ if (sentry) {
+ size_t lpad = 0;
+ size_t rpad = 0;
+ if (static_cast<size_t>(o.width()) > piece.size()) {
+ size_t pad = o.width() - piece.size();
+ if ((o.flags() & o.adjustfield) == o.left) {
+ rpad = pad;
+ } else {
+ lpad = pad;
+ }
+ }
+ if (lpad) WritePadding(o, lpad);
+ o.write(piece.data(), piece.size());
+ if (rpad) WritePadding(o, rpad);
+ o.width(0);
+ }
+ return o;
+}
+
+string_view::size_type string_view::copy(char* buf, size_type n,
+ size_type pos) const {
+ size_type ulen = length_;
+ assert(pos <= ulen);
+ size_type rlen = std::min(ulen - pos, n);
+ if (rlen > 0) {
+ const char* start = ptr_ + pos;
+ std::copy(start, start + rlen, buf);
+ }
+ return rlen;
+}
+
+string_view::size_type string_view::find(string_view s, size_type pos) const
+ noexcept {
+ if (empty() || pos > length_) {
+ if (empty() && pos == 0 && s.empty()) return 0;
+ return npos;
+ }
+ const char* result =
+ strings_internal::memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_);
+ return result ? result - ptr_ : npos;
+}
+
+string_view::size_type string_view::find(char c, size_type pos) const noexcept {
+ if (empty() || pos >= length_) {
+ return npos;
+ }
+ const char* result =
+ static_cast<const char*>(memchr(ptr_ + pos, c, length_ - pos));
+ return result != nullptr ? result - ptr_ : npos;
+}
+
+string_view::size_type string_view::rfind(string_view s, size_type pos) const
+ noexcept {
+ if (length_ < s.length_) return npos;
+ if (s.empty()) return std::min(length_, pos);
+ const char* last = ptr_ + std::min(length_ - s.length_, pos) + s.length_;
+ const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_);
+ return result != last ? result - ptr_ : npos;
+}
+
+// Search range is [0..pos] inclusive. If pos == npos, search everything.
+string_view::size_type string_view::rfind(char c, size_type pos) const
+ noexcept {
+ // Note: memrchr() is not available on Windows.
+ if (empty()) return npos;
+ for (size_type i = std::min(pos, length_ - 1);; --i) {
+ if (ptr_[i] == c) {
+ return i;
+ }
+ if (i == 0) break;
+ }
+ return npos;
+}
+
+string_view::size_type string_view::find_first_of(string_view s,
+ size_type pos) const
+ noexcept {
+ if (empty() || s.empty()) {
+ return npos;
+ }
+ // Avoid the cost of LookupTable() for a single-character search.
+ if (s.length_ == 1) return find_first_of(s.ptr_[0], pos);
+ LookupTable tbl(s);
+ for (size_type i = pos; i < length_; ++i) {
+ if (tbl[ptr_[i]]) {
+ return i;
+ }
+ }
+ return npos;
+}
+
+string_view::size_type string_view::find_first_not_of(string_view s,
+ size_type pos) const
+ noexcept {
+ if (empty()) return npos;
+ // Avoid the cost of LookupTable() for a single-character search.
+ if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos);
+ LookupTable tbl(s);
+ for (size_type i = pos; i < length_; ++i) {
+ if (!tbl[ptr_[i]]) {
+ return i;
+ }
+ }
+ return npos;
+}
+
+string_view::size_type string_view::find_first_not_of(char c,
+ size_type pos) const
+ noexcept {
+ if (empty()) return npos;
+ for (; pos < length_; ++pos) {
+ if (ptr_[pos] != c) {
+ return pos;
+ }
+ }
+ return npos;
+}
+
+string_view::size_type string_view::find_last_of(string_view s,
+ size_type pos) const noexcept {
+ if (empty() || s.empty()) return npos;
+ // Avoid the cost of LookupTable() for a single-character search.
+ if (s.length_ == 1) return find_last_of(s.ptr_[0], pos);
+ LookupTable tbl(s);
+ for (size_type i = std::min(pos, length_ - 1);; --i) {
+ if (tbl[ptr_[i]]) {
+ return i;
+ }
+ if (i == 0) break;
+ }
+ return npos;
+}
+
+string_view::size_type string_view::find_last_not_of(string_view s,
+ size_type pos) const
+ noexcept {
+ if (empty()) return npos;
+ size_type i = std::min(pos, length_ - 1);
+ if (s.empty()) return i;
+ // Avoid the cost of LookupTable() for a single-character search.
+ if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos);
+ LookupTable tbl(s);
+ for (;; --i) {
+ if (!tbl[ptr_[i]]) {
+ return i;
+ }
+ if (i == 0) break;
+ }
+ return npos;
+}
+
+string_view::size_type string_view::find_last_not_of(char c,
+ size_type pos) const
+ noexcept {
+ if (empty()) return npos;
+ size_type i = std::min(pos, length_ - 1);
+ for (;; --i) {
+ if (ptr_[i] != c) {
+ return i;
+ }
+ if (i == 0) break;
+ }
+ return npos;
+}
+
+// MSVC has non-standard behavior that implicitly creates definitions for static
+// const members. These implicit definitions conflict with explicit out-of-class
+// member definitions that are required by the C++ standard, resulting in
+// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks
+// MSVC to choose only one definition for the symbol it decorates. See details
+// at http://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx
+#ifdef _MSC_VER
+#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany)
+#else
+#define ABSL_STRING_VIEW_SELECTANY
+#endif
+
+ABSL_STRING_VIEW_SELECTANY
+constexpr string_view::size_type string_view::npos;
+ABSL_STRING_VIEW_SELECTANY
+constexpr string_view::size_type string_view::kMaxSize;
+
+} // namespace absl
+
+#endif // ABSL_HAVE_STD_STRING_VIEW
diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h
new file mode 100644
index 00000000..e2609f17
--- /dev/null
+++ b/absl/strings/string_view.h
@@ -0,0 +1,572 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: string_view.h
+// -----------------------------------------------------------------------------
+//
+// This file contains the definition of the `absl::string_view` class. A
+// `string_view` points to a contiguous span of characters, often part or all of
+// another `std::string`, double-quoted std::string literal, character array, or even
+// another `string_view`.
+//
+// This `absl::string_view` abstraction is designed to be a drop-in
+// replacement for the C++17 `std::string_view` abstraction.
+#ifndef ABSL_STRINGS_STRING_VIEW_H_
+#define ABSL_STRINGS_STRING_VIEW_H_
+
+#include <algorithm>
+#include "absl/base/config.h"
+
+#ifdef ABSL_HAVE_STD_STRING_VIEW
+
+#include <string_view>
+
+namespace absl {
+using std::string_view;
+};
+
+#else // ABSL_HAVE_STD_STRING_VIEW
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <iosfwd>
+#include <iterator>
+#include <limits>
+#include <string>
+
+#include "absl/base/internal/throw_delegate.h"
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+
+namespace absl {
+
+// absl::string_view
+//
+// A `string_view` provides a lightweight view into the std::string data provided by
+// a `std::string`, double-quoted std::string literal, character array, or even
+// another `string_view`. A `string_view` does *not* own the std::string to which it
+// points, and that data cannot be modified through the view.
+//
+// You can use `string_view` as a function or method parameter anywhere a
+// parameter can receive a double-quoted std::string literal, `const char*`,
+// `std::string`, or another `absl::string_view` argument with no need to copy
+// the std::string data. Systematic use of `string_view` within function arguments
+// reduces data copies and `strlen()` calls.
+//
+// Because of its small size, prefer passing `string_view` by value:
+//
+// void MyFunction(absl::string_view arg);
+//
+// If circumstances require, you may also pass one by const reference:
+//
+// void MyFunction(const absl::string_view& arg); // not preferred
+//
+// Passing by value generates slightly smaller code for many architectures.
+//
+// In either case, the source data of the `string_view` must outlive the
+// `string_view` itself.
+//
+// A `string_view` is also suitable for local variables if you know that the
+// lifetime of the underlying object is longer than the lifetime of your
+// `string_view` variable. However, beware of binding a `string_view` to a
+// temporary value:
+//
+// // BAD use of string_view: lifetime problem
+// absl::string_view sv = obj.ReturnAString();
+//
+// // GOOD use of string_view: str outlives sv
+// std::string str = obj.ReturnAString();
+// absl::string_view sv = str;
+//
+// Due to lifetime issues, a `string_view` is sometimes a poor choice for a
+// return value and usually a poor choice for a data member. If you do use a
+// `string_view` this way, it is your responsibility to ensure that the object
+// pointed to by the `string_view` outlives the `string_view`.
+//
+// A `string_view` may represent a whole std::string or just part of a std::string. For
+// example, when splitting a std::string, `std::vector<absl::string_view>` is a
+// natural data type for the output.
+//
+//
+// When constructed from a source which is nul-terminated, the `string_view`
+// itself will not include the nul-terminator unless a specific size (including
+// the nul) is passed to the constructor. As a result, common idioms that work
+// on nul-terminated strings do not work on `string_view` objects. If you write
+// code that scans a `string_view`, you must check its length rather than test
+// for nul, for example. Note, however, that nuls may still be embedded within
+// a `string_view` explicitly.
+//
+// You may create a null `string_view` in two ways:
+//
+// absl::string_view sv();
+// absl::string_view sv(nullptr, 0);
+//
+// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
+// `sv.empty() == true`. Also, if you create a `string_view` with a non-null
+// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to
+// signal an undefined value that is different from other `string_view` values
+// in a similar fashion to how `const char* p1 = nullptr;` is different from
+// `const char* p2 = "";`. However, in practice, it is not recommended to rely
+// on this behavior.
+//
+// Be careful not to confuse a null `string_view` with an empty one. A null
+// `string_view` is an empty `string_view`, but some empty `string_view`s are
+// not null. Prefer checking for emptiness over checking for null.
+//
+// There are many ways to create an empty string_view:
+//
+// const char* nullcp = nullptr;
+// // string_view.size() will return 0 in all cases.
+// absl::string_view();
+// absl::string_view(nullcp, 0);
+// absl::string_view("");
+// absl::string_view("", 0);
+// absl::string_view("abcdef", 0);
+// absl::string_view("abcdef" + 6, 0);
+//
+// All empty `string_view` objects whether null or not, are equal:
+//
+// absl::string_view() == absl::string_view("", 0)
+// absl::string_view(nullptr, 0) == absl:: string_view("abcdef"+6, 0)
+class string_view {
+ public:
+ using traits_type = std::char_traits<char>;
+ using value_type = char;
+ using pointer = char*;
+ using const_pointer = const char*;
+ using reference = char&;
+ using const_reference = const char&;
+ using const_iterator = const char*;
+ using iterator = const_iterator;
+ using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+ using reverse_iterator = const_reverse_iterator;
+ using size_type = size_t;
+ using difference_type = std::ptrdiff_t;
+
+ static constexpr size_type npos = static_cast<size_type>(-1);
+
+ // Null `string_view` constructor
+ constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}
+
+ // Implicit constructors
+
+ template <typename Allocator>
+ string_view( // NOLINT(runtime/explicit)
+ const std::basic_string<char, std::char_traits<char>, Allocator>&
+ str) noexcept
+ : ptr_(str.data()), length_(str.size()) {}
+
+ // Implicit constructor of a `string_view` from nul-terminated `str`. When
+ // accepting possibly null strings, use `absl::NullSafeStringView(str)`
+ // instead (see below).
+ constexpr string_view(const char* str) // NOLINT(runtime/explicit)
+ : ptr_(str), length_(StrLenInternal(str)) {}
+
+ // Implicit consructor of a `string_view` from a `const char*` and length
+ constexpr string_view(const char* data, size_type len)
+ : ptr_(data), length_(CheckLengthInternal(len)) {}
+
+ // NOTE(b/36227513): harmlessly omitted to work around gdb bug.
+ // constexpr string_view(const string_view&) noexcept = default;
+ // string_view& operator=(const string_view&) noexcept = default;
+
+ // Iterators
+
+ // string_view::begin()
+ //
+ // Returns an iterator pointing to the first character at the beginning of the
+ // `string_view`, or `end()` if the `string_view` is empty.
+ constexpr const_iterator begin() const noexcept { return ptr_; }
+
+ // string_view::end()
+ //
+ // Returns an iterator pointing just beyond the last character at the end of
+ // the `string_view`. This iterator acts as a placeholder; attempting to
+ // access it results in undefined behavior.
+ constexpr const_iterator end() const noexcept { return ptr_ + length_; }
+
+ // string_view::cbegin()
+ //
+ // Returns a const iterator pointing to the first character at the beginning
+ // of the `string_view`, or `end()` if the `string_view` is empty.
+ constexpr const_iterator cbegin() const noexcept { return begin(); }
+
+ // string_view::cend()
+ //
+ // Returns a const iterator pointing just beyond the last character at the end
+ // of the `string_view`. This pointer acts as a placeholder; attempting to
+ // access its element results in undefined behavior.
+ constexpr const_iterator cend() const noexcept { return end(); }
+
+ // string_view::rbegin()
+ //
+ // Returns a reverse iterator pointing to the last character at the end of the
+ // `string_view`, or `rend()` if the `string_view` is empty.
+ const_reverse_iterator rbegin() const noexcept {
+ return const_reverse_iterator(end());
+ }
+
+ // string_view::rend()
+ //
+ // Returns a reverse iterator pointing just before the first character at the
+ // beginning of the `string_view`. This pointer acts as a placeholder;
+ // attempting to access its element results in undefined behavior.
+ const_reverse_iterator rend() const noexcept {
+ return const_reverse_iterator(begin());
+ }
+
+ // string_view::crbegin()
+ //
+ // Returns a const reverse iterator pointing to the last character at the end
+ // of the `string_view`, or `crend()` if the `string_view` is empty.
+ const_reverse_iterator crbegin() const noexcept { return rbegin(); }
+
+ // string_view::crend()
+ //
+ // Returns a const reverse iterator pointing just before the first character
+ // at the beginning of the `string_view`. This pointer acts as a placeholder;
+ // attempting to access its element results in undefined behavior.
+ const_reverse_iterator crend() const noexcept { return rend(); }
+
+ // Capacity Utilities
+
+ // string_view::size()
+ //
+ // Returns the number of characters in the `string_view`.
+ constexpr size_type size() const noexcept {
+ return length_;
+ }
+
+ // string_view::length()
+ //
+ // Returns the number of characters in the `string_view`. Alias for `size()`.
+ constexpr size_type length() const noexcept { return size(); }
+
+ // string_view::max_size()
+ //
+ // Returns the maximum number of characters the `string_view` can hold.
+ constexpr size_type max_size() const noexcept { return kMaxSize; }
+
+ // string_view::empty()
+ //
+ // Checks if the `string_view` is empty (refers to no characters).
+ constexpr bool empty() const noexcept { return length_ == 0; }
+
+ // std::string:view::operator[]
+ //
+ // Returns the ith element of an `string_view` using the array operator.
+ // Note that this operator does not perform any bounds checking.
+ constexpr const_reference operator[](size_type i) const { return ptr_[i]; }
+
+ // string_view::front()
+ //
+ // Returns the first element of a `string_view`.
+ constexpr const_reference front() const { return ptr_[0]; }
+
+ // string_view::back()
+ //
+ // Returns the last element of a `string_view`.
+ constexpr const_reference back() const { return ptr_[size() - 1]; }
+
+ // string_view::data()
+ //
+ // Returns a pointer to the underlying character array (which is of course
+ // stored elsewhere). Note that `string_view::data()` may contain embedded nul
+ // characters, but the returned buffer may or may not be nul-terminated;
+ // therefore, do not pass `data()` to a routine that expects a nul-terminated
+ // std::string.
+ constexpr const_pointer data() const noexcept { return ptr_; }
+
+ // Modifiers
+
+ // string_view::remove_prefix()
+ //
+ // Removes the first `n` characters from the `string_view`, returning a
+ // pointer to the new first character. Note that the underlying std::string is not
+ // changed, only the view.
+ void remove_prefix(size_type n) {
+ assert(n <= length_);
+ ptr_ += n;
+ length_ -= n;
+ }
+
+ // string_view::remove_suffix()
+ //
+ // Removes the last `n` characters from the `string_view`. Note that the
+ // underlying std::string is not changed, only the view.
+ void remove_suffix(size_type n) {
+ assert(n <= length_);
+ length_ -= n;
+ }
+
+ // string_view::swap()
+ //
+ // Swaps this `string_view` with another `string_view`.
+ void swap(string_view& s) noexcept {
+ auto t = *this;
+ *this = s;
+ s = t;
+ }
+
+ // Explicit conversion operators
+
+ // Supports conversion to both `std::basic_string` where available.
+ template <typename A>
+ explicit operator std::basic_string<char, traits_type, A>() const {
+ if (!data()) return {};
+ return std::basic_string<char, traits_type, A>(data(), size());
+ }
+
+ // string_view::copy()
+ //
+ // Copies the contents of the `string_view` at offset `pos` and length `n`
+ // into `buf`.
+ size_type copy(char* buf, size_type n, size_type pos = 0) const;
+
+ // string_view::substr()
+ //
+ // Returns a "substring" of the `string_view` (at offset `post` and length
+ // `n`) as another std::string views. This function throws `std::out_of_bounds` if
+ // `pos > size'.
+ string_view substr(size_type pos, size_type n = npos) const {
+ if (ABSL_PREDICT_FALSE(pos > length_))
+ base_internal::ThrowStdOutOfRange("absl::string_view::substr");
+ n = std::min(n, length_ - pos);
+ return string_view(ptr_ + pos, n);
+ }
+
+ // string_view::compare()
+ //
+ // Performs a lexicographical comparison between the `string_view` and
+ // another `absl::string_view), returning -1 if `this` is less than, 0 if
+ // `this` is equal to, and 1 if `this` is greater than the passed std::string
+ // view. Note that in the case of data equality, a further comparison is made
+ // on the respective sizes of the two `string_view`s to determine which is
+ // smaller, equal, or greater.
+ int compare(string_view x) const noexcept {
+ auto min_length = std::min(length_, x.length_);
+ if (min_length > 0) {
+ int r = memcmp(ptr_, x.ptr_, min_length);
+ if (r < 0) return -1;
+ if (r > 0) return 1;
+ }
+ if (length_ < x.length_) return -1;
+ if (length_ > x.length_) return 1;
+ return 0;
+ }
+
+ // Overload of `string_view::compare()` for comparing a substring of the
+ // 'string_view` and another `absl::string_view`.
+ int compare(size_type pos1, size_type count1, string_view v) const {
+ return substr(pos1, count1).compare(v);
+ }
+
+ // Overload of `string_view::compare()` for comparing a substring of the
+ // `string_view` and a substring of another `absl::string_view`.
+ int compare(size_type pos1, size_type count1, string_view v, size_type pos2,
+ size_type count2) const {
+ return substr(pos1, count1).compare(v.substr(pos2, count2));
+ }
+
+ // Overload of `string_view::compare()` for comparing a `string_view` and a
+ // a different C-style std::string `s`.
+ int compare(const char* s) const { return compare(string_view(s)); }
+
+ // Overload of `string_view::compare()` for comparing a substring of the
+ // `string_view` and a different std::string C-style std::string `s`.
+ int compare(size_type pos1, size_type count1, const char* s) const {
+ return substr(pos1, count1).compare(string_view(s));
+ }
+
+ // Overload of `string_view::compare()` for comparing a substring of the
+ // `string_view` and a substring of a different C-style std::string `s`.
+ int compare(size_type pos1, size_type count1, const char* s,
+ size_type count2) const {
+ return substr(pos1, count1).compare(string_view(s, count2));
+ }
+
+ // Find Utilities
+
+ // string_view::find()
+ //
+ // Finds the first occurrence of the substring `s` within the `string_view`,
+ // returning the position of the first character's match, or `npos` if no
+ // match was found.
+ size_type find(string_view s, size_type pos = 0) const noexcept;
+
+ // Overload of `string_view::find()` for finding the given character `c`
+ // within the `string_view`.
+ size_type find(char c, size_type pos = 0) const noexcept;
+
+ // string_view::rfind()
+ //
+ // Finds the last occurrence of a substring `s` within the `string_view`,
+ // returning the position of the first character's match, or `npos` if no
+ // match was found.
+ size_type rfind(string_view s, size_type pos = npos) const
+ noexcept;
+
+ // Overload of `string_view::rfind()` for finding the given character `c`
+ // within the `string_view`.
+ size_type rfind(char c, size_type pos = npos) const noexcept;
+
+ // string_view::find_first_of()
+ //
+ // Finds the first occurrence of any of the characters in `s` within the
+ // `string_view`, returning the start position of the match, or `npos` if no
+ // match was found.
+ size_type find_first_of(string_view s, size_type pos = 0) const
+ noexcept;
+
+ // Overload of `string_view::find_first_of()` for finding a character `c`
+ // within the `string_view`.
+ size_type find_first_of(char c, size_type pos = 0) const
+ noexcept {
+ return find(c, pos);
+ }
+
+ // string_view::find_last_of()
+ //
+ // Finds the last occurrence of any of the characters in `s` within the
+ // `string_view`, returning the start position of the match, or `npos` if no
+ // match was found.
+ size_type find_last_of(string_view s, size_type pos = npos) const
+ noexcept;
+
+ // Overload of `string_view::find_last_of()` for finding a character `c`
+ // within the `string_view`.
+ size_type find_last_of(char c, size_type pos = npos) const
+ noexcept {
+ return rfind(c, pos);
+ }
+
+ // string_view::find_first_not_of()
+ //
+ // Finds the first occurrence of any of the characters not in `s` within the
+ // `string_view`, returning the start position of the first non-match, or
+ // `npos` if no non-match was found.
+ size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept;
+
+ // Overload of `string_view::find_first_not_of()` for finding a character
+ // that is not `c` within the `string_view`.
+ size_type find_first_not_of(char c, size_type pos = 0) const noexcept;
+
+ // string_view::find_last_not_of()
+ //
+ // Finds the last occurrence of any of the characters not in `s` within the
+ // `string_view`, returning the start position of the last non-match, or
+ // `npos` if no non-match was found.
+ size_type find_last_not_of(string_view s,
+ size_type pos = npos) const noexcept;
+
+ // Overload of `string_view::find_last_not_of()` for finding a character
+ // that is not `c` within the `string_view`.
+ size_type find_last_not_of(char c, size_type pos = npos) const
+ noexcept;
+
+ private:
+ static constexpr size_type kMaxSize =
+ std::numeric_limits<size_type>::max() / 2 + 1;
+
+ static constexpr size_type StrLenInternal(const char* str) {
+ return str ?
+// check whether __builtin_strlen is provided by the compiler.
+// GCC doesn't have __has_builtin()
+// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66970),
+// but has __builtin_strlen according to
+// https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html.
+#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \
+ (defined(__GNUC__) && !defined(__clang__))
+ __builtin_strlen(str)
+#else
+ strlen(str)
+#endif
+ : 0;
+ }
+
+ static constexpr size_type CheckLengthInternal(size_type len) {
+ return ABSL_ASSERT(len <= kMaxSize), len;
+ }
+
+ const char* ptr_;
+ size_type length_;
+};
+
+// This large function is defined inline so that in a fairly common case where
+// one of the arguments is a literal, the compiler can elide a lot of the
+// following comparisons.
+inline bool operator==(string_view x, string_view y) noexcept {
+ auto len = x.size();
+ if (len != y.size()) {
+ return false;
+ }
+ return x.data() == y.data() || len <= 0 ||
+ memcmp(x.data(), y.data(), len) == 0;
+}
+
+inline bool operator!=(string_view x, string_view y) noexcept {
+ return !(x == y);
+}
+
+inline bool operator<(string_view x, string_view y) noexcept {
+ auto min_size = std::min(x.size(), y.size());
+ const int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size);
+ return (r < 0) || (r == 0 && x.size() < y.size());
+}
+
+inline bool operator>(string_view x, string_view y) noexcept { return y < x; }
+
+inline bool operator<=(string_view x, string_view y) noexcept {
+ return !(y < x);
+}
+
+inline bool operator>=(string_view x, string_view y) noexcept {
+ return !(x < y);
+}
+
+// IO Insertion Operator
+std::ostream& operator<<(std::ostream& o, string_view piece);
+
+} // namespace absl
+
+#endif // ABSL_HAVE_STD_STRING_VIEW
+
+namespace absl {
+
+// ClippedSubstr()
+//
+// Like `s.substr(pos, n)`, but clips `pos` to an upper bound of `s.size()`.
+// Provided because std::string_view::substr throws if `pos > size()`,
+// to support b/37991613.
+inline string_view ClippedSubstr(string_view s, size_t pos,
+ size_t n = string_view::npos) {
+ pos = std::min(pos, static_cast<size_t>(s.size()));
+ return s.substr(pos, n);
+}
+
+// NullSafeStringView()
+//
+// Creates an `absl::string_view` from a pointer `p` even if it's null-valued.
+// This function should be used where an `absl::string_view` can be created from
+// a possibly-null pointer.
+inline string_view NullSafeStringView(const char* p) {
+ return p ? string_view(p) : string_view();
+}
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_STRING_VIEW_H_
diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc
new file mode 100644
index 00000000..439d6499
--- /dev/null
+++ b/absl/strings/string_view_test.cc
@@ -0,0 +1,1097 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/string_view.h"
+
+#include <algorithm>
+#include <iomanip>
+#include <iterator>
+#include <map>
+#include <random>
+#include <sstream>
+#include <string>
+#include <type_traits>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+#include "absl/base/dynamic_annotations.h"
+#include "absl/base/port.h"
+
+namespace {
+
+// A minimal allocator that uses malloc().
+template <typename T>
+struct Mallocator {
+ typedef T value_type;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+
+ size_type max_size() const {
+ return size_t(std::numeric_limits<size_type>::max()) / sizeof(value_type);
+ }
+ template <typename U>
+ struct rebind {
+ typedef Mallocator<U> other;
+ };
+ Mallocator() = default;
+
+ T* allocate(size_t n) { return static_cast<T*>(std::malloc(n * sizeof(T))); }
+ void deallocate(T* p, size_t) { std::free(p); }
+};
+template <typename T, typename U>
+bool operator==(const Mallocator<T>&, const Mallocator<U>&) {
+ return true;
+}
+template <typename T, typename U>
+bool operator!=(const Mallocator<T>&, const Mallocator<U>&) {
+ return false;
+}
+
+TEST(StringViewTest, Ctor) {
+ {
+ // Null.
+ absl::string_view s10;
+ EXPECT_TRUE(s10.data() == nullptr);
+ EXPECT_EQ(0, s10.length());
+ }
+
+ {
+ // const char* without length.
+ const char* hello = "hello";
+ absl::string_view s20(hello);
+ EXPECT_TRUE(s20.data() == hello);
+ EXPECT_EQ(5, s20.length());
+
+ // const char* with length.
+ absl::string_view s21(hello, 4);
+ EXPECT_TRUE(s21.data() == hello);
+ EXPECT_EQ(4, s21.length());
+
+ // Not recommended, but valid C++
+ absl::string_view s22(hello, 6);
+ EXPECT_TRUE(s22.data() == hello);
+ EXPECT_EQ(6, s22.length());
+ }
+
+ {
+ // std::string.
+ std::string hola = "hola";
+ absl::string_view s30(hola);
+ EXPECT_TRUE(s30.data() == hola.data());
+ EXPECT_EQ(4, s30.length());
+
+ // std::string with embedded '\0'.
+ hola.push_back('\0');
+ hola.append("h2");
+ hola.push_back('\0');
+ absl::string_view s31(hola);
+ EXPECT_TRUE(s31.data() == hola.data());
+ EXPECT_EQ(8, s31.length());
+ }
+
+ {
+ using mstring =
+ std::basic_string<char, std::char_traits<char>, Mallocator<char>>;
+ mstring str1("BUNGIE-JUMPING!");
+ const mstring str2("SLEEPING!");
+
+ absl::string_view s1(str1);
+ s1.remove_prefix(strlen("BUNGIE-JUM"));
+
+ absl::string_view s2(str2);
+ s2.remove_prefix(strlen("SLEE"));
+
+ EXPECT_EQ(s1, s2);
+ EXPECT_EQ(s1, "PING!");
+ }
+
+ // TODO(mec): absl::string_view(const absl::string_view&);
+}
+
+TEST(StringViewTest, Swap) {
+ absl::string_view a("a");
+ absl::string_view b("bbb");
+ EXPECT_TRUE(noexcept(a.swap(b)));
+ a.swap(b);
+ EXPECT_EQ(a, "bbb");
+ EXPECT_EQ(b, "a");
+ a.swap(b);
+ EXPECT_EQ(a, "a");
+ EXPECT_EQ(b, "bbb");
+}
+
+TEST(StringViewTest, STLComparator) {
+ std::string s1("foo");
+ std::string s2("bar");
+ std::string s3("baz");
+
+ absl::string_view p1(s1);
+ absl::string_view p2(s2);
+ absl::string_view p3(s3);
+
+ typedef std::map<absl::string_view, int> TestMap;
+ TestMap map;
+
+ map.insert(std::make_pair(p1, 0));
+ map.insert(std::make_pair(p2, 1));
+ map.insert(std::make_pair(p3, 2));
+ EXPECT_EQ(map.size(), 3);
+
+ TestMap::const_iterator iter = map.begin();
+ EXPECT_EQ(iter->second, 1);
+ ++iter;
+ EXPECT_EQ(iter->second, 2);
+ ++iter;
+ EXPECT_EQ(iter->second, 0);
+ ++iter;
+ EXPECT_TRUE(iter == map.end());
+
+ TestMap::iterator new_iter = map.find("zot");
+ EXPECT_TRUE(new_iter == map.end());
+
+ new_iter = map.find("bar");
+ EXPECT_TRUE(new_iter != map.end());
+
+ map.erase(new_iter);
+ EXPECT_EQ(map.size(), 2);
+
+ iter = map.begin();
+ EXPECT_EQ(iter->second, 2);
+ ++iter;
+ EXPECT_EQ(iter->second, 0);
+ ++iter;
+ EXPECT_TRUE(iter == map.end());
+}
+
+#define COMPARE(result, op, x, y) \
+ EXPECT_EQ(result, absl::string_view((x)) op absl::string_view((y))); \
+ EXPECT_EQ(result, absl::string_view((x)).compare(absl::string_view((y))) op 0)
+
+TEST(StringViewTest, ComparisonOperators) {
+ COMPARE(true, ==, "", "");
+ COMPARE(true, ==, "", absl::string_view());
+ COMPARE(true, ==, absl::string_view(), "");
+ COMPARE(true, ==, "a", "a");
+ COMPARE(true, ==, "aa", "aa");
+ COMPARE(false, ==, "a", "");
+ COMPARE(false, ==, "", "a");
+ COMPARE(false, ==, "a", "b");
+ COMPARE(false, ==, "a", "aa");
+ COMPARE(false, ==, "aa", "a");
+
+ COMPARE(false, !=, "", "");
+ COMPARE(false, !=, "a", "a");
+ COMPARE(false, !=, "aa", "aa");
+ COMPARE(true, !=, "a", "");
+ COMPARE(true, !=, "", "a");
+ COMPARE(true, !=, "a", "b");
+ COMPARE(true, !=, "a", "aa");
+ COMPARE(true, !=, "aa", "a");
+
+ COMPARE(true, <, "a", "b");
+ COMPARE(true, <, "a", "aa");
+ COMPARE(true, <, "aa", "b");
+ COMPARE(true, <, "aa", "bb");
+ COMPARE(false, <, "a", "a");
+ COMPARE(false, <, "b", "a");
+ COMPARE(false, <, "aa", "a");
+ COMPARE(false, <, "b", "aa");
+ COMPARE(false, <, "bb", "aa");
+
+ COMPARE(true, <=, "a", "a");
+ COMPARE(true, <=, "a", "b");
+ COMPARE(true, <=, "a", "aa");
+ COMPARE(true, <=, "aa", "b");
+ COMPARE(true, <=, "aa", "bb");
+ COMPARE(false, <=, "b", "a");
+ COMPARE(false, <=, "aa", "a");
+ COMPARE(false, <=, "b", "aa");
+ COMPARE(false, <=, "bb", "aa");
+
+ COMPARE(false, >=, "a", "b");
+ COMPARE(false, >=, "a", "aa");
+ COMPARE(false, >=, "aa", "b");
+ COMPARE(false, >=, "aa", "bb");
+ COMPARE(true, >=, "a", "a");
+ COMPARE(true, >=, "b", "a");
+ COMPARE(true, >=, "aa", "a");
+ COMPARE(true, >=, "b", "aa");
+ COMPARE(true, >=, "bb", "aa");
+
+ COMPARE(false, >, "a", "a");
+ COMPARE(false, >, "a", "b");
+ COMPARE(false, >, "a", "aa");
+ COMPARE(false, >, "aa", "b");
+ COMPARE(false, >, "aa", "bb");
+ COMPARE(true, >, "b", "a");
+ COMPARE(true, >, "aa", "a");
+ COMPARE(true, >, "b", "aa");
+ COMPARE(true, >, "bb", "aa");
+}
+
+TEST(StringViewTest, ComparisonOperatorsByCharacterPosition) {
+ std::string x;
+ for (int i = 0; i < 256; i++) {
+ x += 'a';
+ std::string y = x;
+ COMPARE(true, ==, x, y);
+ for (int j = 0; j < i; j++) {
+ std::string z = x;
+ z[j] = 'b'; // Differs in position 'j'
+ COMPARE(false, ==, x, z);
+ COMPARE(true, <, x, z);
+ COMPARE(true, >, z, x);
+ if (j + 1 < i) {
+ z[j + 1] = 'A'; // Differs in position 'j+1' as well
+ COMPARE(false, ==, x, z);
+ COMPARE(true, <, x, z);
+ COMPARE(true, >, z, x);
+ z[j + 1] = 'z'; // Differs in position 'j+1' as well
+ COMPARE(false, ==, x, z);
+ COMPARE(true, <, x, z);
+ COMPARE(true, >, z, x);
+ }
+ }
+ }
+}
+#undef COMPARE
+
+// Sadly, our users often confuse std::string::npos with absl::string_view::npos;
+// So much so that we test here that they are the same. They need to
+// both be unsigned, and both be the maximum-valued integer of their type.
+
+template <typename T>
+struct is_type {
+ template <typename U>
+ static bool same(U) {
+ return false;
+ }
+ static bool same(T) { return true; }
+};
+
+TEST(StringViewTest, NposMatchesStdStringView) {
+ EXPECT_EQ(absl::string_view::npos, std::string::npos);
+
+ EXPECT_TRUE(is_type<size_t>::same(absl::string_view::npos));
+ EXPECT_FALSE(is_type<size_t>::same(""));
+
+ // Make sure absl::string_view::npos continues to be a header constant.
+ char test[absl::string_view::npos & 1] = {0};
+ EXPECT_EQ(0, test[0]);
+}
+
+TEST(StringViewTest, STL1) {
+ const absl::string_view a("abcdefghijklmnopqrstuvwxyz");
+ const absl::string_view b("abc");
+ const absl::string_view c("xyz");
+ const absl::string_view d("foobar");
+ const absl::string_view e;
+ std::string temp("123");
+ temp += '\0';
+ temp += "456";
+ const absl::string_view f(temp);
+
+ EXPECT_EQ(a[6], 'g');
+ EXPECT_EQ(b[0], 'a');
+ EXPECT_EQ(c[2], 'z');
+ EXPECT_EQ(f[3], '\0');
+ EXPECT_EQ(f[5], '5');
+
+ EXPECT_EQ(*d.data(), 'f');
+ EXPECT_EQ(d.data()[5], 'r');
+ EXPECT_TRUE(e.data() == nullptr);
+
+ EXPECT_EQ(*a.begin(), 'a');
+ EXPECT_EQ(*(b.begin() + 2), 'c');
+ EXPECT_EQ(*(c.end() - 1), 'z');
+
+ EXPECT_EQ(*a.rbegin(), 'z');
+ EXPECT_EQ(*(b.rbegin() + 2), 'a');
+ EXPECT_EQ(*(c.rend() - 1), 'x');
+ EXPECT_TRUE(a.rbegin() + 26 == a.rend());
+
+ EXPECT_EQ(a.size(), 26);
+ EXPECT_EQ(b.size(), 3);
+ EXPECT_EQ(c.size(), 3);
+ EXPECT_EQ(d.size(), 6);
+ EXPECT_EQ(e.size(), 0);
+ EXPECT_EQ(f.size(), 7);
+
+ EXPECT_TRUE(!d.empty());
+ EXPECT_TRUE(d.begin() != d.end());
+ EXPECT_TRUE(d.begin() + 6 == d.end());
+
+ EXPECT_TRUE(e.empty());
+ EXPECT_TRUE(e.begin() == e.end());
+
+ char buf[4] = { '%', '%', '%', '%' };
+ EXPECT_EQ(a.copy(buf, 4), 4);
+ EXPECT_EQ(buf[0], a[0]);
+ EXPECT_EQ(buf[1], a[1]);
+ EXPECT_EQ(buf[2], a[2]);
+ EXPECT_EQ(buf[3], a[3]);
+ EXPECT_EQ(a.copy(buf, 3, 7), 3);
+ EXPECT_EQ(buf[0], a[7]);
+ EXPECT_EQ(buf[1], a[8]);
+ EXPECT_EQ(buf[2], a[9]);
+ EXPECT_EQ(buf[3], a[3]);
+ EXPECT_EQ(c.copy(buf, 99), 3);
+ EXPECT_EQ(buf[0], c[0]);
+ EXPECT_EQ(buf[1], c[1]);
+ EXPECT_EQ(buf[2], c[2]);
+ EXPECT_EQ(buf[3], a[3]);
+}
+
+// Separated from STL1() because some compilers produce an overly
+// large stack frame for the combined function.
+TEST(StringViewTest, STL2) {
+ const absl::string_view a("abcdefghijklmnopqrstuvwxyz");
+ const absl::string_view b("abc");
+ const absl::string_view c("xyz");
+ absl::string_view d("foobar");
+ const absl::string_view e;
+ const absl::string_view f(
+ "123"
+ "\0"
+ "456",
+ 7);
+
+ d = absl::string_view();
+ EXPECT_EQ(d.size(), 0);
+ EXPECT_TRUE(d.empty());
+ EXPECT_TRUE(d.data() == nullptr);
+ EXPECT_TRUE(d.begin() == d.end());
+
+ EXPECT_EQ(a.find(b), 0);
+ EXPECT_EQ(a.find(b, 1), absl::string_view::npos);
+ EXPECT_EQ(a.find(c), 23);
+ EXPECT_EQ(a.find(c, 9), 23);
+ EXPECT_EQ(a.find(c, absl::string_view::npos), absl::string_view::npos);
+ EXPECT_EQ(b.find(c), absl::string_view::npos);
+ EXPECT_EQ(b.find(c, absl::string_view::npos), absl::string_view::npos);
+ EXPECT_EQ(a.find(d), 0);
+ EXPECT_EQ(a.find(e), 0);
+ EXPECT_EQ(a.find(d, 12), 12);
+ EXPECT_EQ(a.find(e, 17), 17);
+ absl::string_view g("xx not found bb");
+ EXPECT_EQ(a.find(g), absl::string_view::npos);
+ // empty std::string nonsense
+ EXPECT_EQ(d.find(b), absl::string_view::npos);
+ EXPECT_EQ(e.find(b), absl::string_view::npos);
+ EXPECT_EQ(d.find(b, 4), absl::string_view::npos);
+ EXPECT_EQ(e.find(b, 7), absl::string_view::npos);
+
+ size_t empty_search_pos = std::string().find(std::string());
+ EXPECT_EQ(d.find(d), empty_search_pos);
+ EXPECT_EQ(d.find(e), empty_search_pos);
+ EXPECT_EQ(e.find(d), empty_search_pos);
+ EXPECT_EQ(e.find(e), empty_search_pos);
+ EXPECT_EQ(d.find(d, 4), std::string().find(std::string(), 4));
+ EXPECT_EQ(d.find(e, 4), std::string().find(std::string(), 4));
+ EXPECT_EQ(e.find(d, 4), std::string().find(std::string(), 4));
+ EXPECT_EQ(e.find(e, 4), std::string().find(std::string(), 4));
+
+ EXPECT_EQ(a.find('a'), 0);
+ EXPECT_EQ(a.find('c'), 2);
+ EXPECT_EQ(a.find('z'), 25);
+ EXPECT_EQ(a.find('$'), absl::string_view::npos);
+ EXPECT_EQ(a.find('\0'), absl::string_view::npos);
+ EXPECT_EQ(f.find('\0'), 3);
+ EXPECT_EQ(f.find('3'), 2);
+ EXPECT_EQ(f.find('5'), 5);
+ EXPECT_EQ(g.find('o'), 4);
+ EXPECT_EQ(g.find('o', 4), 4);
+ EXPECT_EQ(g.find('o', 5), 8);
+ EXPECT_EQ(a.find('b', 5), absl::string_view::npos);
+ // empty std::string nonsense
+ EXPECT_EQ(d.find('\0'), absl::string_view::npos);
+ EXPECT_EQ(e.find('\0'), absl::string_view::npos);
+ EXPECT_EQ(d.find('\0', 4), absl::string_view::npos);
+ EXPECT_EQ(e.find('\0', 7), absl::string_view::npos);
+ EXPECT_EQ(d.find('x'), absl::string_view::npos);
+ EXPECT_EQ(e.find('x'), absl::string_view::npos);
+ EXPECT_EQ(d.find('x', 4), absl::string_view::npos);
+ EXPECT_EQ(e.find('x', 7), absl::string_view::npos);
+
+ EXPECT_EQ(a.rfind(b), 0);
+ EXPECT_EQ(a.rfind(b, 1), 0);
+ EXPECT_EQ(a.rfind(c), 23);
+ EXPECT_EQ(a.rfind(c, 22), absl::string_view::npos);
+ EXPECT_EQ(a.rfind(c, 1), absl::string_view::npos);
+ EXPECT_EQ(a.rfind(c, 0), absl::string_view::npos);
+ EXPECT_EQ(b.rfind(c), absl::string_view::npos);
+ EXPECT_EQ(b.rfind(c, 0), absl::string_view::npos);
+ EXPECT_EQ(a.rfind(d), std::string(a).rfind(std::string()));
+ EXPECT_EQ(a.rfind(e), std::string(a).rfind(std::string()));
+ EXPECT_EQ(a.rfind(d, 12), 12);
+ EXPECT_EQ(a.rfind(e, 17), 17);
+ EXPECT_EQ(a.rfind(g), absl::string_view::npos);
+ EXPECT_EQ(d.rfind(b), absl::string_view::npos);
+ EXPECT_EQ(e.rfind(b), absl::string_view::npos);
+ EXPECT_EQ(d.rfind(b, 4), absl::string_view::npos);
+ EXPECT_EQ(e.rfind(b, 7), absl::string_view::npos);
+ // empty std::string nonsense
+ EXPECT_EQ(d.rfind(d, 4), std::string().rfind(std::string()));
+ EXPECT_EQ(e.rfind(d, 7), std::string().rfind(std::string()));
+ EXPECT_EQ(d.rfind(e, 4), std::string().rfind(std::string()));
+ EXPECT_EQ(e.rfind(e, 7), std::string().rfind(std::string()));
+ EXPECT_EQ(d.rfind(d), std::string().rfind(std::string()));
+ EXPECT_EQ(e.rfind(d), std::string().rfind(std::string()));
+ EXPECT_EQ(d.rfind(e), std::string().rfind(std::string()));
+ EXPECT_EQ(e.rfind(e), std::string().rfind(std::string()));
+
+ EXPECT_EQ(g.rfind('o'), 8);
+ EXPECT_EQ(g.rfind('q'), absl::string_view::npos);
+ EXPECT_EQ(g.rfind('o', 8), 8);
+ EXPECT_EQ(g.rfind('o', 7), 4);
+ EXPECT_EQ(g.rfind('o', 3), absl::string_view::npos);
+ EXPECT_EQ(f.rfind('\0'), 3);
+ EXPECT_EQ(f.rfind('\0', 12), 3);
+ EXPECT_EQ(f.rfind('3'), 2);
+ EXPECT_EQ(f.rfind('5'), 5);
+ // empty std::string nonsense
+ EXPECT_EQ(d.rfind('o'), absl::string_view::npos);
+ EXPECT_EQ(e.rfind('o'), absl::string_view::npos);
+ EXPECT_EQ(d.rfind('o', 4), absl::string_view::npos);
+ EXPECT_EQ(e.rfind('o', 7), absl::string_view::npos);
+}
+
+// Continued from STL2
+TEST(StringViewTest, STL2FindFirst) {
+ const absl::string_view a("abcdefghijklmnopqrstuvwxyz");
+ const absl::string_view b("abc");
+ const absl::string_view c("xyz");
+ absl::string_view d("foobar");
+ const absl::string_view e;
+ const absl::string_view f(
+ "123"
+ "\0"
+ "456",
+ 7);
+ absl::string_view g("xx not found bb");
+
+ d = absl::string_view();
+ EXPECT_EQ(a.find_first_of(b), 0);
+ EXPECT_EQ(a.find_first_of(b, 0), 0);
+ EXPECT_EQ(a.find_first_of(b, 1), 1);
+ EXPECT_EQ(a.find_first_of(b, 2), 2);
+ EXPECT_EQ(a.find_first_of(b, 3), absl::string_view::npos);
+ EXPECT_EQ(a.find_first_of(c), 23);
+ EXPECT_EQ(a.find_first_of(c, 23), 23);
+ EXPECT_EQ(a.find_first_of(c, 24), 24);
+ EXPECT_EQ(a.find_first_of(c, 25), 25);
+ EXPECT_EQ(a.find_first_of(c, 26), absl::string_view::npos);
+ EXPECT_EQ(g.find_first_of(b), 13);
+ EXPECT_EQ(g.find_first_of(c), 0);
+ EXPECT_EQ(a.find_first_of(f), absl::string_view::npos);
+ EXPECT_EQ(f.find_first_of(a), absl::string_view::npos);
+ // empty std::string nonsense
+ EXPECT_EQ(a.find_first_of(d), absl::string_view::npos);
+ EXPECT_EQ(a.find_first_of(e), absl::string_view::npos);
+ EXPECT_EQ(d.find_first_of(b), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_of(b), absl::string_view::npos);
+ EXPECT_EQ(d.find_first_of(d), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_of(d), absl::string_view::npos);
+ EXPECT_EQ(d.find_first_of(e), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_of(e), absl::string_view::npos);
+
+ EXPECT_EQ(a.find_first_not_of(b), 3);
+ EXPECT_EQ(a.find_first_not_of(c), 0);
+ EXPECT_EQ(b.find_first_not_of(a), absl::string_view::npos);
+ EXPECT_EQ(c.find_first_not_of(a), absl::string_view::npos);
+ EXPECT_EQ(f.find_first_not_of(a), 0);
+ EXPECT_EQ(a.find_first_not_of(f), 0);
+ EXPECT_EQ(a.find_first_not_of(d), 0);
+ EXPECT_EQ(a.find_first_not_of(e), 0);
+ // empty std::string nonsense
+ EXPECT_EQ(a.find_first_not_of(d), 0);
+ EXPECT_EQ(a.find_first_not_of(e), 0);
+ EXPECT_EQ(a.find_first_not_of(d, 1), 1);
+ EXPECT_EQ(a.find_first_not_of(e, 1), 1);
+ EXPECT_EQ(a.find_first_not_of(d, a.size() - 1), a.size() - 1);
+ EXPECT_EQ(a.find_first_not_of(e, a.size() - 1), a.size() - 1);
+ EXPECT_EQ(a.find_first_not_of(d, a.size()), absl::string_view::npos);
+ EXPECT_EQ(a.find_first_not_of(e, a.size()), absl::string_view::npos);
+ EXPECT_EQ(a.find_first_not_of(d, absl::string_view::npos),
+ absl::string_view::npos);
+ EXPECT_EQ(a.find_first_not_of(e, absl::string_view::npos),
+ absl::string_view::npos);
+ EXPECT_EQ(d.find_first_not_of(a), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_not_of(a), absl::string_view::npos);
+ EXPECT_EQ(d.find_first_not_of(d), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_not_of(d), absl::string_view::npos);
+ EXPECT_EQ(d.find_first_not_of(e), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_not_of(e), absl::string_view::npos);
+
+ absl::string_view h("====");
+ EXPECT_EQ(h.find_first_not_of('='), absl::string_view::npos);
+ EXPECT_EQ(h.find_first_not_of('=', 3), absl::string_view::npos);
+ EXPECT_EQ(h.find_first_not_of('\0'), 0);
+ EXPECT_EQ(g.find_first_not_of('x'), 2);
+ EXPECT_EQ(f.find_first_not_of('\0'), 0);
+ EXPECT_EQ(f.find_first_not_of('\0', 3), 4);
+ EXPECT_EQ(f.find_first_not_of('\0', 2), 2);
+ // empty std::string nonsense
+ EXPECT_EQ(d.find_first_not_of('x'), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_not_of('x'), absl::string_view::npos);
+ EXPECT_EQ(d.find_first_not_of('\0'), absl::string_view::npos);
+ EXPECT_EQ(e.find_first_not_of('\0'), absl::string_view::npos);
+}
+
+// Continued from STL2
+TEST(StringViewTest, STL2FindLast) {
+ const absl::string_view a("abcdefghijklmnopqrstuvwxyz");
+ const absl::string_view b("abc");
+ const absl::string_view c("xyz");
+ absl::string_view d("foobar");
+ const absl::string_view e;
+ const absl::string_view f(
+ "123"
+ "\0"
+ "456",
+ 7);
+ absl::string_view g("xx not found bb");
+ absl::string_view h("====");
+ absl::string_view i("56");
+
+ d = absl::string_view();
+ EXPECT_EQ(h.find_last_of(a), absl::string_view::npos);
+ EXPECT_EQ(g.find_last_of(a), g.size()-1);
+ EXPECT_EQ(a.find_last_of(b), 2);
+ EXPECT_EQ(a.find_last_of(c), a.size()-1);
+ EXPECT_EQ(f.find_last_of(i), 6);
+ EXPECT_EQ(a.find_last_of('a'), 0);
+ EXPECT_EQ(a.find_last_of('b'), 1);
+ EXPECT_EQ(a.find_last_of('z'), 25);
+ EXPECT_EQ(a.find_last_of('a', 5), 0);
+ EXPECT_EQ(a.find_last_of('b', 5), 1);
+ EXPECT_EQ(a.find_last_of('b', 0), absl::string_view::npos);
+ EXPECT_EQ(a.find_last_of('z', 25), 25);
+ EXPECT_EQ(a.find_last_of('z', 24), absl::string_view::npos);
+ EXPECT_EQ(f.find_last_of(i, 5), 5);
+ EXPECT_EQ(f.find_last_of(i, 6), 6);
+ EXPECT_EQ(f.find_last_of(a, 4), absl::string_view::npos);
+ // empty std::string nonsense
+ EXPECT_EQ(f.find_last_of(d), absl::string_view::npos);
+ EXPECT_EQ(f.find_last_of(e), absl::string_view::npos);
+ EXPECT_EQ(f.find_last_of(d, 4), absl::string_view::npos);
+ EXPECT_EQ(f.find_last_of(e, 4), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_of(d), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_of(e), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_of(d), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_of(e), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_of(f), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_of(f), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_of(d, 4), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_of(e, 4), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_of(d, 4), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_of(e, 4), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_of(f, 4), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_of(f, 4), absl::string_view::npos);
+
+ EXPECT_EQ(a.find_last_not_of(b), a.size()-1);
+ EXPECT_EQ(a.find_last_not_of(c), 22);
+ EXPECT_EQ(b.find_last_not_of(a), absl::string_view::npos);
+ EXPECT_EQ(b.find_last_not_of(b), absl::string_view::npos);
+ EXPECT_EQ(f.find_last_not_of(i), 4);
+ EXPECT_EQ(a.find_last_not_of(c, 24), 22);
+ EXPECT_EQ(a.find_last_not_of(b, 3), 3);
+ EXPECT_EQ(a.find_last_not_of(b, 2), absl::string_view::npos);
+ // empty std::string nonsense
+ EXPECT_EQ(f.find_last_not_of(d), f.size()-1);
+ EXPECT_EQ(f.find_last_not_of(e), f.size()-1);
+ EXPECT_EQ(f.find_last_not_of(d, 4), 4);
+ EXPECT_EQ(f.find_last_not_of(e, 4), 4);
+ EXPECT_EQ(d.find_last_not_of(d), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_not_of(e), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of(d), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of(e), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_not_of(f), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of(f), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_not_of(d, 4), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_not_of(e, 4), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of(d, 4), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of(e, 4), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_not_of(f, 4), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of(f, 4), absl::string_view::npos);
+
+ EXPECT_EQ(h.find_last_not_of('x'), h.size() - 1);
+ EXPECT_EQ(h.find_last_not_of('='), absl::string_view::npos);
+ EXPECT_EQ(b.find_last_not_of('c'), 1);
+ EXPECT_EQ(h.find_last_not_of('x', 2), 2);
+ EXPECT_EQ(h.find_last_not_of('=', 2), absl::string_view::npos);
+ EXPECT_EQ(b.find_last_not_of('b', 1), 0);
+ // empty std::string nonsense
+ EXPECT_EQ(d.find_last_not_of('x'), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of('x'), absl::string_view::npos);
+ EXPECT_EQ(d.find_last_not_of('\0'), absl::string_view::npos);
+ EXPECT_EQ(e.find_last_not_of('\0'), absl::string_view::npos);
+}
+
+// Continued from STL2
+TEST(StringViewTest, STL2Substr) {
+ const absl::string_view a("abcdefghijklmnopqrstuvwxyz");
+ const absl::string_view b("abc");
+ const absl::string_view c("xyz");
+ absl::string_view d("foobar");
+ const absl::string_view e;
+
+ d = absl::string_view();
+ EXPECT_EQ(a.substr(0, 3), b);
+ EXPECT_EQ(a.substr(23), c);
+ EXPECT_EQ(a.substr(23, 3), c);
+ EXPECT_EQ(a.substr(23, 99), c);
+ EXPECT_EQ(a.substr(0), a);
+ EXPECT_EQ(a.substr(3, 2), "de");
+ // empty std::string nonsense
+ EXPECT_EQ(d.substr(0, 99), e);
+ // use of npos
+ EXPECT_EQ(a.substr(0, absl::string_view::npos), a);
+ EXPECT_EQ(a.substr(23, absl::string_view::npos), c);
+ // throw exception
+#ifdef ABSL_HAVE_EXCEPTIONS
+ EXPECT_THROW(a.substr(99, 2), std::out_of_range);
+#else
+ EXPECT_DEATH(a.substr(99, 2), "absl::string_view::substr");
+#endif
+}
+
+TEST(StringViewTest, TruncSubstr) {
+ const absl::string_view hi("hi");
+ EXPECT_EQ("", absl::ClippedSubstr(hi, 0, 0));
+ EXPECT_EQ("h", absl::ClippedSubstr(hi, 0, 1));
+ EXPECT_EQ("hi", absl::ClippedSubstr(hi, 0));
+ EXPECT_EQ("i", absl::ClippedSubstr(hi, 1));
+ EXPECT_EQ("", absl::ClippedSubstr(hi, 2));
+ EXPECT_EQ("", absl::ClippedSubstr(hi, 3)); // truncation
+ EXPECT_EQ("", absl::ClippedSubstr(hi, 3, 2)); // truncation
+}
+
+TEST(StringViewTest, UTF8) {
+ EXPECT_EQ(strlen("á"), absl::string_view("á á").find_first_of(" "));
+ EXPECT_EQ(strlen("á"), absl::string_view("á á").find_first_of(" \t"));
+}
+
+TEST(StringViewTest, FindConformance) {
+ struct {
+ std::string haystack;
+ std::string needle;
+ } specs[] = {
+ {"", ""},
+ {"", "a"},
+ {"a", ""},
+ {"a", "a"},
+ {"a", "b"},
+ {"aa", ""},
+ {"aa", "a"},
+ {"aa", "b"},
+ {"ab", "a"},
+ {"ab", "b"},
+ {"abcd", ""},
+ {"abcd", "a"},
+ {"abcd", "d"},
+ {"abcd", "ab"},
+ {"abcd", "bc"},
+ {"abcd", "cd"},
+ {"abcd", "abcd"},
+ };
+ for (const auto& s : specs) {
+ SCOPED_TRACE(s.haystack);
+ SCOPED_TRACE(s.needle);
+ std::string st = s.haystack;
+ absl::string_view sp = s.haystack;
+ for (size_t i = 0; i <= sp.size(); ++i) {
+ size_t pos = (i == sp.size()) ? absl::string_view::npos : i;
+ SCOPED_TRACE(pos);
+ EXPECT_EQ(sp.find(s.needle, pos),
+ st.find(s.needle, pos));
+ EXPECT_EQ(sp.rfind(s.needle, pos),
+ st.rfind(s.needle, pos));
+ EXPECT_EQ(sp.find_first_of(s.needle, pos),
+ st.find_first_of(s.needle, pos));
+ EXPECT_EQ(sp.find_first_not_of(s.needle, pos),
+ st.find_first_not_of(s.needle, pos));
+ EXPECT_EQ(sp.find_last_of(s.needle, pos),
+ st.find_last_of(s.needle, pos));
+ EXPECT_EQ(sp.find_last_not_of(s.needle, pos),
+ st.find_last_not_of(s.needle, pos));
+ }
+ }
+}
+
+TEST(StringViewTest, Remove) {
+ absl::string_view a("foobar");
+ std::string s1("123");
+ s1 += '\0';
+ s1 += "456";
+ absl::string_view b(s1);
+ absl::string_view e;
+ std::string s2;
+
+ // remove_prefix
+ absl::string_view c(a);
+ c.remove_prefix(3);
+ EXPECT_EQ(c, "bar");
+ c = a;
+ c.remove_prefix(0);
+ EXPECT_EQ(c, a);
+ c.remove_prefix(c.size());
+ EXPECT_EQ(c, e);
+
+ // remove_suffix
+ c = a;
+ c.remove_suffix(3);
+ EXPECT_EQ(c, "foo");
+ c = a;
+ c.remove_suffix(0);
+ EXPECT_EQ(c, a);
+ c.remove_suffix(c.size());
+ EXPECT_EQ(c, e);
+}
+
+TEST(StringViewTest, Set) {
+ absl::string_view a("foobar");
+ absl::string_view empty;
+ absl::string_view b;
+
+ // set
+ b = absl::string_view("foobar", 6);
+ EXPECT_EQ(b, a);
+ b = absl::string_view("foobar", 0);
+ EXPECT_EQ(b, empty);
+ b = absl::string_view("foobar", 7);
+ EXPECT_NE(b, a);
+
+ b = absl::string_view("foobar");
+ EXPECT_EQ(b, a);
+}
+
+TEST(StringViewTest, FrontBack) {
+ static const char arr[] = "abcd";
+ const absl::string_view csp(arr, 4);
+ EXPECT_EQ(&arr[0], &csp.front());
+ EXPECT_EQ(&arr[3], &csp.back());
+}
+
+TEST(StringViewTest, FrontBackSingleChar) {
+ static const char c = 'a';
+ const absl::string_view csp(&c, 1);
+ EXPECT_EQ(&c, &csp.front());
+ EXPECT_EQ(&c, &csp.back());
+}
+
+TEST(StringViewTest, NULLInput) {
+ absl::string_view s;
+ EXPECT_EQ(s.data(), nullptr);
+ EXPECT_EQ(s.size(), 0);
+
+ s = absl::string_view(nullptr);
+ EXPECT_EQ(s.data(), nullptr);
+ EXPECT_EQ(s.size(), 0);
+
+ // .ToString() on a absl::string_view with nullptr should produce the empty
+ // std::string.
+ EXPECT_EQ("", std::string(s));
+}
+
+TEST(StringViewTest, Comparisons2) {
+ // The `compare` member has 6 overloads (v: string_view, s: const char*):
+ // (1) compare(v)
+ // (2) compare(pos1, count1, v)
+ // (3) compare(pos1, count1, v, pos2, count2)
+ // (4) compare(s)
+ // (5) compare(pos1, count1, s)
+ // (6) compare(pos1, count1, s, count2)
+
+ absl::string_view abc("abcdefghijklmnopqrstuvwxyz");
+
+ // check comparison operations on strings longer than 4 bytes.
+ EXPECT_EQ(abc, absl::string_view("abcdefghijklmnopqrstuvwxyz"));
+ EXPECT_EQ(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxyz")), 0);
+
+ EXPECT_LT(abc, absl::string_view("abcdefghijklmnopqrstuvwxzz"));
+ EXPECT_LT(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxzz")), 0);
+
+ EXPECT_GT(abc, absl::string_view("abcdefghijklmnopqrstuvwxyy"));
+ EXPECT_GT(abc.compare(absl::string_view("abcdefghijklmnopqrstuvwxyy")), 0);
+
+ // The "substr" variants of `compare`.
+ absl::string_view digits("0123456789");
+ auto npos = absl::string_view::npos;
+
+ // Taking string_view
+ EXPECT_EQ(digits.compare(3, npos, absl::string_view("3456789")), 0); // 2
+ EXPECT_EQ(digits.compare(3, 4, absl::string_view("3456")), 0); // 2
+ EXPECT_EQ(digits.compare(10, 0, absl::string_view()), 0); // 2
+ EXPECT_EQ(digits.compare(3, 4, absl::string_view("0123456789"), 3, 4),
+ 0); // 3
+ EXPECT_LT(digits.compare(3, 4, absl::string_view("0123456789"), 3, 5),
+ 0); // 3
+ EXPECT_LT(digits.compare(0, npos, absl::string_view("0123456789"), 3, 5),
+ 0); // 3
+ // Taking const char*
+ EXPECT_EQ(digits.compare(3, 4, "3456"), 0); // 5
+ EXPECT_EQ(digits.compare(3, npos, "3456789"), 0); // 5
+ EXPECT_EQ(digits.compare(10, 0, ""), 0); // 5
+ EXPECT_EQ(digits.compare(3, 4, "0123456789", 3, 4), 0); // 6
+ EXPECT_LT(digits.compare(3, 4, "0123456789", 3, 5), 0); // 6
+ EXPECT_LT(digits.compare(0, npos, "0123456789", 3, 5), 0); // 6
+}
+
+struct MyCharAlloc : std::allocator<char> {};
+
+TEST(StringViewTest, ExplicitConversionOperator) {
+ absl::string_view sp = "hi";
+ EXPECT_EQ(sp, std::string(sp));
+}
+
+TEST(StringViewTest, NullSafeStringView) {
+ {
+ absl::string_view s = absl::NullSafeStringView(nullptr);
+ EXPECT_EQ(nullptr, s.data());
+ EXPECT_EQ(0, s.size());
+ EXPECT_EQ(absl::string_view(), s);
+ }
+ {
+ static const char kHi[] = "hi";
+ absl::string_view s = absl::NullSafeStringView(kHi);
+ EXPECT_EQ(kHi, s.data());
+ EXPECT_EQ(strlen(kHi), s.size());
+ EXPECT_EQ(absl::string_view("hi"), s);
+ }
+}
+
+TEST(StringViewTest, ConstexprCompiles) {
+ constexpr absl::string_view sp;
+ constexpr absl::string_view cstr(nullptr);
+ constexpr absl::string_view cstr_len("cstr", 4);
+
+#if defined(ABSL_HAVE_STD_STRING_VIEW)
+ // In libstdc++ (as of 7.2), `std::string_view::string_view(const char*)`
+ // calls `std::char_traits<char>::length(const char*)` to get the std::string
+ // length, but it is not marked constexpr yet. See GCC bug:
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78156
+ // Also, there is a LWG issue that adds constexpr to length() which was just
+ // resolved 2017-06-02. See
+ // http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2232
+ // TODO(zhangxy): Update the condition when libstdc++ adopts the constexpr
+ // length().
+#if !defined(__GLIBCXX__)
+#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1
+#endif // !__GLIBCXX__
+
+#else // ABSL_HAVE_STD_STRING_VIEW
+
+// This duplicates the check for __builtin_strlen in the header.
+#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \
+ (defined(__GNUC__) && !defined(__clang__))
+#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1
+#elif defined(__GNUC__) // GCC or clang
+#error GCC/clang should have constexpr string_view.
+#endif
+
+#endif // ABSL_HAVE_STD_STRING_VIEW
+
+#ifdef ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR
+ constexpr absl::string_view cstr_strlen("foo");
+ EXPECT_EQ(cstr_strlen.length(), 3);
+ constexpr absl::string_view cstr_strlen2 = "bar";
+ EXPECT_EQ(cstr_strlen2, "bar");
+#endif
+
+#if !defined(__clang__) || 3 < __clang_major__ || \
+ (3 == __clang_major__ && 4 < __clang_minor__)
+ // older clang versions (< 3.5) complain that:
+ // "cannot perform pointer arithmetic on null pointer"
+ constexpr absl::string_view::iterator const_begin_empty = sp.begin();
+ constexpr absl::string_view::iterator const_end_empty = sp.end();
+ EXPECT_EQ(const_begin_empty, const_end_empty);
+#endif
+
+ constexpr absl::string_view::iterator const_begin = cstr_len.begin();
+ constexpr absl::string_view::iterator const_end = cstr_len.end();
+ constexpr absl::string_view::size_type const_size = cstr_len.size();
+ constexpr absl::string_view::size_type const_length = cstr_len.length();
+ EXPECT_EQ(const_begin + const_size, const_end);
+ EXPECT_EQ(const_begin + const_length, const_end);
+
+ constexpr bool isempty = sp.empty();
+ EXPECT_TRUE(isempty);
+
+ constexpr const char c = cstr_len[2];
+ EXPECT_EQ(c, 't');
+
+ constexpr const char cfront = cstr_len.front();
+ constexpr const char cback = cstr_len.back();
+ EXPECT_EQ(cfront, 'c');
+ EXPECT_EQ(cback, 'r');
+
+ constexpr const char* np = sp.data();
+ constexpr const char* cstr_ptr = cstr_len.data();
+ EXPECT_EQ(np, nullptr);
+ EXPECT_NE(cstr_ptr, nullptr);
+
+ constexpr size_t sp_npos = sp.npos;
+ EXPECT_EQ(sp_npos, -1);
+}
+
+TEST(StringViewTest, Noexcept) {
+ EXPECT_TRUE((std::is_nothrow_constructible<absl::string_view,
+ const std::string&>::value));
+ EXPECT_TRUE(
+ (std::is_nothrow_constructible<absl::string_view, const std::string&>::value));
+ EXPECT_TRUE(std::is_nothrow_constructible<absl::string_view>::value);
+ constexpr absl::string_view sp;
+ EXPECT_TRUE(noexcept(sp.begin()));
+ EXPECT_TRUE(noexcept(sp.end()));
+ EXPECT_TRUE(noexcept(sp.cbegin()));
+ EXPECT_TRUE(noexcept(sp.cend()));
+ EXPECT_TRUE(noexcept(sp.rbegin()));
+ EXPECT_TRUE(noexcept(sp.rend()));
+ EXPECT_TRUE(noexcept(sp.crbegin()));
+ EXPECT_TRUE(noexcept(sp.crend()));
+ EXPECT_TRUE(noexcept(sp.size()));
+ EXPECT_TRUE(noexcept(sp.length()));
+ EXPECT_TRUE(noexcept(sp.empty()));
+ EXPECT_TRUE(noexcept(sp.data()));
+ EXPECT_TRUE(noexcept(sp.compare(sp)));
+ EXPECT_TRUE(noexcept(sp.find(sp)));
+ EXPECT_TRUE(noexcept(sp.find('f')));
+ EXPECT_TRUE(noexcept(sp.rfind(sp)));
+ EXPECT_TRUE(noexcept(sp.rfind('f')));
+ EXPECT_TRUE(noexcept(sp.find_first_of(sp)));
+ EXPECT_TRUE(noexcept(sp.find_first_of('f')));
+ EXPECT_TRUE(noexcept(sp.find_last_of(sp)));
+ EXPECT_TRUE(noexcept(sp.find_last_of('f')));
+ EXPECT_TRUE(noexcept(sp.find_first_not_of(sp)));
+ EXPECT_TRUE(noexcept(sp.find_first_not_of('f')));
+ EXPECT_TRUE(noexcept(sp.find_last_not_of(sp)));
+ EXPECT_TRUE(noexcept(sp.find_last_not_of('f')));
+}
+
+TEST(ComparisonOpsTest, StringCompareNotAmbiguous) {
+ EXPECT_EQ("hello", std::string("hello"));
+ EXPECT_LT("hello", std::string("world"));
+}
+
+TEST(ComparisonOpsTest, HeterogenousStringViewEquals) {
+ EXPECT_EQ(absl::string_view("hello"), std::string("hello"));
+ EXPECT_EQ("hello", absl::string_view("hello"));
+}
+
+TEST(FindOneCharTest, EdgeCases) {
+ absl::string_view a("xxyyyxx");
+
+ // Set a = "xyyyx".
+ a.remove_prefix(1);
+ a.remove_suffix(1);
+
+ EXPECT_EQ(0, a.find('x'));
+ EXPECT_EQ(0, a.find('x', 0));
+ EXPECT_EQ(4, a.find('x', 1));
+ EXPECT_EQ(4, a.find('x', 4));
+ EXPECT_EQ(absl::string_view::npos, a.find('x', 5));
+
+ EXPECT_EQ(4, a.rfind('x'));
+ EXPECT_EQ(4, a.rfind('x', 5));
+ EXPECT_EQ(4, a.rfind('x', 4));
+ EXPECT_EQ(0, a.rfind('x', 3));
+ EXPECT_EQ(0, a.rfind('x', 0));
+
+ // Set a = "yyy".
+ a.remove_prefix(1);
+ a.remove_suffix(1);
+
+ EXPECT_EQ(absl::string_view::npos, a.find('x'));
+ EXPECT_EQ(absl::string_view::npos, a.rfind('x'));
+}
+
+#ifndef THREAD_SANITIZER // Allocates too much memory for tsan.
+TEST(HugeStringView, TwoPointTwoGB) {
+ if (sizeof(size_t) <= 4 || RunningOnValgrind())
+ return;
+ // Try a huge std::string piece.
+ const size_t size = size_t{2200} * 1000 * 1000;
+ std::string s(size, 'a');
+ absl::string_view sp(s);
+ EXPECT_EQ(size, sp.length());
+ sp.remove_prefix(1);
+ EXPECT_EQ(size - 1, sp.length());
+ sp.remove_suffix(2);
+ EXPECT_EQ(size - 1 - 2, sp.length());
+}
+#endif // THREAD_SANITIZER
+
+#ifndef NDEBUG
+TEST(NonNegativeLenTest, NonNegativeLen) {
+ EXPECT_DEATH_IF_SUPPORTED(absl::string_view("xyz", -1), "len <= kMaxSize");
+}
+#endif // NDEBUG
+
+class StringViewStreamTest : public ::testing::Test {
+ public:
+ // Set negative 'width' for right justification.
+ template <typename T>
+ std::string Pad(const T& s, int width, char fill = 0) {
+ std::ostringstream oss;
+ if (fill != 0) {
+ oss << std::setfill(fill);
+ }
+ if (width < 0) {
+ width = -width;
+ oss << std::right;
+ }
+ oss << std::setw(width) << s;
+ return oss.str();
+ }
+};
+
+TEST_F(StringViewStreamTest, Padding) {
+ std::string s("hello");
+ absl::string_view sp(s);
+ for (int w = -64; w < 64; ++w) {
+ SCOPED_TRACE(w);
+ EXPECT_EQ(Pad(s, w), Pad(sp, w));
+ }
+ for (int w = -64; w < 64; ++w) {
+ SCOPED_TRACE(w);
+ EXPECT_EQ(Pad(s, w, '#'), Pad(sp, w, '#'));
+ }
+}
+
+TEST_F(StringViewStreamTest, ResetsWidth) {
+ // Width should reset after one formatted write.
+ // If we weren't resetting width after formatting the string_view,
+ // we'd have width=5 carrying over to the printing of the "]",
+ // creating "[###hi####]".
+ std::string s = "hi";
+ absl::string_view sp = s;
+ {
+ std::ostringstream oss;
+ oss << "[" << std::setfill('#') << std::setw(5) << s << "]";
+ ASSERT_EQ("[###hi]", oss.str());
+ }
+ {
+ std::ostringstream oss;
+ oss << "[" << std::setfill('#') << std::setw(5) << sp << "]";
+ EXPECT_EQ("[###hi]", oss.str());
+ }
+}
+
+} // namespace
diff --git a/absl/strings/strip.cc b/absl/strings/strip.cc
new file mode 100644
index 00000000..968c09c6
--- /dev/null
+++ b/absl/strings/strip.cc
@@ -0,0 +1,269 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains functions that remove a defined part from the std::string,
+// i.e., strip the std::string.
+
+#include "absl/strings/strip.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <string>
+
+#include "absl/strings/ascii.h"
+#include "absl/strings/string_view.h"
+
+// ----------------------------------------------------------------------
+// ReplaceCharacters
+// Replaces any occurrence of the character 'remove' (or the characters
+// in 'remove') with the character 'replace_with'.
+// ----------------------------------------------------------------------
+void ReplaceCharacters(char* str, size_t len, absl::string_view remove,
+ char replace_with) {
+ for (char* end = str + len; str != end; ++str) {
+ if (remove.find(*str) != absl::string_view::npos) {
+ *str = replace_with;
+ }
+ }
+}
+
+void ReplaceCharacters(std::string* s, absl::string_view remove, char replace_with) {
+ for (char& ch : *s) {
+ if (remove.find(ch) != absl::string_view::npos) {
+ ch = replace_with;
+ }
+ }
+}
+
+bool StripTrailingNewline(std::string* s) {
+ if (!s->empty() && (*s)[s->size() - 1] == '\n') {
+ if (s->size() > 1 && (*s)[s->size() - 2] == '\r')
+ s->resize(s->size() - 2);
+ else
+ s->resize(s->size() - 1);
+ return true;
+ }
+ return false;
+}
+
+// ----------------------------------------------------------------------
+// Misc. stripping routines
+// ----------------------------------------------------------------------
+void StripCurlyBraces(std::string* s) {
+ return StripBrackets('{', '}', s);
+}
+
+void StripBrackets(char left, char right, std::string* s) {
+ std::string::iterator opencurly = std::find(s->begin(), s->end(), left);
+ while (opencurly != s->end()) {
+ std::string::iterator closecurly = std::find(opencurly, s->end(), right);
+ if (closecurly == s->end()) return;
+ opencurly = s->erase(opencurly, closecurly + 1);
+ opencurly = std::find(opencurly, s->end(), left);
+ }
+}
+
+void StripMarkupTags(std::string* s) {
+ std::string::iterator output = std::find(s->begin(), s->end(), '<');
+ std::string::iterator input = output;
+ while (input != s->end()) {
+ if (*input == '<') {
+ input = std::find(input, s->end(), '>');
+ if (input == s->end()) break;
+ ++input;
+ } else {
+ *output++ = *input++;
+ }
+ }
+ s->resize(output - s->begin());
+}
+
+std::string OutputWithMarkupTagsStripped(const std::string& s) {
+ std::string result(s);
+ StripMarkupTags(&result);
+ return result;
+}
+
+ptrdiff_t TrimStringLeft(std::string* s, absl::string_view remove) {
+ size_t i = 0;
+ while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
+ ++i;
+ }
+ if (i > 0) s->erase(0, i);
+ return i;
+}
+
+ptrdiff_t TrimStringRight(std::string* s, absl::string_view remove) {
+ size_t i = s->size(), trimmed = 0;
+ while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) {
+ --i;
+ }
+ if (i < s->size()) {
+ trimmed = s->size() - i;
+ s->erase(i);
+ }
+ return trimmed;
+}
+
+// Unfortunately, absl::string_view does not have erase, so we've to replicate
+// the implementation with remove_prefix()/remove_suffix()
+ptrdiff_t TrimStringLeft(absl::string_view* s, absl::string_view remove) {
+ size_t i = 0;
+ while (i < s->size() && memchr(remove.data(), (*s)[i], remove.size())) {
+ ++i;
+ }
+ if (i > 0) s->remove_prefix(i);
+ return i;
+}
+
+ptrdiff_t TrimStringRight(absl::string_view* s, absl::string_view remove) {
+ size_t i = s->size(), trimmed = 0;
+ while (i > 0 && memchr(remove.data(), (*s)[i - 1], remove.size())) {
+ --i;
+ }
+ if (i < s->size()) {
+ trimmed = s->size() - i;
+ s->remove_suffix(trimmed);
+ }
+ return trimmed;
+}
+
+// ----------------------------------------------------------------------
+// Various removal routines
+// ----------------------------------------------------------------------
+ptrdiff_t strrm(char* str, char c) {
+ char* src;
+ char* dest;
+ for (src = dest = str; *src != '\0'; ++src)
+ if (*src != c) *(dest++) = *src;
+ *dest = '\0';
+ return dest - str;
+}
+
+ptrdiff_t memrm(char* str, ptrdiff_t strlen, char c) {
+ char* src;
+ char* dest;
+ for (src = dest = str; strlen-- > 0; ++src)
+ if (*src != c) *(dest++) = *src;
+ return dest - str;
+}
+
+ptrdiff_t strrmm(char* str, const char* chars) {
+ char* src;
+ char* dest;
+ for (src = dest = str; *src != '\0'; ++src) {
+ bool skip = false;
+ for (const char* c = chars; *c != '\0'; c++) {
+ if (*src == *c) {
+ skip = true;
+ break;
+ }
+ }
+ if (!skip) *(dest++) = *src;
+ }
+ *dest = '\0';
+ return dest - str;
+}
+
+ptrdiff_t strrmm(std::string* str, const std::string& chars) {
+ size_t str_len = str->length();
+ size_t in_index = str->find_first_of(chars);
+ if (in_index == std::string::npos) return str_len;
+
+ size_t out_index = in_index++;
+
+ while (in_index < str_len) {
+ char c = (*str)[in_index++];
+ if (chars.find(c) == std::string::npos) (*str)[out_index++] = c;
+ }
+
+ str->resize(out_index);
+ return out_index;
+}
+
+// ----------------------------------------------------------------------
+// StripDupCharacters
+// Replaces any repeated occurrence of the character 'dup_char'
+// with single occurrence. e.g.,
+// StripDupCharacters("a//b/c//d", '/', 0) => "a/b/c/d"
+// Return the number of characters removed
+// ----------------------------------------------------------------------
+ptrdiff_t StripDupCharacters(std::string* s, char dup_char, ptrdiff_t start_pos) {
+ if (start_pos < 0) start_pos = 0;
+
+ // remove dups by compaction in-place
+ ptrdiff_t input_pos = start_pos; // current reader position
+ ptrdiff_t output_pos = start_pos; // current writer position
+ const ptrdiff_t input_end = s->size();
+ while (input_pos < input_end) {
+ // keep current character
+ const char curr_char = (*s)[input_pos];
+ if (output_pos != input_pos) // must copy
+ (*s)[output_pos] = curr_char;
+ ++input_pos;
+ ++output_pos;
+
+ if (curr_char == dup_char) { // skip subsequent dups
+ while ((input_pos < input_end) && ((*s)[input_pos] == dup_char))
+ ++input_pos;
+ }
+ }
+ const ptrdiff_t num_deleted = input_pos - output_pos;
+ s->resize(s->size() - num_deleted);
+ return num_deleted;
+}
+
+// ----------------------------------------------------------------------
+// TrimRunsInString
+// Removes leading and trailing runs, and collapses middle
+// runs of a set of characters into a single character (the
+// first one specified in 'remove'). Useful for collapsing
+// runs of repeated delimiters, whitespace, etc. E.g.,
+// TrimRunsInString(&s, " :,()") removes leading and trailing
+// delimiter chars and collapses and converts internal runs
+// of delimiters to single ' ' characters, so, for example,
+// " a:(b):c " -> "a b c"
+// "first,last::(area)phone, ::zip" -> "first last area phone zip"
+// ----------------------------------------------------------------------
+void TrimRunsInString(std::string* s, absl::string_view remove) {
+ std::string::iterator dest = s->begin();
+ std::string::iterator src_end = s->end();
+ for (std::string::iterator src = s->begin(); src != src_end;) {
+ if (remove.find(*src) == absl::string_view::npos) {
+ *(dest++) = *(src++);
+ } else {
+ // Skip to the end of this run of chars that are in 'remove'.
+ for (++src; src != src_end; ++src) {
+ if (remove.find(*src) == absl::string_view::npos) {
+ if (dest != s->begin()) {
+ // This is an internal run; collapse it.
+ *(dest++) = remove[0];
+ }
+ *(dest++) = *(src++);
+ break;
+ }
+ }
+ }
+ }
+ s->erase(dest, src_end);
+}
+
+// ----------------------------------------------------------------------
+// RemoveNullsInString
+// Removes any internal \0 characters from the std::string.
+// ----------------------------------------------------------------------
+void RemoveNullsInString(std::string* s) {
+ s->erase(std::remove(s->begin(), s->end(), '\0'), s->end());
+}
diff --git a/absl/strings/strip.h b/absl/strings/strip.h
new file mode 100644
index 00000000..370f9e88
--- /dev/null
+++ b/absl/strings/strip.h
@@ -0,0 +1,89 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: strip.h
+// -----------------------------------------------------------------------------
+//
+// This file contains various functions for stripping substrings from a std::string.
+#ifndef ABSL_STRINGS_STRIP_H_
+#define ABSL_STRINGS_STRIP_H_
+
+#include <cstddef>
+#include <string>
+
+#include "absl/base/macros.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+
+// ConsumePrefix()
+//
+// Strips the `expected` prefix from the start of the given std::string, returning
+// `true` if the strip operation succeeded or false otherwise.
+//
+// Example:
+//
+// absl::string_view input("abc");
+// EXPECT_TRUE(absl::ConsumePrefix(&input, "a"));
+// EXPECT_EQ(input, "bc");
+inline bool ConsumePrefix(absl::string_view* str, absl::string_view expected) {
+ if (!absl::StartsWith(*str, expected)) return false;
+ str->remove_prefix(expected.size());
+ return true;
+}
+// ConsumeSuffix()
+//
+// Strips the `expected` suffix from the end of the given std::string, returning
+// `true` if the strip operation succeeded or false otherwise.
+//
+// Example:
+//
+// absl::string_view input("abcdef");
+// EXPECT_TRUE(absl::ConsumeSuffix(&input, "def"));
+// EXPECT_EQ(input, "abc");
+inline bool ConsumeSuffix(absl::string_view* str, absl::string_view expected) {
+ if (!absl::EndsWith(*str, expected)) return false;
+ str->remove_suffix(expected.size());
+ return true;
+}
+
+// StripPrefix()
+//
+// Returns a view into the input std::string 'str' with the given 'prefix' removed,
+// but leaving the original std::string intact. If the prefix does not match at the
+// start of the std::string, returns the original std::string instead.
+inline absl::string_view StripPrefix(absl::string_view str,
+ absl::string_view prefix) {
+ if (absl::StartsWith(str, prefix)) str.remove_prefix(prefix.size());
+ return str;
+}
+
+// StripSuffix()
+//
+// Returns a view into the input std::string 'str' with the given 'suffix' removed,
+// but leaving the original std::string intact. If the suffix does not match at the
+// end of the std::string, returns the original std::string instead.
+inline absl::string_view StripSuffix(absl::string_view str,
+ absl::string_view suffix) {
+ if (absl::EndsWith(str, suffix)) str.remove_suffix(suffix.size());
+ return str;
+}
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_STRIP_H_
diff --git a/absl/strings/strip_test.cc b/absl/strings/strip_test.cc
new file mode 100644
index 00000000..3c9e726e
--- /dev/null
+++ b/absl/strings/strip_test.cc
@@ -0,0 +1,119 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains functions that remove a defined part from the std::string,
+// i.e., strip the std::string.
+
+#include "absl/strings/strip.h"
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/strings/string_view.h"
+
+namespace {
+
+using testing::ElementsAre;
+using testing::IsEmpty;
+
+TEST(Strip, ConsumePrefixOneChar) {
+ absl::string_view input("abc");
+ EXPECT_TRUE(absl::ConsumePrefix(&input, "a"));
+ EXPECT_EQ(input, "bc");
+
+ EXPECT_FALSE(absl::ConsumePrefix(&input, "x"));
+ EXPECT_EQ(input, "bc");
+
+ EXPECT_TRUE(absl::ConsumePrefix(&input, "b"));
+ EXPECT_EQ(input, "c");
+
+ EXPECT_TRUE(absl::ConsumePrefix(&input, "c"));
+ EXPECT_EQ(input, "");
+
+ EXPECT_FALSE(absl::ConsumePrefix(&input, "a"));
+ EXPECT_EQ(input, "");
+}
+
+TEST(Strip, ConsumePrefix) {
+ absl::string_view input("abcdef");
+ EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdefg"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_FALSE(absl::ConsumePrefix(&input, "abce"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_TRUE(absl::ConsumePrefix(&input, ""));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_FALSE(absl::ConsumePrefix(&input, "abcdeg"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_TRUE(absl::ConsumePrefix(&input, "abcdef"));
+ EXPECT_EQ(input, "");
+
+ input = "abcdef";
+ EXPECT_TRUE(absl::ConsumePrefix(&input, "abcde"));
+ EXPECT_EQ(input, "f");
+}
+
+TEST(Strip, ConsumeSuffix) {
+ absl::string_view input("abcdef");
+ EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdefg"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_TRUE(absl::ConsumeSuffix(&input, ""));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_TRUE(absl::ConsumeSuffix(&input, "def"));
+ EXPECT_EQ(input, "abc");
+
+ input = "abcdef";
+ EXPECT_FALSE(absl::ConsumeSuffix(&input, "abcdeg"));
+ EXPECT_EQ(input, "abcdef");
+
+ EXPECT_TRUE(absl::ConsumeSuffix(&input, "f"));
+ EXPECT_EQ(input, "abcde");
+
+ EXPECT_TRUE(absl::ConsumeSuffix(&input, "abcde"));
+ EXPECT_EQ(input, "");
+}
+
+TEST(Strip, StripPrefix) {
+ const absl::string_view null_str;
+
+ EXPECT_EQ(absl::StripPrefix("foobar", "foo"), "bar");
+ EXPECT_EQ(absl::StripPrefix("foobar", ""), "foobar");
+ EXPECT_EQ(absl::StripPrefix("foobar", null_str), "foobar");
+ EXPECT_EQ(absl::StripPrefix("foobar", "foobar"), "");
+ EXPECT_EQ(absl::StripPrefix("foobar", "bar"), "foobar");
+ EXPECT_EQ(absl::StripPrefix("foobar", "foobarr"), "foobar");
+ EXPECT_EQ(absl::StripPrefix("", ""), "");
+}
+
+TEST(Strip, StripSuffix) {
+ const absl::string_view null_str;
+
+ EXPECT_EQ(absl::StripSuffix("foobar", "bar"), "foo");
+ EXPECT_EQ(absl::StripSuffix("foobar", ""), "foobar");
+ EXPECT_EQ(absl::StripSuffix("foobar", null_str), "foobar");
+ EXPECT_EQ(absl::StripSuffix("foobar", "foobar"), "");
+ EXPECT_EQ(absl::StripSuffix("foobar", "foo"), "foobar");
+ EXPECT_EQ(absl::StripSuffix("foobar", "ffoobar"), "foobar");
+ EXPECT_EQ(absl::StripSuffix("", ""), "");
+}
+
+} // namespace
diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc
new file mode 100644
index 00000000..f739f8c2
--- /dev/null
+++ b/absl/strings/substitute.cc
@@ -0,0 +1,117 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/substitute.h"
+
+#include <algorithm>
+
+#include "absl/base/internal/raw_logging.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/internal/resize_uninitialized.h"
+#include "absl/strings/string_view.h"
+
+namespace absl {
+namespace substitute_internal {
+
+void SubstituteAndAppendArray(std::string* output, absl::string_view format,
+ const absl::string_view* args_array,
+ size_t num_args) {
+ // Determine total size needed.
+ size_t size = 0;
+ for (size_t i = 0; i < format.size(); i++) {
+ if (format[i] == '$') {
+ if (i + 1 >= format.size()) {
+#ifndef NDEBUG
+ ABSL_RAW_LOG(FATAL,
+ "Invalid strings::Substitute() format std::string: \"%s\".",
+ absl::CEscape(format).c_str());
+#endif
+ return;
+ } else if (absl::ascii_isdigit(format[i + 1])) {
+ int index = format[i + 1] - '0';
+ if (static_cast<size_t>(index) >= num_args) {
+#ifndef NDEBUG
+ ABSL_RAW_LOG(
+ FATAL,
+ "Invalid strings::Substitute() format std::string: asked for \"$"
+ "%d\", but only %d args were given. Full format std::string was: "
+ "\"%s\".",
+ index, static_cast<int>(num_args), absl::CEscape(format).c_str());
+#endif
+ return;
+ }
+ size += args_array[index].size();
+ ++i; // Skip next char.
+ } else if (format[i + 1] == '$') {
+ ++size;
+ ++i; // Skip next char.
+ } else {
+#ifndef NDEBUG
+ ABSL_RAW_LOG(FATAL,
+ "Invalid strings::Substitute() format std::string: \"%s\".",
+ absl::CEscape(format).c_str());
+#endif
+ return;
+ }
+ } else {
+ ++size;
+ }
+ }
+
+ if (size == 0) return;
+
+ // Build the std::string.
+ size_t original_size = output->size();
+ strings_internal::STLStringResizeUninitialized(output, original_size + size);
+ char* target = &(*output)[original_size];
+ for (size_t i = 0; i < format.size(); i++) {
+ if (format[i] == '$') {
+ if (absl::ascii_isdigit(format[i + 1])) {
+ const absl::string_view src = args_array[format[i + 1] - '0'];
+ target = std::copy(src.begin(), src.end(), target);
+ ++i; // Skip next char.
+ } else if (format[i + 1] == '$') {
+ *target++ = '$';
+ ++i; // Skip next char.
+ }
+ } else {
+ *target++ = format[i];
+ }
+ }
+
+ assert(target == output->data() + output->size());
+}
+
+Arg::Arg(const void* value) {
+ static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2,
+ "fix sizeof(scratch_)");
+ if (value == nullptr) {
+ piece_ = "NULL";
+ } else {
+ char* ptr = scratch_ + sizeof(scratch_);
+ uintptr_t num = reinterpret_cast<uintptr_t>(value);
+ static const char kHexDigits[] = "0123456789abcdef";
+ do {
+ *--ptr = kHexDigits[num & 0xf];
+ num >>= 4;
+ } while (num != 0);
+ *--ptr = 'x';
+ *--ptr = '0';
+ piece_ = absl::string_view(ptr, scratch_ + sizeof(scratch_) - ptr);
+ }
+}
+
+} // namespace substitute_internal
+} // namespace absl
diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h
new file mode 100644
index 00000000..5d6bfd90
--- /dev/null
+++ b/absl/strings/substitute.h
@@ -0,0 +1,674 @@
+//
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// -----------------------------------------------------------------------------
+// File: substitute.h
+// -----------------------------------------------------------------------------
+//
+// This package contains functions for efficiently performing std::string
+// substitutions using a format std::string with positional notation:
+// `Substitute()` and `SubstituteAndAppend()`.
+//
+// Unlike printf-style format specifiers, `Substitute()` functions do not need
+// to specify the type of the substitution arguments. Supported arguments
+// following the format std::string, such as strings, string_views, ints,
+// floats, and bools, are automatically converted to strings during the
+// substitution process. (See below for a full list of supported types.)
+//
+// `Substitute()` does not allow you to specify *how* to format a value, beyond
+// the default conversion to std::string. For example, you cannot format an integer
+// in hex.
+//
+// The format std::string uses positional identifiers indicated by a dollar sign ($)
+// and single digit positional ids to indicate which substitution arguments to
+// use at that location within the format std::string.
+//
+// Example 1:
+// std::string s = Substitute("$1 purchased $0 $2. Thanks $1!",
+// 5, "Bob", "Apples");
+// EXPECT_EQ("Bob purchased 5 Apples. Thanks Bob!", s);
+//
+// Example 2:
+// std::string s = "Hi. ";
+// SubstituteAndAppend(&s, "My name is $0 and I am $1 years old.", "Bob", 5);
+// EXPECT_EQ("Hi. My name is Bob and I am 5 years old.", s);
+//
+// Differences from `StringPrintf()`:
+// * The format std::string does not identify the types of arguments. Instead, the
+// arguments are implicitly converted to strings. See below for a list of
+// accepted types.
+// * Substitutions in the format std::string are identified by a '$' followed by a
+// single digit. You can use arguments out-of-order and use the same
+// argument multiple times.
+// * A '$$' sequence in the format std::string means output a literal '$'
+// character.
+// * `Substitute()` is significantly faster than `StringPrintf()`. For very
+// large strings, it may be orders of magnitude faster.
+//
+// Supported types:
+// * absl::string_view, std::string, const char* (null is equivalent to "")
+// * int32_t, int64_t, uint32_t, uint64
+// * float, double
+// * bool (Printed as "true" or "false")
+// * pointer types other than char* (Printed as "0x<lower case hex std::string>",
+// except that null is printed as "NULL")
+//
+// If an invalid format std::string is provided, Substitute returns an empty std::string
+// and SubstituteAndAppend does not change the provided output std::string.
+// A format std::string is invalid if it:
+// * ends in an unescaped $ character,
+// e.g. "Hello $", or
+// * calls for a position argument which is not provided,
+// e.g. Substitute("Hello $2", "world"), or
+// * specifies a non-digit, non-$ character after an unescaped $ character,
+// e.g. "Hello %f".
+// In debug mode, i.e. #ifndef NDEBUG, such errors terminate the program.
+
+#ifndef ABSL_STRINGS_SUBSTITUTE_H_
+#define ABSL_STRINGS_SUBSTITUTE_H_
+
+#include <cstring>
+#include <string>
+
+#include "absl/base/macros.h"
+#include "absl/base/port.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+
+namespace absl {
+namespace substitute_internal {
+
+// Arg
+//
+// This class provides an argument type for `absl::Substitute()` and
+// `absl::SubstituteAndAppend()`. `Arg` handles implicit conversion of various
+// types to a std::string. (`Arg` is very similar to the `AlphaNum` class in
+// `StrCat()`.)
+//
+// This class has implicit constructors.
+class Arg {
+ public:
+ // Overloads for std::string-y things
+ //
+ // Explicitly overload `const char*` so the compiler doesn't cast to `bool`.
+ Arg(const char* value) // NOLINT(runtime/explicit)
+ : piece_(value) {}
+ Arg(const std::string& value) // NOLINT(runtime/explicit)
+ : piece_(value) {}
+ Arg(absl::string_view value) // NOLINT(runtime/explicit)
+ : piece_(value) {}
+
+ // Overloads for primitives
+ //
+ // No overloads are available for signed and unsigned char because if people
+ // are explicitly declaring their chars as signed or unsigned then they are
+ // probably using them as 8-bit integers and would probably prefer an integer
+ // representation. However, we can't really know, so we make the caller decide
+ // what to do.
+ Arg(char value) // NOLINT(runtime/explicit)
+ : piece_(scratch_, 1) { scratch_[0] = value; }
+ Arg(short value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(unsigned short value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(int value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(unsigned int value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(long value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(unsigned long value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(long long value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(unsigned long long value) // NOLINT(runtime/explicit)
+ : piece_(scratch_,
+ numbers_internal::FastIntToBuffer(value, scratch_) - scratch_) {}
+ Arg(float value) // NOLINT(runtime/explicit)
+ : piece_(numbers_internal::RoundTripFloatToBuffer(value, scratch_)) {}
+ Arg(double value) // NOLINT(runtime/explicit)
+ : piece_(numbers_internal::RoundTripDoubleToBuffer(value, scratch_)) {}
+ Arg(bool value) // NOLINT(runtime/explicit)
+ : piece_(value ? "true" : "false") {}
+ // `void*` values, with the exception of `char*`, are printed as
+ // `StringPrintf()` with format "%p": e.g. ("0x<hex value>").
+ // However, in the case of `nullptr`, "NULL" is printed.
+ Arg(const void* value); // NOLINT(runtime/explicit)
+
+ Arg(const Arg&) = delete;
+ Arg& operator=(const Arg&) = delete;
+
+ absl::string_view piece() const { return piece_; }
+
+ private:
+ absl::string_view piece_;
+ char scratch_[numbers_internal::kFastToBufferSize];
+};
+
+// Internal helper function. Don't call this from outside this implementation.
+// This interface may change without notice.
+void SubstituteAndAppendArray(std::string* output, absl::string_view format,
+ const absl::string_view* args_array,
+ size_t num_args);
+
+#if defined(ABSL_BAD_CALL_IF)
+constexpr int CalculateOneBit(const char* format) {
+ return (*format < '0' || *format > '9') ? 0 : (1 << (*format - '0'));
+}
+
+constexpr const char* SkipNumber(const char* format) {
+ return !*format ? format : (format + 1);
+}
+
+constexpr int PlaceholderBitmask(const char* format) {
+ return !*format ? 0 : *format != '$'
+ ? PlaceholderBitmask(format + 1)
+ : (CalculateOneBit(format + 1) |
+ PlaceholderBitmask(SkipNumber(format + 1)));
+}
+#endif // ABSL_BAD_CALL_IF
+
+} // namespace substitute_internal
+
+//
+// PUBLIC API
+//
+
+// SubstituteAndAppend()
+//
+// Substitutes variables into a given format std::string and appends to a given
+// output std::string. See file comments above for usage.
+//
+// The declarations of `SubstituteAndAppend()` below consist of overloads
+// for passing 0 to 10 arguments, respectively.
+//
+// NOTE: A zero-argument `SubstituteAndAppend()` may be used within variadic
+// templates to allow a variable number of arguments.
+//
+// Example:
+// template <typename... Args>
+// void VarMsg(std::string* boilerplate, const std::string& format,
+// const Args&... args) {
+// std::string s = absl::SubstituteAndAppend(boilerplate, format, args...)";
+// }
+//
+inline void SubstituteAndAppend(std::string* output, absl::string_view format) {
+ substitute_internal::SubstituteAndAppendArray(output, format, nullptr, 0);
+}
+
+inline void SubstituteAndAppend(std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0) {
+ const absl::string_view args[] = {a0.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1) {
+ const absl::string_view args[] = {a0.piece(), a1.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2) {
+ const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3) {
+ const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
+ a3.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4) {
+ const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
+ a3.piece(), a4.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5) {
+ const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
+ a3.piece(), a4.piece(), a5.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5,
+ const substitute_internal::Arg& a6) {
+ const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
+ a3.piece(), a4.piece(), a5.piece(),
+ a6.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(
+ std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0, const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2, const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4, const substitute_internal::Arg& a5,
+ const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) {
+ const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
+ a3.piece(), a4.piece(), a5.piece(),
+ a6.piece(), a7.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(
+ std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0, const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2, const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4, const substitute_internal::Arg& a5,
+ const substitute_internal::Arg& a6, const substitute_internal::Arg& a7,
+ const substitute_internal::Arg& a8) {
+ const absl::string_view args[] = {a0.piece(), a1.piece(), a2.piece(),
+ a3.piece(), a4.piece(), a5.piece(),
+ a6.piece(), a7.piece(), a8.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+inline void SubstituteAndAppend(
+ std::string* output, absl::string_view format,
+ const substitute_internal::Arg& a0, const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2, const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4, const substitute_internal::Arg& a5,
+ const substitute_internal::Arg& a6, const substitute_internal::Arg& a7,
+ const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) {
+ const absl::string_view args[] = {
+ a0.piece(), a1.piece(), a2.piece(), a3.piece(), a4.piece(),
+ a5.piece(), a6.piece(), a7.piece(), a8.piece(), a9.piece()};
+ substitute_internal::SubstituteAndAppendArray(output, format, args,
+ ABSL_ARRAYSIZE(args));
+}
+
+#if defined(ABSL_BAD_CALL_IF)
+// This body of functions catches cases where the number of placeholders
+// doesn't match the number of data arguments.
+void SubstituteAndAppend(std::string* output, const char* format)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0,
+ "There were no substitution arguments "
+ "but this format std::string has a $[0-9] in it");
+
+void SubstituteAndAppend(std::string* output, const char* format,
+ const substitute_internal::Arg& a0)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1,
+ "There was 1 substitution argument given, but "
+ "this format std::string is either missing its $0, or "
+ "contains one of $1-$9");
+
+void SubstituteAndAppend(std::string* output, const char* format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3,
+ "There were 2 substitution arguments given, but "
+ "this format std::string is either missing its $0/$1, or "
+ "contains one of $2-$9");
+
+void SubstituteAndAppend(std::string* output, const char* format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7,
+ "There were 3 substitution arguments given, but "
+ "this format std::string is either missing its $0/$1/$2, or "
+ "contains one of $3-$9");
+
+void SubstituteAndAppend(std::string* output, const char* format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15,
+ "There were 4 substitution arguments given, but "
+ "this format std::string is either missing its $0-$3, or "
+ "contains one of $4-$9");
+
+void SubstituteAndAppend(std::string* output, const char* format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31,
+ "There were 5 substitution arguments given, but "
+ "this format std::string is either missing its $0-$4, or "
+ "contains one of $5-$9");
+
+void SubstituteAndAppend(std::string* output, const char* format,
+ const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63,
+ "There were 6 substitution arguments given, but "
+ "this format std::string is either missing its $0-$5, or "
+ "contains one of $6-$9");
+
+void SubstituteAndAppend(
+ std::string* output, const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127,
+ "There were 7 substitution arguments given, but "
+ "this format std::string is either missing its $0-$6, or "
+ "contains one of $7-$9");
+
+void SubstituteAndAppend(
+ std::string* output, const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255,
+ "There were 8 substitution arguments given, but "
+ "this format std::string is either missing its $0-$7, or "
+ "contains one of $8-$9");
+
+void SubstituteAndAppend(
+ std::string* output, const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7, const substitute_internal::Arg& a8)
+ ABSL_BAD_CALL_IF(
+ substitute_internal::PlaceholderBitmask(format) != 511,
+ "There were 9 substitution arguments given, but "
+ "this format std::string is either missing its $0-$8, or contains a $9");
+
+void SubstituteAndAppend(
+ std::string* output, const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7, const substitute_internal::Arg& a8,
+ const substitute_internal::Arg& a9)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023,
+ "There were 10 substitution arguments given, but this "
+ "format std::string doesn't contain all of $0 through $9");
+#endif // ABSL_BAD_CALL_IF
+
+// Substitute()
+//
+// Substitutes variables into a given format std::string. See file comments above
+// for usage.
+//
+// The declarations of `Substitute()` below consist of overloads for passing 0
+// to 10 arguments, respectively.
+//
+// NOTE: A zero-argument `Substitute()` may be used within variadic templates to
+// allow a variable number of arguments.
+//
+// Example:
+// template <typename... Args>
+// void VarMsg(const std::string& format, const Args&... args) {
+// std::string s = absl::Substitute(format, args...)";
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(absl::string_view format) {
+ std::string result;
+ SubstituteAndAppend(&result, format);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2, a3);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7, const substitute_internal::Arg& a8) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8);
+ return result;
+}
+
+ABSL_MUST_USE_RESULT inline std::string Substitute(
+ absl::string_view format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7, const substitute_internal::Arg& a8,
+ const substitute_internal::Arg& a9) {
+ std::string result;
+ SubstituteAndAppend(&result, format, a0, a1, a2, a3, a4, a5, a6, a7, a8, a9);
+ return result;
+}
+
+#if defined(ABSL_BAD_CALL_IF)
+// This body of functions catches cases where the number of placeholders
+// doesn't match the number of data arguments.
+std::string Substitute(const char* format)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0,
+ "There were no substitution arguments "
+ "but this format std::string has a $[0-9] in it");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1,
+ "There was 1 substitution argument given, but "
+ "this format std::string is either missing its $0, or "
+ "contains one of $1-$9");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3,
+ "There were 2 substitution arguments given, but "
+ "this format std::string is either missing its $0/$1, or "
+ "contains one of $2-$9");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7,
+ "There were 3 substitution arguments given, but "
+ "this format std::string is either missing its $0/$1/$2, or "
+ "contains one of $3-$9");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15,
+ "There were 4 substitution arguments given, but "
+ "this format std::string is either missing its $0-$3, or "
+ "contains one of $4-$9");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31,
+ "There were 5 substitution arguments given, but "
+ "this format std::string is either missing its $0-$4, or "
+ "contains one of $5-$9");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63,
+ "There were 6 substitution arguments given, but "
+ "this format std::string is either missing its $0-$5, or "
+ "contains one of $6-$9");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5,
+ const substitute_internal::Arg& a6)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127,
+ "There were 7 substitution arguments given, but "
+ "this format std::string is either missing its $0-$6, or "
+ "contains one of $7-$9");
+
+std::string Substitute(const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1,
+ const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3,
+ const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5,
+ const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255,
+ "There were 8 substitution arguments given, but "
+ "this format std::string is either missing its $0-$7, or "
+ "contains one of $8-$9");
+
+std::string Substitute(
+ const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7, const substitute_internal::Arg& a8)
+ ABSL_BAD_CALL_IF(
+ substitute_internal::PlaceholderBitmask(format) != 511,
+ "There were 9 substitution arguments given, but "
+ "this format std::string is either missing its $0-$8, or contains a $9");
+
+std::string Substitute(
+ const char* format, const substitute_internal::Arg& a0,
+ const substitute_internal::Arg& a1, const substitute_internal::Arg& a2,
+ const substitute_internal::Arg& a3, const substitute_internal::Arg& a4,
+ const substitute_internal::Arg& a5, const substitute_internal::Arg& a6,
+ const substitute_internal::Arg& a7, const substitute_internal::Arg& a8,
+ const substitute_internal::Arg& a9)
+ ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023,
+ "There were 10 substitution arguments given, but this "
+ "format std::string doesn't contain all of $0 through $9");
+#endif // ABSL_BAD_CALL_IF
+
+} // namespace absl
+
+#endif // ABSL_STRINGS_SUBSTITUTE_H_
diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc
new file mode 100644
index 00000000..a6d7d7b0
--- /dev/null
+++ b/absl/strings/substitute_test.cc
@@ -0,0 +1,168 @@
+// Copyright 2017 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/substitute.h"
+
+#include <cstdint>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+
+namespace {
+
+TEST(SubstituteTest, Substitute) {
+ // Basic.
+ EXPECT_EQ("Hello, world!", absl::Substitute("$0, $1!", "Hello", "world"));
+
+ // Non-char* types.
+ EXPECT_EQ("123 0.2 0.1 foo true false x",
+ absl::Substitute("$0 $1 $2 $3 $4 $5 $6", 123, 0.2, 0.1f,
+ std::string("foo"), true, false, 'x'));
+
+ // All int types.
+ EXPECT_EQ(
+ "-32767 65535 "
+ "-1234567890 3234567890 "
+ "-1234567890 3234567890 "
+ "-1234567890123456789 9234567890123456789",
+ absl::Substitute(
+ "$0 $1 $2 $3 $4 $5 $6 $7",
+ static_cast<short>(-32767), // NOLINT(runtime/int)
+ static_cast<unsigned short>(65535), // NOLINT(runtime/int)
+ -1234567890, 3234567890U, -1234567890L, 3234567890UL,
+ -int64_t{1234567890123456789}, uint64_t{9234567890123456789u}));
+
+ // Pointer.
+ const int* int_p = reinterpret_cast<const int*>(0x12345);
+ std::string str = absl::Substitute("$0", int_p);
+ EXPECT_EQ(absl::StrCat("0x", absl::Hex(reinterpret_cast<intptr_t>(int_p))),
+ str);
+
+ // null is special. StrCat prints 0x0. Substitute prints NULL.
+ const uint64_t* null_p = nullptr;
+ str = absl::Substitute("$0", null_p);
+ EXPECT_EQ("NULL", str);
+
+ // char* is also special.
+ const char* char_p = "print me";
+ str = absl::Substitute("$0", char_p);
+ EXPECT_EQ("print me", str);
+
+ char char_buf[16];
+ strncpy(char_buf, "print me too", sizeof(char_buf));
+ str = absl::Substitute("$0", char_buf);
+ EXPECT_EQ("print me too", str);
+
+ // null char* is "doubly" special. Represented as the empty std::string.
+ char_p = nullptr;
+ str = absl::Substitute("$0", char_p);
+ EXPECT_EQ("", str);
+
+ // Out-of-order.
+ EXPECT_EQ("b, a, c, b", absl::Substitute("$1, $0, $2, $1", "a", "b", "c"));
+
+ // Literal $
+ EXPECT_EQ("$", absl::Substitute("$$"));
+
+ EXPECT_EQ("$1", absl::Substitute("$$1"));
+
+ // Test all overloads.
+ EXPECT_EQ("a", absl::Substitute("$0", "a"));
+ EXPECT_EQ("a b", absl::Substitute("$0 $1", "a", "b"));
+ EXPECT_EQ("a b c", absl::Substitute("$0 $1 $2", "a", "b", "c"));
+ EXPECT_EQ("a b c d", absl::Substitute("$0 $1 $2 $3", "a", "b", "c", "d"));
+ EXPECT_EQ("a b c d e",
+ absl::Substitute("$0 $1 $2 $3 $4", "a", "b", "c", "d", "e"));
+ EXPECT_EQ("a b c d e f", absl::Substitute("$0 $1 $2 $3 $4 $5", "a", "b", "c",
+ "d", "e", "f"));
+ EXPECT_EQ("a b c d e f g", absl::Substitute("$0 $1 $2 $3 $4 $5 $6", "a", "b",
+ "c", "d", "e", "f", "g"));
+ EXPECT_EQ("a b c d e f g h",
+ absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c", "d", "e",
+ "f", "g", "h"));
+ EXPECT_EQ("a b c d e f g h i",
+ absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b", "c", "d",
+ "e", "f", "g", "h", "i"));
+ EXPECT_EQ("a b c d e f g h i j",
+ absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b", "c",
+ "d", "e", "f", "g", "h", "i", "j"));
+ EXPECT_EQ("a b c d e f g h i j b0",
+ absl::Substitute("$0 $1 $2 $3 $4 $5 $6 $7 $8 $9 $10", "a", "b", "c",
+ "d", "e", "f", "g", "h", "i", "j"));
+
+ const char* null_cstring = nullptr;
+ EXPECT_EQ("Text: ''", absl::Substitute("Text: '$0'", null_cstring));
+}
+
+TEST(SubstituteTest, SubstituteAndAppend) {
+ std::string str = "Hello";
+ absl::SubstituteAndAppend(&str, ", $0!", "world");
+ EXPECT_EQ("Hello, world!", str);
+
+ // Test all overloads.
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0", "a");
+ EXPECT_EQ("a", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1", "a", "b");
+ EXPECT_EQ("a b", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2", "a", "b", "c");
+ EXPECT_EQ("a b c", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2 $3", "a", "b", "c", "d");
+ EXPECT_EQ("a b c d", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4", "a", "b", "c", "d", "e");
+ EXPECT_EQ("a b c d e", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5", "a", "b", "c", "d", "e",
+ "f");
+ EXPECT_EQ("a b c d e f", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6", "a", "b", "c", "d",
+ "e", "f", "g");
+ EXPECT_EQ("a b c d e f g", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7", "a", "b", "c", "d",
+ "e", "f", "g", "h");
+ EXPECT_EQ("a b c d e f g h", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8", "a", "b", "c",
+ "d", "e", "f", "g", "h", "i");
+ EXPECT_EQ("a b c d e f g h i", str);
+ str.clear();
+ absl::SubstituteAndAppend(&str, "$0 $1 $2 $3 $4 $5 $6 $7 $8 $9", "a", "b",
+ "c", "d", "e", "f", "g", "h", "i", "j");
+ EXPECT_EQ("a b c d e f g h i j", str);
+}
+
+#ifdef GTEST_HAS_DEATH_TEST
+
+TEST(SubstituteDeathTest, SubstituteDeath) {
+ EXPECT_DEBUG_DEATH(
+ static_cast<void>(absl::Substitute(absl::string_view("-$2"), "a", "b")),
+ "Invalid strings::Substitute\\(\\) format std::string: asked for \"\\$2\", "
+ "but only 2 args were given.");
+ EXPECT_DEBUG_DEATH(
+ static_cast<void>(absl::Substitute("-$z-")),
+ "Invalid strings::Substitute\\(\\) format std::string: \"-\\$z-\"");
+ EXPECT_DEBUG_DEATH(
+ static_cast<void>(absl::Substitute("-$")),
+ "Invalid strings::Substitute\\(\\) format std::string: \"-\\$\"");
+}
+
+#endif // GTEST_HAS_DEATH_TEST
+
+} // namespace
diff --git a/absl/strings/testdata/getline-1.txt b/absl/strings/testdata/getline-1.txt
new file mode 100644
index 00000000..19b90973
--- /dev/null
+++ b/absl/strings/testdata/getline-1.txt
@@ -0,0 +1,3 @@
+alpha
+
+beta gamma
diff --git a/absl/strings/testdata/getline-2.txt b/absl/strings/testdata/getline-2.txt
new file mode 100644
index 00000000..d6842d8e
--- /dev/null
+++ b/absl/strings/testdata/getline-2.txt
@@ -0,0 +1 @@
+one.two.three