diff options
author | Abseil Team <absl-team@google.com> | 2020-02-25 22:27:31 +0100 |
---|---|---|
committer | CJ Johnson <johnsoncj@google.com> | 2020-02-25 17:56:58 -0500 |
commit | b832dce8489ef7b6231384909fd9b68d5a5ff2b7 (patch) | |
tree | 3ad4be9a9a4105366be714da9458e076a77be18f /absl/strings | |
parent | aa844899c937bde5d2b24f276b59997e5b668bde (diff) |
Creation of LTS branch "lts_2020_02_25"20200225
- 0033c9ea91a52ade7c6b725aa2ef3cbe15463421 Fix build on FreeBSD/powerpc (#616) by kgotlinux <60880393+kgotlinux@users.noreply.github.com>
- 0d5ce2797eb695aee7e019e25323251ef6ffc277 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- b69c7d880caddfc25bf348dbcfe9d45fdd8bc6e6 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 2a5633fc077a58528cdbfe78720f3f6bfdc6044d Merge "Export of internal Abseil changes" by Xiaoyi Zhang <zhangxy@google.com>
- f9b3d6e493c1b6ab3dbdab71c5f8fa849db4abaf Add RISCV support to GetProgramCounter() (#621) by Khem Raj <raj.khem@gmail.com>
- 0232c87f21c26718aa3eb2d86678070f3b498a4e Add missing ABSL_HAVE_VDSO_SUPPORT conditional (#622) by Sinan Kaya <41809318+franksinankaya@users.noreply.github.com>
- 3c814105108680997d0821077694f663693b5382 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- c44657f55692eddf5504156645d1f4ec7b3acabd Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 98eb410c93ad059f9bba1bf43f5bb916fc92a5ea Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- bf78e977309c4cb946914b456404141ddac1c302 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- d95d1567165d449e4c213ea31a15cbb112a9865f Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 24713a7036a81498334807fa5c7ad3cb7c643711 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 72382c21fefed981b4b8a2a1b82e2d231c2c2e39 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 08a7e7bf972c8451855a5022f2faf3d3655db015 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 36bcd9599b3f48c99357ba61cf33584889306d6a Fix pointer format specifier in documentation (#614) by Andre Nguyen <andre-nguyen@users.noreply.github.com>
- 0f86336b6939ea673cc1cbe29189286cae67d63a Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- c512f118dde6ffd51cb7d8ac8804bbaf4d266c3a Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 37dd2562ec830d547a1524bb306be313ac3f2556 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 44427702614d7b86b064ba06a390f5eb2f85dbf6 fix: Add support for more ARM processors detection (#608) by Andre Nguyen <andre-nguyen@users.noreply.github.com>
- 159bf2bf6d1cc8087e02468d071e94d1177d1bae Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- a2e6adecc294dc4cd98cc285a9134ce58e0f2ad0 Use https links. (#586) by nlewycky <nicholas@mxc.ca>
- 564001ae506a17c51fa1223684a78f05f91d3d91 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- b3aaac8a37c467a1125c794196caa90d0957bdc3 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 63ee2f8877915a3565c29707dba8fe4d7822596a Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- a048203a881f11f4b7b8df5fb563aec85522f8db Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 1de0166368e2ae67347f92099d6dca3ab3a4a496 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- ad904b6cd3906ddf79878003d92b7bc08d7786ae Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 29235139149790f5afc430c11cec8f1eb1677607 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- bf86cfe165ef7d70dfe68f0b8fc0c018bc79a577 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 12bc53e0318d80569270a5b26ccbc62b52022b89 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 1e39f8626a4dadec1f56920b999dd4c3cfae333e Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 77f87009a34c745255bd84d8f2647040d831a2b3 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- d659fe54b35ab9b8e35c72e50a4b8814167d5a84 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- a4b757b5d42694306a9de853cee0a5fba9c7bbe9 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 0514227d2547793b23e209809276375e41c76617 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 7f4fe64af80fe3c84db8ea938276c3690573c45e Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 16d9fd58a51c6083234e2e9f8f50e49ed5ed02e4 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- bcaae6009c0833b73c6fa7bdd972921d8081a724 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 8ba96a8244bbe334d09542e92d566673a65c1f78 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 2103fd9acdf58279f739860bff3f8c9f4b845605 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 3df7b52a6ada51a72a23796b844549a7b282f1b8 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- fa8c75182fbfdeddb2485fc0d53baeda3f40b7a3 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 85092b4b648ca729c6263c4a302a41dfff28705e Fix Conan builds (#400) by Adrian Ostrowski <adr.ostrowski@gmail.com>
- e96ae2203b80d5ae2e0b7abe0c77b722b224b16d Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 20de2db748ca0471cfb61cb53e813dd12938c12b Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 846e5dbedac123d12455adcfe6f53c8b5dcbfeef Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 8207907f4f7fbaeeaa2b7340c76875e06fd345ba Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 078b89b3c046d230ef3ad39494e5852184eb528b Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 19b021cb3ff23048dfbe236a4e611925d8930831 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- ecc0033b54847f6c9ee37dbb0be8aa17e5b6d37b Always enable proper symbolize implementation on Windows ... by Loo Rong Jie <loorongjie@gmail.com>
- 2796d500aea5a31d26b8b24a33fab7a1c8fa2f32 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- e4c8d0eb8ef4acb5d7a4252b3b87feb391ef7e41 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- a15364ce4d88534ae2295127e5d8e32aefb6b446 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- ab3552a18964e7063c8324f45b3896a6a20b08a8 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- e9f9000c7c80993cb589d011616b7a8016e42f4a Fix ABSL_WAITER_MODE detection for mingw (#342) by Joe Sylve <Joe.Sylve@gmail.com>
- abea769b551f7a100f540967cb95debdb0080df8 Fix ABSL_HAVE_ALARM check on mingw (#341) by Joe Sylve <Joe.Sylve@gmail.com>
- 25597bdfc148e91e27678ec30efa52f4fc8c164f Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- aad33fefaa8f744d71ce747a53717b835bdf8e84 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 8fe7214fe2d7a45ecc4d85f6a524c6b1532426de Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- debac94cfb5a0fa75d1d97c399682bd1c72e3191 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 882b3501a31eb0e4ae4213afb91a0e43feda7d5f Fix spelling errors (#384) by Sungmann Cho <55860394+chosungmann@users.noreply.github.com>
- 502efe6d7841bff82b1aeef5500491fe9a48c635 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- ccdd1d57b6386ebc26fb0c7d99b604672437c124 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- ddf8e52a2918dd0ccec75d3e2426125fa3926724 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 6ec136281086b71da32b5fb068bd6e46b78a5c79 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- ac78ffc3bc0a8b295cab9a03817760fd460df2a1 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 5374c56e5196320681993869e3126b51edac2a43 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 97c1664b4bbab5f78fac2b151ab02656268fb34b Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 325fd7b042ff4ec34f7dd32e602cd81ad0e24b22 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 83c1d65c90a92aa49632b9ac5a793214bb0768bc Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- eb6b7bd23bc0815bfd784e1a74021ce166765280 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 9ddac555b7861dc178d0dbe758a1cfbed718784b Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 1948f6f967e34db9793cfa8b4bcbaf370d039fd8 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- a0d1e098c2f99694fa399b175a7ccf920762030e Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 2d2d7fbc283315b676159716376e739d3d23ed94 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 0302d1e5fa4fcdd1763b7d1bb3212943b1ae911d supppress unused variable warning for gcc (#372) by Martin <pizzard@users.noreply.github.com>
- 262d74ba81b1fc4d71f459555cde8ecb39786d68 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- f0afae0d49af3e15a7169e019634d7719143d94d Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 0e7afdcbd24c7e5b7cab4e0217d8886f1525b520 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 9a41ffdd3a0ccbcdd29c4e3886b28e06f2cd9c66 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 36910d3d7e9fccadd6603f232d0c4f54dcd47c7e [bazel] Add fixes for --incompatible_load_cc_rules_from_b... by Yannic <contact@yannic-bonenberger.com>
- aae8143cf9aa611f70d7ea9b95b8b8b383b2271a Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- d9aa92d7fb324314f9df487ac23d32a25650b742 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 321ab5303023c86cd15d9ddc5740fb4b4fde32e1 Export of internal Abseil changes by Abseil Team <absl-team@google.com>
- 4ef574064e75b86f115549e9eb4c7e806781b3ab Export of internal Abseil changes by Abseil Team <absl-team@google.com>
GitOrigin-RevId: 0033c9ea91a52ade7c6b725aa2ef3cbe15463421
Change-Id: I8a2b70063cb3ab40c6943a6db0fe40cae71ed8d7
Diffstat (limited to 'absl/strings')
81 files changed, 6625 insertions, 931 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 20511a35..b950ec76 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -13,11 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") load( "//absl:copts/configure_copts.bzl", "ABSL_DEFAULT_COPTS", - "ABSL_EXCEPTIONS_FLAG", - "ABSL_EXCEPTIONS_FLAG_LINKOPTS", "ABSL_TEST_COPTS", ) @@ -26,7 +25,7 @@ package( features = ["parse_headers"], ) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_library( name = "strings", @@ -73,6 +72,7 @@ cc_library( "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", + "//absl/base:raw_logging_internal", "//absl/base:throw_delegate", "//absl/memory", "//absl/meta:type_traits", @@ -83,19 +83,23 @@ cc_library( cc_library( name = "internal", srcs = [ + "internal/escaping.cc", "internal/ostringstream.cc", "internal/utf8.cc", ], hdrs = [ "internal/char_map.h", + "internal/escaping.h", "internal/ostringstream.h", "internal/resize_uninitialized.h", "internal/utf8.h", ], copts = ABSL_DEFAULT_COPTS, deps = [ + "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", + "//absl/base:raw_logging_internal", "//absl/meta:type_traits", ], ) @@ -122,6 +126,7 @@ cc_test( copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ + ":cord", ":strings", "//absl/base:core_headers", "//absl/container:fixed_array", @@ -140,7 +145,7 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":strings", - "//absl/base", + "//absl/base:raw_logging_internal", "@com_github_google_benchmark//:benchmark_main", ], ) @@ -225,8 +230,8 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":strings", - "//absl/base", "//absl/base:core_headers", + "//absl/base:raw_logging_internal", "@com_github_google_benchmark//:benchmark_main", ], ) @@ -235,8 +240,7 @@ cc_test( name = "string_view_test", size = "small", srcs = ["string_view_test.cc"], - copts = ABSL_TEST_COPTS + ABSL_EXCEPTIONS_FLAG, - linkopts = ABSL_EXCEPTIONS_FLAG_LINKOPTS, + copts = ABSL_TEST_COPTS, visibility = ["//visibility:private"], deps = [ ":strings", @@ -247,6 +251,74 @@ cc_test( ], ) +cc_library( + name = "cord_internal", + hdrs = ["internal/cord_internal.h"], + copts = ABSL_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":strings", + "//absl/meta:type_traits", + ], +) + +cc_library( + name = "cord", + srcs = [ + "cord.cc", + ], + hdrs = [ + "cord.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord_internal", + ":internal", + ":str_format", + ":strings", + "//absl/base", + "//absl/base:base_internal", + "//absl/base:core_headers", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "//absl/container:inlined_vector", + "//absl/functional:function_ref", + "//absl/meta:type_traits", + ], +) + +cc_library( + name = "cord_test_helpers", + testonly = 1, + hdrs = [ + "cord_test_helpers.h", + ], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord", + ], +) + +cc_test( + name = "cord_test", + size = "medium", + srcs = ["cord_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_test_helpers", + ":strings", + "//absl/base", + "//absl/base:config", + "//absl/base:endian", + "//absl/base:raw_logging_internal", + "//absl/container:fixed_array", + "@com_google_googletest//:gtest_main", + ], +) + cc_test( name = "substitute_test", size = "small", @@ -268,7 +340,7 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":strings", - "//absl/base", + "//absl/base:raw_logging_internal", "@com_github_google_benchmark//:benchmark_main", ], ) @@ -306,7 +378,7 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":strings", - "//absl/base", + "//absl/base:raw_logging_internal", "@com_github_google_benchmark//:benchmark_main", ], ) @@ -414,8 +486,10 @@ cc_test( deps = [ ":pow10_helper", ":strings", - "//absl/base", - "//absl/base:core_headers", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", "@com_google_googletest//:gtest_main", ], ) @@ -428,7 +502,9 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":strings", - "//absl/base", + "//absl/base:raw_logging_internal", + "//absl/random", + "//absl/random:distributions", "@com_github_google_benchmark//:benchmark_main", ], ) @@ -474,7 +550,6 @@ cc_test( ":pow10_helper", ":str_format", ":strings", - "//absl/base", "@com_google_googletest//:gtest_main", ], ) @@ -488,7 +563,8 @@ cc_test( copts = ABSL_TEST_COPTS, deps = [ ":strings", - "//absl/base", + "//absl/base:config", + "//absl/base:raw_logging_internal", "@com_google_googletest//:gtest_main", ], ) @@ -503,7 +579,7 @@ cc_test( copts = ABSL_TEST_COPTS, deps = [ ":strings", - "//absl/base", + "//absl/base:config", "@com_google_googletest//:gtest_main", ], ) @@ -518,7 +594,6 @@ cc_test( ], deps = [ ":strings", - "//absl/base", "@com_github_google_benchmark//:benchmark_main", ], ) @@ -634,6 +709,7 @@ cc_test( visibility = ["//visibility:private"], deps = [ ":str_format_internal", + "//absl/base:raw_logging_internal", "//absl/numeric:int128", "@com_google_googletest//:gtest_main", ], @@ -668,6 +744,7 @@ cc_library( srcs = ["internal/pow10_helper.cc"], hdrs = ["internal/pow10_helper.h"], visibility = ["//visibility:private"], + deps = ["//absl/base:config"], ) cc_test( diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index e63eec19..cebc5928 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -59,10 +59,11 @@ absl_cc_library( absl::config absl::core_headers absl::endian - absl::throw_delegate + absl::int128 absl::memory + absl::raw_logging_internal + absl::throw_delegate absl::type_traits - absl::int128 PUBLIC ) @@ -71,6 +72,8 @@ absl_cc_library( strings_internal HDRS "internal/char_map.h" + "internal/escaping.cc" + "internal/escaping.h" "internal/ostringstream.h" "internal/resize_uninitialized.h" "internal/utf8.h" @@ -80,8 +83,10 @@ absl_cc_library( COPTS ${ABSL_DEFAULT_COPTS} DEPS + absl::config absl::core_headers absl::endian + absl::raw_logging_internal absl::type_traits ) @@ -160,9 +165,6 @@ absl_cc_test( "string_view_test.cc" COPTS ${ABSL_TEST_COPTS} - ${ABSL_EXCEPTIONS_FLAG} - LINKOPTS - ${ABSL_EXCEPTIONS_FLAG_LINKOPTS} DEPS absl::strings absl::config @@ -276,9 +278,12 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings - absl::base absl::core_headers absl::pow10_helper + absl::config + absl::raw_logging_internal + absl::random_random + absl::random_distributions gmock_main ) @@ -317,7 +322,6 @@ absl_cc_test( DEPS absl::strings absl::str_format - absl::base absl::pow10_helper gmock_main ) @@ -332,7 +336,8 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings - absl::base + absl::config + absl::raw_logging_internal gmock_main ) @@ -347,7 +352,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings - absl::base + absl::config gmock_main ) @@ -465,6 +470,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format_internal + absl::raw_logging_internal absl::int128 gmock_main ) @@ -503,6 +509,8 @@ absl_cc_library( "internal/pow10_helper.cc" COPTS ${ABSL_TEST_COPTS} + DEPS + absl::config TESTONLY ) @@ -518,3 +526,57 @@ absl_cc_test( absl::str_format gmock_main ) + +absl_cc_library( + NAME + cord + HDRS + "cord.h" + SRCS + "cord.cc" + "internal/cord_internal.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::strings_internal + absl::base + absl::base_internal + absl::core_headers + absl::endian + absl::fixed_array + absl::function_ref + absl::inlined_vector + absl::raw_logging_internal + absl::type_traits + PUBLIC +) + +absl_cc_library( + NAME + cord_test_helpers + HDRS + "cord_test_helpers.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + TESTONLY +) + +absl_cc_test( + NAME + cord_test + SRCS + "cord_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + absl::strings + absl::base + absl::config + absl::endian + absl::raw_logging_internal + absl::fixed_array + gmock_main +) diff --git a/absl/strings/ascii.cc b/absl/strings/ascii.cc index 045a5e21..93bb03e9 100644 --- a/absl/strings/ascii.cc +++ b/absl/strings/ascii.cc @@ -15,7 +15,7 @@ #include "absl/strings/ascii.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace ascii_internal { // # Table generated by this Python code (bit 0x02 is currently unused): @@ -57,7 +57,7 @@ namespace ascii_internal { // of these bits is tightly coupled to this implementation, the individual bits // are not named. Note that bitfields for all characters above ASCII 127 are // zero-initialized. -const unsigned char kPropertyBits[256] = { +ABSL_DLL const unsigned char kPropertyBits[256] = { 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x00 0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, // 0x10 @@ -79,7 +79,7 @@ const unsigned char kPropertyBits[256] = { // Array of characters for the ascii_tolower() function. For values 'A' // through 'Z', return the lower-case character; otherwise, return the // identity of the passed character. -const char kToLower[256] = { +ABSL_DLL const char kToLower[256] = { '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', @@ -117,7 +117,7 @@ const char kToLower[256] = { // Array of characters for the ascii_toupper() function. For values 'a' // through 'z', return the upper-case character; otherwise, return the // identity of the passed character. -const char kToUpper[256] = { +ABSL_DLL const char kToUpper[256] = { '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', @@ -196,5 +196,5 @@ void RemoveExtraAsciiWhitespace(std::string* str) { str->erase(output_it - &(*str)[0]); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/ascii.h b/absl/strings/ascii.h index ebcbb11a..b46bc71f 100644 --- a/absl/strings/ascii.h +++ b/absl/strings/ascii.h @@ -56,20 +56,21 @@ #include <string> #include "absl/base/attributes.h" +#include "absl/base/config.h" #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace ascii_internal { // Declaration for an array of bitfields holding character information. -extern const unsigned char kPropertyBits[256]; +ABSL_DLL extern const unsigned char kPropertyBits[256]; // Declaration for the array of characters to upper-case characters. -extern const char kToUpper[256]; +ABSL_DLL extern const char kToUpper[256]; // Declaration for the array of characters to lower-case characters. -extern const char kToLower[256]; +ABSL_DLL extern const char kToLower[256]; } // namespace ascii_internal @@ -235,7 +236,7 @@ inline void StripAsciiWhitespace(std::string* str) { // Removes leading, trailing, and consecutive internal whitespace. void RemoveExtraAsciiWhitespace(std::string*); -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_ASCII_H_ diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc index 866a163e..bdba768d 100644 --- a/absl/strings/charconv.cc +++ b/absl/strings/charconv.cc @@ -57,7 +57,7 @@ // narrower mantissas. namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace { template <typename FloatType> @@ -673,7 +673,6 @@ from_chars_result FromCharsImpl(const char* first, const char* last, EncodeResult(calculated, negative, &result, &value); return result; } - return result; } } // namespace @@ -981,5 +980,5 @@ const int16_t kPower10ExponentTable[] = { }; } // namespace -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h index 0b84ccac..e04be32f 100644 --- a/absl/strings/charconv.h +++ b/absl/strings/charconv.h @@ -17,8 +17,10 @@ #include <system_error> // NOLINT(build/c++11) +#include "absl/base/config.h" + namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // Workalike compatibilty version of std::chars_format from C++17. // @@ -111,7 +113,7 @@ inline chars_format& operator^=(chars_format& lhs, chars_format rhs) { return lhs; } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_CHARCONV_H_ diff --git a/absl/strings/charconv_test.cc b/absl/strings/charconv_test.cc index b58fad26..9090e9c8 100644 --- a/absl/strings/charconv_test.cc +++ b/absl/strings/charconv_test.cc @@ -511,6 +511,13 @@ TEST(FromChars, Overflow) { EXPECT_EQ(f, std::numeric_limits<float>::max()); } +TEST(FromChars, RegressionTestsFromFuzzer) { + absl::string_view src = "0x21900000p00000000099"; + float f; + auto result = absl::from_chars(src.data(), src.data() + src.size(), f); + EXPECT_EQ(result.ec, std::errc::result_out_of_range); +} + TEST(FromChars, ReturnValuePtr) { // Check that `ptr` points one past the number scanned, even if that number // is not representable. diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc new file mode 100644 index 00000000..d9503ae3 --- /dev/null +++ b/absl/strings/cord.cc @@ -0,0 +1,2019 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/cord.h" + +#include <algorithm> +#include <cstddef> +#include <cstdio> +#include <cstdlib> +#include <iomanip> +#include <limits> +#include <ostream> +#include <sstream> +#include <type_traits> +#include <unordered_set> +#include <vector> + +#include "absl/base/casts.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/port.h" +#include "absl/container/fixed_array.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/escaping.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +using ::absl::cord_internal::CordRep; +using ::absl::cord_internal::CordRepConcat; +using ::absl::cord_internal::CordRepExternal; +using ::absl::cord_internal::CordRepSubstring; + +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + EXTERNAL = 1, + SUBSTRING = 2, + + // We have different tags for different sized flat arrays, + // starting with FLAT + FLAT = 3, +}; + +namespace { + +// Type used with std::allocator for allocating and deallocating +// `CordRepExternal`. std::allocator is used because it opaquely handles the +// different new / delete overloads available on a given platform. +struct alignas(absl::cord_internal::ExternalRepAlignment()) ExternalAllocType { + unsigned char value[absl::cord_internal::ExternalRepAlignment()]; +}; + +// Returns the number of objects to pass in to std::allocator<ExternalAllocType> +// allocate() and deallocate() to create enough room for `CordRepExternal` with +// `releaser_size` bytes on the end. +constexpr size_t GetExternalAllocNumObjects(size_t releaser_size) { + // Be sure to round up since `releaser_size` could be smaller than + // `sizeof(ExternalAllocType)`. + return (sizeof(CordRepExternal) + releaser_size + sizeof(ExternalAllocType) - + 1) / + sizeof(ExternalAllocType); +} + +// Allocates enough memory for `CordRepExternal` and a releaser with size +// `releaser_size` bytes. +void* AllocateExternal(size_t releaser_size) { + return std::allocator<ExternalAllocType>().allocate( + GetExternalAllocNumObjects(releaser_size)); +} + +// Deallocates the memory for a `CordRepExternal` assuming it was allocated with +// a releaser of given size and alignment. +void DeallocateExternal(CordRepExternal* p, size_t releaser_size) { + std::allocator<ExternalAllocType>().deallocate( + reinterpret_cast<ExternalAllocType*>(p), + GetExternalAllocNumObjects(releaser_size)); +} + +// Returns a pointer to the type erased releaser for the given CordRepExternal. +void* GetExternalReleaser(CordRepExternal* rep) { + return rep + 1; +} + +} // namespace + +namespace cord_internal { + +inline CordRepConcat* CordRep::concat() { + assert(tag == CONCAT); + return static_cast<CordRepConcat*>(this); +} + +inline const CordRepConcat* CordRep::concat() const { + assert(tag == CONCAT); + return static_cast<const CordRepConcat*>(this); +} + +inline CordRepSubstring* CordRep::substring() { + assert(tag == SUBSTRING); + return static_cast<CordRepSubstring*>(this); +} + +inline const CordRepSubstring* CordRep::substring() const { + assert(tag == SUBSTRING); + return static_cast<const CordRepSubstring*>(this); +} + +inline CordRepExternal* CordRep::external() { + assert(tag == EXTERNAL); + return static_cast<CordRepExternal*>(this); +} + +inline const CordRepExternal* CordRep::external() const { + assert(tag == EXTERNAL); + return static_cast<const CordRepExternal*>(this); +} + +} // namespace cord_internal + +static const size_t kFlatOverhead = offsetof(CordRep, data); + +static_assert(kFlatOverhead == 13, "Unittests assume kFlatOverhead == 13"); + +// Largest and smallest flat node lengths we are willing to allocate +// Flat allocation size is stored in tag, which currently can encode sizes up +// to 4K, encoded as multiple of either 8 or 32 bytes. +// If we allow for larger sizes, we need to change this to 8/64, 16/128, etc. +static constexpr size_t kMaxFlatSize = 4096; +static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; +static constexpr size_t kMinFlatLength = 32 - kFlatOverhead; + +// Prefer copying blocks of at most this size, otherwise reference count. +static const size_t kMaxBytesToCopy = 511; + +// Helper functions for rounded div, and rounding to exact sizes. +static size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } +static size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } + +// Returns the size to the nearest equal or larger value that can be +// expressed exactly as a tag value. +static size_t RoundUpForTag(size_t size) { + return RoundUp(size, (size <= 1024) ? 8 : 32); +} + +// Converts the allocated size to a tag, rounding down if the size +// does not exactly match a 'tag expressible' size value. The result is +// undefined if the size exceeds the maximum size that can be encoded in +// a tag, i.e., if size is larger than TagToAllocatedSize(<max tag>). +static uint8_t AllocatedSizeToTag(size_t size) { + const size_t tag = (size <= 1024) ? size / 8 : 128 + size / 32 - 1024 / 32; + assert(tag <= std::numeric_limits<uint8_t>::max()); + return tag; +} + +// Converts the provided tag to the corresponding allocated size +static constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); +} + +// Converts the provided tag to the corresponding available data length +static constexpr size_t TagToLength(uint8_t tag) { + return TagToAllocatedSize(tag) - kFlatOverhead; +} + +// Enforce that kMaxFlatSize maps to a well-known exact tag value. +static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); + +constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { + return n == 0 ? a : Fibonacci(n - 1, b, a + b); +} + +static_assert(Fibonacci(63) == 6557470319842, + "Fibonacci values computed incorrectly"); + +// Minimum length required for a given depth tree -- a tree is considered +// balanced if +// length(t) >= min_length[depth(t)] +// The root node depth is allowed to become twice as large to reduce rebalancing +// for larger strings (see IsRootBalanced). +static constexpr uint64_t min_length[] = { + Fibonacci(2), + Fibonacci(3), + Fibonacci(4), + Fibonacci(5), + Fibonacci(6), + Fibonacci(7), + Fibonacci(8), + Fibonacci(9), + Fibonacci(10), + Fibonacci(11), + Fibonacci(12), + Fibonacci(13), + Fibonacci(14), + Fibonacci(15), + Fibonacci(16), + Fibonacci(17), + Fibonacci(18), + Fibonacci(19), + Fibonacci(20), + Fibonacci(21), + Fibonacci(22), + Fibonacci(23), + Fibonacci(24), + Fibonacci(25), + Fibonacci(26), + Fibonacci(27), + Fibonacci(28), + Fibonacci(29), + Fibonacci(30), + Fibonacci(31), + Fibonacci(32), + Fibonacci(33), + Fibonacci(34), + Fibonacci(35), + Fibonacci(36), + Fibonacci(37), + Fibonacci(38), + Fibonacci(39), + Fibonacci(40), + Fibonacci(41), + Fibonacci(42), + Fibonacci(43), + Fibonacci(44), + Fibonacci(45), + Fibonacci(46), + Fibonacci(47), + 0xffffffffffffffffull, // Avoid overflow +}; + +static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); + +// The inlined size to use with absl::InlinedVector. +// +// Note: The InlinedVectors in this file (and in cord.h) do not need to use +// the same value for their inlined size. The fact that they do is historical. +// It may be desirable for each to use a different inlined size optimized for +// that InlinedVector's usage. +// +// TODO(jgm): Benchmark to see if there's a more optimal value than 47 for +// the inlined vector size (47 exists for backward compatibility). +static const int kInlinedVectorSize = 47; + +static inline bool IsRootBalanced(CordRep* node) { + if (node->tag != CONCAT) { + return true; + } else if (node->concat()->depth() <= 15) { + return true; + } else if (node->concat()->depth() > kMinLengthSize) { + return false; + } else { + // Allow depth to become twice as large as implied by fibonacci rule to + // reduce rebalancing for larger strings. + return (node->length >= min_length[node->concat()->depth() / 2]); + } +} + +static CordRep* Rebalance(CordRep* node); +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os); +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation); + +static inline CordRep* VerifyTree(CordRep* node) { + // Verification is expensive, so only do it in debug mode. + // Even in debug mode we normally do only light validation. + // If you are debugging Cord itself, you should define the + // macro EXTRA_CORD_VALIDATION, e.g. by adding + // --copt=-DEXTRA_CORD_VALIDATION to the blaze line. +#ifdef EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/true)); +#else // EXTRA_CORD_VALIDATION + assert(node == nullptr || VerifyNode(node, node, /*full_validation=*/false)); +#endif // EXTRA_CORD_VALIDATION + static_cast<void>(&VerifyNode); + + return node; +} + +// -------------------------------------------------------------------- +// Memory management + +inline CordRep* Ref(CordRep* rep) { + if (rep != nullptr) { + rep->refcount.Increment(); + } + return rep; +} + +// This internal routine is called from the cold path of Unref below. Keeping it +// in a separate routine allows good inlining of Unref into many profitable call +// sites. However, the call to this function can be highly disruptive to the +// register pressure in those callers. To minimize the cost to callers, we use +// a special LLVM calling convention that preserves most registers. This allows +// the call to this routine in cold paths to not disrupt the caller's register +// pressure. This calling convention is not available on all platforms; we +// intentionally allow LLVM to ignore the attribute rather than attempting to +// hardcode the list of supported platforms. +#if defined(__clang__) && !defined(__i386__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wattributes" +__attribute__((preserve_most)) +#pragma clang diagnostic pop +#endif +static void UnrefInternal(CordRep* rep) { + assert(rep != nullptr); + + absl::InlinedVector<CordRep*, kInlinedVectorSize> pending; + while (true) { + if (rep->tag == CONCAT) { + CordRepConcat* rep_concat = rep->concat(); + CordRep* right = rep_concat->right; + if (!right->refcount.Decrement()) { + pending.push_back(right); + } + CordRep* left = rep_concat->left; + delete rep_concat; + rep = nullptr; + if (!left->refcount.Decrement()) { + rep = left; + continue; + } + } else if (rep->tag == EXTERNAL) { + CordRepExternal* rep_external = rep->external(); + absl::string_view data(rep_external->base, rep->length); + void* releaser = GetExternalReleaser(rep_external); + size_t releaser_size = rep_external->releaser_invoker(releaser, data); + rep_external->~CordRepExternal(); + DeallocateExternal(rep_external, releaser_size); + rep = nullptr; + } else if (rep->tag == SUBSTRING) { + CordRepSubstring* rep_substring = rep->substring(); + CordRep* child = rep_substring->child; + delete rep_substring; + rep = nullptr; + if (!child->refcount.Decrement()) { + rep = child; + continue; + } + } else { + // Flat CordReps are allocated and constructed with raw ::operator new + // and placement new, and must be destructed and deallocated + // accordingly. +#if defined(__cpp_sized_deallocation) + size_t size = TagToAllocatedSize(rep->tag); + rep->~CordRep(); + ::operator delete(rep, size); +#else + rep->~CordRep(); + ::operator delete(rep); +#endif + rep = nullptr; + } + + if (!pending.empty()) { + rep = pending.back(); + pending.pop_back(); + } else { + break; + } + } +} + +inline void Unref(CordRep* rep) { + // Fast-path for two common, hot cases: a null rep and a shared root. + if (ABSL_PREDICT_TRUE(rep == nullptr || + rep->refcount.DecrementExpectHighRefcount())) { + return; + } + + UnrefInternal(rep); +} + +// Return the depth of a node +static int Depth(const CordRep* rep) { + if (rep->tag == CONCAT) { + return rep->concat()->depth(); + } else { + return 0; + } +} + +static void SetConcatChildren(CordRepConcat* concat, CordRep* left, + CordRep* right) { + concat->left = left; + concat->right = right; + + concat->length = left->length + right->length; + concat->set_depth(1 + std::max(Depth(left), Depth(right))); +} + +// Create a concatenation of the specified nodes. +// Does not change the refcounts of "left" and "right". +// The returned node has a refcount of 1. +static CordRep* RawConcat(CordRep* left, CordRep* right) { + // Avoid making degenerate concat nodes (one child is empty) + if (left == nullptr || left->length == 0) { + Unref(left); + return right; + } + if (right == nullptr || right->length == 0) { + Unref(right); + return left; + } + + CordRepConcat* rep = new CordRepConcat(); + rep->tag = CONCAT; + SetConcatChildren(rep, left, right); + + return rep; +} + +static CordRep* Concat(CordRep* left, CordRep* right) { + CordRep* rep = RawConcat(left, right); + if (rep != nullptr && !IsRootBalanced(rep)) { + rep = Rebalance(rep); + } + return VerifyTree(rep); +} + +// Make a balanced tree out of an array of leaf nodes. +static CordRep* MakeBalancedTree(CordRep** reps, size_t n) { + // Make repeated passes over the array, merging adjacent pairs + // until we are left with just a single node. + while (n > 1) { + size_t dst = 0; + for (size_t src = 0; src < n; src += 2) { + if (src + 1 < n) { + reps[dst] = Concat(reps[src], reps[src + 1]); + } else { + reps[dst] = reps[src]; + } + dst++; + } + n = dst; + } + + return reps[0]; +} + +// Create a new flat node. +static CordRep* NewFlat(size_t length_hint) { + if (length_hint <= kMinFlatLength) { + length_hint = kMinFlatLength; + } else if (length_hint > kMaxFlatLength) { + length_hint = kMaxFlatLength; + } + + // Round size up so it matches a size we can exactly express in a tag. + const size_t size = RoundUpForTag(length_hint + kFlatOverhead); + void* const raw_rep = ::operator new(size); + CordRep* rep = new (raw_rep) CordRep(); + rep->tag = AllocatedSizeToTag(size); + return VerifyTree(rep); +} + +// Create a new tree out of the specified array. +// The returned node has a refcount of 1. +static CordRep* NewTree(const char* data, + size_t length, + size_t alloc_hint) { + if (length == 0) return nullptr; + absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1); + size_t n = 0; + do { + const size_t len = std::min(length, kMaxFlatLength); + CordRep* rep = NewFlat(len + alloc_hint); + rep->length = len; + memcpy(rep->data, data, len); + reps[n++] = VerifyTree(rep); + data += len; + length -= len; + } while (length != 0); + return MakeBalancedTree(reps.data(), n); +} + +namespace cord_internal { + +ExternalRepReleaserPair NewExternalWithUninitializedReleaser( + absl::string_view data, ExternalReleaserInvoker invoker, + size_t releaser_size) { + assert(!data.empty()); + + void* raw_rep = AllocateExternal(releaser_size); + auto* rep = new (raw_rep) CordRepExternal(); + rep->length = data.size(); + rep->tag = EXTERNAL; + rep->base = data.data(); + rep->releaser_invoker = invoker; + return {VerifyTree(rep), GetExternalReleaser(rep)}; +} + +} // namespace cord_internal + +static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { + // Never create empty substring nodes + if (length == 0) { + Unref(child); + return nullptr; + } else { + CordRepSubstring* rep = new CordRepSubstring(); + assert((offset + length) <= child->length); + rep->length = length; + rep->tag = SUBSTRING; + rep->start = offset; + rep->child = child; + return VerifyTree(rep); + } +} + +// -------------------------------------------------------------------- +// Cord::InlineRep functions + +// This will trigger LNK2005 in MSVC. +#ifndef COMPILER_MSVC +const unsigned char Cord::InlineRep::kMaxInline; +#endif // COMPILER_MSVC + +inline void Cord::InlineRep::set_data(const char* data, size_t n, + bool nullify_tail) { + static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); + + cord_internal::SmallMemmove(data_, data, n, nullify_tail); + data_[kMaxInline] = static_cast<char>(n); +} + +inline char* Cord::InlineRep::set_data(size_t n) { + assert(n <= kMaxInline); + memset(data_, 0, sizeof(data_)); + data_[kMaxInline] = static_cast<char>(n); + return data_; +} + +inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) { + size_t len = data_[kMaxInline]; + CordRep* result; + if (len > kMaxInline) { + memcpy(&result, data_, sizeof(result)); + } else { + result = NewFlat(len + extra_hint); + result->length = len; + memcpy(result->data, data_, len); + set_tree(result); + } + return result; +} + +inline void Cord::InlineRep::reduce_size(size_t n) { + size_t tag = data_[kMaxInline]; + assert(tag <= kMaxInline); + assert(tag >= n); + tag -= n; + memset(data_ + tag, 0, n); + data_[kMaxInline] = static_cast<char>(tag); +} + +inline void Cord::InlineRep::remove_prefix(size_t n) { + cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n); + reduce_size(n); +} + +void Cord::InlineRep::AppendTree(CordRep* tree) { + if (tree == nullptr) return; + size_t len = data_[kMaxInline]; + if (len == 0) { + set_tree(tree); + } else { + set_tree(Concat(force_tree(0), tree)); + } +} + +void Cord::InlineRep::PrependTree(CordRep* tree) { + if (tree == nullptr) return; + size_t len = data_[kMaxInline]; + if (len == 0) { + set_tree(tree); + } else { + set_tree(Concat(tree, force_tree(0))); + } +} + +// Searches for a non-full flat node at the rightmost leaf of the tree. If a +// suitable leaf is found, the function will update the length field for all +// nodes to account for the size increase. The append region address will be +// written to region and the actual size increase will be written to size. +static inline bool PrepareAppendRegion(CordRep* root, char** region, + size_t* size, size_t max_length) { + // Search down the right-hand path for a non-full FLAT node. + CordRep* dst = root; + while (dst->tag == CONCAT && dst->refcount.IsOne()) { + dst = dst->concat()->right; + } + + if (dst->tag < FLAT || !dst->refcount.IsOne()) { + *region = nullptr; + *size = 0; + return false; + } + + const size_t in_use = dst->length; + const size_t capacity = TagToLength(dst->tag); + if (in_use == capacity) { + *region = nullptr; + *size = 0; + return false; + } + + size_t size_increase = std::min(capacity - in_use, max_length); + + // We need to update the length fields for all nodes, including the leaf node. + for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) { + rep->length += size_increase; + } + dst->length += size_increase; + + *region = dst->data + in_use; + *size = size_increase; + return true; +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, + size_t max_length) { + if (max_length == 0) { + *region = nullptr; + *size = 0; + return; + } + + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) { + *region = data_ + inline_length; + *size = max_length; + data_[kMaxInline] = static_cast<char>(inline_length + max_length); + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. + CordRep* new_node = + NewFlat(std::max(static_cast<size_t>(root->length), max_length)); + new_node->length = + std::min(static_cast<size_t>(TagToLength(new_node->tag)), max_length); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { + const size_t max_length = std::numeric_limits<size_t>::max(); + + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline) { + *region = data_ + inline_length; + *size = kMaxInline - inline_length; + data_[kMaxInline] = kMaxInline; + return; + } + + CordRep* root = force_tree(max_length); + + if (PrepareAppendRegion(root, region, size, max_length)) { + return; + } + + // Allocate new node. + CordRep* new_node = NewFlat(root->length); + new_node->length = TagToLength(new_node->tag); + *region = new_node->data; + *size = new_node->length; + replace_tree(Concat(root, new_node)); +} + +// If the rep is a leaf, this will increment the value at total_mem_usage and +// will return true. +static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { + if (rep->tag >= FLAT) { + *total_mem_usage += TagToAllocatedSize(rep->tag); + return true; + } + if (rep->tag == EXTERNAL) { + *total_mem_usage += sizeof(CordRepConcat) + rep->length; + return true; + } + return false; +} + +void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { + ClearSlow(); + + memcpy(data_, src.data_, sizeof(data_)); + if (is_tree()) { + Ref(tree()); + } +} + +void Cord::InlineRep::ClearSlow() { + if (is_tree()) { + Unref(tree()); + } + memset(data_, 0, sizeof(data_)); +} + +// -------------------------------------------------------------------- +// Constructors and destructors + +Cord::Cord(const Cord& src) : contents_(src.contents_) { + Ref(contents_.tree()); // Does nothing if contents_ has embedded data +} + +Cord::Cord(absl::string_view src) { + const size_t n = src.size(); + if (n <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), n, false); + } else { + contents_.set_tree(NewTree(src.data(), n, 0)); + } +} + +// The destruction code is separate so that the compiler can determine +// that it does not need to call the destructor on a moved-from Cord. +void Cord::DestroyCordSlow() { + Unref(VerifyTree(contents_.tree())); +} + +// -------------------------------------------------------------------- +// Mutators + +void Cord::Clear() { + Unref(contents_.clear()); +} + +Cord& Cord::operator=(absl::string_view src) { + + const char* data = src.data(); + size_t length = src.size(); + CordRep* tree = contents_.tree(); + if (length <= InlineRep::kMaxInline) { + // Embed into this->contents_ + contents_.set_data(data, length, true); + Unref(tree); + return *this; + } + if (tree != nullptr && tree->tag >= FLAT && + TagToLength(tree->tag) >= length && tree->refcount.IsOne()) { + // Copy in place if the existing FLAT node is reusable. + memmove(tree->data, data, length); + tree->length = length; + VerifyTree(tree); + return *this; + } + contents_.set_tree(NewTree(data, length, 0)); + Unref(tree); + return *this; +} + +// TODO(sanjay): Move to Cord::InlineRep section of file. For now, +// we keep it here to make diffs easier. +void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { + if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. + // Try to fit in the inline buffer if possible. + size_t inline_length = data_[kMaxInline]; + if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) { + // Append new data to embedded array + data_[kMaxInline] = static_cast<char>(inline_length + src_size); + memcpy(data_ + inline_length, src_data, src_size); + return; + } + + CordRep* root = tree(); + + size_t appended = 0; + if (root) { + char* region; + if (PrepareAppendRegion(root, ®ion, &appended, src_size)) { + memcpy(region, src_data, appended); + } + } else { + // It is possible that src_data == data_, but when we transition from an + // InlineRep to a tree we need to assign data_ = root via set_tree. To + // avoid corrupting the source data before we copy it, delay calling + // set_tree until after we've copied data. + // We are going from an inline size to beyond inline size. Make the new size + // either double the inlined size, or the added size + 10%. + const size_t size1 = inline_length * 2 + src_size; + const size_t size2 = inline_length + src_size / 10; + root = NewFlat(std::max<size_t>(size1, size2)); + appended = std::min(src_size, TagToLength(root->tag) - inline_length); + memcpy(root->data, data_, inline_length); + memcpy(root->data + inline_length, src_data, appended); + root->length = inline_length + appended; + set_tree(root); + } + + src_data += appended; + src_size -= appended; + if (src_size == 0) { + return; + } + + // Use new block(s) for any remaining bytes that were not handled above. + // Alloc extra memory only if the right child of the root of the new tree is + // going to be a FLAT node, which will permit further inplace appends. + size_t length = src_size; + if (src_size < kMaxFlatLength) { + // The new length is either + // - old size + 10% + // - old_size + src_size + // This will cause a reasonable conservative step-up in size that is still + // large enough to avoid excessive amounts of small fragments being added. + length = std::max<size_t>(root->length / 10, src_size); + } + set_tree(Concat(root, NewTree(src_data, src_size, length - src_size))); +} + +inline CordRep* Cord::TakeRep() const& { + return Ref(contents_.tree()); +} + +inline CordRep* Cord::TakeRep() && { + CordRep* rep = contents_.tree(); + contents_.clear(); + return rep; +} + +template <typename C> +inline void Cord::AppendImpl(C&& src) { + if (empty()) { + // In case of an empty destination avoid allocating a new node, do not copy + // data. + *this = std::forward<C>(src); + return; + } + + // For short cords, it is faster to copy data if there is room in dst. + const size_t src_size = src.contents_.size(); + if (src_size <= kMaxBytesToCopy) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree == nullptr) { + // src has embedded data. + contents_.AppendArray(src.contents_.data(), src_size); + return; + } + if (src_tree->tag >= FLAT) { + // src tree just has one flat node. + contents_.AppendArray(src_tree->data, src_size); + return; + } + if (&src == this) { + // ChunkIterator below assumes that src is not modified during traversal. + Append(Cord(src)); + return; + } + // TODO(mec): Should we only do this if "dst" has space? + for (absl::string_view chunk : src.Chunks()) { + Append(chunk); + } + return; + } + + contents_.AppendTree(std::forward<C>(src).TakeRep()); +} + +void Cord::Append(const Cord& src) { AppendImpl(src); } + +void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } + +void Cord::Prepend(const Cord& src) { + CordRep* src_tree = src.contents_.tree(); + if (src_tree != nullptr) { + Ref(src_tree); + contents_.PrependTree(src_tree); + return; + } + + // `src` cord is inlined. + absl::string_view src_contents(src.contents_.data(), src.contents_.size()); + return Prepend(src_contents); +} + +void Cord::Prepend(absl::string_view src) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. + size_t cur_size = contents_.size(); + if (!contents_.is_tree() && cur_size + src.size() <= InlineRep::kMaxInline) { + // Use embedded storage. + char data[InlineRep::kMaxInline + 1] = {0}; + data[InlineRep::kMaxInline] = cur_size + src.size(); // set size + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), cur_size); + memcpy(reinterpret_cast<void*>(&contents_), data, + InlineRep::kMaxInline + 1); + } else { + contents_.PrependTree(NewTree(src.data(), src.size(), 0)); + } +} + +static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->left->length) { + // Push right to stack, descend left. + rhs_stack.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Drop left, descend right. + n -= node->concat()->left->length; + node = node->concat()->right; + } + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else { + size_t start = n; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + // Consider in-place update of node, similar to in RemoveSuffixFrom(). + start += node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!rhs_stack.empty()) { + node = Concat(node, Ref(rhs_stack.back())); + rhs_stack.pop_back(); + } + return node; +} + +// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the +// exception that removing a suffix has an optimization where a node may be +// edited in place iff that node and all its ancestors have a refcount of 1. +static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { + if (n >= node->length) return nullptr; + if (n == 0) return Ref(node); + absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; + bool inplace_ok = node->refcount.IsOne(); + + while (node->tag == CONCAT) { + assert(n <= node->length); + if (n < node->concat()->right->length) { + // Push left to stack, descend right. + lhs_stack.push_back(node->concat()->left); + node = node->concat()->right; + } else { + // Drop right, descend left. + n -= node->concat()->right->length; + node = node->concat()->left; + } + inplace_ok = inplace_ok && node->refcount.IsOne(); + } + assert(n <= node->length); + + if (n == 0) { + Ref(node); + } else if (inplace_ok && node->tag != EXTERNAL) { + // Consider making a new buffer if the current node capacity is much + // larger than the new length. + Ref(node); + node->length -= n; + } else { + size_t start = 0; + size_t len = node->length - n; + if (node->tag == SUBSTRING) { + start = node->substring()->start; + node = node->substring()->child; + } + node = NewSubstring(Ref(node), start, len); + } + while (!lhs_stack.empty()) { + node = Concat(Ref(lhs_stack.back()), node); + lhs_stack.pop_back(); + } + return node; +} + +void Cord::RemovePrefix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested prefix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.remove_prefix(n); + } else { + CordRep* newrep = RemovePrefixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +void Cord::RemoveSuffix(size_t n) { + ABSL_INTERNAL_CHECK(n <= size(), + absl::StrCat("Requested suffix size ", n, + " exceeds Cord's size ", size())); + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + contents_.reduce_size(n); + } else { + CordRep* newrep = RemoveSuffixFrom(tree, n); + Unref(tree); + contents_.replace_tree(VerifyTree(newrep)); + } +} + +// Work item for NewSubRange(). +struct SubRange { + SubRange(CordRep* a_node, size_t a_pos, size_t a_n) + : node(a_node), pos(a_pos), n(a_n) {} + CordRep* node; // nullptr means concat last 2 results. + size_t pos; + size_t n; +}; + +static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { + absl::InlinedVector<CordRep*, kInlinedVectorSize> results; + absl::InlinedVector<SubRange, kInlinedVectorSize> todo; + todo.push_back(SubRange(node, pos, n)); + do { + const SubRange& sr = todo.back(); + node = sr.node; + pos = sr.pos; + n = sr.n; + todo.pop_back(); + + if (node == nullptr) { + assert(results.size() >= 2); + CordRep* right = results.back(); + results.pop_back(); + CordRep* left = results.back(); + results.pop_back(); + results.push_back(Concat(left, right)); + } else if (pos == 0 && n == node->length) { + results.push_back(Ref(node)); + } else if (node->tag != CONCAT) { + if (node->tag == SUBSTRING) { + pos += node->substring()->start; + node = node->substring()->child; + } + results.push_back(NewSubstring(Ref(node), pos, n)); + } else if (pos + n <= node->concat()->left->length) { + todo.push_back(SubRange(node->concat()->left, pos, n)); + } else if (pos >= node->concat()->left->length) { + pos -= node->concat()->left->length; + todo.push_back(SubRange(node->concat()->right, pos, n)); + } else { + size_t left_n = node->concat()->left->length - pos; + todo.push_back(SubRange(nullptr, 0, 0)); // Concat() + todo.push_back(SubRange(node->concat()->right, 0, n - left_n)); + todo.push_back(SubRange(node->concat()->left, pos, left_n)); + } + } while (!todo.empty()); + assert(results.size() == 1); + return results[0]; +} + +Cord Cord::Subcord(size_t pos, size_t new_size) const { + Cord sub_cord; + size_t length = size(); + if (pos > length) pos = length; + if (new_size > length - pos) new_size = length - pos; + CordRep* tree = contents_.tree(); + if (tree == nullptr) { + // sub_cord is newly constructed, no need to re-zero-out the tail of + // contents_ memory. + sub_cord.contents_.set_data(contents_.data() + pos, new_size, false); + } else if (new_size == 0) { + // We want to return empty subcord, so nothing to do. + } else if (new_size <= InlineRep::kMaxInline) { + Cord::ChunkIterator it = chunk_begin(); + it.AdvanceBytes(pos); + char* dest = sub_cord.contents_.data_; + size_t remaining_size = new_size; + while (remaining_size > it->size()) { + cord_internal::SmallMemmove(dest, it->data(), it->size()); + remaining_size -= it->size(); + dest += it->size(); + ++it; + } + cord_internal::SmallMemmove(dest, it->data(), remaining_size); + sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size; + } else { + sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); + } + return sub_cord; +} + +// -------------------------------------------------------------------- +// Balancing + +class CordForest { + public: + explicit CordForest(size_t length) + : root_length_(length), trees_(kMinLengthSize, nullptr) {} + + void Build(CordRep* cord_root) { + std::vector<CordRep*> pending = {cord_root}; + + while (!pending.empty()) { + CordRep* node = pending.back(); + pending.pop_back(); + CheckNode(node); + if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) { + AddNode(node); + continue; + } + + CordRepConcat* concat_node = node->concat(); + if (concat_node->depth() >= kMinLengthSize || + concat_node->length < min_length[concat_node->depth()]) { + pending.push_back(concat_node->right); + pending.push_back(concat_node->left); + + if (concat_node->refcount.IsOne()) { + concat_node->left = concat_freelist_; + concat_freelist_ = concat_node; + } else { + Ref(concat_node->right); + Ref(concat_node->left); + Unref(concat_node); + } + } else { + AddNode(node); + } + } + } + + CordRep* ConcatNodes() { + CordRep* sum = nullptr; + for (auto* node : trees_) { + if (node == nullptr) continue; + + sum = PrependNode(node, sum); + root_length_ -= node->length; + if (root_length_ == 0) break; + } + ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node"); + return VerifyTree(sum); + } + + private: + CordRep* AppendNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(sum, node); + } + + CordRep* PrependNode(CordRep* node, CordRep* sum) { + return (sum == nullptr) ? node : MakeConcat(node, sum); + } + + void AddNode(CordRep* node) { + CordRep* sum = nullptr; + + // Collect together everything with which we will merge node + int i = 0; + for (; node->length > min_length[i + 1]; ++i) { + auto& tree_at_i = trees_[i]; + + if (tree_at_i == nullptr) continue; + sum = PrependNode(tree_at_i, sum); + tree_at_i = nullptr; + } + + sum = AppendNode(node, sum); + + // Insert sum into appropriate place in the forest + for (; sum->length >= min_length[i]; ++i) { + auto& tree_at_i = trees_[i]; + if (tree_at_i == nullptr) continue; + + sum = MakeConcat(tree_at_i, sum); + tree_at_i = nullptr; + } + + // min_length[0] == 1, which means sum->length >= min_length[0] + assert(i > 0); + trees_[i - 1] = sum; + } + + // Make concat node trying to resue existing CordRepConcat nodes we + // already collected in the concat_freelist_. + CordRep* MakeConcat(CordRep* left, CordRep* right) { + if (concat_freelist_ == nullptr) return RawConcat(left, right); + + CordRepConcat* rep = concat_freelist_; + if (concat_freelist_->left == nullptr) { + concat_freelist_ = nullptr; + } else { + concat_freelist_ = concat_freelist_->left->concat(); + } + SetConcatChildren(rep, left, right); + + return rep; + } + + static void CheckNode(CordRep* node) { + ABSL_INTERNAL_CHECK(node->length != 0u, ""); + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ""); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, ""); + ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length + + node->concat()->right->length), + ""); + } + } + + size_t root_length_; + + // use an inlined vector instead of a flat array to get bounds checking + absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_; + + // List of concat nodes we can re-use for Cord balancing. + CordRepConcat* concat_freelist_ = nullptr; +}; + +static CordRep* Rebalance(CordRep* node) { + VerifyTree(node); + assert(node->tag == CONCAT); + + if (node->length == 0) { + return nullptr; + } + + CordForest forest(node->length); + forest.Build(node); + return forest.ConcatNodes(); +} + +// -------------------------------------------------------------------- +// Comparators + +namespace { + +int ClampResult(int memcmp_res) { + return static_cast<int>(memcmp_res > 0) - static_cast<int>(memcmp_res < 0); +} + +int CompareChunks(absl::string_view* lhs, absl::string_view* rhs, + size_t* size_to_compare) { + size_t compared_size = std::min(lhs->size(), rhs->size()); + assert(*size_to_compare >= compared_size); + *size_to_compare -= compared_size; + + int memcmp_res = ::memcmp(lhs->data(), rhs->data(), compared_size); + if (memcmp_res != 0) return memcmp_res; + + lhs->remove_prefix(compared_size); + rhs->remove_prefix(compared_size); + + return 0; +} + +// This overload set computes comparison results from memcmp result. This +// interface is used inside GenericCompare below. Differet implementations +// are specialized for int and bool. For int we clamp result to {-1, 0, 1} +// set. For bool we just interested in "value == 0". +template <typename ResultType> +ResultType ComputeCompareResult(int memcmp_res) { + return ClampResult(memcmp_res); +} +template <> +bool ComputeCompareResult<bool>(int memcmp_res) { + return memcmp_res == 0; +} + +} // namespace + +// Helper routine. Locates the first flat chunk of the Cord without +// initializing the iterator. +inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { + size_t n = data_[kMaxInline]; + if (n <= kMaxInline) { + return absl::string_view(data_, n); + } + + CordRep* node = tree(); + if (node->tag >= FLAT) { + return absl::string_view(node->data, node->length); + } + + if (node->tag == EXTERNAL) { + return absl::string_view(node->external()->base, node->length); + } + + // Walk down the left branches until we hit a non-CONCAT node. + while (node->tag == CONCAT) { + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + assert(length != 0); + + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + if (node->tag >= FLAT) { + return absl::string_view(node->data + offset, length); + } + + assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here"); + + return absl::string_view(node->external()->base + offset, length); +} + +inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + + // compared_size is inside first chunk. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs.size()); + lhs_chunk.remove_prefix(compared_size); + rhs.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && !rhs.empty()) { + int comparison_result = CompareChunks(&lhs_chunk, &rhs, &size_to_compare); + if (comparison_result != 0) return comparison_result; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs.empty()) - static_cast<int>(lhs_chunk.empty()); +} + +inline int Cord::CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const { + auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { + if (!chunk->empty()) return true; + ++*it; + if (it->bytes_remaining_ == 0) return false; + *chunk = **it; + return true; + }; + + Cord::ChunkIterator lhs_it = chunk_begin(); + Cord::ChunkIterator rhs_it = rhs.chunk_begin(); + + // compared_size is inside both first chunks. + absl::string_view lhs_chunk = + (lhs_it.bytes_remaining_ != 0) ? *lhs_it : absl::string_view(); + absl::string_view rhs_chunk = + (rhs_it.bytes_remaining_ != 0) ? *rhs_it : absl::string_view(); + assert(compared_size <= lhs_chunk.size()); + assert(compared_size <= rhs_chunk.size()); + lhs_chunk.remove_prefix(compared_size); + rhs_chunk.remove_prefix(compared_size); + size_to_compare -= compared_size; // skip already compared size. + + while (advance(&lhs_it, &lhs_chunk) && advance(&rhs_it, &rhs_chunk)) { + int memcmp_res = CompareChunks(&lhs_chunk, &rhs_chunk, &size_to_compare); + if (memcmp_res != 0) return memcmp_res; + if (size_to_compare == 0) return 0; + } + + return static_cast<int>(rhs_chunk.empty()) - + static_cast<int>(lhs_chunk.empty()); +} + +inline absl::string_view Cord::GetFirstChunk(const Cord& c) { + return c.contents_.FindFlatStartPiece(); +} +inline absl::string_view Cord::GetFirstChunk(absl::string_view sv) { + return sv; +} + +// Compares up to 'size_to_compare' bytes of 'lhs' with 'rhs'. It is assumed +// that 'size_to_compare' is greater that size of smallest of first chunks. +template <typename ResultType, typename RHS> +ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare) { + absl::string_view lhs_chunk = Cord::GetFirstChunk(lhs); + absl::string_view rhs_chunk = Cord::GetFirstChunk(rhs); + + size_t compared_size = std::min(lhs_chunk.size(), rhs_chunk.size()); + assert(size_to_compare >= compared_size); + int memcmp_res = ::memcmp(lhs_chunk.data(), rhs_chunk.data(), compared_size); + if (compared_size == size_to_compare || memcmp_res != 0) { + return ComputeCompareResult<ResultType>(memcmp_res); + } + + return ComputeCompareResult<ResultType>( + lhs.CompareSlowPath(rhs, compared_size, size_to_compare)); +} + +bool Cord::EqualsImpl(absl::string_view rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +bool Cord::EqualsImpl(const Cord& rhs, size_t size_to_compare) const { + return GenericCompare<bool>(*this, rhs, size_to_compare); +} + +template <typename RHS> +inline int SharedCompareImpl(const Cord& lhs, const RHS& rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size == rhs_size) { + return GenericCompare<int>(lhs, rhs, lhs_size); + } + if (lhs_size < rhs_size) { + auto data_comp_res = GenericCompare<int>(lhs, rhs, lhs_size); + return data_comp_res == 0 ? -1 : data_comp_res; + } + + auto data_comp_res = GenericCompare<int>(lhs, rhs, rhs_size); + return data_comp_res == 0 ? +1 : data_comp_res; +} + +int Cord::Compare(absl::string_view rhs) const { + return SharedCompareImpl(*this, rhs); +} + +int Cord::CompareImpl(const Cord& rhs) const { + return SharedCompareImpl(*this, rhs); +} + +bool Cord::EndsWith(absl::string_view rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +bool Cord::EndsWith(const Cord& rhs) const { + size_t my_size = size(); + size_t rhs_size = rhs.size(); + + if (my_size < rhs_size) return false; + + Cord tmp(*this); + tmp.RemovePrefix(my_size - rhs_size); + return tmp.EqualsImpl(rhs, rhs_size); +} + +// -------------------------------------------------------------------- +// Misc. + +Cord::operator std::string() const { + std::string s; + absl::CopyCordToString(*this, &s); + return s; +} + +void CopyCordToString(const Cord& src, std::string* dst) { + if (!src.contents_.is_tree()) { + src.contents_.CopyTo(dst); + } else { + absl::strings_internal::STLStringResizeUninitialized(dst, src.size()); + src.CopyToArraySlowPath(&(*dst)[0]); + } +} + +void Cord::CopyToArraySlowPath(char* dst) const { + assert(contents_.is_tree()); + absl::string_view fragment; + if (GetFlatAux(contents_.tree(), &fragment)) { + memcpy(dst, fragment.data(), fragment.size()); + return; + } + for (absl::string_view chunk : Chunks()) { + memcpy(dst, chunk.data(), chunk.size()); + dst += chunk.size(); + } +} + +Cord::ChunkIterator& Cord::ChunkIterator::operator++() { + assert(bytes_remaining_ > 0 && "Attempted to iterate past `end()`"); + assert(bytes_remaining_ >= current_chunk_.size()); + bytes_remaining_ -= current_chunk_.size(); + + if (stack_of_right_children_.empty()) { + assert(!current_chunk_.empty()); // Called on invalid iterator. + // We have reached the end of the Cord. + return *this; + } + + // Process the next node on the stack. + CordRep* node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + + // Walk down the left branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length != 0); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset, length); + current_leaf_ = node; + return *this; +} + +Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + Cord subcord; + + if (n <= InlineRep::kMaxInline) { + // Range to read fits in inline data. Flatten it. + char* data = subcord.contents_.set_data(n); + while (n > current_chunk_.size()) { + memcpy(data, current_chunk_.data(), current_chunk_.size()); + data += current_chunk_.size(); + n -= current_chunk_.size(); + ++*this; + } + memcpy(data, current_chunk_.data(), n); + if (n < current_chunk_.size()) { + RemoveChunkPrefix(n); + } else if (n > 0) { + ++*this; + } + return subcord; + } + if (n < current_chunk_.size()) { + // Range to read is a proper subrange of the current chunk. + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, n); + subcord.contents_.set_tree(VerifyTree(subnode)); + RemoveChunkPrefix(n); + return subcord; + } + + // Range to read begins with a proper subrange of the current chunk. + assert(!current_chunk_.empty()); + assert(current_leaf_ != nullptr); + CordRep* subnode = Ref(current_leaf_); + if (current_chunk_.size() < subnode->length) { + const char* data = + subnode->tag == EXTERNAL ? subnode->external()->base : subnode->data; + subnode = NewSubstring(subnode, current_chunk_.data() - data, + current_chunk_.size()); + } + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, reading whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + // TODO(qrczak): This might unnecessarily recreate existing concat nodes. + // Avoiding that would need pretty complicated logic (instead of + // current_leaf_, keep current_subtree_ which points to the highest node + // such that the current leaf can be found on the path of left children + // starting from current_subtree_; delay creating subnode while node is + // below current_subtree_; find the proper node along the path of left + // children starting from current_subtree_ if this loop exits while staying + // below current_subtree_; etc.; alternatively, push parents instead of + // right children on the stack). + subnode = Concat(subnode, Ref(node)); + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Read left, descend right. + subnode = Concat(subnode, Ref(node->concat()->left)); + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + // Range to read ends with a proper (possibly empty) subrange of the current + // chunk. + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + if (n > 0) subnode = Concat(subnode, NewSubstring(Ref(node), offset, n)); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; + subcord.contents_.set_tree(VerifyTree(subnode)); + return subcord; +} + +void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { + assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); + assert(n >= current_chunk_.size()); // This should only be called when + // iterating to a new node. + + n -= current_chunk_.size(); + bytes_remaining_ -= current_chunk_.size(); + + // Process the next node(s) on the stack, skipping whole subtrees depending on + // their length and how many bytes we are advancing. + CordRep* node = nullptr; + while (!stack_of_right_children_.empty()) { + node = stack_of_right_children_.back(); + stack_of_right_children_.pop_back(); + if (node->length > n) break; + n -= node->length; + bytes_remaining_ -= node->length; + node = nullptr; + } + + if (node == nullptr) { + // We have reached the end of the Cord. + assert(bytes_remaining_ == 0); + return; + } + + // Walk down the appropriate branches until we hit a non-CONCAT node. Save the + // right children to the stack for subsequent traversal. + while (node->tag == CONCAT) { + if (node->concat()->left->length > n) { + // Push right, descend left. + stack_of_right_children_.push_back(node->concat()->right); + node = node->concat()->left; + } else { + // Skip left, descend right. + n -= node->concat()->left->length; + bytes_remaining_ -= node->concat()->left->length; + node = node->concat()->right; + } + } + + // Get the child node if we encounter a SUBSTRING. + size_t offset = 0; + size_t length = node->length; + if (node->tag == SUBSTRING) { + offset = node->substring()->start; + node = node->substring()->child; + } + + assert(node->tag == EXTERNAL || node->tag >= FLAT); + assert(length > n); + const char* data = + node->tag == EXTERNAL ? node->external()->base : node->data; + current_chunk_ = absl::string_view(data + offset + n, length - n); + current_leaf_ = node; + bytes_remaining_ -= n; +} + +char Cord::operator[](size_t i) const { + assert(i < size()); + size_t offset = i; + const CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return contents_.data()[i]; + } + while (true) { + assert(rep != nullptr); + assert(offset < rep->length); + if (rep->tag >= FLAT) { + // Get the "i"th character directly from the flat array. + return rep->data[offset]; + } else if (rep->tag == EXTERNAL) { + // Get the "i"th character from the external array. + return rep->external()->base[offset]; + } else if (rep->tag == CONCAT) { + // Recursively branch to the side of the concatenation that the "i"th + // character is on. + size_t left_length = rep->concat()->left->length; + if (offset < left_length) { + rep = rep->concat()->left; + } else { + offset -= left_length; + rep = rep->concat()->right; + } + } else { + // This must be a substring a node, so bypass it to get to the child. + assert(rep->tag == SUBSTRING); + offset += rep->substring()->start; + rep = rep->substring()->child; + } + } +} + +absl::string_view Cord::FlattenSlowPath() { + size_t total_size = size(); + CordRep* new_rep; + char* new_buffer; + + // Try to put the contents into a new flat rep. If they won't fit in the + // biggest possible flat node, use an external rep instead. + if (total_size <= kMaxFlatLength) { + new_rep = NewFlat(total_size); + new_rep->length = total_size; + new_buffer = new_rep->data; + CopyToArraySlowPath(new_buffer); + } else { + new_buffer = std::allocator<char>().allocate(total_size); + CopyToArraySlowPath(new_buffer); + new_rep = absl::cord_internal::NewExternalRep( + absl::string_view(new_buffer, total_size), [](absl::string_view s) { + std::allocator<char>().deallocate(const_cast<char*>(s.data()), + s.size()); + }); + } + Unref(contents_.tree()); + contents_.set_tree(new_rep); + return absl::string_view(new_buffer, total_size); +} + +/* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { + assert(rep != nullptr); + if (rep->tag >= FLAT) { + *fragment = absl::string_view(rep->data, rep->length); + return true; + } else if (rep->tag == EXTERNAL) { + *fragment = absl::string_view(rep->external()->base, rep->length); + return true; + } else if (rep->tag == SUBSTRING) { + CordRep* child = rep->substring()->child; + if (child->tag >= FLAT) { + *fragment = + absl::string_view(child->data + rep->substring()->start, rep->length); + return true; + } else if (child->tag == EXTERNAL) { + *fragment = absl::string_view( + child->external()->base + rep->substring()->start, rep->length); + return true; + } + } + return false; +} + +/* static */ void Cord::ForEachChunkAux( + absl::cord_internal::CordRep* rep, + absl::FunctionRef<void(absl::string_view)> callback) { + assert(rep != nullptr); + int stack_pos = 0; + constexpr int stack_max = 128; + // Stack of right branches for tree traversal + absl::cord_internal::CordRep* stack[stack_max]; + absl::cord_internal::CordRep* current_node = rep; + while (true) { + if (current_node->tag == CONCAT) { + if (stack_pos == stack_max) { + // There's no more room on our stack array to add another right branch, + // and the idea is to avoid allocations, so call this function + // recursively to navigate this subtree further. (This is not something + // we expect to happen in practice). + ForEachChunkAux(current_node, callback); + + // Pop the next right branch and iterate. + current_node = stack[--stack_pos]; + continue; + } else { + // Save the right branch for later traversal and continue down the left + // branch. + stack[stack_pos++] = current_node->concat()->right; + current_node = current_node->concat()->left; + continue; + } + } + // This is a leaf node, so invoke our callback. + absl::string_view chunk; + bool success = GetFlatAux(current_node, &chunk); + assert(success); + if (success) { + callback(chunk); + } + if (stack_pos == 0) { + // end of traversal + return; + } + current_node = stack[--stack_pos]; + } +} + +static void DumpNode(CordRep* rep, bool include_data, std::ostream* os) { + const int kIndentStep = 1; + int indent = 0; + absl::InlinedVector<CordRep*, kInlinedVectorSize> stack; + absl::InlinedVector<int, kInlinedVectorSize> indents; + for (;;) { + *os << std::setw(3) << rep->refcount.Get(); + *os << " " << std::setw(7) << rep->length; + *os << " ["; + if (include_data) *os << static_cast<void*>(rep); + *os << "]"; + *os << " " << (IsRootBalanced(rep) ? 'b' : 'u'); + *os << " " << std::setw(indent) << ""; + if (rep->tag == CONCAT) { + *os << "CONCAT depth=" << Depth(rep) << "\n"; + indent += kIndentStep; + indents.push_back(indent); + stack.push_back(rep->concat()->right); + rep = rep->concat()->left; + } else if (rep->tag == SUBSTRING) { + *os << "SUBSTRING @ " << rep->substring()->start << "\n"; + indent += kIndentStep; + rep = rep->substring()->child; + } else { // Leaf + if (rep->tag == EXTERNAL) { + *os << "EXTERNAL ["; + if (include_data) + *os << absl::CEscape(std::string(rep->external()->base, rep->length)); + *os << "]\n"; + } else { + *os << "FLAT cap=" << TagToLength(rep->tag) << " ["; + if (include_data) + *os << absl::CEscape(std::string(rep->data, rep->length)); + *os << "]\n"; + } + if (stack.empty()) break; + rep = stack.back(); + stack.pop_back(); + indent = indents.back(); + indents.pop_back(); + } + } + ABSL_INTERNAL_CHECK(indents.empty(), ""); +} + +static std::string ReportError(CordRep* root, CordRep* node) { + std::ostringstream buf; + buf << "Error at node " << node << " in:"; + DumpNode(root, true, &buf); + return buf.str(); +} + +static bool VerifyNode(CordRep* root, CordRep* start_node, + bool full_validation) { + absl::InlinedVector<CordRep*, 2> worklist; + worklist.push_back(start_node); + do { + CordRep* node = worklist.back(); + worklist.pop_back(); + + ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node)); + if (node != root) { + ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); + } + + if (node->tag == CONCAT) { + ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length + + node->concat()->right->length), + ReportError(root, node)); + if (full_validation) { + worklist.push_back(node->concat()->right); + worklist.push_back(node->concat()->left); + } + } else if (node->tag >= FLAT) { + ABSL_INTERNAL_CHECK(node->length <= TagToLength(node->tag), + ReportError(root, node)); + } else if (node->tag == EXTERNAL) { + ABSL_INTERNAL_CHECK(node->external()->base != nullptr, + ReportError(root, node)); + } else if (node->tag == SUBSTRING) { + ABSL_INTERNAL_CHECK( + node->substring()->start < node->substring()->child->length, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->substring()->start + node->length <= + node->substring()->child->length, + ReportError(root, node)); + } + } while (!worklist.empty()); + return true; +} + +// Traverses the tree and computes the total memory allocated. +/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) { + size_t total_mem_usage = 0; + + // Allow a quick exit for the common case that the root is a leaf. + if (RepMemoryUsageLeaf(rep, &total_mem_usage)) { + return total_mem_usage; + } + + // Iterate over the tree. cur_node is never a leaf node and leaf nodes will + // never be appended to tree_stack. This reduces overhead from manipulating + // tree_stack. + absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack; + const CordRep* cur_node = rep; + while (true) { + const CordRep* next_node = nullptr; + + if (cur_node->tag == CONCAT) { + total_mem_usage += sizeof(CordRepConcat); + const CordRep* left = cur_node->concat()->left; + if (!RepMemoryUsageLeaf(left, &total_mem_usage)) { + next_node = left; + } + + const CordRep* right = cur_node->concat()->right; + if (!RepMemoryUsageLeaf(right, &total_mem_usage)) { + if (next_node) { + tree_stack.push_back(next_node); + } + next_node = right; + } + } else { + // Since cur_node is not a leaf or a concat node it must be a substring. + assert(cur_node->tag == SUBSTRING); + total_mem_usage += sizeof(CordRepSubstring); + next_node = cur_node->substring()->child; + if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) { + next_node = nullptr; + } + } + + if (!next_node) { + if (tree_stack.empty()) { + return total_mem_usage; + } + next_node = tree_stack.back(); + tree_stack.pop_back(); + } + cur_node = next_node; + } +} + +std::ostream& operator<<(std::ostream& out, const Cord& cord) { + for (absl::string_view chunk : cord.Chunks()) { + out.write(chunk.data(), chunk.size()); + } + return out; +} + +namespace strings_internal { +size_t CordTestAccess::FlatOverhead() { return kFlatOverhead; } +size_t CordTestAccess::MaxFlatLength() { return kMaxFlatLength; } +size_t CordTestAccess::FlatTagToLength(uint8_t tag) { + return TagToLength(tag); +} +uint8_t CordTestAccess::LengthToTag(size_t s) { + ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); + return AllocatedSizeToTag(s + kFlatOverhead); +} +size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } +size_t CordTestAccess::SizeofCordRepExternal() { + return sizeof(CordRepExternal); +} +size_t CordTestAccess::SizeofCordRepSubstring() { + return sizeof(CordRepSubstring); +} +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord.h b/absl/strings/cord.h new file mode 100644 index 00000000..40566cba --- /dev/null +++ b/absl/strings/cord.h @@ -0,0 +1,1121 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A Cord is a sequence of characters with some unusual access propreties. +// A Cord supports efficient insertions and deletions at the start and end of +// the byte sequence, but random access reads are slower, and random access +// modifications are not supported by the API. Cord also provides cheap copies +// (using a copy-on-write strategy) and cheap substring operations. +// +// Thread safety +// ------------- +// Cord has the same thread-safety properties as many other types like +// std::string, std::vector<>, int, etc -- it is thread-compatible. In +// particular, if no thread may call a non-const method, then it is safe to +// concurrently call const methods. Copying a Cord produces a new instance that +// can be used concurrently with the original in arbitrary ways. +// +// Implementation is similar to the "Ropes" described in: +// Ropes: An alternative to strings +// Hans J. Boehm, Russ Atkinson, Michael Plass +// Software Practice and Experience, December 1995 + +#ifndef ABSL_STRINGS_CORD_H_ +#define ABSL_STRINGS_CORD_H_ + +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iostream> +#include <iterator> +#include <string> + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/invoke.h" +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/container/inlined_vector.h" +#include "absl/functional/function_ref.h" +#include "absl/meta/type_traits.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +class Cord; +class CordTestPeer; +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view, Releaser&&); +void CopyCordToString(const Cord& src, std::string* dst); +namespace hash_internal { +template <typename H> +H HashFragmentedCord(H, const Cord&); +} + +// A Cord is a sequence of characters. +class Cord { + private: + template <typename T> + using EnableIfString = + absl::enable_if_t<std::is_same<T, std::string>::value, int>; + + public: + // -------------------------------------------------------------------- + // Constructors, destructors and helper factories + + // Create an empty cord + constexpr Cord() noexcept; + + // Cord is copyable and efficiently movable. + // The moved-from state is valid but unspecified. + Cord(const Cord& src); + Cord(Cord&& src) noexcept; + Cord& operator=(const Cord& x); + Cord& operator=(Cord&& x) noexcept; + + // Create a cord out of "src". This constructor is explicit on + // purpose so that people do not get automatic type conversions. + explicit Cord(absl::string_view src); + Cord& operator=(absl::string_view src); + + // These are templated to avoid ambiguities for types that are convertible to + // both `absl::string_view` and `std::string`, such as `const char*`. + // + // Note that these functions reserve the right to reuse the `string&&`'s + // memory and that they will do so in the future. + template <typename T, EnableIfString<T> = 0> + explicit Cord(T&& src) : Cord(absl::string_view(src)) {} + template <typename T, EnableIfString<T> = 0> + Cord& operator=(T&& src); + + // Destroy the cord + ~Cord() { + if (contents_.is_tree()) DestroyCordSlow(); + } + + // Creates a Cord that takes ownership of external memory. The contents of + // `data` are not copied. + // + // This function takes a callable that is invoked when all Cords are + // finished with `data`. The data must remain live and unchanging until the + // releaser is called. The requirements for the releaser are that it: + // * is move constructible, + // * supports `void operator()(absl::string_view) const`, + // * does not have alignment requirement greater than what is guaranteed by + // ::operator new. This is dictated by alignof(std::max_align_t) before + // C++17 and __STDCPP_DEFAULT_NEW_ALIGNMENT__ if compiling with C++17 or + // it is supported by the implementation. + // + // Example: + // + // Cord MakeCord(BlockPool* pool) { + // Block* block = pool->NewBlock(); + // FillBlock(block); + // return absl::MakeCordFromExternal( + // block->ToStringView(), + // [pool, block](absl::string_view /*ignored*/) { + // pool->FreeBlock(block); + // }); + // } + // + // WARNING: It's likely a bug if your releaser doesn't do anything. + // For example, consider the following: + // + // void Foo(const char* buffer, int len) { + // auto c = absl::MakeCordFromExternal(absl::string_view(buffer, len), + // [](absl::string_view) {}); + // + // // BUG: If Bar() copies its cord for any reason, including keeping a + // // substring of it, the lifetime of buffer might be extended beyond + // // when Foo() returns. + // Bar(c); + // } + template <typename Releaser> + friend Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser); + + // -------------------------------------------------------------------- + // Mutations + + void Clear(); + + void Append(const Cord& src); + void Append(Cord&& src); + void Append(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Append(T&& src); + + void Prepend(const Cord& src); + void Prepend(absl::string_view src); + template <typename T, EnableIfString<T> = 0> + void Prepend(T&& src); + + void RemovePrefix(size_t n); + void RemoveSuffix(size_t n); + + // Returns a new cord representing the subrange [pos, pos + new_size) of + // *this. If pos >= size(), the result is empty(). If + // (pos + new_size) >= size(), the result is the subrange [pos, size()). + Cord Subcord(size_t pos, size_t new_size) const; + + friend void swap(Cord& x, Cord& y) noexcept; + + // -------------------------------------------------------------------- + // Accessors + + size_t size() const; + bool empty() const; + + // Returns the approximate number of bytes pinned by this Cord. Note that + // Cords that share memory could each be "charged" independently for the same + // shared memory. + size_t EstimatedMemoryUsage() const; + + // -------------------------------------------------------------------- + // Comparators + + // Compares 'this' Cord with rhs. This function and its relatives + // treat Cords as sequences of unsigned bytes. The comparison is a + // straightforward lexicographic comparison. Return value: + // -1 'this' Cord is smaller + // 0 two Cords are equal + // 1 'this' Cord is larger + int Compare(absl::string_view rhs) const; + int Compare(const Cord& rhs) const; + + // Does 'this' cord start/end with rhs + bool StartsWith(const Cord& rhs) const; + bool StartsWith(absl::string_view rhs) const; + bool EndsWith(absl::string_view rhs) const; + bool EndsWith(const Cord& rhs) const; + + // -------------------------------------------------------------------- + // Conversion to other types + + explicit operator std::string() const; + + // Copies the contents from `src` to `*dst`. + // + // This function optimizes the case of reusing the destination std::string since it + // can reuse previously allocated capacity. However, this function does not + // guarantee that pointers previously returned by `dst->data()` remain valid + // even if `*dst` had enough capacity to hold `src`. If `*dst` is a new + // object, prefer to simply use the conversion operator to `std::string`. + friend void CopyCordToString(const Cord& src, std::string* dst); + + // -------------------------------------------------------------------- + // Iteration + + class CharIterator; + + // Type for iterating over the chunks of a `Cord`. See comments for + // `Cord::chunk_begin()`, `Cord::chunk_end()` and `Cord::Chunks()` below for + // preferred usage. + // + // Additional notes: + // * The `string_view` returned by dereferencing a valid, non-`end()` + // iterator is guaranteed to be non-empty. + // * A `ChunkIterator` object is invalidated after any non-const + // operation on the `Cord` object over which it iterates. + // * Two `ChunkIterator` objects can be equality compared if and only if + // they remain valid and iterate over the same `Cord`. + // * This is a proxy iterator. This means the `string_view` returned by the + // iterator does not live inside the Cord, and its lifetime is limited to + // the lifetime of the iterator itself. To help prevent issues, + // `ChunkIterator::reference` is not a true reference type and is + // equivalent to `value_type`. + // * The iterator keeps state that can grow for `Cord`s that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + class ChunkIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = absl::string_view; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = value_type; + + ChunkIterator() = default; + + ChunkIterator& operator++(); + ChunkIterator operator++(int); + bool operator==(const ChunkIterator& other) const; + bool operator!=(const ChunkIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend class Cord; + friend class CharIterator; + + private: + // Constructs a `begin()` iterator from `cord`. + explicit ChunkIterator(const Cord* cord); + + // Removes `n` bytes from `current_chunk_`. Expects `n` to be smaller than + // `current_chunk_.size()`. + void RemoveChunkPrefix(size_t n); + Cord AdvanceAndReadBytes(size_t n); + void AdvanceBytes(size_t n); + // Iterates `n` bytes, where `n` is expected to be greater than or equal to + // `current_chunk_.size()`. + void AdvanceBytesSlowPath(size_t n); + + // A view into bytes of the current `CordRep`. It may only be a view to a + // suffix of bytes if this is being used by `CharIterator`. + absl::string_view current_chunk_; + // The current leaf, or `nullptr` if the iterator points to short data. + // If the current chunk is a substring node, current_leaf_ points to the + // underlying flat or external node. + absl::cord_internal::CordRep* current_leaf_ = nullptr; + // The number of bytes left in the `Cord` over which we are iterating. + size_t bytes_remaining_ = 0; + absl::InlinedVector<absl::cord_internal::CordRep*, 4> + stack_of_right_children_; + }; + + // Returns an iterator to the first chunk of the `Cord`. + // + // This is useful for getting a `ChunkIterator` outside the context of a + // range-based for-loop (in which case see `Cord::Chunks()` below). + // + // Example: + // + // absl::Cord::ChunkIterator FindAsChunk(const absl::Cord& c, + // absl::string_view s) { + // return std::find(c.chunk_begin(), c.chunk_end(), s); + // } + ChunkIterator chunk_begin() const; + // Returns an iterator one increment past the last chunk of the `Cord`. + ChunkIterator chunk_end() const; + + // Convenience wrapper over `Cord::chunk_begin()` and `Cord::chunk_end()` to + // enable range-based for-loop iteration over `Cord` chunks. + // + // Prefer to use `Cord::Chunks()` below instead of constructing this directly. + class ChunkRange { + public: + explicit ChunkRange(const Cord* cord) : cord_(cord) {} + + ChunkIterator begin() const; + ChunkIterator end() const; + + private: + const Cord* cord_; + }; + + // Returns a range for iterating over the chunks of a `Cord` with a + // range-based for-loop. + // + // Example: + // + // void ProcessChunks(const Cord& cord) { + // for (absl::string_view chunk : cord.Chunks()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (absl::string_view chunk : CordFactory().Chunks()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + ChunkRange Chunks() const; + + // Type for iterating over the characters of a `Cord`. See comments for + // `Cord::char_begin()`, `Cord::char_end()` and `Cord::Chars()` below for + // preferred usage. + // + // Additional notes: + // * A `CharIterator` object is invalidated after any non-const + // operation on the `Cord` object over which it iterates. + // * Two `CharIterator` objects can be equality compared if and only if + // they remain valid and iterate over the same `Cord`. + // * The iterator keeps state that can grow for `Cord`s that contain many + // nodes and are imbalanced due to sharing. Prefer to pass this type by + // const reference instead of by value. + // * This type cannot be a forward iterator because a `Cord` can reuse + // sections of memory. This violates the requirement that if dereferencing + // two iterators returns the same object, the iterators must compare + // equal. + class CharIterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = char; + using difference_type = ptrdiff_t; + using pointer = const char*; + using reference = const char&; + + CharIterator() = default; + + CharIterator& operator++(); + CharIterator operator++(int); + bool operator==(const CharIterator& other) const; + bool operator!=(const CharIterator& other) const; + reference operator*() const; + pointer operator->() const; + + friend Cord; + + private: + explicit CharIterator(const Cord* cord) : chunk_iterator_(cord) {} + + ChunkIterator chunk_iterator_; + }; + + // Advances `*it` by `n_bytes` and returns the bytes passed as a `Cord`. + // + // `n_bytes` must be less than or equal to the number of bytes remaining for + // iteration. Otherwise the behavior is undefined. It is valid to pass + // `char_end()` and 0. + static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes); + + // Advances `*it` by `n_bytes`. + // + // `n_bytes` must be less than or equal to the number of bytes remaining for + // iteration. Otherwise the behavior is undefined. It is valid to pass + // `char_end()` and 0. + static void Advance(CharIterator* it, size_t n_bytes); + + // Returns the longest contiguous view starting at the iterator's position. + // + // `it` must be dereferenceable. + static absl::string_view ChunkRemaining(const CharIterator& it); + + // Returns an iterator to the first character of the `Cord`. + CharIterator char_begin() const; + // Returns an iterator to one past the last character of the `Cord`. + CharIterator char_end() const; + + // Convenience wrapper over `Cord::char_begin()` and `Cord::char_end()` to + // enable range-based for-loop iterator over the characters of a `Cord`. + // + // Prefer to use `Cord::Chars()` below instead of constructing this directly. + class CharRange { + public: + explicit CharRange(const Cord* cord) : cord_(cord) {} + + CharIterator begin() const; + CharIterator end() const; + + private: + const Cord* cord_; + }; + + // Returns a range for iterating over the characters of a `Cord` with a + // range-based for-loop. + // + // Example: + // + // void ProcessCord(const Cord& cord) { + // for (char c : cord.Chars()) { ... } + // } + // + // Note that the ordinary caveats of temporary lifetime extension apply: + // + // void Process() { + // for (char c : CordFactory().Chars()) { + // // The temporary Cord returned by CordFactory has been destroyed! + // } + // } + CharRange Chars() const; + + // -------------------------------------------------------------------- + // Miscellaneous + + // Get the "i"th character of 'this' and return it. + // NOTE: This routine is reasonably efficient. It is roughly + // logarithmic in the number of nodes that make up the cord. Still, + // if you need to iterate over the contents of a cord, you should + // use a CharIterator/CordIterator rather than call operator[] or Get() + // repeatedly in a loop. + // + // REQUIRES: 0 <= i < size() + char operator[](size_t i) const; + + // Flattens the cord into a single array and returns a view of the data. + // + // If the cord was already flat, the contents are not modified. + absl::string_view Flatten(); + + private: + friend class CordTestPeer; + template <typename H> + friend H absl::hash_internal::HashFragmentedCord(H, const Cord&); + friend bool operator==(const Cord& lhs, const Cord& rhs); + friend bool operator==(const Cord& lhs, absl::string_view rhs); + + // Call the provided function once for each cord chunk, in order. Unlike + // Chunks(), this API will not allocate memory. + void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const; + + // Allocates new contiguous storage for the contents of the cord. This is + // called by Flatten() when the cord was not already flat. + absl::string_view FlattenSlowPath(); + + // Actual cord contents are hidden inside the following simple + // class so that we can isolate the bulk of cord.cc from changes + // to the representation. + // + // InlineRep holds either either a tree pointer, or an array of kMaxInline + // bytes. + class InlineRep { + public: + static const unsigned char kMaxInline = 15; + static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); + // Tag byte & kMaxInline means we are storing a pointer. + static const unsigned char kTreeFlag = 1 << 4; + // Tag byte & kProfiledFlag means we are profiling the Cord. + static const unsigned char kProfiledFlag = 1 << 5; + + constexpr InlineRep() : data_{} {} + InlineRep(const InlineRep& src); + InlineRep(InlineRep&& src); + InlineRep& operator=(const InlineRep& src); + InlineRep& operator=(InlineRep&& src) noexcept; + + void Swap(InlineRep* rhs); + bool empty() const; + size_t size() const; + const char* data() const; // Returns nullptr if holding pointer + void set_data(const char* data, size_t n, + bool nullify_tail); // Discards pointer, if any + char* set_data(size_t n); // Write data to the result + // Returns nullptr if holding bytes + absl::cord_internal::CordRep* tree() const; + // Discards old pointer, if any + void set_tree(absl::cord_internal::CordRep* rep); + // Replaces a tree with a new root. This is faster than set_tree, but it + // should only be used when it's clear that the old rep was a tree. + void replace_tree(absl::cord_internal::CordRep* rep); + // Returns non-null iff was holding a pointer + absl::cord_internal::CordRep* clear(); + // Convert to pointer if necessary + absl::cord_internal::CordRep* force_tree(size_t extra_hint); + void reduce_size(size_t n); // REQUIRES: holding data + void remove_prefix(size_t n); // REQUIRES: holding data + void AppendArray(const char* src_data, size_t src_size); + absl::string_view FindFlatStartPiece() const; + void AppendTree(absl::cord_internal::CordRep* tree); + void PrependTree(absl::cord_internal::CordRep* tree); + void GetAppendRegion(char** region, size_t* size, size_t max_length); + void GetAppendRegion(char** region, size_t* size); + bool IsSame(const InlineRep& other) const { + return memcmp(data_, other.data_, sizeof(data_)) == 0; + } + int BitwiseCompare(const InlineRep& other) const { + uint64_t x, y; + // Use memcpy to avoid anti-aliasing issues. + memcpy(&x, data_, sizeof(x)); + memcpy(&y, other.data_, sizeof(y)); + if (x == y) { + memcpy(&x, data_ + 8, sizeof(x)); + memcpy(&y, other.data_ + 8, sizeof(y)); + if (x == y) return 0; + } + return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y) + ? -1 + : 1; + } + void CopyTo(std::string* dst) const { + // memcpy is much faster when operating on a known size. On most supported + // platforms, the small std::string optimization is large enough that resizing + // to 15 bytes does not cause a memory allocation. + absl::strings_internal::STLStringResizeUninitialized(dst, + sizeof(data_) - 1); + memcpy(&(*dst)[0], data_, sizeof(data_) - 1); + // erase is faster than resize because the logic for memory allocation is + // not needed. + dst->erase(data_[kMaxInline]); + } + + // Copies the inline contents into `dst`. Assumes the cord is not empty. + void CopyToArray(char* dst) const; + + bool is_tree() const { return data_[kMaxInline] > kMaxInline; } + + private: + friend class Cord; + + void AssignSlow(const InlineRep& src); + // Unrefs the tree, stops profiling, and zeroes the contents + void ClearSlow(); + + // If the data has length <= kMaxInline, we store it in data_[0..len-1], + // and store the length in data_[kMaxInline]. Else we store it in a tree + // and store a pointer to that tree in data_[0..sizeof(CordRep*)-1]. + alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1]; + }; + InlineRep contents_; + + // Helper for MemoryUsage() + static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep); + + // Helper for GetFlat() + static bool GetFlatAux(absl::cord_internal::CordRep* rep, + absl::string_view* fragment); + + // Helper for ForEachChunk() + static void ForEachChunkAux( + absl::cord_internal::CordRep* rep, + absl::FunctionRef<void(absl::string_view)> callback); + + // The destructor for non-empty Cords. + void DestroyCordSlow(); + + // Out-of-line implementation of slower parts of logic. + void CopyToArraySlowPath(char* dst) const; + int CompareSlowPath(absl::string_view rhs, size_t compared_size, + size_t size_to_compare) const; + int CompareSlowPath(const Cord& rhs, size_t compared_size, + size_t size_to_compare) const; + bool EqualsImpl(absl::string_view rhs, size_t size_to_compare) const; + bool EqualsImpl(const Cord& rhs, size_t size_to_compare) const; + int CompareImpl(const Cord& rhs) const; + + template <typename ResultType, typename RHS> + friend ResultType GenericCompare(const Cord& lhs, const RHS& rhs, + size_t size_to_compare); + static absl::string_view GetFirstChunk(const Cord& c); + static absl::string_view GetFirstChunk(absl::string_view sv); + + // Returns a new reference to contents_.tree(), or steals an existing + // reference if called on an rvalue. + absl::cord_internal::CordRep* TakeRep() const&; + absl::cord_internal::CordRep* TakeRep() &&; + + // Helper for Append() + template <typename C> + void AppendImpl(C&& src); +}; + +ABSL_NAMESPACE_END +} // namespace absl + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// allow a Cord to be logged +extern std::ostream& operator<<(std::ostream& out, const Cord& cord); + +// ------------------------------------------------------------------ +// Internal details follow. Clients should ignore. + +namespace cord_internal { + +// Fast implementation of memmove for up to 15 bytes. This implementation is +// safe for overlapping regions. If nullify_tail is true, the destination is +// padded with '\0' up to 16 bytes. +inline void SmallMemmove(char* dst, const char* src, size_t n, + bool nullify_tail = false) { + if (n >= 8) { + assert(n <= 16); + uint64_t buf1; + uint64_t buf2; + memcpy(&buf1, src, 8); + memcpy(&buf2, src + n - 8, 8); + if (nullify_tail) { + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 8); + memcpy(dst + n - 8, &buf2, 8); + } else if (n >= 4) { + uint32_t buf1; + uint32_t buf2; + memcpy(&buf1, src, 4); + memcpy(&buf2, src + n - 4, 4); + if (nullify_tail) { + memset(dst + 4, 0, 4); + memset(dst + 8, 0, 8); + } + memcpy(dst, &buf1, 4); + memcpy(dst + n - 4, &buf2, 4); + } else { + if (n != 0) { + dst[0] = src[0]; + dst[n / 2] = src[n / 2]; + dst[n - 1] = src[n - 1]; + } + if (nullify_tail) { + memset(dst + 8, 0, 8); + memset(dst + n, 0, 8); + } + } +} + +struct ExternalRepReleaserPair { + CordRep* rep; + void* releaser_address; +}; + +// Allocates a new external `CordRep` and returns a pointer to it and a pointer +// to `releaser_size` bytes where the desired releaser can be constructed. +// Expects `data` to be non-empty. +ExternalRepReleaserPair NewExternalWithUninitializedReleaser( + absl::string_view data, ExternalReleaserInvoker invoker, + size_t releaser_size); + +// Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer +// to it, or `nullptr` if `data` was empty. +template <typename Releaser> +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) { + static_assert( +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + alignof(Releaser) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__, +#else + alignof(Releaser) <= alignof(max_align_t), +#endif + "Releasers with alignment requirement greater than what is returned by " + "default `::operator new()` are not supported."); + + using ReleaserType = absl::decay_t<Releaser>; + if (data.empty()) { + // Never create empty external nodes. + ::absl::base_internal::Invoke( + ReleaserType(std::forward<Releaser>(releaser)), data); + return nullptr; + } + + auto releaser_invoker = [](void* type_erased_releaser, absl::string_view d) { + auto* my_releaser = static_cast<ReleaserType*>(type_erased_releaser); + ::absl::base_internal::Invoke(std::move(*my_releaser), d); + my_releaser->~ReleaserType(); + return sizeof(Releaser); + }; + + ExternalRepReleaserPair external = NewExternalWithUninitializedReleaser( + data, releaser_invoker, sizeof(releaser)); + ::new (external.releaser_address) + ReleaserType(std::forward<Releaser>(releaser)); + return external.rep; +} + +// Overload for function reference types that dispatches using a function +// pointer because there are no `alignof()` or `sizeof()` a function reference. +// NOLINTNEXTLINE - suppress clang-tidy raw pointer return. +inline CordRep* NewExternalRep(absl::string_view data, + void (&releaser)(absl::string_view)) { + return NewExternalRep(data, &releaser); +} + +} // namespace cord_internal + +template <typename Releaser> +Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { + Cord cord; + cord.contents_.set_tree(::absl::cord_internal::NewExternalRep( + data, std::forward<Releaser>(releaser))); + return cord; +} + +inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) { + cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); +} + +inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { + memcpy(data_, src.data_, sizeof(data_)); + memset(src.data_, 0, sizeof(data_)); +} + +inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { + if (this == &src) { + return *this; + } + if (!is_tree() && !src.is_tree()) { + cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); + return *this; + } + AssignSlow(src); + return *this; +} + +inline Cord::InlineRep& Cord::InlineRep::operator=( + Cord::InlineRep&& src) noexcept { + if (is_tree()) { + ClearSlow(); + } + memcpy(data_, src.data_, sizeof(data_)); + memset(src.data_, 0, sizeof(data_)); + return *this; +} + +inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { + if (rhs == this) { + return; + } + + Cord::InlineRep tmp; + cord_internal::SmallMemmove(tmp.data_, data_, sizeof(data_)); + cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_)); + cord_internal::SmallMemmove(rhs->data_, tmp.data_, sizeof(data_)); +} + +inline const char* Cord::InlineRep::data() const { + return is_tree() ? nullptr : data_; +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const { + if (is_tree()) { + absl::cord_internal::CordRep* rep; + memcpy(&rep, data_, sizeof(rep)); + return rep; + } else { + return nullptr; + } +} + +inline bool Cord::InlineRep::empty() const { return data_[kMaxInline] == 0; } + +inline size_t Cord::InlineRep::size() const { + const char tag = data_[kMaxInline]; + if (tag <= kMaxInline) return tag; + return static_cast<size_t>(tree()->length); +} + +inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { + if (rep == nullptr) { + memset(data_, 0, sizeof(data_)); + } else { + bool was_tree = is_tree(); + memcpy(data_, &rep, sizeof(rep)); + memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); + if (!was_tree) { + data_[kMaxInline] = kTreeFlag; + } + } +} + +inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { + ABSL_ASSERT(is_tree()); + if (ABSL_PREDICT_FALSE(rep == nullptr)) { + set_tree(rep); + return; + } + memcpy(data_, &rep, sizeof(rep)); + memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); +} + +inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { + const char tag = data_[kMaxInline]; + absl::cord_internal::CordRep* result = nullptr; + if (tag > kMaxInline) { + memcpy(&result, data_, sizeof(result)); + } + memset(data_, 0, sizeof(data_)); // Clear the cord + return result; +} + +inline void Cord::InlineRep::CopyToArray(char* dst) const { + assert(!is_tree()); + size_t n = data_[kMaxInline]; + assert(n != 0); + cord_internal::SmallMemmove(dst, data_, n); +} + +constexpr inline Cord::Cord() noexcept {} + +inline Cord& Cord::operator=(const Cord& x) { + contents_ = x.contents_; + return *this; +} + +inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} + +inline Cord& Cord::operator=(Cord&& x) noexcept { + contents_ = std::move(x.contents_); + return *this; +} + +template <typename T, Cord::EnableIfString<T>> +inline Cord& Cord::operator=(T&& src) { + *this = absl::string_view(src); + return *this; +} + +inline size_t Cord::size() const { + // Length is 1st field in str.rep_ + return contents_.size(); +} + +inline bool Cord::empty() const { return contents_.empty(); } + +inline size_t Cord::EstimatedMemoryUsage() const { + size_t result = sizeof(Cord); + if (const absl::cord_internal::CordRep* rep = contents_.tree()) { + result += MemoryUsageAux(rep); + } + return result; +} + +inline absl::string_view Cord::Flatten() { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + return absl::string_view(contents_.data(), contents_.size()); + } else { + absl::string_view already_flat_contents; + if (GetFlatAux(rep, &already_flat_contents)) { + return already_flat_contents; + } + } + return FlattenSlowPath(); +} + +inline void Cord::Append(absl::string_view src) { + contents_.AppendArray(src.data(), src.size()); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Append(T&& src) { + // Note that this function reserves the right to reuse the `string&&`'s + // memory and that it will do so in the future. + Append(absl::string_view(src)); +} + +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + // Note that this function reserves the right to reuse the `string&&`'s + // memory and that it will do so in the future. + Prepend(absl::string_view(src)); +} + +inline int Cord::Compare(const Cord& rhs) const { + if (!contents_.is_tree() && !rhs.contents_.is_tree()) { + return contents_.BitwiseCompare(rhs.contents_); + } + + return CompareImpl(rhs); +} + +// Does 'this' cord start/end with rhs +inline bool Cord::StartsWith(const Cord& rhs) const { + if (contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline bool Cord::StartsWith(absl::string_view rhs) const { + size_t rhs_size = rhs.size(); + if (size() < rhs_size) return false; + return EqualsImpl(rhs, rhs_size); +} + +inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) + : bytes_remaining_(cord->size()) { + if (cord->empty()) return; + if (cord->contents_.is_tree()) { + stack_of_right_children_.push_back(cord->contents_.tree()); + operator++(); + } else { + current_chunk_ = absl::string_view(cord->contents_.data(), cord->size()); + } +} + +inline Cord::ChunkIterator Cord::ChunkIterator::operator++(int) { + ChunkIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::ChunkIterator::operator==(const ChunkIterator& other) const { + return bytes_remaining_ == other.bytes_remaining_; +} + +inline bool Cord::ChunkIterator::operator!=(const ChunkIterator& other) const { + return !(*this == other); +} + +inline Cord::ChunkIterator::reference Cord::ChunkIterator::operator*() const { + assert(bytes_remaining_ != 0); + return current_chunk_; +} + +inline Cord::ChunkIterator::pointer Cord::ChunkIterator::operator->() const { + assert(bytes_remaining_ != 0); + return ¤t_chunk_; +} + +inline void Cord::ChunkIterator::RemoveChunkPrefix(size_t n) { + assert(n < current_chunk_.size()); + current_chunk_.remove_prefix(n); + bytes_remaining_ -= n; +} + +inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { + if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { + RemoveChunkPrefix(n); + } else if (n != 0) { + AdvanceBytesSlowPath(n); + } +} + +inline Cord::ChunkIterator Cord::chunk_begin() const { + return ChunkIterator(this); +} + +inline Cord::ChunkIterator Cord::chunk_end() const { return ChunkIterator(); } + +inline Cord::ChunkIterator Cord::ChunkRange::begin() const { + return cord_->chunk_begin(); +} + +inline Cord::ChunkIterator Cord::ChunkRange::end() const { + return cord_->chunk_end(); +} + +inline Cord::ChunkRange Cord::Chunks() const { return ChunkRange(this); } + +inline Cord::CharIterator& Cord::CharIterator::operator++() { + if (ABSL_PREDICT_TRUE(chunk_iterator_->size() > 1)) { + chunk_iterator_.RemoveChunkPrefix(1); + } else { + ++chunk_iterator_; + } + return *this; +} + +inline Cord::CharIterator Cord::CharIterator::operator++(int) { + CharIterator tmp(*this); + operator++(); + return tmp; +} + +inline bool Cord::CharIterator::operator==(const CharIterator& other) const { + return chunk_iterator_ == other.chunk_iterator_; +} + +inline bool Cord::CharIterator::operator!=(const CharIterator& other) const { + return !(*this == other); +} + +inline Cord::CharIterator::reference Cord::CharIterator::operator*() const { + return *chunk_iterator_->data(); +} + +inline Cord::CharIterator::pointer Cord::CharIterator::operator->() const { + return chunk_iterator_->data(); +} + +inline Cord Cord::AdvanceAndRead(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + return it->chunk_iterator_.AdvanceAndReadBytes(n_bytes); +} + +inline void Cord::Advance(CharIterator* it, size_t n_bytes) { + assert(it != nullptr); + it->chunk_iterator_.AdvanceBytes(n_bytes); +} + +inline absl::string_view Cord::ChunkRemaining(const CharIterator& it) { + return *it.chunk_iterator_; +} + +inline Cord::CharIterator Cord::char_begin() const { + return CharIterator(this); +} + +inline Cord::CharIterator Cord::char_end() const { return CharIterator(); } + +inline Cord::CharIterator Cord::CharRange::begin() const { + return cord_->char_begin(); +} + +inline Cord::CharIterator Cord::CharRange::end() const { + return cord_->char_end(); +} + +inline Cord::CharRange Cord::Chars() const { return CharRange(this); } + +inline void Cord::ForEachChunk( + absl::FunctionRef<void(absl::string_view)> callback) const { + absl::cord_internal::CordRep* rep = contents_.tree(); + if (rep == nullptr) { + callback(absl::string_view(contents_.data(), contents_.size())); + } else { + return ForEachChunkAux(rep, callback); + } +} + +// Nonmember Cord-to-Cord relational operarators. +inline bool operator==(const Cord& lhs, const Cord& rhs) { + if (lhs.contents_.IsSame(rhs.contents_)) return true; + size_t rhs_size = rhs.size(); + if (lhs.size() != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, const Cord& y) { + return x.Compare(y) < 0; +} +inline bool operator>(const Cord& x, const Cord& y) { + return x.Compare(y) > 0; +} +inline bool operator<=(const Cord& x, const Cord& y) { + return x.Compare(y) <= 0; +} +inline bool operator>=(const Cord& x, const Cord& y) { + return x.Compare(y) >= 0; +} + +// Nonmember Cord-to-absl::string_view relational operators. +// +// Due to implicit conversions, these also enable comparisons of Cord with +// with std::string, ::string, and const char*. +inline bool operator==(const Cord& lhs, absl::string_view rhs) { + size_t lhs_size = lhs.size(); + size_t rhs_size = rhs.size(); + if (lhs_size != rhs_size) return false; + return lhs.EqualsImpl(rhs, rhs_size); +} + +inline bool operator==(absl::string_view x, const Cord& y) { return y == x; } +inline bool operator!=(const Cord& x, absl::string_view y) { return !(x == y); } +inline bool operator!=(absl::string_view x, const Cord& y) { return !(x == y); } +inline bool operator<(const Cord& x, absl::string_view y) { + return x.Compare(y) < 0; +} +inline bool operator<(absl::string_view x, const Cord& y) { + return y.Compare(x) > 0; +} +inline bool operator>(const Cord& x, absl::string_view y) { return y < x; } +inline bool operator>(absl::string_view x, const Cord& y) { return y < x; } +inline bool operator<=(const Cord& x, absl::string_view y) { return !(y < x); } +inline bool operator<=(absl::string_view x, const Cord& y) { return !(y < x); } +inline bool operator>=(const Cord& x, absl::string_view y) { return !(x < y); } +inline bool operator>=(absl::string_view x, const Cord& y) { return !(x < y); } + +// Overload of swap for Cord. The use of non-const references is +// required. :( +inline void swap(Cord& x, Cord& y) noexcept { y.contents_.Swap(&x.contents_); } + +// Some internals exposed to test code. +namespace strings_internal { +class CordTestAccess { + public: + static size_t FlatOverhead(); + static size_t MaxFlatLength(); + static size_t SizeofCordRepConcat(); + static size_t SizeofCordRepExternal(); + static size_t SizeofCordRepSubstring(); + static size_t FlatTagToLength(uint8_t tag); + static uint8_t LengthToTag(size_t s); +}; +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_H_ diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc new file mode 100644 index 00000000..434f3a24 --- /dev/null +++ b/absl/strings/cord_test.cc @@ -0,0 +1,1526 @@ +#include "absl/strings/cord.h" + +#include <algorithm> +#include <climits> +#include <cstdio> +#include <iterator> +#include <map> +#include <numeric> +#include <random> +#include <sstream> +#include <type_traits> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/casts.h" +#include "absl/base/config.h" +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/container/fixed_array.h" +#include "absl/strings/cord_test_helpers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +typedef std::mt19937_64 RandomEngine; + +static std::string RandomLowercaseString(RandomEngine* rng); +static std::string RandomLowercaseString(RandomEngine* rng, size_t length); + +static int GetUniformRandomUpTo(RandomEngine* rng, int upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<int> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static size_t GetUniformRandomUpTo(RandomEngine* rng, size_t upper_bound) { + if (upper_bound > 0) { + std::uniform_int_distribution<size_t> uniform(0, upper_bound - 1); + return uniform(*rng); + } else { + return 0; + } +} + +static int32_t GenerateSkewedRandom(RandomEngine* rng, int max_log) { + const uint32_t base = (*rng)() % (max_log + 1); + const uint32_t mask = ((base < 32) ? (1u << base) : 0u) - 1u; + return (*rng)() & mask; +} + +static std::string RandomLowercaseString(RandomEngine* rng) { + int length; + std::bernoulli_distribution one_in_1k(0.001); + std::bernoulli_distribution one_in_10k(0.0001); + // With low probability, make a large fragment + if (one_in_10k(*rng)) { + length = GetUniformRandomUpTo(rng, 1048576); + } else if (one_in_1k(*rng)) { + length = GetUniformRandomUpTo(rng, 10000); + } else { + length = GenerateSkewedRandom(rng, 10); + } + return RandomLowercaseString(rng, length); +} + +static std::string RandomLowercaseString(RandomEngine* rng, size_t length) { + std::string result(length, '\0'); + std::uniform_int_distribution<int> chars('a', 'z'); + std::generate(result.begin(), result.end(), [&]() { + return static_cast<char>(chars(*rng)); + }); + return result; +} + +static void DoNothing(absl::string_view /* data */, void* /* arg */) {} + +static void DeleteExternalString(absl::string_view data, void* arg) { + std::string* s = reinterpret_cast<std::string*>(arg); + EXPECT_EQ(data, *s); + delete s; +} + +// Add "s" to *dst via `MakeCordFromExternal` +static void AddExternalMemory(absl::string_view s, absl::Cord* dst) { + std::string* str = new std::string(s.data(), s.size()); + dst->Append(absl::MakeCordFromExternal(*str, [str](absl::string_view data) { + DeleteExternalString(data, str); + })); +} + +static void DumpGrowth() { + absl::Cord str; + for (int i = 0; i < 1000; i++) { + char c = 'a' + i % 26; + str.Append(absl::string_view(&c, 1)); + } +} + +// Make a Cord with some number of fragments. Return the size (in bytes) +// of the smallest fragment. +static size_t AppendWithFragments(const std::string& s, RandomEngine* rng, + absl::Cord* cord) { + size_t j = 0; + const size_t max_size = s.size() / 5; // Make approx. 10 fragments + size_t min_size = max_size; // size of smallest fragment + while (j < s.size()) { + size_t N = 1 + GetUniformRandomUpTo(rng, max_size); + if (N > (s.size() - j)) { + N = s.size() - j; + } + if (N < min_size) { + min_size = N; + } + + std::bernoulli_distribution coin_flip(0.5); + if (coin_flip(*rng)) { + // Grow by adding an external-memory. + AddExternalMemory(absl::string_view(s.data() + j, N), cord); + } else { + cord->Append(absl::string_view(s.data() + j, N)); + } + j += N; + } + return min_size; +} + +// Add an external memory that contains the specified std::string to cord +static void AddNewStringBlock(const std::string& str, absl::Cord* dst) { + char* data = new char[str.size()]; + memcpy(data, str.data(), str.size()); + dst->Append(absl::MakeCordFromExternal( + absl::string_view(data, str.size()), + [](absl::string_view s) { delete[] s.data(); })); +} + +// Make a Cord out of many different types of nodes. +static absl::Cord MakeComposite() { + absl::Cord cord; + cord.Append("the"); + AddExternalMemory(" quick brown", &cord); + AddExternalMemory(" fox jumped", &cord); + + absl::Cord full(" over"); + AddExternalMemory(" the lazy", &full); + AddNewStringBlock(" dog slept the whole day away", &full); + absl::Cord substring = full.Subcord(0, 18); + + // Make substring long enough to defeat the copying fast path in Append. + substring.Append(std::string(1000, '.')); + cord.Append(substring); + cord = cord.Subcord(0, cord.size() - 998); // Remove most of extra junk + + return cord; +} + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class CordTestPeer { + public: + static void ForEachChunk( + const Cord& c, absl::FunctionRef<void(absl::string_view)> callback) { + c.ForEachChunk(callback); + } +}; + +ABSL_NAMESPACE_END +} // namespace absl + +TEST(Cord, AllFlatSizes) { + using absl::strings_internal::CordTestAccess; + + for (size_t s = 0; s < CordTestAccess::MaxFlatLength(); s++) { + // Make a std::string of length s. + std::string src; + while (src.size() < s) { + src.push_back('a' + (src.size() % 26)); + } + + absl::Cord dst(src); + EXPECT_EQ(std::string(dst), src) << s; + } +} + +// We create a Cord at least 128GB in size using the fact that Cords can +// internally reference-count; thus the Cord is enormous without actually +// consuming very much memory. +TEST(GigabyteCord, FromExternal) { + const size_t one_gig = 1024U * 1024U * 1024U; + size_t max_size = 2 * one_gig; + if (sizeof(max_size) > 4) max_size = 128 * one_gig; + + size_t length = 128 * 1024; + char* data = new char[length]; + absl::Cord from = absl::MakeCordFromExternal( + absl::string_view(data, length), + [](absl::string_view sv) { delete[] sv.data(); }); + + // This loop may seem odd due to its combination of exponential doubling of + // size and incremental size increases. We do it incrementally to be sure the + // Cord will need rebalancing and will exercise code that, in the past, has + // caused crashes in production. We grow exponentially so that the code will + // execute in a reasonable amount of time. + absl::Cord c; + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + c.Append(from); + while (c.size() < max_size) { + c.Append(c); + c.Append(from); + c.Append(from); + c.Append(from); + c.Append(from); + } + + for (int i = 0; i < 1024; ++i) { + c.Append(from); + } + ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); + // Note: on a 32-bit build, this comes out to 2,818,048,000 bytes. + // Note: on a 64-bit build, this comes out to 171,932,385,280 bytes. +} + +static absl::Cord MakeExternalCord(int size) { + char* buffer = new char[size]; + memset(buffer, 'x', size); + absl::Cord cord; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(buffer, size), + [](absl::string_view s) { delete[] s.data(); })); + return cord; +} + +// Extern to fool clang that this is not constant. Needed to suppress +// a warning of unsafe code we want to test. +extern bool my_unique_true_boolean; +bool my_unique_true_boolean = true; + +TEST(Cord, Assignment) { + absl::Cord x(absl::string_view("hi there")); + absl::Cord y(x); + ASSERT_EQ(std::string(x), "hi there"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x == y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y <= x); + + x = absl::string_view("foo"); + ASSERT_EQ(std::string(x), "foo"); + ASSERT_EQ(std::string(y), "hi there"); + ASSERT_TRUE(x < y); + ASSERT_TRUE(y > x); + ASSERT_TRUE(x != y); + ASSERT_TRUE(x <= y); + ASSERT_TRUE(y >= x); + + x = "foo"; + ASSERT_EQ(x, "foo"); + + // Test that going from inline rep to tree we don't leak memory. + std::vector<std::pair<absl::string_view, absl::string_view>> + test_string_pairs = {{"hi there", "foo"}, + {"loooooong coooooord", "short cord"}, + {"short cord", "loooooong coooooord"}, + {"loooooong coooooord1", "loooooong coooooord2"}}; + for (std::pair<absl::string_view, absl::string_view> test_strings : + test_string_pairs) { + absl::Cord tmp(test_strings.first); + absl::Cord z(std::move(tmp)); + ASSERT_EQ(std::string(z), test_strings.first); + tmp = test_strings.second; + z = std::move(tmp); + ASSERT_EQ(std::string(z), test_strings.second); + } + { + // Test that self-move assignment doesn't crash/leak. + // Do not write such code! + absl::Cord my_small_cord("foo"); + absl::Cord my_big_cord("loooooong coooooord"); + // Bypass clang's warning on self move-assignment. + absl::Cord* my_small_alias = + my_unique_true_boolean ? &my_small_cord : &my_big_cord; + absl::Cord* my_big_alias = + !my_unique_true_boolean ? &my_small_cord : &my_big_cord; + + *my_small_alias = std::move(my_small_cord); + *my_big_alias = std::move(my_big_cord); + // my_small_cord and my_big_cord are in an unspecified but valid + // state, and will be correctly destroyed here. + } +} + +TEST(Cord, StartsEndsWith) { + absl::Cord x(absl::string_view("abcde")); + absl::Cord empty(""); + + ASSERT_TRUE(x.StartsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.StartsWith(absl::Cord("abc"))); + ASSERT_TRUE(x.StartsWith(absl::Cord(""))); + ASSERT_TRUE(empty.StartsWith(absl::Cord(""))); + ASSERT_TRUE(x.EndsWith(absl::Cord("abcde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord("cde"))); + ASSERT_TRUE(x.EndsWith(absl::Cord(""))); + ASSERT_TRUE(empty.EndsWith(absl::Cord(""))); + + ASSERT_TRUE(!x.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.StartsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!x.EndsWith(absl::Cord("xyz"))); + ASSERT_TRUE(!empty.EndsWith(absl::Cord("xyz"))); + + ASSERT_TRUE(x.StartsWith("abcde")); + ASSERT_TRUE(x.StartsWith("abc")); + ASSERT_TRUE(x.StartsWith("")); + ASSERT_TRUE(empty.StartsWith("")); + ASSERT_TRUE(x.EndsWith("abcde")); + ASSERT_TRUE(x.EndsWith("cde")); + ASSERT_TRUE(x.EndsWith("")); + ASSERT_TRUE(empty.EndsWith("")); + + ASSERT_TRUE(!x.StartsWith("xyz")); + ASSERT_TRUE(!empty.StartsWith("xyz")); + ASSERT_TRUE(!x.EndsWith("xyz")); + ASSERT_TRUE(!empty.EndsWith("xyz")); +} + +TEST(Cord, Subcord) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + const std::string s = RandomLowercaseString(&rng, 1024); + + absl::Cord a; + AppendWithFragments(s, &rng, &a); + ASSERT_EQ(s.size(), a.size()); + + // Check subcords of a, from a variety of interesting points. + std::set<size_t> positions; + for (int i = 0; i <= 32; ++i) { + positions.insert(i); + positions.insert(i * 32 - 1); + positions.insert(i * 32); + positions.insert(i * 32 + 1); + positions.insert(a.size() - i); + } + positions.insert(237); + positions.insert(732); + for (size_t pos : positions) { + if (pos > a.size()) continue; + for (size_t end_pos : positions) { + if (end_pos < pos || end_pos > a.size()) continue; + absl::Cord sa = a.Subcord(pos, end_pos - pos); + EXPECT_EQ(absl::string_view(s).substr(pos, end_pos - pos), + std::string(sa)) + << a; + } + } + + // Do the same thing for an inline cord. + const std::string sh = "short"; + absl::Cord c(sh); + for (size_t pos = 0; pos <= sh.size(); ++pos) { + for (size_t n = 0; n <= sh.size() - pos; ++n) { + absl::Cord sc = c.Subcord(pos, n); + EXPECT_EQ(sh.substr(pos, n), std::string(sc)) << c; + } + } + + // Check subcords of subcords. + absl::Cord sa = a.Subcord(0, a.size()); + std::string ss = s.substr(0, s.size()); + while (sa.size() > 1) { + sa = sa.Subcord(1, sa.size() - 2); + ss = ss.substr(1, ss.size() - 2); + EXPECT_EQ(ss, std::string(sa)) << a; + if (HasFailure()) break; // halt cascade + } + + // It is OK to ask for too much. + sa = a.Subcord(0, a.size() + 1); + EXPECT_EQ(s, std::string(sa)); + + // It is OK to ask for something beyond the end. + sa = a.Subcord(a.size() + 1, 0); + EXPECT_TRUE(sa.empty()); + sa = a.Subcord(a.size() + 1, 1); + EXPECT_TRUE(sa.empty()); +} + +TEST(Cord, Swap) { + absl::string_view a("Dexter"); + absl::string_view b("Mandark"); + absl::Cord x(a); + absl::Cord y(b); + swap(x, y); + ASSERT_EQ(x, absl::Cord(b)); + ASSERT_EQ(y, absl::Cord(a)); +} + +static void VerifyCopyToString(const absl::Cord& cord) { + std::string initially_empty; + absl::CopyCordToString(cord, &initially_empty); + EXPECT_EQ(initially_empty, cord); + + constexpr size_t kInitialLength = 1024; + std::string has_initial_contents(kInitialLength, 'x'); + const char* address_before_copy = has_initial_contents.data(); + absl::CopyCordToString(cord, &has_initial_contents); + EXPECT_EQ(has_initial_contents, cord); + + if (cord.size() <= kInitialLength) { + EXPECT_EQ(has_initial_contents.data(), address_before_copy) + << "CopyCordToString allocated new std::string storage; " + "has_initial_contents = \"" + << has_initial_contents << "\""; + } +} + +TEST(Cord, CopyToString) { + VerifyCopyToString(absl::Cord()); + VerifyCopyToString(absl::Cord("small cord")); + VerifyCopyToString( + absl::MakeFragmentedCord({"fragmented ", "cord ", "to ", "test ", + "copying ", "to ", "a ", "string."})); +} + +static bool IsFlat(const absl::Cord& c) { + return c.chunk_begin() == c.chunk_end() || ++c.chunk_begin() == c.chunk_end(); +} + +static void VerifyFlatten(absl::Cord c) { + std::string old_contents(c); + absl::string_view old_flat; + bool already_flat_and_non_empty = IsFlat(c) && !c.empty(); + if (already_flat_and_non_empty) { + old_flat = *c.chunk_begin(); + } + absl::string_view new_flat = c.Flatten(); + + // Verify that the contents of the flattened Cord are correct. + EXPECT_EQ(new_flat, old_contents); + EXPECT_EQ(std::string(c), old_contents); + + // If the Cord contained data and was already flat, verify that the data + // wasn't copied. + if (already_flat_and_non_empty) { + EXPECT_EQ(old_flat.data(), new_flat.data()) + << "Allocated new memory even though the Cord was already flat."; + } + + // Verify that the flattened Cord is in fact flat. + EXPECT_TRUE(IsFlat(c)); +} + +TEST(Cord, Flatten) { + VerifyFlatten(absl::Cord()); + VerifyFlatten(absl::Cord("small cord")); + VerifyFlatten(absl::Cord("larger than small buffer optimization")); + VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"})); + + // Test with a cord that is longer than the largest flat buffer + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192))); +} + +// Test data +namespace { +class TestData { + private: + std::vector<std::string> data_; + + // Return a std::string of the specified length. + static std::string MakeString(int length) { + std::string result; + char buf[30]; + snprintf(buf, sizeof(buf), "(%d)", length); + while (result.size() < length) { + result += buf; + } + result.resize(length); + return result; + } + + public: + TestData() { + // short strings increasing in length by one + for (int i = 0; i < 30; i++) { + data_.push_back(MakeString(i)); + } + + // strings around half kMaxFlatLength + static const int kMaxFlatLength = 4096 - 9; + static const int kHalf = kMaxFlatLength / 2; + + for (int i = -10; i <= +10; i++) { + data_.push_back(MakeString(kHalf + i)); + } + + for (int i = -10; i <= +10; i++) { + data_.push_back(MakeString(kMaxFlatLength + i)); + } + } + + size_t size() const { return data_.size(); } + const std::string& data(size_t i) const { return data_[i]; } +}; +} // namespace + +TEST(Cord, MultipleLengths) { + TestData d; + for (size_t i = 0; i < d.size(); i++) { + std::string a = d.data(i); + + { // Construct from Cord + absl::Cord tmp(a); + absl::Cord x(tmp); + EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; + } + + { // Construct from absl::string_view + absl::Cord x(a); + EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; + } + + { // Append cord to self + absl::Cord self(a); + self.Append(self); + EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; + } + + { // Prepend cord to self + absl::Cord self(a); + self.Prepend(self); + EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; + } + + // Try to append/prepend others + for (size_t j = 0; j < d.size(); j++) { + std::string b = d.data(j); + + { // CopyFrom Cord + absl::Cord x(a); + absl::Cord y(b); + x = y; + EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // CopyFrom absl::string_view + absl::Cord x(a); + x = b; + EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Append(Cord) + absl::Cord x(a); + absl::Cord y(b); + x.Append(y); + EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Append(absl::string_view) + absl::Cord x(a); + x.Append(b); + EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; + } + + { // Cord::Prepend(Cord) + absl::Cord x(a); + absl::Cord y(b); + x.Prepend(y); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + + { // Cord::Prepend(absl::string_view) + absl::Cord x(a); + x.Prepend(b); + EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; + } + } + } +} + +namespace { + +TEST(Cord, RemoveSuffixWithExternalOrSubstring) { + absl::Cord cord = absl::MakeCordFromExternal( + "foo bar baz", [](absl::string_view s) { DoNothing(s, nullptr); }); + + EXPECT_EQ("foo bar baz", std::string(cord)); + + // This RemoveSuffix() will wrap the EXTERNAL node in a SUBSTRING node. + cord.RemoveSuffix(4); + EXPECT_EQ("foo bar", std::string(cord)); + + // This RemoveSuffix() will adjust the SUBSTRING node in-place. + cord.RemoveSuffix(4); + EXPECT_EQ("foo", std::string(cord)); +} + +TEST(Cord, RemoveSuffixMakesZeroLengthNode) { + absl::Cord c; + c.Append(absl::Cord(std::string(100, 'x'))); + absl::Cord other_ref = c; // Prevent inplace appends + c.Append(absl::Cord(std::string(200, 'y'))); + c.RemoveSuffix(200); + EXPECT_EQ(std::string(100, 'x'), std::string(c)); +} + +} // namespace + +// CordSpliceTest contributed by hendrie. +namespace { + +// Create a cord with an external memory block filled with 'z' +absl::Cord CordWithZedBlock(size_t size) { + char* data = new char[size]; + if (size > 0) { + memset(data, 'z', size); + } + absl::Cord cord = absl::MakeCordFromExternal( + absl::string_view(data, size), + [](absl::string_view s) { delete[] s.data(); }); + return cord; +} + +// Establish that ZedBlock does what we think it does. +TEST(CordSpliceTest, ZedBlock) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("zzzzzzzzzz", s); +} + +TEST(CordSpliceTest, ZedBlock0) { + absl::Cord blob = CordWithZedBlock(0); + EXPECT_EQ(0, blob.size()); + std::string s; + absl::CopyCordToString(blob, &s); + EXPECT_EQ("", s); +} + +TEST(CordSpliceTest, ZedBlockSuffix1) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(9); + EXPECT_EQ(1, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("z", s); +} + +// Remove all of a prefix block +TEST(CordSpliceTest, ZedBlockSuffix0) { + absl::Cord blob = CordWithZedBlock(10); + EXPECT_EQ(10, blob.size()); + absl::Cord suffix(blob); + suffix.RemovePrefix(10); + EXPECT_EQ(0, suffix.size()); + std::string s; + absl::CopyCordToString(suffix, &s); + EXPECT_EQ("", s); +} + +absl::Cord BigCord(size_t len, char v) { + std::string s(len, v); + return absl::Cord(s); +} + +// Splice block into cord. +absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, + const absl::Cord& block) { + ABSL_RAW_CHECK(offset >= 0, ""); + ABSL_RAW_CHECK(offset + block.size() <= blob.size(), ""); + absl::Cord result(blob); + result.RemoveSuffix(blob.size() - offset); + result.Append(block); + absl::Cord suffix(blob); + suffix.RemovePrefix(offset + block.size()); + result.Append(suffix); + ABSL_RAW_CHECK(blob.size() == result.size(), ""); + return result; +} + +// Taking an empty suffix of a block breaks appending. +TEST(CordSpliceTest, RemoveEntireBlock1) { + absl::Cord zero = CordWithZedBlock(10); + absl::Cord suffix(zero); + suffix.RemovePrefix(10); + absl::Cord result; + result.Append(suffix); +} + +TEST(CordSpliceTest, RemoveEntireBlock2) { + absl::Cord zero = CordWithZedBlock(10); + absl::Cord prefix(zero); + prefix.RemoveSuffix(10); + absl::Cord suffix(zero); + suffix.RemovePrefix(10); + absl::Cord result(prefix); + result.Append(suffix); +} + +TEST(CordSpliceTest, RemoveEntireBlock3) { + absl::Cord blob = CordWithZedBlock(10); + absl::Cord block = BigCord(10, 'b'); + blob = SpliceCord(blob, 0, block); +} + +struct CordCompareTestCase { + template <typename LHS, typename RHS> + CordCompareTestCase(const LHS& lhs, const RHS& rhs) + : lhs_cord(lhs), rhs_cord(rhs) {} + + absl::Cord lhs_cord; + absl::Cord rhs_cord; +}; + +const auto sign = [](int x) { return x == 0 ? 0 : (x > 0 ? 1 : -1); }; + +void VerifyComparison(const CordCompareTestCase& test_case) { + std::string lhs_string(test_case.lhs_cord); + std::string rhs_string(test_case.rhs_cord); + int expected = sign(lhs_string.compare(rhs_string)); + EXPECT_EQ(expected, test_case.lhs_cord.Compare(test_case.rhs_cord)) + << "LHS=" << lhs_string << "; RHS=" << rhs_string; + EXPECT_EQ(expected, test_case.lhs_cord.Compare(rhs_string)) + << "LHS=" << lhs_string << "; RHS=" << rhs_string; + EXPECT_EQ(-expected, test_case.rhs_cord.Compare(test_case.lhs_cord)) + << "LHS=" << rhs_string << "; RHS=" << lhs_string; + EXPECT_EQ(-expected, test_case.rhs_cord.Compare(lhs_string)) + << "LHS=" << rhs_string << "; RHS=" << lhs_string; +} + +TEST(Cord, Compare) { + absl::Cord subcord("aaaaaBBBBBcccccDDDDD"); + subcord = subcord.Subcord(3, 10); + + absl::Cord tmp("aaaaaaaaaaaaaaaa"); + tmp.Append("BBBBBBBBBBBBBBBB"); + absl::Cord concat = absl::Cord("cccccccccccccccc"); + concat.Append("DDDDDDDDDDDDDDDD"); + concat.Prepend(tmp); + + absl::Cord concat2("aaaaaaaaaaaaa"); + concat2.Append("aaaBBBBBBBBBBBBBBBBccccc"); + concat2.Append("cccccccccccDDDDDDDDDDDDDD"); + concat2.Append("DD"); + + std::vector<CordCompareTestCase> test_cases = {{ + // Inline cords + {"abcdef", "abcdef"}, + {"abcdef", "abcdee"}, + {"abcdef", "abcdeg"}, + {"bbcdef", "abcdef"}, + {"bbcdef", "abcdeg"}, + {"abcdefa", "abcdef"}, + {"abcdef", "abcdefa"}, + + // Small flat cords + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD"}, + {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX"}, + {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa"}, + + // Subcords + {subcord, subcord}, + {subcord, "aaBBBBBccc"}, + {subcord, "aaBBBBBccd"}, + {subcord, "aaBBBBBccb"}, + {subcord, "aaBBBBBxcb"}, + {subcord, "aaBBBBBccca"}, + {subcord, "aaBBBBBcc"}, + + // Concats + {concat, concat}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD"}, + {concat, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe"}, + + {concat, concat2}, + }}; + + for (const auto& tc : test_cases) { + VerifyComparison(tc); + } +} + +TEST(Cord, CompareAfterAssign) { + absl::Cord a("aaaaaa1111111"); + absl::Cord b("aaaaaa2222222"); + a = "cccccc"; + b = "cccccc"; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); + + a = "aaaa"; + b = "bbbbb"; + a = ""; + b = ""; + EXPECT_EQ(a, b); + EXPECT_FALSE(a < b); +} + +// Test CompareTo() and ComparePrefix() against string and substring +// comparison methods from std::basic_string. +static void TestCompare(const absl::Cord& c, const absl::Cord& d, + RandomEngine* rng) { + typedef std::basic_string<uint8_t> ustring; + ustring cs(reinterpret_cast<const uint8_t*>(std::string(c).data()), c.size()); + ustring ds(reinterpret_cast<const uint8_t*>(std::string(d).data()), d.size()); + // ustring comparison is ideal because we expect Cord comparisons to be + // based on unsigned byte comparisons regardless of whether char is signed. + int expected = sign(cs.compare(ds)); + EXPECT_EQ(expected, sign(c.Compare(d))) << c << ", " << d; +} + +TEST(Compare, ComparisonIsUnsigned) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255); + char x = static_cast<char>(uniform_uint8(rng)); + TestCompare( + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x)), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x ^ 0x80)), &rng); +} + +TEST(Compare, RandomComparisons) { + const int kIters = 5000; + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + int n = GetUniformRandomUpTo(&rng, 5000); + absl::Cord a[] = {MakeExternalCord(n), + absl::Cord("ant"), + absl::Cord("elephant"), + absl::Cord("giraffe"), + absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), + GetUniformRandomUpTo(&rng, 100))), + absl::Cord(""), + absl::Cord("x"), + absl::Cord("A"), + absl::Cord("B"), + absl::Cord("C")}; + for (int i = 0; i < kIters; i++) { + absl::Cord c, d; + for (int j = 0; j < (i % 7) + 1; j++) { + c.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + d.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); + } + std::bernoulli_distribution coin_flip(0.5); + TestCompare(coin_flip(rng) ? c : absl::Cord(std::string(c)), + coin_flip(rng) ? d : absl::Cord(std::string(d)), &rng); + } +} + +template <typename T1, typename T2> +void CompareOperators() { + const T1 a("a"); + const T2 b("b"); + + EXPECT_TRUE(a == a); + // For pointer type (i.e. `const char*`), operator== compares the address + // instead of the std::string, so `a == const char*("a")` isn't necessarily true. + EXPECT_TRUE(std::is_pointer<T1>::value || a == T1("a")); + EXPECT_TRUE(std::is_pointer<T2>::value || a == T2("a")); + EXPECT_FALSE(a == b); + + EXPECT_TRUE(a != b); + EXPECT_FALSE(a != a); + + EXPECT_TRUE(a < b); + EXPECT_FALSE(b < a); + + EXPECT_TRUE(b > a); + EXPECT_FALSE(a > b); + + EXPECT_TRUE(a >= a); + EXPECT_TRUE(b >= a); + EXPECT_FALSE(a >= b); + + EXPECT_TRUE(a <= a); + EXPECT_TRUE(a <= b); + EXPECT_FALSE(b <= a); +} + +TEST(ComparisonOperators, Cord_Cord) { + CompareOperators<absl::Cord, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_StringPiece) { + CompareOperators<absl::Cord, absl::string_view>(); +} + +TEST(ComparisonOperators, StringPiece_Cord) { + CompareOperators<absl::string_view, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_string) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, string_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, stdstring_Cord) { + CompareOperators<std::string, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_stdstring) { + CompareOperators<absl::Cord, std::string>(); +} + +TEST(ComparisonOperators, charstar_Cord) { + CompareOperators<const char*, absl::Cord>(); +} + +TEST(ComparisonOperators, Cord_charstar) { + CompareOperators<absl::Cord, const char*>(); +} + +TEST(ConstructFromExternal, ReleaserInvoked) { + // Empty external memory means the releaser should be called immediately. + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal("", releaser); + EXPECT_TRUE(invoked); + } + } + + // If the size of the data is small enough, a future constructor + // implementation may copy the bytes and immediately invoke the releaser + // instead of creating an external node. We make a large dummy std::string to + // make this test independent of such an optimization. + std::string large_dummy(2048, 'c'); + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } + + { + bool invoked = false; + auto releaser = [&invoked](absl::string_view) { invoked = true; }; + { + absl::Cord copy; + { + auto c = absl::MakeCordFromExternal(large_dummy, releaser); + copy = c; + EXPECT_FALSE(invoked); + } + EXPECT_FALSE(invoked); + } + EXPECT_TRUE(invoked); + } +} + +TEST(ConstructFromExternal, CompareContents) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + + for (int length = 1; length <= 2048; length *= 2) { + std::string data = RandomLowercaseString(&rng, length); + auto* external = new std::string(data); + auto cord = + absl::MakeCordFromExternal(*external, [external](absl::string_view sv) { + EXPECT_EQ(external->data(), sv.data()); + EXPECT_EQ(external->size(), sv.size()); + delete external; + }); + EXPECT_EQ(data, cord); + } +} + +TEST(ConstructFromExternal, LargeReleaser) { + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + constexpr size_t kLength = 256; + std::string data = RandomLowercaseString(&rng, kLength); + std::array<char, kLength> data_array; + for (size_t i = 0; i < kLength; ++i) data_array[i] = data[i]; + bool invoked = false; + auto releaser = [data_array, &invoked](absl::string_view data) { + EXPECT_EQ(data, absl::string_view(data_array.data(), data_array.size())); + invoked = true; + }; + (void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, FunctionPointerReleaser) { + static absl::string_view data("hello world"); + static bool invoked; + auto* releaser = + static_cast<void (*)(absl::string_view)>([](absl::string_view sv) { + EXPECT_EQ(data, sv); + invoked = true; + }); + invoked = false; + (void)absl::MakeCordFromExternal(data, releaser); + EXPECT_TRUE(invoked); + + invoked = false; + (void)absl::MakeCordFromExternal(data, *releaser); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, MoveOnlyReleaser) { + struct Releaser { + explicit Releaser(bool* invoked) : invoked(invoked) {} + Releaser(Releaser&& other) noexcept : invoked(other.invoked) {} + void operator()(absl::string_view) const { *invoked = true; } + + bool* invoked; + }; + + bool invoked = false; + (void)absl::MakeCordFromExternal("dummy", Releaser(&invoked)); + EXPECT_TRUE(invoked); +} + +TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { + struct Releaser { + explicit Releaser(bool* destroyed) : destroyed(destroyed) {} + ~Releaser() { *destroyed = true; } + void operator()(absl::string_view) const {} + + bool* destroyed; + }; + + bool destroyed = false; + Releaser releaser(&destroyed); + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_TRUE(destroyed); +} + +TEST(ConstructFromExternal, ReferenceQualifierOverloads) { + struct Releaser { + void operator()(absl::string_view) & { *lvalue_invoked = true; } + void operator()(absl::string_view) && { *rvalue_invoked = true; } + + bool* lvalue_invoked; + bool* rvalue_invoked; + }; + + bool lvalue_invoked = false; + bool rvalue_invoked = false; + Releaser releaser = {&lvalue_invoked, &rvalue_invoked}; + (void)absl::MakeCordFromExternal("", releaser); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); + rvalue_invoked = false; + + (void)absl::MakeCordFromExternal("dummy", releaser); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); + rvalue_invoked = false; + + // NOLINTNEXTLINE: suppress clang-tidy std::move on trivially copyable type. + (void)absl::MakeCordFromExternal("dummy", std::move(releaser)); + EXPECT_FALSE(lvalue_invoked); + EXPECT_TRUE(rvalue_invoked); +} + +TEST(ExternalMemory, BasicUsage) { + static const char* strings[] = { "", "hello", "there" }; + for (const char* str : strings) { + absl::Cord dst("(prefix)"); + AddExternalMemory(str, &dst); + dst.Append("(suffix)"); + EXPECT_EQ((std::string("(prefix)") + str + std::string("(suffix)")), + std::string(dst)); + } +} + +TEST(ExternalMemory, RemovePrefixSuffix) { + // Exhaustively try all sub-strings. + absl::Cord cord = MakeComposite(); + std::string s = std::string(cord); + for (int offset = 0; offset <= s.size(); offset++) { + for (int length = 0; length <= s.size() - offset; length++) { + absl::Cord result(cord); + result.RemovePrefix(offset); + result.RemoveSuffix(result.size() - length); + EXPECT_EQ(s.substr(offset, length), std::string(result)) + << offset << " " << length; + } + } +} + +TEST(ExternalMemory, Get) { + absl::Cord cord("hello"); + AddExternalMemory(" world!", &cord); + AddExternalMemory(" how are ", &cord); + cord.Append(" you?"); + std::string s = std::string(cord); + for (int i = 0; i < s.size(); i++) { + EXPECT_EQ(s[i], cord[i]); + } +} + +// CordMemoryUsage tests verify the correctness of the EstimatedMemoryUsage() +// These tests take into account that the reported memory usage is approximate +// and non-deterministic. For all tests, We verify that the reported memory +// usage is larger than `size()`, and less than `size() * 1.5` as a cord should +// never reserve more 'extra' capacity than half of its size as it grows. +// Additionally we have some whiteboxed expectations based on our knowledge of +// the layout and size of empty and inlined cords, and flat nodes. + +TEST(CordMemoryUsage, Empty) { + EXPECT_EQ(sizeof(absl::Cord), absl::Cord().EstimatedMemoryUsage()); +} + +TEST(CordMemoryUsage, Embedded) { + absl::Cord a("hello"); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, EmbeddedAppend) { + absl::Cord a("a"); + absl::Cord b("bcd"); + EXPECT_EQ(b.EstimatedMemoryUsage(), sizeof(absl::Cord)); + a.Append(b); + EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +} + +TEST(CordMemoryUsage, ExternalMemory) { + static const int kLength = 1000; + absl::Cord cord; + AddExternalMemory(std::string(kLength, 'x'), &cord); + EXPECT_GT(cord.EstimatedMemoryUsage(), kLength); + EXPECT_LE(cord.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, Flat) { + static const int kLength = 125; + absl::Cord a(std::string(kLength, 'a')); + EXPECT_GT(a.EstimatedMemoryUsage(), kLength); + EXPECT_LE(a.EstimatedMemoryUsage(), kLength * 1.5); +} + +TEST(CordMemoryUsage, AppendFlat) { + using absl::strings_internal::CordTestAccess; + absl::Cord a(std::string(CordTestAccess::MaxFlatLength(), 'a')); + size_t length = a.EstimatedMemoryUsage(); + a.Append(std::string(CordTestAccess::MaxFlatLength(), 'b')); + size_t delta = a.EstimatedMemoryUsage() - length; + EXPECT_GT(delta, CordTestAccess::MaxFlatLength()); + EXPECT_LE(delta, CordTestAccess::MaxFlatLength() * 1.5); +} + +// Regtest for a change that had to be rolled back because it expanded out +// of the InlineRep too soon, which was observable through MemoryUsage(). +TEST(CordMemoryUsage, InlineRep) { + constexpr size_t kMaxInline = 15; // Cord::InlineRep::N + const std::string small_string(kMaxInline, 'x'); + absl::Cord c1(small_string); + + absl::Cord c2; + c2.Append(small_string); + EXPECT_EQ(c1, c2); + EXPECT_EQ(c1.EstimatedMemoryUsage(), c2.EstimatedMemoryUsage()); +} + +} // namespace + +// Regtest for 7510292 (fix a bug introduced by 7465150) +TEST(Cord, Concat_Append) { + // Create a rep of type CONCAT + absl::Cord s1("foobarbarbarbarbar"); + s1.Append("abcdefgabcdefgabcdefgabcdefgabcdefgabcdefgabcdefg"); + size_t size = s1.size(); + + // Create a copy of s1 and append to it. + absl::Cord s2 = s1; + s2.Append("x"); + + // 7465150 modifies s1 when it shouldn't. + EXPECT_EQ(s1.size(), size); + EXPECT_EQ(s2.size(), size + 1); +} + +TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) { + absl::Cord fragmented = + absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); + + EXPECT_EQ("A fragmented Cord", fragmented); + + auto chunk_it = fragmented.chunk_begin(); + + ASSERT_TRUE(chunk_it != fragmented.chunk_end()); + EXPECT_EQ("A ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("fragmented ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("Cord", *chunk_it); + + ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); +} + +TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) { + std::vector<absl::string_view> chunks = {"A ", "fragmented ", "Cord"}; + absl::Cord fragmented = absl::MakeFragmentedCord(chunks); + + EXPECT_EQ("A fragmented Cord", fragmented); + + auto chunk_it = fragmented.chunk_begin(); + + ASSERT_TRUE(chunk_it != fragmented.chunk_end()); + EXPECT_EQ("A ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("fragmented ", *chunk_it); + + ASSERT_TRUE(++chunk_it != fragmented.chunk_end()); + EXPECT_EQ("Cord", *chunk_it); + + ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); +} + +TEST(CordChunkIterator, Traits) { + static_assert(std::is_copy_constructible<absl::Cord::ChunkIterator>::value, + ""); + static_assert(std::is_copy_assignable<absl::Cord::ChunkIterator>::value, ""); + + // Move semantics to satisfy swappable via std::swap + static_assert(std::is_move_constructible<absl::Cord::ChunkIterator>::value, + ""); + static_assert(std::is_move_assignable<absl::Cord::ChunkIterator>::value, ""); + + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::ChunkIterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::value_type, + absl::string_view>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::ChunkIterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::pointer, + const absl::string_view*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::ChunkIterator>::reference, + absl::string_view>::value, + ""); +} + +static void VerifyChunkIterator(const absl::Cord& cord, + size_t expected_chunks) { + EXPECT_EQ(cord.chunk_begin() == cord.chunk_end(), cord.empty()) << cord; + EXPECT_EQ(cord.chunk_begin() != cord.chunk_end(), !cord.empty()); + + absl::Cord::ChunkRange range = cord.Chunks(); + EXPECT_EQ(range.begin() == range.end(), cord.empty()); + EXPECT_EQ(range.begin() != range.end(), !cord.empty()); + + std::string content(cord); + size_t pos = 0; + auto pre_iter = cord.chunk_begin(), post_iter = cord.chunk_begin(); + size_t n_chunks = 0; + while (pre_iter != cord.chunk_end() && post_iter != cord.chunk_end()) { + EXPECT_FALSE(pre_iter == cord.chunk_end()); // NOLINT: explicitly test == + EXPECT_FALSE(post_iter == cord.chunk_end()); // NOLINT + + EXPECT_EQ(pre_iter, post_iter); + EXPECT_EQ(*pre_iter, *post_iter); + + EXPECT_EQ(pre_iter->data(), (*pre_iter).data()); + EXPECT_EQ(pre_iter->size(), (*pre_iter).size()); + + absl::string_view chunk = *pre_iter; + EXPECT_FALSE(chunk.empty()); + EXPECT_LE(pos + chunk.size(), content.size()); + EXPECT_EQ(absl::string_view(content.c_str() + pos, chunk.size()), chunk); + + int n_equal_iterators = 0; + for (absl::Cord::ChunkIterator it = range.begin(); it != range.end(); + ++it) { + n_equal_iterators += static_cast<int>(it == pre_iter); + } + EXPECT_EQ(n_equal_iterators, 1); + + ++pre_iter; + EXPECT_EQ(*post_iter++, chunk); + + pos += chunk.size(); + ++n_chunks; + } + EXPECT_EQ(expected_chunks, n_chunks); + EXPECT_EQ(pos, content.size()); + EXPECT_TRUE(pre_iter == cord.chunk_end()); // NOLINT: explicitly test == + EXPECT_TRUE(post_iter == cord.chunk_end()); // NOLINT +} + +TEST(CordChunkIterator, Operations) { + absl::Cord empty_cord; + VerifyChunkIterator(empty_cord, 0); + + absl::Cord small_buffer_cord("small cord"); + VerifyChunkIterator(small_buffer_cord, 1); + + absl::Cord flat_node_cord("larger than small buffer optimization"); + VerifyChunkIterator(flat_node_cord, 1); + + VerifyChunkIterator( + absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "chunk ", "iterations."}), + 8); + + absl::Cord reused_nodes_cord(std::string(40, 'c')); + reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'b'))); + reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'a'))); + size_t expected_chunks = 3; + for (int i = 0; i < 8; ++i) { + reused_nodes_cord.Prepend(reused_nodes_cord); + expected_chunks *= 2; + VerifyChunkIterator(reused_nodes_cord, expected_chunks); + } + + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); + absl::Cord subcords; + for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128)); + VerifyChunkIterator(subcords, 128); +} + +TEST(CordCharIterator, Traits) { + static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value, + ""); + static_assert(std::is_copy_assignable<absl::Cord::CharIterator>::value, ""); + + // Move semantics to satisfy swappable via std::swap + static_assert(std::is_move_constructible<absl::Cord::CharIterator>::value, + ""); + static_assert(std::is_move_assignable<absl::Cord::CharIterator>::value, ""); + + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::CharIterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::value_type, + char>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<absl::Cord::CharIterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::pointer, + const char*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<absl::Cord::CharIterator>::reference, + const char&>::value, + ""); +} + +static void VerifyCharIterator(const absl::Cord& cord) { + EXPECT_EQ(cord.char_begin() == cord.char_end(), cord.empty()); + EXPECT_EQ(cord.char_begin() != cord.char_end(), !cord.empty()); + + absl::Cord::CharRange range = cord.Chars(); + EXPECT_EQ(range.begin() == range.end(), cord.empty()); + EXPECT_EQ(range.begin() != range.end(), !cord.empty()); + + size_t i = 0; + absl::Cord::CharIterator pre_iter = cord.char_begin(); + absl::Cord::CharIterator post_iter = cord.char_begin(); + std::string content(cord); + while (pre_iter != cord.char_end() && post_iter != cord.char_end()) { + EXPECT_FALSE(pre_iter == cord.char_end()); // NOLINT: explicitly test == + EXPECT_FALSE(post_iter == cord.char_end()); // NOLINT + + EXPECT_LT(i, cord.size()); + EXPECT_EQ(content[i], *pre_iter); + + EXPECT_EQ(pre_iter, post_iter); + EXPECT_EQ(*pre_iter, *post_iter); + EXPECT_EQ(&*pre_iter, &*post_iter); + + EXPECT_EQ(&*pre_iter, pre_iter.operator->()); + + const char* character_address = &*pre_iter; + absl::Cord::CharIterator copy = pre_iter; + ++copy; + EXPECT_EQ(character_address, &*pre_iter); + + int n_equal_iterators = 0; + for (absl::Cord::CharIterator it = range.begin(); it != range.end(); ++it) { + n_equal_iterators += static_cast<int>(it == pre_iter); + } + EXPECT_EQ(n_equal_iterators, 1); + + absl::Cord::CharIterator advance_iter = range.begin(); + absl::Cord::Advance(&advance_iter, i); + EXPECT_EQ(pre_iter, advance_iter); + + advance_iter = range.begin(); + EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, i), cord.Subcord(0, i)); + EXPECT_EQ(pre_iter, advance_iter); + + advance_iter = pre_iter; + absl::Cord::Advance(&advance_iter, cord.size() - i); + EXPECT_EQ(range.end(), advance_iter); + + advance_iter = pre_iter; + EXPECT_EQ(absl::Cord::AdvanceAndRead(&advance_iter, cord.size() - i), + cord.Subcord(i, cord.size() - i)); + EXPECT_EQ(range.end(), advance_iter); + + ++i; + ++pre_iter; + post_iter++; + } + EXPECT_EQ(i, cord.size()); + EXPECT_TRUE(pre_iter == cord.char_end()); // NOLINT: explicitly test == + EXPECT_TRUE(post_iter == cord.char_end()); // NOLINT + + absl::Cord::CharIterator zero_advanced_end = cord.char_end(); + absl::Cord::Advance(&zero_advanced_end, 0); + EXPECT_EQ(zero_advanced_end, cord.char_end()); + + absl::Cord::CharIterator it = cord.char_begin(); + for (absl::string_view chunk : cord.Chunks()) { + while (!chunk.empty()) { + EXPECT_EQ(absl::Cord::ChunkRemaining(it), chunk); + chunk.remove_prefix(1); + ++it; + } + } +} + +TEST(CordCharIterator, Operations) { + absl::Cord empty_cord; + VerifyCharIterator(empty_cord); + + absl::Cord small_buffer_cord("small cord"); + VerifyCharIterator(small_buffer_cord); + + absl::Cord flat_node_cord("larger than small buffer optimization"); + VerifyCharIterator(flat_node_cord); + + VerifyCharIterator( + absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "character ", "iteration."})); + + absl::Cord reused_nodes_cord("ghi"); + reused_nodes_cord.Prepend(absl::Cord("def")); + reused_nodes_cord.Prepend(absl::Cord("abc")); + for (int i = 0; i < 4; ++i) { + reused_nodes_cord.Prepend(reused_nodes_cord); + VerifyCharIterator(reused_nodes_cord); + } + + RandomEngine rng(testing::GTEST_FLAG(random_seed)); + absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); + absl::Cord subcords; + for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128)); + VerifyCharIterator(subcords); +} + +TEST(Cord, StreamingOutput) { + absl::Cord c = + absl::MakeFragmentedCord({"A ", "small ", "fragmented ", "Cord", "."}); + std::stringstream output; + output << c; + EXPECT_EQ("A small fragmented Cord.", output.str()); +} + +TEST(Cord, ForEachChunk) { + for (int num_elements : {1, 10, 200}) { + SCOPED_TRACE(num_elements); + std::vector<std::string> cord_chunks; + for (int i = 0; i < num_elements; ++i) { + cord_chunks.push_back(absl::StrCat("[", i, "]")); + } + absl::Cord c = absl::MakeFragmentedCord(cord_chunks); + + std::vector<std::string> iterated_chunks; + absl::CordTestPeer::ForEachChunk(c, + [&iterated_chunks](absl::string_view sv) { + iterated_chunks.emplace_back(sv); + }); + EXPECT_EQ(iterated_chunks, cord_chunks); + } +} + +TEST(Cord, SmallBufferAssignFromOwnData) { + constexpr size_t kMaxInline = 15; + std::string contents = "small buff cord"; + EXPECT_EQ(contents.size(), kMaxInline); + for (size_t pos = 0; pos < contents.size(); ++pos) { + for (size_t count = contents.size() - pos; count > 0; --count) { + absl::Cord c(contents); + absl::string_view flat = c.Flatten(); + c = flat.substr(pos, count); + EXPECT_EQ(c, contents.substr(pos, count)) + << "pos = " << pos << "; count = " << count; + } + } +} diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h new file mode 100644 index 00000000..f1036e3b --- /dev/null +++ b/absl/strings/cord_test_helpers.h @@ -0,0 +1,60 @@ +// +// Copyright 2018 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_ +#define ABSL_STRINGS_CORD_TEST_HELPERS_H_ + +#include "absl/strings/cord.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Creates a multi-segment Cord from an iterable container of strings. The +// resulting Cord is guaranteed to have one segment for every string in the +// container. This allows code to be unit tested with multi-segment Cord +// inputs. +// +// Example: +// +// absl::Cord c = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); +// EXPECT_FALSE(c.GetFlat(&unused)); +// +// The mechanism by which this Cord is created is an implementation detail. Any +// implementation that produces a multi-segment Cord may produce a flat Cord in +// the future as new optimizations are added to the Cord class. +// MakeFragmentedCord will, however, always be updated to return a multi-segment +// Cord. +template <typename Container> +Cord MakeFragmentedCord(const Container& c) { + Cord result; + for (const auto& s : c) { + auto* external = new std::string(s); + Cord tmp = absl::MakeCordFromExternal( + *external, [external](absl::string_view) { delete external; }); + tmp.Prepend(result); + result = tmp; + } + return result; +} + +inline Cord MakeFragmentedCord(std::initializer_list<absl::string_view> list) { + return MakeFragmentedCord<std::initializer_list<absl::string_view>>(list); +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_TEST_HELPERS_H_ diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index eb0974dc..7adc1b65 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -26,6 +26,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/unaligned_access.h" #include "absl/strings/internal/char_map.h" +#include "absl/strings/internal/escaping.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/utf8.h" #include "absl/strings/str_cat.h" @@ -33,30 +34,9 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace { -// Digit conversion. -constexpr char kHexChar[] = "0123456789abcdef"; - -constexpr char kHexTable[513] = - "000102030405060708090a0b0c0d0e0f" - "101112131415161718191a1b1c1d1e1f" - "202122232425262728292a2b2c2d2e2f" - "303132333435363738393a3b3c3d3e3f" - "404142434445464748494a4b4c4d4e4f" - "505152535455565758595a5b5c5d5e5f" - "606162636465666768696a6b6c6d6e6f" - "707172737475767778797a7b7c7d7e7f" - "808182838485868788898a8b8c8d8e8f" - "909192939495969798999a9b9c9d9e9f" - "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" - "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" - "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" - "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" - "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" - "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; - // These are used for the leave_nulls_escaped argument to CUnescapeInternal(). constexpr bool kUnescapeNulls = false; @@ -349,14 +329,14 @@ std::string CEscapeInternal(absl::string_view src, bool use_hex, (last_hex_escape && absl::ascii_isxdigit(c)))) { if (use_hex) { dest.append("\\" "x"); - dest.push_back(kHexChar[c / 16]); - dest.push_back(kHexChar[c % 16]); + dest.push_back(numbers_internal::kHexChar[c / 16]); + dest.push_back(numbers_internal::kHexChar[c % 16]); is_hex_escape = true; } else { dest.append("\\"); - dest.push_back(kHexChar[c / 64]); - dest.push_back(kHexChar[(c % 64) / 8]); - dest.push_back(kHexChar[c % 8]); + dest.push_back(numbers_internal::kHexChar[c / 64]); + dest.push_back(numbers_internal::kHexChar[(c % 64) / 8]); + dest.push_back(numbers_internal::kHexChar[c % 8]); } } else { dest.push_back(c); @@ -785,177 +765,10 @@ constexpr signed char kUnWebSafeBase64[] = { }; /* clang-format on */ -size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { - // Base64 encodes three bytes of input at a time. If the input is not - // divisible by three, we pad as appropriate. - // - // (from https://tools.ietf.org/html/rfc3548) - // Special processing is performed if fewer than 24 bits are available - // at the end of the data being encoded. A full encoding quantum is - // always completed at the end of a quantity. When fewer than 24 input - // bits are available in an input group, zero bits are added (on the - // right) to form an integral number of 6-bit groups. Padding at the - // end of the data is performed using the '=' character. Since all base - // 64 input is an integral number of octets, only the following cases - // can arise: - - // Base64 encodes each three bytes of input into four bytes of output. - size_t len = (input_len / 3) * 4; - - if (input_len % 3 == 0) { - // (from https://tools.ietf.org/html/rfc3548) - // (1) the final quantum of encoding input is an integral multiple of 24 - // bits; here, the final unit of encoded output will be an integral - // multiple of 4 characters with no "=" padding, - } else if (input_len % 3 == 1) { - // (from https://tools.ietf.org/html/rfc3548) - // (2) the final quantum of encoding input is exactly 8 bits; here, the - // final unit of encoded output will be two characters followed by two - // "=" padding characters, or - len += 2; - if (do_padding) { - len += 2; - } - } else { // (input_len % 3 == 2) - // (from https://tools.ietf.org/html/rfc3548) - // (3) the final quantum of encoding input is exactly 16 bits; here, the - // final unit of encoded output will be three characters followed by one - // "=" padding character. - len += 3; - if (do_padding) { - len += 1; - } - } - - assert(len >= input_len); // make sure we didn't overflow - return len; -} - -size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, - size_t szdest, const char* base64, - bool do_padding) { - static const char kPad64 = '='; - - if (szsrc * 4 > szdest * 3) return 0; - - char* cur_dest = dest; - const unsigned char* cur_src = src; - - char* const limit_dest = dest + szdest; - const unsigned char* const limit_src = src + szsrc; - - // Three bytes of data encodes to four characters of cyphertext. - // So we can pump through three-byte chunks atomically. - if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. - while (cur_src < limit_src - 3) { // While we have >= 32 bits. - uint32_t in = absl::big_endian::Load32(cur_src) >> 8; - - cur_dest[0] = base64[in >> 18]; - in &= 0x3FFFF; - cur_dest[1] = base64[in >> 12]; - in &= 0xFFF; - cur_dest[2] = base64[in >> 6]; - in &= 0x3F; - cur_dest[3] = base64[in]; - - cur_dest += 4; - cur_src += 3; - } - } - // To save time, we didn't update szdest or szsrc in the loop. So do it now. - szdest = limit_dest - cur_dest; - szsrc = limit_src - cur_src; - - /* now deal with the tail (<=3 bytes) */ - switch (szsrc) { - case 0: - // Nothing left; nothing more to do. - break; - case 1: { - // One byte left: this encodes to two characters, and (optionally) - // two pad characters to round out the four-character cypherblock. - if (szdest < 2) return 0; - uint32_t in = cur_src[0]; - cur_dest[0] = base64[in >> 2]; - in &= 0x3; - cur_dest[1] = base64[in << 4]; - cur_dest += 2; - szdest -= 2; - if (do_padding) { - if (szdest < 2) return 0; - cur_dest[0] = kPad64; - cur_dest[1] = kPad64; - cur_dest += 2; - szdest -= 2; - } - break; - } - case 2: { - // Two bytes left: this encodes to three characters, and (optionally) - // one pad character to round out the four-character cypherblock. - if (szdest < 3) return 0; - uint32_t in = absl::big_endian::Load16(cur_src); - cur_dest[0] = base64[in >> 10]; - in &= 0x3FF; - cur_dest[1] = base64[in >> 4]; - in &= 0x00F; - cur_dest[2] = base64[in << 2]; - cur_dest += 3; - szdest -= 3; - if (do_padding) { - if (szdest < 1) return 0; - cur_dest[0] = kPad64; - cur_dest += 1; - szdest -= 1; - } - break; - } - case 3: { - // Three bytes left: same as in the big loop above. We can't do this in - // the loop because the loop above always reads 4 bytes, and the fourth - // byte is past the end of the input. - if (szdest < 4) return 0; - uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); - cur_dest[0] = base64[in >> 18]; - in &= 0x3FFFF; - cur_dest[1] = base64[in >> 12]; - in &= 0xFFF; - cur_dest[2] = base64[in >> 6]; - in &= 0x3F; - cur_dest[3] = base64[in]; - cur_dest += 4; - szdest -= 4; - break; - } - default: - // Should not be reached: blocks of 4 bytes are handled - // in the while loop before this switch statement. - ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); - break; - } - return (cur_dest - dest); -} - -constexpr char kBase64Chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - constexpr char kWebSafeBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; template <typename String> -void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, - bool do_padding, const char* base64_chars) { - const size_t calc_escaped_size = - CalculateBase64EscapedLenInternal(szsrc, do_padding); - strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size); - - const size_t escaped_len = Base64EscapeInternal( - src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); - assert(calc_escaped_size == escaped_len); - dest->erase(escaped_len); -} - -template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { // Determine the size of the output std::string. Base64 encodes every 3 bytes into @@ -983,7 +796,7 @@ bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, } /* clang-format off */ -constexpr char kHexValue[256] = { +constexpr char kHexValueLenient[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -999,8 +812,9 @@ constexpr char kHexValue[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; + /* clang-format on */ // This is a templated function so that T can be either a char* @@ -1009,8 +823,8 @@ constexpr char kHexValue[256] = { template <typename T> void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { for (int i = 0; i < num; i++) { - to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) + - (kHexValue[from[i * 2 + 1] & 0xFF]); + to[i] = (kHexValueLenient[from[i * 2] & 0xFF] << 4) + + (kHexValueLenient[from[i * 2 + 1] & 0xFF]); } } @@ -1020,7 +834,7 @@ template <typename T> void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { auto dest_ptr = &dest[0]; for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { - const char* hex_p = &kHexTable[*src_ptr * 2]; + const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2]; std::copy(hex_p, hex_p + 2, dest_ptr); } } @@ -1088,26 +902,30 @@ bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { } void Base64Escape(absl::string_view src, std::string* dest) { - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), dest, true, kBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + true, strings_internal::kBase64Chars); } void WebSafeBase64Escape(absl::string_view src, std::string* dest) { - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), dest, false, kWebSafeBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, + false, kWebSafeBase64Chars); } std::string Base64Escape(absl::string_view src) { std::string dest; - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), &dest, true, kBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + true, strings_internal::kBase64Chars); return dest; } std::string WebSafeBase64Escape(absl::string_view src) { std::string dest; - Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), - src.size(), &dest, false, kWebSafeBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, + false, kWebSafeBase64Chars); return dest; } @@ -1127,5 +945,5 @@ std::string BytesToHexString(absl::string_view from) { return result; } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/escaping.h b/absl/strings/escaping.h index 7f1c5d46..f5ca26c5 100644 --- a/absl/strings/escaping.h +++ b/absl/strings/escaping.h @@ -33,7 +33,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // CUnescape() // @@ -158,7 +158,7 @@ std::string HexStringToBytes(absl::string_view from); // `2*from.size()`. std::string BytesToHexString(absl::string_view from); -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_ESCAPING_H_ diff --git a/absl/strings/internal/char_map.h b/absl/strings/internal/char_map.h index 772ae869..a76e6036 100644 --- a/absl/strings/internal/char_map.h +++ b/absl/strings/internal/char_map.h @@ -28,7 +28,7 @@ #include "absl/base/port.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { class Charmap { @@ -150,7 +150,7 @@ constexpr Charmap GraphCharmap() { return PrintCharmap() & ~SpaceCharmap(); } constexpr Charmap PunctCharmap() { return GraphCharmap() & ~AlnumCharmap(); } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_CHAR_MAP_H_ diff --git a/absl/strings/internal/charconv_bigint.cc b/absl/strings/internal/charconv_bigint.cc index 58c909f4..66f33e72 100644 --- a/absl/strings/internal/charconv_bigint.cc +++ b/absl/strings/internal/charconv_bigint.cc @@ -19,7 +19,7 @@ #include <string> namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { namespace { @@ -158,12 +158,12 @@ const uint32_t* LargePowerOfFiveData(int i) { int LargePowerOfFiveSize(int i) { return 2 * i; } } // namespace -const uint32_t kFiveToNth[14] = { +ABSL_DLL const uint32_t kFiveToNth[14] = { 1, 5, 25, 125, 625, 3125, 15625, 78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125, }; -const uint32_t kTenToNth[10] = { +ABSL_DLL const uint32_t kTenToNth[10] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, }; @@ -355,5 +355,5 @@ template class BigUnsigned<4>; template class BigUnsigned<84>; } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/charconv_bigint.h b/absl/strings/internal/charconv_bigint.h index 5aef416b..999e9ae3 100644 --- a/absl/strings/internal/charconv_bigint.h +++ b/absl/strings/internal/charconv_bigint.h @@ -20,12 +20,13 @@ #include <iostream> #include <string> +#include "absl/base/config.h" #include "absl/strings/ascii.h" #include "absl/strings/internal/charconv_parse.h" #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // The largest power that 5 that can be raised to, and still fit in a uint32_t. @@ -33,8 +34,9 @@ constexpr int kMaxSmallPowerOfFive = 13; // The largest power that 10 that can be raised to, and still fit in a uint32_t. constexpr int kMaxSmallPowerOfTen = 9; -extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1]; -extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; +ABSL_DLL extern const uint32_t + kFiveToNth[kMaxSmallPowerOfFive + 1]; +ABSL_DLL extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; // Large, fixed-width unsigned integer. // @@ -415,7 +417,7 @@ extern template class BigUnsigned<4>; extern template class BigUnsigned<84>; } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ diff --git a/absl/strings/internal/charconv_bigint_test.cc b/absl/strings/internal/charconv_bigint_test.cc index 590511d0..363bcb03 100644 --- a/absl/strings/internal/charconv_bigint_test.cc +++ b/absl/strings/internal/charconv_bigint_test.cc @@ -19,7 +19,7 @@ #include "gtest/gtest.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { TEST(BigUnsigned, ShiftLeft) { @@ -201,5 +201,5 @@ TEST(BigUnsigned, TenToTheNth) { } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/charconv_parse.cc b/absl/strings/internal/charconv_parse.cc index 4dd4ecb3..d9a57a78 100644 --- a/absl/strings/internal/charconv_parse.cc +++ b/absl/strings/internal/charconv_parse.cc @@ -22,7 +22,7 @@ #include "absl/strings/internal/memutil.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace { // ParseFloat<10> will read the first 19 significant digits of the mantissa. @@ -254,6 +254,12 @@ std::size_t ConsumeDigits(const char* begin, const char* end, int max_digits, assert(max_digits * 4 <= std::numeric_limits<T>::digits); } const char* const original_begin = begin; + + // Skip leading zeros, but only if *out is zero. + // They don't cause an overflow so we don't have to count them for + // `max_digits`. + while (!*out && end != begin && *begin == '0') ++begin; + T accumulator = *out; const char* significant_digits_end = (end - begin > max_digits) ? begin + max_digits : end; @@ -494,5 +500,5 @@ template ParsedFloat ParseFloat<16>(const char* begin, const char* end, chars_format format_flags); } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/charconv_parse.h b/absl/strings/internal/charconv_parse.h index ddfc87f8..505998b5 100644 --- a/absl/strings/internal/charconv_parse.h +++ b/absl/strings/internal/charconv_parse.h @@ -17,10 +17,11 @@ #include <cstdint> +#include "absl/base/config.h" #include "absl/strings/charconv.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // Enum indicating whether a parsed float is a number or special value. @@ -93,6 +94,6 @@ extern template ParsedFloat ParseFloat<16>(const char* begin, const char* end, absl::chars_format format_flags); } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_CHARCONV_PARSE_H_ diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h new file mode 100644 index 00000000..5b5d1083 --- /dev/null +++ b/absl/strings/internal/cord_internal.h @@ -0,0 +1,151 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ + +#include <atomic> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <type_traits> + +#include "absl/meta/type_traits.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Wraps std::atomic for reference counting. +class Refcount { + public: + Refcount() : count_{1} {} + ~Refcount() {} + + // Increments the reference count by 1. Imposes no memory ordering. + inline void Increment() { count_.fetch_add(1, std::memory_order_relaxed); } + + // Asserts that the current refcount is greater than 0. If the refcount is + // greater than 1, decrements the reference count by 1. + // + // Returns false if there are no references outstanding; true otherwise. + // Inserts barriers to ensure that state written before this method returns + // false will be visible to a thread that just observed this method returning + // false. + inline bool Decrement() { + int32_t refcount = count_.load(std::memory_order_acquire); + assert(refcount > 0); + return refcount != 1 && count_.fetch_sub(1, std::memory_order_acq_rel) != 1; + } + + // Same as Decrement but expect that refcount is greater than 1. + inline bool DecrementExpectHighRefcount() { + int32_t refcount = count_.fetch_sub(1, std::memory_order_acq_rel); + assert(refcount > 0); + return refcount != 1; + } + + // Returns the current reference count using acquire semantics. + inline int32_t Get() const { return count_.load(std::memory_order_acquire); } + + // Returns whether the atomic integer is 1. + // If the reference count is used in the conventional way, a + // reference count of 1 implies that the current thread owns the + // reference and no other thread shares it. + // This call performs the test for a reference count of one, and + // performs the memory barrier needed for the owning thread + // to act on the object, knowing that it has exclusive access to the + // object. + inline bool IsOne() { return count_.load(std::memory_order_acquire) == 1; } + + private: + std::atomic<int32_t> count_; +}; + +// The overhead of a vtable is too much for Cord, so we roll our own subclasses +// using only a single byte to differentiate classes from each other - the "tag" +// byte. Define the subclasses first so we can provide downcasting helper +// functions in the base class. + +struct CordRepConcat; +struct CordRepSubstring; +struct CordRepExternal; + +struct CordRep { + // The following three fields have to be less than 32 bytes since + // that is the smallest supported flat node size. + // We use uint64_t for the length even in 32-bit binaries. + uint64_t length; + Refcount refcount; + // If tag < FLAT, it represents CordRepKind and indicates the type of node. + // Otherwise, the node type is CordRepFlat and the tag is the encoded size. + uint8_t tag; + char data[1]; // Starting point for flat array: MUST BE LAST FIELD of CordRep + + inline CordRepConcat* concat(); + inline const CordRepConcat* concat() const; + inline CordRepSubstring* substring(); + inline const CordRepSubstring* substring() const; + inline CordRepExternal* external(); + inline const CordRepExternal* external() const; +}; + +struct CordRepConcat : public CordRep { + CordRep* left; + CordRep* right; + + uint8_t depth() const { return static_cast<uint8_t>(data[0]); } + void set_depth(uint8_t depth) { data[0] = static_cast<char>(depth); } +}; + +struct CordRepSubstring : public CordRep { + size_t start; // Starting offset of substring in child + CordRep* child; +}; + +// TODO(strel): replace the following logic (and related functions in cord.cc) +// with container_internal::Layout. + +// Alignment requirement for CordRepExternal so that the type erased releaser +// will be stored at a suitably aligned address. +constexpr size_t ExternalRepAlignment() { +#if defined(__STDCPP_DEFAULT_NEW_ALIGNMENT__) + return __STDCPP_DEFAULT_NEW_ALIGNMENT__; +#else + return alignof(max_align_t); +#endif +} + +// Type for function pointer that will invoke and destroy the type-erased +// releaser function object. Accepts a pointer to the releaser and the +// `string_view` that were passed in to `NewExternalRep` below. The return value +// is the size of the `Releaser` type. +using ExternalReleaserInvoker = size_t (*)(void*, absl::string_view); + +// External CordReps are allocated together with a type erased releaser. The +// releaser is stored in the memory directly following the CordRepExternal. +struct alignas(ExternalRepAlignment()) CordRepExternal : public CordRep { + const char* base; + // Pointer to function that knows how to call and destroy the releaser. + ExternalReleaserInvoker releaser_invoker; +}; + +// TODO(strel): look into removing, it doesn't seem like anything relies on this +static_assert(sizeof(CordRepConcat) == sizeof(CordRepSubstring), ""); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl +#endif // ABSL_STRINGS_INTERNAL_CORD_INTERNAL_H_ diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc new file mode 100644 index 00000000..c5271286 --- /dev/null +++ b/absl/strings/internal/escaping.cc @@ -0,0 +1,180 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/escaping.h" + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +const char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // + // (from https://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. + size_t len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from https://tools.ietf.org/html/rfc3548) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from https://tools.ietf.org/html/rfc3548) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from https://tools.ietf.org/html/rfc3548) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } + } + + assert(len >= input_len); // make sure we didn't overflow + return len; +} + +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, + bool do_padding) { + static const char kPad64 = '='; + + if (szsrc * 4 > szdest * 3) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + char* const limit_dest = dest + szdest; + const unsigned char* const limit_src = src + szsrc; + + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. + if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. + while (cur_src < limit_src - 3) { // While we have >= 32 bits. + uint32_t in = absl::big_endian::Load32(cur_src) >> 8; + + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + + cur_dest += 4; + cur_src += 3; + } + } + // To save time, we didn't update szdest or szsrc in the loop. So do it now. + szdest = limit_dest - cur_dest; + szsrc = limit_src - cur_src; + + /* now deal with the tail (<=3 bytes) */ + switch (szsrc) { + case 0: + // Nothing left; nothing more to do. + break; + case 1: { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. + if (szdest < 2) return 0; + uint32_t in = cur_src[0]; + cur_dest[0] = base64[in >> 2]; + in &= 0x3; + cur_dest[1] = base64[in << 4]; + cur_dest += 2; + szdest -= 2; + if (do_padding) { + if (szdest < 2) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + szdest -= 2; + } + break; + } + case 2: { + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if (szdest < 3) return 0; + uint32_t in = absl::big_endian::Load16(cur_src); + cur_dest[0] = base64[in >> 10]; + in &= 0x3FF; + cur_dest[1] = base64[in >> 4]; + in &= 0x00F; + cur_dest[2] = base64[in << 2]; + cur_dest += 3; + szdest -= 3; + if (do_padding) { + if (szdest < 1) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + szdest -= 1; + } + break; + } + case 3: { + // Three bytes left: same as in the big loop above. We can't do this in + // the loop because the loop above always reads 4 bytes, and the fourth + // byte is past the end of the input. + if (szdest < 4) return 0; + uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + cur_dest += 4; + szdest -= 4; + break; + } + default: + // Should not be reached: blocks of 4 bytes are handled + // in the while loop before this switch statement. + ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); + break; + } + return (cur_dest - dest); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h new file mode 100644 index 00000000..6a9ce602 --- /dev/null +++ b/absl/strings/internal/escaping.h @@ -0,0 +1,58 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_H_ + +#include <cassert> + +#include "absl/strings/internal/resize_uninitialized.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +ABSL_CONST_INIT extern const char kBase64Chars[]; + +// Calculates how long a string will be when it is base64 encoded given its +// length and whether or not the result should be padded. +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. Returns the length of `dest`. +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 3. +template <typename String> +void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, + bool do_padding, const char* base64_chars) { + const size_t calc_escaped_size = + CalculateBase64EscapedLenInternal(szsrc, do_padding); + STLStringResizeUninitialized(dest, calc_escaped_size); + + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); + assert(calc_escaped_size == escaped_len); + dest->erase(escaped_len); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_ diff --git a/absl/strings/internal/escaping_test_common.h b/absl/strings/internal/escaping_test_common.h index ecd3aa35..7b18017a 100644 --- a/absl/strings/internal/escaping_test_common.h +++ b/absl/strings/internal/escaping_test_common.h @@ -22,7 +22,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { struct base64_testcase { @@ -127,7 +127,7 @@ inline const std::array<base64_testcase, 5>& base64_strings() { } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_ESCAPING_TEST_COMMON_H_ diff --git a/absl/strings/internal/memutil.cc b/absl/strings/internal/memutil.cc index 05251377..2519c688 100644 --- a/absl/strings/internal/memutil.cc +++ b/absl/strings/internal/memutil.cc @@ -17,7 +17,7 @@ #include <cstdlib> namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { int memcasecmp(const char* s1, const char* s2, size_t len) { @@ -108,5 +108,5 @@ const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/memutil.h b/absl/strings/internal/memutil.h index 4efac989..9ad05358 100644 --- a/absl/strings/internal/memutil.h +++ b/absl/strings/internal/memutil.h @@ -69,7 +69,7 @@ #include "absl/strings/ascii.h" // for absl::ascii_tolower namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { inline char* memcat(char* dest, size_t destlen, const char* src, @@ -142,7 +142,7 @@ const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, size_t neelen); } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ diff --git a/absl/strings/internal/numbers_test_common.h b/absl/strings/internal/numbers_test_common.h index 3f6965f2..1a1e50c4 100644 --- a/absl/strings/internal/numbers_test_common.h +++ b/absl/strings/internal/numbers_test_common.h @@ -23,8 +23,10 @@ #include <limits> #include <string> +#include "absl/base/config.h" + namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { template <typename IntType> @@ -43,8 +45,9 @@ inline bool Itoa(IntType value, int base, std::string* destination) { while (value != 0) { const IntType next_value = value / base; // Can't use std::abs here because of problems when IntType is unsigned. - int remainder = value > next_value * base ? value - next_value * base - : next_value * base - value; + int remainder = + static_cast<int>(value > next_value * base ? value - next_value * base + : next_value * base - value); char c = remainder < 10 ? '0' + remainder : 'A' + remainder - 10; destination->insert(0, 1, c); value = next_value; @@ -175,7 +178,7 @@ inline const std::array<uint64_test_case, 34>& strtouint64_test_cases() { } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_NUMBERS_TEST_COMMON_H_ diff --git a/absl/strings/internal/ostringstream.cc b/absl/strings/internal/ostringstream.cc index ce2dd6c7..05324c78 100644 --- a/absl/strings/internal/ostringstream.cc +++ b/absl/strings/internal/ostringstream.cc @@ -15,7 +15,7 @@ #include "absl/strings/internal/ostringstream.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { OStringStream::Buf::int_type OStringStream::overflow(int c) { @@ -32,5 +32,5 @@ std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) { } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/ostringstream.h b/absl/strings/internal/ostringstream.h index 2cf65133..d25d6047 100644 --- a/absl/strings/internal/ostringstream.h +++ b/absl/strings/internal/ostringstream.h @@ -23,7 +23,7 @@ #include "absl/base/port.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // The same as std::ostringstream but appends to a user-specified std::string, @@ -83,7 +83,7 @@ class OStringStream : private std::basic_streambuf<char>, public std::ostream { }; } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_OSTRINGSTREAM_H_ diff --git a/absl/strings/internal/pow10_helper.cc b/absl/strings/internal/pow10_helper.cc index 5c02ab8f..42e96c34 100644 --- a/absl/strings/internal/pow10_helper.cc +++ b/absl/strings/internal/pow10_helper.cc @@ -17,7 +17,7 @@ #include <cmath> namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { namespace { @@ -118,5 +118,5 @@ double Pow10(int exp) { } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/pow10_helper.h b/absl/strings/internal/pow10_helper.h index c9a1b27f..c37c2c3f 100644 --- a/absl/strings/internal/pow10_helper.h +++ b/absl/strings/internal/pow10_helper.h @@ -22,8 +22,10 @@ #include <vector> +#include "absl/base/config.h" + namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // Computes the precise value of 10^exp. (I.e. the nearest representable @@ -32,7 +34,7 @@ namespace strings_internal { double Pow10(int exp); } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_POW10_HELPER_H_ diff --git a/absl/strings/internal/pow10_helper_test.cc b/absl/strings/internal/pow10_helper_test.cc index 4a62a70d..a4ff76d3 100644 --- a/absl/strings/internal/pow10_helper_test.cc +++ b/absl/strings/internal/pow10_helper_test.cc @@ -20,7 +20,7 @@ #include "absl/strings/str_format.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { namespace { @@ -118,5 +118,5 @@ TEST(Pow10HelperTest, Works) { } // namespace } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/resize_uninitialized.h b/absl/strings/internal/resize_uninitialized.h index ab1d8684..e42628e3 100644 --- a/absl/strings/internal/resize_uninitialized.h +++ b/absl/strings/internal/resize_uninitialized.h @@ -25,7 +25,7 @@ #include "absl/meta/type_traits.h" // for void_t namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // Is a subclass of true_type or false_type, depending on whether or not @@ -36,8 +36,7 @@ struct ResizeUninitializedTraits { static void Resize(string_type* s, size_t new_size) { s->resize(new_size); } }; -// __resize_default_init is provided by libc++ >= 8.0 and by Google's internal -// ::string implementation. +// __resize_default_init is provided by libc++ >= 8.0 template <typename string_type> struct ResizeUninitializedTraits< string_type, absl::void_t<decltype(std::declval<string_type&>() @@ -68,7 +67,7 @@ inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ diff --git a/absl/strings/internal/resize_uninitialized_test.cc b/absl/strings/internal/resize_uninitialized_test.cc index c5be0b12..0f8b3c2a 100644 --- a/absl/strings/internal/resize_uninitialized_test.cc +++ b/absl/strings/internal/resize_uninitialized_test.cc @@ -20,13 +20,27 @@ namespace { int resize_call_count = 0; +// A mock string class whose only purpose is to track how many times its +// resize() method has been called. struct resizable_string { + size_t size() const { return 0; } + char& operator[](size_t) { + static char c = '\0'; + return c; + } void resize(size_t) { resize_call_count += 1; } }; int resize_default_init_call_count = 0; +// A mock string class whose only purpose is to track how many times its +// resize() and __resize_default_init() methods have been called. struct resize_default_init_string { + size_t size() const { return 0; } + char& operator[](size_t) { + static char c = '\0'; + return c; + } void resize(size_t) { resize_call_count += 1; } void __resize_default_init(size_t) { resize_default_init_call_count += 1; } }; diff --git a/absl/strings/internal/stl_type_traits.h b/absl/strings/internal/stl_type_traits.h index af50be7c..6035ca45 100644 --- a/absl/strings/internal/stl_type_traits.h +++ b/absl/strings/internal/stl_type_traits.h @@ -40,7 +40,7 @@ #include "absl/meta/type_traits.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { template <typename C, template <typename...> class T> @@ -243,6 +243,6 @@ struct IsStrictlyBaseOfAndConvertibleToSTLContainer IsConvertibleToSTLContainer<C>> {}; } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STL_TYPE_TRAITS_H_ diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index 667cc133..4d0604e0 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -14,7 +14,7 @@ #include "absl/strings/internal/str_format/float_conversion.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -33,6 +33,10 @@ void ReducePadding(size_t n, size_t *capacity) { template <typename T> struct MakeUnsigned : std::make_unsigned<T> {}; template <> +struct MakeUnsigned<absl::int128> { + using type = absl::uint128; +}; +template <> struct MakeUnsigned<absl::uint128> { using type = absl::uint128; }; @@ -40,6 +44,8 @@ struct MakeUnsigned<absl::uint128> { template <typename T> struct IsSigned : std::is_signed<T> {}; template <> +struct IsSigned<absl::int128> : std::true_type {}; +template <> struct IsSigned<absl::uint128> : std::false_type {}; class ConvertedIntInfo { @@ -82,7 +88,7 @@ class ConvertedIntInfo { template <typename T> void UnsignedToStringRight(T u, ConversionChar conv) { char *p = end(); - switch (conv.radix()) { + switch (FormatConversionCharRadix(conv)) { default: case 10: for (; u; u /= 10) @@ -93,7 +99,7 @@ class ConvertedIntInfo { *--p = static_cast<char>('0' + static_cast<size_t>(u % 8)); break; case 16: { - const char *digits = kDigit[conv.upper() ? 1 : 0]; + const char *digits = kDigit[FormatConversionCharIsUpper(conv) ? 1 : 0]; for (; u; u /= 16) *--p = digits[static_cast<size_t>(u % 16)]; break; } @@ -115,21 +121,20 @@ class ConvertedIntInfo { string_view BaseIndicator(const ConvertedIntInfo &info, const ConversionSpec conv) { bool alt = conv.flags().alt; - int radix = conv.conv().radix(); - if (conv.conv().id() == ConversionChar::p) - alt = true; // always show 0x for %p. + int radix = FormatConversionCharRadix(conv.conv()); + if (conv.conv() == ConversionChar::p) alt = true; // always show 0x for %p. // From the POSIX description of '#' flag: // "For x or X conversion specifiers, a non-zero result shall have // 0x (or 0X) prefixed to it." if (alt && radix == 16 && !info.digits().empty()) { - if (conv.conv().upper()) return "0X"; + if (FormatConversionCharIsUpper(conv.conv())) return "0X"; return "0x"; } return {}; } string_view SignColumn(bool neg, const ConversionSpec conv) { - if (conv.conv().is_signed()) { + if (FormatConversionCharIsSigned(conv.conv())) { if (neg) return "-"; if (conv.flags().show_pos) return "+"; if (conv.flags().sign_col) return " "; @@ -169,7 +174,7 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, if (!precision_specified) precision = 1; - if (conv.flags().alt && conv.conv().id() == ConversionChar::o) { + if (conv.flags().alt && conv.conv() == ConversionChar::o) { // From POSIX description of the '#' (alt) flag: // "For o conversion, it increases the precision (if necessary) to // force the first digit of the result to be zero." @@ -205,7 +210,7 @@ bool ConvertIntImplInner(const ConvertedIntInfo &info, template <typename T> bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { ConvertedIntInfo info(v, conv.conv()); - if (conv.flags().basic && conv.conv().id() != ConversionChar::p) { + if (conv.flags().basic && (conv.conv() != ConversionChar::p)) { if (info.is_neg()) sink->Append(1, '-'); if (info.digits().empty()) { sink->Append(1, '0'); @@ -219,14 +224,13 @@ bool ConvertIntImplInner(T v, const ConversionSpec conv, FormatSinkImpl *sink) { template <typename T> bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().is_float()) { + if (FormatConversionCharIsFloat(conv.conv())) { return FormatConvertImpl(static_cast<double>(v), conv, sink).value; } - if (conv.conv().id() == ConversionChar::c) + if (conv.conv() == ConversionChar::c) return ConvertCharImpl(static_cast<unsigned char>(v), conv, sink); - if (!conv.conv().is_integral()) - return false; - if (!conv.conv().is_signed() && IsSigned<T>::value) { + if (!FormatConversionCharIsIntegral(conv.conv())) return false; + if (!FormatConversionCharIsSigned(conv.conv()) && IsSigned<T>::value) { using U = typename MakeUnsigned<T>::type; return FormatConvertImpl(static_cast<U>(v), conv, sink).value; } @@ -235,13 +239,13 @@ bool ConvertIntArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { template <typename T> bool ConvertFloatArg(T v, const ConversionSpec conv, FormatSinkImpl *sink) { - return conv.conv().is_float() && ConvertFloatImpl(v, conv, sink); + return FormatConversionCharIsFloat(conv.conv()) && + ConvertFloatImpl(v, conv, sink); } inline bool ConvertStringArg(string_view v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() != ConversionChar::s) - return false; + if (conv.conv() != ConversionChar::s) return false; if (conv.flags().basic) { sink->Append(v); return true; @@ -268,7 +272,7 @@ ConvertResult<Conv::s> FormatConvertImpl(string_view v, ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() == ConversionChar::p) + if (conv.conv() == ConversionChar::p) return {FormatConvertImpl(VoidPtr(v), conv, sink).value}; size_t len; if (v == nullptr) { @@ -276,7 +280,7 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, } else if (conv.precision() < 0) { len = std::strlen(v); } else { - // If precision is set, we look for the null terminator on the valid range. + // If precision is set, we look for the NUL-terminator on the valid range. len = std::find(v, v + conv.precision(), '\0') - v; } return {ConvertStringArg(string_view(v, len), conv, sink)}; @@ -285,8 +289,7 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char *v, // ==================== Raw pointers ==================== ConvertResult<Conv::p> FormatConvertImpl(VoidPtr v, const ConversionSpec conv, FormatSinkImpl *sink) { - if (conv.conv().id() != ConversionChar::p) - return {false}; + if (conv.conv() != ConversionChar::p) return {false}; if (!v.value) { sink->Append("(nil)"); return {true}; @@ -364,6 +367,11 @@ IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT FormatSinkImpl *sink) { return {ConvertIntArg(v, conv, sink)}; } +IntegralConvertResult FormatConvertImpl(absl::int128 v, + const ConversionSpec conv, + FormatSinkImpl *sink) { + return {ConvertIntArg(v, conv, sink)}; +} IntegralConvertResult FormatConvertImpl(absl::uint128 v, const ConversionSpec conv, FormatSinkImpl *sink) { @@ -373,7 +381,8 @@ IntegralConvertResult FormatConvertImpl(absl::uint128 v, ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(); + } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index b6f708c6..7a937563 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -18,12 +18,10 @@ #include "absl/strings/internal/str_format/extension.h" #include "absl/strings/string_view.h" -class Cord; -class CordReader; - namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN +class Cord; class FormatCountCapture; class FormatSink; @@ -68,12 +66,11 @@ ConvertResult<Conv::s | Conv::p> FormatConvertImpl(const char* v, FormatSinkImpl* sink); template <class AbslCord, typename std::enable_if< - std::is_same<AbslCord, ::Cord>::value>::type* = nullptr, - class AbslCordReader = ::CordReader> + std::is_same<AbslCord, absl::Cord>::value>::type* = nullptr> ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, ConversionSpec conv, FormatSinkImpl* sink) { - if (conv.conv().id() != ConversionChar::s) return {false}; + if (conv.conv() != ConversionChar::s) return {false}; bool is_left = conv.flags().left; size_t space_remaining = 0; @@ -91,11 +88,17 @@ ConvertResult<Conv::s> FormatConvertImpl(const AbslCord& value, if (space_remaining > 0 && !is_left) sink->Append(space_remaining, ' '); - string_view piece; - for (AbslCordReader reader(value); - to_write > 0 && reader.ReadFragment(&piece); to_write -= piece.size()) { - if (piece.size() > to_write) piece.remove_suffix(piece.size() - to_write); + for (string_view piece : value.Chunks()) { + if (piece.size() > to_write) { + piece.remove_suffix(piece.size() - to_write); + to_write = 0; + } else { + to_write -= piece.size(); + } sink->Append(piece); + if (to_write == 0) { + break; + } } if (space_remaining > 0 && is_left) sink->Append(space_remaining, ' '); @@ -145,6 +148,8 @@ IntegralConvertResult FormatConvertImpl(long long v, // NOLINT IntegralConvertResult FormatConvertImpl(unsigned long long v, // NOLINT ConversionSpec conv, FormatSinkImpl* sink); +IntegralConvertResult FormatConvertImpl(int128 v, ConversionSpec conv, + FormatSinkImpl* sink); IntegralConvertResult FormatConvertImpl(uint128 v, ConversionSpec conv, FormatSinkImpl* sink); template <typename T, enable_if_t<std::is_same<T, bool>::value, int> = 0> @@ -180,8 +185,7 @@ struct FormatCountCaptureHelper { FormatSinkImpl* sink) { const absl::enable_if_t<sizeof(T) != 0, FormatCountCapture>& v2 = v; - if (conv.conv().id() != str_format_internal::ConversionChar::n) - return {false}; + if (conv.conv() != str_format_internal::ConversionChar::n) return {false}; *v2.p_ = static_cast<int>(sink->size()); return {true}; } @@ -373,7 +377,7 @@ class FormatArgImpl { template <typename T> static bool Dispatch(Data arg, ConversionSpec spec, void* out) { // A `none` conv indicates that we want the `int` conversion. - if (ABSL_PREDICT_FALSE(spec.conv().id() == ConversionChar::none)) { + if (ABSL_PREDICT_FALSE(spec.conv() == ConversionChar::none)) { return ToInt<T>(arg, static_cast<int*>(out), std::is_integral<T>(), std::is_enum<T>()); } @@ -409,6 +413,7 @@ class FormatArgImpl { __VA_ARGS__); \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(unsigned long long, /* NOLINT */ \ __VA_ARGS__); \ + ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(int128, __VA_ARGS__); \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(uint128, __VA_ARGS__); \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(float, __VA_ARGS__); \ ABSL_INTERNAL_FORMAT_DISPATCH_INSTANTIATE_(double, __VA_ARGS__); \ @@ -419,8 +424,9 @@ class FormatArgImpl { ABSL_INTERNAL_FORMAT_DISPATCH_OVERLOADS_EXPAND_(extern); + } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_ARG_H_ diff --git a/absl/strings/internal/str_format/arg_test.cc b/absl/strings/internal/str_format/arg_test.cc index c9c51951..8d30d8b8 100644 --- a/absl/strings/internal/str_format/arg_test.cc +++ b/absl/strings/internal/str_format/arg_test.cc @@ -14,7 +14,7 @@ #include "absl/strings/str_format.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -96,10 +96,10 @@ TEST_F(FormatArgImplTest, WorksWithCharArraysOfUnknownSize) { std::string s; FormatSinkImpl sink(&s); ConversionSpec conv; - conv.set_conv(ConversionChar::FromChar('s')); - conv.set_flags(Flags()); - conv.set_width(-1); - conv.set_precision(-1); + FormatConversionSpecImplFriend::SetConversionChar(ConversionChar::s, &conv); + FormatConversionSpecImplFriend::SetFlags(Flags(), &conv); + FormatConversionSpecImplFriend::SetWidth(-1, &conv); + FormatConversionSpecImplFriend::SetPrecision(-1, &conv); EXPECT_TRUE( FormatArgImplFriend::Convert(FormatArgImpl(kMyArray), conv, &sink)); sink.Flush(); @@ -109,5 +109,5 @@ const char kMyArray[] = "ABCDE"; } // namespace } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/bind.cc b/absl/strings/internal/str_format/bind.cc index 48a306d4..27522fdb 100644 --- a/absl/strings/internal/str_format/bind.cc +++ b/absl/strings/internal/str_format/bind.cc @@ -6,7 +6,7 @@ #include <string> namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -66,19 +66,22 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, return false; } - bound->set_width(width); - bound->set_precision(precision); - bound->set_flags(unbound->flags); - if (force_left) - bound->set_left(true); + FormatConversionSpecImplFriend::SetWidth(width, bound); + FormatConversionSpecImplFriend::SetPrecision(precision, bound); + + if (force_left) { + Flags flags = unbound->flags; + flags.left = true; + FormatConversionSpecImplFriend::SetFlags(flags, bound); + } else { + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + } } else { - bound->set_flags(unbound->flags); - bound->set_width(-1); - bound->set_precision(-1); + FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); + FormatConversionSpecImplFriend::SetWidth(-1, bound); + FormatConversionSpecImplFriend::SetPrecision(-1, bound); } - - bound->set_length_mod(unbound->length_mod); - bound->set_conv(unbound->conv); + FormatConversionSpecImplFriend::SetConversionChar(unbound->conv, bound); bound->set_arg(arg); return true; } @@ -140,10 +143,11 @@ class SummarizingConverter { UntypedFormatSpecImpl spec("%d"); std::ostringstream ss; - ss << "{" << Streamable(spec, {*bound.arg()}) << ":" << bound.flags(); + ss << "{" << Streamable(spec, {*bound.arg()}) << ":" + << FormatConversionSpecImplFriend::FlagsToString(bound); if (bound.width() >= 0) ss << bound.width(); if (bound.precision() >= 0) ss << "." << bound.precision(); - ss << bound.length_mod() << bound.conv() << "}"; + ss << bound.conv() << "}"; Append(ss.str()); return true; } @@ -197,6 +201,15 @@ std::string& AppendPack(std::string* out, const UntypedFormatSpecImpl format, return *out; } +std::string FormatPack(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args) { + std::string out; + if (ABSL_PREDICT_FALSE(!FormatUntyped(&out, format, args))) { + out.clear(); + } + return out; +} + int FprintF(std::FILE* output, const UntypedFormatSpecImpl format, absl::Span<const FormatArgImpl> args) { FILERawSink sink(output); @@ -228,5 +241,5 @@ int SnprintF(char* output, size_t size, const UntypedFormatSpecImpl format, } } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h index db79eb6c..cf41b197 100644 --- a/absl/strings/internal/str_format/bind.h +++ b/absl/strings/internal/str_format/bind.h @@ -13,7 +13,7 @@ #include "absl/types/span.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN class UntypedFormatSpec; @@ -74,7 +74,7 @@ class FormatSpecTemplate using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; public: -#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER // Honeypot overload for when the std::string is not constexpr. // We use the 'unavailable' attribute to give a better compiler error than @@ -122,8 +122,8 @@ class FormatSpecTemplate #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER template <Conv... C, typename = typename std::enable_if< - sizeof...(C) == sizeof...(Args) && - AllOf(Contains(ArgumentToConv<Args>(), + AllOf(sizeof...(C) == sizeof...(Args), + Contains(ArgumentToConv<Args>(), C)...)>::type> FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT : Base(&pc) {} @@ -179,12 +179,8 @@ bool FormatUntyped(FormatRawSinkImpl raw_sink, std::string& AppendPack(std::string* out, UntypedFormatSpecImpl format, absl::Span<const FormatArgImpl> args); -inline std::string FormatPack(const UntypedFormatSpecImpl format, - absl::Span<const FormatArgImpl> args) { - std::string out; - AppendPack(&out, format, args); - return out; -} +std::string FormatPack(const UntypedFormatSpecImpl format, + absl::Span<const FormatArgImpl> args); int FprintF(std::FILE* output, UntypedFormatSpecImpl format, absl::Span<const FormatArgImpl> args); @@ -207,7 +203,7 @@ class StreamedWrapper { }; } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_BIND_H_ diff --git a/absl/strings/internal/str_format/bind_test.cc b/absl/strings/internal/str_format/bind_test.cc index 8bccc92a..64790a85 100644 --- a/absl/strings/internal/str_format/bind_test.cc +++ b/absl/strings/internal/str_format/bind_test.cc @@ -6,7 +6,7 @@ #include "gtest/gtest.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -113,18 +113,18 @@ TEST_F(FormatBindTest, FormatPack) { FormatArgImpl(ia[2]), FormatArgImpl(ia[3]), FormatArgImpl(ia[4])}; const Expectation kExpect[] = { - {__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"}, - {__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"}, - {__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"}, - {__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"}, - {__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"}, - {__LINE__, "a%.*fb", "a{20:.10f}b"}, - {__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"}, - {__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"}, - {__LINE__, "a%04ldb", "a{10:04ld}b"}, - {__LINE__, "a%-#04lldb", "a{10:-#04lld}b"}, - {__LINE__, "a%1$*5$db", "a{10:-10d}b"}, - {__LINE__, "a%1$.*5$db", "a{10:d}b"}, + {__LINE__, "a%4db%dc", "a{10:4d}b{20:d}c"}, + {__LINE__, "a%.4db%dc", "a{10:.4d}b{20:d}c"}, + {__LINE__, "a%4.5db%dc", "a{10:4.5d}b{20:d}c"}, + {__LINE__, "a%db%4.5dc", "a{10:d}b{20:4.5d}c"}, + {__LINE__, "a%db%*.*dc", "a{10:d}b{40:20.30d}c"}, + {__LINE__, "a%.*fb", "a{20:.10f}b"}, + {__LINE__, "a%1$db%2$*3$.*4$dc", "a{10:d}b{20:30.40d}c"}, + {__LINE__, "a%4$db%3$*2$.*1$dc", "a{40:d}b{30:20.10d}c"}, + {__LINE__, "a%04ldb", "a{10:04d}b"}, + {__LINE__, "a%-#04lldb", "a{10:-#04d}b"}, + {__LINE__, "a%1$*5$db", "a{10:-10d}b"}, + {__LINE__, "a%1$.*5$db", "a{10:d}b"}, }; for (const Expectation &e : kExpect) { absl::string_view fmt = e.fmt; @@ -139,5 +139,5 @@ TEST_F(FormatBindTest, FormatPack) { } // namespace } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index 262887cd..8993a79b 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -1,21 +1,20 @@ #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ +#include "absl/base/attributes.h" #include "absl/strings/internal/str_format/arg.h" #include "absl/strings/internal/str_format/extension.h" // Compile time check support for entry points. #ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER -#if defined(__clang__) && !defined(__native_client__) -#if __has_attribute(enable_if) +#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) #define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1 -#endif // __has_attribute(enable_if) -#endif // defined(__clang__) && !defined(__native_client__) +#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { constexpr bool AllOf() { return true; } @@ -32,7 +31,7 @@ constexpr Conv ArgumentToConv() { std::declval<FormatSinkImpl*>()))::kConv; } -#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER constexpr bool ContainsChar(const char* chars, char c) { return *chars == c || (*chars && ContainsChar(chars + 1, c)); @@ -321,7 +320,7 @@ constexpr bool ValidFormatImpl(string_view format) { #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_CHECKER_H_ diff --git a/absl/strings/internal/str_format/checker_test.cc b/absl/strings/internal/str_format/checker_test.cc index 2aa3b128..ea2a7681 100644 --- a/absl/strings/internal/str_format/checker_test.cc +++ b/absl/strings/internal/str_format/checker_test.cc @@ -5,7 +5,7 @@ #include "absl/strings/str_format.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -13,7 +13,7 @@ std::string ConvToString(Conv conv) { std::string out; #define CONV_SET_CASE(c) \ if (Contains(conv, Conv::c)) out += #c; - ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) #undef CONV_SET_CASE if (Contains(conv, Conv::star)) out += "*"; return out; @@ -36,7 +36,7 @@ TEST(StrFormatChecker, ArgumentToConv) { EXPECT_EQ(ConvToString(conv), "p"); } -#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER struct Case { bool result; @@ -148,5 +148,5 @@ TEST(StrFormatChecker, LongFormat) { } // namespace } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index 751bfc34..cbcd7caf 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -5,11 +5,13 @@ #include <cmath> #include <string> +#include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" #include "absl/strings/internal/str_format/bind.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -152,18 +154,26 @@ TEST_F(FormatConvertTest, StringPrecision) { UntypedFormatSpecImpl format("%.1s"); EXPECT_EQ("a", FormatPack(format, {FormatArgImpl(p)})); - // We cap at the nul terminator. + // We cap at the NUL-terminator. p = "ABC"; UntypedFormatSpecImpl format2("%.10s"); EXPECT_EQ("ABC", FormatPack(format2, {FormatArgImpl(p)})); } +// Pointer formatting is implementation defined. This checks that the argument +// can be matched to `ptr`. +MATCHER_P(MatchesPointerString, ptr, "") { + if (ptr == nullptr && arg == "(nil)") { + return true; + } + void* parsed = nullptr; + if (sscanf(arg.c_str(), "%p", &parsed) != 1) { + ABSL_RAW_LOG(FATAL, "Could not parse %s", arg.c_str()); + } + return ptr == parsed; +} + TEST_F(FormatConvertTest, Pointer) { -#ifdef _MSC_VER - // MSVC's printf implementation prints pointers differently. We can't easily - // compare our implementation to theirs. - return; -#endif static int x = 0; const int *xp = &x; char c = 'h'; @@ -174,48 +184,62 @@ TEST_F(FormatConvertTest, Pointer) { using VoidF = void (*)(); VoidF fp = [] {}, fnil = nullptr; volatile char vc; - volatile char* vcp = &vc; - volatile char* vcnil = nullptr; - const FormatArgImpl args[] = { + volatile char *vcp = &vc; + volatile char *vcnil = nullptr; + const FormatArgImpl args_array[] = { FormatArgImpl(xp), FormatArgImpl(cp), FormatArgImpl(inil), FormatArgImpl(cnil), FormatArgImpl(mcp), FormatArgImpl(fp), FormatArgImpl(fnil), FormatArgImpl(vcp), FormatArgImpl(vcnil), }; - struct Expectation { - std::string out; - const char *fmt; - }; - const Expectation kExpect[] = { - {StrPrint("%p", &x), "%p"}, - {StrPrint("%20p", &x), "%20p"}, - {StrPrint("%.1p", &x), "%.1p"}, - {StrPrint("%.20p", &x), "%.20p"}, - {StrPrint("%30.20p", &x), "%30.20p"}, - - {StrPrint("%-p", &x), "%-p"}, - {StrPrint("%-20p", &x), "%-20p"}, - {StrPrint("%-.1p", &x), "%-.1p"}, - {StrPrint("%.20p", &x), "%.20p"}, - {StrPrint("%-30.20p", &x), "%-30.20p"}, - - {StrPrint("%p", cp), "%2$p"}, // const char* - {"(nil)", "%3$p"}, // null const char * - {"(nil)", "%4$p"}, // null const int * - {StrPrint("%p", mcp), "%5$p"}, // nonconst char* - - {StrPrint("%p", fp), "%6$p"}, // function pointer - {StrPrint("%p", vcp), "%8$p"}, // function pointer - -#ifndef __APPLE__ - // Apple's printf differs here (0x0 vs. nil) - {StrPrint("%p", fnil), "%7$p"}, // null function pointer - {StrPrint("%p", vcnil), "%9$p"}, // null function pointer -#endif - }; - for (const Expectation &e : kExpect) { - UntypedFormatSpecImpl format(e.fmt); - EXPECT_EQ(e.out, FormatPack(format, absl::MakeSpan(args))) << e.fmt; - } + auto args = absl::MakeConstSpan(args_array); + + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.1p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%30.20p"), args), + MatchesPointerString(&x)); + + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-.1p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%.20p"), args), + MatchesPointerString(&x)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%-30.20p"), args), + MatchesPointerString(&x)); + + // const char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%2$p"), args), + MatchesPointerString(cp)); + // null const int* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%3$p"), args), + MatchesPointerString(nullptr)); + // null const char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%4$p"), args), + MatchesPointerString(nullptr)); + // nonconst char* + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%5$p"), args), + MatchesPointerString(mcp)); + + // function pointers + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%6$p"), args), + MatchesPointerString(reinterpret_cast<const void*>(fp))); + EXPECT_THAT( + FormatPack(UntypedFormatSpecImpl("%8$p"), args), + MatchesPointerString(reinterpret_cast<volatile const void *>(vcp))); + + // null function pointers + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%7$p"), args), + MatchesPointerString(nullptr)); + EXPECT_THAT(FormatPack(UntypedFormatSpecImpl("%9$p"), args), + MatchesPointerString(nullptr)); } struct Cardinal { @@ -367,7 +391,6 @@ typedef ::testing::Types< AllIntTypes; INSTANTIATE_TYPED_TEST_CASE_P(TypedFormatConvertTestWithAllIntTypes, TypedFormatConvertTest, AllIntTypes); - TEST_F(FormatConvertTest, VectorBool) { // Make sure vector<bool>'s values behave as bools. std::vector<bool> v = {true, false}; @@ -379,6 +402,42 @@ TEST_F(FormatConvertTest, VectorBool) { FormatArgImpl(cv[0]), FormatArgImpl(cv[1])}))); } + +TEST_F(FormatConvertTest, Int128) { + absl::int128 positive = static_cast<absl::int128>(0x1234567890abcdef) * 1979; + absl::int128 negative = -positive; + absl::int128 max = absl::Int128Max(), min = absl::Int128Min(); + const FormatArgImpl args[] = {FormatArgImpl(positive), + FormatArgImpl(negative), FormatArgImpl(max), + FormatArgImpl(min)}; + + struct Case { + const char* format; + const char* expected; + } cases[] = { + {"%1$d", "2595989796776606496405"}, + {"%1$30d", " 2595989796776606496405"}, + {"%1$-30d", "2595989796776606496405 "}, + {"%1$u", "2595989796776606496405"}, + {"%1$x", "8cba9876066020f695"}, + {"%2$d", "-2595989796776606496405"}, + {"%2$30d", " -2595989796776606496405"}, + {"%2$-30d", "-2595989796776606496405 "}, + {"%2$u", "340282366920938460867384810655161715051"}, + {"%2$x", "ffffffffffffff73456789f99fdf096b"}, + {"%3$d", "170141183460469231731687303715884105727"}, + {"%3$u", "170141183460469231731687303715884105727"}, + {"%3$x", "7fffffffffffffffffffffffffffffff"}, + {"%4$d", "-170141183460469231731687303715884105728"}, + {"%4$x", "80000000000000000000000000000000"}, + }; + + for (auto c : cases) { + UntypedFormatSpecImpl format(c.format); + EXPECT_EQ(c.expected, FormatPack(format, absl::MakeSpan(args))); + } +} + TEST_F(FormatConvertTest, Uint128) { absl::uint128 v = static_cast<absl::uint128>(0x1234567890abcdef) * 1979; absl::uint128 max = absl::Uint128Max(); @@ -588,5 +647,5 @@ TEST_F(FormatConvertTest, ExpectedFailures) { } // namespace } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/extension.cc b/absl/strings/internal/str_format/extension.cc index 1a0c4172..2e5bc2ce 100644 --- a/absl/strings/internal/str_format/extension.cc +++ b/absl/strings/internal/str_format/extension.cc @@ -20,39 +20,8 @@ #include <string> namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { -namespace { -// clang-format off -#define ABSL_LENGTH_MODS_EXPAND_ \ - X_VAL(h) X_SEP \ - X_VAL(hh) X_SEP \ - X_VAL(l) X_SEP \ - X_VAL(ll) X_SEP \ - X_VAL(L) X_SEP \ - X_VAL(j) X_SEP \ - X_VAL(z) X_SEP \ - X_VAL(t) X_SEP \ - X_VAL(q) -// clang-format on -} // namespace - -const LengthMod::Spec LengthMod::kSpecs[] = { -#define X_VAL(id) { LengthMod::id, #id, strlen(#id) } -#define X_SEP , - ABSL_LENGTH_MODS_EXPAND_, {LengthMod::none, "", 0} -#undef X_VAL -#undef X_SEP -}; - -const ConversionChar::Spec ConversionChar::kSpecs[] = { -#define X_VAL(id) { ConversionChar::id, #id[0] } -#define X_SEP , - ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP), - {ConversionChar::none, '\0'}, -#undef X_VAL -#undef X_SEP -}; std::string Flags::ToString() const { std::string s; @@ -64,10 +33,6 @@ std::string Flags::ToString() const { return s; } -const size_t LengthMod::kNumValues; - -const size_t ConversionChar::kNumValues; - bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { size_t space_remaining = 0; if (w >= 0) space_remaining = w; @@ -82,5 +47,5 @@ bool FormatSinkImpl::PutPaddedString(string_view v, int w, int p, bool l) { } } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index 4fbe4a06..d1665753 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -17,19 +17,18 @@ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ #include <limits.h> + #include <cstddef> #include <cstring> #include <ostream> +#include "absl/base/config.h" #include "absl/base/port.h" #include "absl/strings/internal/str_format/output.h" #include "absl/strings/string_view.h" -class Cord; - namespace absl { -inline namespace lts_2019_08_08 { - +ABSL_NAMESPACE_BEGIN namespace str_format_internal { class FormatRawSinkImpl { @@ -137,56 +136,8 @@ struct Flags { } }; -struct LengthMod { - public: - enum Id : uint8_t { - h, hh, l, ll, L, j, z, t, q, none - }; - static const size_t kNumValues = none + 1; - - LengthMod() : id_(none) {} - - // Index into the opaque array of LengthMod enums. - // Requires: i < kNumValues - static LengthMod FromIndex(size_t i) { - return LengthMod(kSpecs[i].value); - } - - static LengthMod FromId(Id id) { return LengthMod(id); } - - // The length modifier std::string associated with a specified LengthMod. - string_view name() const { - const Spec& spec = kSpecs[id_]; - return {spec.name, spec.name_length}; - } - - Id id() const { return id_; } - - friend bool operator==(const LengthMod& a, const LengthMod& b) { - return a.id() == b.id(); - } - friend bool operator!=(const LengthMod& a, const LengthMod& b) { - return !(a == b); - } - friend std::ostream& operator<<(std::ostream& os, const LengthMod& v) { - return os << v.name(); - } - - private: - struct Spec { - Id value; - const char *name; - size_t name_length; - }; - static const Spec kSpecs[]; - - explicit LengthMod(Id id) : id_(id) {} - - Id id_; -}; - // clang-format off -#define ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ +#define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ /* text */ \ X_VAL(c) X_SEP X_VAL(C) X_SEP X_VAL(s) X_SEP X_VAL(S) X_SEP \ /* ints */ \ @@ -197,121 +148,135 @@ struct LengthMod { X_VAL(g) X_SEP X_VAL(G) X_SEP X_VAL(a) X_SEP X_VAL(A) X_SEP \ /* misc */ \ X_VAL(n) X_SEP X_VAL(p) -// clang-format on -struct ConversionChar { - public: - enum Id : uint8_t { +enum class FormatConversionChar : uint8_t { c, C, s, S, // text d, i, o, u, x, X, // int f, F, e, E, g, G, a, A, // float n, p, // misc - none - }; - static const size_t kNumValues = none + 1; - - ConversionChar() : id_(none) {} - - public: - // Index into the opaque array of ConversionChar enums. - // Requires: i < kNumValues - static ConversionChar FromIndex(size_t i) { - return ConversionChar(kSpecs[i].value); - } + kNone, + none = kNone +}; +// clang-format on - static ConversionChar FromChar(char c) { - ConversionChar::Id out_id = ConversionChar::none; - switch (c) { -#define X_VAL(id) \ - case #id[0]: \ - out_id = ConversionChar::id; \ - break; - ABSL_CONVERSION_CHARS_EXPAND_(X_VAL, ) -#undef X_VAL - default: - break; - } - return ConversionChar(out_id); +inline FormatConversionChar FormatConversionCharFromChar(char c) { + switch (c) { +#define ABSL_INTERNAL_X_VAL(id) \ + case #id[0]: \ + return FormatConversionChar::id; + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) +#undef ABSL_INTERNAL_X_VAL } + return FormatConversionChar::kNone; +} - static ConversionChar FromId(Id id) { return ConversionChar(id); } - Id id() const { return id_; } - - int radix() const { - switch (id()) { - case x: case X: case a: case A: case p: return 16; - case o: return 8; - default: return 10; - } +inline int FormatConversionCharRadix(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::x: + case FormatConversionChar::X: + case FormatConversionChar::a: + case FormatConversionChar::A: + case FormatConversionChar::p: + return 16; + case FormatConversionChar::o: + return 8; + default: + return 10; } +} - bool upper() const { - switch (id()) { - case X: case F: case E: case G: case A: return true; - default: return false; - } +inline bool FormatConversionCharIsUpper(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::X: + case FormatConversionChar::F: + case FormatConversionChar::E: + case FormatConversionChar::G: + case FormatConversionChar::A: + return true; + default: + return false; } +} - bool is_signed() const { - switch (id()) { - case d: case i: return true; - default: return false; - } +inline bool FormatConversionCharIsSigned(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::d: + case FormatConversionChar::i: + return true; + default: + return false; } +} - bool is_integral() const { - switch (id()) { - case d: case i: case u: case o: case x: case X: - return true; - default: return false; - } +inline bool FormatConversionCharIsIntegral(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::d: + case FormatConversionChar::i: + case FormatConversionChar::u: + case FormatConversionChar::o: + case FormatConversionChar::x: + case FormatConversionChar::X: + return true; + default: + return false; } +} - bool is_float() const { - switch (id()) { - case a: case e: case f: case g: case A: case E: case F: case G: - return true; - default: return false; - } +inline bool FormatConversionCharIsFloat(FormatConversionChar c) { + switch (c) { + case FormatConversionChar::a: + case FormatConversionChar::e: + case FormatConversionChar::f: + case FormatConversionChar::g: + case FormatConversionChar::A: + case FormatConversionChar::E: + case FormatConversionChar::F: + case FormatConversionChar::G: + return true; + default: + return false; } +} - bool IsValid() const { return id() != none; } - - // The associated char. - char Char() const { return kSpecs[id_].name; } - - friend bool operator==(const ConversionChar& a, const ConversionChar& b) { - return a.id() == b.id(); - } - friend bool operator!=(const ConversionChar& a, const ConversionChar& b) { - return !(a == b); +inline char FormatConversionCharToChar(FormatConversionChar c) { + switch (c) { +#define ABSL_INTERNAL_X_VAL(e) \ + case FormatConversionChar::e: \ + return #e[0]; +#define ABSL_INTERNAL_X_SEP + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, + ABSL_INTERNAL_X_SEP) + case FormatConversionChar::kNone: + return '\0'; +#undef ABSL_INTERNAL_X_VAL +#undef ABSL_INTERNAL_X_SEP } - friend std::ostream& operator<<(std::ostream& os, const ConversionChar& v) { - char c = v.Char(); - if (!c) c = '?'; - return os << c; - } - - private: - struct Spec { - Id value; - char name; - }; - static const Spec kSpecs[]; + return '\0'; +} - explicit ConversionChar(Id id) : id_(id) {} +// The associated char. +inline std::ostream& operator<<(std::ostream& os, FormatConversionChar v) { + char c = FormatConversionCharToChar(v); + if (!c) c = '?'; + return os << c; +} - Id id_; -}; +struct FormatConversionSpecImplFriend; -class ConversionSpec { +class FormatConversionSpec { public: - Flags flags() const { return flags_; } - LengthMod length_mod() const { return length_mod_; } - ConversionChar conv() const { + // Width and precison are not specified, no flags are set. + bool is_basic() const { return flags_.basic; } + bool has_left_flag() const { return flags_.left; } + bool has_show_pos_flag() const { return flags_.show_pos; } + bool has_sign_col_flag() const { return flags_.sign_col; } + bool has_alt_flag() const { return flags_.alt; } + bool has_zero_flag() const { return flags_.zero; } + + FormatConversionChar conversion_char() const { // Keep this field first in the struct . It generates better code when // accessing it when ConversionSpec is passed by value in registers. - static_assert(offsetof(ConversionSpec, conv_) == 0, ""); + static_assert(offsetof(FormatConversionSpec, conv_) == 0, ""); return conv_; } @@ -322,46 +287,71 @@ class ConversionSpec { // negative value. int precision() const { return precision_; } - void set_flags(Flags f) { flags_ = f; } - void set_length_mod(LengthMod lm) { length_mod_ = lm; } - void set_conv(ConversionChar c) { conv_ = c; } - void set_width(int w) { width_ = w; } - void set_precision(int p) { precision_ = p; } - void set_left(bool b) { flags_.left = b; } + // Deprecated (use has_x_flag() instead). + Flags flags() const { return flags_; } + // Deprecated + FormatConversionChar conv() const { return conversion_char(); } private: - ConversionChar conv_; + friend struct str_format_internal::FormatConversionSpecImplFriend; + FormatConversionChar conv_ = FormatConversionChar::kNone; Flags flags_; - LengthMod length_mod_; int width_; int precision_; }; -constexpr uint64_t ConversionCharToConvValue(char conv) { +struct FormatConversionSpecImplFriend final { + static void SetFlags(Flags f, FormatConversionSpec* conv) { + conv->flags_ = f; + } + static void SetConversionChar(FormatConversionChar c, + FormatConversionSpec* conv) { + conv->conv_ = c; + } + static void SetWidth(int w, FormatConversionSpec* conv) { conv->width_ = w; } + static void SetPrecision(int p, FormatConversionSpec* conv) { + conv->precision_ = p; + } + static std::string FlagsToString(const FormatConversionSpec& spec) { + return spec.flags_.ToString(); + } +}; + +constexpr uint64_t FormatConversionCharToConvValue(char conv) { return -#define CONV_SET_CASE(c) \ - conv == #c[0] ? (uint64_t{1} << (1 + ConversionChar::Id::c)): - ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) -#undef CONV_SET_CASE +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + conv == #c[0] \ + ? (uint64_t{1} << (1 + static_cast<uint8_t>(FormatConversionChar::c))) \ + : + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE conv == '*' ? 1 : 0; } -enum class Conv : uint64_t { -#define CONV_SET_CASE(c) c = ConversionCharToConvValue(#c[0]), - ABSL_CONVERSION_CHARS_EXPAND_(CONV_SET_CASE, ) -#undef CONV_SET_CASE +enum class FormatConversionCharSet : uint64_t { +#define ABSL_INTERNAL_CHAR_SET_CASE(c) \ + c = FormatConversionCharToConvValue(#c[0]), + ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) +#undef ABSL_INTERNAL_CHAR_SET_CASE // Used for width/precision '*' specification. - star = ConversionCharToConvValue('*'), - + kStar = FormatConversionCharToConvValue('*'), // Some predefined values: - integral = d | i | u | o | x | X, - floating = a | e | f | g | A | E | F | G, - numeric = integral | floating, - string = s, - pointer = p + kIntegral = d | i | u | o | x | X, + kFloating = a | e | f | g | A | E | F | G, + kNumeric = kIntegral | kFloating, + kString = s, + kPointer = p, + + // The following are deprecated + star = kStar, + integral = kIntegral, + floating = kFloating, + numeric = kNumeric, + string = kString, + pointer = kPointer }; // Type safe OR operator. @@ -369,45 +359,57 @@ enum class Conv : uint64_t { // 1. operator| on enums makes them decay to integers and the result is an // integer. We need the result to stay as an enum. // 2. We use "enum class" which would not work even if we accepted the decay. -constexpr Conv operator|(Conv a, Conv b) { - return Conv(static_cast<uint64_t>(a) | static_cast<uint64_t>(b)); +constexpr FormatConversionCharSet operator|(FormatConversionCharSet a, + FormatConversionCharSet b) { + return FormatConversionCharSet(static_cast<uint64_t>(a) | + static_cast<uint64_t>(b)); } // Get a conversion with a single character in it. -constexpr Conv ConversionCharToConv(char c) { - return Conv(ConversionCharToConvValue(c)); +constexpr FormatConversionCharSet ConversionCharToConv(char c) { + return FormatConversionCharSet(FormatConversionCharToConvValue(c)); } // Checks whether `c` exists in `set`. -constexpr bool Contains(Conv set, char c) { - return (static_cast<uint64_t>(set) & ConversionCharToConvValue(c)) != 0; +constexpr bool Contains(FormatConversionCharSet set, char c) { + return (static_cast<uint64_t>(set) & FormatConversionCharToConvValue(c)) != 0; } // Checks whether all the characters in `c` are contained in `set` -constexpr bool Contains(Conv set, Conv c) { +constexpr bool Contains(FormatConversionCharSet set, + FormatConversionCharSet c) { return (static_cast<uint64_t>(set) & static_cast<uint64_t>(c)) == static_cast<uint64_t>(c); } // Return type of the AbslFormatConvert() functions. -// The Conv template parameter is used to inform the framework of what -// conversion characters are supported by that AbslFormatConvert routine. -template <Conv C> -struct ConvertResult { - static constexpr Conv kConv = C; +// The FormatConversionCharSet template parameter is used to inform the +// framework of what conversion characters are supported by that +// AbslFormatConvert routine. +template <FormatConversionCharSet C> +struct FormatConvertResult { + static constexpr FormatConversionCharSet kConv = C; bool value; }; -template <Conv C> -constexpr Conv ConvertResult<C>::kConv; + +template <FormatConversionCharSet C> +constexpr FormatConversionCharSet FormatConvertResult<C>::kConv; // Return capacity - used, clipped to a minimum of 0. inline size_t Excess(size_t used, size_t capacity) { return used < capacity ? capacity - used : 0; } +// Type alias for use during migration. +using ConversionChar = FormatConversionChar; +using ConversionSpec = FormatConversionSpec; +using Conv = FormatConversionCharSet; +template <FormatConversionCharSet C> +using ConvertResult = FormatConvertResult<C>; + } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_EXTENSION_H_ diff --git a/absl/strings/internal/str_format/float_conversion.cc b/absl/strings/internal/str_format/float_conversion.cc index a0484feb..d4c647c3 100644 --- a/absl/strings/internal/str_format/float_conversion.cc +++ b/absl/strings/internal/str_format/float_conversion.cc @@ -9,7 +9,7 @@ #include "absl/base/config.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -28,12 +28,12 @@ bool FallbackToSnprintf(const Float v, const ConversionSpec &conv, { char *fp = fmt; *fp++ = '%'; - fp = CopyStringTo(conv.flags().ToString(), fp); + fp = CopyStringTo(FormatConversionSpecImplFriend::FlagsToString(conv), fp); fp = CopyStringTo("*.*", fp); if (std::is_same<long double, Float>()) { *fp++ = 'L'; } - *fp++ = conv.conv().Char(); + *fp++ = FormatConversionCharToChar(conv.conv()); *fp = 0; assert(fp < fmt + sizeof(fmt)); } @@ -100,9 +100,11 @@ bool ConvertNonNumericFloats(char sign_char, Float v, char text[4], *ptr = text; if (sign_char) *ptr++ = sign_char; if (std::isnan(v)) { - ptr = std::copy_n(conv.conv().upper() ? "NAN" : "nan", 3, ptr); + ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "NAN" : "nan", + 3, ptr); } else if (std::isinf(v)) { - ptr = std::copy_n(conv.conv().upper() ? "INF" : "inf", 3, ptr); + ptr = std::copy_n(FormatConversionCharIsUpper(conv.conv()) ? "INF" : "inf", + 3, ptr); } else { return false; } @@ -399,7 +401,7 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, Buffer buffer; - switch (conv.conv().id()) { + switch (conv.conv()) { case ConversionChar::f: case ConversionChar::F: if (!FloatToBuffer<FormatStyle::Fixed>(decomposed, precision, &buffer, @@ -416,7 +418,8 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, return FallbackToSnprintf(v, conv, sink); } if (!conv.flags().alt && buffer.back() == '.') buffer.pop_back(); - PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', + &buffer); break; case ConversionChar::g: @@ -447,7 +450,10 @@ bool FloatToSink(const Float v, const ConversionSpec &conv, while (buffer.back() == '0') buffer.pop_back(); if (buffer.back() == '.') buffer.pop_back(); } - if (exp) PrintExponent(exp, conv.conv().upper() ? 'E' : 'e', &buffer); + if (exp) { + PrintExponent(exp, FormatConversionCharIsUpper(conv.conv()) ? 'E' : 'e', + &buffer); + } break; case ConversionChar::a: @@ -483,5 +489,5 @@ bool ConvertFloatImpl(double v, const ConversionSpec &conv, } } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/float_conversion.h b/absl/strings/internal/str_format/float_conversion.h index 5d76ce2b..49a6a636 100644 --- a/absl/strings/internal/str_format/float_conversion.h +++ b/absl/strings/internal/str_format/float_conversion.h @@ -4,7 +4,7 @@ #include "absl/strings/internal/str_format/extension.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { bool ConvertFloatImpl(float v, const ConversionSpec &conv, @@ -17,7 +17,7 @@ bool ConvertFloatImpl(long double v, const ConversionSpec &conv, FormatSinkImpl *sink); } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_FLOAT_CONVERSION_H_ diff --git a/absl/strings/internal/str_format/output.cc b/absl/strings/internal/str_format/output.cc index d5ead8d5..c4b24706 100644 --- a/absl/strings/internal/str_format/output.cc +++ b/absl/strings/internal/str_format/output.cc @@ -18,7 +18,7 @@ #include <cstring> namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -68,5 +68,5 @@ void FILERawSink::Write(string_view v) { } } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/output.h b/absl/strings/internal/str_format/output.h index ba847471..28b288b7 100644 --- a/absl/strings/internal/str_format/output.h +++ b/absl/strings/internal/str_format/output.h @@ -28,10 +28,11 @@ #include "absl/base/port.h" #include "absl/strings/string_view.h" +namespace absl { +ABSL_NAMESPACE_BEGIN + class Cord; -namespace absl { -inline namespace lts_2019_08_08 { namespace str_format_internal { // RawSink implementation that writes into a char* buffer. @@ -77,7 +78,7 @@ inline void AbslFormatFlush(std::ostream* out, string_view s) { } template <class AbslCord, typename = typename std::enable_if< - std::is_same<AbslCord, ::Cord>::value>::type> + std::is_same<AbslCord, absl::Cord>::value>::type> inline void AbslFormatFlush(AbslCord* out, string_view s) { out->Append(s); } @@ -97,7 +98,7 @@ auto InvokeFlush(T* out, string_view s) } } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ diff --git a/absl/strings/internal/str_format/output_test.cc b/absl/strings/internal/str_format/output_test.cc index 5c3c4afb..e54e6f70 100644 --- a/absl/strings/internal/str_format/output_test.cc +++ b/absl/strings/internal/str_format/output_test.cc @@ -21,7 +21,7 @@ #include "gtest/gtest.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace { TEST(InvokeFlush, String) { @@ -68,6 +68,6 @@ TEST(BufferRawSink, Limits) { } } // namespace -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index 4b8ca4d2..aab68db9 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -14,11 +14,12 @@ #include <unordered_set> namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { -using CC = ConversionChar::Id; -using LM = LengthMod::Id; +using CC = ConversionChar; +using LM = LengthMod; + ABSL_CONST_INIT const ConvTag kTags[256] = { {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f @@ -205,11 +206,11 @@ flags_done: using str_format_internal::LengthMod; LengthMod length_mod = tag.as_length(); ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - if (c == 'h' && length_mod.id() == LengthMod::h) { - conv->length_mod = LengthMod::FromId(LengthMod::hh); + if (c == 'h' && length_mod == LengthMod::h) { + conv->length_mod = LengthMod::hh; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); - } else if (c == 'l' && length_mod.id() == LengthMod::l) { - conv->length_mod = LengthMod::FromId(LengthMod::ll); + } else if (c == 'l' && length_mod == LengthMod::l) { + conv->length_mod = LengthMod::ll; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } else { conv->length_mod = length_mod; @@ -228,6 +229,32 @@ flags_done: } // namespace +std::string LengthModToString(LengthMod v) { + switch (v) { + case LengthMod::h: + return "h"; + case LengthMod::hh: + return "hh"; + case LengthMod::l: + return "l"; + case LengthMod::ll: + return "ll"; + case LengthMod::L: + return "L"; + case LengthMod::j: + return "j"; + case LengthMod::z: + return "z"; + case LengthMod::t: + return "t"; + case LengthMod::q: + return "q"; + case LengthMod::none: + return ""; + } + return ""; +} + const char *ConsumeUnboundConversion(const char *p, const char *end, UnboundConversion *conv, int *next_arg) { if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg); @@ -295,11 +322,13 @@ bool ParsedFormatBase::MatchesConversions( if (conv.width.is_from_arg() && !add_if_valid_conv(conv.width.get_from_arg(), '*')) return false; - if (!add_if_valid_conv(conv.arg_position, conv.conv.Char())) return false; + if (!add_if_valid_conv(conv.arg_position, + FormatConversionCharToChar(conv.conv))) + return false; } return used.size() == convs.size() || allow_ignored; } } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index d3357fde..45c90d1d 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -6,19 +6,25 @@ #include <stdlib.h> #include <cassert> +#include <cstdint> #include <initializer_list> #include <iosfwd> #include <iterator> #include <memory> +#include <string> #include <vector> #include "absl/strings/internal/str_format/checker.h" #include "absl/strings/internal/str_format/extension.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { +enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none }; + +std::string LengthModToString(LengthMod v); + // The analyzed properties of a single specified conversion. struct UnboundConversion { UnboundConversion() @@ -60,8 +66,8 @@ struct UnboundConversion { InputValue precision; Flags flags; - LengthMod length_mod; - ConversionChar conv; + LengthMod length_mod = LengthMod::none; + ConversionChar conv = FormatConversionChar::kNone; }; // Consume conversion spec prefix (not including '%') of [p, end) if valid. @@ -73,13 +79,16 @@ const char* ConsumeUnboundConversion(const char* p, const char* end, UnboundConversion* conv, int* next_arg); // Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod` checking and conversions. +// It allows fast `char -> ConversionChar/LengthMod` checking and +// conversions. class ConvTag { public: - constexpr ConvTag(ConversionChar::Id id) : tag_(id) {} // NOLINT + constexpr ConvTag(ConversionChar conversion_char) // NOLINT + : tag_(static_cast<int8_t>(conversion_char)) {} // We invert the length modifiers to make them negative so that we can easily // test for them. - constexpr ConvTag(LengthMod::Id id) : tag_(~id) {} // NOLINT + constexpr ConvTag(LengthMod length_mod) // NOLINT + : tag_(~static_cast<std::int8_t>(length_mod)) {} // Everything else is -128, which is negative to make is_conv() simpler. constexpr ConvTag() : tag_(-128) {} @@ -87,11 +96,11 @@ class ConvTag { bool is_length() const { return tag_ < 0 && tag_ != -128; } ConversionChar as_conv() const { assert(is_conv()); - return ConversionChar::FromId(static_cast<ConversionChar::Id>(tag_)); + return static_cast<ConversionChar>(tag_); } LengthMod as_length() const { assert(is_length()); - return LengthMod::FromId(static_cast<LengthMod::Id>(~tag_)); + return static_cast<LengthMod>(~tag_); } private: @@ -275,7 +284,7 @@ template <str_format_internal::Conv... C> class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { public: explicit ExtendedParsedFormat(string_view format) -#if ABSL_INTERNAL_ENABLE_FORMAT_CHECKER +#ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER __attribute__(( enable_if(str_format_internal::EnsureConstexpr(format), "Format std::string is not constexpr."), @@ -318,7 +327,7 @@ class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase { : ParsedFormatBase(s, allow_ignored, {C...}) {} }; } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_ diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index d77a8ea5..1b1ee030 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -7,7 +7,7 @@ #include "absl/base/macros.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace str_format_internal { namespace { @@ -17,7 +17,7 @@ using testing::Pair; TEST(LengthModTest, Names) { struct Expectation { int line; - LengthMod::Id id; + LengthMod mod; const char *name; }; const Expectation kExpect[] = { @@ -32,18 +32,16 @@ TEST(LengthModTest, Names) { {__LINE__, LengthMod::t, "t" }, {__LINE__, LengthMod::q, "q" }, }; - EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), LengthMod::kNumValues); + EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), 10); for (auto e : kExpect) { SCOPED_TRACE(e.line); - LengthMod mod = LengthMod::FromId(e.id); - EXPECT_EQ(e.id, mod.id()); - EXPECT_EQ(e.name, mod.name()); + EXPECT_EQ(e.name, LengthModToString(e.mod)); } } TEST(ConversionCharTest, Names) { struct Expectation { - ConversionChar::Id id; + ConversionChar id; char name; }; // clang-format off @@ -57,12 +55,10 @@ TEST(ConversionCharTest, Names) { {ConversionChar::none, '\0'}, }; // clang-format on - EXPECT_EQ(ABSL_ARRAYSIZE(kExpect), ConversionChar::kNumValues); for (auto e : kExpect) { SCOPED_TRACE(e.name); - ConversionChar v = ConversionChar::FromId(e.id); - EXPECT_EQ(e.id, v.id()); - EXPECT_EQ(e.name, v.Char()); + ConversionChar v = e.id; + EXPECT_EQ(e.name, FormatConversionCharToChar(v)); } } @@ -121,13 +117,12 @@ TEST_F(ConsumeUnboundConversionTest, BasicConversion) { EXPECT_FALSE(Run("dd")); // no excess allowed EXPECT_TRUE(Run("d")); - EXPECT_EQ('d', o.conv.Char()); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); EXPECT_FALSE(o.width.is_from_arg()); EXPECT_LT(o.width.value(), 0); EXPECT_FALSE(o.precision.is_from_arg()); EXPECT_LT(o.precision.value(), 0); EXPECT_EQ(1, o.arg_position); - EXPECT_EQ(LengthMod::none, o.length_mod.id()); } TEST_F(ConsumeUnboundConversionTest, ArgPosition) { @@ -163,7 +158,7 @@ TEST_F(ConsumeUnboundConversionTest, ArgPosition) { TEST_F(ConsumeUnboundConversionTest, WidthAndPrecision) { EXPECT_TRUE(Run("14d")); - EXPECT_EQ('d', o.conv.Char()); + EXPECT_EQ('d', FormatConversionCharToChar(o.conv)); EXPECT_FALSE(o.width.is_from_arg()); EXPECT_EQ(14, o.width.value()); EXPECT_FALSE(o.precision.is_from_arg()); @@ -290,6 +285,29 @@ TEST_F(ConsumeUnboundConversionTest, BasicFlag) { } } +TEST_F(ConsumeUnboundConversionTest, LengthMod) { + EXPECT_TRUE(Run("d")); + EXPECT_EQ(LengthMod::none, o.length_mod); + EXPECT_TRUE(Run("hd")); + EXPECT_EQ(LengthMod::h, o.length_mod); + EXPECT_TRUE(Run("hhd")); + EXPECT_EQ(LengthMod::hh, o.length_mod); + EXPECT_TRUE(Run("ld")); + EXPECT_EQ(LengthMod::l, o.length_mod); + EXPECT_TRUE(Run("lld")); + EXPECT_EQ(LengthMod::ll, o.length_mod); + EXPECT_TRUE(Run("Lf")); + EXPECT_EQ(LengthMod::L, o.length_mod); + EXPECT_TRUE(Run("qf")); + EXPECT_EQ(LengthMod::q, o.length_mod); + EXPECT_TRUE(Run("jd")); + EXPECT_EQ(LengthMod::j, o.length_mod); + EXPECT_TRUE(Run("zd")); + EXPECT_EQ(LengthMod::z, o.length_mod); + EXPECT_TRUE(Run("td")); + EXPECT_EQ(LengthMod::t, o.length_mod); +} + struct SummarizeConsumer { std::string* out; explicit SummarizeConsumer(std::string* out) : out(out) {} @@ -310,7 +328,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += conv.conv.Char(); + *out += FormatConversionCharToChar(conv.conv); *out += "}"; return true; } @@ -390,5 +408,5 @@ TEST_F(ParsedFormatTest, ParsingFlagOrder) { } // namespace } // namespace str_format_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/str_join_internal.h b/absl/strings/internal/str_join_internal.h index 02787dd1..31dbf672 100644 --- a/absl/strings/internal/str_join_internal.h +++ b/absl/strings/internal/str_join_internal.h @@ -43,7 +43,7 @@ #include "absl/strings/str_cat.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // @@ -308,7 +308,7 @@ std::string JoinRange(const Range& range, absl::string_view separator) { } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_JOIN_INTERNAL_H_ diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h index 92a678e0..b54f6ebe 100644 --- a/absl/strings/internal/str_split_internal.h +++ b/absl/strings/internal/str_split_internal.h @@ -47,7 +47,7 @@ #endif // _GLIBCXX_DEBUG namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // This class is implicitly constructible from everything that absl::string_view @@ -449,7 +449,7 @@ class Splitter { }; } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc index fe4a9beb..8fd8edc1 100644 --- a/absl/strings/internal/utf8.cc +++ b/absl/strings/internal/utf8.cc @@ -17,7 +17,7 @@ #include "absl/strings/internal/utf8.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { @@ -49,5 +49,5 @@ size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { } } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/utf8.h b/absl/strings/internal/utf8.h index b1bb954e..32fb1093 100644 --- a/absl/strings/internal/utf8.h +++ b/absl/strings/internal/utf8.h @@ -20,8 +20,10 @@ #include <cstddef> #include <cstdint> +#include "absl/base/config.h" + namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // For Unicode code points 0 through 0x10FFFF, EncodeUTF8Char writes @@ -42,7 +44,7 @@ enum { kMaxEncodedUTF8Size = 4 }; size_t EncodeUTF8Char(char *buffer, char32_t utf8_char); } // namespace strings_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_INTERNAL_UTF8_H_ diff --git a/absl/strings/match.cc b/absl/strings/match.cc index 01ed1f15..8127cb0c 100644 --- a/absl/strings/match.cc +++ b/absl/strings/match.cc @@ -17,7 +17,7 @@ #include "absl/strings/internal/memutil.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN bool EqualsIgnoreCase(absl::string_view piece1, absl::string_view piece2) { return (piece1.size() == piece2.size() && @@ -36,5 +36,5 @@ bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix) { EqualsIgnoreCase(text.substr(text.size() - suffix.size()), suffix); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/match.h b/absl/strings/match.h index 15fdc0c8..90fca98a 100644 --- a/absl/strings/match.h +++ b/absl/strings/match.h @@ -20,7 +20,7 @@ // This file contains simple utilities for performing string matching checks. // All of these function parameters are specified as `absl::string_view`, // meaning that these functions can accept `std::string`, `absl::string_view` or -// nul-terminated C-style strings. +// NUL-terminated C-style strings. // // Examples: // std::string s = "foo"; @@ -38,7 +38,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // StrContains() // @@ -84,7 +84,7 @@ bool StartsWithIgnoreCase(absl::string_view text, absl::string_view prefix); // case in the comparison. bool EndsWithIgnoreCase(absl::string_view text, absl::string_view suffix); -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_MATCH_H_ diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 2bac4adc..68c26dd6 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -19,8 +19,8 @@ #include <algorithm> #include <cassert> -#include <cfloat> // for DBL_DIG and FLT_DIG -#include <cmath> // for HUGE_VAL +#include <cfloat> // for DBL_DIG and FLT_DIG +#include <cmath> // for HUGE_VAL #include <cstdint> #include <cstdio> #include <cstdlib> @@ -30,16 +30,18 @@ #include <memory> #include <utility> +#include "absl/base/attributes.h" #include "absl/base/internal/bits.h" #include "absl/base/internal/raw_logging.h" #include "absl/strings/ascii.h" #include "absl/strings/charconv.h" +#include "absl/strings/escaping.h" #include "absl/strings/internal/memutil.h" #include "absl/strings/match.h" #include "absl/strings/str_cat.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN bool SimpleAtof(absl::string_view str, float* out) { *out = 0.0; @@ -93,43 +95,6 @@ bool SimpleAtod(absl::string_view str, double* out) { return true; } -namespace { - -// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the -// range 0 <= i < 100, and buf must have space for two characters. Example: -// char buf[2]; -// PutTwoDigits(42, buf); -// // buf[0] == '4' -// // buf[1] == '2' -inline void PutTwoDigits(size_t i, char* buf) { - static const char two_ASCII_digits[100][2] = { - {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, - {'0', '5'}, {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, - {'1', '0'}, {'1', '1'}, {'1', '2'}, {'1', '3'}, {'1', '4'}, - {'1', '5'}, {'1', '6'}, {'1', '7'}, {'1', '8'}, {'1', '9'}, - {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, {'2', '4'}, - {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, - {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, - {'3', '5'}, {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, - {'4', '0'}, {'4', '1'}, {'4', '2'}, {'4', '3'}, {'4', '4'}, - {'4', '5'}, {'4', '6'}, {'4', '7'}, {'4', '8'}, {'4', '9'}, - {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, {'5', '4'}, - {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, - {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, - {'6', '5'}, {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, - {'7', '0'}, {'7', '1'}, {'7', '2'}, {'7', '3'}, {'7', '4'}, - {'7', '5'}, {'7', '6'}, {'7', '7'}, {'7', '8'}, {'7', '9'}, - {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, {'8', '4'}, - {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, - {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, - {'9', '5'}, {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'} - }; - assert(i < 100); - memcpy(buf, two_ASCII_digits[i], 2); -} - -} // namespace - bool SimpleAtob(absl::string_view str, bool* out) { ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr."); if (EqualsIgnoreCase(str, "true") || EqualsIgnoreCase(str, "t") || @@ -496,13 +461,13 @@ static ExpDigits SplitToSix(const double value) { int two_digits = dddddd / 10000; dddddd -= two_digits * 10000; - PutTwoDigits(two_digits, &exp_dig.digits[0]); + numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[0]); two_digits = dddddd / 100; dddddd -= two_digits * 100; - PutTwoDigits(two_digits, &exp_dig.digits[2]); + numbers_internal::PutTwoDigits(two_digits, &exp_dig.digits[2]); - PutTwoDigits(dddddd, &exp_dig.digits[4]); + numbers_internal::PutTwoDigits(dddddd, &exp_dig.digits[4]); return exp_dig; } @@ -755,8 +720,8 @@ inline bool safe_parse_sign_and_base(absl::string_view* text /*inout*/, // commonly used bases. template <typename IntType> struct LookupTables { - static const IntType kVmaxOverBase[]; - static const IntType kVminOverBase[]; + ABSL_CONST_INIT static const IntType kVmaxOverBase[]; + ABSL_CONST_INIT static const IntType kVminOverBase[]; }; // An array initializer macro for X/base where base in [0, 36]. @@ -771,6 +736,49 @@ struct LookupTables { X / 35, X / 36, \ } +// uint128& operator/=(uint128) is not constexpr, so hardcode the resulting +// array to avoid a static initializer. +template <> +const uint128 LookupTables<uint128>::kVmaxOverBase[] = { + 0, + 0, + MakeUint128(9223372036854775807u, 18446744073709551615u), + MakeUint128(6148914691236517205u, 6148914691236517205u), + MakeUint128(4611686018427387903u, 18446744073709551615u), + MakeUint128(3689348814741910323u, 3689348814741910323u), + MakeUint128(3074457345618258602u, 12297829382473034410u), + MakeUint128(2635249153387078802u, 5270498306774157604u), + MakeUint128(2305843009213693951u, 18446744073709551615u), + MakeUint128(2049638230412172401u, 14347467612885206812u), + MakeUint128(1844674407370955161u, 11068046444225730969u), + MakeUint128(1676976733973595601u, 8384883669867978007u), + MakeUint128(1537228672809129301u, 6148914691236517205u), + MakeUint128(1418980313362273201u, 4256940940086819603u), + MakeUint128(1317624576693539401u, 2635249153387078802u), + MakeUint128(1229782938247303441u, 1229782938247303441u), + MakeUint128(1152921504606846975u, 18446744073709551615u), + MakeUint128(1085102592571150095u, 1085102592571150095u), + MakeUint128(1024819115206086200u, 16397105843297379214u), + MakeUint128(970881267037344821u, 16504981539634861972u), + MakeUint128(922337203685477580u, 14757395258967641292u), + MakeUint128(878416384462359600u, 14054662151397753612u), + MakeUint128(838488366986797800u, 13415813871788764811u), + MakeUint128(802032351030850070u, 4812194106185100421u), + MakeUint128(768614336404564650u, 12297829382473034410u), + MakeUint128(737869762948382064u, 11805916207174113034u), + MakeUint128(709490156681136600u, 11351842506898185609u), + MakeUint128(683212743470724133u, 17080318586768103348u), + MakeUint128(658812288346769700u, 10540996613548315209u), + MakeUint128(636094623231363848u, 15266270957552732371u), + MakeUint128(614891469123651720u, 9838263505978427528u), + MakeUint128(595056260442243600u, 9520900167075897608u), + MakeUint128(576460752303423487u, 18446744073709551615u), + MakeUint128(558992244657865200u, 8943875914525843207u), + MakeUint128(542551296285575047u, 9765923333140350855u), + MakeUint128(527049830677415760u, 8432797290838652167u), + MakeUint128(512409557603043100u, 8198552921648689607u), +}; + template <typename IntType> const IntType LookupTables<IntType>::kVmaxOverBase[] = X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max()); @@ -790,6 +798,8 @@ inline bool safe_parse_positive_int(absl::string_view text, int base, assert(base >= 0); assert(vmax >= static_cast<IntType>(base)); const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base]; + assert(base < 2 || + std::numeric_limits<IntType>::max() / base == vmax_over_base); const char* start = text.data(); const char* end = start + text.size(); // loop over digits @@ -823,6 +833,8 @@ inline bool safe_parse_negative_int(absl::string_view text, int base, assert(vmin < 0); assert(vmin <= 0 - base); IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base]; + assert(base < 2 || + std::numeric_limits<IntType>::min() / base == vmin_over_base); // 2003 c++ standard [expr.mul] // "... the sign of the remainder is implementation-defined." // Although (vmin/base)*base + vmin%base is always vmin. @@ -886,6 +898,48 @@ inline bool safe_uint_internal(absl::string_view text, IntType* value_p, } // anonymous namespace namespace numbers_internal { + +// Digit conversion. +ABSL_CONST_INIT ABSL_DLL const char kHexChar[] = + "0123456789abcdef"; + +ABSL_CONST_INIT ABSL_DLL const char kHexTable[513] = + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; + +ABSL_CONST_INIT ABSL_DLL const char two_ASCII_digits[100][2] = { + {'0', '0'}, {'0', '1'}, {'0', '2'}, {'0', '3'}, {'0', '4'}, {'0', '5'}, + {'0', '6'}, {'0', '7'}, {'0', '8'}, {'0', '9'}, {'1', '0'}, {'1', '1'}, + {'1', '2'}, {'1', '3'}, {'1', '4'}, {'1', '5'}, {'1', '6'}, {'1', '7'}, + {'1', '8'}, {'1', '9'}, {'2', '0'}, {'2', '1'}, {'2', '2'}, {'2', '3'}, + {'2', '4'}, {'2', '5'}, {'2', '6'}, {'2', '7'}, {'2', '8'}, {'2', '9'}, + {'3', '0'}, {'3', '1'}, {'3', '2'}, {'3', '3'}, {'3', '4'}, {'3', '5'}, + {'3', '6'}, {'3', '7'}, {'3', '8'}, {'3', '9'}, {'4', '0'}, {'4', '1'}, + {'4', '2'}, {'4', '3'}, {'4', '4'}, {'4', '5'}, {'4', '6'}, {'4', '7'}, + {'4', '8'}, {'4', '9'}, {'5', '0'}, {'5', '1'}, {'5', '2'}, {'5', '3'}, + {'5', '4'}, {'5', '5'}, {'5', '6'}, {'5', '7'}, {'5', '8'}, {'5', '9'}, + {'6', '0'}, {'6', '1'}, {'6', '2'}, {'6', '3'}, {'6', '4'}, {'6', '5'}, + {'6', '6'}, {'6', '7'}, {'6', '8'}, {'6', '9'}, {'7', '0'}, {'7', '1'}, + {'7', '2'}, {'7', '3'}, {'7', '4'}, {'7', '5'}, {'7', '6'}, {'7', '7'}, + {'7', '8'}, {'7', '9'}, {'8', '0'}, {'8', '1'}, {'8', '2'}, {'8', '3'}, + {'8', '4'}, {'8', '5'}, {'8', '6'}, {'8', '7'}, {'8', '8'}, {'8', '9'}, + {'9', '0'}, {'9', '1'}, {'9', '2'}, {'9', '3'}, {'9', '4'}, {'9', '5'}, + {'9', '6'}, {'9', '7'}, {'9', '8'}, {'9', '9'}}; + bool safe_strto32_base(absl::string_view text, int32_t* value, int base) { return safe_int_internal<int32_t>(text, value, base); } @@ -901,7 +955,11 @@ bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) { bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) { return safe_uint_internal<uint64_t>(text, value, base); } -} // namespace numbers_internal -} // inline namespace lts_2019_08_08 +bool safe_strtou128_base(absl::string_view text, uint128* value, int base) { + return safe_uint_internal<absl::uint128>(text, value, base); +} + +} // namespace numbers_internal +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index e9878016..d872cca5 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -24,6 +24,10 @@ #ifndef ABSL_STRINGS_NUMBERS_H_ #define ABSL_STRINGS_NUMBERS_H_ +#ifdef __SSE4_2__ +#include <x86intrin.h> +#endif + #include <cstddef> #include <cstdlib> #include <cstring> @@ -32,39 +36,54 @@ #include <string> #include <type_traits> +#include "absl/base/config.h" +#include "absl/base/internal/bits.h" +#ifdef __SSE4_2__ +// TODO(jorg): Remove this when we figure out the right way +// to swap bytes on SSE 4.2 that works with the compilers +// we claim to support. Also, add tests for the compiler +// that doesn't support the Intel _bswap64 intrinsic but +// does support all the SSE 4.2 intrinsics +#include "absl/base/internal/endian.h" +#endif #include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/numeric/int128.h" #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // SimpleAtoi() // -// Converts the given string into an integer value, returning `true` if -// successful. The string must reflect a base-10 integer (optionally followed or -// preceded by ASCII whitespace) whose value falls within the range of the -// integer type. If any errors are encountered, this function returns `false`, -// leaving `out` in an unspecified state. +// Converts the given string (optionally followed or preceded by ASCII +// whitespace) into an integer value, returning `true` if successful. The string +// must reflect a base-10 integer whose value falls within the range of the +// integer type (optionally preceded by a `+` or `-`). If any errors are +// encountered, this function returns `false`, leaving `out` in an unspecified +// state. template <typename int_type> ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out); // SimpleAtof() // // Converts the given string (optionally followed or preceded by ASCII -// whitespace) into a float, which may be rounded on overflow or underflow. +// whitespace) into a float, which may be rounded on overflow or underflow, +// returning `true` if successful. // See https://en.cppreference.com/w/c/string/byte/strtof for details about the -// allowed formats for `str`. If any errors are encountered, this function +// allowed formats for `str`, except SimpleAtof() is locale-independent and will +// always use the "C" locale. If any errors are encountered, this function // returns `false`, leaving `out` in an unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtof(absl::string_view str, float* out); // SimpleAtod() // // Converts the given string (optionally followed or preceded by ASCII -// whitespace) into a double, which may be rounded on overflow or underflow. +// whitespace) into a double, which may be rounded on overflow or underflow, +// returning `true` if successful. // See https://en.cppreference.com/w/c/string/byte/strtof for details about the -// allowed formats for `str`. If any errors are encountered, this function +// allowed formats for `str`, except SimpleAtod is locale-independent and will +// always use the "C" locale. If any errors are encountered, this function // returns `false`, leaving `out` in an unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); @@ -78,20 +97,40 @@ ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); // unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out); -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl // End of public API. Implementation details follow. namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace numbers_internal { +// Digit conversion. +ABSL_DLL extern const char kHexChar[17]; // 0123456789abcdef +ABSL_DLL extern const char + kHexTable[513]; // 000102030405060708090a0b0c0d0e0f1011... +ABSL_DLL extern const char + two_ASCII_digits[100][2]; // 00, 01, 02, 03... + +// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the +// range 0 <= i < 100, and buf must have space for two characters. Example: +// char buf[2]; +// PutTwoDigits(42, buf); +// // buf[0] == '4' +// // buf[1] == '2' +inline void PutTwoDigits(size_t i, char* buf) { + assert(i < 100); + memcpy(buf, two_ASCII_digits[i], 2); +} + // safe_strto?() functions for implementing SimpleAtoi() bool safe_strto32_base(absl::string_view text, int32_t* value, int base); bool safe_strto64_base(absl::string_view text, int64_t* value, int base); bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base); bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base); +bool safe_strtou128_base(absl::string_view text, absl::uint128* value, + int base); static const int kFastToBufferSize = 32; static const int kSixDigitsToBufferSize = 16; @@ -173,6 +212,35 @@ ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out, return parsed; } +// FastHexToBufferZeroPad16() +// +// Outputs `val` into `out` as if by `snprintf(out, 17, "%016x", val)` but +// without the terminating null character. Thus `out` must be of length >= 16. +// Returns the number of non-pad digits of the output (it can never be zero +// since 0 has one digit). +inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { +#ifdef __SSE4_2__ + uint64_t be = absl::big_endian::FromHost64(val); + const auto kNibbleMask = _mm_set1_epi8(0xf); + const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'); + auto v = _mm_loadl_epi64(reinterpret_cast<__m128i*>(&be)); // load lo dword + auto v4 = _mm_srli_epi64(v, 4); // shift 4 right + auto il = _mm_unpacklo_epi8(v4, v); // interleave bytes + auto m = _mm_and_si128(il, kNibbleMask); // mask out nibbles + auto hexchars = _mm_shuffle_epi8(kHexDigits, m); // hex chars + _mm_storeu_si128(reinterpret_cast<__m128i*>(out), hexchars); +#else + for (int i = 0; i < 8; ++i) { + auto byte = (val >> (56 - 8 * i)) & 0xFF; + auto* hex = &absl::numbers_internal::kHexTable[byte * 2]; + std::memcpy(out + 2 * i, hex, 2); + } +#endif + // | 0x1 so that even 0 has 1 digit. + return 16 - absl::base_internal::CountLeadingZeros64(val | 0x1) / 4; +} + } // namespace numbers_internal // SimpleAtoi() @@ -187,7 +255,12 @@ ABSL_MUST_USE_RESULT bool SimpleAtoi(absl::string_view str, int_type* out) { return numbers_internal::safe_strtoi_base(str, out, 10); } -} // inline namespace lts_2019_08_08 +ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, + absl::uint128* out) { + return numbers_internal::safe_strtou128_base(str, out, 10); +} + +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_NUMBERS_H_ diff --git a/absl/strings/numbers_benchmark.cc b/absl/strings/numbers_benchmark.cc index 54dbedd3..6e79b3e8 100644 --- a/absl/strings/numbers_benchmark.cc +++ b/absl/strings/numbers_benchmark.cc @@ -20,6 +20,8 @@ #include "benchmark/benchmark.h" #include "absl/base/internal/raw_logging.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" #include "absl/strings/numbers.h" namespace { @@ -260,4 +262,25 @@ BENCHMARK_TEMPLATE(BM_SimpleAtod, std::string) ->ArgPair(10, 4) ->ArgPair(10, 8); +void BM_FastHexToBufferZeroPad16(benchmark::State& state) { + absl::BitGen rng; + std::vector<uint64_t> nums; + nums.resize(1000); + auto min = std::numeric_limits<uint64_t>::min(); + auto max = std::numeric_limits<uint64_t>::max(); + for (auto& num : nums) { + num = absl::LogUniform(rng, min, max); + } + + char buf[16]; + while (state.KeepRunningBatch(nums.size())) { + for (auto num : nums) { + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(num, buf); + benchmark::DoNotOptimize(digits); + benchmark::DoNotOptimize(buf); + } + } +} +BENCHMARK(BM_FastHexToBufferZeroPad16); + } // namespace diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index 77d7e783..68229b15 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -17,6 +17,7 @@ #include "absl/strings/numbers.h" #include <sys/types.h> + #include <cfenv> // NOLINT(build/c++11) #include <cinttypes> #include <climits> @@ -36,10 +37,11 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "absl/base/internal/raw_logging.h" -#include "absl/strings/str_cat.h" - +#include "absl/random/distributions.h" +#include "absl/random/random.h" #include "absl/strings/internal/numbers_test_common.h" #include "absl/strings/internal/pow10_helper.h" +#include "absl/strings/str_cat.h" namespace { @@ -204,6 +206,9 @@ void CheckHex64(uint64_t v) { std::string actual = absl::StrCat(absl::Hex(v, absl::kZeroPad16)); snprintf(expected, sizeof(expected), "%016" PRIx64, static_cast<uint64_t>(v)); EXPECT_EQ(expected, actual) << " Input " << v; + actual = absl::StrCat(absl::Hex(v, absl::kSpacePad16)); + snprintf(expected, sizeof(expected), "%16" PRIx64, static_cast<uint64_t>(v)); + EXPECT_EQ(expected, actual) << " Input " << v; } TEST(Numbers, TestFastPrints) { @@ -244,7 +249,9 @@ TEST(Numbers, TestFastPrints) { template <typename int_type, typename in_val_type> void VerifySimpleAtoiGood(in_val_type in_value, int_type exp_value) { - std::string s = absl::StrCat(in_value); + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream(&s) << in_value; int_type x = static_cast<int_type>(~exp_value); EXPECT_TRUE(SimpleAtoi(s, &x)) << "in_value=" << in_value << " s=" << s << " x=" << x; @@ -320,6 +327,25 @@ TEST(NumbersTest, Atoi) { VerifySimpleAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(), std::numeric_limits<uint64_t>::max()); + // SimpleAtoi(absl::string_view, absl::uint128) + VerifySimpleAtoiGood<absl::uint128>(0, 0); + VerifySimpleAtoiGood<absl::uint128>(42, 42); + VerifySimpleAtoiBad<absl::uint128>(-42); + + VerifySimpleAtoiBad<absl::uint128>(std::numeric_limits<int32_t>::min()); + VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleAtoiBad<absl::uint128>(std::numeric_limits<int64_t>::min()); + VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleAtoiGood<absl::uint128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleAtoiGood<absl::uint128>( + std::numeric_limits<absl::uint128>::max(), + std::numeric_limits<absl::uint128>::max()); + // Some other types VerifySimpleAtoiGood<int>(-42, -42); VerifySimpleAtoiGood<int32_t>(-42, -42); @@ -652,6 +678,46 @@ TEST(stringtest, safe_strtou32_random) { TEST(stringtest, safe_strtou64_random) { test_random_integer_parse_base<uint64_t>(&safe_strtou64_base); } +TEST(stringtest, safe_strtou128_random) { + // random number generators don't work for uint128, and + // uint128 can be streamed but not StrCat'd, so this code must be custom + // implemented for uint128, but is generally the same as what's above. + // test_random_integer_parse_base<absl::uint128>( + // &absl::numbers_internal::safe_strtou128_base); + using RandomEngine = std::minstd_rand0; + using IntType = absl::uint128; + constexpr auto parse_func = &absl::numbers_internal::safe_strtou128_base; + + std::random_device rd; + RandomEngine rng(rd()); + std::uniform_int_distribution<uint64_t> random_uint64( + std::numeric_limits<uint64_t>::min()); + std::uniform_int_distribution<int> random_base(2, 35); + + for (size_t i = 0; i < kNumRandomTests; i++) { + IntType value = random_uint64(rng); + value = (value << 64) + random_uint64(rng); + int base = random_base(rng); + std::string str_value; + EXPECT_TRUE(Itoa<IntType>(value, base, &str_value)); + IntType parsed_value; + + // Test successful parse + EXPECT_TRUE(parse_func(str_value, &parsed_value, base)); + EXPECT_EQ(parsed_value, value); + + // Test overflow + std::string s; + absl::strings_internal::OStringStream(&s) + << std::numeric_limits<IntType>::max() << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + + // Test underflow + s.clear(); + absl::strings_internal::OStringStream(&s) << "-" << value; + EXPECT_FALSE(parse_func(s, &parsed_value, base)); + } +} TEST(stringtest, safe_strtou32_base) { for (int i = 0; strtouint32_test_cases()[i].str != nullptr; ++i) { @@ -713,11 +779,11 @@ TEST(stringtest, safe_strtou64_base_length_delimited) { } } -// feenableexcept() and fedisableexcept() are missing on macOS, MSVC, -// and WebAssembly. -#if defined(_MSC_VER) || defined(__APPLE__) || defined(__EMSCRIPTEN__) -#define ABSL_MISSING_FEENABLEEXCEPT 1 -#define ABSL_MISSING_FEDISABLEEXCEPT 1 +// feenableexcept() and fedisableexcept() are extensions supported by some libc +// implementations. +#if defined(__GLIBC__) || defined(__BIONIC__) +#define ABSL_HAVE_FEENABLEEXCEPT 1 +#define ABSL_HAVE_FEDISABLEEXCEPT 1 #endif class SimpleDtoaTest : public testing::Test { @@ -725,7 +791,7 @@ class SimpleDtoaTest : public testing::Test { void SetUp() override { // Store the current floating point env & clear away any pending exceptions. feholdexcept(&fp_env_); -#ifndef ABSL_MISSING_FEENABLEEXCEPT +#ifdef ABSL_HAVE_FEENABLEEXCEPT // Turn on floating point exceptions. feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); #endif @@ -735,7 +801,7 @@ class SimpleDtoaTest : public testing::Test { // Restore the floating point environment to the original state. // In theory fedisableexcept is unnecessary; fesetenv will also do it. // In practice, our toolchains have subtle bugs. -#ifndef ABSL_MISSING_FEDISABLEEXCEPT +#ifdef ABSL_HAVE_FEDISABLEEXCEPT fedisableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); #endif fesetenv(&fp_env_); @@ -1184,4 +1250,28 @@ TEST(StrToUint64Base, PrefixOnly) { } } +void TestFastHexToBufferZeroPad16(uint64_t v) { + char buf[16]; + auto digits = absl::numbers_internal::FastHexToBufferZeroPad16(v, buf); + absl::string_view res(buf, 16); + char buf2[17]; + snprintf(buf2, sizeof(buf2), "%016" PRIx64, v); + EXPECT_EQ(res, buf2) << v; + size_t expected_digits = snprintf(buf2, sizeof(buf2), "%" PRIx64, v); + EXPECT_EQ(digits, expected_digits) << v; +} + +TEST(FastHexToBufferZeroPad16, Smoke) { + TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits<uint64_t>::max()); + TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::min()); + TestFastHexToBufferZeroPad16(std::numeric_limits<int64_t>::max()); + absl::BitGen rng; + for (int i = 0; i < 100000; ++i) { + TestFastHexToBufferZeroPad16( + absl::LogUniform(rng, std::numeric_limits<uint64_t>::min(), + std::numeric_limits<uint64_t>::max())); + } +} + } // namespace diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index 73b9e0ba..d9afe2f3 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -15,35 +15,34 @@ #include "absl/strings/str_cat.h" #include <assert.h> + #include <algorithm> #include <cstdint> #include <cstring> #include "absl/strings/ascii.h" #include "absl/strings/internal/resize_uninitialized.h" +#include "absl/strings/numbers.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN AlphaNum::AlphaNum(Hex hex) { + static_assert(numbers_internal::kFastToBufferSize >= 32, + "This function only works when output buffer >= 32 bytes long"); char* const end = &digits_[numbers_internal::kFastToBufferSize]; - char* writer = end; - uint64_t value = hex.value; - static const char hexdigits[] = "0123456789abcdef"; - do { - *--writer = hexdigits[value & 0xF]; - value >>= 4; - } while (value != 0); - - char* beg; - if (end - writer < hex.width) { - beg = end - hex.width; - std::fill_n(beg, writer - beg, hex.fill); + auto real_width = + absl::numbers_internal::FastHexToBufferZeroPad16(hex.value, end - 16); + if (real_width >= hex.width) { + piece_ = absl::string_view(end - real_width, real_width); } else { - beg = writer; + // Pad first 16 chars because FastHexToBufferZeroPad16 pads only to 16 and + // max pad width can be up to 20. + std::memset(end - 32, hex.fill, 16); + // Patch up everything else up to the real_width. + std::memset(end - real_width - 16, hex.fill, 16); + piece_ = absl::string_view(end - hex.width, hex.width); } - - piece_ = absl::string_view(beg, end - beg); } AlphaNum::AlphaNum(Dec dec) { @@ -100,7 +99,7 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b) { std::string result; absl::strings_internal::STLStringResizeUninitialized(&result, a.size() + b.size()); - char* const begin = &*result.begin(); + char* const begin = &result[0]; char* out = begin; out = Append(out, a); out = Append(out, b); @@ -112,7 +111,7 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c) { std::string result; strings_internal::STLStringResizeUninitialized( &result, a.size() + b.size() + c.size()); - char* const begin = &*result.begin(); + char* const begin = &result[0]; char* out = begin; out = Append(out, a); out = Append(out, b); @@ -126,7 +125,7 @@ std::string StrCat(const AlphaNum& a, const AlphaNum& b, const AlphaNum& c, std::string result; strings_internal::STLStringResizeUninitialized( &result, a.size() + b.size() + c.size() + d.size()); - char* const begin = &*result.begin(); + char* const begin = &result[0]; char* out = begin; out = Append(out, a); out = Append(out, b); @@ -145,7 +144,7 @@ std::string CatPieces(std::initializer_list<absl::string_view> pieces) { for (const absl::string_view piece : pieces) total_size += piece.size(); strings_internal::STLStringResizeUninitialized(&result, total_size); - char* const begin = &*result.begin(); + char* const begin = &result[0]; char* out = begin; for (const absl::string_view piece : pieces) { const size_t this_size = piece.size(); @@ -177,7 +176,7 @@ void AppendPieces(std::string* dest, } strings_internal::STLStringResizeUninitialized(dest, total_size); - char* const begin = &*dest->begin(); + char* const begin = &(*dest)[0]; char* out = begin + old_size; for (const absl::string_view piece : pieces) { const size_t this_size = piece.size(); @@ -202,7 +201,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) { std::string::size_type old_size = dest->size(); strings_internal::STLStringResizeUninitialized( dest, old_size + a.size() + b.size()); - char* const begin = &*dest->begin(); + char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); out = Append(out, b); @@ -217,7 +216,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, std::string::size_type old_size = dest->size(); strings_internal::STLStringResizeUninitialized( dest, old_size + a.size() + b.size() + c.size()); - char* const begin = &*dest->begin(); + char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); out = Append(out, b); @@ -234,7 +233,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, std::string::size_type old_size = dest->size(); strings_internal::STLStringResizeUninitialized( dest, old_size + a.size() + b.size() + c.size() + d.size()); - char* const begin = &*dest->begin(); + char* const begin = &(*dest)[0]; char* out = begin + old_size; out = Append(out, a); out = Append(out, b); @@ -243,5 +242,5 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, assert(out == begin + dest->size()); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index c2700475..292fa235 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -64,7 +64,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { // AlphaNumBuffer allows a way to pass a string to StrCat without having to do @@ -291,7 +291,8 @@ class AlphaNum { // StrCat() // ----------------------------------------------------------------------------- // -// Merges given strings or numbers, using no delimiter(s). +// Merges given strings or numbers, using no delimiter(s), returning the merged +// result as a string. // // `StrCat()` is designed to be the fastest possible way to construct a string // out of a mix of raw C strings, string_views, strings, bool values, @@ -401,7 +402,7 @@ SixDigits(double d) { return result; } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_STR_CAT_H_ diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc index 29db9c02..be39880b 100644 --- a/absl/strings/str_cat_test.cc +++ b/absl/strings/str_cat_test.cc @@ -195,6 +195,21 @@ TEST(StrCat, Basics) { EXPECT_EQ(result, "12333444455555666666777777788888888999999999"); } +TEST(StrCat, CornerCases) { + std::string result; + + result = absl::StrCat(""); // NOLINT + EXPECT_EQ(result, ""); + result = absl::StrCat("", ""); + EXPECT_EQ(result, ""); + result = absl::StrCat("", "", ""); + EXPECT_EQ(result, ""); + result = absl::StrCat("", "", "", ""); + EXPECT_EQ(result, ""); + result = absl::StrCat("", "", "", "", ""); + EXPECT_EQ(result, ""); +} + // A minimal allocator that uses malloc(). template <typename T> struct Mallocator { @@ -433,10 +448,34 @@ TEST(StrAppend, Death) { } #endif // GTEST_HAS_DEATH_TEST -TEST(StrAppend, EmptyString) { - std::string s = ""; - absl::StrAppend(&s, s); - EXPECT_EQ(s, ""); +TEST(StrAppend, CornerCases) { + std::string result; + absl::StrAppend(&result, ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", "", ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", "", "", ""); + EXPECT_EQ(result, ""); + absl::StrAppend(&result, "", "", "", "", ""); + EXPECT_EQ(result, ""); +} + +TEST(StrAppend, CornerCasesNonEmptyAppend) { + for (std::string result : {"hello", "a std::string too long to fit in the SSO"}) { + const std::string expected = result; + absl::StrAppend(&result, ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", "", ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", "", "", ""); + EXPECT_EQ(result, expected); + absl::StrAppend(&result, "", "", "", "", ""); + EXPECT_EQ(result, expected); + } } template <typename IntType> diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index b4d1b7bd..2f9b4b27 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -82,7 +82,7 @@ #include "absl/strings/internal/str_format/parser.h" // IWYU pragma: export namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // UntypedFormatSpec // @@ -401,6 +401,12 @@ int FPrintF(std::FILE* output, const FormatSpec<Args...>& format, // This function is functionally equivalent to `std::snprintf()` (and // type-safe); prefer `absl::SNPrintF()` over `std::snprintf()`. // +// In particular, a successful call to `absl::SNPrintF()` writes at most `size` +// bytes of the formatted output to `output`, including a NUL-terminator, and +// returns the number of bytes that would have been written if truncation did +// not occur. In the event of an error, a negative value is returned and `errno` +// is set. +// // Example: // // std::string_view s = "Ulaanbaatar"; @@ -525,7 +531,7 @@ ABSL_MUST_USE_RESULT inline bool FormatUntyped( str_format_internal::UntypedFormatSpecImpl::Extract(format), args); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_STR_FORMAT_H_ diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index bb0f58bf..acbdbf4a 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -10,7 +10,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace { using str_format_internal::FormatArgImpl; @@ -450,7 +450,7 @@ struct SummarizeConsumer { if (conv.precision.is_from_arg()) { *out += "." + std::to_string(conv.precision.get_from_arg()) + "$*"; } - *out += conv.conv.Char(); + *out += FormatConversionCharToChar(conv.conv); *out += "}"; return true; } @@ -623,7 +623,7 @@ TEST_F(FormatWrapperTest, ParsedFormat) { } } // namespace -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl // Some codegen thunks that we can use to easily dump the generated assembly for diff --git a/absl/strings/str_join.h b/absl/strings/str_join.h index c6c0c98a..ae5731a4 100644 --- a/absl/strings/str_join.h +++ b/absl/strings/str_join.h @@ -60,7 +60,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // ----------------------------------------------------------------------------- // Concept: Formatter @@ -287,7 +287,7 @@ std::string StrJoin(const std::tuple<T...>& value, return strings_internal::JoinAlgorithm(value, separator, AlphaNumFormatter()); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_STR_JOIN_H_ diff --git a/absl/strings/str_replace.cc b/absl/strings/str_replace.cc index 3bd8bae1..2bd5fa98 100644 --- a/absl/strings/str_replace.cc +++ b/absl/strings/str_replace.cc @@ -17,7 +17,7 @@ #include "absl/strings/str_cat.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace strings_internal { using FixedMapping = @@ -78,5 +78,5 @@ int StrReplaceAll(strings_internal::FixedMapping replacements, return StrReplaceAll<strings_internal::FixedMapping>(replacements, target); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/str_replace.h b/absl/strings/str_replace.h index cb995456..273c7077 100644 --- a/absl/strings/str_replace.h +++ b/absl/strings/str_replace.h @@ -46,7 +46,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // StrReplaceAll() // @@ -213,7 +213,7 @@ int StrReplaceAll(const StrToStrMapping& replacements, std::string* target) { return substitutions; } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_STR_REPLACE_H_ diff --git a/absl/strings/str_split.cc b/absl/strings/str_split.cc index f1e03717..d0f86669 100644 --- a/absl/strings/str_split.cc +++ b/absl/strings/str_split.cc @@ -27,7 +27,7 @@ #include "absl/strings/ascii.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace { @@ -135,5 +135,5 @@ absl::string_view ByLength::Find(absl::string_view text, return absl::string_view(substr.data() + length_, 0); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index 463ca008..a79cd4a0 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -49,7 +49,7 @@ #include "absl/strings/strip.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN //------------------------------------------------------------------------------ // Delimiters @@ -507,7 +507,7 @@ StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, std::move(text), DelimiterType(d), std::move(p)); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_STR_SPLIT_H_ diff --git a/absl/strings/string_view.cc b/absl/strings/string_view.cc index 9d241e51..c5f5de93 100644 --- a/absl/strings/string_view.cc +++ b/absl/strings/string_view.cc @@ -14,7 +14,7 @@ #include "absl/strings/string_view.h" -#ifndef ABSL_HAVE_STD_STRING_VIEW +#ifndef ABSL_USES_STD_STRING_VIEW #include <algorithm> #include <climits> @@ -24,7 +24,7 @@ #include "absl/strings/internal/memutil.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace { void WritePadding(std::ostream& o, size_t pad) { @@ -229,7 +229,7 @@ constexpr string_view::size_type string_view::npos; ABSL_STRING_VIEW_SELECTANY constexpr string_view::size_type string_view::kMaxSize; -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl -#endif // ABSL_HAVE_STD_STRING_VIEW +#endif // ABSL_USES_STD_STRING_VIEW diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 3a0a4609..1861ea62 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -28,20 +28,6 @@ #define ABSL_STRINGS_STRING_VIEW_H_ #include <algorithm> -#include "absl/base/config.h" - -#ifdef ABSL_HAVE_STD_STRING_VIEW - -#include <string_view> // IWYU pragma: export - -namespace absl { -inline namespace lts_2019_08_08 { -using std::string_view; -} // inline namespace lts_2019_08_08 -} // namespace absl - -#else // ABSL_HAVE_STD_STRING_VIEW - #include <cassert> #include <cstddef> #include <cstring> @@ -50,13 +36,33 @@ using std::string_view; #include <limits> #include <string> +#include "absl/base/config.h" #include "absl/base/internal/throw_delegate.h" #include "absl/base/macros.h" #include "absl/base/optimization.h" #include "absl/base/port.h" +#ifdef ABSL_USES_STD_STRING_VIEW + +#include <string_view> // IWYU pragma: export + +namespace absl { +ABSL_NAMESPACE_BEGIN +using std::string_view; +ABSL_NAMESPACE_END +} // namespace absl + +#else // ABSL_USES_STD_STRING_VIEW + +#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_INTERNAL_STRING_VIEW_MEMCMP __builtin_memcmp +#else // ABSL_HAVE_BUILTIN(__builtin_memcmp) +#define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp +#endif // ABSL_HAVE_BUILTIN(__builtin_memcmp) + namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // absl::string_view // @@ -105,17 +111,17 @@ inline namespace lts_2019_08_08 { // example, when splitting a string, `std::vector<absl::string_view>` is a // natural data type for the output. // -// When constructed from a source which is nul-terminated, the `string_view` -// itself will not include the nul-terminator unless a specific size (including -// the nul) is passed to the constructor. As a result, common idioms that work -// on nul-terminated strings do not work on `string_view` objects. If you write +// When constructed from a source which is NUL-terminated, the `string_view` +// itself will not include the NUL-terminator unless a specific size (including +// the NUL) is passed to the constructor. As a result, common idioms that work +// on NUL-terminated strings do not work on `string_view` objects. If you write // code that scans a `string_view`, you must check its length rather than test // for nul, for example. Note, however, that nuls may still be embedded within // a `string_view` explicitly. // // You may create a null `string_view` in two ways: // -// absl::string_view sv(); +// absl::string_view sv; // absl::string_view sv(nullptr, 0); // // For the above, `sv.data() == nullptr`, `sv.length() == 0`, and @@ -171,9 +177,11 @@ class string_view { string_view( // NOLINT(runtime/explicit) const std::basic_string<char, std::char_traits<char>, Allocator>& str) noexcept - : ptr_(str.data()), length_(CheckLengthInternal(str.size())) {} + // This is implemented in terms of `string_view(p, n)` so `str.size()` + // doesn't need to be reevaluated after `ptr_` is set. + : string_view(str.data(), str.size()) {} - // Implicit constructor of a `string_view` from nul-terminated `str`. When + // Implicit constructor of a `string_view` from NUL-terminated `str`. When // accepting possibly null strings, use `absl::NullSafeStringView(str)` // instead (see below). constexpr string_view(const char* str) // NOLINT(runtime/explicit) @@ -272,26 +280,45 @@ class string_view { // string_view::operator[] // - // Returns the ith element of an `string_view` using the array operator. + // Returns the ith element of the `string_view` using the array operator. // Note that this operator does not perform any bounds checking. - constexpr const_reference operator[](size_type i) const { return ptr_[i]; } + constexpr const_reference operator[](size_type i) const { + return ABSL_ASSERT(i < size()), ptr_[i]; + } + + // string_view::at() + // + // Returns the ith element of the `string_view`. Bounds checking is performed, + // and an exception of type `std::out_of_range` will be thrown on invalid + // access. + constexpr const_reference at(size_type i) const { + return ABSL_PREDICT_TRUE(i < size()) + ? ptr_[i] + : ((void)base_internal::ThrowStdOutOfRange( + "absl::string_view::at"), + ptr_[i]); + } // string_view::front() // // Returns the first element of a `string_view`. - constexpr const_reference front() const { return ptr_[0]; } + constexpr const_reference front() const { + return ABSL_ASSERT(!empty()), ptr_[0]; + } // string_view::back() // // Returns the last element of a `string_view`. - constexpr const_reference back() const { return ptr_[size() - 1]; } + constexpr const_reference back() const { + return ABSL_ASSERT(!empty()), ptr_[size() - 1]; + } // string_view::data() // // Returns a pointer to the underlying character array (which is of course // stored elsewhere). Note that `string_view::data()` may contain embedded nul - // characters, but the returned buffer may or may not be nul-terminated; - // therefore, do not pass `data()` to a routine that expects a nul-terminated + // characters, but the returned buffer may or may not be NUL-terminated; + // therefore, do not pass `data()` to a routine that expects a NUL-terminated // std::string. constexpr const_pointer data() const noexcept { return ptr_; } @@ -345,7 +372,7 @@ class string_view { size_type rlen = (std::min)(length_ - pos, n); if (rlen > 0) { const char* start = ptr_ + pos; - std::copy(start, start + rlen, buf); + traits_type::copy(buf, start, rlen); } return rlen; } @@ -370,16 +397,12 @@ class string_view { // view. Note that in the case of data equality, a further comparison is made // on the respective sizes of the two `string_view`s to determine which is // smaller, equal, or greater. - int compare(string_view x) const noexcept { - auto min_length = (std::min)(length_, x.length_); - if (min_length > 0) { - int r = memcmp(ptr_, x.ptr_, min_length); - if (r < 0) return -1; - if (r > 0) return 1; - } - if (length_ < x.length_) return -1; - if (length_ > x.length_) return 1; - return 0; + constexpr int compare(string_view x) const noexcept { + return CompareImpl(length_, x.length_, + Min(length_, x.length_) == 0 + ? 0 + : ABSL_INTERNAL_STRING_VIEW_MEMCMP( + ptr_, x.ptr_, Min(length_, x.length_))); } // Overload of `string_view::compare()` for comparing a substring of the @@ -496,7 +519,7 @@ class string_view { (std::numeric_limits<difference_type>::max)(); static constexpr size_type CheckLengthInternal(size_type len) { - return ABSL_ASSERT(len <= kMaxSize), len; + return (void)ABSL_ASSERT(len <= kMaxSize), len; } static constexpr size_type StrlenInternal(const char* str) { @@ -517,6 +540,17 @@ class string_view { #endif } + static constexpr size_t Min(size_type length_a, size_type length_b) { + return length_a < length_b ? length_a : length_b; + } + + static constexpr int CompareImpl(size_type length_a, size_type length_b, + int compare_result) { + return compare_result == 0 ? static_cast<int>(length_a > length_b) - + static_cast<int>(length_a < length_b) + : (compare_result < 0 ? -1 : 1); + } + const char* ptr_; size_type length_; }; @@ -524,46 +558,44 @@ class string_view { // This large function is defined inline so that in a fairly common case where // one of the arguments is a literal, the compiler can elide a lot of the // following comparisons. -inline bool operator==(string_view x, string_view y) noexcept { - auto len = x.size(); - if (len != y.size()) { - return false; - } - - return x.data() == y.data() || len <= 0 || - memcmp(x.data(), y.data(), len) == 0; +constexpr bool operator==(string_view x, string_view y) noexcept { + return x.size() == y.size() && + (x.empty() || + ABSL_INTERNAL_STRING_VIEW_MEMCMP(x.data(), y.data(), x.size()) == 0); } -inline bool operator!=(string_view x, string_view y) noexcept { +constexpr bool operator!=(string_view x, string_view y) noexcept { return !(x == y); } -inline bool operator<(string_view x, string_view y) noexcept { - auto min_size = (std::min)(x.size(), y.size()); - const int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size); - return (r < 0) || (r == 0 && x.size() < y.size()); +constexpr bool operator<(string_view x, string_view y) noexcept { + return x.compare(y) < 0; } -inline bool operator>(string_view x, string_view y) noexcept { return y < x; } +constexpr bool operator>(string_view x, string_view y) noexcept { + return y < x; +} -inline bool operator<=(string_view x, string_view y) noexcept { +constexpr bool operator<=(string_view x, string_view y) noexcept { return !(y < x); } -inline bool operator>=(string_view x, string_view y) noexcept { +constexpr bool operator>=(string_view x, string_view y) noexcept { return !(x < y); } // IO Insertion Operator std::ostream& operator<<(std::ostream& o, string_view piece); -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl -#endif // ABSL_HAVE_STD_STRING_VIEW +#undef ABSL_INTERNAL_STRING_VIEW_MEMCMP + +#endif // ABSL_USES_STD_STRING_VIEW namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // ClippedSubstr() // @@ -580,11 +612,11 @@ inline string_view ClippedSubstr(string_view s, size_t pos, // Creates an `absl::string_view` from a pointer `p` even if it's null-valued. // This function should be used where an `absl::string_view` can be created from // a possibly-null pointer. -inline string_view NullSafeStringView(const char* p) { +constexpr string_view NullSafeStringView(const char* p) { return p ? string_view(p) : string_view(); } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_STRING_VIEW_H_ diff --git a/absl/strings/string_view_benchmark.cc b/absl/strings/string_view_benchmark.cc index 46909cb0..0d74e23e 100644 --- a/absl/strings/string_view_benchmark.cc +++ b/absl/strings/string_view_benchmark.cc @@ -30,6 +30,24 @@ namespace { +void BM_StringViewFromString(benchmark::State& state) { + std::string s(state.range(0), 'x'); + std::string* ps = &s; + struct SV { + SV() = default; + explicit SV(const std::string& s) : sv(s) {} + absl::string_view sv; + } sv; + SV* psv = &sv; + benchmark::DoNotOptimize(ps); + benchmark::DoNotOptimize(psv); + for (auto _ : state) { + new (psv) SV(*ps); + benchmark::DoNotOptimize(sv); + } +} +BENCHMARK(BM_StringViewFromString)->Arg(12)->Arg(128); + // Provide a forcibly out-of-line wrapper for operator== that can be used in // benchmarks to measure the impact of inlining. ABSL_ATTRIBUTE_NOINLINE @@ -142,11 +160,45 @@ void BM_CompareSame(benchmark::State& state) { absl::string_view b = y; for (auto _ : state) { + benchmark::DoNotOptimize(a); + benchmark::DoNotOptimize(b); benchmark::DoNotOptimize(a.compare(b)); } } BENCHMARK(BM_CompareSame)->DenseRange(0, 3)->Range(4, 1 << 10); +void BM_CompareFirstOneLess(benchmark::State& state) { + const int len = state.range(0); + std::string x(len, 'a'); + std::string y = x; + y.back() = 'b'; + absl::string_view a = x; + absl::string_view b = y; + + for (auto _ : state) { + benchmark::DoNotOptimize(a); + benchmark::DoNotOptimize(b); + benchmark::DoNotOptimize(a.compare(b)); + } +} +BENCHMARK(BM_CompareFirstOneLess)->DenseRange(1, 3)->Range(4, 1 << 10); + +void BM_CompareSecondOneLess(benchmark::State& state) { + const int len = state.range(0); + std::string x(len, 'a'); + std::string y = x; + x.back() = 'b'; + absl::string_view a = x; + absl::string_view b = y; + + for (auto _ : state) { + benchmark::DoNotOptimize(a); + benchmark::DoNotOptimize(b); + benchmark::DoNotOptimize(a.compare(b)); + } +} +BENCHMARK(BM_CompareSecondOneLess)->DenseRange(1, 3)->Range(4, 1 << 10); + void BM_find_string_view_len_one(benchmark::State& state) { std::string haystack(state.range(0), '0'); absl::string_view s(haystack); diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 22d43536..7b1d56fa 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -29,7 +29,8 @@ #include "absl/base/config.h" #include "absl/base/dynamic_annotations.h" -#ifdef __ANDROID__ +#if defined(ABSL_HAVE_STD_STRING_VIEW) || defined(__ANDROID__) +// We don't control the death messaging when using std::string_view. // Android assert messages only go to system log, so death tests cannot inspect // the message for matching. #define ABSL_EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ @@ -372,7 +373,7 @@ TEST(StringViewTest, STL1) { #ifdef ABSL_HAVE_EXCEPTIONS EXPECT_THROW(a.copy(buf, 1, 27), std::out_of_range); #else - EXPECT_DEATH(a.copy(buf, 1, 27), "absl::string_view::copy"); + ABSL_EXPECT_DEATH_IF_SUPPORTED(a.copy(buf, 1, 27), "absl::string_view::copy"); #endif } @@ -686,7 +687,8 @@ TEST(StringViewTest, STL2Substr) { #ifdef ABSL_HAVE_EXCEPTIONS EXPECT_THROW((void)a.substr(99, 2), std::out_of_range); #else - EXPECT_DEATH((void)a.substr(99, 2), "absl::string_view::substr"); + ABSL_EXPECT_DEATH_IF_SUPPORTED((void)a.substr(99, 2), + "absl::string_view::substr"); #endif } @@ -816,21 +818,35 @@ TEST(StringViewTest, FrontBackSingleChar) { EXPECT_EQ(&c, &csp.back()); } +TEST(StringViewTest, FrontBackEmpty) { +#ifndef ABSL_USES_STD_STRING_VIEW +#ifndef NDEBUG + // Abseil's string_view implementation has debug assertions that check that + // front() and back() are not called on an empty string_view. + absl::string_view sv; + ABSL_EXPECT_DEATH_IF_SUPPORTED(sv.front(), ""); + ABSL_EXPECT_DEATH_IF_SUPPORTED(sv.back(), ""); +#endif +#endif +} + // `std::string_view::string_view(const char*)` calls // `std::char_traits<char>::length(const char*)` to get the string length. In // libc++, it doesn't allow `nullptr` in the constexpr context, with the error // "read of dereferenced null pointer is not allowed in a constant expression". // At run time, the behavior of `std::char_traits::length()` on `nullptr` is // undefined by the standard and usually results in crash with libc++. +// GCC also started rejected this in libstdc++ starting in GCC9. // In MSVC, creating a constexpr string_view from nullptr also triggers an // "unevaluable pointer value" error. This compiler implementation conforms // to the standard, but `absl::string_view` implements a different // behavior for historical reasons. We work around tests that construct // `string_view` from `nullptr` when using libc++. -#if !defined(ABSL_HAVE_STD_STRING_VIEW) || \ - (!defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) +#if !defined(ABSL_USES_STD_STRING_VIEW) || \ + (!(defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE >= 9) && \ + !defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) #define ABSL_HAVE_STRING_VIEW_FROM_NULLPTR 1 -#endif // !defined(ABSL_HAVE_STD_STRING_VIEW) || !defined(_LIBCPP_VERSION) +#endif TEST(StringViewTest, NULLInput) { absl::string_view s; @@ -892,6 +908,18 @@ TEST(StringViewTest, Comparisons2) { EXPECT_LT(digits.compare(0, npos, "0123456789", 3, 5), 0); // 6 } +TEST(StringViewTest, At) { + absl::string_view abc = "abc"; + EXPECT_EQ(abc.at(0), 'a'); + EXPECT_EQ(abc.at(1), 'b'); + EXPECT_EQ(abc.at(2), 'c'); +#ifdef ABSL_HAVE_EXCEPTIONS + EXPECT_THROW(abc.at(3), std::out_of_range); +#else + ABSL_EXPECT_DEATH_IF_SUPPORTED(abc.at(3), "absl::string_view::at"); +#endif +} + struct MyCharAlloc : std::allocator<char> {}; TEST(StringViewTest, ExplicitConversionOperator) { @@ -915,6 +943,31 @@ TEST(StringViewTest, NullSafeStringView) { } } +TEST(StringViewTest, ConstexprNullSafeStringView) { + { + constexpr absl::string_view s = absl::NullSafeStringView(nullptr); + EXPECT_EQ(nullptr, s.data()); + EXPECT_EQ(0, s.size()); + EXPECT_EQ(absl::string_view(), s); + } +#if !defined(_MSC_VER) || _MSC_VER >= 1910 + // MSVC 2017+ is required for good constexpr string_view support. + // See the implementation of `absl::string_view::StrlenInternal()`. + { + static constexpr char kHi[] = "hi"; + absl::string_view s = absl::NullSafeStringView(kHi); + EXPECT_EQ(kHi, s.data()); + EXPECT_EQ(strlen(kHi), s.size()); + EXPECT_EQ(absl::string_view("hi"), s); + } + { + constexpr absl::string_view s = absl::NullSafeStringView("hello"); + EXPECT_EQ(s.size(), 5); + EXPECT_EQ("hello", s); + } +#endif +} + TEST(StringViewTest, ConstexprCompiles) { constexpr absl::string_view sp; #ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR @@ -922,7 +975,7 @@ TEST(StringViewTest, ConstexprCompiles) { #endif constexpr absl::string_view cstr_len("cstr", 4); -#if defined(ABSL_HAVE_STD_STRING_VIEW) +#if defined(ABSL_USES_STD_STRING_VIEW) // In libstdc++ (as of 7.2), `std::string_view::string_view(const char*)` // calls `std::char_traits<char>::length(const char*)` to get the std::string // length, but it is not marked constexpr yet. See GCC bug: @@ -936,7 +989,7 @@ TEST(StringViewTest, ConstexprCompiles) { #define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1 #endif // !__GLIBCXX__ -#else // ABSL_HAVE_STD_STRING_VIEW +#else // ABSL_USES_STD_STRING_VIEW // This duplicates the check for __builtin_strlen in the header. #if ABSL_HAVE_BUILTIN(__builtin_strlen) || \ @@ -951,13 +1004,36 @@ TEST(StringViewTest, ConstexprCompiles) { #define ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR 1 #endif -#endif // ABSL_HAVE_STD_STRING_VIEW +#endif // ABSL_USES_STD_STRING_VIEW #ifdef ABSL_HAVE_CONSTEXPR_STRING_VIEW_FROM_CSTR constexpr absl::string_view cstr_strlen("foo"); EXPECT_EQ(cstr_strlen.length(), 3); constexpr absl::string_view cstr_strlen2 = "bar"; EXPECT_EQ(cstr_strlen2, "bar"); + +#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \ + (defined(__GNUC__) && !defined(__clang__)) +#define ABSL_HAVE_CONSTEXPR_STRING_VIEW_COMPARISON 1 +#endif +#ifdef ABSL_HAVE_CONSTEXPR_STRING_VIEW_COMPARISON + constexpr absl::string_view foo = "foo"; + constexpr absl::string_view bar = "bar"; + constexpr bool foo_eq_bar = foo == bar; + constexpr bool foo_ne_bar = foo != bar; + constexpr bool foo_lt_bar = foo < bar; + constexpr bool foo_le_bar = foo <= bar; + constexpr bool foo_gt_bar = foo > bar; + constexpr bool foo_ge_bar = foo >= bar; + constexpr int foo_compare_bar = foo.compare(bar); + EXPECT_FALSE(foo_eq_bar); + EXPECT_TRUE(foo_ne_bar); + EXPECT_FALSE(foo_lt_bar); + EXPECT_FALSE(foo_le_bar); + EXPECT_TRUE(foo_gt_bar); + EXPECT_TRUE(foo_ge_bar); + EXPECT_GT(foo_compare_bar, 0); +#endif #endif #if !defined(__clang__) || 3 < __clang_major__ || \ @@ -979,8 +1055,16 @@ TEST(StringViewTest, ConstexprCompiles) { constexpr absl::string_view::iterator const_end = cstr_len.end(); constexpr absl::string_view::size_type const_size = cstr_len.size(); constexpr absl::string_view::size_type const_length = cstr_len.length(); + static_assert(const_begin + const_size == const_end, + "pointer arithmetic check"); + static_assert(const_begin + const_length == const_end, + "pointer arithmetic check"); +#ifndef _MSC_VER + // MSVC has bugs doing constexpr pointer arithmetic. + // https://developercommunity.visualstudio.com/content/problem/482192/bad-pointer-arithmetic-in-constepxr-2019-rc1-svc1.html EXPECT_EQ(const_begin + const_size, const_end); EXPECT_EQ(const_begin + const_length, const_end); +#endif constexpr bool isempty = sp.empty(); EXPECT_TRUE(isempty); @@ -1036,6 +1120,17 @@ TEST(StringViewTest, Noexcept) { EXPECT_TRUE(noexcept(sp.find_last_not_of('f'))); } +TEST(StringViewTest, BoundsCheck) { +#ifndef ABSL_USES_STD_STRING_VIEW +#ifndef NDEBUG + // Abseil's string_view implementation has bounds-checking in debug mode. + absl::string_view h = "hello"; + ABSL_EXPECT_DEATH_IF_SUPPORTED(h[5], ""); + ABSL_EXPECT_DEATH_IF_SUPPORTED(h[-1], ""); +#endif +#endif +} + TEST(ComparisonOpsTest, StringCompareNotAmbiguous) { EXPECT_EQ("hello", std::string("hello")); EXPECT_LT("hello", std::string("world")); @@ -1089,7 +1184,7 @@ TEST(HugeStringView, TwoPointTwoGB) { } #endif // THREAD_SANITIZER -#if !defined(NDEBUG) && !defined(ABSL_HAVE_STD_STRING_VIEW) +#if !defined(NDEBUG) && !defined(ABSL_USES_STD_STRING_VIEW) TEST(NonNegativeLenTest, NonNegativeLen) { ABSL_EXPECT_DEATH_IF_SUPPORTED(absl::string_view("xyz", -1), "len <= kMaxSize"); @@ -1105,7 +1200,7 @@ TEST(LenExceedsMaxSizeTest, LenExceedsMaxSize) { ABSL_EXPECT_DEATH_IF_SUPPORTED(absl::string_view("", max_size + 1), "len <= kMaxSize"); } -#endif // !defined(NDEBUG) && !defined(ABSL_HAVE_STD_STRING_VIEW) +#endif // !defined(NDEBUG) && !defined(ABSL_USES_STD_STRING_VIEW) class StringViewStreamTest : public ::testing::Test { public: diff --git a/absl/strings/strip.h b/absl/strings/strip.h index 8e2ae422..111872ca 100644 --- a/absl/strings/strip.h +++ b/absl/strings/strip.h @@ -30,7 +30,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN // ConsumePrefix() // @@ -85,7 +85,7 @@ ABSL_MUST_USE_RESULT inline absl::string_view StripSuffix( return str; } -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_STRIP_H_ diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc index 6bc9641d..5b69a3ef 100644 --- a/absl/strings/substitute.cc +++ b/absl/strings/substitute.cc @@ -23,7 +23,7 @@ #include "absl/strings/string_view.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace substitute_internal { void SubstituteAndAppendArray(std::string* output, absl::string_view format, @@ -36,7 +36,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, if (i + 1 >= format.size()) { #ifndef NDEBUG ABSL_RAW_LOG(FATAL, - "Invalid strings::Substitute() format std::string: \"%s\".", + "Invalid absl::Substitute() format std::string: \"%s\".", absl::CEscape(format).c_str()); #endif return; @@ -46,7 +46,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, #ifndef NDEBUG ABSL_RAW_LOG( FATAL, - "Invalid strings::Substitute() format std::string: asked for \"$" + "Invalid absl::Substitute() format std::string: asked for \"$" "%d\", but only %d args were given. Full format std::string was: " "\"%s\".", index, static_cast<int>(num_args), absl::CEscape(format).c_str()); @@ -61,7 +61,7 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, } else { #ifndef NDEBUG ABSL_RAW_LOG(FATAL, - "Invalid strings::Substitute() format std::string: \"%s\".", + "Invalid absl::Substitute() format std::string: \"%s\".", absl::CEscape(format).c_str()); #endif return; @@ -95,7 +95,6 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, assert(target == output->data() + output->size()); } -static const char kHexDigits[] = "0123456789abcdef"; Arg::Arg(const void* value) { static_assert(sizeof(scratch_) >= sizeof(value) * 2 + 2, "fix sizeof(scratch_)"); @@ -105,7 +104,7 @@ Arg::Arg(const void* value) { char* ptr = scratch_ + sizeof(scratch_); uintptr_t num = reinterpret_cast<uintptr_t>(value); do { - *--ptr = kHexDigits[num & 0xf]; + *--ptr = absl::numbers_internal::kHexChar[num & 0xf]; num >>= 4; } while (num != 0); *--ptr = 'x'; @@ -120,7 +119,7 @@ Arg::Arg(Hex hex) { char* writer = end; uint64_t value = hex.value; do { - *--writer = kHexDigits[value & 0xF]; + *--writer = absl::numbers_internal::kHexChar[value & 0xF]; value >>= 4; } while (value != 0); @@ -168,5 +167,5 @@ Arg::Arg(Dec dec) { } } // namespace substitute_internal -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h index 3ba7f4d3..4d0984d3 100644 --- a/absl/strings/substitute.h +++ b/absl/strings/substitute.h @@ -86,7 +86,7 @@ #include "absl/strings/strip.h" namespace absl { -inline namespace lts_2019_08_08 { +ABSL_NAMESPACE_BEGIN namespace substitute_internal { // Arg @@ -190,7 +190,12 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, #if defined(ABSL_BAD_CALL_IF) constexpr int CalculateOneBit(const char* format) { - return (*format < '0' || *format > '9') ? 0 : (1 << (*format - '0')); + // Returns: + // * 2^N for '$N' when N is in [0-9] + // * 0 for correct '$' escaping: '$$'. + // * -1 otherwise. + return (*format < '0' || *format > '9') ? (*format == '$' ? 0 : -1) + : (1 << (*format - '0')); } constexpr const char* SkipNumber(const char* format) { @@ -682,7 +687,7 @@ std::string Substitute( "format std::string doesn't contain all of $0 through $9"); #endif // ABSL_BAD_CALL_IF -} // inline namespace lts_2019_08_08 +ABSL_NAMESPACE_END } // namespace absl #endif // ABSL_STRINGS_SUBSTITUTE_H_ diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc index e27abb17..450cd2bc 100644 --- a/absl/strings/substitute_test.cc +++ b/absl/strings/substitute_test.cc @@ -189,14 +189,14 @@ TEST(SubstituteTest, VectorBoolRef) { TEST(SubstituteDeathTest, SubstituteDeath) { EXPECT_DEBUG_DEATH( static_cast<void>(absl::Substitute(absl::string_view("-$2"), "a", "b")), - "Invalid strings::Substitute\\(\\) format std::string: asked for \"\\$2\", " + "Invalid absl::Substitute\\(\\) format std::string: asked for \"\\$2\", " "but only 2 args were given."); EXPECT_DEBUG_DEATH( - static_cast<void>(absl::Substitute("-$z-")), - "Invalid strings::Substitute\\(\\) format std::string: \"-\\$z-\""); + static_cast<void>(absl::Substitute(absl::string_view("-$z-"))), + "Invalid absl::Substitute\\(\\) format std::string: \"-\\$z-\""); EXPECT_DEBUG_DEATH( - static_cast<void>(absl::Substitute("-$")), - "Invalid strings::Substitute\\(\\) format std::string: \"-\\$\""); + static_cast<void>(absl::Substitute(absl::string_view("-$"))), + "Invalid absl::Substitute\\(\\) format std::string: \"-\\$\""); } #endif // GTEST_HAS_DEATH_TEST |