diff options
Diffstat (limited to 'absl/strings')
99 files changed, 15890 insertions, 2519 deletions
diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 123b5efb..3f51252f 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -13,10 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") load( "//absl:copts/configure_copts.bzl", "ABSL_DEFAULT_COPTS", + "ABSL_DEFAULT_LINKOPTS", "ABSL_TEST_COPTS", ) @@ -66,6 +66,7 @@ cc_library( "substitute.h", ], copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ ":internal", "//absl/base", @@ -96,6 +97,7 @@ cc_library( "internal/utf8.h", ], copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ "//absl/base:config", "//absl/base:core_headers", @@ -269,15 +271,27 @@ cc_library( name = "cord_internal", srcs = [ "internal/cord_internal.cc", + "internal/cord_rep_btree.cc", + "internal/cord_rep_btree_navigator.cc", + "internal/cord_rep_btree_reader.cc", + "internal/cord_rep_consume.cc", + "internal/cord_rep_crc.cc", "internal/cord_rep_ring.cc", ], hdrs = [ + "internal/cord_data_edge.h", "internal/cord_internal.h", + "internal/cord_rep_btree.h", + "internal/cord_rep_btree_navigator.h", + "internal/cord_rep_btree_reader.h", + "internal/cord_rep_consume.h", + "internal/cord_rep_crc.h", "internal/cord_rep_flat.h", "internal/cord_rep_ring.h", "internal/cord_rep_ring_reader.h", ], copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, visibility = [ "//visibility:private", ], @@ -292,7 +306,111 @@ cc_library( "//absl/container:compressed_tuple", "//absl/container:inlined_vector", "//absl/container:layout", + "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/types:span", + ], +) + +cc_test( + name = "cord_data_edge_test", + size = "small", + srcs = ["internal/cord_data_edge_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_btree_test", + size = "medium", + srcs = ["internal/cord_rep_btree_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "//absl/cleanup", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_btree_navigator_test", + size = "medium", + srcs = ["internal/cord_rep_btree_navigator_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_btree_reader_test", + size = "medium", + srcs = ["internal/cord_rep_btree_reader_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_internal", + ":cord_rep_test_util", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cord_rep_crc_test", + size = "small", + srcs = ["internal/cord_rep_crc_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":cord_internal", + ":cord_rep_test_util", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "cordz_update_tracker", + hdrs = ["internal/cordz_update_tracker.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = ["//absl/base:config"], +) + +cc_test( + name = "cordz_update_tracker_test", + srcs = ["internal/cordz_update_tracker_test.cc"], + deps = [ + ":cordz_update_tracker", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/synchronization", + "@com_google_googletest//:gtest_main", ], ) @@ -300,17 +418,28 @@ cc_library( name = "cord", srcs = [ "cord.cc", + "cord_analysis.cc", + "cord_analysis.h", + "cord_buffer.cc", ], hdrs = [ "cord.h", + "cord_buffer.h", ], copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ ":cord_internal", + ":cordz_functions", + ":cordz_info", + ":cordz_statistics", + ":cordz_update_scope", + ":cordz_update_tracker", ":internal", ":str_format", ":strings", "//absl/base", + "//absl/base:config", "//absl/base:core_headers", "//absl/base:endian", "//absl/base:raw_logging_internal", @@ -318,7 +447,224 @@ cc_library( "//absl/container:inlined_vector", "//absl/functional:function_ref", "//absl/meta:type_traits", + "//absl/numeric:bits", "//absl/types:optional", + "//absl/types:span", + ], +) + +cc_library( + name = "cordz_handle", + srcs = ["internal/cordz_handle.cc"], + hdrs = ["internal/cordz_handle.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + "//absl/base", + "//absl/base:config", + "//absl/base:raw_logging_internal", + "//absl/synchronization", + ], +) + +cc_library( + name = "cordz_info", + srcs = ["internal/cordz_info.cc"], + hdrs = ["internal/cordz_info.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cord_internal", + ":cordz_functions", + ":cordz_handle", + ":cordz_statistics", + ":cordz_update_tracker", + "//absl/base", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "//absl/container:inlined_vector", + "//absl/debugging:stacktrace", + "//absl/synchronization", + "//absl/types:span", + ], +) + +cc_library( + name = "cordz_update_scope", + hdrs = ["internal/cordz_update_scope.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cord_internal", + ":cordz_info", + ":cordz_update_tracker", + "//absl/base:config", + "//absl/base:core_headers", + ], +) + +cc_test( + name = "cordz_update_scope_test", + srcs = ["internal/cordz_update_scope_test.cc"], + copts = ABSL_DEFAULT_COPTS, + deps = [ + ":cord_internal", + ":cordz_info", + ":cordz_test_helpers", + ":cordz_update_scope", + ":cordz_update_tracker", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "cordz_sample_token", + srcs = ["internal/cordz_sample_token.cc"], + hdrs = ["internal/cordz_sample_token.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cordz_handle", + ":cordz_info", + "//absl/base:config", + ], +) + +cc_library( + name = "cordz_functions", + srcs = ["internal/cordz_functions.cc"], + hdrs = ["internal/cordz_functions.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", + "//absl/profiling:exponential_biased", + ], +) + +cc_library( + name = "cordz_statistics", + hdrs = ["internal/cordz_statistics.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + visibility = [ + "//absl:__subpackages__", + ], + deps = [ + ":cordz_update_tracker", + "//absl/base:config", + ], +) + +cc_test( + name = "cordz_functions_test", + srcs = [ + "internal/cordz_functions_test.cc", + ], + deps = [ + ":cordz_functions", + ":cordz_test_helpers", + "//absl/base:config", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_handle_test", + srcs = [ + "internal/cordz_handle_test.cc", + ], + deps = [ + ":cordz_handle", + "//absl/base:config", + "//absl/memory", + "//absl/random", + "//absl/random:distributions", + "//absl/synchronization", + "//absl/synchronization:thread_pool", + "//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_info_test", + srcs = [ + "internal/cordz_info_test.cc", + ], + deps = [ + ":cord_internal", + ":cordz_handle", + ":cordz_info", + ":cordz_statistics", + ":cordz_test_helpers", + ":cordz_update_tracker", + ":strings", + "//absl/base:config", + "//absl/debugging:stacktrace", + "//absl/debugging:symbolize", + "//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_info_statistics_test", + srcs = [ + "internal/cordz_info_statistics_test.cc", + ], + deps = [ + ":cord", + ":cord_internal", + ":cordz_info", + ":cordz_sample_token", + ":cordz_statistics", + ":cordz_update_scope", + ":cordz_update_tracker", + "//absl/base:config", + "//absl/synchronization", + "//absl/synchronization:thread_pool", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_sample_token_test", + srcs = [ + "internal/cordz_sample_token_test.cc", + ], + deps = [ + ":cord_internal", + ":cordz_handle", + ":cordz_info", + ":cordz_sample_token", + ":cordz_test_helpers", + "//absl/base:config", + "//absl/memory", + "//absl/random", + "//absl/synchronization", + "//absl/synchronization:thread_pool", + "//absl/time", + "@com_google_googletest//:gtest_main", ], ) @@ -329,8 +675,62 @@ cc_library( "cord_test_helpers.h", ], copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":cord", + ":cord_internal", + ":strings", + "//absl/base:config", + ], +) + +cc_library( + name = "cord_rep_test_util", + testonly = 1, + hdrs = ["internal/cord_rep_test_util.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":cord_internal", + ":strings", + "//absl/base:config", + "//absl/base:raw_logging_internal", + ], +) + +cc_library( + name = "cordz_test_helpers", + testonly = 1, + hdrs = ["cordz_test_helpers.h"], + copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":cord", + ":cord_internal", + ":cordz_info", + ":cordz_sample_token", + ":cordz_statistics", + ":cordz_update_tracker", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "cord_buffer_test", + size = "small", + srcs = ["cord_buffer_test.cc"], + copts = ABSL_TEST_COPTS, + visibility = ["//visibility:private"], deps = [ ":cord", + ":cord_internal", + ":cord_rep_test_util", + "//absl/base:config", + "//absl/types:span", + "@com_google_googletest//:gtest_main", ], ) @@ -343,6 +743,8 @@ cc_test( deps = [ ":cord", ":cord_test_helpers", + ":cordz_functions", + ":cordz_test_helpers", ":str_format", ":strings", "//absl/base", @@ -351,6 +753,40 @@ cc_test( "//absl/base:endian", "//absl/base:raw_logging_internal", "//absl/container:fixed_array", + "//absl/hash", + "//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "cordz_test", + size = "medium", + srcs = ["cordz_test.cc"], + copts = ABSL_TEST_COPTS, + tags = [ + "benchmark", + "no_test_android_arm", + "no_test_android_arm64", + "no_test_android_x86", + "no_test_ios_x86_64", + "no_test_loonix", + "no_test_msvc_x64", + ], + visibility = ["//visibility:private"], + deps = [ + ":cord", + ":cord_test_helpers", + ":cordz_functions", + ":cordz_info", + ":cordz_sample_token", + ":cordz_statistics", + ":cordz_test_helpers", + ":cordz_update_tracker", + ":strings", + "//absl/base:config", + "//absl/base:core_headers", + "//absl/base:raw_logging_internal", "@com_google_googletest//:gtest_main", ], ) @@ -434,6 +870,7 @@ cc_test( ":strings", "//absl/base:core_headers", "//absl/base:dynamic_annotations", + "//absl/container:btree", "//absl/container:flat_hash_map", "//absl/container:node_hash_map", "@com_google_googletest//:gtest_main", @@ -675,6 +1112,7 @@ cc_library( "str_format.h", ], copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ ":str_format_internal", ], @@ -700,6 +1138,7 @@ cc_library( "internal/str_format/parser.h", ], copts = ABSL_DEFAULT_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, visibility = ["//visibility:private"], deps = [ ":strings", @@ -712,6 +1151,7 @@ cc_library( "//absl/numeric:representation", "//absl/types:optional", "//absl/types:span", + "//absl/utility", ], ) @@ -787,6 +1227,7 @@ cc_test( deps = [ ":str_format_internal", ":strings", + "//absl/base:core_headers", "//absl/base:raw_logging_internal", "//absl/types:optional", "@com_google_googletest//:gtest_main", @@ -822,6 +1263,7 @@ cc_library( testonly = True, srcs = ["internal/pow10_helper.cc"], hdrs = ["internal/pow10_helper.h"], + linkopts = ABSL_DEFAULT_LINKOPTS, visibility = ["//visibility:private"], deps = ["//absl/base:config"], ) diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 3b7ae639..bb073301 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -68,6 +68,7 @@ absl_cc_library( PUBLIC ) +# Internal-only target, do not depend on directly. absl_cc_library( NAME strings_internal @@ -101,7 +102,7 @@ absl_cc_test( DEPS absl::strings absl::base - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -115,7 +116,7 @@ absl_cc_test( absl::strings absl::core_headers absl::fixed_array - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -128,7 +129,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -142,7 +143,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -156,7 +157,7 @@ absl_cc_test( absl::strings_internal absl::base absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -169,7 +170,7 @@ absl_cc_test( DEPS absl::strings absl::type_traits - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -184,7 +185,7 @@ absl_cc_test( absl::config absl::core_headers absl::dynamic_annotations - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -197,7 +198,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -209,7 +210,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -221,12 +222,12 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings - absl::base absl::core_headers absl::dynamic_annotations + absl::btree absl::flat_hash_map absl::node_hash_map - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -238,7 +239,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -253,7 +254,7 @@ absl_cc_test( absl::base absl::core_headers absl::type_traits - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -268,7 +269,7 @@ absl_cc_test( absl::base absl::core_headers absl::memory - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -281,7 +282,7 @@ absl_cc_test( DEPS absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -301,7 +302,7 @@ absl_cc_test( absl::random_random absl::random_distributions absl::strings_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -314,7 +315,7 @@ absl_cc_test( DEPS absl::strings absl::base - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -326,7 +327,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::strings_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -340,7 +341,7 @@ absl_cc_test( absl::strings absl::str_format absl::pow10_helper - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -355,7 +356,7 @@ absl_cc_test( absl::strings absl::config absl::raw_logging_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -370,7 +371,7 @@ absl_cc_test( DEPS absl::strings absl::config - gmock_main + GTest::gmock_main ) absl_cc_library( @@ -385,6 +386,7 @@ absl_cc_library( PUBLIC ) +# Internal-only target, do not depend on directly. absl_cc_library( NAME str_format_internal @@ -412,6 +414,7 @@ absl_cc_library( absl::core_headers absl::numeric_representation absl::type_traits + absl::utility absl::int128 absl::span ) @@ -428,7 +431,7 @@ absl_cc_test( absl::cord absl::strings absl::core_headers - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -442,7 +445,7 @@ absl_cc_test( absl::str_format absl::str_format_internal absl::strings - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -455,7 +458,7 @@ absl_cc_test( DEPS absl::str_format absl::str_format_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -467,7 +470,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format_internal - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -479,7 +482,7 @@ absl_cc_test( ${ABSL_TEST_COPTS} DEPS absl::str_format - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -492,9 +495,10 @@ absl_cc_test( DEPS absl::strings absl::str_format_internal + absl::core_headers absl::raw_logging_internal absl::int128 - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -507,7 +511,7 @@ absl_cc_test( DEPS absl::str_format_internal absl::cord - gmock_main + GTest::gmock_main ) absl_cc_test( @@ -520,9 +524,10 @@ absl_cc_test( DEPS absl::str_format_internal absl::core_headers - gmock_main + GTest::gmock_main ) +# Internal-only target, do not depend on directly. absl_cc_library( NAME pow10_helper @@ -547,43 +552,344 @@ absl_cc_test( DEPS absl::pow10_helper absl::str_format - gmock_main + GTest::gmock_main ) +# Internal-only target, do not depend on directly. absl_cc_library( NAME - cord + cord_internal HDRS - "cord.h" - SRCS - "cord.cc" - "internal/cord_internal.cc" + "internal/cord_data_edge.h" "internal/cord_internal.h" + "internal/cord_rep_btree.h" + "internal/cord_rep_btree_navigator.h" + "internal/cord_rep_btree_reader.h" + "internal/cord_rep_crc.h" + "internal/cord_rep_consume.h" + "internal/cord_rep_flat.h" "internal/cord_rep_ring.h" - "internal/cord_rep_ring.cc" "internal/cord_rep_ring_reader.h" - "internal/cord_rep_flat.h" + SRCS + "internal/cord_internal.cc" + "internal/cord_rep_btree.cc" + "internal/cord_rep_btree_navigator.cc" + "internal/cord_rep_btree_reader.cc" + "internal/cord_rep_crc.cc" + "internal/cord_rep_consume.cc" + "internal/cord_rep_ring.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS - absl::base absl::base_internal absl::compressed_tuple absl::config absl::core_headers absl::endian + absl::inlined_vector + absl::layout + absl::raw_logging_internal + absl::strings + absl::throw_delegate + absl::type_traits +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_update_tracker + HDRS + "internal/cordz_update_tracker.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config +) + +absl_cc_test( + NAME + cordz_update_tracker_test + SRCS + "internal/cordz_update_tracker_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cordz_update_tracker + absl::core_headers + absl::synchronization + GTest::gmock_main +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_functions + HDRS + "internal/cordz_functions.h" + SRCS + "internal/cordz_functions.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::core_headers + absl::exponential_biased + absl::raw_logging_internal +) + +absl_cc_test( + NAME + cordz_functions_test + SRCS + "internal/cordz_functions_test.cc" + DEPS + absl::config + absl::cordz_functions + absl::cordz_test_helpers + GTest::gmock_main +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_statistics + HDRS + "internal/cordz_statistics.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::core_headers + absl::cordz_update_tracker + absl::synchronization +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_handle + HDRS + "internal/cordz_handle.h" + SRCS + "internal/cordz_handle.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base + absl::config + absl::raw_logging_internal + absl::synchronization +) + +absl_cc_test( + NAME + cordz_handle_test + SRCS + "internal/cordz_handle_test.cc" + DEPS + absl::config + absl::cordz_handle + absl::cordz_test_helpers + absl::memory + absl::random_random + absl::random_distributions + absl::synchronization + absl::time + GTest::gmock_main +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_info + HDRS + "internal/cordz_info.h" + SRCS + "internal/cordz_info.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cordz_functions + absl::cordz_handle + absl::cordz_statistics + absl::cordz_update_tracker + absl::core_headers + absl::inlined_vector + absl::span + absl::raw_logging_internal + absl::stacktrace + absl::synchronization +) + +absl_cc_test( + NAME + cordz_info_test + SRCS + "internal/cordz_info_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_test_helpers + absl::cordz_handle + absl::cordz_info + absl::cordz_statistics + absl::cordz_test_helpers + absl::cordz_update_tracker + absl::span + absl::stacktrace + absl::symbolize + GTest::gmock_main +) + +absl_cc_test( + NAME + cordz_info_statistics_test + SRCS + "internal/cordz_info_statistics_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord + absl::cord_internal + absl::cordz_info + absl::cordz_sample_token + absl::cordz_statistics + absl::cordz_update_scope + absl::cordz_update_tracker + absl::thread_pool + GTest::gmock_main +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_sample_token + HDRS + "internal/cordz_sample_token.h" + SRCS + "internal/cordz_sample_token.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::cordz_handle + absl::cordz_info +) + +absl_cc_test( + NAME + cordz_sample_token_test + SRCS + "internal/cordz_sample_token_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_handle + absl::cordz_info + absl::cordz_info + absl::cordz_sample_token + absl::cordz_test_helpers + absl::memory + absl::random_random + absl::synchronization + absl::thread_pool + absl::time + GTest::gmock_main +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_update_scope + HDRS + "internal/cordz_update_scope.h" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_info + absl::cordz_update_tracker + absl::core_headers +) + +absl_cc_test( + NAME + cordz_update_scope_test + SRCS + "internal/cordz_update_scope_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cordz_info + absl::cordz_test_helpers + absl::cordz_update_scope + absl::cordz_update_tracker + absl::core_headers + GTest::gmock_main +) + +absl_cc_library( + NAME + cord + HDRS + "cord.h" + "cord_buffer.h" + SRCS + "cord.cc" + "cord_analysis.cc" + "cord_analysis.h" + "cord_buffer.cc" + COPTS + ${ABSL_DEFAULT_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cordz_functions + absl::cordz_info + absl::cordz_update_scope + absl::cordz_update_tracker + absl::core_headers + absl::endian absl::fixed_array absl::function_ref absl::inlined_vector absl::optional absl::raw_logging_internal + absl::span absl::strings - absl::strings_internal - absl::throw_delegate absl::type_traits PUBLIC ) +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cord_rep_test_util + HDRS + "internal/cord_rep_test_util.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::raw_logging_internal + absl::strings + TESTONLY +) + absl_cc_library( NAME cord_test_helpers @@ -592,7 +898,31 @@ absl_cc_library( COPTS ${ABSL_TEST_COPTS} DEPS + absl::config + absl::cord + absl::cord_internal + absl::strings + TESTONLY +) + +# Internal-only target, do not depend on directly. +absl_cc_library( + NAME + cordz_test_helpers + HDRS + "cordz_test_helpers.h" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config absl::cord + absl::cord_internal + absl::cordz_info + absl::cordz_sample_token + absl::cordz_statistics + absl::cordz_update_tracker + absl::core_headers + absl::strings TESTONLY ) @@ -609,28 +939,118 @@ absl_cc_test( absl::strings absl::base absl::config + absl::cord_test_helpers + absl::cordz_test_helpers absl::core_headers absl::endian + absl::hash + absl::random_random absl::raw_logging_internal absl::fixed_array - gmock_main + GTest::gmock_main ) absl_cc_test( NAME - cord_ring_test + cord_data_edge_test SRCS - "cord_ring_test.cc" + "internal/cord_data_edge_test.cc" COPTS ${ABSL_TEST_COPTS} DEPS + absl::base absl::config - absl::cord + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_btree_test + SRCS + "internal/cord_rep_btree_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::cleanup + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::raw_logging_internal absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_btree_navigator_test + SRCS + "internal/cord_rep_btree_navigator_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS absl::base + absl::config + absl::cord_internal + absl::cord_rep_test_util + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_btree_reader_test + SRCS + "internal/cord_rep_btree_reader_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::cord_rep_test_util absl::core_headers absl::raw_logging_internal - gmock_main + absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_rep_crc_test + SRCS + "internal/cord_rep_crc_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::config + absl::cord_internal + absl::cord_rep_test_util + GTest::gmock_main +) + +absl_cc_test( + NAME + cord_ring_test + SRCS + "cord_ring_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::base + absl::config + absl::cord_internal + absl::core_headers + absl::raw_logging_internal + absl::strings + GTest::gmock_main ) absl_cc_test( @@ -641,9 +1061,33 @@ absl_cc_test( COPTS ${ABSL_TEST_COPTS} DEPS - absl::cord + absl::base + absl::cord_internal + absl::core_headers absl::strings + GTest::gmock_main +) + +absl_cc_test( + NAME + cordz_test + SRCS + "cordz_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::cord + absl::cord_test_helpers + absl::cordz_test_helpers + absl::cordz_functions + absl::cordz_info + absl::cordz_sample_token + absl::cordz_statistics + absl::cordz_update_tracker absl::base + absl::config absl::core_headers - gmock_main + absl::raw_logging_internal + absl::strings + GTest::gmock_main ) diff --git a/absl/strings/ascii.h b/absl/strings/ascii.h index b46bc71f..42eadaea 100644 --- a/absl/strings/ascii.h +++ b/absl/strings/ascii.h @@ -133,7 +133,7 @@ inline bool ascii_isdigit(unsigned char c) { return c >= '0' && c <= '9'; } // ascii_isprint() // -// Determines whether the given character is printable, including whitespace. +// Determines whether the given character is printable, including spaces. inline bool ascii_isprint(unsigned char c) { return c >= 32 && c < 127; } // ascii_isgraph() @@ -197,7 +197,7 @@ ABSL_MUST_USE_RESULT inline std::string AsciiStrToUpper(absl::string_view s) { ABSL_MUST_USE_RESULT inline absl::string_view StripLeadingAsciiWhitespace( absl::string_view str) { auto it = std::find_if_not(str.begin(), str.end(), absl::ascii_isspace); - return str.substr(it - str.begin()); + return str.substr(static_cast<size_t>(it - str.begin())); } // Strips in place whitespace from the beginning of the given string. @@ -211,13 +211,13 @@ inline void StripLeadingAsciiWhitespace(std::string* str) { ABSL_MUST_USE_RESULT inline absl::string_view StripTrailingAsciiWhitespace( absl::string_view str) { auto it = std::find_if_not(str.rbegin(), str.rend(), absl::ascii_isspace); - return str.substr(0, str.rend() - it); + return str.substr(0, static_cast<size_t>(str.rend() - it)); } // Strips in place whitespace from the end of the given string inline void StripTrailingAsciiWhitespace(std::string* str) { auto it = std::find_if_not(str->rbegin(), str->rend(), absl::ascii_isspace); - str->erase(str->rend() - it); + str->erase(static_cast<size_t>(str->rend() - it)); } // Returns absl::string_view with whitespace stripped from both ends of the diff --git a/absl/strings/charconv.cc b/absl/strings/charconv.cc index b8674c28..fefcfc90 100644 --- a/absl/strings/charconv.cc +++ b/absl/strings/charconv.cc @@ -111,7 +111,7 @@ struct FloatTraits<double> { return sign ? -ldexp(mantissa, exponent) : ldexp(mantissa, exponent); #else constexpr uint64_t kMantissaMask = - (uint64_t(1) << (kTargetMantissaBits - 1)) - 1; + (uint64_t{1} << (kTargetMantissaBits - 1)) - 1; uint64_t dbl = static_cast<uint64_t>(sign) << 63; if (mantissa > kMantissaMask) { // Normal value. @@ -151,7 +151,7 @@ struct FloatTraits<float> { return sign ? -ldexpf(mantissa, exponent) : ldexpf(mantissa, exponent); #else constexpr uint32_t kMantissaMask = - (uint32_t(1) << (kTargetMantissaBits - 1)) - 1; + (uint32_t{1} << (kTargetMantissaBits - 1)) - 1; uint32_t flt = static_cast<uint32_t>(sign) << 31; if (mantissa > kMantissaMask) { // Normal value. @@ -499,7 +499,7 @@ bool MustRoundUp(uint64_t guess_mantissa, int guess_exponent, template <typename FloatType> CalculatedFloat CalculatedFloatFromRawValues(uint64_t mantissa, int exponent) { CalculatedFloat result; - if (mantissa == uint64_t(1) << FloatTraits<FloatType>::kTargetMantissaBits) { + if (mantissa == uint64_t{1} << FloatTraits<FloatType>::kTargetMantissaBits) { mantissa >>= 1; exponent += 1; } diff --git a/absl/strings/charconv.h b/absl/strings/charconv.h index e04be32f..7c509812 100644 --- a/absl/strings/charconv.h +++ b/absl/strings/charconv.h @@ -64,8 +64,9 @@ struct from_chars_result { // the result in `value`. // // The matching pattern format is almost the same as that of strtod(), except -// that C locale is not respected, and an initial '+' character in the input -// range will never be matched. +// that (1) C locale is not respected, (2) an initial '+' character in the +// input range will never be matched, and (3) leading whitespaces are not +// ignored. // // If `fmt` is set, it must be one of the enumerator values of the chars_format. // (This is despite the fact that chars_format is a bitmask type.) If set to diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 93533757..85a67a08 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -34,10 +34,16 @@ #include "absl/base/port.h" #include "absl/container/fixed_array.h" #include "absl/container/inlined_vector.h" +#include "absl/strings/cord_buffer.h" #include "absl/strings/escaping.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_crc.h" #include "absl/strings/internal/cord_rep_flat.h" -#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_scope.h" +#include "absl/strings/internal/cordz_update_tracker.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" @@ -48,73 +54,19 @@ namespace absl { ABSL_NAMESPACE_BEGIN using ::absl::cord_internal::CordRep; -using ::absl::cord_internal::CordRepConcat; +using ::absl::cord_internal::CordRepBtree; +using ::absl::cord_internal::CordRepCrc; using ::absl::cord_internal::CordRepExternal; using ::absl::cord_internal::CordRepFlat; -using ::absl::cord_internal::CordRepRing; using ::absl::cord_internal::CordRepSubstring; -using ::absl::cord_internal::kMinFlatLength; +using ::absl::cord_internal::CordzUpdateTracker; +using ::absl::cord_internal::InlineData; using ::absl::cord_internal::kMaxFlatLength; - -using ::absl::cord_internal::CONCAT; -using ::absl::cord_internal::EXTERNAL; -using ::absl::cord_internal::FLAT; -using ::absl::cord_internal::RING; -using ::absl::cord_internal::SUBSTRING; +using ::absl::cord_internal::kMinFlatLength; using ::absl::cord_internal::kInlinedVectorSize; using ::absl::cord_internal::kMaxBytesToCopy; -constexpr uint64_t Fibonacci(unsigned char n, uint64_t a = 0, uint64_t b = 1) { - return n == 0 ? a : Fibonacci(n - 1, b, a + b); -} - -static_assert(Fibonacci(63) == 6557470319842, - "Fibonacci values computed incorrectly"); - -// Minimum length required for a given depth tree -- a tree is considered -// balanced if -// length(t) >= min_length[depth(t)] -// The root node depth is allowed to become twice as large to reduce rebalancing -// for larger strings (see IsRootBalanced). -static constexpr uint64_t min_length[] = { - Fibonacci(2), Fibonacci(3), Fibonacci(4), Fibonacci(5), - Fibonacci(6), Fibonacci(7), Fibonacci(8), Fibonacci(9), - Fibonacci(10), Fibonacci(11), Fibonacci(12), Fibonacci(13), - Fibonacci(14), Fibonacci(15), Fibonacci(16), Fibonacci(17), - Fibonacci(18), Fibonacci(19), Fibonacci(20), Fibonacci(21), - Fibonacci(22), Fibonacci(23), Fibonacci(24), Fibonacci(25), - Fibonacci(26), Fibonacci(27), Fibonacci(28), Fibonacci(29), - Fibonacci(30), Fibonacci(31), Fibonacci(32), Fibonacci(33), - Fibonacci(34), Fibonacci(35), Fibonacci(36), Fibonacci(37), - Fibonacci(38), Fibonacci(39), Fibonacci(40), Fibonacci(41), - Fibonacci(42), Fibonacci(43), Fibonacci(44), Fibonacci(45), - Fibonacci(46), Fibonacci(47), - 0xffffffffffffffffull, // Avoid overflow -}; - -static const int kMinLengthSize = ABSL_ARRAYSIZE(min_length); - -static inline bool cord_ring_enabled() { - return cord_internal::cord_ring_buffer_enabled.load( - std::memory_order_relaxed); -} - -static inline bool IsRootBalanced(CordRep* node) { - if (node->tag != CONCAT) { - return true; - } else if (node->concat()->depth() <= 15) { - return true; - } else if (node->concat()->depth() > kMinLengthSize) { - return false; - } else { - // Allow depth to become twice as large as implied by fibonacci rule to - // reduce rebalancing for larger strings. - return (node->length >= min_length[node->concat()->depth() / 2]); - } -} - -static CordRep* Rebalance(CordRep* node); static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, int indent = 0); static bool VerifyNode(CordRep* root, CordRep* start_node, @@ -136,119 +88,32 @@ static inline CordRep* VerifyTree(CordRep* node) { return node; } -// Return the depth of a node -static int Depth(const CordRep* rep) { - if (rep->tag == CONCAT) { - return rep->concat()->depth(); - } else { - return 0; - } -} - -static void SetConcatChildren(CordRepConcat* concat, CordRep* left, - CordRep* right) { - concat->left = left; - concat->right = right; - - concat->length = left->length + right->length; - concat->set_depth(1 + std::max(Depth(left), Depth(right))); -} - -// Create a concatenation of the specified nodes. -// Does not change the refcounts of "left" and "right". -// The returned node has a refcount of 1. -static CordRep* RawConcat(CordRep* left, CordRep* right) { - // Avoid making degenerate concat nodes (one child is empty) - if (left == nullptr) return right; - if (right == nullptr) return left; - if (left->length == 0) { - CordRep::Unref(left); - return right; - } - if (right->length == 0) { - CordRep::Unref(right); - return left; - } - - CordRepConcat* rep = new CordRepConcat(); - rep->tag = CONCAT; - SetConcatChildren(rep, left, right); - - return rep; -} - -static CordRep* Concat(CordRep* left, CordRep* right) { - CordRep* rep = RawConcat(left, right); - if (rep != nullptr && !IsRootBalanced(rep)) { - rep = Rebalance(rep); - } - return VerifyTree(rep); -} - -// Make a balanced tree out of an array of leaf nodes. -static CordRep* MakeBalancedTree(CordRep** reps, size_t n) { - // Make repeated passes over the array, merging adjacent pairs - // until we are left with just a single node. - while (n > 1) { - size_t dst = 0; - for (size_t src = 0; src < n; src += 2) { - if (src + 1 < n) { - reps[dst] = Concat(reps[src], reps[src + 1]); - } else { - reps[dst] = reps[src]; - } - dst++; - } - n = dst; - } - - return reps[0]; -} - static CordRepFlat* CreateFlat(const char* data, size_t length, - size_t alloc_hint) { + size_t alloc_hint) { CordRepFlat* flat = CordRepFlat::New(length + alloc_hint); flat->length = length; memcpy(flat->Data(), data, length); return flat; } -// Creates a new flat or ringbuffer out of the specified array. +// Creates a new flat or Btree out of the specified array. // The returned node has a refcount of 1. -static CordRep* RingNewTree(const char* data, size_t length, - size_t alloc_hint) { +static CordRep* NewBtree(const char* data, size_t length, size_t alloc_hint) { if (length <= kMaxFlatLength) { return CreateFlat(data, length, alloc_hint); } CordRepFlat* flat = CreateFlat(data, kMaxFlatLength, 0); data += kMaxFlatLength; length -= kMaxFlatLength; - size_t extra = (length - 1) / kMaxFlatLength + 1; - auto* root = CordRepRing::Create(flat, extra); - return CordRepRing::Append(root, {data, length}, alloc_hint); + auto* root = CordRepBtree::Create(flat); + return CordRepBtree::Append(root, {data, length}, alloc_hint); } // Create a new tree out of the specified array. // The returned node has a refcount of 1. -static CordRep* NewTree(const char* data, - size_t length, - size_t alloc_hint) { +static CordRep* NewTree(const char* data, size_t length, size_t alloc_hint) { if (length == 0) return nullptr; - if (cord_ring_enabled()) { - return RingNewTree(data, length, alloc_hint); - } - absl::FixedArray<CordRep*> reps((length - 1) / kMaxFlatLength + 1); - size_t n = 0; - do { - const size_t len = std::min(length, kMaxFlatLength); - CordRepFlat* rep = CordRepFlat::New(len + alloc_hint); - rep->length = len; - memcpy(rep->Data(), data, len); - reps[n++] = VerifyTree(rep); - data += len; - length -= len; - } while (length != 0); - return MakeBalancedTree(reps.data(), n); + return NewBtree(data, length, alloc_hint); } namespace cord_internal { @@ -263,32 +128,46 @@ void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep) { } // namespace cord_internal -static CordRep* NewSubstring(CordRep* child, size_t offset, size_t length) { - // Never create empty substring nodes - if (length == 0) { - CordRep::Unref(child); - return nullptr; - } else { - CordRepSubstring* rep = new CordRepSubstring(); - assert((offset + length) <= child->length); - rep->length = length; - rep->tag = SUBSTRING; - rep->start = offset; - rep->child = child; - return VerifyTree(rep); +// Creates a CordRep from the provided string. If the string is large enough, +// and not wasteful, we move the string into an external cord rep, preserving +// the already allocated string contents. +// Requires the provided string length to be larger than `kMaxInline`. +static CordRep* CordRepFromString(std::string&& src) { + assert(src.length() > cord_internal::kMaxInline); + if ( + // String is short: copy data to avoid external block overhead. + src.size() <= kMaxBytesToCopy || + // String is wasteful: copy data to avoid pinning too much unused memory. + src.size() < src.capacity() / 2 + ) { + return NewTree(src.data(), src.size(), 0); } + + struct StringReleaser { + void operator()(absl::string_view /* data */) {} + std::string data; + }; + const absl::string_view original_data = src; + auto* rep = + static_cast<::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>( + absl::cord_internal::NewExternalRep(original_data, + StringReleaser{std::move(src)})); + // Moving src may have invalidated its data pointer, so adjust it. + rep->base = rep->template get<0>().data.data(); + return rep; } // -------------------------------------------------------------------- // Cord::InlineRep functions +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL constexpr unsigned char Cord::InlineRep::kMaxInline; +#endif -inline void Cord::InlineRep::set_data(const char* data, size_t n, - bool nullify_tail) { +inline void Cord::InlineRep::set_data(const char* data, size_t n) { static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); - cord_internal::SmallMemmove(data_.as_chars(), data, n, nullify_tail); + cord_internal::SmallMemmove<true>(data_.as_chars(), data, n); set_inline_size(n); } @@ -299,20 +178,6 @@ inline char* Cord::InlineRep::set_data(size_t n) { return data_.as_chars(); } -inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) { - if (data_.is_tree()) { - return data_.as_tree(); - } - - size_t len = inline_size(); - CordRepFlat* result = CordRepFlat::New(len + extra_hint); - result->length = len; - static_assert(kMinFlatLength >= sizeof(data_), ""); - memcpy(result->Data(), data_.as_chars(), sizeof(data_)); - set_tree(result); - return result; -} - inline void Cord::InlineRep::reduce_size(size_t n) { size_t tag = inline_size(); assert(tag <= kMaxInline); @@ -328,31 +193,68 @@ inline void Cord::InlineRep::remove_prefix(size_t n) { reduce_size(n); } -// Returns `rep` converted into a CordRepRing. -// Directly returns `rep` if `rep` is already a CordRepRing. -static CordRepRing* ForceRing(CordRep* rep, size_t extra) { - return (rep->tag == RING) ? rep->ring() : CordRepRing::Create(rep, extra); +// Returns `rep` converted into a CordRepBtree. +// Directly returns `rep` if `rep` is already a CordRepBtree. +static CordRepBtree* ForceBtree(CordRep* rep) { + return rep->IsBtree() + ? rep->btree() + : CordRepBtree::Create(cord_internal::RemoveCrcNode(rep)); +} + +void Cord::InlineRep::AppendTreeToInlined(CordRep* tree, + MethodIdentifier method) { + assert(!is_tree()); + if (!data_.is_empty()) { + CordRepFlat* flat = MakeFlatWithExtraCapacity(0); + tree = CordRepBtree::Append(CordRepBtree::Create(flat), tree); + } + EmplaceTree(tree, method); +} + +void Cord::InlineRep::AppendTreeToTree(CordRep* tree, MethodIdentifier method) { + assert(is_tree()); + const CordzUpdateScope scope(data_.cordz_info(), method); + tree = CordRepBtree::Append(ForceBtree(data_.as_tree()), tree); + SetTree(tree, scope); } -void Cord::InlineRep::AppendTree(CordRep* tree) { - if (tree == nullptr) return; - if (data_.is_empty()) { - set_tree(tree); - } else if (cord_ring_enabled()) { - set_tree(CordRepRing::Append(ForceRing(force_tree(0), 1), tree)); +void Cord::InlineRep::AppendTree(CordRep* tree, MethodIdentifier method) { + assert(tree != nullptr); + assert(tree->length != 0); + assert(!tree->IsCrc()); + if (data_.is_tree()) { + AppendTreeToTree(tree, method); } else { - set_tree(Concat(force_tree(0), tree)); + AppendTreeToInlined(tree, method); } } -void Cord::InlineRep::PrependTree(CordRep* tree) { +void Cord::InlineRep::PrependTreeToInlined(CordRep* tree, + MethodIdentifier method) { + assert(!is_tree()); + if (!data_.is_empty()) { + CordRepFlat* flat = MakeFlatWithExtraCapacity(0); + tree = CordRepBtree::Prepend(CordRepBtree::Create(flat), tree); + } + EmplaceTree(tree, method); +} + +void Cord::InlineRep::PrependTreeToTree(CordRep* tree, + MethodIdentifier method) { + assert(is_tree()); + const CordzUpdateScope scope(data_.cordz_info(), method); + tree = CordRepBtree::Prepend(ForceBtree(data_.as_tree()), tree); + SetTree(tree, scope); +} + +void Cord::InlineRep::PrependTree(CordRep* tree, MethodIdentifier method) { assert(tree != nullptr); - if (data_.is_empty()) { - set_tree(tree); - } else if (cord_ring_enabled()) { - set_tree(CordRepRing::Prepend(ForceRing(force_tree(0), 1), tree)); + assert(tree->length != 0); + assert(!tree->IsCrc()); + if (data_.is_tree()) { + PrependTreeToTree(tree, method); } else { - set_tree(Concat(tree, force_tree(0))); + PrependTreeToInlined(tree, method); } } @@ -362,8 +264,8 @@ void Cord::InlineRep::PrependTree(CordRep* tree) { // written to region and the actual size increase will be written to size. static inline bool PrepareAppendRegion(CordRep* root, char** region, size_t* size, size_t max_length) { - if (root->tag == RING && root->refcount.IsOne()) { - Span<char> span = root->ring()->GetAppendBuffer(max_length); + if (root->IsBtree() && root->refcount.IsOne()) { + Span<char> span = root->btree()->GetAppendBuffer(max_length); if (!span.empty()) { *region = span.data(); *size = span.size(); @@ -371,13 +273,8 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, } } - // Search down the right-hand path for a non-full FLAT node. CordRep* dst = root; - while (dst->tag == CONCAT && dst->refcount.IsOne()) { - dst = dst->concat()->right; - } - - if (dst->tag < FLAT || !dst->refcount.IsOne()) { + if (!dst->IsFlat() || !dst->refcount.IsOne()) { *region = nullptr; *size = 0; return false; @@ -391,12 +288,7 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, return false; } - size_t size_increase = std::min(capacity - in_use, max_length); - - // We need to update the length fields for all nodes, including the leaf node. - for (CordRep* rep = root; rep != dst; rep = rep->concat()->right) { - rep->length += size_increase; - } + const size_t size_increase = std::min(capacity - in_use, max_length); dst->length += size_increase; *region = dst->flat()->Data() + in_use; @@ -404,148 +296,56 @@ static inline bool PrepareAppendRegion(CordRep* root, char** region, return true; } -void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, - size_t max_length) { - if (max_length == 0) { - *region = nullptr; - *size = 0; - return; - } - - // Try to fit in the inline buffer if possible. - if (!is_tree()) { - size_t inline_length = inline_size(); - if (max_length <= kMaxInline - inline_length) { - *region = data_.as_chars() + inline_length; - *size = max_length; - set_inline_size(inline_length + max_length); - return; - } - } - - CordRep* root = force_tree(max_length); - - if (PrepareAppendRegion(root, region, size, max_length)) { - return; - } - - // Allocate new node. - CordRepFlat* new_node = - CordRepFlat::New(std::max(static_cast<size_t>(root->length), max_length)); - new_node->length = std::min(new_node->Capacity(), max_length); - *region = new_node->Data(); - *size = new_node->length; - - if (cord_ring_enabled()) { - replace_tree(CordRepRing::Append(ForceRing(root, 1), new_node)); - return; - } - replace_tree(Concat(root, new_node)); -} - -void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { - const size_t max_length = std::numeric_limits<size_t>::max(); - - // Try to fit in the inline buffer if possible. - if (!data_.is_tree()) { - size_t inline_length = inline_size(); - if (inline_length < kMaxInline) { - *region = data_.as_chars() + inline_length; - *size = kMaxInline - inline_length; - set_inline_size(kMaxInline); - return; - } - } - - CordRep* root = force_tree(max_length); - - if (PrepareAppendRegion(root, region, size, max_length)) { - return; - } - - // Allocate new node. - CordRepFlat* new_node = CordRepFlat::New(root->length); - new_node->length = new_node->Capacity(); - *region = new_node->Data(); - *size = new_node->length; - - if (cord_ring_enabled()) { - replace_tree(CordRepRing::Append(ForceRing(root, 1), new_node)); +void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { + assert(&src != this); + assert(is_tree() || src.is_tree()); + auto constexpr method = CordzUpdateTracker::kAssignCord; + if (ABSL_PREDICT_TRUE(!is_tree())) { + EmplaceTree(CordRep::Ref(src.as_tree()), src.data_, method); return; } - replace_tree(Concat(root, new_node)); -} -// If the rep is a leaf, this will increment the value at total_mem_usage and -// will return true. -static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { - if (rep->tag >= FLAT) { - *total_mem_usage += rep->flat()->AllocatedSize(); - return true; - } - if (rep->tag == EXTERNAL) { - *total_mem_usage += sizeof(CordRepConcat) + rep->length; - return true; - } - return false; -} - -void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { - ClearSlow(); - - data_ = src.data_; - if (is_tree()) { - data_.set_profiled(false); - CordRep::Ref(tree()); - clear_cordz_info(); + CordRep* tree = as_tree(); + if (CordRep* src_tree = src.tree()) { + // Leave any existing `cordz_info` in place, and let MaybeTrackCord() + // decide if this cord should be (or remains to be) sampled or not. + data_.set_tree(CordRep::Ref(src_tree)); + CordzInfo::MaybeTrackCord(data_, src.data_, method); + } else { + CordzInfo::MaybeUntrackCord(data_.cordz_info()); + data_ = src.data_; } + CordRep::Unref(tree); } -void Cord::InlineRep::ClearSlow() { +void Cord::InlineRep::UnrefTree() { if (is_tree()) { + CordzInfo::MaybeUntrackCord(data_.cordz_info()); CordRep::Unref(tree()); } - ResetToEmpty(); } // -------------------------------------------------------------------- // Constructors and destructors -Cord::Cord(absl::string_view src) { +Cord::Cord(absl::string_view src, MethodIdentifier method) + : contents_(InlineData::kDefaultInit) { const size_t n = src.size(); if (n <= InlineRep::kMaxInline) { - contents_.set_data(src.data(), n, false); + contents_.set_data(src.data(), n); } else { - contents_.set_tree(NewTree(src.data(), n, 0)); + CordRep* rep = NewTree(src.data(), n, 0); + contents_.EmplaceTree(rep, method); } } template <typename T, Cord::EnableIfString<T>> -Cord::Cord(T&& src) { - if ( - // String is short: copy data to avoid external block overhead. - src.size() <= kMaxBytesToCopy || - // String is wasteful: copy data to avoid pinning too much unused memory. - src.size() < src.capacity() / 2 - ) { - if (src.size() <= InlineRep::kMaxInline) { - contents_.set_data(src.data(), src.size(), false); - } else { - contents_.set_tree(NewTree(src.data(), src.size(), 0)); - } +Cord::Cord(T&& src) : contents_(InlineData::kDefaultInit) { + if (src.size() <= InlineRep::kMaxInline) { + contents_.set_data(src.data(), src.size()); } else { - struct StringReleaser { - void operator()(absl::string_view /* data */) {} - std::string data; - }; - const absl::string_view original_data = src; - auto* rep = static_cast< - ::absl::cord_internal::CordRepExternalImpl<StringReleaser>*>( - absl::cord_internal::NewExternalRep( - original_data, StringReleaser{std::forward<T>(src)})); - // Moving src may have invalidated its data pointer, so adjust it. - rep->base = rep->template get<0>().data.data(); - contents_.set_tree(rep); + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.EmplaceTree(rep, CordzUpdateTracker::kConstructorString); } } @@ -554,9 +354,9 @@ template Cord::Cord(std::string&& src); // The destruction code is separate so that the compiler can determine // that it does not need to call the destructor on a moved-from Cord. void Cord::DestroyCordSlow() { - if (CordRep* tree = contents_.tree()) { - CordRep::Unref(VerifyTree(tree)); - } + assert(contents_.is_tree()); + CordzInfo::MaybeUntrackCord(contents_.cordz_info()); + CordRep::Unref(VerifyTree(contents_.as_tree())); } // -------------------------------------------------------------------- @@ -568,109 +368,101 @@ void Cord::Clear() { } } -Cord& Cord::operator=(absl::string_view src) { +Cord& Cord::AssignLargeString(std::string&& src) { + auto constexpr method = CordzUpdateTracker::kAssignString; + assert(src.size() > kMaxBytesToCopy); + CordRep* rep = CordRepFromString(std::move(src)); + if (CordRep* tree = contents_.tree()) { + CordzUpdateScope scope(contents_.cordz_info(), method); + contents_.SetTree(rep, scope); + CordRep::Unref(tree); + } else { + contents_.EmplaceTree(rep, method); + } + return *this; +} +Cord& Cord::operator=(absl::string_view src) { + auto constexpr method = CordzUpdateTracker::kAssignString; const char* data = src.data(); size_t length = src.size(); CordRep* tree = contents_.tree(); if (length <= InlineRep::kMaxInline) { - // Embed into this->contents_ - contents_.set_data(data, length, true); - if (tree) CordRep::Unref(tree); - return *this; - } - if (tree != nullptr && tree->tag >= FLAT && - tree->flat()->Capacity() >= length && - tree->refcount.IsOne()) { - // Copy in place if the existing FLAT node is reusable. - memmove(tree->flat()->Data(), data, length); - tree->length = length; - VerifyTree(tree); + // Embed into this->contents_, which is somewhat subtle: + // - MaybeUntrackCord must be called before Unref(tree). + // - MaybeUntrackCord must be called before set_data() clobbers cordz_info. + // - set_data() must be called before Unref(tree) as it may reference tree. + if (tree != nullptr) CordzInfo::MaybeUntrackCord(contents_.cordz_info()); + contents_.set_data(data, length); + if (tree != nullptr) CordRep::Unref(tree); return *this; } - contents_.set_tree(NewTree(data, length, 0)); - if (tree) CordRep::Unref(tree); - return *this; -} - -template <typename T, Cord::EnableIfString<T>> -Cord& Cord::operator=(T&& src) { - if (src.size() <= kMaxBytesToCopy) { - *this = absl::string_view(src); + if (tree != nullptr) { + CordzUpdateScope scope(contents_.cordz_info(), method); + if (tree->IsFlat() && tree->flat()->Capacity() >= length && + tree->refcount.IsOne()) { + // Copy in place if the existing FLAT node is reusable. + memmove(tree->flat()->Data(), data, length); + tree->length = length; + VerifyTree(tree); + return *this; + } + contents_.SetTree(NewTree(data, length, 0), scope); + CordRep::Unref(tree); } else { - *this = Cord(std::forward<T>(src)); + contents_.EmplaceTree(NewTree(data, length, 0), method); } return *this; } -template Cord& Cord::operator=(std::string&& src); - // TODO(sanjay): Move to Cord::InlineRep section of file. For now, // we keep it here to make diffs easier. -void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { - if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. +void Cord::InlineRep::AppendArray(absl::string_view src, + MethodIdentifier method) { + if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. size_t appended = 0; - CordRep* root = nullptr; - if (is_tree()) { - root = data_.as_tree(); + CordRep* rep = tree(); + const CordRep* const root = rep; + CordzUpdateScope scope(root ? cordz_info() : nullptr, method); + if (root != nullptr) { + rep = cord_internal::RemoveCrcNode(rep); char* region; - if (PrepareAppendRegion(root, ®ion, &appended, src_size)) { - memcpy(region, src_data, appended); + if (PrepareAppendRegion(rep, ®ion, &appended, src.size())) { + memcpy(region, src.data(), appended); } } else { // Try to fit in the inline buffer if possible. size_t inline_length = inline_size(); - if (src_size <= kMaxInline - inline_length) { + if (src.size() <= kMaxInline - inline_length) { // Append new data to embedded array - memcpy(data_.as_chars() + inline_length, src_data, src_size); - set_inline_size(inline_length + src_size); + memcpy(data_.as_chars() + inline_length, src.data(), src.size()); + set_inline_size(inline_length + src.size()); return; } - // It is possible that src_data == data_, but when we transition from an - // InlineRep to a tree we need to assign data_ = root via set_tree. To - // avoid corrupting the source data before we copy it, delay calling - // set_tree until after we've copied data. - // We are going from an inline size to beyond inline size. Make the new size - // either double the inlined size, or the added size + 10%. - const size_t size1 = inline_length * 2 + src_size; - const size_t size2 = inline_length + src_size / 10; - root = CordRepFlat::New(std::max<size_t>(size1, size2)); - appended = std::min( - src_size, root->flat()->Capacity() - inline_length); - memcpy(root->flat()->Data(), data_.as_chars(), inline_length); - memcpy(root->flat()->Data() + inline_length, src_data, appended); - root->length = inline_length + appended; - set_tree(root); + // Allocate flat to be a perfect fit on first append exceeding inlined size. + // Subsequent growth will use amortized growth until we reach maximum flat + // size. + rep = CordRepFlat::New(inline_length + src.size()); + appended = std::min(src.size(), rep->flat()->Capacity() - inline_length); + memcpy(rep->flat()->Data(), data_.as_chars(), inline_length); + memcpy(rep->flat()->Data() + inline_length, src.data(), appended); + rep->length = inline_length + appended; } - src_data += appended; - src_size -= appended; - if (src_size == 0) { + src.remove_prefix(appended); + if (src.empty()) { + CommitTree(root, rep, scope, method); return; } - if (cord_ring_enabled()) { - absl::string_view data(src_data, src_size); - root = ForceRing(root, (data.size() - 1) / kMaxFlatLength + 1); - replace_tree(CordRepRing::Append(root->ring(), data)); - return; - } + // TODO(b/192061034): keep legacy 10% growth rate: consider other rates. + rep = ForceBtree(rep); + const size_t min_growth = std::max<size_t>(rep->length / 10, src.size()); + rep = CordRepBtree::Append(rep->btree(), src, min_growth - src.size()); - // Use new block(s) for any remaining bytes that were not handled above. - // Alloc extra memory only if the right child of the root of the new tree is - // going to be a FLAT node, which will permit further inplace appends. - size_t length = src_size; - if (src_size < kMaxFlatLength) { - // The new length is either - // - old size + 10% - // - old_size + src_size - // This will cause a reasonable conservative step-up in size that is still - // large enough to avoid excessive amounts of small fragments being added. - length = std::max<size_t>(root->length / 10, src_size); - } - set_tree(Concat(root, NewTree(src_data, src_size, length - src_size))); + CommitTree(root, rep, scope, method); } inline CordRep* Cord::TakeRep() const& { @@ -685,10 +477,18 @@ inline CordRep* Cord::TakeRep() && { template <typename C> inline void Cord::AppendImpl(C&& src) { + auto constexpr method = CordzUpdateTracker::kAppendCord; if (empty()) { - // In case of an empty destination avoid allocating a new node, do not copy - // data. - *this = std::forward<C>(src); + // Since destination is empty, we can avoid allocating a node, + if (src.contents_.is_tree()) { + // by taking the tree directly + CordRep* rep = + cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep()); + contents_.EmplaceTree(rep, method); + } else { + // or copying over inline data + contents_.data_ = src.contents_.data_; + } return; } @@ -698,12 +498,12 @@ inline void Cord::AppendImpl(C&& src) { CordRep* src_tree = src.contents_.tree(); if (src_tree == nullptr) { // src has embedded data. - contents_.AppendArray(src.contents_.data(), src_size); + contents_.AppendArray({src.contents_.data(), src_size}, method); return; } - if (src_tree->tag >= FLAT) { + if (src_tree->IsFlat()) { // src tree just has one flat node. - contents_.AppendArray(src_tree->flat()->Data(), src_size); + contents_.AppendArray({src_tree->flat()->Data(), src_size}, method); return; } if (&src == this) { @@ -719,19 +519,65 @@ inline void Cord::AppendImpl(C&& src) { } // Guaranteed to be a tree (kMaxBytesToCopy > kInlinedSize) - contents_.AppendTree(std::forward<C>(src).TakeRep()); + CordRep* rep = cord_internal::RemoveCrcNode(std::forward<C>(src).TakeRep()); + contents_.AppendTree(rep, CordzUpdateTracker::kAppendCord); +} + +static CordRep::ExtractResult ExtractAppendBuffer(CordRep* rep, + size_t min_capacity) { + switch (rep->tag) { + case cord_internal::BTREE: + return CordRepBtree::ExtractAppendBuffer(rep->btree(), min_capacity); + default: + if (rep->IsFlat() && rep->refcount.IsOne() && + rep->flat()->Capacity() - rep->length >= min_capacity) { + return {nullptr, rep}; + } + return {rep, nullptr}; + } } -void Cord::Append(const Cord& src) { AppendImpl(src); } +static CordBuffer CreateAppendBuffer(InlineData& data, size_t capacity) { + // Watch out for overflow, people can ask for size_t::max(). + const size_t size = data.inline_size(); + capacity = (std::min)(std::numeric_limits<size_t>::max() - size, capacity); + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(size + capacity); + cord_internal::SmallMemmove(buffer.data(), data.as_chars(), size); + buffer.SetLength(size); + data = {}; + return buffer; +} -void Cord::Append(Cord&& src) { AppendImpl(std::move(src)); } +CordBuffer Cord::GetAppendBufferSlowPath(size_t capacity, size_t min_capacity) { + auto constexpr method = CordzUpdateTracker::kGetAppendBuffer; + CordRep* tree = contents_.tree(); + if (tree != nullptr) { + CordzUpdateScope scope(contents_.cordz_info(), method); + CordRep::ExtractResult result = ExtractAppendBuffer(tree, min_capacity); + if (result.extracted != nullptr) { + contents_.SetTreeOrEmpty(result.tree, scope); + return CordBuffer(result.extracted->flat()); + } + return CordBuffer::CreateWithDefaultLimit(capacity); + } + return CreateAppendBuffer(contents_.data_, capacity); +} + +void Cord::Append(const Cord& src) { + AppendImpl(src); +} + +void Cord::Append(Cord&& src) { + AppendImpl(std::move(src)); +} template <typename T, Cord::EnableIfString<T>> void Cord::Append(T&& src) { if (src.size() <= kMaxBytesToCopy) { Append(absl::string_view(src)); } else { - Append(Cord(std::forward<T>(src))); + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.AppendTree(rep, CordzUpdateTracker::kAppendString); } } @@ -741,7 +587,8 @@ void Cord::Prepend(const Cord& src) { CordRep* src_tree = src.contents_.tree(); if (src_tree != nullptr) { CordRep::Ref(src_tree); - contents_.PrependTree(src_tree); + contents_.PrependTree(cord_internal::RemoveCrcNode(src_tree), + CordzUpdateTracker::kPrependCord); return; } @@ -750,7 +597,7 @@ void Cord::Prepend(const Cord& src) { return Prepend(src_contents); } -void Cord::Prepend(absl::string_view src) { +void Cord::PrependArray(absl::string_view src, MethodIdentifier method) { if (src.empty()) return; // memcpy(_, nullptr, 0) is undefined. if (!contents_.is_tree()) { size_t cur_size = contents_.inline_size(); @@ -764,105 +611,49 @@ void Cord::Prepend(absl::string_view src) { return; } } - contents_.PrependTree(NewTree(src.data(), src.size(), 0)); + CordRep* rep = NewTree(src.data(), src.size(), 0); + contents_.PrependTree(rep, method); } -template <typename T, Cord::EnableIfString<T>> -inline void Cord::Prepend(T&& src) { - if (src.size() <= kMaxBytesToCopy) { - Prepend(absl::string_view(src)); +void Cord::AppendPrecise(absl::string_view src, MethodIdentifier method) { + assert(!src.empty()); + assert(src.size() <= cord_internal::kMaxFlatLength); + if (contents_.remaining_inline_capacity() >= src.size()) { + const size_t inline_length = contents_.inline_size(); + memcpy(contents_.data_.as_chars() + inline_length, src.data(), src.size()); + contents_.set_inline_size(inline_length + src.size()); } else { - Prepend(Cord(std::forward<T>(src))); + contents_.AppendTree(CordRepFlat::Create(src), method); } } -template void Cord::Prepend(std::string&& src); - -static CordRep* RemovePrefixFrom(CordRep* node, size_t n) { - if (n >= node->length) return nullptr; - if (n == 0) return CordRep::Ref(node); - absl::InlinedVector<CordRep*, kInlinedVectorSize> rhs_stack; - - while (node->tag == CONCAT) { - assert(n <= node->length); - if (n < node->concat()->left->length) { - // Push right to stack, descend left. - rhs_stack.push_back(node->concat()->right); - node = node->concat()->left; - } else { - // Drop left, descend right. - n -= node->concat()->left->length; - node = node->concat()->right; - } - } - assert(n <= node->length); - - if (n == 0) { - CordRep::Ref(node); +void Cord::PrependPrecise(absl::string_view src, MethodIdentifier method) { + assert(!src.empty()); + assert(src.size() <= cord_internal::kMaxFlatLength); + if (contents_.remaining_inline_capacity() >= src.size()) { + const size_t inline_length = contents_.inline_size(); + char data[InlineRep::kMaxInline + 1] = {0}; + memcpy(data, src.data(), src.size()); + memcpy(data + src.size(), contents_.data(), inline_length); + memcpy(contents_.data_.as_chars(), data, InlineRep::kMaxInline + 1); + contents_.set_inline_size(inline_length + src.size()); } else { - size_t start = n; - size_t len = node->length - n; - if (node->tag == SUBSTRING) { - // Consider in-place update of node, similar to in RemoveSuffixFrom(). - start += node->substring()->start; - node = node->substring()->child; - } - node = NewSubstring(CordRep::Ref(node), start, len); - } - while (!rhs_stack.empty()) { - node = Concat(node, CordRep::Ref(rhs_stack.back())); - rhs_stack.pop_back(); + contents_.PrependTree(CordRepFlat::Create(src), method); } - return node; } -// RemoveSuffixFrom() is very similar to RemovePrefixFrom(), with the -// exception that removing a suffix has an optimization where a node may be -// edited in place iff that node and all its ancestors have a refcount of 1. -static CordRep* RemoveSuffixFrom(CordRep* node, size_t n) { - if (n >= node->length) return nullptr; - if (n == 0) return CordRep::Ref(node); - absl::InlinedVector<CordRep*, kInlinedVectorSize> lhs_stack; - bool inplace_ok = node->refcount.IsOne(); - - while (node->tag == CONCAT) { - assert(n <= node->length); - if (n < node->concat()->right->length) { - // Push left to stack, descend right. - lhs_stack.push_back(node->concat()->left); - node = node->concat()->right; - } else { - // Drop right, descend left. - n -= node->concat()->right->length; - node = node->concat()->left; - } - inplace_ok = inplace_ok && node->refcount.IsOne(); - } - assert(n <= node->length); - - if (n == 0) { - CordRep::Ref(node); - } else if (inplace_ok && node->tag != EXTERNAL) { - // Consider making a new buffer if the current node capacity is much - // larger than the new length. - CordRep::Ref(node); - node->length -= n; +template <typename T, Cord::EnableIfString<T>> +inline void Cord::Prepend(T&& src) { + if (src.size() <= kMaxBytesToCopy) { + Prepend(absl::string_view(src)); } else { - size_t start = 0; - size_t len = node->length - n; - if (node->tag == SUBSTRING) { - start = node->substring()->start; - node = node->substring()->child; - } - node = NewSubstring(CordRep::Ref(node), start, len); + CordRep* rep = CordRepFromString(std::forward<T>(src)); + contents_.PrependTree(rep, CordzUpdateTracker::kPrependString); } - while (!lhs_stack.empty()) { - node = Concat(CordRep::Ref(lhs_stack.back()), node); - lhs_stack.pop_back(); - } - return node; } +template void Cord::Prepend(std::string&& src); + void Cord::RemovePrefix(size_t n) { ABSL_INTERNAL_CHECK(n <= size(), absl::StrCat("Requested prefix size ", n, @@ -870,12 +661,26 @@ void Cord::RemovePrefix(size_t n) { CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.remove_prefix(n); - } else if (tree->tag == RING) { - contents_.replace_tree(CordRepRing::RemovePrefix(tree->ring(), n)); } else { - CordRep* newrep = RemovePrefixFrom(tree, n); - CordRep::Unref(tree); - contents_.replace_tree(VerifyTree(newrep)); + auto constexpr method = CordzUpdateTracker::kRemovePrefix; + CordzUpdateScope scope(contents_.cordz_info(), method); + tree = cord_internal::RemoveCrcNode(tree); + if (n >= tree->length) { + CordRep::Unref(tree); + tree = nullptr; + } else if (tree->IsBtree()) { + CordRep* old = tree; + tree = tree->btree()->SubTree(n, tree->length - n); + CordRep::Unref(old); + } else if (tree->IsSubstring() && tree->refcount.IsOne()) { + tree->substring()->start += n; + tree->length -= n; + } else { + CordRep* rep = CordRepSubstring::Substring(tree, n, tree->length - n); + CordRep::Unref(tree); + tree = rep; + } + contents_.SetTreeOrEmpty(tree, scope); } } @@ -886,64 +691,25 @@ void Cord::RemoveSuffix(size_t n) { CordRep* tree = contents_.tree(); if (tree == nullptr) { contents_.reduce_size(n); - } else if (tree->tag == RING) { - contents_.replace_tree(CordRepRing::RemoveSuffix(tree->ring(), n)); } else { - CordRep* newrep = RemoveSuffixFrom(tree, n); - CordRep::Unref(tree); - contents_.replace_tree(VerifyTree(newrep)); - } -} - -// Work item for NewSubRange(). -struct SubRange { - SubRange(CordRep* a_node, size_t a_pos, size_t a_n) - : node(a_node), pos(a_pos), n(a_n) {} - CordRep* node; // nullptr means concat last 2 results. - size_t pos; - size_t n; -}; - -static CordRep* NewSubRange(CordRep* node, size_t pos, size_t n) { - absl::InlinedVector<CordRep*, kInlinedVectorSize> results; - absl::InlinedVector<SubRange, kInlinedVectorSize> todo; - todo.push_back(SubRange(node, pos, n)); - do { - const SubRange& sr = todo.back(); - node = sr.node; - pos = sr.pos; - n = sr.n; - todo.pop_back(); - - if (node == nullptr) { - assert(results.size() >= 2); - CordRep* right = results.back(); - results.pop_back(); - CordRep* left = results.back(); - results.pop_back(); - results.push_back(Concat(left, right)); - } else if (pos == 0 && n == node->length) { - results.push_back(CordRep::Ref(node)); - } else if (node->tag != CONCAT) { - if (node->tag == SUBSTRING) { - pos += node->substring()->start; - node = node->substring()->child; - } - results.push_back(NewSubstring(CordRep::Ref(node), pos, n)); - } else if (pos + n <= node->concat()->left->length) { - todo.push_back(SubRange(node->concat()->left, pos, n)); - } else if (pos >= node->concat()->left->length) { - pos -= node->concat()->left->length; - todo.push_back(SubRange(node->concat()->right, pos, n)); + auto constexpr method = CordzUpdateTracker::kRemoveSuffix; + CordzUpdateScope scope(contents_.cordz_info(), method); + tree = cord_internal::RemoveCrcNode(tree); + if (n >= tree->length) { + CordRep::Unref(tree); + tree = nullptr; + } else if (tree->IsBtree()) { + tree = CordRepBtree::RemoveSuffix(tree->btree(), n); + } else if (!tree->IsExternal() && tree->refcount.IsOne()) { + assert(tree->IsFlat() || tree->IsSubstring()); + tree->length -= n; } else { - size_t left_n = node->concat()->left->length - pos; - todo.push_back(SubRange(nullptr, 0, 0)); // Concat() - todo.push_back(SubRange(node->concat()->right, 0, n - left_n)); - todo.push_back(SubRange(node->concat()->left, pos, left_n)); + CordRep* rep = CordRepSubstring::Substring(tree, 0, tree->length - n); + CordRep::Unref(tree); + tree = rep; } - } while (!todo.empty()); - assert(results.size() == 1); - return results[0]; + contents_.SetTreeOrEmpty(tree, scope); + } } Cord Cord::Subcord(size_t pos, size_t new_size) const { @@ -951,17 +717,18 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { size_t length = size(); if (pos > length) pos = length; if (new_size > length - pos) new_size = length - pos; + if (new_size == 0) return sub_cord; + CordRep* tree = contents_.tree(); if (tree == nullptr) { - // sub_cord is newly constructed, no need to re-zero-out the tail of - // contents_ memory. - sub_cord.contents_.set_data(contents_.data() + pos, new_size, false); - } else if (new_size == 0) { - // We want to return empty subcord, so nothing to do. - } else if (new_size <= InlineRep::kMaxInline) { + sub_cord.contents_.set_data(contents_.data() + pos, new_size); + return sub_cord; + } + + if (new_size <= InlineRep::kMaxInline) { + char* dest = sub_cord.contents_.data_.as_chars(); Cord::ChunkIterator it = chunk_begin(); it.AdvanceBytes(pos); - char* dest = sub_cord.contents_.data_.as_chars(); size_t remaining_size = new_size; while (remaining_size > it->size()) { cord_internal::SmallMemmove(dest, it->data(), it->size()); @@ -971,153 +738,18 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { } cord_internal::SmallMemmove(dest, it->data(), remaining_size); sub_cord.contents_.set_inline_size(new_size); - } else if (tree->tag == RING) { - tree = CordRepRing::SubRing(CordRep::Ref(tree)->ring(), pos, new_size); - sub_cord.contents_.set_tree(tree); - } else { - sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); - } - return sub_cord; -} - -// -------------------------------------------------------------------- -// Balancing - -class CordForest { - public: - explicit CordForest(size_t length) - : root_length_(length), trees_(kMinLengthSize, nullptr) {} - - void Build(CordRep* cord_root) { - std::vector<CordRep*> pending = {cord_root}; - - while (!pending.empty()) { - CordRep* node = pending.back(); - pending.pop_back(); - CheckNode(node); - if (ABSL_PREDICT_FALSE(node->tag != CONCAT)) { - AddNode(node); - continue; - } - - CordRepConcat* concat_node = node->concat(); - if (concat_node->depth() >= kMinLengthSize || - concat_node->length < min_length[concat_node->depth()]) { - pending.push_back(concat_node->right); - pending.push_back(concat_node->left); - - if (concat_node->refcount.IsOne()) { - concat_node->left = concat_freelist_; - concat_freelist_ = concat_node; - } else { - CordRep::Ref(concat_node->right); - CordRep::Ref(concat_node->left); - CordRep::Unref(concat_node); - } - } else { - AddNode(node); - } - } - } - - CordRep* ConcatNodes() { - CordRep* sum = nullptr; - for (auto* node : trees_) { - if (node == nullptr) continue; - - sum = PrependNode(node, sum); - root_length_ -= node->length; - if (root_length_ == 0) break; - } - ABSL_INTERNAL_CHECK(sum != nullptr, "Failed to locate sum node"); - return VerifyTree(sum); - } - - private: - CordRep* AppendNode(CordRep* node, CordRep* sum) { - return (sum == nullptr) ? node : MakeConcat(sum, node); - } - - CordRep* PrependNode(CordRep* node, CordRep* sum) { - return (sum == nullptr) ? node : MakeConcat(node, sum); - } - - void AddNode(CordRep* node) { - CordRep* sum = nullptr; - - // Collect together everything with which we will merge with node - int i = 0; - for (; node->length > min_length[i + 1]; ++i) { - auto& tree_at_i = trees_[i]; - - if (tree_at_i == nullptr) continue; - sum = PrependNode(tree_at_i, sum); - tree_at_i = nullptr; - } - - sum = AppendNode(node, sum); - - // Insert sum into appropriate place in the forest - for (; sum->length >= min_length[i]; ++i) { - auto& tree_at_i = trees_[i]; - if (tree_at_i == nullptr) continue; - - sum = MakeConcat(tree_at_i, sum); - tree_at_i = nullptr; - } - - // min_length[0] == 1, which means sum->length >= min_length[0] - assert(i > 0); - trees_[i - 1] = sum; - } - - // Make concat node trying to resue existing CordRepConcat nodes we - // already collected in the concat_freelist_. - CordRep* MakeConcat(CordRep* left, CordRep* right) { - if (concat_freelist_ == nullptr) return RawConcat(left, right); - - CordRepConcat* rep = concat_freelist_; - if (concat_freelist_->left == nullptr) { - concat_freelist_ = nullptr; - } else { - concat_freelist_ = concat_freelist_->left->concat(); - } - SetConcatChildren(rep, left, right); - - return rep; + return sub_cord; } - static void CheckNode(CordRep* node) { - ABSL_INTERNAL_CHECK(node->length != 0u, ""); - if (node->tag == CONCAT) { - ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, ""); - ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, ""); - ABSL_INTERNAL_CHECK(node->length == (node->concat()->left->length + - node->concat()->right->length), - ""); - } - } - - size_t root_length_; - - // use an inlined vector instead of a flat array to get bounds checking - absl::InlinedVector<CordRep*, kInlinedVectorSize> trees_; - - // List of concat nodes we can re-use for Cord balancing. - CordRepConcat* concat_freelist_ = nullptr; -}; - -static CordRep* Rebalance(CordRep* node) { - VerifyTree(node); - assert(node->tag == CONCAT); - - if (node->length == 0) { - return nullptr; + tree = cord_internal::SkipCrcNode(tree); + if (tree->IsBtree()) { + tree = tree->btree()->SubTree(pos, new_size); + } else { + tree = CordRepSubstring::Substring(tree, pos, new_size); } - - CordForest forest(node->length); - forest.Build(node); - return forest.ConcatNodes(); + sub_cord.contents_.EmplaceTree(tree, contents_.data_, + CordzUpdateTracker::kSubCord); + return sub_cord; } // -------------------------------------------------------------------- @@ -1159,29 +791,29 @@ bool ComputeCompareResult<bool>(int memcmp_res) { } // namespace -// Helper routine. Locates the first flat chunk of the Cord without -// initializing the iterator. +// Helper routine. Locates the first flat or external chunk of the Cord without +// initializing the iterator, and returns a string_view referencing the data. inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { if (!is_tree()) { return absl::string_view(data_.as_chars(), data_.inline_size()); } - CordRep* node = tree(); - if (node->tag >= FLAT) { + CordRep* node = cord_internal::SkipCrcNode(tree()); + if (node->IsFlat()) { return absl::string_view(node->flat()->Data(), node->length); } - if (node->tag == EXTERNAL) { + if (node->IsExternal()) { return absl::string_view(node->external()->base, node->length); } - if (node->tag == RING) { - return node->ring()->entry_data(node->ring()->head()); - } - - // Walk down the left branches until we hit a non-CONCAT node. - while (node->tag == CONCAT) { - node = node->concat()->left; + if (node->IsBtree()) { + CordRepBtree* tree = node->btree(); + int height = tree->height(); + while (--height >= 0) { + tree = tree->Edge(CordRepBtree::kFront)->btree(); + } + return tree->Data(tree->begin()); } // Get the child node if we encounter a SUBSTRING. @@ -1189,20 +821,42 @@ inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { size_t length = node->length; assert(length != 0); - if (node->tag == SUBSTRING) { + if (node->IsSubstring()) { offset = node->substring()->start; node = node->substring()->child; } - if (node->tag >= FLAT) { + if (node->IsFlat()) { return absl::string_view(node->flat()->Data() + offset, length); } - assert((node->tag == EXTERNAL) && "Expect FLAT or EXTERNAL node here"); + assert(node->IsExternal() && "Expect FLAT or EXTERNAL node here"); return absl::string_view(node->external()->base + offset, length); } +void Cord::SetExpectedChecksum(uint32_t crc) { + auto constexpr method = CordzUpdateTracker::kSetExpectedChecksum; + if (empty()) return; + + if (!contents_.is_tree()) { + CordRep* rep = contents_.MakeFlatWithExtraCapacity(0); + rep = CordRepCrc::New(rep, crc); + contents_.EmplaceTree(rep, method); + } else { + const CordzUpdateScope scope(contents_.data_.cordz_info(), method); + CordRep* rep = CordRepCrc::New(contents_.data_.as_tree(), crc); + contents_.SetTree(rep, scope); + } +} + +absl::optional<uint32_t> Cord::ExpectedChecksum() const { + if (!contents_.is_tree() || !contents_.tree()->IsCrc()) { + return absl::nullopt; + } + return contents_.tree()->crc()->crc; +} + inline int Cord::CompareSlowPath(absl::string_view rhs, size_t compared_size, size_t size_to_compare) const { auto advance = [](Cord::ChunkIterator* it, absl::string_view* chunk) { @@ -1378,46 +1032,11 @@ void Cord::CopyToArraySlowPath(char* dst) const { } } -Cord::ChunkIterator& Cord::ChunkIterator::AdvanceStack() { - auto& stack_of_right_children = stack_of_right_children_; - if (stack_of_right_children.empty()) { - assert(!current_chunk_.empty()); // Called on invalid iterator. - // We have reached the end of the Cord. - return *this; - } - - // Process the next node on the stack. - CordRep* node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - - // Walk down the left branches until we hit a non-CONCAT node. Save the - // right children to the stack for subsequent traversal. - while (node->tag == CONCAT) { - stack_of_right_children.push_back(node->concat()->right); - node = node->concat()->left; - } - - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->tag == SUBSTRING) { - offset = node->substring()->start; - node = node->substring()->child; - } - - assert(node->tag == EXTERNAL || node->tag >= FLAT); - assert(length != 0); - const char* data = - node->tag == EXTERNAL ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset, length); - current_leaf_ = node; - return *this; -} - Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { ABSL_HARDENING_ASSERT(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); Cord subcord; + auto constexpr method = CordzUpdateTracker::kCordReader; if (n <= InlineRep::kMaxInline) { // Range to read fits in inline data. Flatten it. @@ -1437,184 +1056,51 @@ Cord Cord::ChunkIterator::AdvanceAndReadBytes(size_t n) { return subcord; } - if (ring_reader_) { + if (btree_reader_) { size_t chunk_size = current_chunk_.size(); if (n <= chunk_size && n <= kMaxBytesToCopy) { - subcord = Cord(current_chunk_.substr(0, n)); - } else { - auto* ring = CordRep::Ref(ring_reader_.ring())->ring(); - size_t offset = ring_reader_.length() - bytes_remaining_; - subcord.contents_.set_tree(CordRepRing::SubRing(ring, offset, n)); - } - if (n < chunk_size) { - bytes_remaining_ -= n; - current_chunk_.remove_prefix(n); + subcord = Cord(current_chunk_.substr(0, n), method); + if (n < chunk_size) { + current_chunk_.remove_prefix(n); + } else { + current_chunk_ = btree_reader_.Next(); + } } else { - AdvanceBytesRing(n); + CordRep* rep; + current_chunk_ = btree_reader_.Read(n, chunk_size, rep); + subcord.contents_.EmplaceTree(rep, method); } + bytes_remaining_ -= n; return subcord; } - auto& stack_of_right_children = stack_of_right_children_; - if (n < current_chunk_.size()) { - // Range to read is a proper subrange of the current chunk. - assert(current_leaf_ != nullptr); - CordRep* subnode = CordRep::Ref(current_leaf_); - const char* data = subnode->tag == EXTERNAL ? subnode->external()->base - : subnode->flat()->Data(); - subnode = NewSubstring(subnode, current_chunk_.data() - data, n); - subcord.contents_.set_tree(VerifyTree(subnode)); - RemoveChunkPrefix(n); - return subcord; - } - - // Range to read begins with a proper subrange of the current chunk. - assert(!current_chunk_.empty()); + // Short circuit if reading the entire data edge. assert(current_leaf_ != nullptr); - CordRep* subnode = CordRep::Ref(current_leaf_); - if (current_chunk_.size() < subnode->length) { - const char* data = subnode->tag == EXTERNAL ? subnode->external()->base - : subnode->flat()->Data(); - subnode = NewSubstring(subnode, current_chunk_.data() - data, - current_chunk_.size()); - } - n -= current_chunk_.size(); - bytes_remaining_ -= current_chunk_.size(); - - // Process the next node(s) on the stack, reading whole subtrees depending on - // their length and how many bytes we are advancing. - CordRep* node = nullptr; - while (!stack_of_right_children.empty()) { - node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - if (node->length > n) break; - // TODO(qrczak): This might unnecessarily recreate existing concat nodes. - // Avoiding that would need pretty complicated logic (instead of - // current_leaf, keep current_subtree_ which points to the highest node - // such that the current leaf can be found on the path of left children - // starting from current_subtree_; delay creating subnode while node is - // below current_subtree_; find the proper node along the path of left - // children starting from current_subtree_ if this loop exits while staying - // below current_subtree_; etc.; alternatively, push parents instead of - // right children on the stack). - subnode = Concat(subnode, CordRep::Ref(node)); - n -= node->length; - bytes_remaining_ -= node->length; - node = nullptr; - } - - if (node == nullptr) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - subcord.contents_.set_tree(VerifyTree(subnode)); + if (n == current_leaf_->length) { + bytes_remaining_ = 0; + current_chunk_ = {}; + CordRep* tree = CordRep::Ref(current_leaf_); + subcord.contents_.EmplaceTree(VerifyTree(tree), method); return subcord; } - // Walk down the appropriate branches until we hit a non-CONCAT node. Save the - // right children to the stack for subsequent traversal. - while (node->tag == CONCAT) { - if (node->concat()->left->length > n) { - // Push right, descend left. - stack_of_right_children.push_back(node->concat()->right); - node = node->concat()->left; - } else { - // Read left, descend right. - subnode = Concat(subnode, CordRep::Ref(node->concat()->left)); - n -= node->concat()->left->length; - bytes_remaining_ -= node->concat()->left->length; - node = node->concat()->right; - } - } + // From this point on, we need a partial substring node. + // Get pointer to the underlying flat or external data payload and + // compute data pointer and offset into current flat or external. + CordRep* payload = current_leaf_->IsSubstring() + ? current_leaf_->substring()->child + : current_leaf_; + const char* data = payload->IsExternal() ? payload->external()->base + : payload->flat()->Data(); + const size_t offset = current_chunk_.data() - data; - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->tag == SUBSTRING) { - offset = node->substring()->start; - node = node->substring()->child; - } - - // Range to read ends with a proper (possibly empty) subrange of the current - // chunk. - assert(node->tag == EXTERNAL || node->tag >= FLAT); - assert(length > n); - if (n > 0) { - subnode = Concat(subnode, NewSubstring(CordRep::Ref(node), offset, n)); - } - const char* data = - node->tag == EXTERNAL ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset + n, length - n); - current_leaf_ = node; + auto* tree = CordRepSubstring::Substring(payload, offset, n); + subcord.contents_.EmplaceTree(VerifyTree(tree), method); bytes_remaining_ -= n; - subcord.contents_.set_tree(VerifyTree(subnode)); + current_chunk_.remove_prefix(n); return subcord; } -void Cord::ChunkIterator::AdvanceBytesSlowPath(size_t n) { - assert(bytes_remaining_ >= n && "Attempted to iterate past `end()`"); - assert(n >= current_chunk_.size()); // This should only be called when - // iterating to a new node. - - n -= current_chunk_.size(); - bytes_remaining_ -= current_chunk_.size(); - - if (stack_of_right_children_.empty()) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - return; - } - - // Process the next node(s) on the stack, skipping whole subtrees depending on - // their length and how many bytes we are advancing. - CordRep* node = nullptr; - auto& stack_of_right_children = stack_of_right_children_; - while (!stack_of_right_children.empty()) { - node = stack_of_right_children.back(); - stack_of_right_children.pop_back(); - if (node->length > n) break; - n -= node->length; - bytes_remaining_ -= node->length; - node = nullptr; - } - - if (node == nullptr) { - // We have reached the end of the Cord. - assert(bytes_remaining_ == 0); - return; - } - - // Walk down the appropriate branches until we hit a non-CONCAT node. Save the - // right children to the stack for subsequent traversal. - while (node->tag == CONCAT) { - if (node->concat()->left->length > n) { - // Push right, descend left. - stack_of_right_children.push_back(node->concat()->right); - node = node->concat()->left; - } else { - // Skip left, descend right. - n -= node->concat()->left->length; - bytes_remaining_ -= node->concat()->left->length; - node = node->concat()->right; - } - } - - // Get the child node if we encounter a SUBSTRING. - size_t offset = 0; - size_t length = node->length; - if (node->tag == SUBSTRING) { - offset = node->substring()->start; - node = node->substring()->child; - } - - assert(node->tag == EXTERNAL || node->tag >= FLAT); - assert(length > n); - const char* data = - node->tag == EXTERNAL ? node->external()->base : node->flat()->Data(); - current_chunk_ = absl::string_view(data + offset + n, length - n); - current_leaf_ = node; - bytes_remaining_ -= n; -} - char Cord::operator[](size_t i) const { ABSL_HARDENING_ASSERT(i < size()); size_t offset = i; @@ -1622,30 +1108,21 @@ char Cord::operator[](size_t i) const { if (rep == nullptr) { return contents_.data()[i]; } + rep = cord_internal::SkipCrcNode(rep); while (true) { assert(rep != nullptr); assert(offset < rep->length); - if (rep->tag >= FLAT) { + if (rep->IsFlat()) { // Get the "i"th character directly from the flat array. return rep->flat()->Data()[offset]; - } else if (rep->tag == RING) { - return rep->ring()->GetCharacter(offset); - } else if (rep->tag == EXTERNAL) { + } else if (rep->IsBtree()) { + return rep->btree()->GetCharacter(offset); + } else if (rep->IsExternal()) { // Get the "i"th character from the external array. return rep->external()->base[offset]; - } else if (rep->tag == CONCAT) { - // Recursively branch to the side of the concatenation that the "i"th - // character is on. - size_t left_length = rep->concat()->left->length; - if (offset < left_length) { - rep = rep->concat()->left; - } else { - offset -= left_length; - rep = rep->concat()->right; - } } else { // This must be a substring a node, so bypass it to get to the child. - assert(rep->tag == SUBSTRING); + assert(rep->IsSubstring()); offset += rep->substring()->start; rep = rep->substring()->child; } @@ -1653,6 +1130,7 @@ char Cord::operator[](size_t i) const { } absl::string_view Cord::FlattenSlowPath() { + assert(contents_.is_tree()); size_t total_size = size(); CordRep* new_rep; char* new_buffer; @@ -1673,31 +1151,36 @@ absl::string_view Cord::FlattenSlowPath() { s.size()); }); } - if (CordRep* tree = contents_.tree()) { - CordRep::Unref(tree); - } - contents_.set_tree(new_rep); + CordzUpdateScope scope(contents_.cordz_info(), CordzUpdateTracker::kFlatten); + CordRep::Unref(contents_.as_tree()); + contents_.SetTree(new_rep, scope); return absl::string_view(new_buffer, total_size); } /* static */ bool Cord::GetFlatAux(CordRep* rep, absl::string_view* fragment) { assert(rep != nullptr); - if (rep->tag >= FLAT) { + rep = cord_internal::SkipCrcNode(rep); + if (rep->IsFlat()) { *fragment = absl::string_view(rep->flat()->Data(), rep->length); return true; - } else if (rep->tag == EXTERNAL) { + } else if (rep->IsExternal()) { *fragment = absl::string_view(rep->external()->base, rep->length); return true; - } else if (rep->tag == SUBSTRING) { + } else if (rep->IsBtree()) { + return rep->btree()->IsFlat(fragment); + } else if (rep->IsSubstring()) { CordRep* child = rep->substring()->child; - if (child->tag >= FLAT) { + if (child->IsFlat()) { *fragment = absl::string_view( child->flat()->Data() + rep->substring()->start, rep->length); return true; - } else if (child->tag == EXTERNAL) { + } else if (child->IsExternal()) { *fragment = absl::string_view( child->external()->base + rep->substring()->start, rep->length); return true; + } else if (child->IsBtree()) { + return child->btree()->IsFlat(rep->substring()->start, rep->length, + fragment); } } return false; @@ -1706,7 +1189,10 @@ absl::string_view Cord::FlattenSlowPath() { /* static */ void Cord::ForEachChunkAux( absl::cord_internal::CordRep* rep, absl::FunctionRef<void(absl::string_view)> callback) { - if (rep->tag == RING) { + assert(rep != nullptr); + rep = cord_internal::SkipCrcNode(rep); + + if (rep->IsBtree()) { ChunkIterator it(rep), end; while (it != end) { callback(*it); @@ -1715,44 +1201,13 @@ absl::string_view Cord::FlattenSlowPath() { return; } - assert(rep != nullptr); - int stack_pos = 0; - constexpr int stack_max = 128; - // Stack of right branches for tree traversal - absl::cord_internal::CordRep* stack[stack_max]; - absl::cord_internal::CordRep* current_node = rep; - while (true) { - if (current_node->tag == CONCAT) { - if (stack_pos == stack_max) { - // There's no more room on our stack array to add another right branch, - // and the idea is to avoid allocations, so call this function - // recursively to navigate this subtree further. (This is not something - // we expect to happen in practice). - ForEachChunkAux(current_node, callback); - - // Pop the next right branch and iterate. - current_node = stack[--stack_pos]; - continue; - } else { - // Save the right branch for later traversal and continue down the left - // branch. - stack[stack_pos++] = current_node->concat()->right; - current_node = current_node->concat()->left; - continue; - } - } - // This is a leaf node, so invoke our callback. - absl::string_view chunk; - bool success = GetFlatAux(current_node, &chunk); - assert(success); - if (success) { - callback(chunk); - } - if (stack_pos == 0) { - // end of traversal - return; - } - current_node = stack[--stack_pos]; + // This is a leaf node, so invoke our callback. + absl::cord_internal::CordRep* current_node = cord_internal::SkipCrcNode(rep); + absl::string_view chunk; + bool success = GetFlatAux(current_node, &chunk); + assert(success); + if (success) { + callback(chunk); } } @@ -1767,40 +1222,28 @@ static void DumpNode(CordRep* rep, bool include_data, std::ostream* os, *os << " ["; if (include_data) *os << static_cast<void*>(rep); *os << "]"; - *os << " " << (IsRootBalanced(rep) ? 'b' : 'u'); *os << " " << std::setw(indent) << ""; - if (rep->tag == CONCAT) { - *os << "CONCAT depth=" << Depth(rep) << "\n"; + if (rep->IsCrc()) { + *os << "CRC crc=" << rep->crc()->crc << "\n"; indent += kIndentStep; - indents.push_back(indent); - stack.push_back(rep->concat()->right); - rep = rep->concat()->left; - } else if (rep->tag == SUBSTRING) { + rep = rep->crc()->child; + } else if (rep->IsSubstring()) { *os << "SUBSTRING @ " << rep->substring()->start << "\n"; indent += kIndentStep; rep = rep->substring()->child; } else { // Leaf or ring - if (rep->tag == EXTERNAL) { + if (rep->IsExternal()) { *os << "EXTERNAL ["; if (include_data) *os << absl::CEscape(std::string(rep->external()->base, rep->length)); *os << "]\n"; - } else if (rep->tag >= FLAT) { - *os << "FLAT cap=" << rep->flat()->Capacity() - << " ["; + } else if (rep->IsFlat()) { + *os << "FLAT cap=" << rep->flat()->Capacity() << " ["; if (include_data) *os << absl::CEscape(std::string(rep->flat()->Data(), rep->length)); *os << "]\n"; } else { - assert(rep->tag == RING); - auto* ring = rep->ring(); - *os << "RING, entries = " << ring->entries() << "\n"; - CordRepRing::index_type head = ring->head(); - do { - DumpNode(ring->entry_child(head), include_data, os, - indent + kIndentStep); - head = ring->advance(head);; - } while (head != ring->tail()); + CordRepBtree::Dump(rep, /*label=*/ "", include_data, *os); } if (stack.empty()) break; rep = stack.back(); @@ -1820,7 +1263,7 @@ static std::string ReportError(CordRep* root, CordRep* node) { } static bool VerifyNode(CordRep* root, CordRep* start_node, - bool full_validation) { + bool /* full_validation */) { absl::InlinedVector<CordRep*, 2> worklist; worklist.push_back(start_node); do { @@ -1830,100 +1273,33 @@ static bool VerifyNode(CordRep* root, CordRep* start_node, ABSL_INTERNAL_CHECK(node != nullptr, ReportError(root, node)); if (node != root) { ABSL_INTERNAL_CHECK(node->length != 0, ReportError(root, node)); + ABSL_INTERNAL_CHECK(!node->IsCrc(), ReportError(root, node)); } - if (node->tag == CONCAT) { - ABSL_INTERNAL_CHECK(node->concat()->left != nullptr, - ReportError(root, node)); - ABSL_INTERNAL_CHECK(node->concat()->right != nullptr, - ReportError(root, node)); - ABSL_INTERNAL_CHECK((node->length == node->concat()->left->length + - node->concat()->right->length), + if (node->IsFlat()) { + ABSL_INTERNAL_CHECK(node->length <= node->flat()->Capacity(), ReportError(root, node)); - if (full_validation) { - worklist.push_back(node->concat()->right); - worklist.push_back(node->concat()->left); - } - } else if (node->tag >= FLAT) { - ABSL_INTERNAL_CHECK( - node->length <= node->flat()->Capacity(), - ReportError(root, node)); - } else if (node->tag == EXTERNAL) { + } else if (node->IsExternal()) { ABSL_INTERNAL_CHECK(node->external()->base != nullptr, ReportError(root, node)); - } else if (node->tag == SUBSTRING) { + } else if (node->IsSubstring()) { ABSL_INTERNAL_CHECK( node->substring()->start < node->substring()->child->length, ReportError(root, node)); ABSL_INTERNAL_CHECK(node->substring()->start + node->length <= node->substring()->child->length, ReportError(root, node)); + } else if (node->IsCrc()) { + ABSL_INTERNAL_CHECK(node->crc()->child != nullptr, + ReportError(root, node)); + ABSL_INTERNAL_CHECK(node->crc()->length == node->crc()->child->length, + ReportError(root, node)); + worklist.push_back(node->crc()->child); } } while (!worklist.empty()); return true; } -// Traverses the tree and computes the total memory allocated. -/* static */ size_t Cord::MemoryUsageAux(const CordRep* rep) { - size_t total_mem_usage = 0; - - // Allow a quick exit for the common case that the root is a leaf. - if (RepMemoryUsageLeaf(rep, &total_mem_usage)) { - return total_mem_usage; - } - - // Iterate over the tree. cur_node is never a leaf node and leaf nodes will - // never be appended to tree_stack. This reduces overhead from manipulating - // tree_stack. - absl::InlinedVector<const CordRep*, kInlinedVectorSize> tree_stack; - const CordRep* cur_node = rep; - while (true) { - const CordRep* next_node = nullptr; - - if (cur_node->tag == CONCAT) { - total_mem_usage += sizeof(CordRepConcat); - const CordRep* left = cur_node->concat()->left; - if (!RepMemoryUsageLeaf(left, &total_mem_usage)) { - next_node = left; - } - - const CordRep* right = cur_node->concat()->right; - if (!RepMemoryUsageLeaf(right, &total_mem_usage)) { - if (next_node) { - tree_stack.push_back(next_node); - } - next_node = right; - } - } else if (cur_node->tag == RING) { - total_mem_usage += CordRepRing::AllocSize(cur_node->ring()->capacity()); - const CordRepRing* ring = cur_node->ring(); - CordRepRing::index_type pos = ring->head(), tail = ring->tail(); - do { - CordRep* node = ring->entry_child(pos); - assert(node->tag >= FLAT || node->tag == EXTERNAL); - RepMemoryUsageLeaf(node, &total_mem_usage); - } while ((pos = ring->advance(pos)) != tail); - } else { - // Since cur_node is not a leaf or a concat node it must be a substring. - assert(cur_node->tag == SUBSTRING); - total_mem_usage += sizeof(CordRepSubstring); - next_node = cur_node->substring()->child; - if (RepMemoryUsageLeaf(next_node, &total_mem_usage)) { - next_node = nullptr; - } - } - - if (!next_node) { - if (tree_stack.empty()) { - return total_mem_usage; - } - next_node = tree_stack.back(); - tree_stack.pop_back(); - } - cur_node = next_node; - } -} - std::ostream& operator<<(std::ostream& out, const Cord& cord) { for (absl::string_view chunk : cord.Chunks()) { out.write(chunk.data(), chunk.size()); @@ -1941,7 +1317,6 @@ uint8_t CordTestAccess::LengthToTag(size_t s) { ABSL_INTERNAL_CHECK(s <= kMaxFlatLength, absl::StrCat("Invalid length ", s)); return cord_internal::AllocatedSizeToTag(s + cord_internal::kFlatOverhead); } -size_t CordTestAccess::SizeofCordRepConcat() { return sizeof(CordRepConcat); } size_t CordTestAccess::SizeofCordRepExternal() { return sizeof(CordRepExternal); } diff --git a/absl/strings/cord.h b/absl/strings/cord.h index fa9cb913..18d6ab85 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -70,6 +70,8 @@ #include <string> #include <type_traits> +#include "absl/base/attributes.h" +#include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/internal/per_thread_tls.h" #include "absl/base/macros.h" @@ -77,9 +79,19 @@ #include "absl/container/inlined_vector.h" #include "absl/functional/function_ref.h" #include "absl/meta/type_traits.h" +#include "absl/strings/cord_analysis.h" +#include "absl/strings/cord_buffer.h" +#include "absl/strings/internal/cord_data_edge.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_reader.h" +#include "absl/strings/internal/cord_rep_crc.h" #include "absl/strings/internal/cord_rep_ring.h" -#include "absl/strings/internal/cord_rep_ring_reader.h" +#include "absl/strings/internal/cordz_functions.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_scope.h" +#include "absl/strings/internal/cordz_update_tracker.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/string_constant.h" #include "absl/strings/string_view.h" @@ -93,6 +105,20 @@ template <typename Releaser> Cord MakeCordFromExternal(absl::string_view, Releaser&&); void CopyCordToString(const Cord& src, std::string* dst); +// Cord memory accounting modes +enum class CordMemoryAccounting { + // Counts the *approximate* number of bytes held in full or in part by this + // Cord (which may not remain the same between invocations). Cords that share + // memory could each be "charged" independently for the same shared memory. + kTotal, + + // Counts the *approximate* number of bytes held in full or in part by this + // Cord weighted by the sharing ratio of that data. For example, if some data + // edge is shared by 4 different Cords, then each cord is attributed 1/4th of + // the total memory usage as a 'fair share' of the total memory usage. + kFairShare, +}; + // Cord // // A Cord is a sequence of characters, designed to be more efficient than a @@ -207,7 +233,7 @@ class Cord { // // Releases the Cord data. Any nodes that share data with other Cords, if // applicable, will have their reference counts reduced by 1. - void Clear(); + ABSL_ATTRIBUTE_REINITIALIZES void Clear(); // Cord::Append() // @@ -219,6 +245,45 @@ class Cord { template <typename T, EnableIfString<T> = 0> void Append(T&& src); + // Appends `buffer` to this cord, unless `buffer` has a zero length in which + // case this method has no effect on this cord instance. + // This method is guaranteed to consume `buffer`. + void Append(CordBuffer buffer); + + // Returns a CordBuffer, re-using potential existing capacity in this cord. + // + // Cord instances may have additional unused capacity in the last (or first) + // nodes of the underlying tree to facilitate amortized growth. This method + // allows applications to explicitly use this spare capacity if available, + // or create a new CordBuffer instance otherwise. + // If this cord has a final non-shared node with at least `min_capacity` + // available, then this method will return that buffer including its data + // contents. I.e.; the returned buffer will have a non-zero length, and + // a capacity of at least `buffer.length + min_capacity`. Otherwise, this + // method will return `CordBuffer::CreateWithDefaultLimit(capacity)`. + // + // Below an example of using GetAppendBuffer. Notice that in this example we + // use `GetAppendBuffer()` only on the first iteration. As we know nothing + // about any initial extra capacity in `cord`, we may be able to use the extra + // capacity. But as we add new buffers with fully utilized contents after that + // we avoid calling `GetAppendBuffer()` on subsequent iterations: while this + // works fine, it results in an unnecessary inspection of cord contents: + // + // void AppendRandomDataToCord(absl::Cord &cord, size_t n) { + // bool first = true; + // while (n > 0) { + // CordBuffer buffer = first ? cord.GetAppendBuffer(n) + // : CordBuffer::CreateWithDefaultLimit(n); + // absl::Span<char> data = buffer.available_up_to(n); + // FillRandomValues(data.data(), data.size()); + // buffer.IncreaseLengthBy(data.size()); + // cord.Append(std::move(buffer)); + // n -= data.size(); + // first = false; + // } + // } + CordBuffer GetAppendBuffer(size_t capacity, size_t min_capacity = 16); + // Cord::Prepend() // // Prepends data to the Cord, which may come from another Cord or other string @@ -228,6 +293,11 @@ class Cord { template <typename T, EnableIfString<T> = 0> void Prepend(T&& src); + // Prepends `buffer` to this cord, unless `buffer` has a zero length in which + // case this method has no effect on this cord instance. + // This method is guaranteed to consume `buffer`. + void Prepend(CordBuffer buffer); + // Cord::RemovePrefix() // // Removes the first `n` bytes of a Cord. @@ -249,9 +319,7 @@ class Cord { // swap() // // Swaps the contents of two Cords. - friend void swap(Cord& x, Cord& y) noexcept { - x.swap(y); - } + friend void swap(Cord& x, Cord& y) noexcept { x.swap(y); } // Cord::size() // @@ -265,11 +333,10 @@ class Cord { // Cord::EstimatedMemoryUsage() // - // Returns the *approximate* number of bytes held in full or in part by this - // Cord (which may not remain the same between invocations). Note that Cords - // that share memory could each be "charged" independently for the same shared - // memory. - size_t EstimatedMemoryUsage() const; + // Returns the *approximate* number of bytes held by this cord. + // See CordMemoryAccounting for more information on the accounting method. + size_t EstimatedMemoryUsage(CordMemoryAccounting accounting_method = + CordMemoryAccounting::kTotal) const; // Cord::Compare() // @@ -319,7 +386,7 @@ class Cord { //---------------------------------------------------------------------------- // // A `Cord::ChunkIterator` allows iteration over the constituent chunks of its - // Cord. Such iteration allows you to perform non-const operatons on the data + // Cord. Such iteration allows you to perform non-const operations on the data // of a Cord without modifying it. // // Generally, you do not instantiate a `Cord::ChunkIterator` directly; @@ -364,14 +431,8 @@ class Cord { private: using CordRep = absl::cord_internal::CordRep; - using CordRepRing = absl::cord_internal::CordRepRing; - using CordRepRingReader = absl::cord_internal::CordRepRingReader; - - // Stack of right children of concat nodes that we have to visit. - // Keep this at the end of the structure to avoid cache-thrashing. - // TODO(jgm): Benchmark to see if there's a more optimal value than 47 for - // the inlined vector size (47 exists for backward compatibility). - using Stack = absl::InlinedVector<absl::cord_internal::CordRep*, 47>; + using CordRepBtree = absl::cord_internal::CordRepBtree; + using CordRepBtreeReader = absl::cord_internal::CordRepBtreeReader; // Constructs a `begin()` iterator from `tree`. `tree` must not be null. explicit ChunkIterator(cord_internal::CordRep* tree); @@ -388,16 +449,9 @@ class Cord { Cord AdvanceAndReadBytes(size_t n); void AdvanceBytes(size_t n); - // Stack specific operator++ - ChunkIterator& AdvanceStack(); - - // Ring buffer specific operator++ - ChunkIterator& AdvanceRing(); - void AdvanceBytesRing(size_t n); - - // Iterates `n` bytes, where `n` is expected to be greater than or equal to - // `current_chunk_.size()`. - void AdvanceBytesSlowPath(size_t n); + // Btree specific operator++ + ChunkIterator& AdvanceBtree(); + void AdvanceBytesBtree(size_t n); // A view into bytes of the current `CordRep`. It may only be a view to a // suffix of bytes if this is being used by `CharIterator`. @@ -409,14 +463,11 @@ class Cord { // The number of bytes left in the `Cord` over which we are iterating. size_t bytes_remaining_ = 0; - // Cord reader for ring buffers. Empty if not traversing a ring buffer. - CordRepRingReader ring_reader_; - - // See 'Stack' alias definition. - Stack stack_of_right_children_; + // Cord reader for cord btrees. Empty if not traversing a btree. + CordRepBtreeReader btree_reader_; }; - // Cord::ChunkIterator::chunk_begin() + // Cord::chunk_begin() // // Returns an iterator to the first chunk of the `Cord`. // @@ -432,7 +483,7 @@ class Cord { // } ChunkIterator chunk_begin() const; - // Cord::ChunkItertator::chunk_end() + // Cord::chunk_end() // // Returns an iterator one increment past the last chunk of the `Cord`. // @@ -442,7 +493,7 @@ class Cord { ChunkIterator chunk_end() const; //---------------------------------------------------------------------------- - // Cord::ChunkIterator::ChunkRange + // Cord::ChunkRange //---------------------------------------------------------------------------- // // `ChunkRange` is a helper class for iterating over the chunks of the `Cord`, @@ -455,6 +506,16 @@ class Cord { // `Cord::chunk_begin()` and `Cord::chunk_end()`. class ChunkRange { public: + // Fulfill minimum c++ container requirements [container.requirements] + // These (partial) container type definitions allow ChunkRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = absl::string_view; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = ChunkIterator; + using const_iterator = ChunkIterator; + explicit ChunkRange(const Cord* cord) : cord_(cord) {} ChunkIterator begin() const; @@ -466,9 +527,9 @@ class Cord { // Cord::Chunks() // - // Returns a `Cord::ChunkIterator::ChunkRange` for iterating over the chunks - // of a `Cord` with a range-based for-loop. For most iteration tasks on a - // Cord, use `Cord::Chunks()` to retrieve this iterator. + // Returns a `Cord::ChunkRange` for iterating over the chunks of a `Cord` with + // a range-based for-loop. For most iteration tasks on a Cord, use + // `Cord::Chunks()` to retrieve this iterator. // // Example: // @@ -534,7 +595,7 @@ class Cord { ChunkIterator chunk_iterator_; }; - // Cord::CharIterator::AdvanceAndRead() + // Cord::AdvanceAndRead() // // Advances the `Cord::CharIterator` by `n_bytes` and returns the bytes // advanced as a separate `Cord`. `n_bytes` must be less than or equal to the @@ -542,21 +603,21 @@ class Cord { // valid to pass `char_end()` and `0`. static Cord AdvanceAndRead(CharIterator* it, size_t n_bytes); - // Cord::CharIterator::Advance() + // Cord::Advance() // // Advances the `Cord::CharIterator` by `n_bytes`. `n_bytes` must be less than // or equal to the number of bytes remaining within the Cord; otherwise, // behavior is undefined. It is valid to pass `char_end()` and `0`. static void Advance(CharIterator* it, size_t n_bytes); - // Cord::CharIterator::ChunkRemaining() + // Cord::ChunkRemaining() // // Returns the longest contiguous view starting at the iterator's position. // // `it` must be dereferenceable. static absl::string_view ChunkRemaining(const CharIterator& it); - // Cord::CharIterator::char_begin() + // Cord::char_begin() // // Returns an iterator to the first character of the `Cord`. // @@ -565,7 +626,7 @@ class Cord { // a `CharIterator` where range-based for-loops may not be available. CharIterator char_begin() const; - // Cord::CharIterator::char_end() + // Cord::char_end() // // Returns an iterator to one past the last character of the `Cord`. // @@ -574,18 +635,28 @@ class Cord { // a `CharIterator` where range-based for-loops are not useful. CharIterator char_end() const; - // Cord::CharIterator::CharRange + // Cord::CharRange // // `CharRange` is a helper class for iterating over the characters of a // producing an iterator which can be used within a range-based for loop. // Construction of a `CharRange` will return an iterator pointing to the first // character of the Cord. Generally, do not construct a `CharRange` directly; - // instead, prefer to use the `Cord::Chars()` method show below. + // instead, prefer to use the `Cord::Chars()` method shown below. // // Implementation note: `CharRange` is simply a convenience wrapper over // `Cord::char_begin()` and `Cord::char_end()`. class CharRange { public: + // Fulfill minimum c++ container requirements [container.requirements] + // Theses (partial) container type definitions allow CharRange to be used + // in various utilities expecting a subset of [container.requirements]. + // For example, the below enables using `::testing::ElementsAre(...)` + using value_type = char; + using reference = value_type&; + using const_reference = const value_type&; + using iterator = CharIterator; + using const_iterator = CharIterator; + explicit CharRange(const Cord* cord) : cord_(cord) {} CharIterator begin() const; @@ -595,11 +666,11 @@ class Cord { const Cord* cord_; }; - // Cord::CharIterator::Chars() + // Cord::Chars() // - // Returns a `Cord::CharIterator` for iterating over the characters of a - // `Cord` with a range-based for-loop. For most character-based iteration - // tasks on a Cord, use `Cord::Chars()` to retrieve this iterator. + // Returns a `Cord::CharRange` for iterating over the characters of a `Cord` + // with a range-based for-loop. For most character-based iteration tasks on a + // Cord, use `Cord::Chars()` to retrieve this iterator. // // Example: // @@ -646,6 +717,29 @@ class Cord { cord->Append(part); } + // Cord::SetExpectedChecksum() + // + // Stores a checksum value with this non-empty cord instance, for later + // retrieval. + // + // The expected checksum is a number stored out-of-band, alongside the data. + // It is preserved across copies and assignments, but any mutations to a cord + // will cause it to lose its expected checksum. + // + // The expected checksum is not part of a Cord's value, and does not affect + // operations such as equality or hashing. + // + // This field is intended to store a CRC32C checksum for later validation, to + // help support end-to-end checksum workflows. However, the Cord API itself + // does no CRC validation, and assigns no meaning to this number. + // + // This call has no effect if this cord is empty. + void SetExpectedChecksum(uint32_t crc); + + // Returns this cord's expected checksum, if it has one. Otherwise, returns + // nullopt. + absl::optional<uint32_t> ExpectedChecksum() const; + template <typename H> friend H AbslHashValue(H hash_state, const absl::Cord& c) { absl::optional<absl::string_view> maybe_flat = c.TryFlat(); @@ -661,13 +755,28 @@ class Cord { // be used by spelling absl::strings_internal::MakeStringConstant, which is // also an internal API. template <typename T> - explicit constexpr Cord(strings_internal::StringConstant<T>); + // NOLINTNEXTLINE(google-explicit-constructor) + constexpr Cord(strings_internal::StringConstant<T>); private: + using CordRep = absl::cord_internal::CordRep; + using CordRepFlat = absl::cord_internal::CordRepFlat; + using CordzInfo = cord_internal::CordzInfo; + using CordzUpdateScope = cord_internal::CordzUpdateScope; + using CordzUpdateTracker = cord_internal::CordzUpdateTracker; + using InlineData = cord_internal::InlineData; + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // Creates a cord instance with `method` representing the originating + // public API call causing the cord to be created. + explicit Cord(absl::string_view src, MethodIdentifier method); + friend class CordTestPeer; friend bool operator==(const Cord& lhs, const Cord& rhs); friend bool operator==(const Cord& lhs, absl::string_view rhs); + friend const CordzInfo* GetCordzInfoForTesting(const Cord& cord); + // Calls the provided function once for each cord chunk, in order. Unlike // Chunks(), this API will not allocate memory. void ForEachChunk(absl::FunctionRef<void(absl::string_view)>) const; @@ -687,6 +796,7 @@ class Cord { static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); constexpr InlineRep() : data_() {} + explicit InlineRep(InlineData::DefaultInitType init) : data_(init) {} InlineRep(const InlineRep& src); InlineRep(InlineRep&& src); InlineRep& operator=(const InlineRep& src); @@ -698,29 +808,62 @@ class Cord { bool empty() const; size_t size() const; const char* data() const; // Returns nullptr if holding pointer - void set_data(const char* data, size_t n, - bool nullify_tail); // Discards pointer, if any - char* set_data(size_t n); // Write data to the result + void set_data(const char* data, size_t n); // Discards pointer, if any + char* set_data(size_t n); // Write data to the result // Returns nullptr if holding bytes absl::cord_internal::CordRep* tree() const; absl::cord_internal::CordRep* as_tree() const; - // Discards old pointer, if any - void set_tree(absl::cord_internal::CordRep* rep); - // Replaces a tree with a new root. This is faster than set_tree, but it - // should only be used when it's clear that the old rep was a tree. - void replace_tree(absl::cord_internal::CordRep* rep); + const char* as_chars() const; // Returns non-null iff was holding a pointer absl::cord_internal::CordRep* clear(); // Converts to pointer if necessary. - absl::cord_internal::CordRep* force_tree(size_t extra_hint); - void reduce_size(size_t n); // REQUIRES: holding data + void reduce_size(size_t n); // REQUIRES: holding data void remove_prefix(size_t n); // REQUIRES: holding data - void AppendArray(const char* src_data, size_t src_size); + void AppendArray(absl::string_view src, MethodIdentifier method); absl::string_view FindFlatStartPiece() const; - void AppendTree(absl::cord_internal::CordRep* tree); - void PrependTree(absl::cord_internal::CordRep* tree); - void GetAppendRegion(char** region, size_t* size, size_t max_length); - void GetAppendRegion(char** region, size_t* size); + + // Creates a CordRepFlat instance from the current inlined data with `extra' + // bytes of desired additional capacity. + CordRepFlat* MakeFlatWithExtraCapacity(size_t extra); + + // Sets the tree value for this instance. `rep` must not be null. + // Requires the current instance to hold a tree, and a lock to be held on + // any CordzInfo referenced by this instance. The latter is enforced through + // the CordzUpdateScope argument. If the current instance is sampled, then + // the CordzInfo instance is updated to reference the new `rep` value. + void SetTree(CordRep* rep, const CordzUpdateScope& scope); + + // Identical to SetTree(), except that `rep` is allowed to be null, in + // which case the current instance is reset to an empty value. + void SetTreeOrEmpty(CordRep* rep, const CordzUpdateScope& scope); + + // Sets the tree value for this instance, and randomly samples this cord. + // This function disregards existing contents in `data_`, and should be + // called when a Cord is 'promoted' from an 'uninitialized' or 'inlined' + // value to a non-inlined (tree / ring) value. + void EmplaceTree(CordRep* rep, MethodIdentifier method); + + // Identical to EmplaceTree, except that it copies the parent stack from + // the provided `parent` data if the parent is sampled. + void EmplaceTree(CordRep* rep, const InlineData& parent, + MethodIdentifier method); + + // Commits the change of a newly created, or updated `rep` root value into + // this cord. `old_rep` indicates the old (inlined or tree) value of the + // cord, and determines if the commit invokes SetTree() or EmplaceTree(). + void CommitTree(const CordRep* old_rep, CordRep* rep, + const CordzUpdateScope& scope, MethodIdentifier method); + + void AppendTreeToInlined(CordRep* tree, MethodIdentifier method); + void AppendTreeToTree(CordRep* tree, MethodIdentifier method); + void AppendTree(CordRep* tree, MethodIdentifier method); + void PrependTreeToInlined(CordRep* tree, MethodIdentifier method); + void PrependTreeToTree(CordRep* tree, MethodIdentifier method); + void PrependTree(CordRep* tree, MethodIdentifier method); + + template <bool has_length> + void GetAppendRegion(char** region, size_t* size, size_t length); + bool IsSame(const InlineRep& other) const { return memcmp(&data_, &other.data_, sizeof(data_)) == 0; } @@ -758,6 +901,11 @@ class Cord { // Returns true if the Cord is being profiled by cordz. bool is_profiled() const { return data_.is_tree() && data_.is_profiled(); } + // Returns the available inlined capacity, or 0 if is_tree() == true. + size_t remaining_inline_capacity() const { + return data_.is_tree() ? 0 : kMaxInline - data_.inline_size(); + } + // Returns the profiled CordzInfo, or nullptr if not sampled. absl::cord_internal::CordzInfo* cordz_info() const { return data_.cordz_info(); @@ -776,8 +924,8 @@ class Cord { friend class Cord; void AssignSlow(const InlineRep& src); - // Unrefs the tree, stops profiling, and zeroes the contents - void ClearSlow(); + // Unrefs the tree and stops profiling. + void UnrefTree(); void ResetToEmpty() { data_ = {}; } @@ -788,9 +936,6 @@ class Cord { }; InlineRep contents_; - // Helper for MemoryUsage(). - static size_t MemoryUsageAux(const absl::cord_internal::CordRep* rep); - // Helper for GetFlat() and TryFlat(). static bool GetFlatAux(absl::cord_internal::CordRep* rep, absl::string_view* fragment); @@ -828,6 +973,23 @@ class Cord { template <typename C> void AppendImpl(C&& src); + // Appends / Prepends `src` to this instance, using precise sizing. + // This method does explicitly not attempt to use any spare capacity + // in any pending last added private owned flat. + // Requires `src` to be <= kMaxFlatLength. + void AppendPrecise(absl::string_view src, MethodIdentifier method); + void PrependPrecise(absl::string_view src, MethodIdentifier method); + + CordBuffer GetAppendBufferSlowPath(size_t capacity, size_t min_capacity); + + // Prepends the provided data to this instance. `method` contains the public + // API method for this action which is tracked for Cordz sampling purposes. + void PrependArray(absl::string_view src, MethodIdentifier method); + + // Assigns the value in 'src' to this instance, 'stealing' its contents. + // Requires src.length() > kMaxBytesToCopy. + Cord& AssignLargeString(std::string&& src); + // Helper for AbslHashValue(). template <typename H> H HashFragmented(H hash_state) const { @@ -857,8 +1019,8 @@ namespace cord_internal { // Fast implementation of memmove for up to 15 bytes. This implementation is // safe for overlapping regions. If nullify_tail is true, the destination is // padded with '\0' up to 16 bytes. -inline void SmallMemmove(char* dst, const char* src, size_t n, - bool nullify_tail = false) { +template <bool nullify_tail = false> +inline void SmallMemmove(char* dst, const char* src, size_t n) { if (n >= 8) { assert(n <= 16); uint64_t buf1; @@ -895,22 +1057,16 @@ inline void SmallMemmove(char* dst, const char* src, size_t n, } // Does non-template-specific `CordRepExternal` initialization. -// Expects `data` to be non-empty. +// Requires `data` to be non-empty. void InitializeCordRepExternal(absl::string_view data, CordRepExternal* rep); // Creates a new `CordRep` that owns `data` and `releaser` and returns a pointer -// to it, or `nullptr` if `data` was empty. +// to it. Requires `data` to be non-empty. template <typename Releaser> // NOLINTNEXTLINE - suppress clang-tidy raw pointer return. CordRep* NewExternalRep(absl::string_view data, Releaser&& releaser) { + assert(!data.empty()); using ReleaserType = absl::decay_t<Releaser>; - if (data.empty()) { - // Never create empty external nodes. - InvokeReleaser(Rank0{}, ReleaserType(std::forward<Releaser>(releaser)), - data); - return nullptr; - } - CordRepExternal* rep = new CordRepExternalImpl<ReleaserType>( std::forward<Releaser>(releaser), 0); InitializeCordRepExternal(data, rep); @@ -930,8 +1086,16 @@ inline CordRep* NewExternalRep(absl::string_view data, template <typename Releaser> Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { Cord cord; - cord.contents_.set_tree(::absl::cord_internal::NewExternalRep( - data, std::forward<Releaser>(releaser))); + if (ABSL_PREDICT_TRUE(!data.empty())) { + cord.contents_.EmplaceTree(::absl::cord_internal::NewExternalRep( + data, std::forward<Releaser>(releaser)), + Cord::MethodIdentifier::kMakeCordFromExternal); + } else { + using ReleaserType = absl::decay_t<Releaser>; + cord_internal::InvokeReleaser( + cord_internal::Rank0{}, ReleaserType(std::forward<Releaser>(releaser)), + data); + } return cord; } @@ -939,15 +1103,16 @@ constexpr Cord::InlineRep::InlineRep(cord_internal::InlineData data) : data_(data) {} inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) - : data_(src.data_) { - if (is_tree()) { - data_.clear_cordz_info(); - absl::cord_internal::CordRep::Ref(as_tree()); + : data_(InlineData::kDefaultInit) { + if (CordRep* tree = src.tree()) { + EmplaceTree(CordRep::Ref(tree), src.data_, + CordzUpdateTracker::kConstructorCord); + } else { + data_ = src.data_; } } -inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { - data_ = src.data_; +inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) : data_(src.data_) { src.ResetToEmpty(); } @@ -966,7 +1131,7 @@ inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { inline Cord::InlineRep& Cord::InlineRep::operator=( Cord::InlineRep&& src) noexcept { if (is_tree()) { - ClearSlow(); + UnrefTree(); } data_ = src.data_; src.ResetToEmpty(); @@ -984,6 +1149,11 @@ inline const char* Cord::InlineRep::data() const { return is_tree() ? nullptr : data_.as_chars(); } +inline const char* Cord::InlineRep::as_chars() const { + assert(!data_.is_tree()); + return data_.as_chars(); +} + inline absl::cord_internal::CordRep* Cord::InlineRep::as_tree() const { assert(data_.is_tree()); return data_.as_tree(); @@ -1003,31 +1173,62 @@ inline size_t Cord::InlineRep::size() const { return is_tree() ? as_tree()->length : inline_size(); } -inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { - if (rep == nullptr) { - ResetToEmpty(); +inline cord_internal::CordRepFlat* Cord::InlineRep::MakeFlatWithExtraCapacity( + size_t extra) { + static_assert(cord_internal::kMinFlatLength >= sizeof(data_), ""); + size_t len = data_.inline_size(); + auto* result = CordRepFlat::New(len + extra); + result->length = len; + memcpy(result->Data(), data_.as_chars(), sizeof(data_)); + return result; +} + +inline void Cord::InlineRep::EmplaceTree(CordRep* rep, + MethodIdentifier method) { + assert(rep); + data_.make_tree(rep); + CordzInfo::MaybeTrackCord(data_, method); +} + +inline void Cord::InlineRep::EmplaceTree(CordRep* rep, const InlineData& parent, + MethodIdentifier method) { + data_.make_tree(rep); + CordzInfo::MaybeTrackCord(data_, parent, method); +} + +inline void Cord::InlineRep::SetTree(CordRep* rep, + const CordzUpdateScope& scope) { + assert(rep); + assert(data_.is_tree()); + data_.set_tree(rep); + scope.SetCordRep(rep); +} + +inline void Cord::InlineRep::SetTreeOrEmpty(CordRep* rep, + const CordzUpdateScope& scope) { + assert(data_.is_tree()); + if (rep) { + data_.set_tree(rep); } else { - if (data_.is_tree()) { - // `data_` already holds a 'tree' value and an optional cordz_info value. - // Replace the tree value only, leaving the cordz_info value unchanged. - data_.set_tree(rep); - } else { - // `data_` contains inlined data: initialize data_ to tree value `rep`. - data_.make_tree(rep); - } + data_ = {}; } + scope.SetCordRep(rep); } -inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { - ABSL_ASSERT(is_tree()); - if (ABSL_PREDICT_FALSE(rep == nullptr)) { - set_tree(rep); - return; +inline void Cord::InlineRep::CommitTree(const CordRep* old_rep, CordRep* rep, + const CordzUpdateScope& scope, + MethodIdentifier method) { + if (old_rep) { + SetTree(rep, scope); + } else { + EmplaceTree(rep, method); } - data_.set_tree(rep); } inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { + if (is_tree()) { + CordzInfo::MaybeUntrackCord(cordz_info()); + } absl::cord_internal::CordRep* result = tree(); ResetToEmpty(); return result; @@ -1042,6 +1243,9 @@ inline void Cord::InlineRep::CopyToArray(char* dst) const { constexpr inline Cord::Cord() noexcept {} +inline Cord::Cord(absl::string_view src) + : Cord(src, CordzUpdateTracker::kConstructorString) {} + template <typename T> constexpr Cord::Cord(strings_internal::StringConstant<T>) : contents_(strings_internal::StringConstant<T>::value.size() <= @@ -1057,6 +1261,15 @@ inline Cord& Cord::operator=(const Cord& x) { return *this; } +template <typename T, Cord::EnableIfString<T>> +Cord& Cord::operator=(T&& src) { + if (src.size() <= cord_internal::kMaxBytesToCopy) { + return operator=(absl::string_view(src)); + } else { + return AssignLargeString(std::forward<T>(src)); + } +} + inline Cord::Cord(const Cord& src) : contents_(src.contents_) {} inline Cord::Cord(Cord&& src) noexcept : contents_(std::move(src.contents_)) {} @@ -1071,7 +1284,6 @@ inline Cord& Cord::operator=(Cord&& x) noexcept { } extern template Cord::Cord(std::string&& src); -extern template Cord& Cord::operator=(std::string&& src); inline size_t Cord::size() const { // Length is 1st field in str.rep_ @@ -1080,10 +1292,15 @@ inline size_t Cord::size() const { inline bool Cord::empty() const { return contents_.empty(); } -inline size_t Cord::EstimatedMemoryUsage() const { +inline size_t Cord::EstimatedMemoryUsage( + CordMemoryAccounting accounting_method) const { size_t result = sizeof(Cord); if (const absl::cord_internal::CordRep* rep = contents_.tree()) { - result += MemoryUsageAux(rep); + if (accounting_method == CordMemoryAccounting::kFairShare) { + result += cord_internal::GetEstimatedFairShareMemoryUsage(rep); + } else { + result += cord_internal::GetEstimatedMemoryUsage(rep); + } } return result; } @@ -1114,7 +1331,36 @@ inline absl::string_view Cord::Flatten() { } inline void Cord::Append(absl::string_view src) { - contents_.AppendArray(src.data(), src.size()); + contents_.AppendArray(src, CordzUpdateTracker::kAppendString); +} + +inline void Cord::Prepend(absl::string_view src) { + PrependArray(src, CordzUpdateTracker::kPrependString); +} + +inline void Cord::Append(CordBuffer buffer) { + if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; + absl::string_view short_value; + if (CordRep* rep = buffer.ConsumeValue(short_value)) { + contents_.AppendTree(rep, CordzUpdateTracker::kAppendCordBuffer); + } else { + AppendPrecise(short_value, CordzUpdateTracker::kAppendCordBuffer); + } +} + +inline void Cord::Prepend(CordBuffer buffer) { + if (ABSL_PREDICT_FALSE(buffer.length() == 0)) return; + absl::string_view short_value; + if (CordRep* rep = buffer.ConsumeValue(short_value)) { + contents_.PrependTree(rep, CordzUpdateTracker::kPrependCordBuffer); + } else { + PrependPrecise(short_value, CordzUpdateTracker::kPrependCordBuffer); + } +} + +inline CordBuffer Cord::GetAppendBuffer(size_t capacity, size_t min_capacity) { + if (empty()) return CordBuffer::CreateWithDefaultLimit(capacity); + return GetAppendBufferSlowPath(capacity, min_capacity); } extern template void Cord::Append(std::string&& src); @@ -1143,44 +1389,44 @@ inline bool Cord::StartsWith(absl::string_view rhs) const { } inline void Cord::ChunkIterator::InitTree(cord_internal::CordRep* tree) { - if (tree->tag == cord_internal::RING) { - current_chunk_ = ring_reader_.Reset(tree->ring()); - return; + tree = cord_internal::SkipCrcNode(tree); + if (tree->tag == cord_internal::BTREE) { + current_chunk_ = btree_reader_.Init(tree->btree()); + } else { + current_leaf_ = tree; + current_chunk_ = cord_internal::EdgeData(tree); } - - stack_of_right_children_.push_back(tree); - operator++(); } -inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) - : bytes_remaining_(tree->length) { +inline Cord::ChunkIterator::ChunkIterator(cord_internal::CordRep* tree) { + bytes_remaining_ = tree->length; InitTree(tree); } -inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) - : bytes_remaining_(cord->size()) { - if (cord->contents_.is_tree()) { - InitTree(cord->contents_.as_tree()); +inline Cord::ChunkIterator::ChunkIterator(const Cord* cord) { + if (CordRep* tree = cord->contents_.tree()) { + bytes_remaining_ = tree->length; + InitTree(tree); } else { - current_chunk_ = - absl::string_view(cord->contents_.data(), bytes_remaining_); + bytes_remaining_ = cord->contents_.inline_size(); + current_chunk_ = {cord->contents_.data(), bytes_remaining_}; } } -inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceRing() { - current_chunk_ = ring_reader_.Next(); +inline Cord::ChunkIterator& Cord::ChunkIterator::AdvanceBtree() { + current_chunk_ = btree_reader_.Next(); return *this; } -inline void Cord::ChunkIterator::AdvanceBytesRing(size_t n) { +inline void Cord::ChunkIterator::AdvanceBytesBtree(size_t n) { assert(n >= current_chunk_.size()); bytes_remaining_ -= n; if (bytes_remaining_) { if (n == current_chunk_.size()) { - current_chunk_ = ring_reader_.Next(); + current_chunk_ = btree_reader_.Next(); } else { - size_t offset = ring_reader_.length() - bytes_remaining_; - current_chunk_ = ring_reader_.Seek(offset); + size_t offset = btree_reader_.length() - bytes_remaining_; + current_chunk_ = btree_reader_.Seek(offset); } } else { current_chunk_ = {}; @@ -1193,8 +1439,11 @@ inline Cord::ChunkIterator& Cord::ChunkIterator::operator++() { assert(bytes_remaining_ >= current_chunk_.size()); bytes_remaining_ -= current_chunk_.size(); if (bytes_remaining_ > 0) { - return ring_reader_ ? AdvanceRing() : AdvanceStack(); - } else { + if (btree_reader_) { + return AdvanceBtree(); + } else { + assert(!current_chunk_.empty()); // Called on invalid iterator. + } current_chunk_ = {}; } return *this; @@ -1235,7 +1484,11 @@ inline void Cord::ChunkIterator::AdvanceBytes(size_t n) { if (ABSL_PREDICT_TRUE(n < current_chunk_.size())) { RemoveChunkPrefix(n); } else if (n != 0) { - ring_reader_ ? AdvanceBytesRing(n) : AdvanceBytesSlowPath(n); + if (btree_reader_) { + AdvanceBytesBtree(n); + } else { + bytes_remaining_ = 0; + } } } @@ -1326,7 +1579,7 @@ inline void Cord::ForEachChunk( } } -// Nonmember Cord-to-Cord relational operarators. +// Nonmember Cord-to-Cord relational operators. inline bool operator==(const Cord& lhs, const Cord& rhs) { if (lhs.contents_.IsSame(rhs.contents_)) return true; size_t rhs_size = rhs.size(); @@ -1335,12 +1588,8 @@ inline bool operator==(const Cord& lhs, const Cord& rhs) { } inline bool operator!=(const Cord& x, const Cord& y) { return !(x == y); } -inline bool operator<(const Cord& x, const Cord& y) { - return x.Compare(y) < 0; -} -inline bool operator>(const Cord& x, const Cord& y) { - return x.Compare(y) > 0; -} +inline bool operator<(const Cord& x, const Cord& y) { return x.Compare(y) < 0; } +inline bool operator>(const Cord& x, const Cord& y) { return x.Compare(y) > 0; } inline bool operator<=(const Cord& x, const Cord& y) { return x.Compare(y) <= 0; } @@ -1381,7 +1630,6 @@ class CordTestAccess { public: static size_t FlatOverhead(); static size_t MaxFlatLength(); - static size_t SizeofCordRepConcat(); static size_t SizeofCordRepExternal(); static size_t SizeofCordRepSubstring(); static size_t FlatTagToLength(uint8_t tag); diff --git a/absl/strings/cord_analysis.cc b/absl/strings/cord_analysis.cc new file mode 100644 index 00000000..73d3c4e6 --- /dev/null +++ b/absl/strings/cord_analysis.cc @@ -0,0 +1,188 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/cord_analysis.h" + +#include <cstddef> +#include <cstdint> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_data_edge.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_crc.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" +// +#include "absl/base/macros.h" +#include "absl/base/port.h" +#include "absl/functional/function_ref.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +// Accounting mode for analyzing memory usage. +enum class Mode { kTotal, kFairShare }; + +// CordRepRef holds a `const CordRep*` reference in rep, and depending on mode, +// holds a 'fraction' representing a cumulative inverse refcount weight. +template <Mode mode> +struct CordRepRef { + // Instantiates a CordRepRef instance. + explicit CordRepRef(const CordRep* r) : rep(r) {} + + // Creates a child reference holding the provided child. + // Overloaded to add cumulative reference count for kFairShare. + CordRepRef Child(const CordRep* child) const { return CordRepRef(child); } + + const CordRep* rep; +}; + +// RawUsage holds the computed total number of bytes. +template <Mode mode> +struct RawUsage { + size_t total = 0; + + // Add 'size' to total, ignoring the CordRepRef argument. + void Add(size_t size, CordRepRef<mode>) { total += size; } +}; + +// Returns n / refcount avoiding a div for the common refcount == 1. +template <typename refcount_t> +double MaybeDiv(double d, refcount_t refcount) { + return refcount == 1 ? d : d / refcount; +} + +// Overloaded 'kFairShare' specialization for CordRepRef. This class holds a +// `fraction` value which represents a cumulative inverse refcount weight. +// For example, a top node with a reference count of 2 will have a fraction +// value of 1/2 = 0.5, representing the 'fair share' of memory it references. +// A node below such a node with a reference count of 5 then has a fraction of +// 0.5 / 5 = 0.1 representing the fair share of memory below that node, etc. +template <> +struct CordRepRef<Mode::kFairShare> { + // Creates a CordRepRef with the provided rep and top (parent) fraction. + explicit CordRepRef(const CordRep* r, double frac = 1.0) + : rep(r), fraction(MaybeDiv(frac, r->refcount.Get())) {} + + // Returns a CordRepRef with a fraction of `this->fraction / child.refcount` + CordRepRef Child(const CordRep* child) const { + return CordRepRef(child, fraction); + } + + const CordRep* rep; + double fraction; +}; + +// Overloaded 'kFairShare' specialization for RawUsage +template <> +struct RawUsage<Mode::kFairShare> { + double total = 0; + + // Adds `size` multiplied by `rep.fraction` to the total size. + void Add(size_t size, CordRepRef<Mode::kFairShare> rep) { + total += static_cast<double>(size) * rep.fraction; + } +}; + +// Computes the estimated memory size of the provided data edge. +// External reps are assumed 'heap allocated at their exact size'. +template <Mode mode> +void AnalyzeDataEdge(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) { + assert(IsDataEdge(rep.rep)); + + // Consume all substrings + if (rep.rep->tag == SUBSTRING) { + raw_usage.Add(sizeof(CordRepSubstring), rep); + rep = rep.Child(rep.rep->substring()->child); + } + + // Consume FLAT / EXTERNAL + const size_t size = + rep.rep->tag >= FLAT + ? rep.rep->flat()->AllocatedSize() + : rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>); + raw_usage.Add(size, rep); +} + +// Computes the memory size of the provided Ring tree. +template <Mode mode> +void AnalyzeRing(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) { + const CordRepRing* ring = rep.rep->ring(); + raw_usage.Add(CordRepRing::AllocSize(ring->capacity()), rep); + ring->ForEach([&](CordRepRing::index_type pos) { + AnalyzeDataEdge(rep.Child(ring->entry_child(pos)), raw_usage); + }); +} + +// Computes the memory size of the provided Btree tree. +template <Mode mode> +void AnalyzeBtree(CordRepRef<mode> rep, RawUsage<mode>& raw_usage) { + raw_usage.Add(sizeof(CordRepBtree), rep); + const CordRepBtree* tree = rep.rep->btree(); + if (tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + AnalyzeBtree(rep.Child(edge), raw_usage); + } + } else { + for (CordRep* edge : tree->Edges()) { + AnalyzeDataEdge(rep.Child(edge), raw_usage); + } + } +} + +template <Mode mode> +size_t GetEstimatedUsage(const CordRep* rep) { + // Zero initialized memory usage totals. + RawUsage<mode> raw_usage; + + // Capture top level node and refcount into a CordRepRef. + CordRepRef<mode> repref(rep); + + // Consume the top level CRC node if present. + if (repref.rep->tag == CRC) { + raw_usage.Add(sizeof(CordRepCrc), repref); + repref = repref.Child(repref.rep->crc()->child); + } + + if (IsDataEdge(repref.rep)) { + AnalyzeDataEdge(repref, raw_usage); + } else if (repref.rep->tag == BTREE) { + AnalyzeBtree(repref, raw_usage); + } else if (repref.rep->tag == RING) { + AnalyzeRing(repref, raw_usage); + } else { + assert(false); + } + + return static_cast<size_t>(raw_usage.total); +} + +} // namespace + +size_t GetEstimatedMemoryUsage(const CordRep* rep) { + return GetEstimatedUsage<Mode::kTotal>(rep); +} + +size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep) { + return GetEstimatedUsage<Mode::kFairShare>(rep); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord_analysis.h b/absl/strings/cord_analysis.h new file mode 100644 index 00000000..7041ad1a --- /dev/null +++ b/absl/strings/cord_analysis.h @@ -0,0 +1,44 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORD_ANALYSIS_H_ +#define ABSL_STRINGS_CORD_ANALYSIS_H_ + +#include <cstddef> +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Returns the *approximate* number of bytes held in full or in part by this +// Cord (which may not remain the same between invocations). Cords that share +// memory could each be "charged" independently for the same shared memory. +size_t GetEstimatedMemoryUsage(const CordRep* rep); + +// Returns the *approximate* number of bytes held in full or in part by this +// CordRep weighted by the sharing ratio of that data. For example, if some data +// edge is shared by 4 different Cords, then each cord is attribute 1/4th of +// the total memory usage as a 'fair share' of the total memory usage. +size_t GetEstimatedFairShareMemoryUsage(const CordRep* rep); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + + +#endif // ABSL_STRINGS_CORD_ANALYSIS_H_ diff --git a/absl/strings/cord_buffer.cc b/absl/strings/cord_buffer.cc new file mode 100644 index 00000000..fad6269c --- /dev/null +++ b/absl/strings/cord_buffer.cc @@ -0,0 +1,30 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/cord_buffer.h" + +#include <cstddef> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL +constexpr size_t CordBuffer::kDefaultLimit; +constexpr size_t CordBuffer::kCustomLimit; +#endif + +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord_buffer.h b/absl/strings/cord_buffer.h new file mode 100644 index 00000000..56a6ce6f --- /dev/null +++ b/absl/strings/cord_buffer.h @@ -0,0 +1,572 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// ----------------------------------------------------------------------------- +// File: cord_buffer.h +// ----------------------------------------------------------------------------- +// +// This file defines an `absl::CordBuffer` data structure to hold data for +// eventual inclusion within an existing `Cord` data structure. Cord buffers are +// useful for building large Cords that may require custom allocation of its +// associated memory. +// +#ifndef ABSL_STRINGS_CORD_BUFFER_H_ +#define ABSL_STRINGS_CORD_BUFFER_H_ + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <memory> +#include <utility> + +#include "absl/base/config.h" +#include "absl/base/macros.h" +#include "absl/numeric/bits.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class Cord; +class CordBufferTestPeer; + +// CordBuffer +// +// CordBuffer manages memory buffers for purposes such as zero-copy APIs as well +// as applications building cords with large data requiring granular control +// over the allocation and size of cord data. For example, a function creating +// a cord of random data could use a CordBuffer as follows: +// +// absl::Cord CreateRandomCord(size_t length) { +// absl::Cord cord; +// while (length > 0) { +// CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(length); +// absl::Span<char> data = buffer.available_up_to(length); +// FillRandomValues(data.data(), data.size()); +// buffer.IncreaseLengthBy(data.size()); +// cord.Append(std::move(buffer)); +// length -= data.size(); +// } +// return cord; +// } +// +// CordBuffer instances are by default limited to a capacity of `kDefaultLimit` +// bytes. `kDefaultLimit` is currently just under 4KiB, but this default may +// change in the future and/or for specific architectures. The default limit is +// aimed to provide a good trade-off between performance and memory overhead. +// Smaller buffers typically incur more compute cost while larger buffers are +// more CPU efficient but create significant memory overhead because of such +// allocations being less granular. Using larger buffers may also increase the +// risk of memory fragmentation. +// +// Applications create a buffer using one of the `CreateWithDefaultLimit()` or +// `CreateWithCustomLimit()` methods. The returned instance will have a non-zero +// capacity and a zero length. Applications use the `data()` method to set the +// contents of the managed memory, and once done filling the buffer, use the +// `IncreaseLengthBy()` or 'SetLength()' method to specify the length of the +// initialized data before adding the buffer to a Cord. +// +// The `CreateWithCustomLimit()` method is intended for applications needing +// larger buffers than the default memory limit, allowing the allocation of up +// to a capacity of `kCustomLimit` bytes minus some minimum internal overhead. +// The usage of `CreateWithCustomLimit()` should be limited to only those use +// cases where the distribution of the input is relatively well known, and/or +// where the trade-off between the efficiency gains outweigh the risk of memory +// fragmentation. See the documentation for `CreateWithCustomLimit()` for more +// information on using larger custom limits. +// +// The capacity of a `CordBuffer` returned by one of the `Create` methods may +// be larger than the requested capacity due to rounding, alignment and +// granularity of the memory allocator. Applications should use the `capacity` +// method to obtain the effective capacity of the returned instance as +// demonstrated in the provided example above. +// +// CordBuffer is a move-only class. All references into the managed memory are +// invalidated when an instance is moved into either another CordBuffer instance +// or a Cord. Writing to a location obtained by a previous call to `data()` +// after an instance was moved will lead to undefined behavior. +// +// A `moved from` CordBuffer instance will have a valid, but empty state. +// CordBuffer is thread compatible. +class CordBuffer { + public: + // kDefaultLimit + // + // Default capacity limits of allocated CordBuffers. + // See the class comments for more information on allocation limits. + static constexpr size_t kDefaultLimit = cord_internal::kMaxFlatLength; + + // kCustomLimit + // + // Maximum size for CreateWithCustomLimit() allocated buffers. + // Note that the effective capacity may be slightly less + // because of internal overhead of internal cord buffers. + static constexpr size_t kCustomLimit = 64U << 10; + + // Constructors, Destructors and Assignment Operators + + // Creates an empty CordBuffer. + CordBuffer() = default; + + // Destroys this CordBuffer instance and, if not empty, releases any memory + // managed by this instance, invalidating previously returned references. + ~CordBuffer(); + + // CordBuffer is move-only + CordBuffer(CordBuffer&& rhs) noexcept; + CordBuffer& operator=(CordBuffer&&) noexcept; + CordBuffer(const CordBuffer&) = delete; + CordBuffer& operator=(const CordBuffer&) = delete; + + // CordBuffer::MaximumPayload() + // + // Returns the guaranteed maximum payload for a CordBuffer returned by the + // `CreateWithDefaultLimit()` method. While small, each internal buffer inside + // a Cord incurs an overhead to manage the length, type and reference count + // for the buffer managed inside the cord tree. Applications can use this + // method to get approximate number of buffers required for a given byte + // size, etc. + // + // For example: + // const size_t payload = absl::CordBuffer::MaximumPayload(); + // const size_t buffer_count = (total_size + payload - 1) / payload; + // buffers.reserve(buffer_count); + static constexpr size_t MaximumPayload(); + + // Overload to the above `MaximumPayload()` except that it returns the + // maximum payload for a CordBuffer returned by the `CreateWithCustomLimit()` + // method given the provided `block_size`. + static constexpr size_t MaximumPayload(size_t block_size); + + // CordBuffer::CreateWithDefaultLimit() + // + // Creates a CordBuffer instance of the desired `capacity`, capped at the + // default limit `kDefaultLimit`. The returned buffer has a guaranteed + // capacity of at least `min(kDefaultLimit, capacity)`. See the class comments + // for more information on buffer capacities and intended usage. + static CordBuffer CreateWithDefaultLimit(size_t capacity); + + + // CordBuffer::CreateWithCustomLimit() + // + // Creates a CordBuffer instance of the desired `capacity` rounded to an + // appropriate power of 2 size less than, or equal to `block_size`. + // Requires `block_size` to be a power of 2. + // + // If `capacity` is less than or equal to `kDefaultLimit`, then this method + // behaves identical to `CreateWithDefaultLimit`, which means that the caller + // is guaranteed to get a buffer of at least the requested capacity. + // + // If `capacity` is greater than or equal to `block_size`, then this method + // returns a buffer with an `allocated size` of `block_size` bytes. Otherwise, + // this methods returns a buffer with a suitable smaller power of 2 block size + // to satisfy the request. The actual size depends on a number of factors, and + // is typically (but not necessarily) the highest or second highest power of 2 + // value less than or equal to `capacity`. + // + // The 'allocated size' includes a small amount of overhead required for + // internal state, which is currently 13 bytes on 64-bit platforms. For + // example: a buffer created with `block_size` and `capacity' set to 8KiB + // will have an allocated size of 8KiB, and an effective internal `capacity` + // of 8KiB - 13 = 8179 bytes. + // + // To demonstrate this in practice, let's assume we want to read data from + // somewhat larger files using approximately 64KiB buffers: + // + // absl::Cord ReadFromFile(int fd, size_t n) { + // absl::Cord cord; + // while (n > 0) { + // CordBuffer buffer = CordBuffer::CreateWithCustomLimit(64 << 10, n); + // absl::Span<char> data = buffer.available_up_to(n); + // ReadFileDataOrDie(fd, data.data(), data.size()); + // buffer.IncreaseLengthBy(data.size()); + // cord.Append(std::move(buffer)); + // n -= data.size(); + // } + // return cord; + // } + // + // If we'd use this function to read a file of 659KiB, we may get the + // following pattern of allocated cord buffer sizes: + // + // CreateWithCustomLimit(64KiB, 674816) --> ~64KiB (65523) + // CreateWithCustomLimit(64KiB, 674816) --> ~64KiB (65523) + // ... + // CreateWithCustomLimit(64KiB, 19586) --> ~16KiB (16371) + // CreateWithCustomLimit(64KiB, 3215) --> 3215 (at least 3215) + // + // The reason the method returns a 16K buffer instead of a roughly 19K buffer + // is to reduce memory overhead and fragmentation risks. Using carefully + // chosen power of 2 values reduces the entropy of allocated memory sizes. + // + // Additionally, let's assume we'd use the above function on files that are + // generally smaller than 64K. If we'd use 'precise' sized buffers for such + // files, than we'd get a very wide distribution of allocated memory sizes + // rounded to 4K page sizes, and we'd end up with a lot of unused capacity. + // + // In general, application should only use custom sizes if the data they are + // consuming or storing is expected to be many times the chosen block size, + // and be based on objective data and performance metrics. For example, a + // compress function may work faster and consume less CPU when using larger + // buffers. Such an application should pick a size offering a reasonable + // trade-off between expected data size, compute savings with larger buffers, + // and the cost or fragmentation effect of larger buffers. + // Applications must pick a reasonable spot on that curve, and make sure their + // data meets their expectations in size distributions such as "mostly large". + static CordBuffer CreateWithCustomLimit(size_t block_size, size_t capacity); + + // CordBuffer::available() + // + // Returns the span delineating the available capacity in this buffer + // which is defined as `{ data() + length(), capacity() - length() }`. + absl::Span<char> available(); + + // CordBuffer::available_up_to() + // + // Returns the span delineating the available capacity in this buffer limited + // to `size` bytes. This is equivalent to `available().subspan(0, size)`. + absl::Span<char> available_up_to(size_t size); + + // CordBuffer::data() + // + // Returns a non-null reference to the data managed by this instance. + // Applications are allowed to write up to `capacity` bytes of instance data. + // CordBuffer data is uninitialized by default. Reading data from an instance + // that has not yet been initialized will lead to undefined behavior. + char* data(); + const char* data() const; + + // CordBuffer::length() + // + // Returns the length of this instance. The default length of a CordBuffer is + // 0, indicating an 'empty' CordBuffer. Applications must specify the length + // of the data in a CordBuffer before adding it to a Cord. + size_t length() const; + + // CordBuffer::capacity() + // + // Returns the capacity of this instance. All instances have a non-zero + // capacity: default and `moved from` instances have a small internal buffer. + size_t capacity() const; + + // CordBuffer::IncreaseLengthBy() + // + // Increases the length of this buffer by the specified 'n' bytes. + // Applications must make sure all data in this buffer up to the new length + // has been initialized before adding a CordBuffer to a Cord: failure to do so + // will lead to undefined behavior. Requires `length() + n <= capacity()`. + // Typically, applications will use 'available_up_to()` to get a span of the + // desired capacity, and use `span.size()` to increase the length as in: + // absl::Span<char> span = buffer.available_up_to(desired); + // buffer.IncreaseLengthBy(span.size()); + // memcpy(span.data(), src, span.size()); + // etc... + void IncreaseLengthBy(size_t n); + + // CordBuffer::SetLength() + // + // Sets the data length of this instance. Applications must make sure all data + // of the specified length has been initialized before adding a CordBuffer to + // a Cord: failure to do so will lead to undefined behavior. + // Setting the length to a small value or zero does not release any memory + // held by this CordBuffer instance. Requires `length <= capacity()`. + // Applications should preferably use the `IncreaseLengthBy()` method above + // in combination with the 'available()` or `available_up_to()` methods. + void SetLength(size_t length); + + private: + // Make sure we don't accidentally over promise. + static_assert(kCustomLimit <= cord_internal::kMaxLargeFlatSize, ""); + + // Assume the cost of an 'uprounded' allocation to CeilPow2(size) versus + // the cost of allocating at least 1 extra flat <= 4KB: + // - Flat overhead = 13 bytes + // - Btree amortized cost / node =~ 13 bytes + // - 64 byte granularity of tcmalloc at 4K =~ 32 byte average + // CPU cost and efficiency requires we should at least 'save' something by + // splitting, as a poor man's measure, we say the slop needs to be + // at least double the cost offset to make it worth splitting: ~128 bytes. + static constexpr size_t kMaxPageSlop = 128; + + // Overhead for allocation a flat. + static constexpr size_t kOverhead = cord_internal::kFlatOverhead; + + using CordRepFlat = cord_internal::CordRepFlat; + + // `Rep` is the internal data representation of a CordBuffer. The internal + // representation has an internal small size optimization similar to + // std::string (SSO). + struct Rep { + // Inline SSO size of a CordBuffer + static constexpr size_t kInlineCapacity = sizeof(intptr_t) * 2 - 1; + + // Creates a default instance with kInlineCapacity. + Rep() : short_rep{} {} + + // Creates an instance managing an allocated non zero CordRep. + explicit Rep(cord_internal::CordRepFlat* rep) : long_rep{rep} { + assert(rep != nullptr); + } + + // Returns true if this instance manages the SSO internal buffer. + bool is_short() const { + constexpr size_t offset = offsetof(Short, raw_size); + return (reinterpret_cast<const char*>(this)[offset] & 1) != 0; + } + + // Returns the available area of the internal SSO data + absl::Span<char> short_available() { + assert(is_short()); + const size_t length = (short_rep.raw_size >> 1); + return absl::Span<char>(short_rep.data + length, + kInlineCapacity - length); + } + + // Returns the available area of the internal SSO data + absl::Span<char> long_available() { + assert(!is_short()); + const size_t length = long_rep.rep->length; + return absl::Span<char>(long_rep.rep->Data() + length, + long_rep.rep->Capacity() - length); + } + + // Returns the length of the internal SSO data. + size_t short_length() const { + assert(is_short()); + return short_rep.raw_size >> 1; + } + + // Sets the length of the internal SSO data. + // Disregards any previously set CordRep instance. + void set_short_length(size_t length) { + short_rep.raw_size = static_cast<char>((length << 1) + 1); + } + + // Adds `n` to the current short length. + void add_short_length(size_t n) { + assert(is_short()); + short_rep.raw_size += static_cast<char>(n << 1); + } + + // Returns reference to the internal SSO data buffer. + char* data() { + assert(is_short()); + return short_rep.data; + } + const char* data() const { + assert(is_short()); + return short_rep.data; + } + + // Returns a pointer the external CordRep managed by this instance. + cord_internal::CordRepFlat* rep() const { + assert(!is_short()); + return long_rep.rep; + } + + // The internal representation takes advantage of the fact that allocated + // memory is always on an even address, and uses the least significant bit + // of the first or last byte (depending on endianness) as the inline size + // indicator overlapping with the least significant byte of the CordRep*. +#if defined(ABSL_IS_BIG_ENDIAN) + struct Long { + explicit Long(cord_internal::CordRepFlat* rep_arg) : rep(rep_arg) {} + void* padding; + cord_internal::CordRepFlat* rep; + }; + struct Short { + char data[sizeof(Long) - 1]; + char raw_size = 1; + }; +#else + struct Long { + explicit Long(cord_internal::CordRepFlat* rep_arg) : rep(rep_arg) {} + cord_internal::CordRepFlat* rep; + void* padding; + }; + struct Short { + char raw_size = 1; + char data[sizeof(Long) - 1]; + }; +#endif + + union { + Long long_rep; + Short short_rep; + }; + }; + + // Power2 functions + static bool IsPow2(size_t size) { return absl::has_single_bit(size); } + static size_t Log2Floor(size_t size) { return absl::bit_width(size) - 1; } + static size_t Log2Ceil(size_t size) { return absl::bit_width(size - 1); } + + // Implementation of `CreateWithCustomLimit()`. + // This implementation allows for future memory allocation hints to + // be passed down into the CordRepFlat allocation function. + template <typename... AllocationHints> + static CordBuffer CreateWithCustomLimitImpl(size_t block_size, + size_t capacity, + AllocationHints... hints); + + // Consumes the value contained in this instance and resets the instance. + // This method returns a non-null Cordrep* if the current instances manages a + // CordRep*, and resets the instance to an empty SSO instance. If the current + // instance is an SSO instance, then this method returns nullptr and sets + // `short_value` to the inlined data value. In either case, the current + // instance length is reset to zero. + // This method is intended to be used by Cord internal functions only. + cord_internal::CordRep* ConsumeValue(absl::string_view& short_value) { + cord_internal::CordRep* rep = nullptr; + if (rep_.is_short()) { + short_value = absl::string_view(rep_.data(), rep_.short_length()); + } else { + rep = rep_.rep(); + } + rep_.set_short_length(0); + return rep; + } + + // Internal constructor. + explicit CordBuffer(cord_internal::CordRepFlat* rep) : rep_(rep) { + assert(rep != nullptr); + } + + Rep rep_; + + friend class Cord; + friend class CordBufferTestPeer; +}; + +inline constexpr size_t CordBuffer::MaximumPayload() { + return cord_internal::kMaxFlatLength; +} + +inline constexpr size_t CordBuffer::MaximumPayload(size_t block_size) { + // TODO(absl-team): Use std::min when C++11 support is dropped. + return (kCustomLimit < block_size ? kCustomLimit : block_size) - + cord_internal::kFlatOverhead; +} + +inline CordBuffer CordBuffer::CreateWithDefaultLimit(size_t capacity) { + if (capacity > Rep::kInlineCapacity) { + auto* rep = cord_internal::CordRepFlat::New(capacity); + rep->length = 0; + return CordBuffer(rep); + } + return CordBuffer(); +} + +template <typename... AllocationHints> +inline CordBuffer CordBuffer::CreateWithCustomLimitImpl( + size_t block_size, size_t capacity, AllocationHints... hints) { + assert(IsPow2(block_size)); + capacity = (std::min)(capacity, kCustomLimit); + block_size = (std::min)(block_size, kCustomLimit); + if (capacity + kOverhead >= block_size) { + capacity = block_size; + } else if (capacity <= kDefaultLimit) { + capacity = capacity + kOverhead; + } else if (!IsPow2(capacity)) { + // Check if rounded up to next power 2 is a good enough fit + // with limited waste making it an acceptable direct fit. + const size_t rounded_up = size_t{1} << Log2Ceil(capacity); + const size_t slop = rounded_up - capacity; + if (slop >= kOverhead && slop <= kMaxPageSlop + kOverhead) { + capacity = rounded_up; + } else { + // Round down to highest power of 2 <= capacity. + // Consider a more aggressive step down if that may reduce the + // risk of fragmentation where 'people are holding it wrong'. + const size_t rounded_down = size_t{1} << Log2Floor(capacity); + capacity = rounded_down; + } + } + const size_t length = capacity - kOverhead; + auto* rep = CordRepFlat::New(CordRepFlat::Large(), length, hints...); + rep->length = 0; + return CordBuffer(rep); +} + +inline CordBuffer CordBuffer::CreateWithCustomLimit(size_t block_size, + size_t capacity) { + return CreateWithCustomLimitImpl(block_size, capacity); +} + +inline CordBuffer::~CordBuffer() { + if (!rep_.is_short()) { + cord_internal::CordRepFlat::Delete(rep_.rep()); + } +} + +inline CordBuffer::CordBuffer(CordBuffer&& rhs) noexcept : rep_(rhs.rep_) { + rhs.rep_.set_short_length(0); +} + +inline CordBuffer& CordBuffer::operator=(CordBuffer&& rhs) noexcept { + if (!rep_.is_short()) cord_internal::CordRepFlat::Delete(rep_.rep()); + rep_ = rhs.rep_; + rhs.rep_.set_short_length(0); + return *this; +} + +inline absl::Span<char> CordBuffer::available() { + return rep_.is_short() ? rep_.short_available() : rep_.long_available(); +} + +inline absl::Span<char> CordBuffer::available_up_to(size_t size) { + return available().subspan(0, size); +} + +inline char* CordBuffer::data() { + return rep_.is_short() ? rep_.data() : rep_.rep()->Data(); +} + +inline const char* CordBuffer::data() const { + return rep_.is_short() ? rep_.data() : rep_.rep()->Data(); +} + +inline size_t CordBuffer::capacity() const { + return rep_.is_short() ? Rep::kInlineCapacity : rep_.rep()->Capacity(); +} + +inline size_t CordBuffer::length() const { + return rep_.is_short() ? rep_.short_length() : rep_.rep()->length; +} + +inline void CordBuffer::SetLength(size_t length) { + ABSL_HARDENING_ASSERT(length <= capacity()); + if (rep_.is_short()) { + rep_.set_short_length(length); + } else { + rep_.rep()->length = length; + } +} + +inline void CordBuffer::IncreaseLengthBy(size_t n) { + ABSL_HARDENING_ASSERT(n <= capacity() && length() + n <= capacity()); + if (rep_.is_short()) { + rep_.add_short_length(n); + } else { + rep_.rep()->length += n; + } +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORD_BUFFER_H_ diff --git a/absl/strings/cord_buffer_test.cc b/absl/strings/cord_buffer_test.cc new file mode 100644 index 00000000..5c7437ae --- /dev/null +++ b/absl/strings/cord_buffer_test.cc @@ -0,0 +1,320 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/cord_buffer.h" + + +#include <algorithm> +#include <climits> +#include <cstring> +#include <string> +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/types/span.h" + +using testing::Eq; +using testing::Ge; +using testing::Le; +using testing::Ne; + +namespace absl { +ABSL_NAMESPACE_BEGIN + +class CordBufferTestPeer { + public: + static cord_internal::CordRep* ConsumeValue(CordBuffer& buffer, + absl::string_view& short_value) { + return buffer.ConsumeValue(short_value); + } +}; + +namespace { + +using ::absl::cordrep_testing::CordToString; + +constexpr size_t kInlinedSize = sizeof(CordBuffer) - 1; +constexpr size_t kDefaultLimit = CordBuffer::kDefaultLimit; +constexpr size_t kCustomLimit = CordBuffer::kCustomLimit; +constexpr size_t kMaxFlatSize = cord_internal::kMaxFlatSize; +constexpr size_t kMaxFlatLength = cord_internal::kMaxFlatLength; +constexpr size_t kFlatOverhead = cord_internal::kFlatOverhead; + +constexpr size_t k8KiB = 8 << 10; +constexpr size_t k16KiB = 16 << 10; +constexpr size_t k64KiB = 64 << 10; +constexpr size_t k1MB = 1 << 20; + +class CordBufferTest : public testing::TestWithParam<size_t> {}; + +INSTANTIATE_TEST_SUITE_P(MediumSize, CordBufferTest, + testing::Values(1, kInlinedSize - 1, kInlinedSize, + kInlinedSize + 1, kDefaultLimit - 1, + kDefaultLimit)); + +TEST_P(CordBufferTest, MaximumPayload) { + EXPECT_THAT(CordBuffer::MaximumPayload(), Eq(kMaxFlatLength)); + EXPECT_THAT(CordBuffer::MaximumPayload(512), Eq(512 - kFlatOverhead)); + EXPECT_THAT(CordBuffer::MaximumPayload(k64KiB), Eq(k64KiB - kFlatOverhead)); + EXPECT_THAT(CordBuffer::MaximumPayload(k1MB), Eq(k64KiB - kFlatOverhead)); +} + +TEST(CordBufferTest, ConstructDefault) { + CordBuffer buffer; + EXPECT_THAT(buffer.capacity(), Eq(sizeof(CordBuffer) - 1)); + EXPECT_THAT(buffer.length(), Eq(0)); + EXPECT_THAT(buffer.data(), Ne(nullptr)); + EXPECT_THAT(buffer.available().data(), Eq(buffer.data())); + EXPECT_THAT(buffer.available().size(), Eq(buffer.capacity())); + memset(buffer.data(), 0xCD, buffer.capacity()); +} + +TEST(CordBufferTest, CreateSsoWithDefaultLimit) { + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(3); + EXPECT_THAT(buffer.capacity(), Ge(3)); + EXPECT_THAT(buffer.capacity(), Le(sizeof(CordBuffer))); + EXPECT_THAT(buffer.length(), Eq(0)); + memset(buffer.data(), 0xCD, buffer.capacity()); + + memcpy(buffer.data(), "Abc", 3); + buffer.SetLength(3); + EXPECT_THAT(buffer.length(), Eq(3)); + absl::string_view short_value; + EXPECT_THAT(CordBufferTestPeer::ConsumeValue(buffer, short_value), + Eq(nullptr)); + EXPECT_THAT(absl::string_view(buffer.data(), 3), Eq("Abc")); + EXPECT_THAT(short_value, Eq("Abc")); +} + +TEST_P(CordBufferTest, Available) { + const size_t requested = GetParam(); + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(requested); + EXPECT_THAT(buffer.available().data(), Eq(buffer.data())); + EXPECT_THAT(buffer.available().size(), Eq(buffer.capacity())); + + buffer.SetLength(2); + EXPECT_THAT(buffer.available().data(), Eq(buffer.data() + 2)); + EXPECT_THAT(buffer.available().size(), Eq(buffer.capacity() - 2)); +} + +TEST_P(CordBufferTest, IncreaseLengthBy) { + const size_t requested = GetParam(); + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(requested); + buffer.IncreaseLengthBy(2); + EXPECT_THAT(buffer.length(), Eq(2)); + buffer.IncreaseLengthBy(5); + EXPECT_THAT(buffer.length(), Eq(7)); +} + +TEST_P(CordBufferTest, AvailableUpTo) { + const size_t requested = GetParam(); + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(requested); + size_t expected_up_to = std::min<size_t>(3, buffer.capacity()); + EXPECT_THAT(buffer.available_up_to(3).data(), Eq(buffer.data())); + EXPECT_THAT(buffer.available_up_to(3).size(), Eq(expected_up_to)); + + buffer.SetLength(2); + expected_up_to = std::min<size_t>(3, buffer.capacity() - 2); + EXPECT_THAT(buffer.available_up_to(3).data(), Eq(buffer.data() + 2)); + EXPECT_THAT(buffer.available_up_to(3).size(), Eq(expected_up_to)); +} + +// Returns the maximum capacity for a given block_size and requested size. +size_t MaxCapacityFor(size_t block_size, size_t requested) { + requested = (std::min)(requested, cord_internal::kMaxLargeFlatSize); + // Maximum returned size is always capped at block_size - kFlatOverhead. + return block_size - kFlatOverhead; +} + +TEST_P(CordBufferTest, CreateWithDefaultLimit) { + const size_t requested = GetParam(); + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(requested); + EXPECT_THAT(buffer.capacity(), Ge(requested)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(kMaxFlatSize, requested))); + EXPECT_THAT(buffer.length(), Eq(0)); + + memset(buffer.data(), 0xCD, buffer.capacity()); + + std::string data(requested - 1, 'x'); + memcpy(buffer.data(), data.c_str(), requested); + buffer.SetLength(requested); + + EXPECT_THAT(buffer.length(), Eq(requested)); + EXPECT_THAT(absl::string_view(buffer.data()), Eq(data)); +} + +TEST(CordBufferTest, CreateWithDefaultLimitAskingFor2GB) { + constexpr size_t k2GiB = 1U << 31; + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(k2GiB); + // Expect to never be awarded more than a reasonable memory size, even in + // cases where a (debug) memory allocator may grant us somewhat more memory + // than `kDefaultLimit` which should be no more than `2 * kDefaultLimit` + EXPECT_THAT(buffer.capacity(), Le(2 * CordBuffer::kDefaultLimit)); + EXPECT_THAT(buffer.length(), Eq(0)); + EXPECT_THAT(buffer.data(), Ne(nullptr)); + memset(buffer.data(), 0xCD, buffer.capacity()); +} + +TEST_P(CordBufferTest, MoveConstruct) { + const size_t requested = GetParam(); + CordBuffer from = CordBuffer::CreateWithDefaultLimit(requested); + const size_t capacity = from.capacity(); + memcpy(from.data(), "Abc", 4); + from.SetLength(4); + + CordBuffer to(std::move(from)); + EXPECT_THAT(to.capacity(), Eq(capacity)); + EXPECT_THAT(to.length(), Eq(4)); + EXPECT_THAT(absl::string_view(to.data()), Eq("Abc")); + + EXPECT_THAT(from.length(), Eq(0)); // NOLINT +} + +TEST_P(CordBufferTest, MoveAssign) { + const size_t requested = GetParam(); + CordBuffer from = CordBuffer::CreateWithDefaultLimit(requested); + const size_t capacity = from.capacity(); + memcpy(from.data(), "Abc", 4); + from.SetLength(4); + + CordBuffer to; + to = std::move(from); + EXPECT_THAT(to.capacity(), Eq(capacity)); + EXPECT_THAT(to.length(), Eq(4)); + EXPECT_THAT(absl::string_view(to.data()), Eq("Abc")); + + EXPECT_THAT(from.length(), Eq(0)); // NOLINT +} + +TEST_P(CordBufferTest, ConsumeValue) { + const size_t requested = GetParam(); + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(requested); + memcpy(buffer.data(), "Abc", 4); + buffer.SetLength(3); + + absl::string_view short_value; + if (cord_internal::CordRep* rep = + CordBufferTestPeer::ConsumeValue(buffer, short_value)) { + EXPECT_THAT(CordToString(rep), Eq("Abc")); + cord_internal::CordRep::Unref(rep); + } else { + EXPECT_THAT(short_value, Eq("Abc")); + } + EXPECT_THAT(buffer.length(), Eq(0)); +} + +TEST_P(CordBufferTest, CreateWithCustomLimitWithinDefaultLimit) { + const size_t requested = GetParam(); + CordBuffer buffer = + CordBuffer::CreateWithCustomLimit(kMaxFlatSize, requested); + EXPECT_THAT(buffer.capacity(), Ge(requested)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(kMaxFlatSize, requested))); + EXPECT_THAT(buffer.length(), Eq(0)); + + memset(buffer.data(), 0xCD, buffer.capacity()); + + std::string data(requested - 1, 'x'); + memcpy(buffer.data(), data.c_str(), requested); + buffer.SetLength(requested); + + EXPECT_THAT(buffer.length(), Eq(requested)); + EXPECT_THAT(absl::string_view(buffer.data()), Eq(data)); +} + +TEST(CordLargeBufferTest, CreateAtOrBelowDefaultLimit) { + CordBuffer buffer = CordBuffer::CreateWithCustomLimit(k64KiB, kDefaultLimit); + EXPECT_THAT(buffer.capacity(), Ge(kDefaultLimit)); + EXPECT_THAT(buffer.capacity(), + Le(MaxCapacityFor(kMaxFlatSize, kDefaultLimit))); + + buffer = CordBuffer::CreateWithCustomLimit(k64KiB, 3178); + EXPECT_THAT(buffer.capacity(), Ge(3178)); +} + +TEST(CordLargeBufferTest, CreateWithCustomLimit) { + ASSERT_THAT((kMaxFlatSize & (kMaxFlatSize - 1)) == 0, "Must be power of 2"); + + for (size_t size = kMaxFlatSize; size <= kCustomLimit; size *= 2) { + CordBuffer buffer = CordBuffer::CreateWithCustomLimit(size, size); + size_t expected = size - kFlatOverhead; + ASSERT_THAT(buffer.capacity(), Ge(expected)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(size, expected))); + } +} + +TEST(CordLargeBufferTest, CreateWithTooLargeLimit) { + CordBuffer buffer = CordBuffer::CreateWithCustomLimit(k64KiB, k1MB); + ASSERT_THAT(buffer.capacity(), Ge(k64KiB - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(k64KiB, k1MB))); +} + +TEST(CordLargeBufferTest, CreateWithHugeValueForOverFlowHardening) { + for (size_t dist_from_max = 0; dist_from_max <= 32; ++dist_from_max) { + size_t capacity = std::numeric_limits<size_t>::max() - dist_from_max; + + CordBuffer buffer = CordBuffer::CreateWithDefaultLimit(capacity); + ASSERT_THAT(buffer.capacity(), Ge(kDefaultLimit)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(kMaxFlatSize, capacity))); + + for (size_t limit = kMaxFlatSize; limit <= kCustomLimit; limit *= 2) { + CordBuffer buffer = CordBuffer::CreateWithCustomLimit(limit, capacity); + ASSERT_THAT(buffer.capacity(), Ge(limit - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(limit, capacity))); + } + } +} + +TEST(CordLargeBufferTest, CreateWithSmallLimit) { + CordBuffer buffer = CordBuffer::CreateWithCustomLimit(512, 1024); + ASSERT_THAT(buffer.capacity(), Ge(512 - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 1024))); + + // Ask for precise block size, should return size - kOverhead + buffer = CordBuffer::CreateWithCustomLimit(512, 512); + ASSERT_THAT(buffer.capacity(), Ge(512 - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 512))); + + // Corner case: 511 < block_size, but 511 + kOverhead is above + buffer = CordBuffer::CreateWithCustomLimit(512, 511); + ASSERT_THAT(buffer.capacity(), Ge(512 - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 511))); + + // Corner case: 498 + kOverhead < block_size + buffer = CordBuffer::CreateWithCustomLimit(512, 498); + ASSERT_THAT(buffer.capacity(), Ge(512 - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(512, 498))); +} + +TEST(CordLargeBufferTest, CreateWasteFull) { + // 15 KiB gets rounded down to next pow2 value. + const size_t requested = (15 << 10); + CordBuffer buffer = CordBuffer::CreateWithCustomLimit(k16KiB, requested); + ASSERT_THAT(buffer.capacity(), Ge(k8KiB - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(k8KiB, requested))); +} + +TEST(CordLargeBufferTest, CreateSmallSlop) { + const size_t requested = k16KiB - 2 * kFlatOverhead; + CordBuffer buffer = CordBuffer::CreateWithCustomLimit(k16KiB, requested); + ASSERT_THAT(buffer.capacity(), Ge(k16KiB - kFlatOverhead)); + EXPECT_THAT(buffer.capacity(), Le(MaxCapacityFor(k16KiB, requested))); +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/cord_ring_reader_test.cc b/absl/strings/cord_ring_reader_test.cc index 585616f3..8e7183bf 100644 --- a/absl/strings/cord_ring_reader_test.cc +++ b/absl/strings/cord_ring_reader_test.cc @@ -78,6 +78,7 @@ TEST(CordRingReaderTest, Reset) { EXPECT_TRUE(static_cast<bool>(reader)); EXPECT_THAT(reader.ring(), Eq(ring)); EXPECT_THAT(reader.index(), Eq(ring->head())); + EXPECT_THAT(reader.node(), Eq(ring->entry_child(ring->head()))); EXPECT_THAT(reader.length(), Eq(ring->length)); EXPECT_THAT(reader.consumed(), Eq(flats[0].length())); EXPECT_THAT(reader.remaining(), Eq(ring->length - reader.consumed())); @@ -99,11 +100,13 @@ TEST(CordRingReaderTest, Next) { size_t consumed = reader.consumed(); size_t remaining = reader.remaining(); for (int i = 1; i < flats.size(); ++i) { + CordRepRing::index_type index = ring->advance(head, i); consumed += flats[i].length(); remaining -= flats[i].length(); absl::string_view next = reader.Next(); ASSERT_THAT(next, Eq(flats[i])); - ASSERT_THAT(reader.index(), Eq(ring->advance(head, i))); + ASSERT_THAT(reader.index(), Eq(index)); + ASSERT_THAT(reader.node(), Eq(ring->entry_child(index))); ASSERT_THAT(reader.consumed(), Eq(consumed)); ASSERT_THAT(reader.remaining(), Eq(remaining)); } @@ -123,15 +126,17 @@ TEST(CordRingReaderTest, SeekForward) { reader.Reset(ring); size_t consumed = 0; - size_t remaining = ring->length;; + size_t remaining = ring->length; for (int i = 0; i < flats.size(); ++i) { + CordRepRing::index_type index = ring->advance(head, i); size_t offset = consumed; consumed += flats[i].length(); remaining -= flats[i].length(); for (int off = 0; off < flats[i].length(); ++off) { absl::string_view chunk = reader.Seek(offset + off); ASSERT_THAT(chunk, Eq(flats[i].substr(off))); - ASSERT_THAT(reader.index(), Eq(ring->advance(head, i))); + ASSERT_THAT(reader.index(), Eq(index)); + ASSERT_THAT(reader.node(), Eq(ring->entry_child(index))); ASSERT_THAT(reader.consumed(), Eq(consumed)); ASSERT_THAT(reader.remaining(), Eq(remaining)); } @@ -150,11 +155,13 @@ TEST(CordRingReaderTest, SeekBackward) { size_t consumed = ring->length; size_t remaining = 0; for (int i = flats.size() - 1; i >= 0; --i) { + CordRepRing::index_type index = ring->advance(head, i); size_t offset = consumed - flats[i].length(); for (int off = 0; off < flats[i].length(); ++off) { absl::string_view chunk = reader.Seek(offset + off); ASSERT_THAT(chunk, Eq(flats[i].substr(off))); - ASSERT_THAT(reader.index(), Eq(ring->advance(head, i))); + ASSERT_THAT(reader.index(), Eq(index)); + ASSERT_THAT(reader.node(), Eq(ring->entry_child(index))); ASSERT_THAT(reader.consumed(), Eq(consumed)); ASSERT_THAT(reader.remaining(), Eq(remaining)); } diff --git a/absl/strings/cord_ring_test.cc b/absl/strings/cord_ring_test.cc index 7d75e106..f39a0a4f 100644 --- a/absl/strings/cord_ring_test.cc +++ b/absl/strings/cord_ring_test.cc @@ -31,9 +31,6 @@ extern thread_local bool cord_ring; -// TOOD(b/177688959): weird things happened with the original test -#define ASAN_BUG_177688959_FIXED false - namespace absl { ABSL_NAMESPACE_BEGIN namespace { @@ -47,7 +44,6 @@ using ::absl::cord_internal::CordRepFlat; using ::absl::cord_internal::CordRepRing; using ::absl::cord_internal::CordRepSubstring; -using ::absl::cord_internal::CONCAT; using ::absl::cord_internal::EXTERNAL; using ::absl::cord_internal::SUBSTRING; @@ -101,15 +97,22 @@ using TestParams = std::vector<TestParam>; // Matcher validating when mutable copies are required / performed. MATCHER_P2(EqIfPrivate, param, rep, absl::StrCat("Equal 0x", absl::Hex(rep), " if private")) { - return param.refcount_is_one ? arg == rep : arg != rep; + return param.refcount_is_one ? arg == rep : true; } // Matcher validating when mutable copies are required / performed. MATCHER_P2(EqIfPrivateAndCapacity, param, rep, absl::StrCat("Equal 0x", absl::Hex(rep), " if private and capacity")) { - return (param.refcount_is_one && param.with_capacity) ? arg == rep - : arg != rep; + return (param.refcount_is_one && param.with_capacity) ? arg == rep : true; +} + +// Matcher validating a shared ring was re-allocated. Should only be used for +// tests doing exactly one update as subsequent updates could return the +// original (freed and re-used) pointer. +MATCHER_P2(NeIfShared, param, rep, + absl::StrCat("Not equal 0x", absl::Hex(rep), " if shared")) { + return param.refcount_is_one ? true : arg != rep; } MATCHER_P2(EqIfInputPrivate, param, rep, "Equal if input is private") { @@ -219,7 +222,7 @@ CordRepExternal* MakeFakeExternal(size_t length) { std::string s; explicit Rep(size_t len) { this->tag = EXTERNAL; - this->base = this->storage; + this->base = reinterpret_cast<const char*>(this->storage); this->length = len; this->releaser_invoker = [](CordRepExternal* self) { delete static_cast<Rep*>(self); @@ -258,20 +261,10 @@ CordRepSubstring* RemoveSuffix(size_t length, CordRep* rep) { return MakeSubstring(0, rep->length - length, rep); } -CordRepConcat* MakeConcat(CordRep* left, CordRep* right, int depth = 0) { - auto* concat = new CordRepConcat; - concat->tag = CONCAT; - concat->length = left->length + right->length; - concat->left = left; - concat->right = right; - concat->set_depth(depth); - return concat; -} - enum Composition { kMix, kAppend, kPrepend }; Composition RandomComposition() { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); return (rng() & 1) ? kMix : ((rng() & 1) ? kAppend : kPrepend); } @@ -292,7 +285,6 @@ constexpr const char* kFox = "The quick brown fox jumps over the lazy dog"; constexpr const char* kFoxFlats[] = {"The ", "quick ", "brown ", "fox ", "jumps ", "over ", "the ", "lazy ", "dog"}; -constexpr const char* kAlphabet = "abcdefghijklmnopqrstuvwxyz"; CordRepRing* FromFlats(Span<const char* const> flats, Composition composition = kAppend) { @@ -340,19 +332,15 @@ std::string TestParamToString(const testing::TestParamInfo<TestParam>& info) { class CordRingTest : public testing::Test { public: ~CordRingTest() override { -#if ASAN_BUG_177688959_FIXED for (CordRep* rep : unrefs_) { CordRep::Unref(rep); } -#endif } template <typename CordRepType> CordRepType* NeedsUnref(CordRepType* rep) { assert(rep); -#if ASAN_BUG_177688959_FIXED unrefs_.push_back(rep); -#endif return rep; } @@ -362,26 +350,16 @@ class CordRingTest : public testing::Test { return NeedsUnref(rep); } - void Unref(CordRep* rep) { -#if !ASAN_BUG_177688959_FIXED - CordRep::Unref(rep); -#endif - } - private: -#if ASAN_BUG_177688959_FIXED std::vector<CordRep*> unrefs_; -#endif }; class CordRingTestWithParam : public testing::TestWithParam<TestParam> { public: ~CordRingTestWithParam() override { -#if ASAN_BUG_177688959_FIXED for (CordRep* rep : unrefs_) { CordRep::Unref(rep); } -#endif } CordRepRing* CreateWithCapacity(CordRep* child, size_t extra_capacity) { @@ -400,9 +378,7 @@ class CordRingTestWithParam : public testing::TestWithParam<TestParam> { template <typename CordRepType> CordRepType* NeedsUnref(CordRepType* rep) { assert(rep); -#if ASAN_BUG_177688959_FIXED unrefs_.push_back(rep); -#endif return rep; } @@ -412,43 +388,23 @@ class CordRingTestWithParam : public testing::TestWithParam<TestParam> { return NeedsUnref(rep); } - void Unref(CordRep* rep) { -#if !ASAN_BUG_177688959_FIXED - CordRep::Unref(rep); -#endif - } - template <typename CordRepType> CordRepType* RefIfShared(CordRepType* rep) { return Shared() ? Ref(rep) : rep; } - void UnrefIfShared(CordRep* rep) { - if (Shared()) Unref(rep); - } - template <typename CordRepType> CordRepType* RefIfInputShared(CordRepType* rep) { return InputShared() ? Ref(rep) : rep; } - void UnrefIfInputShared(CordRep* rep) { - if (InputShared()) Unref(rep); - } - template <typename CordRepType> CordRepType* RefIfInputSharedIndirect(CordRepType* rep) { return InputSharedIndirect() ? Ref(rep) : rep; } - void UnrefIfInputSharedIndirect(CordRep* rep) { - if (InputSharedIndirect()) Unref(rep); - } - private: -#if ASAN_BUG_177688959_FIXED std::vector<CordRep*> unrefs_; -#endif }; class CordRingCreateTest : public CordRingTestWithParam { @@ -520,26 +476,26 @@ class CordRingBuildInputTest : public CordRingTestWithParam { } }; -INSTANTIATE_TEST_CASE_P(WithParam, CordRingSubTest, - testing::ValuesIn(CordRingSubTest::CreateTestParams()), - TestParamToString); +INSTANTIATE_TEST_SUITE_P(WithParam, CordRingSubTest, + testing::ValuesIn(CordRingSubTest::CreateTestParams()), + TestParamToString); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( WithParam, CordRingCreateTest, testing::ValuesIn(CordRingCreateTest::CreateTestParams()), TestParamToString); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( WithParam, CordRingCreateFromTreeTest, testing::ValuesIn(CordRingCreateFromTreeTest::CreateTestParams()), TestParamToString); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( WithParam, CordRingBuildTest, testing::ValuesIn(CordRingBuildTest::CreateTestParams()), TestParamToString); -INSTANTIATE_TEST_CASE_P( +INSTANTIATE_TEST_SUITE_P( WithParam, CordRingBuildInputTest, testing::ValuesIn(CordRingBuildInputTest::CreateTestParams()), TestParamToString); @@ -550,7 +506,6 @@ TEST_P(CordRingCreateTest, CreateFromFlat) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(str1.size())); EXPECT_THAT(ToFlats(result), ElementsAre(str1)); - Unref(result); } TEST_P(CordRingCreateTest, CreateFromRing) { @@ -558,9 +513,8 @@ TEST_P(CordRingCreateTest, CreateFromRing) { CordRepRing* result = NeedsUnref(CordRepRing::Create(ring)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringRing) { @@ -570,23 +524,20 @@ TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringRing) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfInputPrivate(GetParam(), ring)); EXPECT_THAT(ToString(result), string_view(kFox).substr(2, 11)); - UnrefIfInputSharedIndirect(ring); - UnrefIfInputShared(sub); - Unref(result); } TEST_F(CordRingTest, CreateWithIllegalExtraCapacity) { - CordRep* flat = NeedsUnref(MakeFlat("Hello world")); #if defined(ABSL_HAVE_EXCEPTIONS) + CordRep* flat = NeedsUnref(MakeFlat("Hello world")); try { CordRepRing::Create(flat, CordRepRing::kMaxCapacity); GTEST_FAIL() << "expected std::length_error exception"; } catch (const std::length_error&) { } #elif defined(GTEST_HAS_DEATH_TEST) + CordRep* flat = NeedsUnref(MakeFlat("Hello world")); EXPECT_DEATH(CordRepRing::Create(flat, CordRepRing::kMaxCapacity), ".*"); #endif - Unref(flat); } TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfFlat) { @@ -597,9 +548,6 @@ TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfFlat) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(20)); EXPECT_THAT(ToFlats(result), ElementsAre(str1.substr(4, 20))); - Unref(result); - UnrefIfInputShared(flat); - UnrefIfInputSharedIndirect(child); } TEST_P(CordRingCreateTest, CreateFromExternal) { @@ -609,8 +557,6 @@ TEST_P(CordRingCreateTest, CreateFromExternal) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(str1.size())); EXPECT_THAT(ToFlats(result), ElementsAre(str1)); - Unref(result); - UnrefIfInputShared(child); } TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfExternal) { @@ -621,9 +567,6 @@ TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfExternal) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(24)); EXPECT_THAT(ToFlats(result), ElementsAre(str1.substr(1, 24))); - Unref(result); - UnrefIfInputShared(external); - UnrefIfInputSharedIndirect(child); } TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfLargeExternal) { @@ -637,46 +580,6 @@ TEST_P(CordRingCreateFromTreeTest, CreateFromSubstringOfLargeExternal) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(str.size())); EXPECT_THAT(ToRawFlats(result), ElementsAre(str)); - Unref(result); - UnrefIfInputShared(external); - UnrefIfInputSharedIndirect(child); -} - -TEST_P(CordRingBuildInputTest, CreateFromConcat) { - CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), - MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; - auto* left = MakeConcat(RefIfInputSharedIndirect(flats[0]), flats[1]); - auto* right = MakeConcat(flats[2], RefIfInputSharedIndirect(flats[3])); - auto* concat = RefIfInputShared(MakeConcat(left, right)); - CordRepRing* result = NeedsUnref(CordRepRing::Create(concat)); - ASSERT_THAT(result, IsValidRingBuffer()); - EXPECT_THAT(result->length, Eq(26)); - EXPECT_THAT(ToString(result), Eq(kAlphabet)); - UnrefIfInputSharedIndirect(flats[0]); - UnrefIfInputSharedIndirect(flats[3]); - UnrefIfInputShared(concat); - Unref(result); -} - -TEST_P(CordRingBuildInputTest, CreateFromSubstringConcat) { - for (size_t off = 0; off < 26; ++off) { - for (size_t len = 1; len < 26 - off; ++len) { - CordRep* flats[] = {MakeFlat("abcdefgh"), MakeFlat("ijklm"), - MakeFlat("nopqrstuv"), MakeFlat("wxyz")}; - auto* left = MakeConcat(RefIfInputSharedIndirect(flats[0]), flats[1]); - auto* right = MakeConcat(flats[2], RefIfInputSharedIndirect(flats[3])); - auto* concat = MakeConcat(left, right); - auto* child = RefIfInputShared(MakeSubstring(off, len, concat)); - CordRepRing* result = NeedsUnref(CordRepRing::Create(child)); - ASSERT_THAT(result, IsValidRingBuffer()); - ASSERT_THAT(result->length, Eq(len)); - ASSERT_THAT(ToString(result), string_view(kAlphabet).substr(off, len)); - UnrefIfInputSharedIndirect(flats[0]); - UnrefIfInputSharedIndirect(flats[3]); - UnrefIfInputShared(child); - Unref(result); - } - } } TEST_P(CordRingCreateTest, Properties) { @@ -689,7 +592,6 @@ TEST_P(CordRingCreateTest, Properties) { EXPECT_THAT(result->capacity(), Le(2 * 120 + 1)); EXPECT_THAT(result->entries(), Eq(1)); EXPECT_THAT(result->begin_pos(), Eq(0)); - Unref(result); } TEST_P(CordRingCreateTest, EntryForNewFlat) { @@ -700,7 +602,6 @@ TEST_P(CordRingCreateTest, EntryForNewFlat) { EXPECT_THAT(result->entry_child(0), Eq(child)); EXPECT_THAT(result->entry_end_pos(0), Eq(str1.length())); EXPECT_THAT(result->entry_data_offset(0), Eq(0)); - Unref(result); } TEST_P(CordRingCreateTest, EntryForNewFlatSubstring) { @@ -712,7 +613,6 @@ TEST_P(CordRingCreateTest, EntryForNewFlatSubstring) { EXPECT_THAT(result->entry_child(0), Eq(child)); EXPECT_THAT(result->entry_end_pos(0), Eq(26)); EXPECT_THAT(result->entry_data_offset(0), Eq(10)); - Unref(result); } TEST_P(CordRingBuildTest, AppendFlat) { @@ -722,10 +622,9 @@ TEST_P(CordRingBuildTest, AppendFlat) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, MakeFlat(str2))); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, PrependFlat) { @@ -735,10 +634,9 @@ TEST_P(CordRingBuildTest, PrependFlat) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, MakeFlat(str2))); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1)); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendString) { @@ -748,10 +646,9 @@ TEST_P(CordRingBuildTest, AppendString) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendStringHavingExtra) { @@ -762,8 +659,7 @@ TEST_P(CordRingBuildTest, AppendStringHavingExtra) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); - UnrefIfShared(ring); - Unref(result); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); } TEST_P(CordRingBuildTest, AppendStringHavingPartialExtra) { @@ -785,13 +681,12 @@ TEST_P(CordRingBuildTest, AppendStringHavingPartialExtra) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); if (GetParam().refcount_is_one) { EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str1, str1a), str2a)); } else { EXPECT_THAT(ToFlats(result), ElementsAre(str1, str2)); } - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendStringHavingExtraInSubstring) { @@ -802,14 +697,13 @@ TEST_P(CordRingBuildTest, AppendStringHavingExtraInSubstring) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(result->length, Eq(4 + str2.size())); if (GetParam().refcount_is_one) { EXPECT_THAT(ToFlats(result), ElementsAre(StrCat("1234", str2))); } else { EXPECT_THAT(ToFlats(result), ElementsAre("1234", str2)); } - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendStringHavingSharedExtra) { @@ -837,10 +731,9 @@ TEST_P(CordRingBuildTest, AppendStringHavingSharedExtra) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, str2)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(result->length, Eq(4 + str2.size())); EXPECT_THAT(ToFlats(result), ElementsAre("1234", str2)); - UnrefIfShared(ring); - Unref(result); CordRep::Unref(shared_type == 1 ? flat1 : flat); } @@ -857,8 +750,6 @@ TEST_P(CordRingBuildTest, AppendStringWithExtra) { EXPECT_THAT(result->length, Eq(str1.size() + str2.size() + str3.size())); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre(str1, StrCat(str2, str3))); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, PrependString) { @@ -875,8 +766,6 @@ TEST_P(CordRingBuildTest, PrependString) { } EXPECT_THAT(result->length, Eq(str1.size() + str2.size())); EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1)); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, PrependStringHavingExtra) { @@ -887,14 +776,13 @@ TEST_P(CordRingBuildTest, PrependStringHavingExtra) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, str2)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(result->length, Eq(4 + str2.size())); if (GetParam().refcount_is_one) { EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str2, "1234"))); } else { EXPECT_THAT(ToFlats(result), ElementsAre(str2, "1234")); } - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, PrependStringHavingSharedExtra) { @@ -920,9 +808,8 @@ TEST_P(CordRingBuildTest, PrependStringHavingSharedExtra) { ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result->length, Eq(str1a.size() + str2.size())); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre(str2, str1a)); - UnrefIfShared(ring); - Unref(result); CordRep::Unref(shared_type == 1 ? flat1 : flat); } } @@ -938,8 +825,6 @@ TEST_P(CordRingBuildTest, PrependStringWithExtra) { EXPECT_THAT(result->length, Eq(str1.size() + str2.size() + str3.size())); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre(StrCat(str3, str2), str1)); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendPrependStringMix) { @@ -950,12 +835,10 @@ TEST_P(CordRingBuildTest, AppendPrependStringMix) { result = CordRepRing::Prepend(result, flats[4 - i]); result = CordRepRing::Append(result, flats[4 + i]); } - UnrefIfShared(ring); NeedsUnref(result); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); EXPECT_THAT(ToString(result), kFox); - Unref(result); } TEST_P(CordRingBuildTest, AppendPrependStringMixWithExtra) { @@ -976,8 +859,6 @@ TEST_P(CordRingBuildTest, AppendPrependStringMixWithExtra) { EXPECT_THAT(ToFlats(result), ElementsAre("The quick brown fox ", "jumps ", "over the lazy dog")); } - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendPrependStringMixWithPrependedExtra) { @@ -998,8 +879,6 @@ TEST_P(CordRingBuildTest, AppendPrependStringMixWithPrependedExtra) { EXPECT_THAT(ToFlats(result), ElementsAre("The quick brown fox ", "jumps ", "over the lazy dog")); } - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingSubTest, SubRing) { @@ -1011,16 +890,14 @@ TEST_P(CordRingSubTest, SubRing) { CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); CordRepRing* result = CordRepRing::SubRing(ring, offset, 0); EXPECT_THAT(result, nullptr); - UnrefIfShared(ring); for (size_t len = 1; len < all.size() - offset; ++len) { ring = RefIfShared(FromFlats(flats, composition)); result = NeedsUnref(CordRepRing::SubRing(ring, offset, len)); ASSERT_THAT(result, IsValidRingBuffer()); ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); ASSERT_THAT(ToString(result), Eq(all.substr(offset, len))); - UnrefIfShared(ring); - Unref(result); } } } @@ -1039,18 +916,16 @@ TEST_P(CordRingSubTest, SubRingFromLargeExternal) { CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); CordRepRing* result = CordRepRing::SubRing(ring, offset, 0); EXPECT_THAT(result, nullptr); - UnrefIfShared(ring); for (size_t len = all.size() - 30; len < all.size() - offset; ++len) { ring = RefIfShared(FromFlats(flats, composition)); result = NeedsUnref(CordRepRing::SubRing(ring, offset, len)); ASSERT_THAT(result, IsValidRingBuffer()); ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); auto str = ToString(result); ASSERT_THAT(str, SizeIs(len)); ASSERT_THAT(str, Eq(all.substr(offset, len))); - UnrefIfShared(ring); - Unref(result); } } } @@ -1063,16 +938,14 @@ TEST_P(CordRingSubTest, RemovePrefix) { CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); CordRepRing* result = CordRepRing::RemovePrefix(ring, all.size()); EXPECT_THAT(result, nullptr); - UnrefIfShared(ring); for (size_t len = 1; len < all.size(); ++len) { ring = RefIfShared(FromFlats(flats, composition)); result = NeedsUnref(CordRepRing::RemovePrefix(ring, len)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToString(result), Eq(all.substr(len))); - UnrefIfShared(ring); - Unref(result); } } @@ -1087,7 +960,6 @@ TEST_P(CordRingSubTest, RemovePrefixFromLargeExternal) { ElementsAre( not_a_string_view(external1->base, 1 << 20).remove_prefix(1 << 16), not_a_string_view(external2->base, 1 << 20))); - Unref(result); } TEST_P(CordRingSubTest, RemoveSuffix) { @@ -1098,16 +970,14 @@ TEST_P(CordRingSubTest, RemoveSuffix) { CordRepRing* ring = RefIfShared(FromFlats(flats, composition)); CordRepRing* result = CordRepRing::RemoveSuffix(ring, all.size()); EXPECT_THAT(result, nullptr); - UnrefIfShared(ring); for (size_t len = 1; len < all.size(); ++len) { ring = RefIfShared(FromFlats(flats, composition)); result = NeedsUnref(CordRepRing::RemoveSuffix(ring, len)); ASSERT_THAT(result, IsValidRingBuffer()); - EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); - EXPECT_THAT(ToString(result), Eq(all.substr(0, all.size() - len))); - UnrefIfShared(ring); - Unref(result); + ASSERT_THAT(result, EqIfPrivate(GetParam(), ring)); + ASSERT_THAT(result, NeIfShared(GetParam(), ring)); + ASSERT_THAT(ToString(result), Eq(all.substr(0, all.size() - len))); } } @@ -1120,9 +990,8 @@ TEST_P(CordRingSubTest, AppendRing) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, child)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivate(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, AppendRingWithFlatOffset) { @@ -1135,11 +1004,9 @@ TEST_P(CordRingBuildInputTest, AppendRingWithFlatOffset) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("Head", "brown ", "fox ", "jumps ", "over ", "the ", "lazy ", "dog")); - UnrefIfInputSharedIndirect(child); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, AppendRingWithBrokenOffset) { @@ -1152,11 +1019,9 @@ TEST_P(CordRingBuildInputTest, AppendRingWithBrokenOffset) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("Head", "umps ", "over ", "the ", "lazy ", "dog")); - UnrefIfInputSharedIndirect(child); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, AppendRingWithFlatLength) { @@ -1169,11 +1034,9 @@ TEST_P(CordRingBuildInputTest, AppendRingWithFlatLength) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("Head", "The ", "quick ", "brown ", "fox ", "jumps ", "over ", "the ")); - UnrefIfInputSharedIndirect(child); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendRingWithBrokenFlatLength) { @@ -1186,11 +1049,9 @@ TEST_P(CordRingBuildTest, AppendRingWithBrokenFlatLength) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("Head", "The ", "quick ", "brown ", "fox ", "jumps ", "ov")); - UnrefIfInputSharedIndirect(child); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendRingMiddlePiece) { @@ -1203,11 +1064,9 @@ TEST_P(CordRingBuildTest, AppendRingMiddlePiece) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("Head", "ck ", "brown ", "fox ", "jum")); - UnrefIfInputSharedIndirect(child); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildTest, AppendRingSinglePiece) { @@ -1220,11 +1079,8 @@ TEST_P(CordRingBuildTest, AppendRingSinglePiece) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("Head", "row")); - UnrefIfInputSharedIndirect(child); - UnrefIfInputShared(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, AppendRingSinglePieceWithPrefix) { @@ -1241,11 +1097,8 @@ TEST_P(CordRingBuildInputTest, AppendRingSinglePieceWithPrefix) { CordRepRing* result = NeedsUnref(CordRepRing::Append(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("Prepend", "Head", "row")); - UnrefIfInputSharedIndirect(child); - UnrefIfInputShared(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRing) { @@ -1258,10 +1111,8 @@ TEST_P(CordRingBuildInputTest, PrependRing) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, child)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAreArray(kFoxFlats)); - UnrefIfInputShared(child); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRingWithFlatOffset) { @@ -1274,12 +1125,9 @@ TEST_P(CordRingBuildInputTest, PrependRingWithFlatOffset) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("brown ", "fox ", "jumps ", "over ", "the ", "lazy ", "dog", "Tail")); - UnrefIfInputShared(child); - UnrefIfInputSharedIndirect(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRingWithBrokenOffset) { @@ -1291,12 +1139,9 @@ TEST_P(CordRingBuildInputTest, PrependRingWithBrokenOffset) { CordRep* stripped = RefIfInputSharedIndirect(RemovePrefix(21, child)); CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("umps ", "over ", "the ", "lazy ", "dog", "Tail")); - UnrefIfInputShared(child); - UnrefIfInputSharedIndirect(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRingWithFlatLength) { @@ -1309,12 +1154,9 @@ TEST_P(CordRingBuildInputTest, PrependRingWithFlatLength) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("The ", "quick ", "brown ", "fox ", "jumps ", "over ", "the ", "Tail")); - UnrefIfShared(ring); - UnrefIfInputShared(child); - UnrefIfInputSharedIndirect(stripped); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRingWithBrokenFlatLength) { @@ -1327,12 +1169,9 @@ TEST_P(CordRingBuildInputTest, PrependRingWithBrokenFlatLength) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("The ", "quick ", "brown ", "fox ", "jumps ", "ov", "Tail")); - UnrefIfInputShared(child); - UnrefIfInputSharedIndirect(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRingMiddlePiece) { @@ -1346,12 +1185,9 @@ TEST_P(CordRingBuildInputTest, PrependRingMiddlePiece) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("ck ", "brown ", "fox ", "jum", "Tail")); - UnrefIfInputShared(child); - UnrefIfInputSharedIndirect(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRingSinglePiece) { @@ -1364,11 +1200,8 @@ TEST_P(CordRingBuildInputTest, PrependRingSinglePiece) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("row", "Tail")); - UnrefIfInputShared(child); - UnrefIfInputSharedIndirect(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_P(CordRingBuildInputTest, PrependRingSinglePieceWithPrefix) { @@ -1384,11 +1217,8 @@ TEST_P(CordRingBuildInputTest, PrependRingSinglePieceWithPrefix) { CordRepRing* result = NeedsUnref(CordRepRing::Prepend(ring, stripped)); ASSERT_THAT(result, IsValidRingBuffer()); EXPECT_THAT(result, EqIfPrivateAndCapacity(GetParam(), ring)); + EXPECT_THAT(result, NeIfShared(GetParam(), ring)); EXPECT_THAT(ToFlats(result), ElementsAre("row", "Prepend", "Tail")); - UnrefIfInputShared(child); - UnrefIfInputSharedIndirect(stripped); - UnrefIfShared(ring); - Unref(result); } TEST_F(CordRingTest, Find) { @@ -1406,7 +1236,6 @@ TEST_F(CordRingTest, Find) { ASSERT_THAT(found.offset, Lt(data.length())); ASSERT_THAT(data[found.offset], Eq(value[i])); } - Unref(ring); } TEST_F(CordRingTest, FindWithHint) { @@ -1442,7 +1271,6 @@ TEST_F(CordRingTest, FindWithHint) { ++flat_pos; flat_offset += flat.length(); } - Unref(ring); } TEST_F(CordRingTest, FindInLargeRing) { @@ -1464,7 +1292,6 @@ TEST_F(CordRingTest, FindInLargeRing) { ASSERT_THAT(pos.offset, Lt(data.length())); ASSERT_THAT(data[pos.offset], Eq(value[i])); } - Unref(ring); } TEST_F(CordRingTest, FindTail) { @@ -1483,7 +1310,6 @@ TEST_F(CordRingTest, FindTail) { ASSERT_THAT(pos.offset, Lt(data.length())); ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); } - Unref(ring); } TEST_F(CordRingTest, FindTailWithHint) { @@ -1510,7 +1336,6 @@ TEST_F(CordRingTest, FindTailWithHint) { ASSERT_THAT(pos.offset, Lt(data.length())); ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); } - Unref(ring); } TEST_F(CordRingTest, FindTailInLargeRing) { @@ -1532,7 +1357,6 @@ TEST_F(CordRingTest, FindTailInLargeRing) { ASSERT_THAT(pos.offset, Lt(data.length())); ASSERT_THAT(data[data.length() - pos.offset - 1], Eq(value[i])); } - Unref(ring); } TEST_F(CordRingTest, GetCharacter) { @@ -1544,7 +1368,6 @@ TEST_F(CordRingTest, GetCharacter) { for (int i = 0; i < value.length(); ++i) { ASSERT_THAT(result->GetCharacter(i), Eq(value[i])); } - Unref(result); } TEST_F(CordRingTest, GetCharacterWithSubstring) { @@ -1556,7 +1379,67 @@ TEST_F(CordRingTest, GetCharacterWithSubstring) { for (int i = 0; i < value.length(); ++i) { ASSERT_THAT(result->GetCharacter(i), Eq(value[i])); } - Unref(result); +} + +TEST_F(CordRingTest, IsFlatSingleFlat) { + for (bool external : {false, true}) { + SCOPED_TRACE(external ? "With External" : "With Flat"); + absl::string_view str = "Hello world"; + CordRep* rep = external ? MakeExternal(str) : MakeFlat(str); + CordRepRing* ring = NeedsUnref(CordRepRing::Create(rep)); + + // The ring is a single non-fragmented flat: + absl::string_view fragment; + EXPECT_TRUE(ring->IsFlat(nullptr)); + EXPECT_TRUE(ring->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + fragment = ""; + EXPECT_TRUE(ring->IsFlat(0, 11, nullptr)); + EXPECT_TRUE(ring->IsFlat(0, 11, &fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + + // Arbitrary ranges must check true as well. + EXPECT_TRUE(ring->IsFlat(1, 4, &fragment)); + EXPECT_THAT(fragment, Eq("ello")); + EXPECT_TRUE(ring->IsFlat(6, 5, &fragment)); + EXPECT_THAT(fragment, Eq("world")); + } +} + +TEST_F(CordRingTest, IsFlatMultiFlat) { + for (bool external : {false, true}) { + SCOPED_TRACE(external ? "With External" : "With Flat"); + absl::string_view str1 = "Hello world"; + absl::string_view str2 = "Halt and catch fire"; + CordRep* rep1 = external ? MakeExternal(str1) : MakeFlat(str1); + CordRep* rep2 = external ? MakeExternal(str2) : MakeFlat(str2); + CordRepRing* ring = CordRepRing::Append(CordRepRing::Create(rep1), rep2); + NeedsUnref(ring); + + // The ring is fragmented, IsFlat() on the entire cord must be false. + EXPECT_FALSE(ring->IsFlat(nullptr)); + absl::string_view fragment = "Don't touch this"; + EXPECT_FALSE(ring->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Don't touch this")); + + // Check for ranges exactly within both flats. + EXPECT_TRUE(ring->IsFlat(0, 11, &fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + EXPECT_TRUE(ring->IsFlat(11, 19, &fragment)); + EXPECT_THAT(fragment, Eq("Halt and catch fire")); + + // Check for arbitrary partial range inside each flat. + EXPECT_TRUE(ring->IsFlat(1, 4, &fragment)); + EXPECT_THAT(fragment, "ello"); + EXPECT_TRUE(ring->IsFlat(26, 4, &fragment)); + EXPECT_THAT(fragment, "fire"); + + // Check ranges spanning across both flats + fragment = "Don't touch this"; + EXPECT_FALSE(ring->IsFlat(1, 18, &fragment)); + EXPECT_FALSE(ring->IsFlat(10, 2, &fragment)); + EXPECT_THAT(fragment, Eq("Don't touch this")); + } } TEST_F(CordRingTest, Dump) { @@ -1564,7 +1447,6 @@ TEST_F(CordRingTest, Dump) { auto flats = MakeSpan(kFoxFlats); CordRepRing* ring = NeedsUnref(FromFlats(flats, kPrepend)); ss << *ring; - Unref(ring); } } // namespace diff --git a/absl/strings/cord_test.cc b/absl/strings/cord_test.cc index f9982428..0862f69a 100644 --- a/absl/strings/cord_test.cc +++ b/absl/strings/cord_test.cc @@ -34,13 +34,32 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/container/fixed_array.h" +#include "absl/hash/hash.h" +#include "absl/random/random.h" #include "absl/strings/cord_test_helpers.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/match.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" +// convenience local constants +static constexpr auto FLAT = absl::cord_internal::FLAT; +static constexpr auto MAX_FLAT_TAG = absl::cord_internal::MAX_FLAT_TAG; + typedef std::mt19937_64 RandomEngine; +using absl::cord_internal::CordRep; +using absl::cord_internal::CordRepBtree; +using absl::cord_internal::CordRepConcat; +using absl::cord_internal::CordRepCrc; +using absl::cord_internal::CordRepExternal; +using absl::cord_internal::CordRepFlat; +using absl::cord_internal::CordRepSubstring; +using absl::cord_internal::CordzUpdateTracker; +using absl::cord_internal::kFlatOverhead; +using absl::cord_internal::kMaxFlatLength; + static std::string RandomLowercaseString(RandomEngine* rng); static std::string RandomLowercaseString(RandomEngine* rng, size_t length); @@ -183,16 +202,129 @@ class CordTestPeer { } static bool IsTree(const Cord& c) { return c.contents_.is_tree(); } + static CordRep* Tree(const Cord& c) { return c.contents_.tree(); } static cord_internal::CordzInfo* GetCordzInfo(const Cord& c) { return c.contents_.cordz_info(); } + + static Cord MakeSubstring(Cord src, size_t offset, size_t length) { + ABSL_RAW_CHECK(src.contents_.is_tree(), "Can not be inlined"); + ABSL_RAW_CHECK(src.ExpectedChecksum() == absl::nullopt, + "Can not be hardened"); + Cord cord; + auto* tree = cord_internal::SkipCrcNode(src.contents_.tree()); + auto* rep = CordRepSubstring::Create(CordRep::Ref(tree), offset, length); + cord.contents_.EmplaceTree(rep, CordzUpdateTracker::kSubCord); + return cord; + } }; ABSL_NAMESPACE_END } // namespace absl -TEST(Cord, AllFlatSizes) { +// The CordTest fixture runs all tests with and without Cord Btree enabled, +// and with our without expected CRCs being set on the subject Cords. +class CordTest : public testing::TestWithParam<int> { + public: + // Returns true if test is running with btree enabled. + bool UseCrc() const { return GetParam() == 2 || GetParam() == 3; } + void MaybeHarden(absl::Cord& c) { + if (UseCrc()) { + c.SetExpectedChecksum(1); + } + } + absl::Cord MaybeHardened(absl::Cord c) { + MaybeHarden(c); + return c; + } + + // Returns human readable string representation of the test parameter. + static std::string ToString(testing::TestParamInfo<int> param) { + switch (param.param) { + case 0: + return "Btree"; + case 1: + return "BtreeHardened"; + default: + assert(false); + return "???"; + } + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordTest, testing::Values(0, 1), + CordTest::ToString); + +TEST(CordRepFlat, AllFlatCapacities) { + // Explicitly and redundantly assert built-in min/max limits + static_assert(absl::cord_internal::kFlatOverhead < 32, ""); + static_assert(absl::cord_internal::kMinFlatSize == 32, ""); + static_assert(absl::cord_internal::kMaxLargeFlatSize == 256 << 10, ""); + EXPECT_EQ(absl::cord_internal::TagToAllocatedSize(FLAT), 32); + EXPECT_EQ(absl::cord_internal::TagToAllocatedSize(MAX_FLAT_TAG), 256 << 10); + + // Verify all tags to map perfectly back and forth, and + // that sizes are monotonically increasing. + size_t last_size = 0; + for (int tag = FLAT; tag <= MAX_FLAT_TAG; ++tag) { + size_t size = absl::cord_internal::TagToAllocatedSize(tag); + ASSERT_GT(size, last_size); + ASSERT_EQ(absl::cord_internal::TagToAllocatedSize(tag), size); + last_size = size; + } + + // All flat size from 32 - 512 are 8 byte granularity + for (size_t size = 32; size <= 512; size += 8) { + ASSERT_EQ(absl::cord_internal::RoundUpForTag(size), size); + uint8_t tag = absl::cord_internal::AllocatedSizeToTag(size); + ASSERT_EQ(absl::cord_internal::TagToAllocatedSize(tag), size); + } + + // All flat sizes from 512 - 8192 are 64 byte granularity + for (size_t size = 512; size <= 8192; size += 64) { + ASSERT_EQ(absl::cord_internal::RoundUpForTag(size), size); + uint8_t tag = absl::cord_internal::AllocatedSizeToTag(size); + ASSERT_EQ(absl::cord_internal::TagToAllocatedSize(tag), size); + } + + // All flat sizes from 8KB to 256KB are 4KB granularity + for (size_t size = 8192; size <= 256 * 1024; size += 4 * 1024) { + ASSERT_EQ(absl::cord_internal::RoundUpForTag(size), size); + uint8_t tag = absl::cord_internal::AllocatedSizeToTag(size); + ASSERT_EQ(absl::cord_internal::TagToAllocatedSize(tag), size); + } +} + +TEST(CordRepFlat, MaxFlatSize) { + CordRepFlat* flat = CordRepFlat::New(kMaxFlatLength); + EXPECT_EQ(flat->Capacity(), kMaxFlatLength); + CordRep::Unref(flat); + + flat = CordRepFlat::New(kMaxFlatLength * 4); + EXPECT_EQ(flat->Capacity(), kMaxFlatLength); + CordRep::Unref(flat); +} + +TEST(CordRepFlat, MaxLargeFlatSize) { + const size_t size = 256 * 1024 - kFlatOverhead; + CordRepFlat* flat = CordRepFlat::New(CordRepFlat::Large(), size); + EXPECT_GE(flat->Capacity(), size); + CordRep::Unref(flat); +} + +TEST(CordRepFlat, AllFlatSizes) { + const size_t kMaxSize = 256 * 1024; + for (size_t size = 32; size <= kMaxSize; size *=2) { + const size_t length = size - kFlatOverhead - 1; + CordRepFlat* flat = CordRepFlat::New(CordRepFlat::Large(), length); + EXPECT_GE(flat->Capacity(), length); + memset(flat->Data(), 0xCD, flat->Capacity()); + CordRep::Unref(flat); + } +} + +TEST_P(CordTest, AllFlatSizes) { using absl::strings_internal::CordTestAccess; for (size_t s = 0; s < CordTestAccess::MaxFlatLength(); s++) { @@ -203,6 +335,7 @@ TEST(Cord, AllFlatSizes) { } absl::Cord dst(src); + MaybeHarden(dst); EXPECT_EQ(std::string(dst), src) << s; } } @@ -210,7 +343,7 @@ TEST(Cord, AllFlatSizes) { // We create a Cord at least 128GB in size using the fact that Cords can // internally reference-count; thus the Cord is enormous without actually // consuming very much memory. -TEST(GigabyteCord, FromExternal) { +TEST_P(CordTest, GigabyteCordFromExternal) { const size_t one_gig = 1024U * 1024U * 1024U; size_t max_size = 2 * one_gig; if (sizeof(max_size) > 4) max_size = 128 * one_gig; @@ -227,7 +360,6 @@ TEST(GigabyteCord, FromExternal) { // caused crashes in production. We grow exponentially so that the code will // execute in a reasonable amount of time. absl::Cord c; - ABSL_RAW_LOG(INFO, "Made a Cord with %zu bytes!", c.size()); c.Append(from); while (c.size() < max_size) { c.Append(c); @@ -235,6 +367,7 @@ TEST(GigabyteCord, FromExternal) { c.Append(from); c.Append(from); c.Append(from); + MaybeHarden(c); } for (int i = 0; i < 1024; ++i) { @@ -260,9 +393,11 @@ static absl::Cord MakeExternalCord(int size) { extern bool my_unique_true_boolean; bool my_unique_true_boolean = true; -TEST(Cord, Assignment) { +TEST_P(CordTest, Assignment) { absl::Cord x(absl::string_view("hi there")); absl::Cord y(x); + MaybeHarden(y); + ASSERT_EQ(x.ExpectedChecksum(), absl::nullopt); ASSERT_EQ(std::string(x), "hi there"); ASSERT_EQ(std::string(y), "hi there"); ASSERT_TRUE(x == y); @@ -314,8 +449,9 @@ TEST(Cord, Assignment) { } } -TEST(Cord, StartsEndsWith) { +TEST_P(CordTest, StartsEndsWith) { absl::Cord x(absl::string_view("abcde")); + MaybeHarden(x); absl::Cord empty(""); ASSERT_TRUE(x.StartsWith(absl::Cord("abcde"))); @@ -347,13 +483,14 @@ TEST(Cord, StartsEndsWith) { ASSERT_TRUE(!empty.EndsWith("xyz")); } -TEST(Cord, Subcord) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, Subcord) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); const std::string s = RandomLowercaseString(&rng, 1024); absl::Cord a; AppendWithFragments(s, &rng, &a); - ASSERT_EQ(s.size(), a.size()); + MaybeHarden(a); + ASSERT_EQ(s, std::string(a)); // Check subcords of a, from a variety of interesting points. std::set<size_t> positions; @@ -374,6 +511,9 @@ TEST(Cord, Subcord) { ASSERT_EQ(absl::string_view(s).substr(pos, end_pos - pos), std::string(sa)) << a; + if (pos != 0 || end_pos != a.size()) { + ASSERT_EQ(sa.ExpectedChecksum(), absl::nullopt); + } } } @@ -408,15 +548,24 @@ TEST(Cord, Subcord) { EXPECT_TRUE(sa.empty()); } -TEST(Cord, Swap) { +TEST_P(CordTest, Swap) { absl::string_view a("Dexter"); absl::string_view b("Mandark"); absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); swap(x, y); + if (UseCrc()) { + ASSERT_EQ(x.ExpectedChecksum(), absl::nullopt); + ASSERT_EQ(y.ExpectedChecksum(), 1); + } ASSERT_EQ(x, absl::Cord(b)); ASSERT_EQ(y, absl::Cord(a)); x.swap(y); + if (UseCrc()) { + ASSERT_EQ(x.ExpectedChecksum(), 1); + ASSERT_EQ(y.ExpectedChecksum(), absl::nullopt); + } ASSERT_EQ(x, absl::Cord(a)); ASSERT_EQ(y, absl::Cord(b)); } @@ -440,56 +589,363 @@ static void VerifyCopyToString(const absl::Cord& cord) { } } -TEST(Cord, CopyToString) { - VerifyCopyToString(absl::Cord()); - VerifyCopyToString(absl::Cord("small cord")); - VerifyCopyToString( +TEST_P(CordTest, CopyToString) { + VerifyCopyToString(absl::Cord()); // empty cords cannot carry CRCs + VerifyCopyToString(MaybeHardened(absl::Cord("small cord"))); + VerifyCopyToString(MaybeHardened( absl::MakeFragmentedCord({"fragmented ", "cord ", "to ", "test ", - "copying ", "to ", "a ", "string."})); + "copying ", "to ", "a ", "string."}))); +} + +TEST_P(CordTest, AppendEmptyBuffer) { + absl::Cord cord; + cord.Append(absl::CordBuffer()); + cord.Append(absl::CordBuffer::CreateWithDefaultLimit(2000)); +} + +TEST_P(CordTest, AppendEmptyBufferToFlat) { + absl::Cord cord(std::string(2000, 'x')); + cord.Append(absl::CordBuffer()); + cord.Append(absl::CordBuffer::CreateWithDefaultLimit(2000)); +} + +TEST_P(CordTest, AppendEmptyBufferToTree) { + absl::Cord cord(std::string(2000, 'x')); + cord.Append(std::string(2000, 'y')); + cord.Append(absl::CordBuffer()); + cord.Append(absl::CordBuffer::CreateWithDefaultLimit(2000)); +} + +TEST_P(CordTest, AppendSmallBuffer) { + absl::Cord cord; + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3); + ASSERT_THAT(buffer.capacity(), ::testing::Le(15)); + memcpy(buffer.data(), "Abc", 3); + buffer.SetLength(3); + cord.Append(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + buffer = absl::CordBuffer::CreateWithDefaultLimit(3); + memcpy(buffer.data(), "defgh", 5); + buffer.SetLength(5); + cord.Append(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("Abcdefgh")); +} + +TEST_P(CordTest, AppendAndPrependBufferArePrecise) { + // Create a cord large enough to force 40KB flats. + std::string test_data(absl::cord_internal::kMaxFlatLength * 10, 'x'); + absl::Cord cord1(test_data); + absl::Cord cord2(test_data); + const size_t size1 = cord1.EstimatedMemoryUsage(); + const size_t size2 = cord2.EstimatedMemoryUsage(); + + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3); + memcpy(buffer.data(), "Abc", 3); + buffer.SetLength(3); + cord1.Append(std::move(buffer)); + + buffer = absl::CordBuffer::CreateWithDefaultLimit(3); + memcpy(buffer.data(), "Abc", 3); + buffer.SetLength(3); + cord2.Prepend(std::move(buffer)); + +#ifndef NDEBUG + // Allow 32 bytes new CordRepFlat, and 128 bytes for 'glue nodes' + constexpr size_t kMaxDelta = 128 + 32; +#else + // Allow 256 bytes extra for 'allocation debug overhead' + constexpr size_t kMaxDelta = 128 + 32 + 256; +#endif + + EXPECT_LE(cord1.EstimatedMemoryUsage() - size1, kMaxDelta); + EXPECT_LE(cord2.EstimatedMemoryUsage() - size2, kMaxDelta); + + EXPECT_EQ(cord1, absl::StrCat(test_data, "Abc")); + EXPECT_EQ(cord2, absl::StrCat("Abc", test_data)); +} + +TEST_P(CordTest, PrependSmallBuffer) { + absl::Cord cord; + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(3); + ASSERT_THAT(buffer.capacity(), ::testing::Le(15)); + memcpy(buffer.data(), "Abc", 3); + buffer.SetLength(3); + cord.Prepend(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + buffer = absl::CordBuffer::CreateWithDefaultLimit(3); + memcpy(buffer.data(), "defgh", 5); + buffer.SetLength(5); + cord.Prepend(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre("defghAbc")); +} + +TEST_P(CordTest, AppendLargeBuffer) { + absl::Cord cord; + + std::string s1(700, '1'); + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(s1.size()); + memcpy(buffer.data(), s1.data(), s1.size()); + buffer.SetLength(s1.size()); + cord.Append(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + std::string s2(1000, '2'); + buffer = absl::CordBuffer::CreateWithDefaultLimit(s2.size()); + memcpy(buffer.data(), s2.data(), s2.size()); + buffer.SetLength(s2.size()); + cord.Append(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s1, s2)); +} + +TEST_P(CordTest, PrependLargeBuffer) { + absl::Cord cord; + + std::string s1(700, '1'); + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(s1.size()); + memcpy(buffer.data(), s1.data(), s1.size()); + buffer.SetLength(s1.size()); + cord.Prepend(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + std::string s2(1000, '2'); + buffer = absl::CordBuffer::CreateWithDefaultLimit(s2.size()); + memcpy(buffer.data(), s2.data(), s2.size()); + buffer.SetLength(s2.size()); + cord.Prepend(std::move(buffer)); + EXPECT_EQ(buffer.length(), 0); // NOLINT + EXPECT_GT(buffer.capacity(), 0); // NOLINT + + EXPECT_THAT(cord.Chunks(), ::testing::ElementsAre(s2, s1)); } -TEST(TryFlat, Empty) { +TEST_P(CordTest, GetAppendBufferOnEmptyCord) { + absl::Cord cord; + absl::CordBuffer buffer = cord.GetAppendBuffer(1000); + EXPECT_GE(buffer.capacity(), 1000); + EXPECT_EQ(buffer.length(), 0); +} + +TEST_P(CordTest, GetAppendBufferOnInlinedCord) { + static constexpr int kInlinedSize = sizeof(absl::CordBuffer) - 1; + for (int size : {6, kInlinedSize - 3, kInlinedSize - 2, 1000}) { + absl::Cord cord("Abc"); + absl::CordBuffer buffer = cord.GetAppendBuffer(size, 1); + EXPECT_GE(buffer.capacity(), 3 + size); + EXPECT_EQ(buffer.length(), 3); + EXPECT_EQ(absl::string_view(buffer.data(), buffer.length()), "Abc"); + EXPECT_TRUE(cord.empty()); + } +} + +TEST_P(CordTest, GetAppendBufferOnInlinedCordWithCapacityCloseToMax) { + // Cover the use case where we have a non empty inlined cord with some size + // 'n', and ask for something like 'uint64_max - k', assuming internal logic + // could overflow on 'uint64_max - k + size', and return a valid, but + // inefficiently smaller buffer if it would provide is the max allowed size. + for (size_t dist_from_max = 0; dist_from_max <= 4; ++dist_from_max) { + absl::Cord cord("Abc"); + size_t size = std::numeric_limits<size_t>::max() - dist_from_max; + absl::CordBuffer buffer = cord.GetAppendBuffer(size, 1); + EXPECT_EQ(buffer.capacity(), absl::CordBuffer::kDefaultLimit); + EXPECT_EQ(buffer.length(), 3); + EXPECT_EQ(absl::string_view(buffer.data(), buffer.length()), "Abc"); + EXPECT_TRUE(cord.empty()); + } +} + +TEST_P(CordTest, GetAppendBufferOnFlat) { + // Create a cord with a single flat and extra capacity + absl::Cord cord; + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(500); + buffer.SetLength(3); + memcpy(buffer.data(), "Abc", 3); + cord.Append(std::move(buffer)); + + buffer = cord.GetAppendBuffer(6); + EXPECT_GE(buffer.capacity(), 500); + EXPECT_EQ(buffer.length(), 3); + EXPECT_EQ(absl::string_view(buffer.data(), buffer.length()), "Abc"); + EXPECT_TRUE(cord.empty()); +} + +TEST_P(CordTest, GetAppendBufferOnFlatWithoutMinCapacity) { + // Create a cord with a single flat and extra capacity + absl::Cord cord; + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(500); + buffer.SetLength(30); + memset(buffer.data(), 'x', 30); + cord.Append(std::move(buffer)); + + buffer = cord.GetAppendBuffer(1000, 900); + EXPECT_GE(buffer.capacity(), 1000); + EXPECT_EQ(buffer.length(), 0); + EXPECT_EQ(cord, std::string(30, 'x')); +} + +TEST_P(CordTest, GetAppendBufferOnTree) { + RandomEngine rng; + for (int num_flats : {2, 3, 100}) { + // Create a cord with `num_flats` flats and extra capacity + absl::Cord cord; + std::string prefix; + std::string last; + for (int i = 0; i < num_flats - 1; ++i) { + prefix += last; + last = RandomLowercaseString(&rng, 10); + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(500); + buffer.SetLength(10); + memcpy(buffer.data(), last.data(), 10); + cord.Append(std::move(buffer)); + } + absl::CordBuffer buffer = cord.GetAppendBuffer(6); + EXPECT_GE(buffer.capacity(), 500); + EXPECT_EQ(buffer.length(), 10); + EXPECT_EQ(absl::string_view(buffer.data(), buffer.length()), last); + EXPECT_EQ(cord, prefix); + } +} + +TEST_P(CordTest, GetAppendBufferOnTreeWithoutMinCapacity) { + absl::Cord cord; + for (int i = 0; i < 2; ++i) { + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(500); + buffer.SetLength(3); + memcpy(buffer.data(), i ? "def" : "Abc", 3); + cord.Append(std::move(buffer)); + } + absl::CordBuffer buffer = cord.GetAppendBuffer(1000, 900); + EXPECT_GE(buffer.capacity(), 1000); + EXPECT_EQ(buffer.length(), 0); + EXPECT_EQ(cord, "Abcdef"); +} + +TEST_P(CordTest, GetAppendBufferOnSubstring) { + // Create a large cord with a single flat and some extra capacity + absl::Cord cord; + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(500); + buffer.SetLength(450); + memset(buffer.data(), 'x', 450); + cord.Append(std::move(buffer)); + cord.RemovePrefix(1); + + // Deny on substring + buffer = cord.GetAppendBuffer(6); + EXPECT_EQ(buffer.length(), 0); + EXPECT_EQ(cord, std::string(449, 'x')); +} + +TEST_P(CordTest, GetAppendBufferOnSharedCord) { + // Create a shared cord with a single flat and extra capacity + absl::Cord cord; + absl::CordBuffer buffer = absl::CordBuffer::CreateWithDefaultLimit(500); + buffer.SetLength(3); + memcpy(buffer.data(), "Abc", 3); + cord.Append(std::move(buffer)); + absl::Cord shared_cord = cord; + + // Deny on flat + buffer = cord.GetAppendBuffer(6); + EXPECT_EQ(buffer.length(), 0); + EXPECT_EQ(cord, "Abc"); + + buffer = absl::CordBuffer::CreateWithDefaultLimit(500); + buffer.SetLength(3); + memcpy(buffer.data(), "def", 3); + cord.Append(std::move(buffer)); + shared_cord = cord; + + // Deny on tree + buffer = cord.GetAppendBuffer(6); + EXPECT_EQ(buffer.length(), 0); + EXPECT_EQ(cord, "Abcdef"); +} + +TEST_P(CordTest, TryFlatEmpty) { absl::Cord c; EXPECT_EQ(c.TryFlat(), ""); } -TEST(TryFlat, Flat) { +TEST_P(CordTest, TryFlatFlat) { absl::Cord c("hello"); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), "hello"); } -TEST(TryFlat, SubstrInlined) { +TEST_P(CordTest, TryFlatSubstrInlined) { absl::Cord c("hello"); c.RemovePrefix(1); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), "ello"); } -TEST(TryFlat, SubstrFlat) { +TEST_P(CordTest, TryFlatSubstrFlat) { absl::Cord c("longer than 15 bytes"); - c.RemovePrefix(1); - EXPECT_EQ(c.TryFlat(), "onger than 15 bytes"); + absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + MaybeHarden(sub); + EXPECT_EQ(sub.TryFlat(), "onger than 15 bytes"); } -TEST(TryFlat, Concat) { +TEST_P(CordTest, TryFlatConcat) { absl::Cord c = absl::MakeFragmentedCord({"hel", "lo"}); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), absl::nullopt); } -TEST(TryFlat, External) { +TEST_P(CordTest, TryFlatExternal) { absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); + MaybeHarden(c); EXPECT_EQ(c.TryFlat(), "hell"); } -TEST(TryFlat, SubstrExternal) { +TEST_P(CordTest, TryFlatSubstrExternal) { absl::Cord c = absl::MakeCordFromExternal("hell", [](absl::string_view) {}); - c.RemovePrefix(1); - EXPECT_EQ(c.TryFlat(), "ell"); -} - -TEST(TryFlat, SubstrConcat) { - absl::Cord c = absl::MakeFragmentedCord({"hello", " world"}); - c.RemovePrefix(1); - EXPECT_EQ(c.TryFlat(), absl::nullopt); + absl::Cord sub = absl::CordTestPeer::MakeSubstring(c, 1, c.size() - 1); + MaybeHarden(sub); + EXPECT_EQ(sub.TryFlat(), "ell"); +} + +TEST_P(CordTest, TryFlatCommonlyAssumedInvariants) { + // The behavior tested below is not part of the API contract of Cord, but it's + // something we intend to be true in our current implementation. This test + // exists to detect and prevent accidental breakage of the implementation. + absl::string_view fragments[] = {"A fragmented test", + " cord", + " to test subcords", + " of ", + "a", + " cord for", + " each chunk " + "returned by the ", + "iterator"}; + absl::Cord c = absl::MakeFragmentedCord(fragments); + MaybeHarden(c); + int fragment = 0; + int offset = 0; + absl::Cord::CharIterator itc = c.char_begin(); + for (absl::string_view sv : c.Chunks()) { + absl::string_view expected = fragments[fragment]; + absl::Cord subcord1 = c.Subcord(offset, sv.length()); + absl::Cord subcord2 = absl::Cord::AdvanceAndRead(&itc, sv.size()); + EXPECT_EQ(subcord1.TryFlat(), expected); + EXPECT_EQ(subcord2.TryFlat(), expected); + ++fragment; + offset += sv.length(); + } } static bool IsFlat(const absl::Cord& c) { @@ -520,15 +976,17 @@ static void VerifyFlatten(absl::Cord c) { EXPECT_TRUE(IsFlat(c)); } -TEST(Cord, Flatten) { +TEST_P(CordTest, Flatten) { VerifyFlatten(absl::Cord()); - VerifyFlatten(absl::Cord("small cord")); - VerifyFlatten(absl::Cord("larger than small buffer optimization")); - VerifyFlatten(absl::MakeFragmentedCord({"small ", "fragmented ", "cord"})); + VerifyFlatten(MaybeHardened(absl::Cord("small cord"))); + VerifyFlatten( + MaybeHardened(absl::Cord("larger than small buffer optimization"))); + VerifyFlatten(MaybeHardened( + absl::MakeFragmentedCord({"small ", "fragmented ", "cord"}))); // Test with a cord that is longer than the largest flat buffer - RandomEngine rng(testing::GTEST_FLAG(random_seed)); - VerifyFlatten(absl::Cord(RandomLowercaseString(&rng, 8192))); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + VerifyFlatten(MaybeHardened(absl::Cord(RandomLowercaseString(&rng, 8192)))); } // Test data @@ -574,7 +1032,7 @@ class TestData { }; } // namespace -TEST(Cord, MultipleLengths) { +TEST_P(CordTest, MultipleLengths) { TestData d; for (size_t i = 0; i < d.size(); i++) { std::string a = d.data(i); @@ -582,22 +1040,26 @@ TEST(Cord, MultipleLengths) { { // Construct from Cord absl::Cord tmp(a); absl::Cord x(tmp); + MaybeHarden(x); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } { // Construct from absl::string_view absl::Cord x(a); + MaybeHarden(x); EXPECT_EQ(a, std::string(x)) << "'" << a << "'"; } { // Append cord to self absl::Cord self(a); + MaybeHarden(self); self.Append(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; } { // Prepend cord to self absl::Cord self(a); + MaybeHarden(self); self.Prepend(self); EXPECT_EQ(a + a, std::string(self)) << "'" << a << "' + '" << a << "'"; } @@ -609,12 +1071,14 @@ TEST(Cord, MultipleLengths) { { // CopyFrom Cord absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); x = y; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } { // CopyFrom absl::string_view absl::Cord x(a); + MaybeHarden(x); x = b; EXPECT_EQ(b, std::string(x)) << "'" << a << "' + '" << b << "'"; } @@ -622,12 +1086,14 @@ TEST(Cord, MultipleLengths) { { // Cord::Append(Cord) absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); x.Append(y); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } { // Cord::Append(absl::string_view) absl::Cord x(a); + MaybeHarden(x); x.Append(b); EXPECT_EQ(a + b, std::string(x)) << "'" << a << "' + '" << b << "'"; } @@ -635,12 +1101,14 @@ TEST(Cord, MultipleLengths) { { // Cord::Prepend(Cord) absl::Cord x(a); absl::Cord y(b); + MaybeHarden(x); x.Prepend(y); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; } { // Cord::Prepend(absl::string_view) absl::Cord x(a); + MaybeHarden(x); x.Prepend(b); EXPECT_EQ(b + a, std::string(x)) << "'" << b << "' + '" << a << "'"; } @@ -650,25 +1118,29 @@ TEST(Cord, MultipleLengths) { namespace { -TEST(Cord, RemoveSuffixWithExternalOrSubstring) { +TEST_P(CordTest, RemoveSuffixWithExternalOrSubstring) { absl::Cord cord = absl::MakeCordFromExternal( "foo bar baz", [](absl::string_view s) { DoNothing(s, nullptr); }); - EXPECT_EQ("foo bar baz", std::string(cord)); + MaybeHarden(cord); + // This RemoveSuffix() will wrap the EXTERNAL node in a SUBSTRING node. cord.RemoveSuffix(4); EXPECT_EQ("foo bar", std::string(cord)); + MaybeHarden(cord); + // This RemoveSuffix() will adjust the SUBSTRING node in-place. cord.RemoveSuffix(4); EXPECT_EQ("foo", std::string(cord)); } -TEST(Cord, RemoveSuffixMakesZeroLengthNode) { +TEST_P(CordTest, RemoveSuffixMakesZeroLengthNode) { absl::Cord c; c.Append(absl::Cord(std::string(100, 'x'))); absl::Cord other_ref = c; // Prevent inplace appends + MaybeHarden(c); c.Append(absl::Cord(std::string(200, 'y'))); c.RemoveSuffix(200); EXPECT_EQ(std::string(100, 'x'), std::string(c)); @@ -692,24 +1164,27 @@ absl::Cord CordWithZedBlock(size_t size) { } // Establish that ZedBlock does what we think it does. -TEST(CordSpliceTest, ZedBlock) { +TEST_P(CordTest, CordSpliceTestZedBlock) { absl::Cord blob = CordWithZedBlock(10); + MaybeHarden(blob); EXPECT_EQ(10, blob.size()); std::string s; absl::CopyCordToString(blob, &s); EXPECT_EQ("zzzzzzzzzz", s); } -TEST(CordSpliceTest, ZedBlock0) { +TEST_P(CordTest, CordSpliceTestZedBlock0) { absl::Cord blob = CordWithZedBlock(0); + MaybeHarden(blob); EXPECT_EQ(0, blob.size()); std::string s; absl::CopyCordToString(blob, &s); EXPECT_EQ("", s); } -TEST(CordSpliceTest, ZedBlockSuffix1) { +TEST_P(CordTest, CordSpliceTestZedBlockSuffix1) { absl::Cord blob = CordWithZedBlock(10); + MaybeHarden(blob); EXPECT_EQ(10, blob.size()); absl::Cord suffix(blob); suffix.RemovePrefix(9); @@ -720,8 +1195,9 @@ TEST(CordSpliceTest, ZedBlockSuffix1) { } // Remove all of a prefix block -TEST(CordSpliceTest, ZedBlockSuffix0) { +TEST_P(CordTest, CordSpliceTestZedBlockSuffix0) { absl::Cord blob = CordWithZedBlock(10); + MaybeHarden(blob); EXPECT_EQ(10, blob.size()); absl::Cord suffix(blob); suffix.RemovePrefix(10); @@ -752,16 +1228,18 @@ absl::Cord SpliceCord(const absl::Cord& blob, int64_t offset, } // Taking an empty suffix of a block breaks appending. -TEST(CordSpliceTest, RemoveEntireBlock1) { +TEST_P(CordTest, CordSpliceTestRemoveEntireBlock1) { absl::Cord zero = CordWithZedBlock(10); + MaybeHarden(zero); absl::Cord suffix(zero); suffix.RemovePrefix(10); absl::Cord result; result.Append(suffix); } -TEST(CordSpliceTest, RemoveEntireBlock2) { +TEST_P(CordTest, CordSpliceTestRemoveEntireBlock2) { absl::Cord zero = CordWithZedBlock(10); + MaybeHarden(zero); absl::Cord prefix(zero); prefix.RemoveSuffix(10); absl::Cord suffix(zero); @@ -770,16 +1248,22 @@ TEST(CordSpliceTest, RemoveEntireBlock2) { result.Append(suffix); } -TEST(CordSpliceTest, RemoveEntireBlock3) { +TEST_P(CordTest, CordSpliceTestRemoveEntireBlock3) { absl::Cord blob = CordWithZedBlock(10); absl::Cord block = BigCord(10, 'b'); + MaybeHarden(blob); + MaybeHarden(block); blob = SpliceCord(blob, 0, block); } struct CordCompareTestCase { template <typename LHS, typename RHS> - CordCompareTestCase(const LHS& lhs, const RHS& rhs) - : lhs_cord(lhs), rhs_cord(rhs) {} + CordCompareTestCase(const LHS& lhs, const RHS& rhs, bool use_crc) + : lhs_cord(lhs), rhs_cord(rhs) { + if (use_crc) { + lhs_cord.SetExpectedChecksum(1); + } + } absl::Cord lhs_cord; absl::Cord rhs_cord; @@ -801,7 +1285,7 @@ void VerifyComparison(const CordCompareTestCase& test_case) { << "LHS=" << rhs_string << "; RHS=" << lhs_string; } -TEST(Cord, Compare) { +TEST_P(CordTest, Compare) { absl::Cord subcord("aaaaaBBBBBcccccDDDDD"); subcord = subcord.Subcord(3, 10); @@ -816,47 +1300,54 @@ TEST(Cord, Compare) { concat2.Append("cccccccccccDDDDDDDDDDDDDD"); concat2.Append("DD"); + const bool use_crc = UseCrc(); + std::vector<CordCompareTestCase> test_cases = {{ // Inline cords - {"abcdef", "abcdef"}, - {"abcdef", "abcdee"}, - {"abcdef", "abcdeg"}, - {"bbcdef", "abcdef"}, - {"bbcdef", "abcdeg"}, - {"abcdefa", "abcdef"}, - {"abcdef", "abcdefa"}, + {"abcdef", "abcdef", use_crc}, + {"abcdef", "abcdee", use_crc}, + {"abcdef", "abcdeg", use_crc}, + {"bbcdef", "abcdef", use_crc}, + {"bbcdef", "abcdeg", use_crc}, + {"abcdefa", "abcdef", use_crc}, + {"abcdef", "abcdefa", use_crc}, // Small flat cords - {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD"}, - {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD"}, - {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD"}, - {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX"}, - {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD"}, - {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa"}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDD", use_crc}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBxccccDDDDD", use_crc}, + {"aaaaaBBBBBcxcccDDDDD", "aaaaaBBBBBcccccDDDDD", use_crc}, + {"aaaaaBBBBBxccccDDDDD", "aaaaaBBBBBcccccDDDDX", use_crc}, + {"aaaaaBBBBBcccccDDDDDa", "aaaaaBBBBBcccccDDDDD", use_crc}, + {"aaaaaBBBBBcccccDDDDD", "aaaaaBBBBBcccccDDDDDa", use_crc}, // Subcords - {subcord, subcord}, - {subcord, "aaBBBBBccc"}, - {subcord, "aaBBBBBccd"}, - {subcord, "aaBBBBBccb"}, - {subcord, "aaBBBBBxcb"}, - {subcord, "aaBBBBBccca"}, - {subcord, "aaBBBBBcc"}, + {subcord, subcord, use_crc}, + {subcord, "aaBBBBBccc", use_crc}, + {subcord, "aaBBBBBccd", use_crc}, + {subcord, "aaBBBBBccb", use_crc}, + {subcord, "aaBBBBBxcb", use_crc}, + {subcord, "aaBBBBBccca", use_crc}, + {subcord, "aaBBBBBcc", use_crc}, // Concats - {concat, concat}, + {concat, concat, use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBcccccccccccccccxDDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBacccccccccccccccDDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDD", + use_crc}, {concat, - "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe"}, + "aaaaaaaaaaaaaaaaBBBBBBBBBBBBBBBBccccccccccccccccDDDDDDDDDDDDDDDDe", + use_crc}, - {concat, concat2}, + {concat, concat2, use_crc}, }}; for (const auto& tc : test_cases) { @@ -864,9 +1355,10 @@ TEST(Cord, Compare) { } } -TEST(Cord, CompareAfterAssign) { +TEST_P(CordTest, CompareAfterAssign) { absl::Cord a("aaaaaa1111111"); absl::Cord b("aaaaaa2222222"); + MaybeHarden(a); a = "cccccc"; b = "cccccc"; EXPECT_EQ(a, b); @@ -893,8 +1385,8 @@ static void TestCompare(const absl::Cord& c, const absl::Cord& d, EXPECT_EQ(expected, sign(c.Compare(d))) << c << ", " << d; } -TEST(Compare, ComparisonIsUnsigned) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, CompareComparisonIsUnsigned) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); std::uniform_int_distribution<uint32_t> uniform_uint8(0, 255); char x = static_cast<char>(uniform_uint8(rng)); TestCompare( @@ -902,9 +1394,9 @@ TEST(Compare, ComparisonIsUnsigned) { absl::Cord(std::string(GetUniformRandomUpTo(&rng, 100), x ^ 0x80)), &rng); } -TEST(Compare, RandomComparisons) { +TEST_P(CordTest, CompareRandomComparisons) { const int kIters = 5000; - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); int n = GetUniformRandomUpTo(&rng, 5000); absl::Cord a[] = {MakeExternalCord(n), @@ -925,6 +1417,8 @@ TEST(Compare, RandomComparisons) { d.Append(a[GetUniformRandomUpTo(&rng, ABSL_ARRAYSIZE(a))]); } std::bernoulli_distribution coin_flip(0.5); + MaybeHarden(c); + MaybeHarden(d); TestCompare(coin_flip(rng) ? c : absl::Cord(std::string(c)), coin_flip(rng) ? d : absl::Cord(std::string(d)), &rng); } @@ -960,43 +1454,43 @@ void CompareOperators() { EXPECT_FALSE(b <= a); } -TEST(ComparisonOperators, Cord_Cord) { +TEST_P(CordTest, ComparisonOperators_Cord_Cord) { CompareOperators<absl::Cord, absl::Cord>(); } -TEST(ComparisonOperators, Cord_StringPiece) { +TEST_P(CordTest, ComparisonOperators_Cord_StringPiece) { CompareOperators<absl::Cord, absl::string_view>(); } -TEST(ComparisonOperators, StringPiece_Cord) { +TEST_P(CordTest, ComparisonOperators_StringPiece_Cord) { CompareOperators<absl::string_view, absl::Cord>(); } -TEST(ComparisonOperators, Cord_string) { +TEST_P(CordTest, ComparisonOperators_Cord_string) { CompareOperators<absl::Cord, std::string>(); } -TEST(ComparisonOperators, string_Cord) { +TEST_P(CordTest, ComparisonOperators_string_Cord) { CompareOperators<std::string, absl::Cord>(); } -TEST(ComparisonOperators, stdstring_Cord) { +TEST_P(CordTest, ComparisonOperators_stdstring_Cord) { CompareOperators<std::string, absl::Cord>(); } -TEST(ComparisonOperators, Cord_stdstring) { +TEST_P(CordTest, ComparisonOperators_Cord_stdstring) { CompareOperators<absl::Cord, std::string>(); } -TEST(ComparisonOperators, charstar_Cord) { +TEST_P(CordTest, ComparisonOperators_charstar_Cord) { CompareOperators<const char*, absl::Cord>(); } -TEST(ComparisonOperators, Cord_charstar) { +TEST_P(CordTest, ComparisonOperators_Cord_charstar) { CompareOperators<absl::Cord, const char*>(); } -TEST(ConstructFromExternal, ReleaserInvoked) { +TEST_P(CordTest, ConstructFromExternalReleaserInvoked) { // Empty external memory means the releaser should be called immediately. { bool invoked = false; @@ -1038,8 +1532,8 @@ TEST(ConstructFromExternal, ReleaserInvoked) { } } -TEST(ConstructFromExternal, CompareContents) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, ConstructFromExternalCompareContents) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); for (int length = 1; length <= 2048; length *= 2) { std::string data = RandomLowercaseString(&rng, length); @@ -1050,12 +1544,13 @@ TEST(ConstructFromExternal, CompareContents) { EXPECT_EQ(external->size(), sv.size()); delete external; }); + MaybeHarden(cord); EXPECT_EQ(data, cord); } } -TEST(ConstructFromExternal, LargeReleaser) { - RandomEngine rng(testing::GTEST_FLAG(random_seed)); +TEST_P(CordTest, ConstructFromExternalLargeReleaser) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); constexpr size_t kLength = 256; std::string data = RandomLowercaseString(&rng, kLength); std::array<char, kLength> data_array; @@ -1065,11 +1560,11 @@ TEST(ConstructFromExternal, LargeReleaser) { EXPECT_EQ(data, absl::string_view(data_array.data(), data_array.size())); invoked = true; }; - (void)absl::MakeCordFromExternal(data, releaser); + (void)MaybeHardened(absl::MakeCordFromExternal(data, releaser)); EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, FunctionPointerReleaser) { +TEST_P(CordTest, ConstructFromExternalFunctionPointerReleaser) { static absl::string_view data("hello world"); static bool invoked; auto* releaser = @@ -1078,15 +1573,15 @@ TEST(ConstructFromExternal, FunctionPointerReleaser) { invoked = true; }); invoked = false; - (void)absl::MakeCordFromExternal(data, releaser); + (void)MaybeHardened(absl::MakeCordFromExternal(data, releaser)); EXPECT_TRUE(invoked); invoked = false; - (void)absl::MakeCordFromExternal(data, *releaser); + (void)MaybeHardened(absl::MakeCordFromExternal(data, *releaser)); EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, MoveOnlyReleaser) { +TEST_P(CordTest, ConstructFromExternalMoveOnlyReleaser) { struct Releaser { explicit Releaser(bool* invoked) : invoked(invoked) {} Releaser(Releaser&& other) noexcept : invoked(other.invoked) {} @@ -1096,24 +1591,25 @@ TEST(ConstructFromExternal, MoveOnlyReleaser) { }; bool invoked = false; - (void)absl::MakeCordFromExternal("dummy", Releaser(&invoked)); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", Releaser(&invoked))); EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, NoArgLambda) { +TEST_P(CordTest, ConstructFromExternalNoArgLambda) { bool invoked = false; - (void)absl::MakeCordFromExternal("dummy", [&invoked]() { invoked = true; }); + (void)MaybeHardened( + absl::MakeCordFromExternal("dummy", [&invoked]() { invoked = true; })); EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, StringViewArgLambda) { +TEST_P(CordTest, ConstructFromExternalStringViewArgLambda) { bool invoked = false; - (void)absl::MakeCordFromExternal( - "dummy", [&invoked](absl::string_view) { invoked = true; }); + (void)MaybeHardened(absl::MakeCordFromExternal( + "dummy", [&invoked](absl::string_view) { invoked = true; })); EXPECT_TRUE(invoked); } -TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { +TEST_P(CordTest, ConstructFromExternalNonTrivialReleaserDestructor) { struct Releaser { explicit Releaser(bool* destroyed) : destroyed(destroyed) {} ~Releaser() { *destroyed = true; } @@ -1124,57 +1620,94 @@ TEST(ConstructFromExternal, NonTrivialReleaserDestructor) { bool destroyed = false; Releaser releaser(&destroyed); - (void)absl::MakeCordFromExternal("dummy", releaser); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", releaser)); EXPECT_TRUE(destroyed); } -TEST(ConstructFromExternal, ReferenceQualifierOverloads) { - struct Releaser { - void operator()(absl::string_view) & { *lvalue_invoked = true; } - void operator()(absl::string_view) && { *rvalue_invoked = true; } +TEST_P(CordTest, ConstructFromExternalReferenceQualifierOverloads) { + enum InvokedAs { kMissing, kLValue, kRValue }; + enum CopiedAs { kNone, kMove, kCopy }; + struct Tracker { + CopiedAs copied_as = kNone; + InvokedAs invoked_as = kMissing; + + void Record(InvokedAs rhs) { + ASSERT_EQ(invoked_as, kMissing); + invoked_as = rhs; + } + + void Record(CopiedAs rhs) { + if (copied_as == kNone || rhs == kCopy) copied_as = rhs; + } + } tracker; + + class Releaser { + public: + explicit Releaser(Tracker* tracker) : tr_(tracker) { *tracker = Tracker(); } + Releaser(Releaser&& rhs) : tr_(rhs.tr_) { tr_->Record(kMove); } + Releaser(const Releaser& rhs) : tr_(rhs.tr_) { tr_->Record(kCopy); } + + void operator()(absl::string_view) & { tr_->Record(kLValue); } + void operator()(absl::string_view) && { tr_->Record(kRValue); } - bool* lvalue_invoked; - bool* rvalue_invoked; + private: + Tracker* tr_; }; - bool lvalue_invoked = false; - bool rvalue_invoked = false; - Releaser releaser = {&lvalue_invoked, &rvalue_invoked}; - (void)absl::MakeCordFromExternal("", releaser); - EXPECT_FALSE(lvalue_invoked); - EXPECT_TRUE(rvalue_invoked); - rvalue_invoked = false; + const Releaser releaser1(&tracker); + (void)MaybeHardened(absl::MakeCordFromExternal("", releaser1)); + EXPECT_EQ(tracker.copied_as, kCopy); + EXPECT_EQ(tracker.invoked_as, kRValue); - (void)absl::MakeCordFromExternal("dummy", releaser); - EXPECT_FALSE(lvalue_invoked); - EXPECT_TRUE(rvalue_invoked); - rvalue_invoked = false; + const Releaser releaser2(&tracker); + (void)MaybeHardened(absl::MakeCordFromExternal("", releaser2)); + EXPECT_EQ(tracker.copied_as, kCopy); + EXPECT_EQ(tracker.invoked_as, kRValue); - // NOLINTNEXTLINE: suppress clang-tidy std::move on trivially copyable type. - (void)absl::MakeCordFromExternal("dummy", std::move(releaser)); - EXPECT_FALSE(lvalue_invoked); - EXPECT_TRUE(rvalue_invoked); + Releaser releaser3(&tracker); + (void)MaybeHardened(absl::MakeCordFromExternal("", std::move(releaser3))); + EXPECT_EQ(tracker.copied_as, kMove); + EXPECT_EQ(tracker.invoked_as, kRValue); + + Releaser releaser4(&tracker); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", releaser4)); + EXPECT_EQ(tracker.copied_as, kCopy); + EXPECT_EQ(tracker.invoked_as, kRValue); + + const Releaser releaser5(&tracker); + (void)MaybeHardened(absl::MakeCordFromExternal("dummy", releaser5)); + EXPECT_EQ(tracker.copied_as, kCopy); + EXPECT_EQ(tracker.invoked_as, kRValue); + + Releaser releaser6(&tracker); + (void)MaybeHardened(absl::MakeCordFromExternal("foo", std::move(releaser6))); + EXPECT_EQ(tracker.copied_as, kMove); + EXPECT_EQ(tracker.invoked_as, kRValue); } -TEST(ExternalMemory, BasicUsage) { +TEST_P(CordTest, ExternalMemoryBasicUsage) { static const char* strings[] = {"", "hello", "there"}; for (const char* str : strings) { absl::Cord dst("(prefix)"); + MaybeHarden(dst); AddExternalMemory(str, &dst); + MaybeHarden(dst); dst.Append("(suffix)"); EXPECT_EQ((std::string("(prefix)") + str + std::string("(suffix)")), std::string(dst)); } } -TEST(ExternalMemory, RemovePrefixSuffix) { +TEST_P(CordTest, ExternalMemoryRemovePrefixSuffix) { // Exhaustively try all sub-strings. absl::Cord cord = MakeComposite(); std::string s = std::string(cord); for (int offset = 0; offset <= s.size(); offset++) { for (int length = 0; length <= s.size() - offset; length++) { absl::Cord result(cord); + MaybeHarden(result); result.RemovePrefix(offset); + MaybeHarden(result); result.RemoveSuffix(result.size() - length); EXPECT_EQ(s.substr(offset, length), std::string(result)) << offset << " " << length; @@ -1182,11 +1715,13 @@ TEST(ExternalMemory, RemovePrefixSuffix) { } } -TEST(ExternalMemory, Get) { +TEST_P(CordTest, ExternalMemoryGet) { absl::Cord cord("hello"); AddExternalMemory(" world!", &cord); + MaybeHarden(cord); AddExternalMemory(" how are ", &cord); cord.Append(" you?"); + MaybeHarden(cord); std::string s = std::string(cord); for (int i = 0; i < s.size(); i++) { EXPECT_EQ(s[i], cord[i]); @@ -1194,58 +1729,138 @@ TEST(ExternalMemory, Get) { } // CordMemoryUsage tests verify the correctness of the EstimatedMemoryUsage() -// These tests take into account that the reported memory usage is approximate -// and non-deterministic. For all tests, We verify that the reported memory -// usage is larger than `size()`, and less than `size() * 1.5` as a cord should -// never reserve more 'extra' capacity than half of its size as it grows. -// Additionally we have some whiteboxed expectations based on our knowledge of -// the layout and size of empty and inlined cords, and flat nodes. +// We use whiteboxed expectations based on our knowledge of the layout and size +// of empty and inlined cords, and flat nodes. + +constexpr auto kFairShare = absl::CordMemoryAccounting::kFairShare; -TEST(CordMemoryUsage, Empty) { - EXPECT_EQ(sizeof(absl::Cord), absl::Cord().EstimatedMemoryUsage()); +// Creates a cord of `n` `c` values, making sure no string stealing occurs. +absl::Cord MakeCord(size_t n, char c) { + const std::string s(n, c); + return absl::Cord(s); } -TEST(CordMemoryUsage, Embedded) { - absl::Cord a("hello"); - EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); +TEST(CordTest, CordMemoryUsageEmpty) { + absl::Cord cord; + EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage()); + EXPECT_EQ(sizeof(absl::Cord), cord.EstimatedMemoryUsage(kFairShare)); } -TEST(CordMemoryUsage, EmbeddedAppend) { - absl::Cord a("a"); - absl::Cord b("bcd"); - EXPECT_EQ(b.EstimatedMemoryUsage(), sizeof(absl::Cord)); - a.Append(b); +TEST(CordTest, CordMemoryUsageInlined) { + absl::Cord a("hello"); EXPECT_EQ(a.EstimatedMemoryUsage(), sizeof(absl::Cord)); + EXPECT_EQ(a.EstimatedMemoryUsage(kFairShare), sizeof(absl::Cord)); } -TEST(CordMemoryUsage, ExternalMemory) { - static const int kLength = 1000; +TEST(CordTest, CordMemoryUsageExternalMemory) { absl::Cord cord; - AddExternalMemory(std::string(kLength, 'x'), &cord); - EXPECT_GT(cord.EstimatedMemoryUsage(), kLength); - EXPECT_LE(cord.EstimatedMemoryUsage(), kLength * 1.5); -} + AddExternalMemory(std::string(1000, 'x'), &cord); + const size_t expected = + sizeof(absl::Cord) + 1000 + sizeof(CordRepExternal) + sizeof(intptr_t); + EXPECT_EQ(cord.EstimatedMemoryUsage(), expected); + EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), expected); +} + +TEST(CordTest, CordMemoryUsageFlat) { + absl::Cord cord = MakeCord(1000, 'a'); + const size_t flat_size = + absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize(); + EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + flat_size); +} + +TEST(CordTest, CordMemoryUsageSubStringSharedFlat) { + absl::Cord flat = MakeCord(2000, 'a'); + const size_t flat_size = + absl::CordTestPeer::Tree(flat)->flat()->AllocatedSize(); + absl::Cord cord = flat.Subcord(500, 1000); + EXPECT_EQ(cord.EstimatedMemoryUsage(), + sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + sizeof(CordRepSubstring) + flat_size / 2); +} + +TEST(CordTest, CordMemoryUsageFlatShared) { + absl::Cord shared = MakeCord(1000, 'a'); + absl::Cord cord(shared); + const size_t flat_size = + absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize(); + EXPECT_EQ(cord.EstimatedMemoryUsage(), sizeof(absl::Cord) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + flat_size / 2); +} + +TEST(CordTest, CordMemoryUsageFlatHardenedAndShared) { + absl::Cord shared = MakeCord(1000, 'a'); + absl::Cord cord(shared); + const size_t flat_size = + absl::CordTestPeer::Tree(cord)->flat()->AllocatedSize(); + cord.SetExpectedChecksum(1); + EXPECT_EQ(cord.EstimatedMemoryUsage(), + sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size / 2); + + absl::Cord cord2(cord); + EXPECT_EQ(cord2.EstimatedMemoryUsage(), + sizeof(absl::Cord) + sizeof(CordRepCrc) + flat_size); + EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + (sizeof(CordRepCrc) + flat_size / 2) / 2); +} + +TEST(CordTest, CordMemoryUsageBTree) { + absl::Cord cord1; + size_t flats1_size = 0; + absl::Cord flats1[4] = {MakeCord(1000, 'a'), MakeCord(1100, 'a'), + MakeCord(1200, 'a'), MakeCord(1300, 'a')}; + for (absl::Cord flat : flats1) { + flats1_size += absl::CordTestPeer::Tree(flat)->flat()->AllocatedSize(); + cord1.Append(std::move(flat)); + } -TEST(CordMemoryUsage, Flat) { - static const int kLength = 125; - absl::Cord a(std::string(kLength, 'a')); - EXPECT_GT(a.EstimatedMemoryUsage(), kLength); - EXPECT_LE(a.EstimatedMemoryUsage(), kLength * 1.5); -} + // Make sure the created cord is a BTREE tree. Under some builds such as + // windows DLL, we may have ODR like effects on the flag, meaning the DLL + // code will run with the picked up default. + if (!absl::CordTestPeer::Tree(cord1)->IsBtree()) { + ABSL_RAW_LOG(WARNING, "Cord library code not respecting btree flag"); + return; + } -TEST(CordMemoryUsage, AppendFlat) { - using absl::strings_internal::CordTestAccess; - absl::Cord a(std::string(CordTestAccess::MaxFlatLength(), 'a')); - size_t length = a.EstimatedMemoryUsage(); - a.Append(std::string(CordTestAccess::MaxFlatLength(), 'b')); - size_t delta = a.EstimatedMemoryUsage() - length; - EXPECT_GT(delta, CordTestAccess::MaxFlatLength()); - EXPECT_LE(delta, CordTestAccess::MaxFlatLength() * 1.5); + size_t rep1_size = sizeof(CordRepBtree) + flats1_size; + size_t rep1_shared_size = sizeof(CordRepBtree) + flats1_size / 2; + + EXPECT_EQ(cord1.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep1_size); + EXPECT_EQ(cord1.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + rep1_shared_size); + + absl::Cord cord2; + size_t flats2_size = 0; + absl::Cord flats2[4] = {MakeCord(600, 'a'), MakeCord(700, 'a'), + MakeCord(800, 'a'), MakeCord(900, 'a')}; + for (absl::Cord& flat : flats2) { + flats2_size += absl::CordTestPeer::Tree(flat)->flat()->AllocatedSize(); + cord2.Append(std::move(flat)); + } + size_t rep2_size = sizeof(CordRepBtree) + flats2_size; + + EXPECT_EQ(cord2.EstimatedMemoryUsage(), sizeof(absl::Cord) + rep2_size); + EXPECT_EQ(cord2.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + rep2_size); + + absl::Cord cord(cord1); + cord.Append(std::move(cord2)); + + EXPECT_EQ(cord.EstimatedMemoryUsage(), + sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_size + rep2_size); + EXPECT_EQ(cord.EstimatedMemoryUsage(kFairShare), + sizeof(absl::Cord) + sizeof(CordRepBtree) + rep1_shared_size / 2 + + rep2_size); } // Regtest for a change that had to be rolled back because it expanded out // of the InlineRep too soon, which was observable through MemoryUsage(). -TEST(CordMemoryUsage, InlineRep) { +TEST_P(CordTest, CordMemoryUsageInlineRep) { constexpr size_t kMaxInline = 15; // Cord::InlineRep::N const std::string small_string(kMaxInline, 'x'); absl::Cord c1(small_string); @@ -1259,14 +1874,16 @@ TEST(CordMemoryUsage, InlineRep) { } // namespace // Regtest for 7510292 (fix a bug introduced by 7465150) -TEST(Cord, Concat_Append) { +TEST_P(CordTest, Concat_Append) { // Create a rep of type CONCAT absl::Cord s1("foobarbarbarbarbar"); + MaybeHarden(s1); s1.Append("abcdefgabcdefgabcdefgabcdefgabcdefgabcdefgabcdefg"); size_t size = s1.size(); // Create a copy of s1 and append to it. absl::Cord s2 = s1; + MaybeHarden(s2); s2.Append("x"); // 7465150 modifies s1 when it shouldn't. @@ -1274,10 +1891,91 @@ TEST(Cord, Concat_Append) { EXPECT_EQ(s2.size(), size + 1); } -TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) { +TEST_P(CordTest, DiabolicalGrowth) { + // This test exercises a diabolical Append(<one char>) on a cord, making the + // cord shared before each Append call resulting in a terribly fragmented + // resulting cord. + // TODO(b/183983616): Apply some minimum compaction when copying a shared + // source cord into a mutable copy for updates in CordRepRing. + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + const std::string expected = RandomLowercaseString(&rng, 5000); + absl::Cord cord; + for (char c : expected) { + absl::Cord shared(cord); + cord.Append(absl::string_view(&c, 1)); + MaybeHarden(cord); + } + std::string value; + absl::CopyCordToString(cord, &value); + EXPECT_EQ(value, expected); + ABSL_RAW_LOG(INFO, "Diabolical size allocated = %zu", + cord.EstimatedMemoryUsage()); +} + +// The following tests check support for >4GB cords in 64-bit binaries, and +// 2GB-4GB cords in 32-bit binaries. This function returns the large cord size +// that's appropriate for the binary. + +// Construct a huge cord with the specified valid prefix. +static absl::Cord MakeHuge(absl::string_view prefix) { + absl::Cord cord; + if (sizeof(size_t) > 4) { + // In 64-bit binaries, test 64-bit Cord support. + const size_t size = + static_cast<size_t>(std::numeric_limits<uint32_t>::max()) + 314; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(prefix.data(), size), + [](absl::string_view s) { DoNothing(s, nullptr); })); + } else { + // Cords are limited to 32-bit lengths in 32-bit binaries. The following + // tests check for use of "signed int" to represent Cord length/offset. + // However absl::string_view does not allow lengths >= (1u<<31), so we need + // to append in two parts; + const size_t s1 = (1u << 31) - 1; + // For shorter cord, `Append` copies the data rather than allocating a new + // node. The threshold is currently set to 511, so `s2` needs to be bigger + // to not trigger the copy. + const size_t s2 = 600; + cord.Append(absl::MakeCordFromExternal( + absl::string_view(prefix.data(), s1), + [](absl::string_view s) { DoNothing(s, nullptr); })); + cord.Append(absl::MakeCordFromExternal( + absl::string_view("", s2), + [](absl::string_view s) { DoNothing(s, nullptr); })); + } + return cord; +} + +TEST_P(CordTest, HugeCord) { + absl::Cord cord = MakeHuge("huge cord"); + MaybeHarden(cord); + + const size_t acceptable_delta = + 100 + (UseCrc() ? sizeof(absl::cord_internal::CordRepCrc) : 0); + EXPECT_LE(cord.size(), cord.EstimatedMemoryUsage()); + EXPECT_GE(cord.size() + acceptable_delta, cord.EstimatedMemoryUsage()); +} + +// Tests that Append() works ok when handed a self reference +TEST_P(CordTest, AppendSelf) { + // We run the test until data is ~16K + // This guarantees it covers small, medium and large data. + std::string control_data = "Abc"; + absl::Cord data(control_data); + while (control_data.length() < 0x4000) { + MaybeHarden(data); + data.Append(data); + control_data.append(control_data); + ASSERT_EQ(control_data, data); + } +} + +TEST_P(CordTest, MakeFragmentedCordFromInitializerList) { absl::Cord fragmented = absl::MakeFragmentedCord({"A ", "fragmented ", "Cord"}); + MaybeHarden(fragmented); + EXPECT_EQ("A fragmented Cord", fragmented); auto chunk_it = fragmented.chunk_begin(); @@ -1294,10 +1992,12 @@ TEST(MakeFragmentedCord, MakeFragmentedCordFromInitializerList) { ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); } -TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) { +TEST_P(CordTest, MakeFragmentedCordFromVector) { std::vector<absl::string_view> chunks = {"A ", "fragmented ", "Cord"}; absl::Cord fragmented = absl::MakeFragmentedCord(chunks); + MaybeHarden(fragmented); + EXPECT_EQ("A fragmented Cord", fragmented); auto chunk_it = fragmented.chunk_begin(); @@ -1314,7 +2014,7 @@ TEST(MakeFragmentedCord, MakeFragmentedCordFromVector) { ASSERT_TRUE(++chunk_it == fragmented.chunk_end()); } -TEST(CordChunkIterator, Traits) { +TEST_P(CordTest, CordChunkIteratorTraits) { static_assert(std::is_copy_constructible<absl::Cord::ChunkIterator>::value, ""); static_assert(std::is_copy_assignable<absl::Cord::ChunkIterator>::value, ""); @@ -1395,39 +2095,115 @@ static void VerifyChunkIterator(const absl::Cord& cord, EXPECT_TRUE(post_iter == cord.chunk_end()); // NOLINT } -TEST(CordChunkIterator, Operations) { +TEST_P(CordTest, CordChunkIteratorOperations) { absl::Cord empty_cord; VerifyChunkIterator(empty_cord, 0); absl::Cord small_buffer_cord("small cord"); + MaybeHarden(small_buffer_cord); VerifyChunkIterator(small_buffer_cord, 1); absl::Cord flat_node_cord("larger than small buffer optimization"); + MaybeHarden(flat_node_cord); VerifyChunkIterator(flat_node_cord, 1); - VerifyChunkIterator( - absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", - "testing ", "chunk ", "iterations."}), - 8); + VerifyChunkIterator(MaybeHardened(absl::MakeFragmentedCord( + {"a ", "small ", "fragmented ", "cord ", "for ", + "testing ", "chunk ", "iterations."})), + 8); absl::Cord reused_nodes_cord(std::string(40, 'c')); reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'b'))); + MaybeHarden(reused_nodes_cord); reused_nodes_cord.Prepend(absl::Cord(std::string(40, 'a'))); size_t expected_chunks = 3; for (int i = 0; i < 8; ++i) { reused_nodes_cord.Prepend(reused_nodes_cord); + MaybeHarden(reused_nodes_cord); expected_chunks *= 2; VerifyChunkIterator(reused_nodes_cord, expected_chunks); } - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); absl::Cord subcords; for (int i = 0; i < 128; ++i) subcords.Prepend(flat_cord.Subcord(i, 128)); VerifyChunkIterator(subcords, 128); } -TEST(CordCharIterator, Traits) { + +TEST_P(CordTest, AdvanceAndReadOnDataEdge) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + const std::string data = RandomLowercaseString(&rng, 2000); + for (bool as_flat : {true, false}) { + SCOPED_TRACE(as_flat ? "Flat" : "External"); + + absl::Cord cord = + as_flat ? absl::Cord(data) + : absl::MakeCordFromExternal(data, [](absl::string_view) {}); + auto it = cord.Chars().begin(); +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED + EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*"); +#endif + + it = cord.Chars().begin(); + absl::Cord frag = cord.AdvanceAndRead(&it, 2000); + EXPECT_EQ(frag, data); + EXPECT_TRUE(it == cord.Chars().end()); + + it = cord.Chars().begin(); + frag = cord.AdvanceAndRead(&it, 200); + EXPECT_EQ(frag, data.substr(0, 200)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 1500); + EXPECT_EQ(frag, data.substr(200, 1500)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 300); + EXPECT_EQ(frag, data.substr(1700, 300)); + EXPECT_TRUE(it == cord.Chars().end()); + } +} + +TEST_P(CordTest, AdvanceAndReadOnSubstringDataEdge) { + RandomEngine rng(GTEST_FLAG_GET(random_seed)); + const std::string data = RandomLowercaseString(&rng, 2500); + for (bool as_flat : {true, false}) { + SCOPED_TRACE(as_flat ? "Flat" : "External"); + + absl::Cord cord = + as_flat ? absl::Cord(data) + : absl::MakeCordFromExternal(data, [](absl::string_view) {}); + cord = cord.Subcord(200, 2000); + const std::string substr = data.substr(200, 2000); + + auto it = cord.Chars().begin(); +#if !defined(NDEBUG) || ABSL_OPTION_HARDENED + EXPECT_DEATH_IF_SUPPORTED(cord.AdvanceAndRead(&it, 2001), ".*"); +#endif + + it = cord.Chars().begin(); + absl::Cord frag = cord.AdvanceAndRead(&it, 2000); + EXPECT_EQ(frag, substr); + EXPECT_TRUE(it == cord.Chars().end()); + + it = cord.Chars().begin(); + frag = cord.AdvanceAndRead(&it, 200); + EXPECT_EQ(frag, substr.substr(0, 200)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 1500); + EXPECT_EQ(frag, substr.substr(200, 1500)); + EXPECT_FALSE(it == cord.Chars().end()); + + frag = cord.AdvanceAndRead(&it, 300); + EXPECT_EQ(frag, substr.substr(1700, 300)); + EXPECT_TRUE(it == cord.Chars().end()); + } +} + +TEST_P(CordTest, CharIteratorTraits) { static_assert(std::is_copy_constructible<absl::Cord::CharIterator>::value, ""); static_assert(std::is_copy_assignable<absl::Cord::CharIterator>::value, ""); @@ -1536,44 +2312,88 @@ static void VerifyCharIterator(const absl::Cord& cord) { } } -TEST(CordCharIterator, Operations) { +TEST_P(CordTest, CharIteratorOperations) { absl::Cord empty_cord; VerifyCharIterator(empty_cord); absl::Cord small_buffer_cord("small cord"); + MaybeHarden(small_buffer_cord); VerifyCharIterator(small_buffer_cord); absl::Cord flat_node_cord("larger than small buffer optimization"); + MaybeHarden(flat_node_cord); VerifyCharIterator(flat_node_cord); - VerifyCharIterator( + VerifyCharIterator(MaybeHardened( absl::MakeFragmentedCord({"a ", "small ", "fragmented ", "cord ", "for ", - "testing ", "character ", "iteration."})); + "testing ", "character ", "iteration."}))); absl::Cord reused_nodes_cord("ghi"); reused_nodes_cord.Prepend(absl::Cord("def")); reused_nodes_cord.Prepend(absl::Cord("abc")); for (int i = 0; i < 4; ++i) { reused_nodes_cord.Prepend(reused_nodes_cord); + MaybeHarden(reused_nodes_cord); VerifyCharIterator(reused_nodes_cord); } - RandomEngine rng(testing::GTEST_FLAG(random_seed)); + RandomEngine rng(GTEST_FLAG_GET(random_seed)); absl::Cord flat_cord(RandomLowercaseString(&rng, 256)); absl::Cord subcords; - for (int i = 0; i < 4; ++i) subcords.Prepend(flat_cord.Subcord(16 * i, 128)); + for (int i = 0; i < 4; ++i) { + subcords.Prepend(flat_cord.Subcord(16 * i, 128)); + MaybeHarden(subcords); + } VerifyCharIterator(subcords); } -TEST(Cord, StreamingOutput) { +TEST_P(CordTest, CharIteratorAdvanceAndRead) { + // Create a Cord holding 6 flats of 2500 bytes each, and then iterate over it + // reading 150, 1500, 2500 and 3000 bytes. This will result in all possible + // partial, full and straddled read combinations including reads below + // kMaxBytesToCopy. b/197776822 surfaced a bug for a specific partial, small + // read 'at end' on Cord which caused a failure on attempting to read past the + // end in CordRepBtreeReader which was not covered by any existing test. + constexpr int kBlocks = 6; + constexpr size_t kBlockSize = 2500; + constexpr size_t kChunkSize1 = 1500; + constexpr size_t kChunkSize2 = 2500; + constexpr size_t kChunkSize3 = 3000; + constexpr size_t kChunkSize4 = 150; + RandomEngine rng; + std::string data = RandomLowercaseString(&rng, kBlocks * kBlockSize); + absl::Cord cord; + for (int i = 0; i < kBlocks; ++i) { + const std::string block = data.substr(i * kBlockSize, kBlockSize); + cord.Append(absl::Cord(block)); + } + + MaybeHarden(cord); + + for (size_t chunk_size : + {kChunkSize1, kChunkSize2, kChunkSize3, kChunkSize4}) { + absl::Cord::CharIterator it = cord.char_begin(); + size_t offset = 0; + while (offset < data.length()) { + const size_t n = std::min<size_t>(data.length() - offset, chunk_size); + absl::Cord chunk = cord.AdvanceAndRead(&it, n); + ASSERT_EQ(chunk.size(), n); + ASSERT_EQ(chunk.Compare(data.substr(offset, n)), 0); + offset += n; + } + } +} + +TEST_P(CordTest, StreamingOutput) { absl::Cord c = absl::MakeFragmentedCord({"A ", "small ", "fragmented ", "Cord", "."}); + MaybeHarden(c); std::stringstream output; output << c; EXPECT_EQ("A small fragmented Cord.", output.str()); } -TEST(Cord, ForEachChunk) { +TEST_P(CordTest, ForEachChunk) { for (int num_elements : {1, 10, 200}) { SCOPED_TRACE(num_elements); std::vector<std::string> cord_chunks; @@ -1581,6 +2401,7 @@ TEST(Cord, ForEachChunk) { cord_chunks.push_back(absl::StrCat("[", i, "]")); } absl::Cord c = absl::MakeFragmentedCord(cord_chunks); + MaybeHarden(c); std::vector<std::string> iterated_chunks; absl::CordTestPeer::ForEachChunk(c, @@ -1591,13 +2412,14 @@ TEST(Cord, ForEachChunk) { } } -TEST(Cord, SmallBufferAssignFromOwnData) { +TEST_P(CordTest, SmallBufferAssignFromOwnData) { constexpr size_t kMaxInline = 15; std::string contents = "small buff cord"; EXPECT_EQ(contents.size(), kMaxInline); for (size_t pos = 0; pos < contents.size(); ++pos) { for (size_t count = contents.size() - pos; count > 0; --count) { absl::Cord c(contents); + MaybeHarden(c); absl::string_view flat = c.Flatten(); c = flat.substr(pos, count); EXPECT_EQ(c, contents.substr(pos, count)) @@ -1606,16 +2428,20 @@ TEST(Cord, SmallBufferAssignFromOwnData) { } } -TEST(Cord, Format) { +TEST_P(CordTest, Format) { absl::Cord c; absl::Format(&c, "There were %04d little %s.", 3, "pigs"); EXPECT_EQ(c, "There were 0003 little pigs."); + MaybeHarden(c); absl::Format(&c, "And %-3llx bad wolf!", 1); + MaybeHarden(c); EXPECT_EQ(c, "There were 0003 little pigs.And 1 bad wolf!"); } -TEST(CordDeathTest, Hardening) { +TEST_P(CordTest, Hardening) { absl::Cord cord("hello"); + MaybeHarden(cord); + // These statement should abort the program in all builds modes. EXPECT_DEATH_IF_SUPPORTED(cord.RemovePrefix(6), ""); EXPECT_DEATH_IF_SUPPORTED(cord.RemoveSuffix(6), ""); @@ -1634,6 +2460,49 @@ TEST(CordDeathTest, Hardening) { EXPECT_DEATH_IF_SUPPORTED(++cord.chunk_end(), ""); } +// This test mimics a specific (and rare) application repeatedly splitting a +// cord, inserting (overwriting) a string value, and composing a new cord from +// the three pieces. This is hostile towards a Btree implementation: A split of +// a node at any level is likely to have the right-most edge of the left split, +// and the left-most edge of the right split shared. For example, splitting a +// leaf node with 6 edges will result likely in a 1-6, 2-5, 3-4, etc. split, +// sharing the 'split node'. When recomposing such nodes, we 'injected' an edge +// in that node. As this happens with some probability on each level of the +// tree, this will quickly grow the tree until it reaches maximum height. +TEST_P(CordTest, BtreeHostileSplitInsertJoin) { + absl::BitGen bitgen; + + // Start with about 1GB of data + std::string data(1 << 10, 'x'); + absl::Cord buffer(data); + absl::Cord cord; + for (int i = 0; i < 1000000; ++i) { + cord.Append(buffer); + } + + for (int j = 0; j < 1000; ++j) { + MaybeHarden(cord); + size_t offset = absl::Uniform(bitgen, 0u, cord.size()); + size_t length = absl::Uniform(bitgen, 100u, data.size()); + if (cord.size() == offset) { + cord.Append(absl::string_view(data.data(), length)); + } else { + absl::Cord suffix; + if (offset + length < cord.size()) { + suffix = cord; + suffix.RemovePrefix(offset + length); + } + if (cord.size() > offset) { + cord.RemoveSuffix(cord.size() - offset); + } + cord.Append(absl::string_view(data.data(), length)); + if (!suffix.empty()) { + cord.Append(suffix); + } + } + } +} + class AfterExitCordTester { public: bool Set(absl::Cord* cord, absl::string_view expected) { @@ -1650,12 +2519,34 @@ class AfterExitCordTester { absl::string_view expected_; }; +// Deliberately prevents the destructor for an absl::Cord from running. The cord +// is accessible via the cord member during the lifetime of the CordLeaker. +// After the CordLeaker is destroyed, pointers to the cord will remain valid +// until the CordLeaker's memory is deallocated. +struct CordLeaker { + union { + absl::Cord cord; + }; + + template <typename Str> + constexpr explicit CordLeaker(const Str& str) : cord(str) {} + + ~CordLeaker() { + // Don't do anything, including running cord's destructor. (cord's + // destructor won't run automatically because cord is hidden inside a + // union.) + } +}; + template <typename Str> void TestConstinitConstructor(Str) { const auto expected = Str::value; // Defined before `cord` to be destroyed after it. static AfterExitCordTester exit_tester; // NOLINT - ABSL_CONST_INIT static absl::Cord cord(Str{}); // NOLINT + ABSL_CONST_INIT static CordLeaker cord_leaker(Str{}); // NOLINT + // cord_leaker is static, so this reference will remain valid through the end + // of program execution. + static absl::Cord& cord = cord_leaker.cord; static bool init_exit_tester = exit_tester.Set(&cord, expected); (void)init_exit_tester; @@ -1707,9 +2598,278 @@ struct LongView { }; -TEST(Cord, ConstinitConstructor) { +TEST_P(CordTest, ConstinitConstructor) { TestConstinitConstructor( absl::strings_internal::MakeStringConstant(ShortView{})); TestConstinitConstructor( absl::strings_internal::MakeStringConstant(LongView{})); } + +namespace { + +// Test helper that generates a populated cord for future manipulation. +// +// By test convention, all generated cords begin with the characters "abcde" at +// the start of the first chunk. +class PopulatedCordFactory { + public: + constexpr PopulatedCordFactory(absl::string_view name, + absl::Cord (*generator)()) + : name_(name), generator_(generator) {} + + absl::string_view Name() const { return name_; } + absl::Cord Generate() const { return generator_(); } + + private: + absl::string_view name_; + absl::Cord (*generator_)(); +}; + +// clang-format off +// This array is constant-initialized in conformant compilers. +PopulatedCordFactory cord_factories[] = { + {"sso", [] { return absl::Cord("abcde"); }}, + {"flat", [] { + // Too large to live in SSO space, but small enough to be a simple FLAT. + absl::Cord flat(absl::StrCat("abcde", std::string(1000, 'x'))); + flat.Flatten(); + return flat; + }}, + {"external", [] { + // A cheat: we are using a string literal as the external storage, so a + // no-op releaser is correct here. + return absl::MakeCordFromExternal("abcde External!", []{}); + }}, + {"external substring", [] { + // A cheat: we are using a string literal as the external storage, so a + // no-op releaser is correct here. + absl::Cord ext = absl::MakeCordFromExternal("-abcde External!", []{}); + return absl::CordTestPeer::MakeSubstring(ext, 1, ext.size() - 1); + }}, + {"substring", [] { + absl::Cord flat(absl::StrCat("-abcde", std::string(1000, 'x'))); + flat.Flatten(); + return flat.Subcord(1, 998); + }}, + {"fragmented", [] { + std::string fragment = absl::StrCat("abcde", std::string(195, 'x')); + std::vector<std::string> fragments(200, fragment); + absl::Cord cord = absl::MakeFragmentedCord(fragments); + assert(cord.size() == 40000); + return cord; + }}, +}; +// clang-format on + +// Test helper that can mutate a cord, and possibly undo the mutation, for +// testing. +class CordMutator { + public: + constexpr CordMutator(absl::string_view name, void (*mutate)(absl::Cord&), + void (*undo)(absl::Cord&) = nullptr) + : name_(name), mutate_(mutate), undo_(undo) {} + + absl::string_view Name() const { return name_; } + void Mutate(absl::Cord& cord) const { mutate_(cord); } + bool CanUndo() const { return undo_ != nullptr; } + void Undo(absl::Cord& cord) const { undo_(cord); } + + private: + absl::string_view name_; + void (*mutate_)(absl::Cord&); + void (*undo_)(absl::Cord&); +}; + +// clang-format off +// This array is constant-initialized in conformant compilers. +CordMutator cord_mutators[] ={ + {"clear", [](absl::Cord& c) { c.Clear(); }}, + {"overwrite", [](absl::Cord& c) { c = "overwritten"; }}, + { + "append string", + [](absl::Cord& c) { c.Append("0123456789"); }, + [](absl::Cord& c) { c.RemoveSuffix(10); } + }, + { + "append cord", + [](absl::Cord& c) { + c.Append(absl::MakeFragmentedCord({"12345", "67890"})); + }, + [](absl::Cord& c) { c.RemoveSuffix(10); } + }, + { + "append checksummed cord", + [](absl::Cord& c) { + absl::Cord to_append = absl::MakeFragmentedCord({"12345", "67890"}); + to_append.SetExpectedChecksum(999); + c.Append(to_append); + }, + [](absl::Cord& c) { c.RemoveSuffix(10); } + }, + { + "append self", + [](absl::Cord& c) { c.Append(c); }, + [](absl::Cord& c) { c.RemoveSuffix(c.size() / 2); } + }, + { + "prepend string", + [](absl::Cord& c) { c.Prepend("9876543210"); }, + [](absl::Cord& c) { c.RemovePrefix(10); } + }, + { + "prepend cord", + [](absl::Cord& c) { + c.Prepend(absl::MakeFragmentedCord({"98765", "43210"})); + }, + [](absl::Cord& c) { c.RemovePrefix(10); } + }, + { + "prepend checksummed cord", + [](absl::Cord& c) { + absl::Cord to_prepend = absl::MakeFragmentedCord({"98765", "43210"}); + to_prepend.SetExpectedChecksum(999); + c.Prepend(to_prepend); + }, + [](absl::Cord& c) { c.RemovePrefix(10); } + }, + { + "prepend self", + [](absl::Cord& c) { c.Prepend(c); }, + [](absl::Cord& c) { c.RemovePrefix(c.size() / 2); } + }, + {"remove prefix", [](absl::Cord& c) { c.RemovePrefix(2); }}, + {"remove suffix", [](absl::Cord& c) { c.RemoveSuffix(2); }}, + {"subcord", [](absl::Cord& c) { c = c.Subcord(1, c.size() - 2); }}, + { + "swap inline", + [](absl::Cord& c) { + absl::Cord other("swap"); + c.swap(other); + } + }, + { + "swap tree", + [](absl::Cord& c) { + absl::Cord other(std::string(10000, 'x')); + c.swap(other); + } + }, +}; +// clang-format on +} // namespace + +TEST_P(CordTest, ExpectedChecksum) { + for (const PopulatedCordFactory& factory : cord_factories) { + SCOPED_TRACE(factory.Name()); + for (bool shared : {false, true}) { + SCOPED_TRACE(shared); + + absl::Cord shared_cord_source = factory.Generate(); + auto make_instance = [=] { + return shared ? shared_cord_source : factory.Generate(); + }; + + const absl::Cord base_value = factory.Generate(); + const std::string base_value_as_string(factory.Generate().Flatten()); + + absl::Cord c1 = make_instance(); + EXPECT_FALSE(c1.ExpectedChecksum().has_value()); + + // Setting an expected checksum works, and retains the cord's bytes + c1.SetExpectedChecksum(12345); + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + EXPECT_EQ(c1, base_value); + + // CRC persists through copies, assignments, and moves: + absl::Cord c1_copy_construct = c1; + EXPECT_EQ(c1_copy_construct.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_copy_assign; + c1_copy_assign = c1; + EXPECT_EQ(c1_copy_assign.ExpectedChecksum().value_or(0), 12345); + + absl::Cord c1_move(std::move(c1_copy_assign)); + EXPECT_EQ(c1_move.ExpectedChecksum().value_or(0), 12345); + + EXPECT_EQ(c1.ExpectedChecksum().value_or(0), 12345); + + // A CRC Cord compares equal to its non-CRC value. + EXPECT_EQ(c1, make_instance()); + + for (const CordMutator& mutator : cord_mutators) { + SCOPED_TRACE(mutator.Name()); + + // Test that mutating a cord removes its stored checksum + absl::Cord c2 = make_instance(); + c2.SetExpectedChecksum(24680); + + mutator.Mutate(c2); + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); + + if (mutator.CanUndo()) { + // Undoing an operation should not restore the checksum + mutator.Undo(c2); + EXPECT_EQ(c2, base_value); + EXPECT_EQ(c2.ExpectedChecksum(), absl::nullopt); + } + } + + absl::Cord c3 = make_instance(); + c3.SetExpectedChecksum(999); + const absl::Cord& cc3 = c3; + + // Test that all cord reading operations function in the face of an + // expected checksum. + + // Test data precondition + ASSERT_TRUE(cc3.StartsWith("abcde")); + + EXPECT_EQ(cc3.size(), base_value_as_string.size()); + EXPECT_FALSE(cc3.empty()); + EXPECT_EQ(cc3.Compare(base_value), 0); + EXPECT_EQ(cc3.Compare(base_value_as_string), 0); + EXPECT_EQ(cc3.Compare("wxyz"), -1); + EXPECT_EQ(cc3.Compare(absl::Cord("wxyz")), -1); + EXPECT_EQ(cc3.Compare("aaaa"), 1); + EXPECT_EQ(cc3.Compare(absl::Cord("aaaa")), 1); + EXPECT_EQ(absl::Cord("wxyz").Compare(cc3), 1); + EXPECT_EQ(absl::Cord("aaaa").Compare(cc3), -1); + EXPECT_TRUE(cc3.StartsWith("abcd")); + EXPECT_EQ(std::string(cc3), base_value_as_string); + + std::string dest; + absl::CopyCordToString(cc3, &dest); + EXPECT_EQ(dest, base_value_as_string); + + bool first_pass = true; + for (absl::string_view chunk : cc3.Chunks()) { + if (first_pass) { + EXPECT_TRUE(absl::StartsWith(chunk, "abcde")); + } + first_pass = false; + } + first_pass = true; + for (char ch : cc3.Chars()) { + if (first_pass) { + EXPECT_EQ(ch, 'a'); + } + first_pass = false; + } + EXPECT_TRUE(absl::StartsWith(*cc3.chunk_begin(), "abcde")); + EXPECT_EQ(*cc3.char_begin(), 'a'); + + auto char_it = cc3.char_begin(); + absl::Cord::Advance(&char_it, 2); + EXPECT_EQ(absl::Cord::AdvanceAndRead(&char_it, 2), "cd"); + EXPECT_EQ(*char_it, 'e'); + char_it = cc3.char_begin(); + absl::Cord::Advance(&char_it, 2); + EXPECT_TRUE(absl::StartsWith(absl::Cord::ChunkRemaining(char_it), "cde")); + + EXPECT_EQ(cc3[0], 'a'); + EXPECT_EQ(cc3[4], 'e'); + EXPECT_EQ(absl::HashOf(cc3), absl::HashOf(base_value)); + EXPECT_EQ(absl::HashOf(cc3), absl::HashOf(base_value_as_string)); + } + } +} diff --git a/absl/strings/cord_test_helpers.h b/absl/strings/cord_test_helpers.h index f1036e3b..31a1dc89 100644 --- a/absl/strings/cord_test_helpers.h +++ b/absl/strings/cord_test_helpers.h @@ -17,11 +17,73 @@ #ifndef ABSL_STRINGS_CORD_TEST_HELPERS_H_ #define ABSL_STRINGS_CORD_TEST_HELPERS_H_ +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/base/config.h" #include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/string_view.h" namespace absl { ABSL_NAMESPACE_BEGIN +// Cord sizes relevant for testing +enum class TestCordSize { + // An empty value + kEmpty = 0, + + // An inlined string value + kInlined = cord_internal::kMaxInline / 2 + 1, + + // 'Well known' SSO lengths (excluding terminating zero). + // libstdcxx has a maximum SSO of 15, libc++ has a maximum SSO of 22. + kStringSso1 = 15, + kStringSso2 = 22, + + // A string value which is too large to fit in inlined data, but small enough + // such that Cord prefers copying the value if possible, i.e.: not stealing + // std::string inputs, or referencing existing CordReps on Append, etc. + kSmall = cord_internal::kMaxBytesToCopy / 2 + 1, + + // A string value large enough that Cord prefers to reference or steal from + // existing inputs rather than copying contents of the input. + kMedium = cord_internal::kMaxFlatLength / 2 + 1, + + // A string value large enough to cause it to be stored in mutliple flats. + kLarge = cord_internal::kMaxFlatLength * 4 +}; + +// To string helper +inline absl::string_view ToString(TestCordSize size) { + switch (size) { + case TestCordSize::kEmpty: + return "Empty"; + case TestCordSize::kInlined: + return "Inlined"; + case TestCordSize::kSmall: + return "Small"; + case TestCordSize::kStringSso1: + return "StringSso1"; + case TestCordSize::kStringSso2: + return "StringSso2"; + case TestCordSize::kMedium: + return "Medium"; + case TestCordSize::kLarge: + return "Large"; + } + return "???"; +} + +// Returns the length matching the specified size +inline size_t Length(TestCordSize size) { return static_cast<size_t>(size); } + +// Stream output helper +inline std::ostream& operator<<(std::ostream& stream, TestCordSize size) { + return stream << ToString(size); +} + // Creates a multi-segment Cord from an iterable container of strings. The // resulting Cord is guaranteed to have one segment for every string in the // container. This allows code to be unit tested with multi-segment Cord diff --git a/absl/strings/cordz_test.cc b/absl/strings/cordz_test.cc new file mode 100644 index 00000000..2b7d30b0 --- /dev/null +++ b/absl/strings/cordz_test.cc @@ -0,0 +1,466 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <cstdint> +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/strings/cord.h" +#include "absl/strings/cord_test_helpers.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cordz_functions.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_sample_token.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +using testing::Eq; +using testing::AnyOf; + +namespace absl { +ABSL_NAMESPACE_BEGIN + +using cord_internal::CordzInfo; +using cord_internal::CordzSampleToken; +using cord_internal::CordzStatistics; +using cord_internal::CordzUpdateTracker; +using Method = CordzUpdateTracker::MethodIdentifier; + +// Do not print cord contents, we only care about 'size' perhaps. +// Note that this method must be inside the named namespace. +inline void PrintTo(const Cord& cord, std::ostream* s) { + if (s) *s << "Cord[" << cord.size() << "]"; +} + +namespace { + +auto constexpr kMaxInline = cord_internal::kMaxInline; + +// Returns a string_view value of the specified length +// We do this to avoid 'consuming' large strings in Cord by default. +absl::string_view MakeString(size_t size) { + thread_local std::string str; + str = std::string(size, '.'); + return str; +} + +absl::string_view MakeString(TestCordSize size) { + return MakeString(Length(size)); +} + +// Returns a cord with a sampled method of kAppendString. +absl::Cord MakeAppendStringCord(TestCordSize size) { + CordzSamplingIntervalHelper always(1); + absl::Cord cord; + cord.Append(MakeString(size)); + return cord; +} + +std::string TestParamToString(::testing::TestParamInfo<TestCordSize> size) { + return absl::StrCat("On", ToString(size.param), "Cord"); +} + +class CordzUpdateTest : public testing::TestWithParam<TestCordSize> { + public: + Cord& cord() { return cord_; } + + Method InitialOr(Method method) const { + return (GetParam() > TestCordSize::kInlined) ? Method::kConstructorString + : method; + } + + private: + CordzSamplingIntervalHelper sample_every_{1}; + Cord cord_{MakeString(GetParam())}; +}; + +template <typename T> +std::string ParamToString(::testing::TestParamInfo<T> param) { + return std::string(ToString(param.param)); +} + +INSTANTIATE_TEST_SUITE_P(WithParam, CordzUpdateTest, + testing::Values(TestCordSize::kEmpty, + TestCordSize::kInlined, + TestCordSize::kLarge), + TestParamToString); + +class CordzStringTest : public testing::TestWithParam<TestCordSize> { + private: + CordzSamplingIntervalHelper sample_every_{1}; +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordzStringTest, + testing::Values(TestCordSize::kInlined, + TestCordSize::kStringSso1, + TestCordSize::kStringSso2, + TestCordSize::kSmall, + TestCordSize::kLarge), + ParamToString<TestCordSize>); + +TEST(CordzTest, ConstructSmallArray) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(MakeString(TestCordSize::kSmall)); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST(CordzTest, ConstructLargeArray) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(MakeString(TestCordSize::kLarge)); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST_P(CordzStringTest, ConstructString) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(std::string(Length(GetParam()), '.')); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + } +} + +TEST(CordzTest, CopyConstructFromUnsampled) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + Cord cord(src); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, CopyConstructFromSampled) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord(src); + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); +} + +TEST(CordzTest, MoveConstruct) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src(MakeString(TestCordSize::kLarge)); + Cord cord(std::move(src)); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST_P(CordzUpdateTest, AssignUnsampledCord) { + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + const CordzInfo* info = GetCordzInfoForTesting(cord()); + cord() = src; + EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST_P(CordzUpdateTest, AssignSampledCord) { + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + cord() = src; + ASSERT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord())->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignSampledCordToInlined) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord cord; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignSampledCordToUnsampledCord) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord cord = UnsampledCord(MakeString(TestCordSize::kLarge)); + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignUnsampledCordToSampledCordWithoutSampling) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord cord = MakeAppendStringCord(TestCordSize::kLarge); + const CordzInfo* info = GetCordzInfoForTesting(cord); + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord = src; + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST(CordzUpdateTest, AssignUnsampledCordToSampledCordWithSampling) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord = MakeAppendStringCord(TestCordSize::kLarge); + const CordzInfo* info = GetCordzInfoForTesting(cord); + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord = src; + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST(CordzUpdateTest, AssignSampledCordToSampledCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord(MakeString(TestCordSize::kLarge)); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzUpdateTest, AssignUnsampledCordToSampledCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord(MakeString(TestCordSize::kLarge)); + cord = src; + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kAssignCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); + EXPECT_THAT(stats.update_tracker.Value(Method::kConstructorString), Eq(0)); +} + +TEST(CordzTest, AssignInlinedCordToSampledCord) { + CordzSampleToken token; + CordzSamplingIntervalHelper sample_every{1}; + Cord cord(MakeString(TestCordSize::kLarge)); + const CordzInfo* info = GetCordzInfoForTesting(cord); + Cord src = UnsampledCord(MakeString(TestCordSize::kInlined)); + cord = src; + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); + EXPECT_FALSE(CordzInfoIsListed(info)); +} + +TEST(CordzUpdateTest, MoveAssignCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord; + Cord src(MakeString(TestCordSize::kLarge)); + cord = std::move(src); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); +} + +TEST_P(CordzUpdateTest, AssignLargeArray) { + cord() = MakeString(TestCordSize::kSmall); + EXPECT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignString)); +} + +TEST_P(CordzUpdateTest, AssignSmallArray) { + cord() = MakeString(TestCordSize::kSmall); + EXPECT_THAT(cord(), HasValidCordzInfoOf(Method::kAssignString)); +} + +TEST_P(CordzUpdateTest, AssignInlinedArray) { + cord() = MakeString(TestCordSize::kInlined); + EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr)); +} + +TEST_P(CordzStringTest, AssignStringToInlined) { + Cord cord; + cord = std::string(Length(GetParam()), '.'); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAssignString)); + } +} + +TEST_P(CordzStringTest, AssignStringToCord) { + Cord cord(MakeString(TestCordSize::kLarge)); + cord = std::string(Length(GetParam()), '.'); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kAssignString, 1)); + } +} + +TEST_P(CordzUpdateTest, AssignInlinedString) { + cord() = std::string(Length(TestCordSize::kInlined), '.'); + EXPECT_THAT(GetCordzInfoForTesting(cord()), Eq(nullptr)); +} + +TEST_P(CordzUpdateTest, AppendCord) { + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord().Append(src); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendCord))); +} + +TEST_P(CordzUpdateTest, MoveAppendCord) { + cord().Append(UnsampledCord(MakeString(TestCordSize::kLarge))); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendCord))); +} + +TEST_P(CordzUpdateTest, AppendSmallArray) { + cord().Append(MakeString(TestCordSize::kSmall)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendString))); +} + +TEST_P(CordzUpdateTest, AppendLargeArray) { + cord().Append(MakeString(TestCordSize::kLarge)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kAppendString))); +} + +TEST_P(CordzStringTest, AppendStringToEmpty) { + Cord cord; + cord.Append(std::string(Length(GetParam()), '.')); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAppendString)); + } +} + +TEST_P(CordzStringTest, AppendStringToInlined) { + Cord cord(MakeString(TestCordSize::kInlined)); + cord.Append(std::string(Length(GetParam()), '.')); + if (Length(TestCordSize::kInlined) + Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kAppendString)); + } +} + +TEST_P(CordzStringTest, AppendStringToCord) { + Cord cord(MakeString(TestCordSize::kLarge)); + cord.Append(std::string(Length(GetParam()), '.')); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kAppendString, 1)); +} + +TEST(CordzTest, MakeCordFromExternal) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord = MakeCordFromExternal("Hello world", [](absl::string_view) {}); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kMakeCordFromExternal)); +} + +TEST(CordzTest, MakeCordFromEmptyExternal) { + CordzSamplingIntervalHelper sample_every{1}; + Cord cord = MakeCordFromExternal({}, [](absl::string_view) {}); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST_P(CordzUpdateTest, PrependCord) { + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + cord().Prepend(src); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependCord))); +} + +TEST_P(CordzUpdateTest, PrependSmallArray) { + cord().Prepend(MakeString(TestCordSize::kSmall)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependString))); +} + +TEST_P(CordzUpdateTest, PrependLargeArray) { + cord().Prepend(MakeString(TestCordSize::kLarge)); + EXPECT_THAT(cord(), HasValidCordzInfoOf(InitialOr(Method::kPrependString))); +} + +TEST_P(CordzStringTest, PrependStringToEmpty) { + Cord cord; + cord.Prepend(std::string(Length(GetParam()), '.')); + if (Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kPrependString)); + } +} + +TEST_P(CordzStringTest, PrependStringToInlined) { + Cord cord(MakeString(TestCordSize::kInlined)); + cord.Prepend(std::string(Length(GetParam()), '.')); + if (Length(TestCordSize::kInlined) + Length(GetParam()) > kMaxInline) { + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kPrependString)); + } +} + +TEST_P(CordzStringTest, PrependStringToCord) { + Cord cord(MakeString(TestCordSize::kLarge)); + cord.Prepend(std::string(Length(GetParam()), '.')); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kPrependString, 1)); +} + +TEST(CordzTest, RemovePrefix) { + CordzSamplingIntervalHelper sample_every(1); + Cord cord(MakeString(TestCordSize::kLarge)); + + // Half the cord + cord.RemovePrefix(cord.size() / 2); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemovePrefix, 1)); + + // TODO(mvels): RemovePrefix does not reset to inlined, except if empty? + cord.RemovePrefix(cord.size() - kMaxInline); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemovePrefix, 2)); + + cord.RemovePrefix(cord.size()); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, RemoveSuffix) { + CordzSamplingIntervalHelper sample_every(1); + Cord cord(MakeString(TestCordSize::kLarge)); + + // Half the cord + cord.RemoveSuffix(cord.size() / 2); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemoveSuffix, 1)); + + // TODO(mvels): RemoveSuffix does not reset to inlined, except if empty? + cord.RemoveSuffix(cord.size() - kMaxInline); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kConstructorString)); + EXPECT_THAT(cord, CordzMethodCountEq(Method::kRemoveSuffix, 2)); + + cord.RemoveSuffix(cord.size()); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, SubCordFromUnsampledCord) { + CordzSamplingIntervalHelper sample_every{1}; + Cord src = UnsampledCord(MakeString(TestCordSize::kLarge)); + Cord cord = src.Subcord(10, src.size() / 2); + EXPECT_THAT(GetCordzInfoForTesting(cord), Eq(nullptr)); +} + +TEST(CordzTest, SubCordFromSampledCord) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord = src.Subcord(10, src.size() / 2); + ASSERT_THAT(cord, HasValidCordzInfoOf(Method::kSubCord)); + CordzStatistics stats = GetCordzInfoForTesting(cord)->GetCordzStatistics(); + EXPECT_THAT(stats.parent_method, Eq(Method::kAppendString)); + EXPECT_THAT(stats.update_tracker.Value(Method::kAppendString), Eq(1)); +} + +TEST(CordzTest, SmallSubCord) { + CordzSamplingIntervalHelper sample_never{99999}; + Cord src = MakeAppendStringCord(TestCordSize::kLarge); + Cord cord = src.Subcord(10, kMaxInline + 1); + EXPECT_THAT(cord, HasValidCordzInfoOf(Method::kSubCord)); +} + +} // namespace + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_INTERNAL_CORDZ_ENABLED diff --git a/absl/strings/cordz_test_helpers.h b/absl/strings/cordz_test_helpers.h new file mode 100644 index 00000000..e410eecf --- /dev/null +++ b/absl/strings/cordz_test_helpers.h @@ -0,0 +1,151 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ +#define ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ + +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/macros.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_sample_token.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/strings/str_cat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN + +// Returns the CordzInfo for the cord, or nullptr if the cord is not sampled. +inline const cord_internal::CordzInfo* GetCordzInfoForTesting( + const Cord& cord) { + if (!cord.contents_.is_tree()) return nullptr; + return cord.contents_.cordz_info(); +} + +// Returns true if the provided cordz_info is in the list of sampled cords. +inline bool CordzInfoIsListed(const cord_internal::CordzInfo* cordz_info, + cord_internal::CordzSampleToken token = {}) { + for (const cord_internal::CordzInfo& info : token) { + if (cordz_info == &info) return true; + } + return false; +} + +// Matcher on Cord that verifies all of: +// - the cord is sampled +// - the CordzInfo of the cord is listed / discoverable. +// - the reported CordzStatistics match the cord's actual properties +// - the cord has an (initial) UpdateTracker count of 1 for `method` +MATCHER_P(HasValidCordzInfoOf, method, "CordzInfo matches cord") { + const cord_internal::CordzInfo* cord_info = GetCordzInfoForTesting(arg); + if (cord_info == nullptr) { + *result_listener << "cord is not sampled"; + return false; + } + if (!CordzInfoIsListed(cord_info)) { + *result_listener << "cord is sampled, but not listed"; + return false; + } + cord_internal::CordzStatistics stat = cord_info->GetCordzStatistics(); + if (stat.size != arg.size()) { + *result_listener << "cordz size " << stat.size + << " does not match cord size " << arg.size(); + return false; + } + if (stat.update_tracker.Value(method) != 1) { + *result_listener << "Expected method count 1 for " << method << ", found " + << stat.update_tracker.Value(method); + return false; + } + return true; +} + +// Matcher on Cord that verifies that the cord is sampled and that the CordzInfo +// update tracker has 'method' with a call count of 'n' +MATCHER_P2(CordzMethodCountEq, method, n, + absl::StrCat("CordzInfo method count equals ", n)) { + const cord_internal::CordzInfo* cord_info = GetCordzInfoForTesting(arg); + if (cord_info == nullptr) { + *result_listener << "cord is not sampled"; + return false; + } + cord_internal::CordzStatistics stat = cord_info->GetCordzStatistics(); + if (stat.update_tracker.Value(method) != n) { + *result_listener << "Expected method count " << n << " for " << method + << ", found " << stat.update_tracker.Value(method); + return false; + } + return true; +} + +// Cordz will only update with a new rate once the previously scheduled event +// has fired. When we disable Cordz, a long delay takes place where we won't +// consider profiling new Cords. CordzSampleIntervalHelper will burn through +// that interval and allow for testing that assumes that the average sampling +// interval is a particular value. +class CordzSamplingIntervalHelper { + public: + explicit CordzSamplingIntervalHelper(int32_t interval) + : orig_mean_interval_(absl::cord_internal::get_cordz_mean_interval()) { + absl::cord_internal::set_cordz_mean_interval(interval); + absl::cord_internal::cordz_set_next_sample_for_testing(interval); + } + + ~CordzSamplingIntervalHelper() { + absl::cord_internal::set_cordz_mean_interval(orig_mean_interval_); + absl::cord_internal::cordz_set_next_sample_for_testing(orig_mean_interval_); + } + + private: + int32_t orig_mean_interval_; +}; + +// Wrapper struct managing a small CordRep `rep` +struct TestCordRep { + cord_internal::CordRepFlat* rep; + + TestCordRep() { + rep = cord_internal::CordRepFlat::New(100); + rep->length = 100; + memset(rep->Data(), 1, 100); + } + ~TestCordRep() { cord_internal::CordRep::Unref(rep); } +}; + +// Wrapper struct managing a small CordRep `rep`, and +// an InlineData `data` initialized with that CordRep. +struct TestCordData { + TestCordRep rep; + cord_internal::InlineData data{rep.rep}; +}; + +// Creates a Cord that is not sampled +template <typename... Args> +Cord UnsampledCord(Args... args) { + CordzSamplingIntervalHelper never(9999); + Cord cord(std::forward<Args>(args)...); + ABSL_ASSERT(GetCordzInfoForTesting(cord) == nullptr); + return cord; +} + +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_TEST_HELPERS_H_ diff --git a/absl/strings/internal/charconv_parse.cc b/absl/strings/internal/charconv_parse.cc index 8b11868c..d29acaf4 100644 --- a/absl/strings/internal/charconv_parse.cc +++ b/absl/strings/internal/charconv_parse.cc @@ -52,7 +52,7 @@ static_assert(std::numeric_limits<double>::digits == 53, "IEEE double fact"); // The lowest valued 19-digit decimal mantissa we can read still contains // sufficient information to reconstruct a binary mantissa. -static_assert(1000000000000000000u > (uint64_t(1) << (53 + 3)), "(b) above"); +static_assert(1000000000000000000u > (uint64_t{1} << (53 + 3)), "(b) above"); // ParseFloat<16> will read the first 15 significant digits of the mantissa. // diff --git a/absl/strings/internal/cord_data_edge.h b/absl/strings/internal/cord_data_edge.h new file mode 100644 index 00000000..e18b33e1 --- /dev/null +++ b/absl/strings/internal/cord_data_edge.h @@ -0,0 +1,63 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_ +#define ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_ + +#include <cassert> +#include <cstddef> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Returns true if the provided rep is a FLAT, EXTERNAL or a SUBSTRING node +// holding a FLAT or EXTERNAL child rep. Requires `rep != nullptr`. +inline bool IsDataEdge(const CordRep* edge) { + assert(edge != nullptr); + + // The fast path is that `edge` is an EXTERNAL or FLAT node, making the below + // if a single, well predicted branch. We then repeat the FLAT or EXTERNAL + // check in the slow path of the SUBSTRING check to optimize for the hot path. + if (edge->tag == EXTERNAL || edge->tag >= FLAT) return true; + if (edge->tag == SUBSTRING) edge = edge->substring()->child; + return edge->tag == EXTERNAL || edge->tag >= FLAT; +} + +// Returns the `absl::string_view` data reference for the provided data edge. +// Requires 'IsDataEdge(edge) == true`. +inline absl::string_view EdgeData(const CordRep* edge) { + assert(IsDataEdge(edge)); + + size_t offset = 0; + const size_t length = edge->length; + if (edge->IsSubstring()) { + offset = edge->substring()->start; + edge = edge->substring()->child; + } + return edge->tag >= FLAT + ? absl::string_view{edge->flat()->Data() + offset, length} + : absl::string_view{edge->external()->base + offset, length}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_DATA_EDGE_H_ diff --git a/absl/strings/internal/cord_data_edge_test.cc b/absl/strings/internal/cord_data_edge_test.cc new file mode 100644 index 00000000..8fce3bc6 --- /dev/null +++ b/absl/strings/internal/cord_data_edge_test.cc @@ -0,0 +1,130 @@ +// Copyright 2022 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_data_edge.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_test_util.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::absl::cordrep_testing::MakeExternal; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; + +TEST(CordDataEdgeTest, IsDataEdgeOnFlat) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + EXPECT_TRUE(IsDataEdge(rep)); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnExternal) { + CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ..."); + EXPECT_TRUE(IsDataEdge(rep)); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfFlat) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_TRUE(IsDataEdge(substr)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnSubstringOfExternal) { + CordRep* rep = MakeExternal("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_TRUE(IsDataEdge(substr)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnBtree) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRepBtree* tree = CordRepBtree::New(rep); + EXPECT_FALSE(IsDataEdge(tree)); + CordRep::Unref(tree); +} + +TEST(CordDataEdgeTest, IsDataEdgeOnBadSubstr) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep)); + EXPECT_FALSE(IsDataEdge(substr)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, EdgeDataOnFlat) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeFlat(value); + EXPECT_EQ(EdgeData(rep), value); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, EdgeDataOnExternal) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeExternal(value); + EXPECT_EQ(EdgeData(rep), value); + CordRep::Unref(rep); +} + +TEST(CordDataEdgeTest, EdgeDataOnSubstringOfFlat) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeFlat(value); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_EQ(EdgeData(substr), value.substr(1, 20)); + CordRep::Unref(substr); +} + +TEST(CordDataEdgeTest, EdgeDataOnSubstringOfExternal) { + absl::string_view value = "Lorem ipsum dolor sit amet, consectetur ..."; + CordRep* rep = MakeExternal(value); + CordRep* substr = MakeSubstring(1, 20, rep); + EXPECT_EQ(EdgeData(substr), value.substr(1, 20)); + CordRep::Unref(substr); +} + +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + +TEST(CordDataEdgeTest, IsDataEdgeOnNullPtr) { + EXPECT_DEATH(IsDataEdge(nullptr), ".*"); +} + +TEST(CordDataEdgeTest, EdgeDataOnNullPtr) { + EXPECT_DEATH(EdgeData(nullptr), ".*"); +} + +TEST(CordDataEdgeTest, EdgeDataOnBtree) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRepBtree* tree = CordRepBtree::New(rep); + EXPECT_DEATH(EdgeData(tree), ".*"); + CordRep::Unref(tree); +} + +TEST(CordDataEdgeTest, EdgeDataOnBadSubstr) { + CordRep* rep = MakeFlat("Lorem ipsum dolor sit amet, consectetur ..."); + CordRep* substr = MakeSubstring(1, 18, MakeSubstring(1, 20, rep)); + EXPECT_DEATH(EdgeData(substr), ".*"); + CordRep::Unref(substr); +} + +#endif // GTEST_HAS_DEATH_TEST && !NDEBUG + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_internal.cc b/absl/strings/internal/cord_internal.cc index 905ffd0c..b6b06cfa 100644 --- a/absl/strings/internal/cord_internal.cc +++ b/absl/strings/internal/cord_internal.cc @@ -17,9 +17,13 @@ #include <cassert> #include <memory> +#include "absl/base/internal/raw_logging.h" #include "absl/container/inlined_vector.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_crc.h" #include "absl/strings/internal/cord_rep_flat.h" #include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/str_cat.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -29,51 +33,41 @@ ABSL_CONST_INIT std::atomic<bool> cord_ring_buffer_enabled( kCordEnableRingBufferDefault); ABSL_CONST_INIT std::atomic<bool> shallow_subcords_enabled( kCordShallowSubcordsDefault); +ABSL_CONST_INIT std::atomic<bool> cord_btree_exhaustive_validation(false); + +void LogFatalNodeType(CordRep* rep) { + ABSL_INTERNAL_LOG(FATAL, absl::StrCat("Unexpected node type: ", + static_cast<int>(rep->tag))); +} void CordRep::Destroy(CordRep* rep) { assert(rep != nullptr); - absl::InlinedVector<CordRep*, Constants::kInlinedVectorSize> pending; while (true) { assert(!rep->refcount.IsImmortal()); - if (rep->tag == CONCAT) { - CordRepConcat* rep_concat = rep->concat(); - CordRep* right = rep_concat->right; - if (!right->refcount.Decrement()) { - pending.push_back(right); - } - CordRep* left = rep_concat->left; - delete rep_concat; - rep = nullptr; - if (!left->refcount.Decrement()) { - rep = left; - continue; - } + if (rep->tag == BTREE) { + CordRepBtree::Destroy(rep->btree()); + return; } else if (rep->tag == RING) { CordRepRing::Destroy(rep->ring()); - rep = nullptr; + return; } else if (rep->tag == EXTERNAL) { CordRepExternal::Delete(rep); - rep = nullptr; + return; } else if (rep->tag == SUBSTRING) { CordRepSubstring* rep_substring = rep->substring(); - CordRep* child = rep_substring->child; + rep = rep_substring->child; delete rep_substring; - rep = nullptr; - if (!child->refcount.Decrement()) { - rep = child; - continue; + if (rep->refcount.Decrement()) { + return; } + } else if (rep->tag == CRC) { + CordRepCrc::Destroy(rep->crc()); + return; } else { + assert(rep->IsFlat()); CordRepFlat::Delete(rep); - rep = nullptr; - } - - if (!pending.empty()) { - rep = pending.back(); - pending.pop_back(); - } else { - break; + return; } } } diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index a1ba67fe..b50fb79a 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -21,6 +21,7 @@ #include <cstdint> #include <type_traits> +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/internal/invoke.h" @@ -33,6 +34,19 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { +// The overhead of a vtable is too much for Cord, so we roll our own subclasses +// using only a single byte to differentiate classes from each other - the "tag" +// byte. Define the subclasses first so we can provide downcasting helper +// functions in the base class. +struct CordRep; +struct CordRepConcat; +struct CordRepExternal; +struct CordRepFlat; +struct CordRepSubstring; +struct CordRepCrc; +class CordRepRing; +class CordRepBtree; + class CordzInfo; // Default feature enable states for cord ring buffers @@ -44,6 +58,12 @@ enum CordFeatureDefaults { extern std::atomic<bool> cord_ring_buffer_enabled; extern std::atomic<bool> shallow_subcords_enabled; +// `cord_btree_exhaustive_validation` can be set to force exhaustive validation +// in debug assertions, and code that calls `IsValid()` explicitly. By default, +// assertions should be relatively cheap and AssertValid() can easily lead to +// O(n^2) complexity as recursive / full tree validation is O(n). +extern std::atomic<bool> cord_btree_exhaustive_validation; + inline void enable_cord_ring_buffer(bool enable) { cord_ring_buffer_enabled.store(enable, std::memory_order_relaxed); } @@ -68,12 +88,16 @@ enum Constants { kMaxBytesToCopy = 511 }; -// Wraps std::atomic for reference counting. -class Refcount { +// Emits a fatal error "Unexpected node type: xyz" and aborts the program. +ABSL_ATTRIBUTE_NORETURN void LogFatalNodeType(CordRep* rep); + +// Compact class for tracking the reference count and state flags for CordRep +// instances. Data is stored in an atomic int32_t for compactness and speed. +class RefcountAndFlags { public: - constexpr Refcount() : count_{kRefIncrement} {} + constexpr RefcountAndFlags() : count_{kRefIncrement} {} struct Immortal {}; - explicit constexpr Refcount(Immortal) : count_(kImmortalTag) {} + explicit constexpr RefcountAndFlags(Immortal) : count_(kImmortalFlag) {} // Increments the reference count. Imposes no memory ordering. inline void Increment() { @@ -86,26 +110,27 @@ class Refcount { // Returns false if there are no references outstanding; true otherwise. // Inserts barriers to ensure that state written before this method returns // false will be visible to a thread that just observed this method returning - // false. + // false. Always returns false when the immortal bit is set. inline bool Decrement() { - int32_t refcount = count_.load(std::memory_order_acquire); - assert(refcount > 0 || refcount & kImmortalTag); + int32_t refcount = count_.load(std::memory_order_acquire) & kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); return refcount != kRefIncrement && - count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) != - kRefIncrement; + (count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask) != kRefIncrement; } // Same as Decrement but expect that refcount is greater than 1. inline bool DecrementExpectHighRefcount() { int32_t refcount = - count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel); - assert(refcount > 0 || refcount & kImmortalTag); + count_.fetch_sub(kRefIncrement, std::memory_order_acq_rel) & + kRefcountMask; + assert(refcount > 0 || refcount & kImmortalFlag); return refcount != kRefIncrement; } // Returns the current reference count using acquire semantics. inline int32_t Get() const { - return count_.load(std::memory_order_acquire) >> kImmortalShift; + return count_.load(std::memory_order_acquire) >> kNumFlags; } // Returns whether the atomic integer is 1. @@ -115,83 +140,127 @@ class Refcount { // This call performs the test for a reference count of one, and // performs the memory barrier needed for the owning thread // to act on the object, knowing that it has exclusive access to the - // object. + // object. Always returns false when the immortal bit is set. inline bool IsOne() { - return count_.load(std::memory_order_acquire) == kRefIncrement; + return (count_.load(std::memory_order_acquire) & kRefcountMask) == + kRefIncrement; } bool IsImmortal() const { - return (count_.load(std::memory_order_relaxed) & kImmortalTag) != 0; + return (count_.load(std::memory_order_relaxed) & kImmortalFlag) != 0; } private: - // We reserve the bottom bit to tag a reference count as immortal. - // By making it `1` we ensure that we never reach `0` when adding/subtracting - // `2`, thus it never looks as if it should be destroyed. - // These are used for the StringConstant constructor where we do not increase - // the refcount at construction time (due to constinit requirements) but we - // will still decrease it at destruction time to avoid branching on Unref. - enum { - kImmortalShift = 1, - kRefIncrement = 1 << kImmortalShift, - kImmortalTag = kRefIncrement - 1 + // We reserve the bottom bits for flags. + // kImmortalBit indicates that this entity should never be collected; it is + // used for the StringConstant constructor to avoid collecting immutable + // constant cords. + // kReservedFlag is reserved for future use. + enum Flags { + kNumFlags = 2, + + kImmortalFlag = 0x1, + kReservedFlag = 0x2, + kRefIncrement = (1 << kNumFlags), + + // Bitmask to use when checking refcount by equality. This masks out + // all flags except kImmortalFlag, which is part of the refcount for + // purposes of equality. (A refcount of 0 or 1 does not count as 0 or 1 + // if the immortal bit is set.) + kRefcountMask = ~kReservedFlag, }; std::atomic<int32_t> count_; }; -// The overhead of a vtable is too much for Cord, so we roll our own subclasses -// using only a single byte to differentiate classes from each other - the "tag" -// byte. Define the subclasses first so we can provide downcasting helper -// functions in the base class. - -struct CordRepConcat; -struct CordRepExternal; -struct CordRepFlat; -struct CordRepSubstring; -class CordRepRing; - // Various representations that we allow enum CordRepKind { - CONCAT = 0, - EXTERNAL = 1, - SUBSTRING = 2, - RING = 3, + UNUSED_0 = 0, + SUBSTRING = 1, + CRC = 2, + BTREE = 3, + RING = 4, + EXTERNAL = 5, // We have different tags for different sized flat arrays, - // starting with FLAT, and limited to MAX_FLAT_TAG. The 224 value is based on - // the current 'size to tag' encoding of 8 / 32 bytes. If a new tag is needed - // in the future, then 'FLAT' and 'MAX_FLAT_TAG' should be adjusted as well - // as the Tag <---> Size logic so that FLAT stil represents the minimum flat - // allocation size. (32 bytes as of now). - FLAT = 4, - MAX_FLAT_TAG = 224 + // starting with FLAT, and limited to MAX_FLAT_TAG. The below values map to an + // allocated range of 32 bytes to 256 KB. The current granularity is: + // - 8 byte granularity for flat sizes in [32 - 512] + // - 64 byte granularity for flat sizes in (512 - 8KiB] + // - 4KiB byte granularity for flat sizes in (8KiB, 256 KiB] + // If a new tag is needed in the future, then 'FLAT' and 'MAX_FLAT_TAG' should + // be adjusted as well as the Tag <---> Size mapping logic so that FLAT still + // represents the minimum flat allocation size. (32 bytes as of now). + FLAT = 6, + MAX_FLAT_TAG = 248 }; +// There are various locations where we want to check if some rep is a 'plain' +// data edge, i.e. an external or flat rep. By having FLAT == EXTERNAL + 1, we +// can perform this check in a single branch as 'tag >= EXTERNAL' +// Likewise, we have some locations where we check for 'ring or external/flat', +// so likewise align RING to EXTERNAL. +// Note that we can leave this optimization to the compiler. The compiler will +// DTRT when it sees a condition like `tag == EXTERNAL || tag >= FLAT`. +static_assert(RING == BTREE + 1, "BTREE and RING not consecutive"); +static_assert(EXTERNAL == RING + 1, "BTREE and EXTERNAL not consecutive"); +static_assert(FLAT == EXTERNAL + 1, "EXTERNAL and FLAT not consecutive"); + struct CordRep { + // Result from an `extract edge` operation. Contains the (possibly changed) + // tree node as well as the extracted edge, or {tree, nullptr} if no edge + // could be extracted. + // On success, the returned `tree` value is null if `extracted` was the only + // data edge inside the tree, a data edge if there were only two data edges in + // the tree, or the (possibly new / smaller) remaining tree with the extracted + // data edge removed. + struct ExtractResult { + CordRep* tree; + CordRep* extracted; + }; + CordRep() = default; - constexpr CordRep(Refcount::Immortal immortal, size_t l) + constexpr CordRep(RefcountAndFlags::Immortal immortal, size_t l) : length(l), refcount(immortal), tag(EXTERNAL), storage{} {} // The following three fields have to be less than 32 bytes since // that is the smallest supported flat node size. size_t length; - Refcount refcount; + RefcountAndFlags refcount; // If tag < FLAT, it represents CordRepKind and indicates the type of node. // Otherwise, the node type is CordRepFlat and the tag is the encoded size. uint8_t tag; - char storage[1]; // Starting point for flat array: MUST BE LAST FIELD + + // `storage` provides two main purposes: + // - the starting point for FlatCordRep.Data() [flexible-array-member] + // - 3 bytes of additional storage for use by derived classes. + // The latter is used by CordrepConcat and CordRepBtree. CordRepConcat stores + // a 'depth' value in storage[0], and the (future) CordRepBtree class stores + // `height`, `begin` and `end` in the 3 entries. Otherwise we would need to + // allocate room for these in the derived class, as not all compilers reuse + // padding space from the base class (clang and gcc do, MSVC does not, etc) + uint8_t storage[3]; + + // Returns true if this instance's tag matches the requested type. + constexpr bool IsRing() const { return tag == RING; } + constexpr bool IsSubstring() const { return tag == SUBSTRING; } + constexpr bool IsCrc() const { return tag == CRC; } + constexpr bool IsExternal() const { return tag == EXTERNAL; } + constexpr bool IsFlat() const { return tag >= FLAT; } + constexpr bool IsBtree() const { return tag == BTREE; } inline CordRepRing* ring(); inline const CordRepRing* ring() const; - inline CordRepConcat* concat(); - inline const CordRepConcat* concat() const; inline CordRepSubstring* substring(); inline const CordRepSubstring* substring() const; + inline CordRepCrc* crc(); + inline const CordRepCrc* crc() const; inline CordRepExternal* external(); inline const CordRepExternal* external() const; inline CordRepFlat* flat(); inline const CordRepFlat* flat() const; + inline CordRepBtree* btree(); + inline const CordRepBtree* btree() const; // -------------------------------------------------------------------- // Memory management @@ -208,17 +277,23 @@ struct CordRep { static inline void Unref(CordRep* rep); }; -struct CordRepConcat : public CordRep { - CordRep* left; - CordRep* right; - - uint8_t depth() const { return static_cast<uint8_t>(storage[0]); } - void set_depth(uint8_t depth) { storage[0] = static_cast<char>(depth); } -}; - struct CordRepSubstring : public CordRep { size_t start; // Starting offset of substring in child CordRep* child; + + // Creates a substring on `child`, adopting a reference on `child`. + // Requires `child` to be either a flat or external node, and `pos` and `n` to + // form a non-empty partial sub range of `'child`, i.e.: + // `n > 0 && n < length && n + pos <= length` + static inline CordRepSubstring* Create(CordRep* child, size_t pos, size_t n); + + // Creates a substring of `rep`. Does not adopt a reference on `rep`. + // Requires `IsDataEdge(rep) && n > 0 && pos + n <= rep->length`. + // If `n == rep->length` then this method returns `CordRep::Ref(rep)` + // If `rep` is a substring of a flat or external node, then this method will + // return a new substring of that flat or external node with `pos` adjusted + // with the original `start` position. + static inline CordRep* Substring(CordRep* rep, size_t pos, size_t n); }; // Type for function pointer that will invoke the releaser function and also @@ -231,7 +306,7 @@ using ExternalReleaserInvoker = void (*)(CordRepExternal*); struct CordRepExternal : public CordRep { CordRepExternal() = default; explicit constexpr CordRepExternal(absl::string_view str) - : CordRep(Refcount::Immortal{}, str.size()), + : CordRep(RefcountAndFlags::Immortal{}, str.size()), base(str.data()), releaser_invoker(nullptr) {} @@ -240,7 +315,7 @@ struct CordRepExternal : public CordRep { ExternalReleaserInvoker releaser_invoker; // Deletes (releases) the external rep. - // Requires rep != nullptr and rep->tag == EXTERNAL + // Requires rep != nullptr and rep->IsExternal() static void Delete(CordRep* rep); }; @@ -282,8 +357,49 @@ struct CordRepExternalImpl } }; +inline CordRepSubstring* CordRepSubstring::Create(CordRep* child, size_t pos, + size_t n) { + assert(child != nullptr); + assert(n > 0); + assert(n < child->length); + assert(pos < child->length); + assert(n <= child->length - pos); + + // TODO(b/217376272): Harden internal logic. + // Move to strategical places inside the Cord logic and make this an assert. + if (ABSL_PREDICT_FALSE(!(child->IsExternal() || child->IsFlat()))) { + LogFatalNodeType(child); + } + + CordRepSubstring* rep = new CordRepSubstring(); + rep->length = n; + rep->tag = SUBSTRING; + rep->start = pos; + rep->child = child; + return rep; +} + +inline CordRep* CordRepSubstring::Substring(CordRep* rep, size_t pos, + size_t n) { + assert(rep != nullptr); + assert(n != 0); + assert(pos < rep->length); + assert(n <= rep->length - pos); + if (n == rep->length) return CordRep::Ref(rep); + if (rep->IsSubstring()) { + pos += rep->substring()->start; + rep = rep->substring()->child; + } + CordRepSubstring* substr = new CordRepSubstring(); + substr->length = n; + substr->tag = SUBSTRING; + substr->start = pos; + substr->child = CordRep::Ref(rep); + return substr; +} + inline void CordRepExternal::Delete(CordRep* rep) { - assert(rep != nullptr && rep->tag == EXTERNAL); + assert(rep != nullptr && rep->IsExternal()); auto* rep_external = static_cast<CordRepExternal*>(rep); assert(rep_external->releaser_invoker != nullptr); rep_external->releaser_invoker(rep_external); @@ -295,7 +411,8 @@ struct ConstInitExternalStorage { }; template <typename Str> -CordRepExternal ConstInitExternalStorage<Str>::value(Str::value); +ABSL_CONST_INIT CordRepExternal + ConstInitExternalStorage<Str>::value(Str::value); enum { kMaxInline = 15, @@ -329,18 +446,17 @@ static constexpr cordz_info_t BigEndianByte(unsigned char value) { class InlineData { public: + // DefaultInitType forces the use of the default initialization constructor. + enum DefaultInitType { kDefaultInit }; + // kNullCordzInfo holds the big endian representation of intptr_t(1) // This is the 'null' / initial value of 'cordz_info'. The null value // is specifically big endian 1 as with 64-bit pointers, the last // byte of cordz_info overlaps with the last byte holding the tag. static constexpr cordz_info_t kNullCordzInfo = BigEndianByte(1); - // kFakeCordzInfo holds a 'fake', non-null cordz-info value we use to - // emulate the previous 'kProfiled' tag logic in 'set_profiled' until - // cord code is changed to store cordz_info values in InlineData. - static constexpr cordz_info_t kFakeCordzInfo = BigEndianByte(9); - constexpr InlineData() : as_chars_{0} {} + explicit InlineData(DefaultInitType) {} explicit constexpr InlineData(CordRep* rep) : as_tree_(rep) {} explicit constexpr InlineData(absl::string_view chars) : as_chars_{ @@ -367,13 +483,23 @@ class InlineData { return as_tree_.cordz_info != kNullCordzInfo; } + // Returns true if either of the provided instances hold a cordz_info value. + // This method is more efficient than the equivalent `data1.is_profiled() || + // data2.is_profiled()`. Requires both arguments to hold a tree. + static bool is_either_profiled(const InlineData& data1, + const InlineData& data2) { + assert(data1.is_tree() && data2.is_tree()); + return (data1.as_tree_.cordz_info | data2.as_tree_.cordz_info) != + kNullCordzInfo; + } + // Returns the cordz_info sampling instance for this instance, or nullptr // if the current instance is not sampled and does not have CordzInfo data. // Requires the current instance to hold a tree value. CordzInfo* cordz_info() const { assert(is_tree()); - intptr_t info = - static_cast<intptr_t>(absl::big_endian::ToHost64(as_tree_.cordz_info)); + intptr_t info = static_cast<intptr_t>( + absl::big_endian::ToHost64(static_cast<uint64_t>(as_tree_.cordz_info))); assert(info & 1); return reinterpret_cast<CordzInfo*>(info - 1); } @@ -383,8 +509,9 @@ class InlineData { // Requires the current instance to hold a tree value. void set_cordz_info(CordzInfo* cordz_info) { assert(is_tree()); - intptr_t info = reinterpret_cast<intptr_t>(cordz_info) | 1; - as_tree_.cordz_info = absl::big_endian::FromHost64(info); + uintptr_t info = reinterpret_cast<uintptr_t>(cordz_info) | 1; + as_tree_.cordz_info = + static_cast<cordz_info_t>(absl::big_endian::FromHost64(info)); } // Resets the current cordz_info to null / empty. @@ -454,13 +581,6 @@ class InlineData { tag() = static_cast<char>(size << 1); } - // Sets or unsets the 'is_profiled' state of this instance. - // Requires the current instance to hold a tree value. - void set_profiled(bool profiled) { - assert(is_tree()); - as_tree_.cordz_info = profiled ? kFakeCordzInfo : kNullCordzInfo; - } - private: // See cordz_info_t for forced alignment and size of `cordz_info` details. struct AsTree { @@ -483,7 +603,7 @@ class InlineData { // store the size in the last char of `as_chars_` shifted left + 1. // Else we store it in a tree and store a pointer to that tree in // `as_tree_.rep` and store a tag in `tagged_size`. - union { + union { char as_chars_[kMaxInline + 1]; AsTree as_tree_; }; @@ -491,38 +611,30 @@ class InlineData { static_assert(sizeof(InlineData) == kMaxInline + 1, ""); -inline CordRepConcat* CordRep::concat() { - assert(tag == CONCAT); - return static_cast<CordRepConcat*>(this); -} - -inline const CordRepConcat* CordRep::concat() const { - assert(tag == CONCAT); - return static_cast<const CordRepConcat*>(this); -} - inline CordRepSubstring* CordRep::substring() { - assert(tag == SUBSTRING); + assert(IsSubstring()); return static_cast<CordRepSubstring*>(this); } inline const CordRepSubstring* CordRep::substring() const { - assert(tag == SUBSTRING); + assert(IsSubstring()); return static_cast<const CordRepSubstring*>(this); } inline CordRepExternal* CordRep::external() { - assert(tag == EXTERNAL); + assert(IsExternal()); return static_cast<CordRepExternal*>(this); } inline const CordRepExternal* CordRep::external() const { - assert(tag == EXTERNAL); + assert(IsExternal()); return static_cast<const CordRepExternal*>(this); } inline CordRep* CordRep::Ref(CordRep* rep) { - assert(rep != nullptr); + // ABSL_ASSUME is a workaround for + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105585 + ABSL_ASSUME(rep != nullptr); rep->refcount.Increment(); return rep; } diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc new file mode 100644 index 00000000..cacbf3da --- /dev/null +++ b/absl/strings/internal/cord_rep_btree.cc @@ -0,0 +1,1228 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree.h" + +#include <cassert> +#include <cstdint> +#include <iostream> +#include <string> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_data_edge.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_consume.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL +constexpr size_t CordRepBtree::kMaxCapacity; +#endif + +namespace { + +using NodeStack = CordRepBtree * [CordRepBtree::kMaxDepth]; +using EdgeType = CordRepBtree::EdgeType; +using OpResult = CordRepBtree::OpResult; +using CopyResult = CordRepBtree::CopyResult; + +constexpr auto kFront = CordRepBtree::kFront; +constexpr auto kBack = CordRepBtree::kBack; + +inline bool exhaustive_validation() { + return cord_btree_exhaustive_validation.load(std::memory_order_relaxed); +} + +// Implementation of the various 'Dump' functions. +// Prints the entire tree structure or 'rep'. External callers should +// not specify 'depth' and leave it to its default (0) value. +// Rep may be a CordRepBtree tree, or a SUBSTRING / EXTERNAL / FLAT node. +void DumpAll(const CordRep* rep, bool include_contents, std::ostream& stream, + int depth = 0) { + // Allow for full height trees + substring -> flat / external nodes. + assert(depth <= CordRepBtree::kMaxDepth + 2); + std::string sharing = const_cast<CordRep*>(rep)->refcount.IsOne() + ? std::string("Private") + : absl::StrCat("Shared(", rep->refcount.Get(), ")"); + std::string sptr = absl::StrCat("0x", absl::Hex(rep)); + + // Dumps the data contents of `rep` if `include_contents` is true. + // Always emits a new line character. + auto maybe_dump_data = [&stream, include_contents](const CordRep* r) { + if (include_contents) { + // Allow for up to 60 wide display of content data, which with some + // indentation and prefix / labels keeps us within roughly 80-100 wide. + constexpr size_t kMaxDataLength = 60; + stream << ", data = \"" + << EdgeData(r).substr(0, kMaxDataLength) + << (r->length > kMaxDataLength ? "\"..." : "\""); + } + stream << '\n'; + }; + + // For each level, we print the 'shared/private' state and the rep pointer, + // indented by two spaces per recursive depth. + stream << std::string(depth * 2, ' ') << sharing << " (" << sptr << ") "; + + if (rep->IsBtree()) { + const CordRepBtree* node = rep->btree(); + std::string label = + node->height() ? absl::StrCat("Node(", node->height(), ")") : "Leaf"; + stream << label << ", len = " << node->length + << ", begin = " << node->begin() << ", end = " << node->end() + << "\n"; + for (CordRep* edge : node->Edges()) { + DumpAll(edge, include_contents, stream, depth + 1); + } + } else if (rep->tag == SUBSTRING) { + const CordRepSubstring* substring = rep->substring(); + stream << "Substring, len = " << rep->length + << ", start = " << substring->start; + maybe_dump_data(rep); + DumpAll(substring->child, include_contents, stream, depth + 1); + } else if (rep->tag >= FLAT) { + stream << "Flat, len = " << rep->length + << ", cap = " << rep->flat()->Capacity(); + maybe_dump_data(rep); + } else if (rep->tag == EXTERNAL) { + stream << "Extn, len = " << rep->length; + maybe_dump_data(rep); + } +} + +// TODO(b/192061034): add 'bytes to copy' logic to avoid large slop on substring +// small data out of large reps, and general efficiency of 'always copy small +// data'. Consider making this a cord rep internal library function. +CordRepSubstring* CreateSubstring(CordRep* rep, size_t offset, size_t n) { + assert(n != 0); + assert(offset + n <= rep->length); + assert(offset != 0 || n != rep->length); + + if (rep->tag == SUBSTRING) { + CordRepSubstring* substring = rep->substring(); + offset += substring->start; + rep = CordRep::Ref(substring->child); + CordRep::Unref(substring); + } + assert(rep->IsExternal() || rep->IsFlat()); + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = rep; + return substring; +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset, size_t n) { + if (n == rep->length) return rep; + if (n == 0) return CordRep::Unref(rep), nullptr; + return CreateSubstring(rep, offset, n); +} + +// TODO(b/192061034): consider making this a cord rep library function. +inline CordRep* MakeSubstring(CordRep* rep, size_t offset) { + if (offset == 0) return rep; + return CreateSubstring(rep, offset, rep->length - offset); +} + +// Resizes `edge` to the provided `length`. Adopts a reference on `edge`. +// This method directly returns `edge` if `length` equals `edge->length`. +// If `is_mutable` is set to true, this function may return `edge` with +// `edge->length` set to the new length depending on the type and size of +// `edge`. Otherwise, this function returns a new CordRepSubstring value. +// Requires `length > 0 && length <= edge->length`. +CordRep* ResizeEdge(CordRep* edge, size_t length, bool is_mutable) { + assert(length > 0); + assert(length <= edge->length); + assert(IsDataEdge(edge)); + if (length >= edge->length) return edge; + + if (is_mutable && (edge->tag >= FLAT || edge->tag == SUBSTRING)) { + edge->length = length; + return edge; + } + + return CreateSubstring(edge, 0, length); +} + +template <EdgeType edge_type> +inline absl::string_view Consume(absl::string_view s, size_t n) { + return edge_type == kBack ? s.substr(n) : s.substr(0, s.size() - n); +} + +template <EdgeType edge_type> +inline absl::string_view Consume(char* dst, absl::string_view s, size_t n) { + if (edge_type == kBack) { + memcpy(dst, s.data(), n); + return s.substr(n); + } else { + const size_t offset = s.size() - n; + memcpy(dst, s.data() + offset, n); + return s.substr(0, offset); + } +} + +// Known issue / optimization weirdness: the store associated with the +// decrement introduces traffic between cpus (even if the result of that +// traffic does nothing), making this faster than a single call to +// refcount.Decrement() checking the zero refcount condition. +template <typename R, typename Fn> +inline void FastUnref(R* r, Fn&& fn) { + if (r->refcount.IsOne()) { + fn(r); + } else if (!r->refcount.DecrementExpectHighRefcount()) { + fn(r); + } +} + + +void DeleteSubstring(CordRepSubstring* substring) { + CordRep* rep = substring->child; + if (!rep->refcount.Decrement()) { + if (rep->tag >= FLAT) { + CordRepFlat::Delete(rep->flat()); + } else { + assert(rep->tag == EXTERNAL); + CordRepExternal::Delete(rep->external()); + } + } + delete substring; +} + +// Deletes a leaf node data edge. Requires `IsDataEdge(rep)`. +void DeleteLeafEdge(CordRep* rep) { + assert(IsDataEdge(rep)); + if (rep->tag >= FLAT) { + CordRepFlat::Delete(rep->flat()); + } else if (rep->tag == EXTERNAL) { + CordRepExternal::Delete(rep->external()); + } else { + DeleteSubstring(rep->substring()); + } +} + +// StackOperations contains the logic to build a left-most or right-most stack +// (leg) down to the leaf level of a btree, and 'unwind' / 'Finalize' methods to +// propagate node changes up the stack. +template <EdgeType edge_type> +struct StackOperations { + // Returns true if the node at 'depth' is not shared, i.e. has a refcount + // of one and all of its parent nodes have a refcount of one. + inline bool owned(int depth) const { return depth < share_depth; } + + // Returns the node at 'depth'. + inline CordRepBtree* node(int depth) const { return stack[depth]; } + + // Builds a `depth` levels deep stack starting at `tree` recording which nodes + // are private in the form of the 'share depth' where nodes are shared. + inline CordRepBtree* BuildStack(CordRepBtree* tree, int depth) { + assert(depth <= tree->height()); + int current_depth = 0; + while (current_depth < depth && tree->refcount.IsOne()) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + share_depth = current_depth + (tree->refcount.IsOne() ? 1 : 0); + while (current_depth < depth) { + stack[current_depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + return tree; + } + + // Builds a stack with the invariant that all nodes are private owned / not + // shared. This is used in iterative updates where a previous propagation + // guaranteed all nodes are owned / private. + inline void BuildOwnedStack(CordRepBtree* tree, int height) { + assert(height <= CordRepBtree::kMaxHeight); + int depth = 0; + while (depth < height) { + assert(tree->refcount.IsOne()); + stack[depth++] = tree; + tree = tree->Edge(edge_type)->btree(); + } + assert(tree->refcount.IsOne()); + share_depth = depth + 1; + } + + // Processes the final 'top level' result action for the tree. + // See the 'Action' enum for the various action implications. + static inline CordRepBtree* Finalize(CordRepBtree* tree, OpResult result) { + switch (result.action) { + case CordRepBtree::kPopped: + tree = edge_type == kBack ? CordRepBtree::New(tree, result.tree) + : CordRepBtree::New(result.tree, tree); + if (ABSL_PREDICT_FALSE(tree->height() > CordRepBtree::kMaxHeight)) { + tree = CordRepBtree::Rebuild(tree); + ABSL_RAW_CHECK(tree->height() <= CordRepBtree::kMaxHeight, + "Max height exceeded"); + } + return tree; + case CordRepBtree::kCopied: + CordRep::Unref(tree); + ABSL_FALLTHROUGH_INTENDED; + case CordRepBtree::kSelf: + return result.tree; + } + ABSL_INTERNAL_UNREACHABLE; + return result.tree; + } + + // Propagate the action result in 'result' up into all nodes of the stack + // starting at depth 'depth'. 'length' contains the extra length of data that + // was added at the lowest level, and is updated into all nodes of the stack. + // See the 'Action' enum for the various action implications. + // If 'propagate' is true, then any copied node values are updated into the + // stack, which is used for iterative processing on the same stack. + template <bool propagate = false> + inline CordRepBtree* Unwind(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + // TODO(mvels): revisit the below code to check if 3 loops with 3 + // (incremental) conditions is faster than 1 loop with a switch. + // Benchmarking and perf recordings indicate the loop with switch is + // fastest, likely because of indirect jumps on the tight case values and + // dense branches. But it's worth considering 3 loops, as the `action` + // transitions are mono directional. E.g.: + // while (action == kPopped) { + // ... + // } + // while (action == kCopied) { + // ... + // } + // ... + // We also found that an "if () do {}" loop here seems faster, possibly + // because it allows the branch predictor more granular heuristics on + // 'single leaf' (`depth` == 0) and 'single depth' (`depth` == 1) cases + // which appear to be the most common use cases. + if (depth != 0) { + do { + CordRepBtree* node = stack[--depth]; + const bool owned = depth < share_depth; + switch (result.action) { + case CordRepBtree::kPopped: + assert(!propagate); + result = node->AddEdge<edge_type>(owned, result.tree, length); + break; + case CordRepBtree::kCopied: + result = node->SetEdge<edge_type>(owned, result.tree, length); + if (propagate) stack[depth] = result.tree; + break; + case CordRepBtree::kSelf: + node->length += length; + while (depth > 0) { + node = stack[--depth]; + node->length += length; + } + return node; + } + } while (depth > 0); + } + return Finalize(tree, result); + } + + // Invokes `Unwind` with `propagate=true` to update the stack node values. + inline CordRepBtree* Propagate(CordRepBtree* tree, int depth, size_t length, + OpResult result) { + return Unwind</*propagate=*/true>(tree, depth, length, result); + } + + // `share_depth` contains the depth at which the nodes in the stack become + // shared. I.e., if the top most level is shared (i.e.: `!refcount.IsOne()`), + // then `share_depth` is 0. If the 2nd node is shared (and implicitly all + // nodes below that) then `share_depth` is 1, etc. A `share_depth` greater + // than the depth of the stack indicates that none of the nodes in the stack + // are shared. + int share_depth; + + NodeStack stack; +}; + +} // namespace + +void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, + bool include_contents, std::ostream& stream) { + stream << "===================================\n"; + if (!label.empty()) { + stream << label << '\n'; + stream << "-----------------------------------\n"; + } + if (rep) { + DumpAll(rep, include_contents, stream); + } else { + stream << "NULL\n"; + } +} + +void CordRepBtree::Dump(const CordRep* rep, absl::string_view label, + std::ostream& stream) { + Dump(rep, label, false, stream); +} + +void CordRepBtree::Dump(const CordRep* rep, std::ostream& stream) { + Dump(rep, absl::string_view(), false, stream); +} + +template <size_t size> +static void DestroyTree(CordRepBtree* tree) { + for (CordRep* node : tree->Edges()) { + if (node->refcount.Decrement()) continue; + for (CordRep* edge : node->btree()->Edges()) { + if (edge->refcount.Decrement()) continue; + if (size == 1) { + DeleteLeafEdge(edge); + } else { + CordRepBtree::Destroy(edge->btree()); + } + } + CordRepBtree::Delete(node->btree()); + } + CordRepBtree::Delete(tree); +} + +void CordRepBtree::Destroy(CordRepBtree* tree) { + switch (tree->height()) { + case 0: + for (CordRep* edge : tree->Edges()) { + if (!edge->refcount.Decrement()) { + DeleteLeafEdge(edge); + } + } + return CordRepBtree::Delete(tree); + case 1: + return DestroyTree<1>(tree); + default: + return DestroyTree<2>(tree); + } +} + +bool CordRepBtree::IsValid(const CordRepBtree* tree, bool shallow) { +#define NODE_CHECK_VALID(x) \ + if (!(x)) { \ + ABSL_RAW_LOG(ERROR, "CordRepBtree::CheckValid() FAILED: %s", #x); \ + return false; \ + } +#define NODE_CHECK_EQ(x, y) \ + if ((x) != (y)) { \ + ABSL_RAW_LOG(ERROR, \ + "CordRepBtree::CheckValid() FAILED: %s != %s (%s vs %s)", #x, \ + #y, absl::StrCat(x).c_str(), absl::StrCat(y).c_str()); \ + return false; \ + } + + NODE_CHECK_VALID(tree != nullptr); + NODE_CHECK_VALID(tree->IsBtree()); + NODE_CHECK_VALID(tree->height() <= kMaxHeight); + NODE_CHECK_VALID(tree->begin() < tree->capacity()); + NODE_CHECK_VALID(tree->end() <= tree->capacity()); + NODE_CHECK_VALID(tree->begin() <= tree->end()); + size_t child_length = 0; + for (CordRep* edge : tree->Edges()) { + NODE_CHECK_VALID(edge != nullptr); + if (tree->height() > 0) { + NODE_CHECK_VALID(edge->IsBtree()); + NODE_CHECK_VALID(edge->btree()->height() == tree->height() - 1); + } else { + NODE_CHECK_VALID(IsDataEdge(edge)); + } + child_length += edge->length; + } + NODE_CHECK_EQ(child_length, tree->length); + if ((!shallow || exhaustive_validation()) && tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + if (!IsValid(edge->btree(), shallow)) return false; + } + } + return true; + +#undef NODE_CHECK_VALID +#undef NODE_CHECK_EQ +} + +#ifndef NDEBUG + +CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool shallow) { + if (!IsValid(tree, shallow)) { + Dump(tree, "CordRepBtree validation failed:", false, std::cout); + ABSL_RAW_LOG(FATAL, "CordRepBtree::CheckValid() FAILED"); + } + return tree; +} + +#endif // NDEBUG + +template <EdgeType edge_type> +inline OpResult CordRepBtree::AddEdge(bool owned, CordRep* edge, size_t delta) { + if (size() >= kMaxCapacity) return {New(edge), kPopped}; + OpResult result = ToOpResult(owned); + result.tree->Add<edge_type>(edge); + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) { + OpResult result; + const size_t idx = index(edge_type); + if (owned) { + result = {this, kSelf}; + CordRep::Unref(edges_[idx]); + } else { + // Create a copy containing all unchanged edges. Unchanged edges are the + // open interval [begin, back) or [begin + 1, end) depending on `edge_type`. + // We conveniently cover both case using a constexpr `shift` being 0 or 1 + // as `end :== back + 1`. + result = {CopyRaw(), kCopied}; + constexpr int shift = edge_type == kFront ? 1 : 0; + for (CordRep* r : Edges(begin() + shift, back() + shift)) { + CordRep::Ref(r); + } + } + result.tree->edges_[idx] = edge; + result.tree->length += delta; + return result; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddCordRep(CordRepBtree* tree, CordRep* rep) { + const int depth = tree->height(); + const size_t length = rep->length; + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + const OpResult result = + leaf->AddEdge<edge_type>(ops.owned(depth), rep, length); + return ops.Unwind(tree, depth, length, result); +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kBack>(absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t end = 0; + const size_t cap = leaf->capacity(); + while (!data.empty() && end != cap) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[end++] = flat; + data = Consume<kBack>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_end(end); + return leaf; +} + +template <> +CordRepBtree* CordRepBtree::NewLeaf<kFront>(absl::string_view data, + size_t extra) { + CordRepBtree* leaf = CordRepBtree::New(0); + size_t length = 0; + size_t begin = leaf->capacity(); + leaf->set_end(leaf->capacity()); + while (!data.empty() && begin != 0) { + auto* flat = CordRepFlat::New(data.length() + extra); + flat->length = (std::min)(data.length(), flat->Capacity()); + length += flat->length; + leaf->edges_[--begin] = flat; + data = Consume<kFront>(flat->Data(), data, flat->length); + } + leaf->length = length; + leaf->set_begin(begin); + return leaf; +} + +template <> +absl::string_view CordRepBtree::AddData<kBack>(absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignBegin(); + const size_t cap = capacity(); + do { + CordRepFlat* flat = CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[fetch_add_end(1)] = flat; + data = Consume<kBack>(flat->Data(), data, n); + } while (!data.empty() && end() != cap); + return data; +} + +template <> +absl::string_view CordRepBtree::AddData<kFront>(absl::string_view data, + size_t extra) { + assert(!data.empty()); + assert(size() < capacity()); + AlignEnd(); + do { + CordRepFlat* flat = CordRepFlat::New(data.length() + extra); + const size_t n = (std::min)(data.length(), flat->Capacity()); + flat->length = n; + edges_[sub_fetch_begin(1)] = flat; + data = Consume<kFront>(flat->Data(), data, n); + } while (!data.empty() && begin() != 0); + return data; +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::AddData(CordRepBtree* tree, absl::string_view data, + size_t extra) { + if (ABSL_PREDICT_FALSE(data.empty())) return tree; + + const size_t original_data_size = data.size(); + int depth = tree->height(); + StackOperations<edge_type> ops; + CordRepBtree* leaf = ops.BuildStack(tree, depth); + + // If there is capacity in the last edge, append as much data + // as possible into this last edge. + if (leaf->size() < leaf->capacity()) { + OpResult result = leaf->ToOpResult(ops.owned(depth)); + data = result.tree->AddData<edge_type>(data, extra); + if (data.empty()) { + result.tree->length += original_data_size; + return ops.Unwind(tree, depth, original_data_size, result); + } + + // We added some data into this leaf, but not all. Propagate the added + // length to the top most node, and rebuild the stack with any newly copied + // or updated nodes. From this point on, the path (leg) from the top most + // node to the right-most node towards the leaf node is privately owned. + size_t delta = original_data_size - data.size(); + assert(delta > 0); + result.tree->length += delta; + tree = ops.Propagate(tree, depth, delta, result); + ops.share_depth = depth + 1; + } + + // We were unable to append all data into the existing right-most leaf node. + // This means all remaining data must be put into (a) new leaf node(s) which + // we append to the tree. To make this efficient, we iteratively build full + // leaf nodes from `data` until the created leaf contains all remaining data. + // We utilize the `Unwind` method to merge the created leaf into the first + // level towards root that has capacity. On each iteration with remaining + // data, we rebuild the stack in the knowledge that right-most nodes are + // privately owned after the first `Unwind` completes. + for (;;) { + OpResult result = {CordRepBtree::NewLeaf<edge_type>(data, extra), kPopped}; + if (result.tree->length == data.size()) { + return ops.Unwind(tree, depth, result.tree->length, result); + } + data = Consume<edge_type>(data, result.tree->length); + tree = ops.Unwind(tree, depth, result.tree->length, result); + depth = tree->height(); + ops.BuildOwnedStack(tree, depth); + } +} + +template <EdgeType edge_type> +CordRepBtree* CordRepBtree::Merge(CordRepBtree* dst, CordRepBtree* src) { + assert(dst->height() >= src->height()); + + // Capture source length as we may consume / destroy `src`. + const size_t length = src->length; + + // We attempt to merge `src` at its corresponding height in `dst`. + const int depth = dst->height() - src->height(); + StackOperations<edge_type> ops; + CordRepBtree* merge_node = ops.BuildStack(dst, depth); + + // If there is enough space in `merge_node` for all edges from `src`, add all + // edges to this node, making a fresh copy as needed if not privately owned. + // If `merge_node` does not have capacity for `src`, we rely on `Unwind` and + // `Finalize` to merge `src` into the first level towards `root` where there + // is capacity for another edge, or create a new top level node. + OpResult result; + if (merge_node->size() + src->size() <= kMaxCapacity) { + result = merge_node->ToOpResult(ops.owned(depth)); + result.tree->Add<edge_type>(src->Edges()); + result.tree->length += src->length; + if (src->refcount.IsOne()) { + Delete(src); + } else { + for (CordRep* edge : src->Edges()) CordRep::Ref(edge); + CordRepBtree::Unref(src); + } + } else { + result = {src, kPopped}; + } + + // Unless we merged at the top level (i.e.: src and dst are equal height), + // unwind the result towards the top level, and finalize the result. + if (depth) { + return ops.Unwind(dst, depth, length, result); + } + return ops.Finalize(dst, result); +} + +CopyResult CordRepBtree::CopySuffix(size_t offset) { + assert(offset < this->length); + + // As long as `offset` starts inside the last edge, we can 'drop' the current + // depth. For the most extreme example: if offset references the last data + // edge in the tree, there is only a single edge / path from the top of the + // tree to that last edge, so we can drop all the nodes except that edge. + // The fast path check for this is `back->length >= length - offset`. + int height = this->height(); + CordRepBtree* node = this; + size_t len = node->length - offset; + CordRep* back = node->Edge(kBack); + while (back->length >= len) { + offset = back->length - len; + if (--height < 0) { + return {MakeSubstring(CordRep::Ref(back), offset), height}; + } + node = back->btree(); + back = node->Edge(kBack); + } + if (offset == 0) return {CordRep::Ref(node), height}; + + // Offset does not point into the last edge, so we span at least two edges. + // Find the index of offset with `IndexBeyond` which provides us the edge + // 'beyond' the offset if offset is not a clean starting point of an edge. + Position pos = node->IndexBeyond(offset); + CordRepBtree* sub = node->CopyToEndFrom(pos.index, len); + const CopyResult result = {sub, height}; + + // `pos.n` contains a non zero value if the offset is not an exact starting + // point of an edge. In this case, `pos.n` contains the 'trailing' amount of + // bytes of the edge preceding that in `pos.index`. We need to iteratively + // adjust the preceding edge with the 'broken' offset until we have a perfect + // start of the edge. + while (pos.n != 0) { + assert(pos.index >= 1); + const size_t begin = pos.index - 1; + sub->set_begin(begin); + CordRep* const edge = node->Edge(begin); + + len = pos.n; + offset = edge->length - len; + + if (--height < 0) { + sub->edges_[begin] = MakeSubstring(CordRep::Ref(edge), offset, len); + return result; + } + + node = edge->btree(); + pos = node->IndexBeyond(offset); + + CordRepBtree* nsub = node->CopyToEndFrom(pos.index, len); + sub->edges_[begin] = nsub; + sub = nsub; + } + sub->set_begin(pos.index); + return result; +} + +CopyResult CordRepBtree::CopyPrefix(size_t n, bool allow_folding) { + assert(n > 0); + assert(n <= this->length); + + // As long as `n` does not exceed the length of the first edge, we can 'drop' + // the current depth. For the most extreme example: if we'd copy a 1 byte + // prefix from a tree, there is only a single edge / path from the top of the + // tree to the single data edge containing this byte, so we can drop all the + // nodes except the data node. + int height = this->height(); + CordRepBtree* node = this; + CordRep* front = node->Edge(kFront); + if (allow_folding) { + while (front->length >= n) { + if (--height < 0) return {MakeSubstring(CordRep::Ref(front), 0, n), -1}; + node = front->btree(); + front = node->Edge(kFront); + } + } + if (node->length == n) return {CordRep::Ref(node), height}; + + // `n` spans at least two nodes, find the end point of the span. + Position pos = node->IndexOf(n); + + // Create a partial copy of the node up to `pos.index`, with a defined length + // of `n`. Any 'partial last edge' is added further below as needed. + CordRepBtree* sub = node->CopyBeginTo(pos.index, n); + const CopyResult result = {sub, height}; + + // `pos.n` contains the 'offset inside the edge for IndexOf(n)'. As long as + // this is not zero, we don't have a 'clean cut', so we need to make a + // (partial) copy of that last edge, and repeat this until pos.n is zero. + while (pos.n != 0) { + size_t end = pos.index; + n = pos.n; + + CordRep* edge = node->Edge(pos.index); + if (--height < 0) { + sub->edges_[end++] = MakeSubstring(CordRep::Ref(edge), 0, n); + sub->set_end(end); + AssertValid(result.edge->btree()); + return result; + } + + node = edge->btree(); + pos = node->IndexOf(n); + CordRepBtree* nsub = node->CopyBeginTo(pos.index, n); + sub->edges_[end++] = nsub; + sub->set_end(end); + sub = nsub; + } + sub->set_end(pos.index); + AssertValid(result.edge->btree()); + return result; +} + +CordRep* CordRepBtree::ExtractFront(CordRepBtree* tree) { + CordRep* front = tree->Edge(tree->begin()); + if (tree->refcount.IsOne()) { + Unref(tree->Edges(tree->begin() + 1, tree->end())); + CordRepBtree::Delete(tree); + } else { + CordRep::Ref(front); + CordRep::Unref(tree); + } + return front; +} + +CordRepBtree* CordRepBtree::ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length) { + assert(end <= tree->end()); + if (tree->refcount.IsOne()) { + Unref(tree->Edges(end, tree->end())); + tree->set_end(end); + tree->length = new_length; + } else { + CordRepBtree* old = tree; + tree = tree->CopyBeginTo(end, new_length); + CordRep::Unref(old); + } + return tree; +} + +CordRep* CordRepBtree::RemoveSuffix(CordRepBtree* tree, size_t n) { + // Check input and deal with trivial cases 'Remove all/none' + assert(tree != nullptr); + assert(n <= tree->length); + const size_t len = tree->length; + if (ABSL_PREDICT_FALSE(n == 0)) { + return tree; + } + if (ABSL_PREDICT_FALSE(n >= len)) { + CordRepBtree::Unref(tree); + return nullptr; + } + + size_t length = len - n; + int height = tree->height(); + bool is_mutable = tree->refcount.IsOne(); + + // Extract all top nodes which are reduced to size = 1 + Position pos = tree->IndexOfLength(length); + while (pos.index == tree->begin()) { + CordRep* edge = ExtractFront(tree); + is_mutable &= edge->refcount.IsOne(); + if (height-- == 0) return ResizeEdge(edge, length, is_mutable); + tree = edge->btree(); + pos = tree->IndexOfLength(length); + } + + // Repeat the following sequence traversing down the tree: + // - Crop the top node to the 'last remaining edge' adjusting length. + // - Set the length for down edges to the partial length in that last edge. + // - Repeat this until the last edge is 'included in full' + // - If we hit the data edge level, resize and return the last data edge + CordRepBtree* top = tree = ConsumeBeginTo(tree, pos.index + 1, length); + CordRep* edge = tree->Edge(pos.index); + length = pos.n; + while (length != edge->length) { + // ConsumeBeginTo guarantees `tree` is a clean, privately owned copy. + assert(tree->refcount.IsOne()); + const bool edge_is_mutable = edge->refcount.IsOne(); + + if (height-- == 0) { + tree->edges_[pos.index] = ResizeEdge(edge, length, edge_is_mutable); + return AssertValid(top); + } + + if (!edge_is_mutable) { + // We can't 'in place' remove any suffixes down this edge. + // Replace this edge with a prefix copy instead. + tree->edges_[pos.index] = edge->btree()->CopyPrefix(length, false).edge; + CordRep::Unref(edge); + return AssertValid(top); + } + + // Move down one level, rinse repeat. + tree = edge->btree(); + pos = tree->IndexOfLength(length); + tree = ConsumeBeginTo(edge->btree(), pos.index + 1, length); + edge = tree->Edge(pos.index); + length = pos.n; + } + + return AssertValid(top); +} + +CordRep* CordRepBtree::SubTree(size_t offset, size_t n) { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return nullptr; + + CordRepBtree* node = this; + int height = node->height(); + Position front = node->IndexOf(offset); + CordRep* left = node->edges_[front.index]; + while (front.n + n <= left->length) { + if (--height < 0) return MakeSubstring(CordRep::Ref(left), front.n, n); + node = left->btree(); + front = node->IndexOf(front.n); + left = node->edges_[front.index]; + } + + const Position back = node->IndexBefore(front, n); + CordRep* const right = node->edges_[back.index]; + assert(back.index > front.index); + + // Get partial suffix and prefix entries. + CopyResult prefix; + CopyResult suffix; + if (height > 0) { + // Copy prefix and suffix of the boundary nodes. + prefix = left->btree()->CopySuffix(front.n); + suffix = right->btree()->CopyPrefix(back.n); + + // If there is an edge between the prefix and suffix edges, then the tree + // must remain at its previous (full) height. If we have no edges between + // prefix and suffix edges, then the tree must be as high as either the + // suffix or prefix edges (which are collapsed to their minimum heights). + if (front.index + 1 == back.index) { + height = (std::max)(prefix.height, suffix.height) + 1; + } + + // Raise prefix and suffixes to the new tree height. + for (int h = prefix.height + 1; h < height; ++h) { + prefix.edge = CordRepBtree::New(prefix.edge); + } + for (int h = suffix.height + 1; h < height; ++h) { + suffix.edge = CordRepBtree::New(suffix.edge); + } + } else { + // Leaf node, simply take substrings for prefix and suffix. + prefix = CopyResult{MakeSubstring(CordRep::Ref(left), front.n), -1}; + suffix = CopyResult{MakeSubstring(CordRep::Ref(right), 0, back.n), -1}; + } + + // Compose resulting tree. + CordRepBtree* sub = CordRepBtree::New(height); + size_t end = 0; + sub->edges_[end++] = prefix.edge; + for (CordRep* r : node->Edges(front.index + 1, back.index)) { + sub->edges_[end++] = CordRep::Ref(r); + } + sub->edges_[end++] = suffix.edge; + sub->set_end(end); + sub->length = n; + return AssertValid(sub); +} + +CordRepBtree* CordRepBtree::MergeTrees(CordRepBtree* left, + CordRepBtree* right) { + return left->height() >= right->height() ? Merge<kBack>(left, right) + : Merge<kFront>(right, left); +} + +bool CordRepBtree::IsFlat(absl::string_view* fragment) const { + if (height() == 0 && size() == 1) { + if (fragment) *fragment = Data(begin()); + return true; + } + return false; +} + +bool CordRepBtree::IsFlat(size_t offset, const size_t n, + absl::string_view* fragment) const { + assert(n <= this->length); + assert(offset <= this->length - n); + if (ABSL_PREDICT_FALSE(n == 0)) return false; + int height = this->height(); + const CordRepBtree* node = this; + for (;;) { + const Position front = node->IndexOf(offset); + const CordRep* edge = node->Edge(front.index); + if (edge->length < front.n + n) return false; + if (--height < 0) { + if (fragment) *fragment = EdgeData(edge).substr(front.n, n); + return true; + } + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +char CordRepBtree::GetCharacter(size_t offset) const { + assert(offset < length); + const CordRepBtree* node = this; + int height = node->height(); + for (;;) { + Position front = node->IndexOf(offset); + if (--height < 0) return node->Data(front.index)[front.n]; + offset = front.n; + node = node->Edge(front.index)->btree(); + } +} + +Span<char> CordRepBtree::GetAppendBufferSlow(size_t size) { + // The inlined version in `GetAppendBuffer()` deals with all heights <= 3. + assert(height() >= 4); + assert(refcount.IsOne()); + + // Build a stack of nodes we may potentially need to update if we find a + // non-shared FLAT with capacity at the leaf level. + const int depth = height(); + CordRepBtree* node = this; + CordRepBtree* stack[kMaxDepth]; + for (int i = 0; i < depth; ++i) { + node = node->Edge(kBack)->btree(); + if (!node->refcount.IsOne()) return {}; + stack[i] = node; + } + + // Must be a privately owned, mutable flat. + CordRep* const edge = node->Edge(kBack); + if (!edge->refcount.IsOne() || edge->tag < FLAT) return {}; + + // Must have capacity. + const size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + + // Build span on remaining capacity. + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + this->length += delta; + for (int i = 0; i < depth; ++i) { + stack[i]->length += delta; + } + return span; +} + +CordRepBtree* CordRepBtree::CreateSlow(CordRep* rep) { + if (rep->IsBtree()) return rep->btree(); + + CordRepBtree* node = nullptr; + auto consume = [&node](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + if (node == nullptr) { + node = New(r); + } else { + node = CordRepBtree::AddCordRep<kBack>(node, r); + } + }; + Consume(rep, consume); + return node; +} + +CordRepBtree* CordRepBtree::AppendSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(tree, rep->btree()); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kBack>(tree, r); + }; + Consume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::PrependSlow(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(rep->IsBtree())) { + return MergeTrees(rep->btree(), tree); + } + auto consume = [&tree](CordRep* r, size_t offset, size_t length) { + r = MakeSubstring(r, offset, length); + tree = CordRepBtree::AddCordRep<kFront>(tree, r); + }; + ReverseConsume(rep, consume); + return tree; +} + +CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kBack>(tree, data, extra); +} + +CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, absl::string_view data, + size_t extra) { + return CordRepBtree::AddData<kFront>(tree, data, extra); +} + +template CordRepBtree* CordRepBtree::AddCordRep<kFront>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddCordRep<kBack>(CordRepBtree* tree, + CordRep* rep); +template CordRepBtree* CordRepBtree::AddData<kFront>(CordRepBtree* tree, + absl::string_view data, + size_t extra); +template CordRepBtree* CordRepBtree::AddData<kBack>(CordRepBtree* tree, + absl::string_view data, + size_t extra); + +void CordRepBtree::Rebuild(CordRepBtree** stack, CordRepBtree* tree, + bool consume) { + bool owned = consume && tree->refcount.IsOne(); + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + if (!owned) edge = CordRep::Ref(edge); + size_t height = 0; + size_t length = edge->length; + CordRepBtree* node = stack[0]; + OpResult result = node->AddEdge<kBack>(true, edge, length); + while (result.action == CordRepBtree::kPopped) { + stack[height] = result.tree; + if (stack[++height] == nullptr) { + result.action = CordRepBtree::kSelf; + stack[height] = CordRepBtree::New(node, result.tree); + } else { + node = stack[height]; + result = node->AddEdge<kBack>(true, result.tree, length); + } + } + while (stack[++height] != nullptr) { + stack[height]->length += length; + } + } + } else { + for (CordRep* rep : tree->Edges()) { + Rebuild(stack, rep->btree(), owned); + } + } + if (consume) { + if (owned) { + CordRepBtree::Delete(tree); + } else { + CordRepBtree::Unref(tree); + } + } +} + +CordRepBtree* CordRepBtree::Rebuild(CordRepBtree* tree) { + // Set up initial stack with empty leaf node. + CordRepBtree* node = CordRepBtree::New(); + CordRepBtree* stack[CordRepBtree::kMaxDepth + 1] = {node}; + + // Recursively build the tree, consuming the input tree. + Rebuild(stack, tree, /* consume reference */ true); + + // Return top most node + for (CordRepBtree* parent : stack) { + if (parent == nullptr) return node; + node = parent; + } + + // Unreachable + assert(false); + return nullptr; +} + +CordRepBtree::ExtractResult CordRepBtree::ExtractAppendBuffer( + CordRepBtree* tree, size_t extra_capacity) { + int depth = 0; + NodeStack stack; + + // Set up default 'no success' result which is {tree, nullptr}. + ExtractResult result; + result.tree = tree; + result.extracted = nullptr; + + // Dive down the right side of the tree, making sure no edges are shared. + while (tree->height() > 0) { + if (!tree->refcount.IsOne()) return result; + stack[depth++] = tree; + tree = tree->Edge(kBack)->btree(); + } + if (!tree->refcount.IsOne()) return result; + + // Validate we ended on a non shared flat. + CordRep* rep = tree->Edge(kBack); + if (!(rep->IsFlat() && rep->refcount.IsOne())) return result; + + // Verify it has at least the requested extra capacity. + CordRepFlat* flat = rep->flat(); + const size_t length = flat->length; + const size_t avail = flat->Capacity() - flat->length; + if (extra_capacity > avail) return result; + + // Set the extracted flat in the result. + result.extracted = flat; + + // Cascading delete all nodes that become empty. + while (tree->size() == 1) { + CordRepBtree::Delete(tree); + if (--depth < 0) { + // We consumed the entire tree: return nullptr for new tree. + result.tree = nullptr; + return result; + } + rep = tree; + tree = stack[depth]; + } + + // Remove the edge or cascaded up parent node. + tree->set_end(tree->end() - 1); + tree->length -= length; + + // Adjust lengths up the tree. + while (depth > 0) { + tree = stack[--depth]; + tree->length -= length; + } + + // Remove unnecessary top nodes with size = 1. This may iterate all the way + // down to the leaf node in which case we simply return the remaining last + // edge in that node and the extracted flat. + while (tree->size() == 1) { + int height = tree->height(); + rep = tree->Edge(kBack); + Delete(tree); + if (height == 0) { + // We consumed the leaf: return the sole data edge as the new tree. + result.tree = rep; + return result; + } + tree = rep->btree(); + } + + // Done: return the (new) top level node and extracted flat. + result.tree = tree; + return result; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h new file mode 100644 index 00000000..2cbc09ec --- /dev/null +++ b/absl/strings/internal/cord_rep_btree.h @@ -0,0 +1,924 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ + +#include <cassert> +#include <cstdint> +#include <iosfwd> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/optimization.h" +#include "absl/strings/internal/cord_data_edge.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordRepBtreeNavigator; + +// CordRepBtree is as the name implies a btree implementation of a Cordrep tree. +// Data is stored at the leaf level only, non leaf nodes contain down pointers +// only. Allowed types of data edges are FLAT, EXTERNAL and SUBSTRINGs of FLAT +// or EXTERNAL nodes. The implementation allows for data to be added to either +// end of the tree only, it does not provide any 'insert' logic. This has the +// benefit that we can expect good fill ratios: all nodes except the outer +// 'legs' will have 100% fill ratios for trees built using Append/Prepend +// methods. Merged trees will typically have a fill ratio well above 50% as in a +// similar fashion, one side of the merged tree will typically have a 100% fill +// ratio, and the 'open' end will average 50%. All operations are O(log(n)) or +// better, and the tree never needs balancing. +// +// All methods accepting a CordRep* or CordRepBtree* adopt a reference on that +// input unless explicitly stated otherwise. All functions returning a CordRep* +// or CordRepBtree* instance transfer a reference back to the caller. +// Simplified, callers both 'donate' and 'consume' a reference count on each +// call, simplifying the API. An example of building a tree: +// +// CordRepBtree* tree = CordRepBtree::Create(MakeFlat("Hello")); +// tree = CordRepBtree::Append(tree, MakeFlat("world")); +// +// In the above example, all inputs are consumed, making each call affecting +// `tree` reference count neutral. The returned `tree` value can be different +// from the input if the input is shared with other threads, or if the tree +// grows in height, but callers typically never have to concern themselves with +// that and trust that all methods DTRT at all times. +class CordRepBtree : public CordRep { + public: + // EdgeType identifies `front` and `back` enum values. + // Various implementations in CordRepBtree such as `Add` and `Edge` are + // generic and templated on operating on either of the boundary edges. + // For more information on the possible edges contained in a CordRepBtree + // instance see the documentation for `edges_`. + enum class EdgeType { kFront, kBack }; + + // Convenience constants into `EdgeType` + static constexpr EdgeType kFront = EdgeType::kFront; + static constexpr EdgeType kBack = EdgeType::kBack; + + // Maximum number of edges: based on experiments and performance data, we can + // pick suitable values resulting in optimum cacheline aligned values. The + // preferred values are based on 64-bit systems where we aim to align this + // class onto 64 bytes, i.e.: 6 = 64 bytes, 14 = 128 bytes, etc. + // TODO(b/192061034): experiment with alternative sizes. + static constexpr size_t kMaxCapacity = 6; + + // Reasonable maximum height of the btree. We can expect a fill ratio of at + // least 50%: trees are always expanded at the front or back. Concatenating + // trees will then typically fold at the top most node, where the lower nodes + // are at least at capacity on one side of joined inputs. At a lower fill + // rate of 4 edges per node, we have capacity for ~16 million leaf nodes. + // We will fail / abort if an application ever exceeds this height, which + // should be extremely rare (near impossible) and be an indication of an + // application error: we do not assume it reasonable for any application to + // operate correctly with such monster trees. + // Another compelling reason for the number `12` is that any contextual stack + // required for navigation or insertion requires 12 words and 12 bytes, which + // fits inside 2 cache lines with some room to spare, and is reasonable as a + // local stack variable compared to Cord's current near 400 bytes stack use. + // The maximum `height` value of a node is then `kMaxDepth - 1` as node height + // values start with a value of 0 for leaf nodes. + static constexpr int kMaxDepth = 12; + static constexpr int kMaxHeight = kMaxDepth - 1; + + // `Action` defines the action for unwinding changes done at the btree's leaf + // level that need to be propagated up to the parent node(s). Each operation + // on a node has an effect / action defined as follows: + // - kSelf + // The operation (add / update, etc) was performed directly on the node as + // the node is private to the current thread (i.e.: not shared directly or + // indirectly through a refcount > 1). Changes can be propagated directly to + // all parent nodes as all parent nodes are also then private to the current + // thread. + // - kCopied + // The operation (add / update, etc) was performed on a copy of the original + // node, as the node is (potentially) directly or indirectly shared with + // other threads. Changes need to be propagated into the parent nodes where + // the old down pointer must be unreffed and replaced with this new copy. + // Such changes to parent nodes may themselves require a copy if the parent + // node is also shared. A kCopied action can propagate all the way to the + // top node where we then must unref the `tree` input provided by the + // caller, and return the new copy. + // - kPopped + // The operation (typically add) could not be satisfied due to insufficient + // capacity in the targeted node, and a new 'leg' was created that needs to + // be added into the parent node. For example, adding a FLAT inside a leaf + // node that is at capacity will create a new leaf node containing that + // FLAT, that needs to be 'popped' up the btree. Such 'pop' actions can + // cascade up the tree if parent nodes are also at capacity. A 'Popped' + // action propagating all the way to the top of the tree will result in + // the tree becoming one level higher than the current tree through a final + // `CordRepBtree::New(tree, popped)` call, resulting in a new top node + // referencing the old tree and the new (fully popped upwards) 'leg'. + enum Action { kSelf, kCopied, kPopped }; + + // Result of an operation on a node. See the `Action` enum for details. + struct OpResult { + CordRepBtree* tree; + Action action; + }; + + // Return value of the CopyPrefix and CopySuffix methods which can + // return a node or data edge at any height inside the tree. + // A height of 0 defines the lowest (leaf) node, a height of -1 identifies + // `edge` as being a plain data node: EXTERNAL / FLAT or SUBSTRING thereof. + struct CopyResult { + CordRep* edge; + int height; + }; + + // Logical position inside a node: + // - index: index of the edge. + // - n: size or offset value depending on context. + struct Position { + size_t index; + size_t n; + }; + + // Creates a btree from the given input. Adopts a ref of `rep`. + // If the input `rep` is itself a btree, i.e., `IsBtree()`, then this + // function immediately returns `rep->btree()`. If the input is a valid data + // edge (see IsDataEdge()), then a new leaf node is returned containing `rep` + // as the sole data edge. Else, the input is assumed to be a (legacy) concat + // tree, and the input is consumed and transformed into a btree(). + static CordRepBtree* Create(CordRep* rep); + + // Destroys the provided tree. Should only be called by cord internal API's, + // typically after a ref_count.Decrement() on the last reference count. + static void Destroy(CordRepBtree* tree); + + // Destruction + static void Delete(CordRepBtree* tree) { delete tree; } + + // Use CordRep::Unref() as we overload for absl::Span<CordRep* const>. + using CordRep::Unref; + + // Unrefs all edges in `edges` which are assumed to be 'likely one'. + static void Unref(absl::Span<CordRep* const> edges); + + // Appends / Prepends an existing CordRep instance to this tree. + // The below methods accept three types of input: + // 1) `rep` is a data node (See `IsDataNode` for valid data edges). + // `rep` is appended or prepended to this tree 'as is'. + // 2) `rep` is a BTREE. + // `rep` is merged into `tree` respecting the Append/Prepend order. + // 3) `rep` is some other (legacy) type. + // `rep` is converted in place and added to `tree` + // Requires `tree` and `rep` to be not null. + static CordRepBtree* Append(CordRepBtree* tree, CordRep* rep); + static CordRepBtree* Prepend(CordRepBtree* tree, CordRep* rep); + + // Append/Prepend the data in `data` to this tree. + // The `extra` parameter defines how much extra capacity should be allocated + // for any additional FLAT being allocated. This is an optimization hint from + // the caller. For example, a caller may need to add 2 string_views of data + // "abc" and "defghi" which are not consecutive. The caller can in this case + // invoke `AddData(tree, "abc", 6)`, and any newly added flat is allocated + // where possible with at least 6 bytes of extra capacity beyond `length`. + // This helps avoiding data getting fragmented over multiple flats. + // There is no limit on the size of `data`. If `data` can not be stored inside + // a single flat, then the function will iteratively add flats until all data + // has been consumed and appended or prepended to the tree. + static CordRepBtree* Append(CordRepBtree* tree, string_view data, + size_t extra = 0); + static CordRepBtree* Prepend(CordRepBtree* tree, string_view data, + size_t extra = 0); + + // Returns a new tree, containing `n` bytes of data from this instance + // starting at offset `offset`. Where possible, the returned tree shares + // (re-uses) data edges and nodes with this instance to minimize the + // combined memory footprint of both trees. + // Requires `offset + n <= length`. Returns `nullptr` if `n` is zero. + CordRep* SubTree(size_t offset, size_t n); + + // Removes `n` trailing bytes from `tree`, and returns the resulting tree + // or data edge. Returns `tree` if n is zero, and nullptr if n == length. + // This function is logically identical to: + // result = tree->SubTree(0, tree->length - n); + // Unref(tree); + // return result; + // However, the actual implementation will as much as possible perform 'in + // place' modifications on the tree on all nodes and edges that are mutable. + // For example, in a fully privately owned tree with the last edge being a + // flat of length 12, RemoveSuffix(1) will simply set the length of that data + // edge to 11, and reduce the length of all nodes on the edge path by 1. + static CordRep* RemoveSuffix(CordRepBtree* tree, size_t n); + + // Returns the character at the given offset. + char GetCharacter(size_t offset) const; + + // Returns true if this node holds a single data edge, and if so, sets + // `fragment` to reference the contained data. `fragment` is an optional + // output parameter and allowed to be null. + bool IsFlat(absl::string_view* fragment) const; + + // Returns true if the data of `n` bytes starting at offset `offset` + // is contained in a single data edge, and if so, sets fragment to reference + // the contained data. `fragment` is an optional output parameter and allowed + // to be null. + bool IsFlat(size_t offset, size_t n, absl::string_view* fragment) const; + + // Returns a span (mutable range of bytes) of up to `size` bytes into the + // last FLAT data edge inside this tree under the following conditions: + // - none of the nodes down into the FLAT node are shared. + // - the last data edge in this tree is a non-shared FLAT. + // - the referenced FLAT has additional capacity available. + // If all these conditions are met, a non-empty span is returned, and the + // length of the flat node and involved tree nodes have been increased by + // `span.length()`. The caller is responsible for immediately assigning values + // to all uninitialized data reference by the returned span. + // Requires `this->refcount.IsOne()`: this function forces the caller to do + // this fast path check on the top level node, as this is the most commonly + // shared node of a cord tree. + Span<char> GetAppendBuffer(size_t size); + + // Extracts the right-most data edge from this tree iff: + // - the tree and all internal edges to the right-most node are not shared. + // - the right-most node is a FLAT node and not shared. + // - the right-most node has at least the desired extra capacity. + // + // Returns {tree, nullptr} if any of the above conditions are not met. + // This method effectively removes data from the tree. The intent of this + // method is to allow applications appending small string data to use + // pre-existing capacity, and add the modified rep back to the tree. + // + // Simplified such code would look similar to this: + // void MyTreeBuilder::Append(string_view data) { + // ExtractResult result = CordRepBtree::ExtractAppendBuffer(tree_, 1); + // if (CordRep* rep = result.extracted) { + // size_t available = rep->Capacity() - rep->length; + // size_t n = std::min(data.size(), n); + // memcpy(rep->Data(), data.data(), n); + // rep->length += n; + // data.remove_prefix(n); + // if (!result.tree->IsBtree()) { + // tree_ = CordRepBtree::Create(result.tree); + // } + // tree_ = CordRepBtree::Append(tree_, rep); + // } + // ... + // // Remaining edge in `result.tree`. + // } + static ExtractResult ExtractAppendBuffer(CordRepBtree* tree, + size_t extra_capacity = 1); + + // Returns the `height` of the tree. The height of a tree is limited to + // kMaxHeight. `height` is implemented as an `int` as in some places we + // use negative (-1) values for 'data edges'. + int height() const { return static_cast<int>(storage[0]); } + + // Properties: begin, back, end, front/back boundary indexes. + size_t begin() const { return static_cast<size_t>(storage[1]); } + size_t back() const { return static_cast<size_t>(storage[2]) - 1; } + size_t end() const { return static_cast<size_t>(storage[2]); } + size_t index(EdgeType edge) const { + return edge == kFront ? begin() : back(); + } + + // Properties: size and capacity. + // `capacity` contains the current capacity of this instance, where + // `kMaxCapacity` contains the maximum capacity of a btree node. + // For now, `capacity` and `kMaxCapacity` return the same value, but this may + // change in the future if we see benefit in dynamically sizing 'small' nodes + // to 'large' nodes for large data trees. + size_t size() const { return end() - begin(); } + size_t capacity() const { return kMaxCapacity; } + + // Edge access + inline CordRep* Edge(size_t index) const; + inline CordRep* Edge(EdgeType edge_type) const; + inline absl::Span<CordRep* const> Edges() const; + inline absl::Span<CordRep* const> Edges(size_t begin, size_t end) const; + + // Returns reference to the data edge at `index`. + // Requires this instance to be a leaf node, and `index` to be valid index. + inline absl::string_view Data(size_t index) const; + + // Diagnostics: returns true if `tree` is valid and internally consistent. + // If `shallow` is false, then the provided top level node and all child nodes + // below it are recursively checked. If `shallow` is true, only the provided + // node in `tree` and the cumulative length, type and height of the direct + // child nodes of `tree` are checked. The value of `shallow` is ignored if the + // internal `cord_btree_exhaustive_validation` diagnostics variable is true, + // in which case the performed validations works as if `shallow` were false. + // This function is intended for debugging and testing purposes only. + static bool IsValid(const CordRepBtree* tree, bool shallow = false); + + // Diagnostics: asserts that the provided tree is valid. + // `AssertValid()` performs a shallow validation by default. `shallow` can be + // set to false in which case an exhaustive validation is performed. This + // function is implemented in terms of calling `IsValid()` and asserting the + // return value to be true. See `IsValid()` for more information. + // This function is intended for debugging and testing purposes only. + static CordRepBtree* AssertValid(CordRepBtree* tree, bool shallow = true); + static const CordRepBtree* AssertValid(const CordRepBtree* tree, + bool shallow = true); + + // Diagnostics: dump the contents of this tree to `stream`. + // This function is intended for debugging and testing purposes only. + static void Dump(const CordRep* rep, std::ostream& stream); + static void Dump(const CordRep* rep, absl::string_view label, + std::ostream& stream); + static void Dump(const CordRep* rep, absl::string_view label, + bool include_contents, std::ostream& stream); + + // Adds the edge `edge` to this node if possible. `owned` indicates if the + // current node is potentially shared or not with other threads. Returns: + // - {kSelf, <this>} + // The edge was directly added to this node. + // - {kCopied, <node>} + // The edge was added to a copy of this node. + // - {kPopped, New(edge, height())} + // A new leg with the edge was created as this node has no extra capacity. + template <EdgeType edge_type> + inline OpResult AddEdge(bool owned, CordRep* edge, size_t delta); + + // Replaces the front or back edge with the provided new edge. Returns: + // - {kSelf, <this>} + // The edge was directly set in this node. The old edge is unreffed. + // - {kCopied, <node>} + // A copy of this node was created with the new edge value. + // In both cases, the function adopts a reference on `edge`. + template <EdgeType edge_type> + OpResult SetEdge(bool owned, CordRep* edge, size_t delta); + + // Creates a new empty node at the specified height. + static CordRepBtree* New(int height = 0); + + // Creates a new node containing `rep`, with the height being computed + // automatically based on the type of `rep`. + static CordRepBtree* New(CordRep* rep); + + // Creates a new node containing both `front` and `back` at height + // `front.height() + 1`. Requires `back.height() == front.height()`. + static CordRepBtree* New(CordRepBtree* front, CordRepBtree* back); + + // Creates a fully balanced tree from the provided tree by rebuilding a new + // tree from all data edges in the input. This function is automatically + // invoked internally when the tree exceeds the maximum height. + static CordRepBtree* Rebuild(CordRepBtree* tree); + + private: + CordRepBtree() = default; + ~CordRepBtree() = default; + + // Initializes the main properties `tag`, `begin`, `end`, `height`. + inline void InitInstance(int height, size_t begin = 0, size_t end = 0); + + // Direct property access begin / end + void set_begin(size_t begin) { storage[1] = static_cast<uint8_t>(begin); } + void set_end(size_t end) { storage[2] = static_cast<uint8_t>(end); } + + // Decreases the value of `begin` by `n`, and returns the new value. Notice + // how this returns the new value unlike atomic::fetch_add which returns the + // old value. This is because this is used to prepend edges at 'begin - 1'. + size_t sub_fetch_begin(size_t n) { + storage[1] -= static_cast<uint8_t>(n); + return storage[1]; + } + + // Increases the value of `end` by `n`, and returns the previous value. This + // function is typically used to append edges at 'end'. + size_t fetch_add_end(size_t n) { + const uint8_t current = storage[2]; + storage[2] = static_cast<uint8_t>(current + n); + return current; + } + + // Returns the index of the last edge starting on, or before `offset`, with + // `n` containing the relative offset of `offset` inside that edge. + // Requires `offset` < length. + Position IndexOf(size_t offset) const; + + // Returns the index of the last edge starting before `offset`, with `n` + // containing the relative offset of `offset` inside that edge. + // This function is useful to find the edges for some span of bytes ending at + // `offset` (i.e., `n` bytes). For example: + // + // Position pos = IndexBefore(n) + // edges = Edges(begin(), pos.index) // All full edges (may be empty) + // last = Sub(Edge(pos.index), 0, pos.n) // Last partial edge (may be empty) + // + // Requires 0 < `offset` <= length. + Position IndexBefore(size_t offset) const; + + // Returns the index of the edge ending at (or on) length `length`, and the + // number of bytes inside that edge up to `length`. For example, if we have a + // Node with 2 edges, one of 10 and one of 20 long, then IndexOfLength(27) + // will return {1, 17}, and IndexOfLength(10) will return {0, 10}. + Position IndexOfLength(size_t n) const; + + // Identical to the above function except starting from the position `front`. + // This function is equivalent to `IndexBefore(front.n + offset)`, with + // the difference that this function is optimized to start at `front.index`. + Position IndexBefore(Position front, size_t offset) const; + + // Returns the index of the edge directly beyond the edge containing offset + // `offset`, with `n` containing the distance of that edge from `offset`. + // This function is useful for iteratively finding suffix nodes and remaining + // partial bytes in left-most suffix nodes as for example in CopySuffix. + // Requires `offset` < length. + Position IndexBeyond(size_t offset) const; + + // Creates a new leaf node containing as much data as possible from `data`. + // The data is added either forwards or reversed depending on `edge_type`. + // Callers must check the length of the returned node to determine if all data + // was copied or not. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + static CordRepBtree* NewLeaf(absl::string_view data, size_t extra); + + // Creates a raw copy of this Btree node, copying all properties, but + // without adding any references to existing edges. + CordRepBtree* CopyRaw() const; + + // Creates a full copy of this Btree node, adding a reference on all edges. + CordRepBtree* Copy() const; + + // Creates a partial copy of this Btree node, copying all edges up to `end`, + // adding a reference on each copied edge, and sets the length of the newly + // created copy to `new_length`. + CordRepBtree* CopyBeginTo(size_t end, size_t new_length) const; + + // Returns a tree containing the edges [tree->begin(), end) and length + // of `new_length`. This method consumes a reference on the provided + // tree, and logically performs the following operation: + // result = tree->CopyBeginTo(end, new_length); + // CordRep::Unref(tree); + // return result; + static CordRepBtree* ConsumeBeginTo(CordRepBtree* tree, size_t end, + size_t new_length); + + // Creates a partial copy of this Btree node, copying all edges starting at + // `begin`, adding a reference on each copied edge, and sets the length of + // the newly created copy to `new_length`. + CordRepBtree* CopyToEndFrom(size_t begin, size_t new_length) const; + + // Extracts and returns the front edge from the provided tree. + // This method consumes a reference on the provided tree, and logically + // performs the following operation: + // edge = CordRep::Ref(tree->Edge(kFront)); + // CordRep::Unref(tree); + // return edge; + static CordRep* ExtractFront(CordRepBtree* tree); + + // Returns a tree containing the result of appending `right` to `left`. + static CordRepBtree* MergeTrees(CordRepBtree* left, CordRepBtree* right); + + // Fallback functions for `Create()`, `Append()` and `Prepend()` which + // deal with legacy / non conforming input, i.e.: CONCAT trees. + static CordRepBtree* CreateSlow(CordRep* rep); + static CordRepBtree* AppendSlow(CordRepBtree*, CordRep* rep); + static CordRepBtree* PrependSlow(CordRepBtree*, CordRep* rep); + + // Recursively rebuilds `tree` into `stack`. If 'consume` is set to true, the + // function will consume a reference on `tree`. `stack` is a null terminated + // array containing the new tree's state, with the current leaf node at + // stack[0], and parent nodes above that, or null for 'top of tree'. + static void Rebuild(CordRepBtree** stack, CordRepBtree* tree, bool consume); + + // Aligns existing edges to start at index 0, to allow for a new edge to be + // added to the back of the current edges. + inline void AlignBegin(); + + // Aligns existing edges to end at `capacity`, to allow for a new edge to be + // added in front of the current edges. + inline void AlignEnd(); + + // Adds the provided edge to this node. + // Requires this node to have capacity for the edge. Realigns / moves + // existing edges as needed to prepend or append the new edge. + template <EdgeType edge_type> + inline void Add(CordRep* rep); + + // Adds the provided edges to this node. + // Requires this node to have capacity for the edges. Realigns / moves + // existing edges as needed to prepend or append the new edges. + template <EdgeType edge_type> + inline void Add(absl::Span<CordRep* const>); + + // Adds data from `data` to this node until either all data has been consumed, + // or there is no more capacity for additional flat nodes inside this node. + // Requires the current node to be a leaf node, data to be non empty, and the + // current node to have capacity for at least one more data edge. + // Returns any remaining data from `data` that was not added, which is + // depending on the edge type (front / back) either the remaining prefix of + // suffix of the input. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + absl::string_view AddData(absl::string_view data, size_t extra); + + // Replace the front or back edge with the provided value. + // Adopts a reference on `edge` and unrefs the old edge. + template <EdgeType edge_type> + inline void SetEdge(CordRep* edge); + + // Returns a partial copy of the current tree containing the first `n` bytes + // of data. `CopyResult` contains both the resulting edge and its height. The + // resulting tree may be less high than the current tree, or even be a single + // matching data edge if `allow_folding` is set to true. + // For example, if `n == 1`, then the result will be the single data edge, and + // height will be set to -1 (one below the owning leaf node). If n == 0, this + // function returns null. Requires `n <= length` + CopyResult CopyPrefix(size_t n, bool allow_folding = true); + + // Returns a partial copy of the current tree containing all data starting + // after `offset`. `CopyResult` contains both the resulting edge and its + // height. The resulting tree may be less high than the current tree, or even + // be a single matching data edge. For example, if `n == length - 1`, then the + // result will be a single data edge, and height will be set to -1 (one below + // the owning leaf node). + // Requires `offset < length` + CopyResult CopySuffix(size_t offset); + + // Returns a OpResult value of {this, kSelf} or {Copy(), kCopied} + // depending on the value of `owned`. + inline OpResult ToOpResult(bool owned); + + // Adds `rep` to the specified tree, returning the modified tree. + template <EdgeType edge_type> + static CordRepBtree* AddCordRep(CordRepBtree* tree, CordRep* rep); + + // Adds `data` to the specified tree, returning the modified tree. + // See the `Append/Prepend` function for the meaning and purpose of `extra`. + template <EdgeType edge_type> + static CordRepBtree* AddData(CordRepBtree* tree, absl::string_view data, + size_t extra = 0); + + // Merges `src` into `dst` with `src` being added either before (kFront) or + // after (kBack) `dst`. Requires the height of `dst` to be greater than or + // equal to the height of `src`. + template <EdgeType edge_type> + static CordRepBtree* Merge(CordRepBtree* dst, CordRepBtree* src); + + // Fallback version of GetAppendBuffer for large trees: GetAppendBuffer() + // implements an inlined version for trees of limited height (3 levels), + // GetAppendBufferSlow implements the logic for large trees. + Span<char> GetAppendBufferSlow(size_t size); + + // `edges_` contains all edges starting from this instance. + // These are explicitly `child` edges only, a cord btree (or any cord tree in + // that respect) does not store `parent` pointers anywhere: multiple trees / + // parents can reference the same shared child edge. The type of these edges + // depends on the height of the node. `Leaf nodes` (height == 0) contain `data + // edges` (external or flat nodes, or sub-strings thereof). All other nodes + // (height > 0) contain pointers to BTREE nodes with a height of `height - 1`. + CordRep* edges_[kMaxCapacity]; + + friend class CordRepBtreeTestPeer; + friend class CordRepBtreeNavigator; +}; + +inline CordRepBtree* CordRep::btree() { + assert(IsBtree()); + return static_cast<CordRepBtree*>(this); +} + +inline const CordRepBtree* CordRep::btree() const { + assert(IsBtree()); + return static_cast<const CordRepBtree*>(this); +} + +inline void CordRepBtree::InitInstance(int height, size_t begin, size_t end) { + tag = BTREE; + storage[0] = static_cast<uint8_t>(height); + storage[1] = static_cast<uint8_t>(begin); + storage[2] = static_cast<uint8_t>(end); +} + +inline CordRep* CordRepBtree::Edge(size_t index) const { + assert(index >= begin()); + assert(index < end()); + return edges_[index]; +} + +inline CordRep* CordRepBtree::Edge(EdgeType edge_type) const { + return edges_[edge_type == kFront ? begin() : back()]; +} + +inline absl::Span<CordRep* const> CordRepBtree::Edges() const { + return {edges_ + begin(), size()}; +} + +inline absl::Span<CordRep* const> CordRepBtree::Edges(size_t begin, + size_t end) const { + assert(begin <= end); + assert(begin >= this->begin()); + assert(end <= this->end()); + return {edges_ + begin, static_cast<size_t>(end - begin)}; +} + +inline absl::string_view CordRepBtree::Data(size_t index) const { + assert(height() == 0); + return EdgeData(Edge(index)); +} + +inline CordRepBtree* CordRepBtree::New(int height) { + CordRepBtree* tree = new CordRepBtree; + tree->length = 0; + tree->InitInstance(height); + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRep* rep) { + CordRepBtree* tree = new CordRepBtree; + int height = rep->IsBtree() ? rep->btree()->height() + 1 : 0; + tree->length = rep->length; + tree->InitInstance(height, /*begin=*/0, /*end=*/1); + tree->edges_[0] = rep; + return tree; +} + +inline CordRepBtree* CordRepBtree::New(CordRepBtree* front, + CordRepBtree* back) { + assert(front->height() == back->height()); + CordRepBtree* tree = new CordRepBtree; + tree->length = front->length + back->length; + tree->InitInstance(front->height() + 1, /*begin=*/0, /*end=*/2); + tree->edges_[0] = front; + tree->edges_[1] = back; + return tree; +} + +inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) { + for (CordRep* edge : edges) { + if (ABSL_PREDICT_FALSE(!edge->refcount.Decrement())) { + CordRep::Destroy(edge); + } + } +} + +inline CordRepBtree* CordRepBtree::CopyRaw() const { + auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree))); + memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree)); + new (&tree->refcount) RefcountAndFlags; + return tree; +} + +inline CordRepBtree* CordRepBtree::Copy() const { + CordRepBtree* tree = CopyRaw(); + for (CordRep* rep : Edges()) CordRep::Ref(rep); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin, + size_t new_length) const { + assert(begin >= this->begin()); + assert(begin <= this->end()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_begin(begin); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end, + size_t new_length) const { + assert(end <= capacity()); + assert(end >= this->begin()); + CordRepBtree* tree = CopyRaw(); + tree->length = new_length; + tree->set_end(end); + for (CordRep* edge : tree->Edges()) CordRep::Ref(edge); + return tree; +} + +inline void CordRepBtree::AlignBegin() { + // The below code itself does not need to be fast as typically we have + // mono-directional append/prepend calls, and `begin` / `end` are typically + // adjusted no more than once. But we want to avoid potential register clobber + // effects, making the compiler emit register save/store/spills, and minimize + // the size of code. + const size_t delta = begin(); + if (ABSL_PREDICT_FALSE(delta != 0)) { + const size_t new_end = end() - delta; + set_begin(0); + set_end(new_end); + // TODO(mvels): we can write this using 2 loads / 2 stores depending on + // total size for the kMaxCapacity = 6 case. I.e., we can branch (switch) on + // size, and then do overlapping load/store of up to 4 pointers (inlined as + // XMM, YMM or ZMM load/store) and up to 2 pointers (XMM / YMM), which is a) + // compact and b) not clobbering any registers. + ABSL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = 0; i < new_end; ++i) { + edges_[i] = edges_[i + delta]; + } + } +} + +inline void CordRepBtree::AlignEnd() { + // See comments in `AlignBegin` for motivation on the hand-rolled for loops. + const size_t delta = capacity() - end(); + if (delta != 0) { + const size_t new_begin = begin() + delta; + const size_t new_end = end() + delta; + set_begin(new_begin); + set_end(new_end); + ABSL_ASSUME(new_end <= kMaxCapacity); +#ifdef __clang__ +#pragma unroll 1 +#endif + for (size_t i = new_end - 1; i >= new_begin; --i) { + edges_[i] = edges_[i - delta]; + } + } +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>(CordRep* rep) { + AlignBegin(); + edges_[fetch_add_end(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kBack>( + absl::Span<CordRep* const> edges) { + AlignBegin(); + size_t new_end = end(); + for (CordRep* edge : edges) edges_[new_end++] = edge; + set_end(new_end); +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>(CordRep* rep) { + AlignEnd(); + edges_[sub_fetch_begin(1)] = rep; +} + +template <> +inline void CordRepBtree::Add<CordRepBtree::kFront>( + absl::Span<CordRep* const> edges) { + AlignEnd(); + size_t new_begin = begin() - edges.size(); + set_begin(new_begin); + for (CordRep* edge : edges) edges_[new_begin++] = edge; +} + +template <CordRepBtree::EdgeType edge_type> +inline void CordRepBtree::SetEdge(CordRep* edge) { + const int idx = edge_type == kFront ? begin() : back(); + CordRep::Unref(edges_[idx]); + edges_[idx] = edge; +} + +inline CordRepBtree::OpResult CordRepBtree::ToOpResult(bool owned) { + return owned ? OpResult{this, kSelf} : OpResult{Copy(), kCopied}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOf(size_t offset) const { + assert(offset < length); + size_t index = begin(); + while (offset >= edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(size_t offset) const { + assert(offset > 0); + assert(offset <= length); + size_t index = begin(); + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBefore(Position front, + size_t offset) const { + size_t index = front.index; + offset = offset + front.n; + while (offset > edges_[index]->length) offset -= edges_[index++]->length; + return {index, offset}; +} + +inline CordRepBtree::Position CordRepBtree::IndexOfLength(size_t n) const { + assert(n <= length); + size_t index = back(); + size_t strip = length - n; + while (strip >= edges_[index]->length) strip -= edges_[index--]->length; + return {index, edges_[index]->length - strip}; +} + +inline CordRepBtree::Position CordRepBtree::IndexBeyond( + const size_t offset) const { + // We need to find the edge which `starting offset` is beyond (>=)`offset`. + // For this we can't use the `offset -= length` logic of IndexOf. Instead, we + // track the offset of the `current edge` in `off`, which we increase as we + // iterate over the edges until we find the matching edge. + size_t off = 0; + size_t index = begin(); + while (offset > off) off += edges_[index++]->length; + return {index, off - offset}; +} + +inline CordRepBtree* CordRepBtree::Create(CordRep* rep) { + if (IsDataEdge(rep)) return New(rep); + return CreateSlow(rep); +} + +inline Span<char> CordRepBtree::GetAppendBuffer(size_t size) { + assert(refcount.IsOne()); + CordRepBtree* tree = this; + const int height = this->height(); + CordRepBtree* n1 = tree; + CordRepBtree* n2 = tree; + CordRepBtree* n3 = tree; + switch (height) { + case 3: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsOne()) return {}; + n2 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 2: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsOne()) return {}; + n1 = tree; + ABSL_FALLTHROUGH_INTENDED; + case 1: + tree = tree->Edge(kBack)->btree(); + if (!tree->refcount.IsOne()) return {}; + ABSL_FALLTHROUGH_INTENDED; + case 0: + CordRep* edge = tree->Edge(kBack); + if (!edge->refcount.IsOne()) return {}; + if (edge->tag < FLAT) return {}; + size_t avail = edge->flat()->Capacity() - edge->length; + if (avail == 0) return {}; + size_t delta = (std::min)(size, avail); + Span<char> span = {edge->flat()->Data() + edge->length, delta}; + edge->length += delta; + switch (height) { + case 3: + n3->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 2: + n2->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 1: + n1->length += delta; + ABSL_FALLTHROUGH_INTENDED; + case 0: + tree->length += delta; + return span; + } + break; + } + return GetAppendBufferSlow(size); +} + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kBack>( + CordRepBtree* tree, CordRep* rep); + +extern template CordRepBtree* CordRepBtree::AddCordRep<CordRepBtree::kFront>( + CordRepBtree* tree, CordRep* rep); + +inline CordRepBtree* CordRepBtree::Append(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kBack>(tree, rep); + } + return AppendSlow(tree, rep); +} + +inline CordRepBtree* CordRepBtree::Prepend(CordRepBtree* tree, CordRep* rep) { + if (ABSL_PREDICT_TRUE(IsDataEdge(rep))) { + return CordRepBtree::AddCordRep<kFront>(tree, rep); + } + return PrependSlow(tree, rep); +} + +#ifdef NDEBUG + +inline CordRepBtree* CordRepBtree::AssertValid(CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +inline const CordRepBtree* CordRepBtree::AssertValid(const CordRepBtree* tree, + bool /* shallow */) { + return tree; +} + +#endif + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_H_ diff --git a/absl/strings/internal/cord_rep_btree_navigator.cc b/absl/strings/internal/cord_rep_btree_navigator.cc new file mode 100644 index 00000000..9b896a3d --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_navigator.cc @@ -0,0 +1,187 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <cassert> + +#include "absl/strings/internal/cord_data_edge.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ReadResult = CordRepBtreeNavigator::ReadResult; + +namespace { + +// Returns a `CordRepSubstring` from `rep` starting at `offset` of size `n`. +// If `rep` is already a `CordRepSubstring` instance, an adjusted instance is +// created based on the old offset and new offset. +// Adopts a reference on `rep`. Rep must be a valid data edge. Returns +// nullptr if `n == 0`, `rep` if `n == rep->length`. +// Requires `offset < rep->length` and `offset + n <= rep->length`. +// TODO(192061034): move to utility library in internal and optimize for small +// substrings of larger reps. +inline CordRep* Substring(CordRep* rep, size_t offset, size_t n) { + assert(n <= rep->length); + assert(offset < rep->length); + assert(offset <= rep->length - n); + assert(IsDataEdge(rep)); + + if (n == 0) return nullptr; + if (n == rep->length) return CordRep::Ref(rep); + + if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + + assert(rep->IsExternal() || rep->IsFlat()); + CordRepSubstring* substring = new CordRepSubstring(); + substring->length = n; + substring->tag = SUBSTRING; + substring->start = offset; + substring->child = CordRep::Ref(rep); + return substring; +} + +inline CordRep* Substring(CordRep* rep, size_t offset) { + return Substring(rep, offset, rep->length - offset); +} + +} // namespace + +CordRepBtreeNavigator::Position CordRepBtreeNavigator::Skip(size_t n) { + int height = 0; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + + // Overall logic: Find an edge of at least the length we need to skip. + // We consume all edges which are smaller (i.e., must be 100% skipped). + // If we exhausted all edges on the current level, we move one level + // up the tree, and repeat until we either find the edge, or until we hit + // the top of the tree meaning the skip exceeds tree->length. + while (n >= edge->length) { + n -= edge->length; + while (++index == node->end()) { + if (++height > height_) return {nullptr, n}; + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + } + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be skipped. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + while (n >= edge->length) { + n -= edge->length; + ++index; + assert(index != node->end()); + edge = node->Edge(index); + } + } + index_[0] = index; + return {edge, n}; +} + +ReadResult CordRepBtreeNavigator::Read(size_t edge_offset, size_t n) { + int height = 0; + size_t length = edge_offset + n; + size_t index = index_[0]; + CordRepBtree* node = node_[0]; + CordRep* edge = node->Edge(index); + assert(edge_offset < edge->length); + + if (length < edge->length) { + return {Substring(edge, edge_offset, n), length}; + } + + // Similar to 'Skip', we consume all edges that are inside the 'length' of + // data that needs to be read. If we exhaust the current level, we move one + // level up the tree and repeat until we hit the final edge that must be + // (partially) read. We consume all edges into `subtree`. + CordRepBtree* subtree = CordRepBtree::New(Substring(edge, edge_offset)); + size_t subtree_end = 1; + do { + length -= edge->length; + while (++index == node->end()) { + index_[height] = index; + if (++height > height_) { + subtree->set_end(subtree_end); + if (length == 0) return {subtree, 0}; + CordRep::Unref(subtree); + return {nullptr, length}; + } + if (length != 0) { + subtree->set_end(subtree_end); + subtree = CordRepBtree::New(subtree); + subtree_end = 1; + } + node = node_[height]; + index = index_[height]; + } + edge = node->Edge(index); + if (length >= edge->length) { + subtree->length += edge->length; + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + } + } while (length >= edge->length); + CordRepBtree* tree = subtree; + subtree->length += length; + + // If we moved up the tree, descend down to the leaf level, consuming all + // edges that must be read, adding 'down' nodes to `subtree`. + while (height > 0) { + node = edge->btree(); + index_[height] = index; + node_[--height] = node; + index = node->begin(); + edge = node->Edge(index); + + if (length != 0) { + CordRepBtree* right = CordRepBtree::New(height); + right->length = length; + subtree->edges_[subtree_end++] = right; + subtree->set_end(subtree_end); + subtree = right; + subtree_end = 0; + while (length >= edge->length) { + subtree->edges_[subtree_end++] = CordRep::Ref(edge); + length -= edge->length; + edge = node->Edge(++index); + } + } + } + // Add any (partial) edge still remaining at the leaf level. + if (length != 0) { + subtree->edges_[subtree_end++] = Substring(edge, 0, length); + } + subtree->set_end(subtree_end); + index_[0] = index; + return {tree, length}; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree_navigator.h b/absl/strings/internal/cord_rep_btree_navigator.h new file mode 100644 index 00000000..3d581c87 --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_navigator.h @@ -0,0 +1,267 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ + +#include <cassert> +#include <iostream> + +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeNavigator is a bi-directional navigator allowing callers to +// navigate all the (leaf) data edges in a CordRepBtree instance. +// +// A CordRepBtreeNavigator instance is by default empty. Callers initialize a +// navigator instance by calling one of `InitFirst()`, `InitLast()` or +// `InitOffset()`, which establishes a current position. Callers can then +// navigate using the `Next`, `Previous`, `Skip` and `Seek` methods. +// +// The navigator instance does not take or adopt a reference on the provided +// `tree` on any of the initialization calls. Callers are responsible for +// guaranteeing the lifecycle of the provided tree. A navigator instance can +// be reset to the empty state by calling `Reset`. +// +// A navigator only keeps positional state on the 'current data edge', it does +// explicitly not keep any 'offset' state. The class does accept and return +// offsets in the `Read()`, `Skip()` and 'Seek()` methods as these would +// otherwise put a big burden on callers. Callers are expected to maintain +// (returned) offset info if they require such granular state. +class CordRepBtreeNavigator { + public: + // The logical position as returned by the Seek() and Skip() functions. + // Returns the current leaf edge for the desired seek or skip position and + // the offset of that position inside that edge. + struct Position { + CordRep* edge; + size_t offset; + }; + + // The read result as returned by the Read() function. + // `tree` contains the resulting tree which is identical to the result + // of calling CordRepBtree::SubTree(...) on the tree being navigated. + // `n` contains the number of bytes used from the last navigated to + // edge of the tree. + struct ReadResult { + CordRep* tree; + size_t n; + }; + + // Returns true if this instance is not empty. + explicit operator bool() const; + + // Returns the tree for this instance or nullptr if empty. + CordRepBtree* btree() const; + + // Returns the data edge of the current position. + // Requires this instance to not be empty. + CordRep* Current() const; + + // Resets this navigator to `tree`, returning the first data edge in the tree. + CordRep* InitFirst(CordRepBtree* tree); + + // Resets this navigator to `tree`, returning the last data edge in the tree. + CordRep* InitLast(CordRepBtree* tree); + + // Resets this navigator to `tree` returning the data edge at position + // `offset` and the relative offset of `offset` into that data edge. + // Returns `Position.edge = nullptr` if the provided offset is greater + // than or equal to the length of the tree, in which case the state of + // the navigator instance remains unchanged. + Position InitOffset(CordRepBtree* tree, size_t offset); + + // Navigates to the next data edge. + // Returns the next data edge or nullptr if there is no next data edge, in + // which case the current position remains unchanged. + CordRep* Next(); + + // Navigates to the previous data edge. + // Returns the previous data edge or nullptr if there is no previous data + // edge, in which case the current position remains unchanged. + CordRep* Previous(); + + // Navigates to the data edge at position `offset`. Returns the navigated to + // data edge in `Position.edge` and the relative offset of `offset` into that + // data edge in `Position.offset`. Returns `Position.edge = nullptr` if the + // provide offset is greater than or equal to the tree's length. + Position Seek(size_t offset); + + // Reads `n` bytes of data starting at offset `edge_offset` of the current + // data edge, and returns the result in `ReadResult.tree`. `ReadResult.n` + // contains the 'bytes used` from the last / current data edge in the tree. + // This allows users that mix regular navigation (using string views) and + // 'read into cord' navigation to keep track of the current state, and which + // bytes have been consumed from a navigator. + // This function returns `ReadResult.tree = nullptr` if the requested length + // exceeds the length of the tree starting at the current data edge. + ReadResult Read(size_t edge_offset, size_t n); + + // Skips `n` bytes forward from the current data edge, returning the navigated + // to data edge in `Position.edge` and `Position.offset` containing the offset + // inside that data edge. Note that the state of the navigator is left + // unchanged if `n` is smaller than the length of the current data edge. + Position Skip(size_t n); + + // Resets this instance to the default / empty state. + void Reset(); + + private: + // Slow path for Next() if Next() reached the end of a leaf node. Backtracks + // up the stack until it finds a node that has a 'next' position available, + // and then does a 'front dive' towards the next leaf node. + CordRep* NextUp(); + + // Slow path for Previous() if Previous() reached the beginning of a leaf + // node. Backtracks up the stack until it finds a node that has a 'previous' + // position available, and then does a 'back dive' towards the previous leaf + // node. + CordRep* PreviousUp(); + + // Generic implementation of InitFirst() and InitLast(). + template <CordRepBtree::EdgeType edge_type> + CordRep* Init(CordRepBtree* tree); + + // `height_` contains the height of the current tree, or -1 if empty. + int height_ = -1; + + // `index_` and `node_` contain the navigation state as the 'path' to the + // current data edge which is at `node_[0]->Edge(index_[0])`. The contents + // of these are undefined until the instance is initialized (`height_ >= 0`). + uint8_t index_[CordRepBtree::kMaxDepth]; + CordRepBtree* node_[CordRepBtree::kMaxDepth]; +}; + +// Returns true if this instance is not empty. +inline CordRepBtreeNavigator::operator bool() const { return height_ >= 0; } + +inline CordRepBtree* CordRepBtreeNavigator::btree() const { + return height_ >= 0 ? node_[height_] : nullptr; +} + +inline CordRep* CordRepBtreeNavigator::Current() const { + assert(height_ >= 0); + return node_[0]->Edge(index_[0]); +} + +inline void CordRepBtreeNavigator::Reset() { height_ = -1; } + +inline CordRep* CordRepBtreeNavigator::InitFirst(CordRepBtree* tree) { + return Init<CordRepBtree::kFront>(tree); +} + +inline CordRep* CordRepBtreeNavigator::InitLast(CordRepBtree* tree) { + return Init<CordRepBtree::kBack>(tree); +} + +template <CordRepBtree::EdgeType edge_type> +inline CordRep* CordRepBtreeNavigator::Init(CordRepBtree* tree) { + assert(tree != nullptr); + assert(tree->size() > 0); + assert(tree->height() <= CordRepBtree::kMaxHeight); + int height = height_ = tree->height(); + size_t index = tree->index(edge_type); + node_[height] = tree; + index_[height] = static_cast<uint8_t>(index); + while (--height >= 0) { + tree = tree->Edge(index)->btree(); + node_[height] = tree; + index = tree->index(edge_type); + index_[height] = static_cast<uint8_t>(index); + } + return node_[0]->Edge(index); +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::Seek( + size_t offset) { + assert(btree() != nullptr); + int height = height_; + CordRepBtree* edge = node_[height]; + if (ABSL_PREDICT_FALSE(offset >= edge->length)) return {nullptr, 0}; + CordRepBtree::Position index = edge->IndexOf(offset); + index_[height] = static_cast<uint8_t>(index.index); + while (--height >= 0) { + edge = edge->Edge(index.index)->btree(); + node_[height] = edge; + index = edge->IndexOf(index.n); + index_[height] = static_cast<uint8_t>(index.index); + } + return {edge->Edge(index.index), index.n}; +} + +inline CordRepBtreeNavigator::Position CordRepBtreeNavigator::InitOffset( + CordRepBtree* tree, size_t offset) { + assert(tree != nullptr); + assert(tree->height() <= CordRepBtree::kMaxHeight); + if (ABSL_PREDICT_FALSE(offset >= tree->length)) return {nullptr, 0}; + height_ = tree->height(); + node_[height_] = tree; + return Seek(offset); +} + +inline CordRep* CordRepBtreeNavigator::Next() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->back() ? NextUp() : edge->Edge(++index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::Previous() { + CordRepBtree* edge = node_[0]; + return index_[0] == edge->begin() ? PreviousUp() : edge->Edge(--index_[0]); +} + +inline CordRep* CordRepBtreeNavigator::NextUp() { + assert(index_[0] == node_[0]->back()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height] + 1; + } while (index == edge->end()); + index_[height] = static_cast<uint8_t>(index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->begin()); + } while (height > 0); + return edge->Edge(index); +} + +inline CordRep* CordRepBtreeNavigator::PreviousUp() { + assert(index_[0] == node_[0]->begin()); + CordRepBtree* edge; + size_t index; + int height = 0; + do { + if (++height > height_) return nullptr; + edge = node_[height]; + index = index_[height]; + } while (index == edge->begin()); + index_[height] = static_cast<uint8_t>(--index); + do { + node_[--height] = edge = edge->Edge(index)->btree(); + index_[height] = static_cast<uint8_t>(index = edge->back()); + } while (height > 0); + return edge->Edge(index); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_NAVIGATOR_H_ diff --git a/absl/strings/internal/cord_rep_btree_navigator_test.cc b/absl/strings/internal/cord_rep_btree_navigator_test.cc new file mode 100644 index 00000000..4f9bd4e5 --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_navigator_test.cc @@ -0,0 +1,346 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_navigator.h" + +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::Ne; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; + +using ReadResult = CordRepBtreeNavigator::ReadResult; +using Position = CordRepBtreeNavigator::Position; + +// CordRepBtreeNavigatorTest is a test fixture which automatically creates a +// tree to test navigation logic on. The parameter `count' defines the number of +// data edges in the test tree. +class CordRepBtreeNavigatorTest : public testing::TestWithParam<int> { + public: + using Flats = std::vector<CordRep*>; + static constexpr size_t kCharsPerFlat = 3; + + CordRepBtreeNavigatorTest() { + data_ = CreateRandomString(count() * kCharsPerFlat); + flats_ = CreateFlatsFromString(data_, kCharsPerFlat); + + // Turn flat 0 or 1 into a substring to cover partial reads on substrings. + if (count() > 1) { + CordRep::Unref(flats_[1]); + flats_[1] = MakeSubstring(kCharsPerFlat, kCharsPerFlat, MakeFlat(data_)); + } else { + CordRep::Unref(flats_[0]); + flats_[0] = MakeSubstring(0, kCharsPerFlat, MakeFlat(data_)); + } + + tree_ = CordRepBtreeFromFlats(flats_); + } + + ~CordRepBtreeNavigatorTest() override { CordRep::Unref(tree_); } + + int count() const { return GetParam(); } + CordRepBtree* tree() { return tree_; } + const std::string& data() const { return data_; } + const std::vector<CordRep*>& flats() const { return flats_; } + + static std::string ToString(testing::TestParamInfo<int> param) { + return absl::StrCat(param.param, "_Flats"); + } + + private: + std::string data_; + Flats flats_; + CordRepBtree* tree_; +}; + +INSTANTIATE_TEST_SUITE_P( + WithParam, CordRepBtreeNavigatorTest, + testing::Values(1, CordRepBtree::kMaxCapacity - 1, + CordRepBtree::kMaxCapacity, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity - 1, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity + 1, + CordRepBtree::kMaxCapacity* CordRepBtree::kMaxCapacity * 2 + + 17), + CordRepBtreeNavigatorTest::ToString); + +TEST(CordRepBtreeNavigatorTest, Uninitialized) { + CordRepBtreeNavigator nav; + EXPECT_FALSE(nav); + EXPECT_THAT(nav.btree(), Eq(nullptr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(nav.Current(), ".*"); +#endif +} + +TEST_P(CordRepBtreeNavigatorTest, InitFirst) { + CordRepBtreeNavigator nav; + CordRep* edge = nav.InitFirst(tree()); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree())); + EXPECT_THAT(nav.Current(), Eq(flats().front())); + EXPECT_THAT(edge, Eq(flats().front())); +} + +TEST_P(CordRepBtreeNavigatorTest, InitLast) { + CordRepBtreeNavigator nav; + CordRep* edge = nav.InitLast(tree()); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree())); + EXPECT_THAT(nav.Current(), Eq(flats().back())); + EXPECT_THAT(edge, Eq(flats().back())); +} + +TEST_P(CordRepBtreeNavigatorTest, NextPrev) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + const Flats& flats = this->flats(); + + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); + for (int i = 1; i < flats.size(); ++i) { + ASSERT_THAT(nav.Next(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); + for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) { + ASSERT_THAT(nav.Previous(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); +} + +TEST_P(CordRepBtreeNavigatorTest, PrevNext) { + CordRepBtreeNavigator nav; + nav.InitLast(tree()); + const Flats& flats = this->flats(); + + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); + for (int i = static_cast<int>(flats.size()) - 2; i >= 0; --i) { + ASSERT_THAT(nav.Previous(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.front())); + for (int i = 1; i < flats.size(); ++i) { + ASSERT_THAT(nav.Next(), Eq(flats[i])); + EXPECT_THAT(nav.Current(), Eq(flats[i])); + } + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Current(), Eq(flats.back())); +} + +TEST(CordRepBtreeNavigatorTest, Reset) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtreeNavigator nav; + nav.InitFirst(tree); + nav.Reset(); + EXPECT_FALSE(nav); + EXPECT_THAT(nav.btree(), Eq(nullptr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(nav.Current(), ".*"); +#endif + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeNavigatorTest, Skip) { + int count = this->count(); + const Flats& flats = this->flats(); + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + Position pos = nav.Skip(char_offset); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.edge, Eq(flats[0])); + EXPECT_THAT(pos.offset, Eq(char_offset)); + } + + for (int index1 = 0; index1 < count; ++index1) { + for (int index2 = index1; index2 < count; ++index2) { + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + size_t length1 = index1 * kCharsPerFlat; + Position pos1 = nav.Skip(length1 + char_offset); + ASSERT_THAT(pos1.edge, Eq(flats[index1])); + ASSERT_THAT(pos1.edge, Eq(nav.Current())); + ASSERT_THAT(pos1.offset, Eq(char_offset)); + + size_t length2 = index2 * kCharsPerFlat; + Position pos2 = nav.Skip(length2 - length1 + char_offset); + ASSERT_THAT(pos2.edge, Eq(flats[index2])); + ASSERT_THAT(pos2.edge, Eq(nav.Current())); + ASSERT_THAT(pos2.offset, Eq(char_offset)); + } + } + } +} + +TEST_P(CordRepBtreeNavigatorTest, Seek) { + int count = this->count(); + const Flats& flats = this->flats(); + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + Position pos = nav.Seek(char_offset); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.edge, Eq(flats[0])); + EXPECT_THAT(pos.offset, Eq(char_offset)); + } + + for (int index = 0; index < count; ++index) { + for (int char_offset = 0; char_offset < kCharsPerFlat; ++char_offset) { + size_t offset = index * kCharsPerFlat + char_offset; + Position pos1 = nav.Seek(offset); + ASSERT_THAT(pos1.edge, Eq(flats[index])); + ASSERT_THAT(pos1.edge, Eq(nav.Current())); + ASSERT_THAT(pos1.offset, Eq(char_offset)); + } + } +} + +TEST(CordRepBtreeNavigatorTest, InitOffset) { + // Whitebox: InitOffset() is implemented in terms of Seek() which is + // exhaustively tested. Only test it initializes / forwards properly.. + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeNavigator nav; + Position pos = nav.InitOffset(tree, 5); + EXPECT_TRUE(nav); + EXPECT_THAT(nav.btree(), Eq(tree)); + EXPECT_THAT(pos.edge, Eq(tree->Edges()[1])); + EXPECT_THAT(pos.edge, Eq(nav.Current())); + EXPECT_THAT(pos.offset, Eq(2)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeNavigatorTest, InitOffsetAndSeekBeyondLength) { + CordRepBtree* tree1 = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtree* tree2 = CordRepBtree::Create(MakeFlat("def")); + + CordRepBtreeNavigator nav; + nav.InitFirst(tree1); + EXPECT_THAT(nav.Seek(3).edge, Eq(nullptr)); + EXPECT_THAT(nav.Seek(100).edge, Eq(nullptr)); + EXPECT_THAT(nav.btree(), Eq(tree1)); + EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front())); + + EXPECT_THAT(nav.InitOffset(tree2, 3).edge, Eq(nullptr)); + EXPECT_THAT(nav.InitOffset(tree2, 100).edge, Eq(nullptr)); + EXPECT_THAT(nav.btree(), Eq(tree1)); + EXPECT_THAT(nav.Current(), Eq(tree1->Edges().front())); + + CordRep::Unref(tree1); + CordRep::Unref(tree2); +} + +TEST_P(CordRepBtreeNavigatorTest, Read) { + const Flats& flats = this->flats(); + const std::string& data = this->data(); + + for (size_t offset = 0; offset < data.size(); ++offset) { + for (size_t length = 1; length <= data.size() - offset; ++length) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + + // Skip towards edge holding offset + size_t edge_offset = nav.Skip(offset).offset; + + // Read node + ReadResult result = nav.Read(edge_offset, length); + ASSERT_THAT(result.tree, Ne(nullptr)); + EXPECT_THAT(result.tree->length, Eq(length)); + if (result.tree->tag == BTREE) { + ASSERT_TRUE(CordRepBtree::IsValid(result.tree->btree())); + } + + // Verify contents + std::string value = CordToString(result.tree); + EXPECT_THAT(value, Eq(data.substr(offset, length))); + + // Verify 'partial last edge' reads. + size_t partial = (offset + length) % kCharsPerFlat; + ASSERT_THAT(result.n, Eq(partial)); + + // Verify ending position if not EOF + if (offset + length < data.size()) { + size_t index = (offset + length) / kCharsPerFlat; + EXPECT_THAT(nav.Current(), Eq(flats[index])); + } + + CordRep::Unref(result.tree); + } + } +} + +TEST_P(CordRepBtreeNavigatorTest, ReadBeyondLengthOfTree) { + CordRepBtreeNavigator nav; + nav.InitFirst(tree()); + ReadResult result = nav.Read(2, tree()->length); + ASSERT_THAT(result.tree, Eq(nullptr)); +} + +TEST(CordRepBtreeNavigatorTest, NavigateMaximumTreeDepth) { + CordRepFlat* flat1 = MakeFlat("Hello world"); + CordRepFlat* flat2 = MakeFlat("World Hello"); + + CordRepBtree* node = CordRepBtree::Create(flat1); + node = CordRepBtree::Append(node, flat2); + while (node->height() < CordRepBtree::kMaxHeight) { + node = CordRepBtree::New(node); + } + + CordRepBtreeNavigator nav; + CordRep* edge = nav.InitFirst(node); + EXPECT_THAT(edge, Eq(flat1)); + EXPECT_THAT(nav.Next(), Eq(flat2)); + EXPECT_THAT(nav.Next(), Eq(nullptr)); + EXPECT_THAT(nav.Previous(), Eq(flat1)); + EXPECT_THAT(nav.Previous(), Eq(nullptr)); + + CordRep::Unref(node); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree_reader.cc b/absl/strings/internal/cord_rep_btree_reader.cc new file mode 100644 index 00000000..0d0e8601 --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_reader.cc @@ -0,0 +1,69 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_data_edge.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +absl::string_view CordRepBtreeReader::Read(size_t n, size_t chunk_size, + CordRep*& tree) { + assert(chunk_size <= navigator_.Current()->length); + + // If chunk_size is non-zero, we need to start inside last returned edge. + // Else we start reading at the next data edge of the tree. + CordRep* edge = chunk_size ? navigator_.Current() : navigator_.Next(); + const size_t offset = chunk_size ? edge->length - chunk_size : 0; + + // Read the sub tree and verify we got what we wanted. + ReadResult result = navigator_.Read(offset, n); + tree = result.tree; + + // If the data returned in `tree` was covered entirely by `chunk_size`, i.e., + // read from the 'previous' edge, we did not consume any additional data, and + // can directly return the substring into the current data edge as the next + // chunk. We can easily establish from the above code that `navigator_.Next()` + // has not been called as that requires `chunk_size` to be zero. + if (n < chunk_size) return EdgeData(edge).substr(result.n); + + // The amount of data taken from the last edge is `chunk_size` and `result.n` + // contains the offset into the current edge trailing the read data (which can + // be 0). As the call to `navigator_.Read()` could have consumed all remaining + // data, calling `navigator_.Current()` is not safe before checking if we + // already consumed all remaining data. + const size_t consumed_by_read = n - chunk_size - result.n; + if (consumed_by_read >= remaining_) { + remaining_ = 0; + return {}; + } + + // We did not read all data, return remaining data from current edge. + edge = navigator_.Current(); + remaining_ -= consumed_by_read + edge->length; + return EdgeData(edge).substr(result.n); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree_reader.h b/absl/strings/internal/cord_rep_btree_reader.h new file mode 100644 index 00000000..8db8f8dd --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_reader.h @@ -0,0 +1,212 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ + +#include <cassert> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_data_edge.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_btree_navigator.h" +#include "absl/strings/internal/cord_rep_flat.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepBtreeReader implements logic to iterate over cord btrees. +// References to the underlying data are returned as absl::string_view values. +// The most typical use case is a forward only iteration over tree data. +// The class also provides `Skip()`, `Seek()` and `Read()` methods similar to +// CordRepBtreeNavigator that allow more advanced navigation. +// +// Example: iterate over all data inside a cord btree: +// +// CordRepBtreeReader reader; +// for (string_view sv = reader.Init(tree); !sv.Empty(); sv = sv.Next()) { +// DoSomethingWithDataIn(sv); +// } +// +// All navigation methods always return the next 'chunk' of data. The class +// assumes that all data is directly 'consumed' by the caller. For example: +// invoking `Skip()` will skip the desired number of bytes, and directly +// read and return the next chunk of data directly after the skipped bytes. +// +// Example: iterate over all data inside a btree skipping the first 100 bytes: +// +// CordRepBtreeReader reader; +// absl::string_view sv = reader.Init(tree); +// if (sv.length() > 100) { +// sv.RemovePrefix(100); +// } else { +// sv = reader.Skip(100 - sv.length()); +// } +// while (!sv.empty()) { +// DoSomethingWithDataIn(sv); +// absl::string_view sv = reader.Next(); +// } +// +// It is important to notice that `remaining` is based on the end position of +// the last data edge returned to the caller, not the cumulative data returned +// to the caller which can be less in cases of skipping or seeking over data. +// +// For example, consider a cord btree with five data edges: "abc", "def", "ghi", +// "jkl" and "mno": +// +// absl::string_view sv; +// CordRepBtreeReader reader; +// +// sv = reader.Init(tree); // sv = "abc", remaining = 12 +// sv = reader.Skip(4); // sv = "hi", remaining = 6 +// sv = reader.Skip(2); // sv = "l", remaining = 3 +// sv = reader.Next(); // sv = "mno", remaining = 0 +// sv = reader.Seek(1); // sv = "bc", remaining = 12 +// +class CordRepBtreeReader { + public: + using ReadResult = CordRepBtreeNavigator::ReadResult; + using Position = CordRepBtreeNavigator::Position; + + // Returns true if this instance is not empty. + explicit operator bool() const { return navigator_.btree() != nullptr; } + + // Returns the tree referenced by this instance or nullptr if empty. + CordRepBtree* btree() const { return navigator_.btree(); } + + // Returns the current data edge inside the referenced btree. + // Requires that the current instance is not empty. + CordRep* node() const { return navigator_.Current(); } + + // Returns the length of the referenced tree. + // Requires that the current instance is not empty. + size_t length() const; + + // Returns the number of remaining bytes available for iteration, which is the + // number of bytes directly following the end of the last chunk returned. + // This value will be zero if we iterated over the last edge in the bound + // tree, in which case any call to Next() or Skip() will return an empty + // string_view reflecting the EOF state. + // Note that a call to `Seek()` resets `remaining` to a value based on the + // end position of the chunk returned by that call. + size_t remaining() const { return remaining_; } + + // Resets this instance to an empty value. + void Reset() { navigator_.Reset(); } + + // Initializes this instance with `tree`. `tree` must not be null. + // Returns a reference to the first data edge of the provided tree. + absl::string_view Init(CordRepBtree* tree); + + // Navigates to and returns the next data edge of the referenced tree. + // Returns an empty string_view if an attempt is made to read beyond the end + // of the tree, i.e.: if `remaining()` is zero indicating an EOF condition. + // Requires that the current instance is not empty. + absl::string_view Next(); + + // Skips the provided amount of bytes and returns a reference to the data + // directly following the skipped bytes. + absl::string_view Skip(size_t skip); + + // Reads `n` bytes into `tree`. + // If `chunk_size` is zero, starts reading at the next data edge. If + // `chunk_size` is non zero, the read starts at the last `chunk_size` bytes of + // the last returned data edge. Effectively, this means that the read starts + // at offset `consumed() - chunk_size`. + // Requires that `chunk_size` is less than or equal to the length of the + // last returned data edge. The purpose of `chunk_size` is to simplify code + // partially consuming a returned chunk and wanting to include the remaining + // bytes in the Read call. For example, the below code will read 1000 bytes of + // data into a cord tree if the first chunk starts with "big:": + // + // CordRepBtreeReader reader; + // absl::string_view sv = reader.Init(tree); + // if (absl::StartsWith(sv, "big:")) { + // CordRepBtree tree; + // sv = reader.Read(1000, sv.size() - 4 /* "big:" */, &tree); + // } + // + // This method will return an empty string view if all remaining data was + // read. If `n` exceeded the amount of remaining data this function will + // return an empty string view and `tree` will be set to nullptr. + // In both cases, `consumed` will be set to `length`. + absl::string_view Read(size_t n, size_t chunk_size, CordRep*& tree); + + // Navigates to the chunk at offset `offset`. + // Returns a reference into the navigated to chunk, adjusted for the relative + // position of `offset` into that chunk. For example, calling `Seek(13)` on a + // cord tree containing 2 chunks of 10 and 20 bytes respectively will return + // a string view into the second chunk starting at offset 3 with a size of 17. + // Returns an empty string view if `offset` is equal to or greater than the + // length of the referenced tree. + absl::string_view Seek(size_t offset); + + private: + size_t remaining_ = 0; + CordRepBtreeNavigator navigator_; +}; + +inline size_t CordRepBtreeReader::length() const { + assert(btree() != nullptr); + return btree()->length; +} + +inline absl::string_view CordRepBtreeReader::Init(CordRepBtree* tree) { + assert(tree != nullptr); + const CordRep* edge = navigator_.InitFirst(tree); + remaining_ = tree->length - edge->length; + return EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Next() { + if (remaining_ == 0) return {}; + const CordRep* edge = navigator_.Next(); + assert(edge != nullptr); + remaining_ -= edge->length; + return EdgeData(edge); +} + +inline absl::string_view CordRepBtreeReader::Skip(size_t skip) { + // As we are always positioned on the last 'consumed' edge, we + // need to skip the current edge as well as `skip`. + const size_t edge_length = navigator_.Current()->length; + CordRepBtreeNavigator::Position pos = navigator_.Skip(skip + edge_length); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + // The combined length of all edges skipped before `pos.edge` is `skip - + // pos.offset`, all of which are 'consumed', as well as the current edge. + remaining_ -= skip - pos.offset + pos.edge->length; + return EdgeData(pos.edge).substr(pos.offset); +} + +inline absl::string_view CordRepBtreeReader::Seek(size_t offset) { + const CordRepBtreeNavigator::Position pos = navigator_.Seek(offset); + if (ABSL_PREDICT_FALSE(pos.edge == nullptr)) { + remaining_ = 0; + return {}; + } + absl::string_view chunk = EdgeData(pos.edge).substr(pos.offset); + remaining_ = length() - offset - chunk.length(); + return chunk; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_BTREE_READER_H_ diff --git a/absl/strings/internal/cord_rep_btree_reader_test.cc b/absl/strings/internal/cord_rep_btree_reader_test.cc new file mode 100644 index 00000000..9b27a81f --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_reader_test.cc @@ -0,0 +1,293 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree_reader.h" + +#include <iostream> +#include <random> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Ne; +using ::testing::Not; + +using ::absl::cordrep_testing::CordRepBtreeFromFlats; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; + +using ReadResult = CordRepBtreeReader::ReadResult; + +TEST(CordRepBtreeReaderTest, Next) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRepBtreeReader reader; + size_t remaining = data.length(); + absl::string_view chunk = reader.Init(node); + EXPECT_THAT(chunk, Eq(data.substr(0, chunk.length()))); + + remaining -= chunk.length(); + EXPECT_THAT(reader.remaining(), Eq(remaining)); + + while (remaining > 0) { + const size_t offset = data.length() - remaining; + chunk = reader.Next(); + EXPECT_THAT(chunk, Eq(data.substr(offset, chunk.length()))); + + remaining -= chunk.length(); + EXPECT_THAT(reader.remaining(), Eq(remaining)); + } + + EXPECT_THAT(reader.remaining(), Eq(0)); + + // Verify trying to read beyond EOF returns empty string_view + EXPECT_THAT(reader.Next(), testing::IsEmpty()); + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, Skip) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t skip1 = 0; skip1 < data.length() - kChars; ++skip1) { + for (size_t skip2 = 0; skip2 < data.length() - kChars; ++skip2) { + CordRepBtreeReader reader; + size_t remaining = data.length(); + absl::string_view chunk = reader.Init(node); + remaining -= chunk.length(); + + chunk = reader.Skip(skip1); + size_t offset = data.length() - remaining; + ASSERT_THAT(chunk, Eq(data.substr(offset + skip1, chunk.length()))); + remaining -= chunk.length() + skip1; + ASSERT_THAT(reader.remaining(), Eq(remaining)); + + if (remaining == 0) continue; + + size_t skip = std::min(remaining - 1, skip2); + chunk = reader.Skip(skip); + offset = data.length() - remaining; + ASSERT_THAT(chunk, Eq(data.substr(offset + skip, chunk.length()))); + } + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SkipBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Skip(100), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Seek) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap, cap * cap + 1, cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t seek = 0; seek < data.length() - 1; ++seek) { + CordRepBtreeReader reader; + reader.Init(node); + absl::string_view chunk = reader.Seek(seek); + ASSERT_THAT(chunk, Not(IsEmpty())); + ASSERT_THAT(chunk, Eq(data.substr(seek, chunk.length()))); + ASSERT_THAT(reader.remaining(), + Eq(data.length() - seek - chunk.length())); + } + + CordRep::Unref(node); + } +} + +TEST(CordRepBtreeReaderTest, SeekBeyondLength) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + tree = CordRepBtree::Append(tree, MakeFlat("def")); + CordRepBtreeReader reader; + reader.Init(tree); + EXPECT_THAT(reader.Seek(6), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + EXPECT_THAT(reader.Seek(100), IsEmpty()); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); +} + +TEST(CordRepBtreeReaderTest, Read) { + std::string data = "abcdefghijklmno"; + std::vector<CordRep*> flats = CreateFlatsFromString(data, 5); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + CordRep* tree; + CordRepBtreeReader reader; + absl::string_view chunk; + + // Read zero bytes + chunk = reader.Init(node); + chunk = reader.Read(0, chunk.length(), tree); + EXPECT_THAT(tree, Eq(nullptr)); + EXPECT_THAT(chunk, Eq("abcde")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + + // Read in full + chunk = reader.Init(node); + chunk = reader.Read(15, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abcdefghijklmno")); + EXPECT_THAT(chunk, Eq("")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + // Read < chunk bytes + chunk = reader.Init(node); + chunk = reader.Read(3, chunk.length(), tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("abc")); + EXPECT_THAT(chunk, Eq("de")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read < chunk bytes at offset + chunk = reader.Init(node); + chunk = reader.Read(2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cd")); + EXPECT_THAT(chunk, Eq("e")); + EXPECT_THAT(reader.remaining(), Eq(10)); + EXPECT_THAT(reader.Next(), Eq("fghij")); + CordRep::Unref(tree); + + // Read from consumed chunk + chunk = reader.Init(node); + chunk = reader.Read(3, 0, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("fgh")); + EXPECT_THAT(chunk, Eq("ij")); + EXPECT_THAT(reader.remaining(), Eq(5)); + EXPECT_THAT(reader.Next(), Eq("klmno")); + CordRep::Unref(tree); + + // Read across chunks + chunk = reader.Init(node); + chunk = reader.Read(12, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghijklmn")); + EXPECT_THAT(chunk, Eq("o")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + // Read across chunks landing on exact edge boundary + chunk = reader.Init(node); + chunk = reader.Read(10 - 2, chunk.length() - 2, tree); + ASSERT_THAT(tree, Ne(nullptr)); + EXPECT_THAT(CordToString(tree), Eq("cdefghij")); + EXPECT_THAT(chunk, Eq("klmno")); + EXPECT_THAT(reader.remaining(), Eq(0)); + CordRep::Unref(tree); + + CordRep::Unref(node); +} + +TEST(CordRepBtreeReaderTest, ReadExhaustive) { + constexpr size_t kChars = 3; + const size_t cap = CordRepBtree::kMaxCapacity; + int counts[] = {1, 2, cap, cap * cap + 1, cap * cap * cap * 2 + 17}; + + for (int count : counts) { + std::string data = CreateRandomString(count * kChars); + std::vector<CordRep*> flats = CreateFlatsFromString(data, kChars); + CordRepBtree* node = CordRepBtreeFromFlats(flats); + + for (size_t read_size : {kChars - 1, kChars, kChars + 7, cap * cap}) { + CordRepBtreeReader reader; + absl::string_view chunk = reader.Init(node); + + // `consumed` tracks the end of last consumed chunk which is the start of + // the next chunk: we always read with `chunk_size = chunk.length()`. + size_t consumed = 0; + size_t remaining = data.length(); + while (remaining > 0) { + CordRep* tree; + size_t n = (std::min)(remaining, read_size); + chunk = reader.Read(n, chunk.length(), tree); + EXPECT_THAT(tree, Ne(nullptr)); + if (tree) { + EXPECT_THAT(CordToString(tree), Eq(data.substr(consumed, n))); + CordRep::Unref(tree); + } + + consumed += n; + remaining -= n; + EXPECT_THAT(reader.remaining(), Eq(remaining - chunk.length())); + + if (remaining > 0) { + ASSERT_FALSE(chunk.empty()); + ASSERT_THAT(chunk, Eq(data.substr(consumed, chunk.length()))); + } else { + ASSERT_TRUE(chunk.empty()) << chunk; + } + } + } + + CordRep::Unref(node); + } +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_btree_test.cc b/absl/strings/internal/cord_rep_btree_test.cc new file mode 100644 index 00000000..51b90db1 --- /dev/null +++ b/absl/strings/internal/cord_rep_btree_test.cc @@ -0,0 +1,1565 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_btree.h" + +#include <cmath> +#include <deque> +#include <iostream> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/cleanup/cleanup.h" +#include "absl/strings/internal/cord_data_edge.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_test_util.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +class CordRepBtreeTestPeer { + public: + static void SetEdge(CordRepBtree* node, size_t idx, CordRep* edge) { + node->edges_[idx] = edge; + } + static void AddEdge(CordRepBtree* node, CordRep* edge) { + node->edges_[node->fetch_add_end(1)] = edge; + } +}; + +namespace { + +using ::absl::cordrep_testing::AutoUnref; +using ::absl::cordrep_testing::CordCollectRepsIf; +using ::absl::cordrep_testing::CordToString; +using ::absl::cordrep_testing::CordVisitReps; +using ::absl::cordrep_testing::CreateFlatsFromString; +using ::absl::cordrep_testing::CreateRandomString; +using ::absl::cordrep_testing::MakeExternal; +using ::absl::cordrep_testing::MakeFlat; +using ::absl::cordrep_testing::MakeSubstring; +using ::testing::_; +using ::testing::AllOf; +using ::testing::AnyOf; +using ::testing::Conditional; +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Le; +using ::testing::Ne; +using ::testing::Not; +using ::testing::SizeIs; +using ::testing::TypedEq; + +MATCHER_P(EqFlatHolding, data, "Equals flat holding data") { + if (arg->tag < FLAT) { + *result_listener << "Expected FLAT, got tag " << static_cast<int>(arg->tag); + return false; + } + std::string actual = CordToString(arg); + if (actual != data) { + *result_listener << "Expected flat holding \"" << data + << "\", got flat holding \"" << actual << "\""; + return false; + } + return true; +} + +MATCHER_P(IsNode, height, absl::StrCat("Is a valid node of height ", height)) { + if (arg == nullptr) { + *result_listener << "Expected NODE, got nullptr"; + return false; + } + if (arg->tag != BTREE) { + *result_listener << "Expected NODE, got " << static_cast<int>(arg->tag); + return false; + } + if (!CordRepBtree::IsValid(arg->btree())) { + CordRepBtree::Dump(arg->btree(), "Expected valid NODE, got:", false, + *result_listener->stream()); + return false; + } + if (arg->btree()->height() != height) { + *result_listener << "Expected NODE of height " << height << ", got " + << arg->btree()->height(); + return false; + } + return true; +} + +MATCHER_P2(IsSubstring, start, length, + absl::StrCat("Is a substring(start = ", start, ", length = ", length, + ")")) { + if (arg == nullptr) { + *result_listener << "Expected substring, got nullptr"; + return false; + } + if (arg->tag != SUBSTRING) { + *result_listener << "Expected SUBSTRING, got " + << static_cast<int>(arg->tag); + return false; + } + const CordRepSubstring* const substr = arg->substring(); + if (substr->start != start || substr->length != length) { + *result_listener << "Expected substring(" << start << ", " << length + << "), got substring(" << substr->start << ", " + << substr->length << ")"; + return false; + } + return true; +} + +MATCHER_P2(EqExtractResult, tree, rep, "Equals ExtractResult") { + if (arg.tree != tree || arg.extracted != rep) { + *result_listener << "Expected {" << static_cast<const void*>(tree) << ", " + << static_cast<const void*>(rep) << "}, got {" << arg.tree + << ", " << arg.extracted << "}"; + return false; + } + return true; +} + +// DataConsumer is a simple helper class used by tests to 'consume' string +// fragments from the provided input in forward or backward direction. +class DataConsumer { + public: + // Starts consumption of `data`. Caller must make sure `data` outlives this + // instance. Consumes data starting at the front if `forward` is true, else + // consumes data from the back. + DataConsumer(absl::string_view data, bool forward) + : data_(data), forward_(forward) {} + + // Return the next `n` bytes from referenced data. + absl::string_view Next(size_t n) { + assert(n <= data_.size() - consumed_); + consumed_ += n; + return data_.substr(forward_ ? consumed_ - n : data_.size() - consumed_, n); + } + + // Returns all data consumed so far. + absl::string_view Consumed() const { + return forward_ ? data_.substr(0, consumed_) + : data_.substr(data_.size() - consumed_); + } + + private: + absl::string_view data_; + size_t consumed_ = 0; + bool forward_; +}; + +// BtreeAdd returns either CordRepBtree::Append or CordRepBtree::Prepend. +CordRepBtree* BtreeAdd(CordRepBtree* node, bool append, + absl::string_view data) { + return append ? CordRepBtree::Append(node, data) + : CordRepBtree::Prepend(node, data); +} + +// Recursively collects all leaf edges from `tree` and appends them to `edges`. +void GetLeafEdges(const CordRepBtree* tree, std::vector<CordRep*>& edges) { + if (tree->height() == 0) { + for (CordRep* edge : tree->Edges()) { + edges.push_back(edge); + } + } else { + for (CordRep* edge : tree->Edges()) { + GetLeafEdges(edge->btree(), edges); + } + } +} + +// Recursively collects and returns all leaf edges from `tree`. +std::vector<CordRep*> GetLeafEdges(const CordRepBtree* tree) { + std::vector<CordRep*> edges; + GetLeafEdges(tree, edges); + return edges; +} + +// Creates a flat containing the hexadecimal value of `i` zero padded +// to at least 4 digits prefixed with "0x", e.g.: "0x04AC". +CordRepFlat* MakeHexFlat(size_t i) { + return MakeFlat(absl::StrCat("0x", absl::Hex(i, absl::kZeroPad4))); +} + +CordRepBtree* MakeLeaf(size_t size = CordRepBtree::kMaxCapacity) { + assert(size <= CordRepBtree::kMaxCapacity); + CordRepBtree* leaf = CordRepBtree::Create(MakeHexFlat(0)); + for (size_t i = 1; i < size; ++i) { + leaf = CordRepBtree::Append(leaf, MakeHexFlat(i)); + } + return leaf; +} + +CordRepBtree* MakeTree(size_t size, bool append = true) { + CordRepBtree* tree = CordRepBtree::Create(MakeHexFlat(0)); + for (size_t i = 1; i < size; ++i) { + tree = append ? CordRepBtree::Append(tree, MakeHexFlat(i)) + : CordRepBtree::Prepend(tree, MakeHexFlat(i)); + } + return tree; +} + +CordRepBtree* CreateTree(absl::Span<CordRep* const> reps) { + auto it = reps.begin(); + CordRepBtree* tree = CordRepBtree::Create(*it); + while (++it != reps.end()) tree = CordRepBtree::Append(tree, *it); + return tree; +} + +CordRepBtree* CreateTree(absl::string_view data, size_t chunk_size) { + return CreateTree(CreateFlatsFromString(data, chunk_size)); +} + +CordRepBtree* CreateTreeReverse(absl::string_view data, size_t chunk_size) { + std::vector<CordRep*> flats = CreateFlatsFromString(data, chunk_size); + auto rit = flats.rbegin(); + CordRepBtree* tree = CordRepBtree::Create(*rit); + while (++rit != flats.rend()) tree = CordRepBtree::Prepend(tree, *rit); + return tree; +} + +class CordRepBtreeTest : public testing::TestWithParam<bool> { + public: + bool shared() const { return GetParam(); } + + static std::string ToString(testing::TestParamInfo<bool> param) { + return param.param ? "Shared" : "Private"; + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeTest, testing::Bool(), + CordRepBtreeTest::ToString); + +class CordRepBtreeHeightTest : public testing::TestWithParam<int> { + public: + int height() const { return GetParam(); } + + static std::string ToString(testing::TestParamInfo<int> param) { + return absl::StrCat(param.param); + } +}; + +INSTANTIATE_TEST_SUITE_P(WithHeights, CordRepBtreeHeightTest, + testing::Range(0, CordRepBtree::kMaxHeight), + CordRepBtreeHeightTest::ToString); + +using TwoBools = testing::tuple<bool, bool>; + +class CordRepBtreeDualTest : public testing::TestWithParam<TwoBools> { + public: + bool first_shared() const { return std::get<0>(GetParam()); } + bool second_shared() const { return std::get<1>(GetParam()); } + + static std::string ToString(testing::TestParamInfo<TwoBools> param) { + if (std::get<0>(param.param)) { + return std::get<1>(param.param) ? "BothShared" : "FirstShared"; + } + return std::get<1>(param.param) ? "SecondShared" : "Private"; + } +}; + +INSTANTIATE_TEST_SUITE_P(WithParam, CordRepBtreeDualTest, + testing::Combine(testing::Bool(), testing::Bool()), + CordRepBtreeDualTest::ToString); + +TEST(CordRepBtreeTest, SizeIsMultipleOf64) { + // Only enforce for fully 64-bit platforms. + if (sizeof(size_t) == 8 && sizeof(void*) == 8) { + EXPECT_THAT(sizeof(CordRepBtree) % 64, Eq(0)) << "Should be multiple of 64"; + } +} + +TEST(CordRepBtreeTest, NewDestroyEmptyTree) { + auto* tree = CordRepBtree::New(); + EXPECT_THAT(tree->size(), Eq(0)); + EXPECT_THAT(tree->height(), Eq(0)); + EXPECT_THAT(tree->Edges(), ElementsAre()); + CordRepBtree::Destroy(tree); +} + +TEST(CordRepBtreeTest, NewDestroyEmptyTreeAtHeight) { + auto* tree = CordRepBtree::New(3); + EXPECT_THAT(tree->size(), Eq(0)); + EXPECT_THAT(tree->height(), Eq(3)); + EXPECT_THAT(tree->Edges(), ElementsAre()); + CordRepBtree::Destroy(tree); +} + +TEST(CordRepBtreeTest, Btree) { + CordRep* rep = CordRepBtree::New(); + EXPECT_THAT(rep->btree(), Eq(rep)); + EXPECT_THAT(static_cast<const CordRep*>(rep)->btree(), Eq(rep)); + CordRep::Unref(rep); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + rep = MakeFlat("Hello world"); + EXPECT_DEATH(rep->btree(), ".*"); + EXPECT_DEATH(static_cast<const CordRep*>(rep)->btree(), ".*"); + CordRep::Unref(rep); +#endif +} + +TEST(CordRepBtreeTest, EdgeData) { + CordRepFlat* flat = MakeFlat("Hello world"); + CordRepExternal* external = MakeExternal("Hello external"); + CordRep* substr1 = MakeSubstring(1, 6, CordRep::Ref(flat)); + CordRep* substr2 = MakeSubstring(1, 6, CordRep::Ref(external)); + CordRep* bad_substr = MakeSubstring(1, 2, CordRep::Ref(substr1)); + + EXPECT_TRUE(IsDataEdge(flat)); + EXPECT_THAT(EdgeData(flat).data(), TypedEq<const void*>(flat->Data())); + EXPECT_THAT(EdgeData(flat), Eq("Hello world")); + + EXPECT_TRUE(IsDataEdge(external)); + EXPECT_THAT(EdgeData(external).data(), TypedEq<const void*>(external->base)); + EXPECT_THAT(EdgeData(external), Eq("Hello external")); + + EXPECT_TRUE(IsDataEdge(substr1)); + EXPECT_THAT(EdgeData(substr1).data(), TypedEq<const void*>(flat->Data() + 1)); + EXPECT_THAT(EdgeData(substr1), Eq("ello w")); + + EXPECT_TRUE(IsDataEdge(substr2)); + EXPECT_THAT(EdgeData(substr2).data(), + TypedEq<const void*>(external->base + 1)); + EXPECT_THAT(EdgeData(substr2), Eq("ello e")); + + EXPECT_FALSE(IsDataEdge(bad_substr)); +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + EXPECT_DEATH(EdgeData(bad_substr), ".*"); +#endif + + CordRep::Unref(bad_substr); + CordRep::Unref(substr2); + CordRep::Unref(substr1); + CordRep::Unref(external); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, CreateUnrefLeaf) { + auto* flat = MakeFlat("a"); + auto* leaf = CordRepBtree::Create(flat); + EXPECT_THAT(leaf->size(), Eq(1)); + EXPECT_THAT(leaf->height(), Eq(0)); + EXPECT_THAT(leaf->Edges(), ElementsAre(flat)); + CordRepBtree::Unref(leaf); +} + +TEST(CordRepBtreeTest, NewUnrefNode) { + auto* leaf = CordRepBtree::Create(MakeFlat("a")); + CordRepBtree* tree = CordRepBtree::New(leaf); + EXPECT_THAT(tree->size(), Eq(1)); + EXPECT_THAT(tree->height(), Eq(1)); + EXPECT_THAT(tree->Edges(), ElementsAre(leaf)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeTest, AppendToLeafToCapacity) { + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.back()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + flats.push_back(MakeHexFlat(i)); + auto* result = CordRepBtree::Append(leaf, flats.back()); + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, PrependToLeafToCapacity) { + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_front(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.front()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + flats.push_front(MakeHexFlat(i)); + auto* result = CordRepBtree::Prepend(leaf, flats.front()); + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +// This test specifically aims at code aligning data at either the front or the +// back of the contained `edges[]` array, alternating Append and Prepend will +// move `begin()` and `end()` values as needed for each added value. +TEST_P(CordRepBtreeTest, AppendPrependToLeafToCapacity) { + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_front(MakeHexFlat(0)); + auto* leaf = CordRepBtree::Create(flats.front()); + + for (size_t i = 1; i < CordRepBtree::kMaxCapacity; ++i) { + refs.RefIf(shared(), leaf); + CordRepBtree* result; + if (i % 2 != 0) { + flats.push_front(MakeHexFlat(i)); + result = CordRepBtree::Prepend(leaf, flats.front()); + } else { + flats.push_back(MakeHexFlat(i)); + result = CordRepBtree::Append(leaf, flats.back()); + } + EXPECT_THAT(result->height(), Eq(0)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(result->Edges(), ElementsAreArray(flats)); + leaf = result; + } + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, AppendToLeafBeyondCapacity) { + AutoUnref refs; + auto* leaf = MakeLeaf(); + refs.RefIf(shared(), leaf); + CordRep* flat = MakeFlat("abc"); + auto* result = CordRepBtree::Append(leaf, flat); + ASSERT_THAT(result, IsNode(1)); + EXPECT_THAT(result, Ne(leaf)); + absl::Span<CordRep* const> edges = result->Edges(); + ASSERT_THAT(edges, ElementsAre(leaf, IsNode(0))); + EXPECT_THAT(edges[1]->btree()->Edges(), ElementsAre(flat)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, PrependToLeafBeyondCapacity) { + AutoUnref refs; + auto* leaf = MakeLeaf(); + refs.RefIf(shared(), leaf); + CordRep* flat = MakeFlat("abc"); + auto* result = CordRepBtree::Prepend(leaf, flat); + ASSERT_THAT(result, IsNode(1)); + EXPECT_THAT(result, Ne(leaf)); + absl::Span<CordRep* const> edges = result->Edges(); + ASSERT_THAT(edges, ElementsAre(IsNode(0), leaf)); + EXPECT_THAT(edges[0]->btree()->Edges(), ElementsAre(flat)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, AppendToTreeOneDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap; ++i) { + flats.push_back(MakeHexFlat(i)); + tree = CordRepBtree::Append(tree, flats.back()); + } + ASSERT_THAT(tree, IsNode(1)); + + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref leaf node once every 4 iterations, which should not have an + // observable effect other than that the leaf itself is copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 4 == 0, tree->Edges().back()); + + flats.push_back(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Append(tree, flats.back()); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, AppendToTreeTwoDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::vector<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap * max_cap; ++i) { + flats.push_back(MakeHexFlat(i)); + tree = CordRepBtree::Append(tree, flats.back()); + } + ASSERT_THAT(tree, IsNode(2)); + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref child node once every 16 iterations, and leaf node every 4 + // iterrations which which should not have an observable effect other than + // the node and/or the leaf below it being copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 16 == 0, tree->Edges().back()); + refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back()); + + flats.push_back(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Append(tree, flats.back()); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, PrependToTreeOneDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap; ++i) { + flats.push_front(MakeHexFlat(i)); + tree = CordRepBtree::Prepend(tree, flats.front()); + } + ASSERT_THAT(tree, IsNode(1)); + + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref leaf node once every 4 iterations which should not have an observable + // effect other than than the leaf itself is copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 4 == 0, tree->Edges().back()); + + flats.push_front(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front()); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, PrependToTreeTwoDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + AutoUnref refs; + std::deque<CordRep*> flats; + flats.push_back(MakeHexFlat(0)); + CordRepBtree* tree = CordRepBtree::Create(flats.back()); + for (size_t i = 1; i <= max_cap * max_cap; ++i) { + flats.push_front(MakeHexFlat(i)); + tree = CordRepBtree::Prepend(tree, flats.front()); + } + ASSERT_THAT(tree, IsNode(2)); + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; ++i) { + // Ref top level tree based on param. + // Ref child node once every 16 iterations, and leaf node every 4 + // iterrations which which should not have an observable effect other than + // the node and/or the leaf below it being copied. + refs.RefIf(shared(), tree); + refs.RefIf(i % 16 == 0, tree->Edges().back()); + refs.RefIf(i % 4 == 0, tree->Edges().back()->btree()->Edges().back()); + + flats.push_front(MakeHexFlat(i)); + CordRepBtree* result = CordRepBtree::Prepend(tree, flats.front()); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + std::vector<CordRep*> edges = GetLeafEdges(result); + ASSERT_THAT(edges, ElementsAreArray(flats)); + tree = result; + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeDualTest, MergeLeafsNotExceedingCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side leaf appending all contained flats to `flats` + CordRepBtree* left = MakeLeaf(3); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side leaf appending all contained flats to `flats` + CordRepBtree* right = MakeLeaf(2); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(0)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(tree->Edges(), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafsExceedingCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + + // Build `left` side tree appending all contained flats to `flats` + CordRepBtree* left = MakeLeaf(CordRepBtree::kMaxCapacity - 2); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all contained flats to `flats` + CordRepBtree* right = MakeLeaf(CordRepBtree::kMaxCapacity - 1); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), ElementsAre(left, right)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeEqualHeightTrees) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all contained flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all contained flats to `flats` + CordRepBtree* right = MakeTree(CordRepBtree::kMaxCapacity * 2); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(5)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeNotExceedingLeafCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all added flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 2 + 2); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all added flats to `flats` + CordRepBtree* right = MakeTree(3); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(3)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeDualTest, MergeLeafWithTreeExceedingLeafCapacity) { + for (bool use_append : {false, true}) { + SCOPED_TRACE(use_append ? "Using Append" : "Using Prepend"); + + AutoUnref refs; + std::vector<CordRep*> flats; + + // Build `left` side tree appending all added flats to `flats` + CordRepBtree* left = MakeTree(CordRepBtree::kMaxCapacity * 3 - 2); + GetLeafEdges(left, flats); + refs.RefIf(first_shared(), left); + + // Build `right` side tree appending all added flats to `flats` + CordRepBtree* right = MakeTree(3); + GetLeafEdges(right, flats); + refs.RefIf(second_shared(), right); + + CordRepBtree* tree = use_append ? CordRepBtree::Append(left, right) + : CordRepBtree::Prepend(right, left); + EXPECT_THAT(tree, IsNode(1)); + EXPECT_THAT(tree->Edges(), SizeIs(4)); + + // `tree` contains all flats originally belonging to `left` and `right`. + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +void RefEdgesAt(size_t depth, AutoUnref& refs, CordRepBtree* tree) { + absl::Span<CordRep* const> edges = tree->Edges(); + if (depth == 0) { + refs.Ref(edges.front()); + refs.Ref(edges.back()); + } else { + assert(tree->height() > 0); + RefEdgesAt(depth - 1, refs, edges.front()->btree()); + RefEdgesAt(depth - 1, refs, edges.back()->btree()); + } +} + +TEST(CordRepBtreeTest, MergeFuzzTest) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + std::minstd_rand rnd; + std::uniform_int_distribution<int> coin_flip(0, 1); + std::uniform_int_distribution<int> dice_throw(1, 6); + + auto random_leaf_count = [&]() { + std::uniform_int_distribution<int> dist_height(0, 3); + std::uniform_int_distribution<int> dist_leaf(0, max_cap - 1); + const size_t height = dist_height(rnd); + return (height ? pow(max_cap, height) : 0) + dist_leaf(rnd); + }; + + for (int i = 0; i < 10000; ++i) { + AutoUnref refs; + std::vector<CordRep*> flats; + + CordRepBtree* left = MakeTree(random_leaf_count(), coin_flip(rnd)); + GetLeafEdges(left, flats); + if (dice_throw(rnd) == 1) { + std::uniform_int_distribution<int> dist(0, left->height()); + RefEdgesAt(dist(rnd), refs, left); + } + + CordRepBtree* right = MakeTree(random_leaf_count(), coin_flip(rnd)); + GetLeafEdges(right, flats); + if (dice_throw(rnd) == 1) { + std::uniform_int_distribution<int> dist(0, right->height()); + RefEdgesAt(dist(rnd), refs, right); + } + + CordRepBtree* tree = CordRepBtree::Append(left, right); + EXPECT_THAT(GetLeafEdges(tree), ElementsAreArray(flats)); + CordRepBtree::Unref(tree); + } +} + +TEST_P(CordRepBtreeTest, RemoveSuffix) { + // Create tree of 1, 2 and 3 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + for (size_t cap : {max_cap - 1, max_cap * 2, max_cap * max_cap * 2}) { + const std::string data = CreateRandomString(cap * 512); + + { + // Verify RemoveSuffix(<all>) + AutoUnref refs; + CordRepBtree* node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, data.length()), Eq(nullptr)); + + // Verify RemoveSuffix(<none>) + node = refs.RefIf(shared(), CreateTree(data, 512)); + EXPECT_THAT(CordRepBtree::RemoveSuffix(node, 0), Eq(node)); + CordRep::Unref(node); + } + + for (int n = 1; n < data.length(); ++n) { + AutoUnref refs; + auto flats = CreateFlatsFromString(data, 512); + CordRepBtree* node = refs.RefIf(shared(), CreateTree(flats)); + CordRep* rep = refs.Add(CordRepBtree::RemoveSuffix(node, n)); + EXPECT_THAT(CordToString(rep), Eq(data.substr(0, data.length() - n))); + + // Collect all flats + auto is_flat = [](CordRep* rep) { return rep->tag >= FLAT; }; + std::vector<CordRep*> edges = CordCollectRepsIf(is_flat, rep); + ASSERT_THAT(edges.size(), Le(flats.size())); + + // Isolate last edge + CordRep* last_edge = edges.back(); + edges.pop_back(); + const size_t last_length = rep->length - edges.size() * 512; + + // All flats except the last edge must be kept or copied 'as is' + int index = 0; + for (CordRep* edge : edges) { + ASSERT_THAT(edge, Eq(flats[index++])); + ASSERT_THAT(edge->length, Eq(512)); + } + + // CordRepBtree may optimize small substrings to avoid waste, so only + // check for flat sharing / updates where the code should always do this. + if (last_length >= 500) { + EXPECT_THAT(last_edge, Eq(flats[index++])); + if (shared()) { + EXPECT_THAT(last_edge->length, Eq(512)); + } else { + EXPECT_TRUE(last_edge->refcount.IsOne()); + EXPECT_THAT(last_edge->length, Eq(last_length)); + } + } + } + } +} + +TEST(CordRepBtreeTest, SubTree) { + // Create tree of at least 2 levels high + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * 2; + const std::string data = CreateRandomString(n * 3); + std::vector<CordRep*> flats; + for (absl::string_view s = data; !s.empty(); s.remove_prefix(3)) { + flats.push_back(MakeFlat(s.substr(0, 3))); + } + CordRepBtree* node = CordRepBtree::Create(CordRep::Ref(flats[0])); + for (size_t i = 1; i < flats.size(); ++i) { + node = CordRepBtree::Append(node, CordRep::Ref(flats[i])); + } + + for (int offset = 0; offset < data.length(); ++offset) { + for (int length = 1; length <= data.length() - offset; ++length) { + CordRep* rep = node->SubTree(offset, length); + EXPECT_THAT(CordToString(rep), Eq(data.substr(offset, length))); + CordRep::Unref(rep); + } + } + CordRepBtree::Unref(node); + for (CordRep* rep : flats) { + CordRep::Unref(rep); + } +} + +TEST(CordRepBtreeTest, SubTreeOnExistingSubstring) { + // This test verifies that a SubTree call on a pre-existing (large) substring + // adjusts the existing substring if not shared, and else rewrites the + // existing substring. + AutoUnref refs; + std::string data = CreateRandomString(1000); + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc")); + CordRep* flat = MakeFlat(data); + leaf = CordRepBtree::Append(leaf, flat); + + // Setup tree containing substring. + CordRep* result = leaf->SubTree(0, 3 + 990); + ASSERT_THAT(result->tag, Eq(BTREE)); + CordRep::Unref(leaf); + leaf = result->btree(); + ASSERT_THAT(leaf->Edges(), ElementsAre(_, IsSubstring(0, 990))); + EXPECT_THAT(leaf->Edges()[1]->substring()->child, Eq(flat)); + + // Verify substring of substring. + result = leaf->SubTree(3 + 5, 970); + ASSERT_THAT(result, IsSubstring(5, 970)); + EXPECT_THAT(result->substring()->child, Eq(flat)); + CordRep::Unref(result); + + CordRep::Unref(leaf); +} + +TEST_P(CordRepBtreeTest, AddDataToLeaf) { + const size_t n = CordRepBtree::kMaxCapacity; + const std::string data = CreateRandomString(n * 3); + + for (bool append : {true, false}) { + AutoUnref refs; + DataConsumer consumer(data, append); + SCOPED_TRACE(append ? "Append" : "Prepend"); + + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat(consumer.Next(3))); + for (size_t i = 1; i < n; ++i) { + refs.RefIf(shared(), leaf); + CordRepBtree* result = BtreeAdd(leaf, append, consumer.Next(3)); + EXPECT_THAT(result, Conditional(shared(), Ne(leaf), Eq(leaf))); + EXPECT_THAT(CordToString(result), Eq(consumer.Consumed())); + leaf = result; + } + CordRep::Unref(leaf); + } +} + +TEST_P(CordRepBtreeTest, AppendDataToTree) { + AutoUnref refs; + size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = refs.RefIf(shared(), CreateTree(data, 3)); + CordRepBtree* leaf0 = tree->Edges()[0]->btree(); + CordRepBtree* leaf1 = tree->Edges()[1]->btree(); + CordRepBtree* result = CordRepBtree::Append(tree, "123456789"); + EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + EXPECT_THAT(result->Edges(), + ElementsAre(leaf0, Conditional(shared(), Ne(leaf1), Eq(leaf1)))); + EXPECT_THAT(CordToString(result), Eq(data + "123456789")); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, PrependDataToTree) { + AutoUnref refs; + size_t n = CordRepBtree::kMaxCapacity + CordRepBtree::kMaxCapacity / 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = refs.RefIf(shared(), CreateTreeReverse(data, 3)); + CordRepBtree* leaf0 = tree->Edges()[0]->btree(); + CordRepBtree* leaf1 = tree->Edges()[1]->btree(); + CordRepBtree* result = CordRepBtree::Prepend(tree, "123456789"); + EXPECT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + EXPECT_THAT(result->Edges(), + ElementsAre(Conditional(shared(), Ne(leaf0), Eq(leaf0)), leaf1)); + EXPECT_THAT(CordToString(result), Eq("123456789" + data)); + CordRep::Unref(result); +} + +TEST_P(CordRepBtreeTest, AddDataToTreeThreeLevelsDeep) { + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * max_cap; + const std::string data = CreateRandomString(n * 3); + + for (bool append : {true, false}) { + AutoUnref refs; + DataConsumer consumer(data, append); + SCOPED_TRACE(append ? "Append" : "Prepend"); + + // Fill leaf + CordRepBtree* tree = CordRepBtree::Create(MakeFlat(consumer.Next(3))); + for (size_t i = 1; i < max_cap; ++i) { + tree = BtreeAdd(tree, append, consumer.Next(3)); + } + ASSERT_THAT(CordToString(tree), Eq(consumer.Consumed())); + + // Fill to maximum at one deep + refs.RefIf(shared(), tree); + CordRepBtree* result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, IsNode(1)); + ASSERT_THAT(result, Ne(tree)); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + for (size_t i = max_cap + 1; i < max_cap * max_cap; ++i) { + refs.RefIf(shared(), tree); + result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + } + + // Fill to maximum at two deep + refs.RefIf(shared(), tree); + result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, IsNode(2)); + ASSERT_THAT(result, Ne(tree)); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + for (size_t i = max_cap * max_cap + 1; i < max_cap * max_cap * max_cap; + ++i) { + refs.RefIf(shared(), tree); + result = BtreeAdd(tree, append, consumer.Next(3)); + ASSERT_THAT(result, Conditional(shared(), Ne(tree), Eq(tree))); + ASSERT_THAT(CordToString(result), Eq(consumer.Consumed())); + tree = result; + } + + CordRep::Unref(tree); + } +} + +TEST_P(CordRepBtreeTest, AddLargeDataToLeaf) { + const size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * max_cap * 3 + 2; + const std::string data = CreateRandomString(n * kMaxFlatLength); + + for (bool append : {true, false}) { + AutoUnref refs; + SCOPED_TRACE(append ? "Append" : "Prepend"); + + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc")); + refs.RefIf(shared(), leaf); + CordRepBtree* result = BtreeAdd(leaf, append, data); + EXPECT_THAT(CordToString(result), Eq(append ? "abc" + data : data + "abc")); + CordRep::Unref(result); + } +} + +TEST_P(CordRepBtreeTest, CreateFromTreeReturnsTree) { + AutoUnref refs; + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world")); + refs.RefIf(shared(), leaf); + CordRepBtree* result = CordRepBtree::Create(leaf); + EXPECT_THAT(result, Eq(leaf)); + CordRep::Unref(result); +} + +TEST(CordRepBtreeTest, GetCharacter) { + size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = CreateTree(data, 3); + // Add a substring node for good measure. + tree = tree->Append(tree, MakeSubstring(4, 5, MakeFlat("abcdefghijklm"))); + data += "efghi"; + for (size_t i = 0; i < data.length(); ++i) { + ASSERT_THAT(tree->GetCharacter(i), Eq(data[i])); + } + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, IsFlatSingleFlat) { + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("Hello world")); + + absl::string_view fragment; + EXPECT_TRUE(leaf->IsFlat(nullptr)); + EXPECT_TRUE(leaf->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + fragment = ""; + EXPECT_TRUE(leaf->IsFlat(0, 11, nullptr)); + EXPECT_TRUE(leaf->IsFlat(0, 11, &fragment)); + EXPECT_THAT(fragment, Eq("Hello world")); + + // Arbitrary ranges must check true as well. + EXPECT_TRUE(leaf->IsFlat(1, 4, &fragment)); + EXPECT_THAT(fragment, Eq("ello")); + EXPECT_TRUE(leaf->IsFlat(6, 5, &fragment)); + EXPECT_THAT(fragment, Eq("world")); + + CordRep::Unref(leaf); +} + +TEST(CordRepBtreeTest, IsFlatMultiFlat) { + size_t n = CordRepBtree::kMaxCapacity * CordRepBtree::kMaxCapacity + 2; + std::string data = CreateRandomString(n * 3); + CordRepBtree* tree = CreateTree(data, 3); + // Add substring nodes for good measure. + tree = tree->Append(tree, MakeSubstring(4, 3, MakeFlat("abcdefghijklm"))); + tree = tree->Append(tree, MakeSubstring(8, 3, MakeFlat("abcdefghijklm"))); + data += "efgijk"; + + EXPECT_FALSE(tree->IsFlat(nullptr)); + absl::string_view fragment = "Can't touch this"; + EXPECT_FALSE(tree->IsFlat(&fragment)); + EXPECT_THAT(fragment, Eq("Can't touch this")); + + for (size_t offset = 0; offset < data.size(); offset += 3) { + EXPECT_TRUE(tree->IsFlat(offset, 3, nullptr)); + EXPECT_TRUE(tree->IsFlat(offset, 3, &fragment)); + EXPECT_THAT(fragment, Eq(data.substr(offset, 3))); + + fragment = "Can't touch this"; + if (offset > 0) { + EXPECT_FALSE(tree->IsFlat(offset - 1, 4, nullptr)); + EXPECT_FALSE(tree->IsFlat(offset - 1, 4, &fragment)); + EXPECT_THAT(fragment, Eq("Can't touch this")); + } + if (offset < data.size() - 4) { + EXPECT_FALSE(tree->IsFlat(offset, 4, nullptr)); + EXPECT_FALSE(tree->IsFlat(offset, 4, &fragment)); + EXPECT_THAT(fragment, Eq("Can't touch this")); + } + } + + CordRep::Unref(tree); +} + +#if defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotPrivate) { + CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo")); + CordRepBtree::Ref(tree); + EXPECT_DEATH(tree->GetAppendBuffer(1), ".*"); + CordRepBtree::Unref(tree); + CordRepBtree::Unref(tree); +} + +#endif // defined(GTEST_HAS_DEATH_TEST) && !defined(NDEBUG) + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferNotFlat) { + CordRepBtree* tree = CordRepBtree::Create(MakeExternal("Foo")); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNotPrivate) { + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat)); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); + CordRep::Unref(flat); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferTreeNotPrivate) { + if (height() == 0) return; + AutoUnref refs; + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(CordRep::Ref(flat)); + for (int i = 1; i <= height(); ++i) { + if (i == (height() + 1) / 2) refs.Ref(tree); + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); + CordRep::Unref(flat); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatNoCapacity) { + CordRepFlat* flat = MakeFlat("abc"); + flat->length = flat->Capacity(); + CordRepBtree* tree = CordRepBtree::Create(flat); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + EXPECT_THAT(tree->GetAppendBuffer(1), SizeIs(0)); + CordRepBtree::Unref(tree); +} + +TEST_P(CordRepBtreeHeightTest, GetAppendBufferFlatWithCapacity) { + CordRepFlat* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(flat); + for (int i = 1; i <= height(); ++i) { + tree = CordRepBtree::New(tree); + } + absl::Span<char> span = tree->GetAppendBuffer(2); + EXPECT_THAT(span, SizeIs(2)); + EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 3)); + EXPECT_THAT(tree->length, Eq(5)); + + size_t avail = flat->Capacity() - 5; + span = tree->GetAppendBuffer(avail + 100); + EXPECT_THAT(span, SizeIs(avail)); + EXPECT_THAT(span.data(), TypedEq<void*>(flat->Data() + 5)); + EXPECT_THAT(tree->length, Eq(5 + avail)); + + CordRepBtree::Unref(tree); +} + +TEST(CordRepBtreeTest, Dump) { + // Handles nullptr + std::stringstream ss; + CordRepBtree::Dump(nullptr, ss); + CordRepBtree::Dump(nullptr, "Once upon a label", ss); + CordRepBtree::Dump(nullptr, "Once upon a label", false, ss); + CordRepBtree::Dump(nullptr, "Once upon a label", true, ss); + + // Cover legal edges + CordRepFlat* flat = MakeFlat("Hello world"); + CordRepExternal* external = MakeExternal("Hello external"); + CordRep* substr_flat = MakeSubstring(1, 6, CordRep::Ref(flat)); + CordRep* substr_external = MakeSubstring(2, 7, CordRep::Ref(external)); + + // Build tree + CordRepBtree* tree = CordRepBtree::Create(flat); + tree = CordRepBtree::Append(tree, external); + tree = CordRepBtree::Append(tree, substr_flat); + tree = CordRepBtree::Append(tree, substr_external); + + // Repeat until we have a tree + while (tree->height() == 0) { + tree = CordRepBtree::Append(tree, CordRep::Ref(flat)); + tree = CordRepBtree::Append(tree, CordRep::Ref(external)); + tree = CordRepBtree::Append(tree, CordRep::Ref(substr_flat)); + tree = CordRepBtree::Append(tree, CordRep::Ref(substr_external)); + } + + for (int api = 0; api <= 3; ++api) { + absl::string_view api_scope; + std::stringstream ss; + switch (api) { + case 0: + api_scope = "Bare"; + CordRepBtree::Dump(tree, ss); + break; + case 1: + api_scope = "Label only"; + CordRepBtree::Dump(tree, "Once upon a label", ss); + break; + case 2: + api_scope = "Label no content"; + CordRepBtree::Dump(tree, "Once upon a label", false, ss); + break; + default: + api_scope = "Label and content"; + CordRepBtree::Dump(tree, "Once upon a label", true, ss); + break; + } + SCOPED_TRACE(api_scope); + std::string str = ss.str(); + + // Contains Node(depth) / Leaf and private / shared indicators + EXPECT_THAT(str, AllOf(HasSubstr("Node(1)"), HasSubstr("Leaf"), + HasSubstr("Private"), HasSubstr("Shared"))); + + // Contains length and start offset of all data edges + EXPECT_THAT(str, AllOf(HasSubstr("len = 11"), HasSubstr("len = 14"), + HasSubstr("len = 6"), HasSubstr("len = 7"), + HasSubstr("start = 1"), HasSubstr("start = 2"))); + + // Contains address of all data edges + EXPECT_THAT( + str, AllOf(HasSubstr(absl::StrCat("0x", absl::Hex(flat))), + HasSubstr(absl::StrCat("0x", absl::Hex(external))), + HasSubstr(absl::StrCat("0x", absl::Hex(substr_flat))), + HasSubstr(absl::StrCat("0x", absl::Hex(substr_external))))); + + if (api != 0) { + // Contains label + EXPECT_THAT(str, HasSubstr("Once upon a label")); + } + + if (api != 3) { + // Does not contain contents + EXPECT_THAT(str, Not(AnyOf((HasSubstr("data = \"Hello world\""), + HasSubstr("data = \"Hello external\""), + HasSubstr("data = \"ello w\""), + HasSubstr("data = \"llo ext\""))))); + } else { + // Contains contents + EXPECT_THAT(str, AllOf((HasSubstr("data = \"Hello world\""), + HasSubstr("data = \"Hello external\""), + HasSubstr("data = \"ello w\""), + HasSubstr("data = \"llo ext\"")))); + } + } + + CordRep::Unref(tree); +} + +TEST(CordRepBtreeTest, IsValid) { + EXPECT_FALSE(CordRepBtree::IsValid(nullptr)); + + CordRepBtree* empty = CordRepBtree::New(0); + EXPECT_TRUE(CordRepBtree::IsValid(empty)); + CordRep::Unref(empty); + + for (bool as_tree : {false, true}) { + CordRepBtree* leaf = CordRepBtree::Create(MakeFlat("abc")); + CordRepBtree* tree = as_tree ? CordRepBtree::New(leaf) : nullptr; + CordRepBtree* check = as_tree ? tree : leaf; + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->length--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->length++; + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->tag--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->tag++; + + // Height + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->storage[0] = static_cast<uint8_t>(CordRepBtree::kMaxHeight + 1); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[0] = 1; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[0] = 0; + + // Begin + ASSERT_TRUE(CordRepBtree::IsValid(check)); + const uint8_t begin = leaf->storage[1]; + leaf->storage[1] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[1] = 2; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[1] = begin; + + // End + ASSERT_TRUE(CordRepBtree::IsValid(check)); + const uint8_t end = leaf->storage[2]; + leaf->storage[2] = static_cast<uint8_t>(CordRepBtree::kMaxCapacity + 1); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->storage[2] = end; + + // DataEdge tag and value + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep* const edge = leaf->Edges()[0]; + const uint8_t tag = edge->tag; + CordRepBtreeTestPeer::SetEdge(leaf, begin, nullptr); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + CordRepBtreeTestPeer::SetEdge(leaf, begin, edge); + edge->tag = BTREE; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + edge->tag = tag; + + if (as_tree) { + ASSERT_TRUE(CordRepBtree::IsValid(check)); + leaf->length--; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + leaf->length++; + + // Height + ASSERT_TRUE(CordRepBtree::IsValid(check)); + tree->storage[0] = static_cast<uint8_t>(2); + EXPECT_FALSE(CordRepBtree::IsValid(check)); + tree->storage[0] = 1; + + // Btree edge + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep* const edge = tree->Edges()[0]; + const uint8_t tag = edge->tag; + edge->tag = FLAT; + EXPECT_FALSE(CordRepBtree::IsValid(check)); + edge->tag = tag; + } + + ASSERT_TRUE(CordRepBtree::IsValid(check)); + CordRep::Unref(check); + } +} + +TEST(CordRepBtreeTest, AssertValid) { + CordRepBtree* tree = CordRepBtree::Create(MakeFlat("abc")); + const CordRepBtree* ctree = tree; + EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)); + EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)); + +#if defined(GTEST_HAS_DEATH_TEST) + CordRepBtree* nulltree = nullptr; + const CordRepBtree* cnulltree = nullptr; + EXPECT_DEBUG_DEATH( + EXPECT_THAT(CordRepBtree::AssertValid(nulltree), Eq(nulltree)), ".*"); + EXPECT_DEBUG_DEATH( + EXPECT_THAT(CordRepBtree::AssertValid(cnulltree), Eq(cnulltree)), ".*"); + + tree->length--; + EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(tree), Eq(tree)), + ".*"); + EXPECT_DEBUG_DEATH(EXPECT_THAT(CordRepBtree::AssertValid(ctree), Eq(ctree)), + ".*"); + tree->length++; +#endif + CordRep::Unref(tree); +} + +TEST(CordRepBtreeTest, CheckAssertValidShallowVsDeep) { + // Restore exhaustive validation on any exit. + const bool exhaustive_validation = cord_btree_exhaustive_validation.load(); + auto cleanup = absl::MakeCleanup([exhaustive_validation] { + cord_btree_exhaustive_validation.store(exhaustive_validation); + }); + + // Create a tree of at least 2 levels, and mess with the original flat, which + // should go undetected in shallow mode as the flat is too far away, but + // should be detected in forced non-shallow mode. + CordRep* flat = MakeFlat("abc"); + CordRepBtree* tree = CordRepBtree::Create(flat); + constexpr size_t max_cap = CordRepBtree::kMaxCapacity; + const size_t n = max_cap * max_cap * 2; + for (size_t i = 0; i < n; ++i) { + tree = CordRepBtree::Append(tree, MakeFlat("Hello world")); + } + flat->length = 100; + + cord_btree_exhaustive_validation.store(false); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_TRUE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); + CordRepBtree::AssertValid(tree); + CordRepBtree::AssertValid(tree, true); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, false), ".*"); +#endif + + cord_btree_exhaustive_validation.store(true); + EXPECT_FALSE(CordRepBtree::IsValid(tree)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, true)); + EXPECT_FALSE(CordRepBtree::IsValid(tree, false)); +#if defined(GTEST_HAS_DEATH_TEST) + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree), ".*"); + EXPECT_DEBUG_DEATH(CordRepBtree::AssertValid(tree, true), ".*"); +#endif + + flat->length = 3; + CordRep::Unref(tree); +} + +TEST_P(CordRepBtreeTest, Rebuild) { + for (size_t size : {3, 8, 100, 10000, 1000000}) { + SCOPED_TRACE(absl::StrCat("Rebuild @", size)); + + std::vector<CordRepFlat*> flats; + for (int i = 0; i < size; ++i) { + flats.push_back(CordRepFlat::New(2)); + flats.back()->Data()[0] = 'x'; + flats.back()->length = 1; + } + + // Build the tree into 'right', and each so many 'split_limit' edges, + // combine 'left' + 'right' into a new 'left', and start a new 'right'. + // This guarantees we get a reasonable amount of chaos in the tree. + size_t split_count = 0; + size_t split_limit = 3; + auto it = flats.begin(); + CordRepBtree* left = nullptr; + CordRepBtree* right = CordRepBtree::New(*it); + while (++it != flats.end()) { + if (++split_count >= split_limit) { + split_limit += split_limit / 16; + left = left ? CordRepBtree::Append(left, right) : right; + right = CordRepBtree::New(*it); + } else { + right = CordRepBtree::Append(right, *it); + } + } + + // Finalize tree + left = left ? CordRepBtree::Append(left, right) : right; + + // Rebuild + AutoUnref ref; + left = ref.Add(CordRepBtree::Rebuild(ref.RefIf(shared(), left))); + ASSERT_TRUE(CordRepBtree::IsValid(left)); + + // Verify we have the exact same edges in the exact same order. + bool ok = true; + it = flats.begin(); + CordVisitReps(left, [&](CordRep* edge) { + if (edge->tag < FLAT) return; + ok = ok && (it != flats.end() && *it++ == edge); + }); + EXPECT_TRUE(ok && it == flats.end()) << "Rebuild edges mismatch"; + } +} + +// Convenience helper for CordRepBtree::ExtractAppendBuffer +CordRepBtree::ExtractResult ExtractLast(CordRepBtree* input, size_t cap = 1) { + return CordRepBtree::ExtractAppendBuffer(input, cap); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferLeafSingleFlat) { + CordRep* flat = MakeFlat("Abc"); + CordRepBtree* leaf = CordRepBtree::Create(flat); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(nullptr, flat)); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeSingleFlat) { + CordRep* flat = MakeFlat("Abc"); + CordRepBtree* leaf = CordRepBtree::Create(flat); + CordRepBtree* node = CordRepBtree::New(leaf); + EXPECT_THAT(ExtractLast(node), EqExtractResult(nullptr, flat)); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferLeafTwoFlats) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeTwoFlats) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + CordRepBtree* node = CordRepBtree::New(leaf); + EXPECT_THAT(ExtractLast(node), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeTwoFlatsInTwoLeafs) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf1 = CordRepBtree::Create(flats[0]); + CordRepBtree* leaf2 = CordRepBtree::Create(flats[1]); + CordRepBtree* node = CordRepBtree::New(leaf1, leaf2); + EXPECT_THAT(ExtractLast(node), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferLeafThreeFlats) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdefghi", 3); + CordRepBtree* leaf = CreateTree(flats); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, flats[2])); + CordRep::Unref(flats[2]); + CordRep::Unref(leaf); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeThreeFlatsRightNoFolding) { + CordRep* flat = MakeFlat("Abc"); + std::vector<CordRep*> flats = CreateFlatsFromString("defghi", 3); + CordRepBtree* leaf1 = CordRepBtree::Create(flat); + CordRepBtree* leaf2 = CreateTree(flats); + CordRepBtree* node = CordRepBtree::New(leaf1, leaf2); + EXPECT_THAT(ExtractLast(node), EqExtractResult(node, flats[1])); + EXPECT_THAT(node->Edges(), ElementsAre(leaf1, leaf2)); + EXPECT_THAT(leaf1->Edges(), ElementsAre(flat)); + EXPECT_THAT(leaf2->Edges(), ElementsAre(flats[0])); + CordRep::Unref(node); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNodeThreeFlatsRightLeafFolding) { + CordRep* flat = MakeFlat("Abc"); + std::vector<CordRep*> flats = CreateFlatsFromString("defghi", 3); + CordRepBtree* leaf1 = CreateTree(flats); + CordRepBtree* leaf2 = CordRepBtree::Create(flat); + CordRepBtree* node = CordRepBtree::New(leaf1, leaf2); + EXPECT_THAT(ExtractLast(node), EqExtractResult(leaf1, flat)); + EXPECT_THAT(leaf1->Edges(), ElementsAreArray(flats)); + CordRep::Unref(leaf1); + CordRep::Unref(flat); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNoCapacity) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + size_t avail = flats[1]->flat()->Capacity() - flats[1]->length; + EXPECT_THAT(ExtractLast(leaf, avail + 1), EqExtractResult(leaf, nullptr)); + EXPECT_THAT(ExtractLast(leaf, avail), EqExtractResult(flats[0], flats[1])); + CordRep::Unref(flats[0]); + CordRep::Unref(flats[1]); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferNotFlat) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + auto substr = MakeSubstring(1, 2, flats[1]); + CordRepBtree* leaf = CreateTree({flats[0], substr}); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, nullptr)); + CordRep::Unref(leaf); +} + +TEST(CordRepBtreeTest, ExtractAppendBufferShared) { + std::vector<CordRep*> flats = CreateFlatsFromString("abcdef", 3); + CordRepBtree* leaf = CreateTree(flats); + + CordRep::Ref(flats[1]); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, nullptr)); + CordRep::Unref(flats[1]); + + CordRep::Ref(leaf); + EXPECT_THAT(ExtractLast(leaf), EqExtractResult(leaf, nullptr)); + CordRep::Unref(leaf); + + CordRepBtree* node = CordRepBtree::New(leaf); + CordRep::Ref(node); + EXPECT_THAT(ExtractLast(node), EqExtractResult(node, nullptr)); + CordRep::Unref(node); + + CordRep::Unref(node); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_consume.cc b/absl/strings/internal/cord_rep_consume.cc new file mode 100644 index 00000000..20a55797 --- /dev/null +++ b/absl/strings/internal/cord_rep_consume.cc @@ -0,0 +1,62 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_consume.h" + +#include <array> +#include <utility> + +#include "absl/container/inlined_vector.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +// Unrefs the provided `substring`, and returns `substring->child` +// Adds or assumes a reference on `substring->child` +CordRep* ClipSubstring(CordRepSubstring* substring) { + CordRep* child = substring->child; + if (substring->refcount.IsOne()) { + delete substring; + } else { + CordRep::Ref(child); + CordRep::Unref(substring); + } + return child; +} + +} // namespace + +void Consume(CordRep* rep, ConsumeFn consume_fn) { + size_t offset = 0; + size_t length = rep->length; + + if (rep->tag == SUBSTRING) { + offset += rep->substring()->start; + rep = ClipSubstring(rep->substring()); + } + consume_fn(rep, offset, length); +} + +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn) { + return Consume(rep, std::move(consume_fn)); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_consume.h b/absl/strings/internal/cord_rep_consume.h new file mode 100644 index 00000000..d46fca2b --- /dev/null +++ b/absl/strings/internal/cord_rep_consume.h @@ -0,0 +1,50 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ + +#include <functional> + +#include "absl/functional/function_ref.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Functor for the Consume() and ReverseConsume() functions: +// void ConsumeFunc(CordRep* rep, size_t offset, size_t length); +// See the Consume() and ReverseConsume() function comments for documentation. +using ConsumeFn = FunctionRef<void(CordRep*, size_t, size_t)>; + +// Consume() and ReverseConsume() consume CONCAT based trees and invoke the +// provided functor with the contained nodes in the proper forward or reverse +// order, which is used to convert CONCAT trees into other tree or cord data. +// All CONCAT and SUBSTRING nodes are processed internally. The 'offset` +// parameter of the functor is non-zero for any nodes below SUBSTRING nodes. +// It's up to the caller to form these back into SUBSTRING nodes or otherwise +// store offset / prefix information. These functions are intended to be used +// only for migration / transitional code where due to factors such as ODR +// violations, we can not 100% guarantee that all code respects 'new format' +// settings and flags, so we need to be able to parse old data on the fly until +// all old code is deprecated / no longer the default format. +void Consume(CordRep* rep, ConsumeFn consume_fn); +void ReverseConsume(CordRep* rep, ConsumeFn consume_fn); + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CONSUME_H_ diff --git a/absl/strings/internal/cord_rep_crc.cc b/absl/strings/internal/cord_rep_crc.cc new file mode 100644 index 00000000..ee140354 --- /dev/null +++ b/absl/strings/internal/cord_rep_crc.cc @@ -0,0 +1,54 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_crc.h" + +#include <cassert> +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +CordRepCrc* CordRepCrc::New(CordRep* child, uint32_t crc) { + assert(child != nullptr); + if (child->IsCrc()) { + if (child->refcount.IsOne()) { + child->crc()->crc = crc; + return child->crc(); + } + CordRep* old = child; + child = old->crc()->child; + CordRep::Ref(child); + CordRep::Unref(old); + } + auto* new_cordrep = new CordRepCrc; + new_cordrep->length = child->length; + new_cordrep->tag = cord_internal::CRC; + new_cordrep->child = child; + new_cordrep->crc = crc; + return new_cordrep; +} + +void CordRepCrc::Destroy(CordRepCrc* node) { + CordRep::Unref(node->child); + delete node; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_crc.h b/absl/strings/internal/cord_rep_crc.h new file mode 100644 index 00000000..5294b0d1 --- /dev/null +++ b/absl/strings/internal/cord_rep_crc.h @@ -0,0 +1,102 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_CRC_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_CRC_H_ + +#include <cassert> +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/strings/internal/cord_internal.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordRepCrc is a CordRep node intended only to appear at the top level of a +// cord tree. It associates an "expected CRC" with the contained data, to allow +// for easy passage of checksum data in Cord data flows. +// +// From Cord's perspective, the crc value has no semantics; any validation of +// the contained checksum is the user's responsibility. +struct CordRepCrc : public CordRep { + CordRep* child; + uint32_t crc; + + // Consumes `child` and returns a CordRepCrc prefixed tree containing `child`. + // If the specified `child` is itself a CordRepCrc node, then this method + // either replaces the existing node, or directly updates the crc value in it + // depending on the node being shared or not, i.e.: refcount.IsOne(). + // `child` must not be null. Never returns null. + static CordRepCrc* New(CordRep* child, uint32_t crc); + + // Destroys (deletes) the provided node. `node` must not be null. + static void Destroy(CordRepCrc* node); +}; + +// Consumes `rep` and returns a CordRep* with any outer CordRepCrc wrapper +// removed. This is usually a no-op (returning `rep`), but this will remove and +// unref an outer CordRepCrc node. +inline CordRep* RemoveCrcNode(CordRep* rep) { + assert(rep != nullptr); + if (ABSL_PREDICT_FALSE(rep->IsCrc())) { + CordRep* child = rep->crc()->child; + if (rep->refcount.IsOne()) { + delete rep->crc(); + } else { + CordRep::Ref(child); + CordRep::Unref(rep); + } + return child; + } + return rep; +} + +// Returns `rep` if it is not a CordRepCrc node, or its child if it is. +// Does not consume or create a reference on `rep` or the returned value. +inline CordRep* SkipCrcNode(CordRep* rep) { + assert(rep != nullptr); + if (ABSL_PREDICT_FALSE(rep->IsCrc())) { + return rep->crc()->child; + } else { + return rep; + } +} + +inline const CordRep* SkipCrcNode(const CordRep* rep) { + assert(rep != nullptr); + if (ABSL_PREDICT_FALSE(rep->IsCrc())) { + return rep->crc()->child; + } else { + return rep; + } +} + +inline CordRepCrc* CordRep::crc() { + assert(IsCrc()); + return static_cast<CordRepCrc*>(this); +} + +inline const CordRepCrc* CordRep::crc() const { + assert(IsCrc()); + return static_cast<const CordRepCrc*>(this); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_CRC_H_ diff --git a/absl/strings/internal/cord_rep_crc_test.cc b/absl/strings/internal/cord_rep_crc_test.cc new file mode 100644 index 00000000..80f53485 --- /dev/null +++ b/absl/strings/internal/cord_rep_crc_test.cc @@ -0,0 +1,115 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cord_rep_crc.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_test_util.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::absl::cordrep_testing::MakeFlat; +using ::testing::Eq; +using ::testing::Ne; + +#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST + +TEST(CordRepCrc, NewWithNullPtr) { + EXPECT_DEATH(CordRepCrc::New(nullptr, 0), ""); +} + +TEST(CordRepCrc, RemoveCrcWithNullptr) { + EXPECT_DEATH(RemoveCrcNode(nullptr), ""); +} + +#endif // !NDEBUG && GTEST_HAS_DEATH_TEST + +TEST(CordRepCrc, NewDestroy) { + CordRep* rep = cordrep_testing::MakeFlat("Hello world"); + CordRepCrc* crc = CordRepCrc::New(rep, 12345); + EXPECT_TRUE(crc->refcount.IsOne()); + EXPECT_THAT(crc->child, Eq(rep)); + EXPECT_THAT(crc->crc, Eq(12345)); + EXPECT_TRUE(rep->refcount.IsOne()); + CordRepCrc::Destroy(crc); +} + +TEST(CordRepCrc, NewExistingCrcNotShared) { + CordRep* rep = cordrep_testing::MakeFlat("Hello world"); + CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + EXPECT_THAT(new_crc, Eq(crc)); + EXPECT_TRUE(new_crc->refcount.IsOne()); + EXPECT_THAT(new_crc->child, Eq(rep)); + EXPECT_THAT(new_crc->crc, Eq(54321)); + EXPECT_TRUE(rep->refcount.IsOne()); + CordRepCrc::Destroy(new_crc); +} + +TEST(CordRepCrc, NewExistingCrcShared) { + CordRep* rep = cordrep_testing::MakeFlat("Hello world"); + CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRep::Ref(crc); + CordRepCrc* new_crc = CordRepCrc::New(crc, 54321); + + EXPECT_THAT(new_crc, Ne(crc)); + EXPECT_TRUE(new_crc->refcount.IsOne()); + EXPECT_TRUE(crc->refcount.IsOne()); + EXPECT_FALSE(rep->refcount.IsOne()); + EXPECT_THAT(crc->child, Eq(rep)); + EXPECT_THAT(new_crc->child, Eq(rep)); + EXPECT_THAT(crc->crc, Eq(12345)); + EXPECT_THAT(new_crc->crc, Eq(54321)); + + CordRep::Unref(crc); + CordRep::Unref(new_crc); +} + +TEST(CordRepCrc, RemoveCrcNotCrc) { + CordRep* rep = cordrep_testing::MakeFlat("Hello world"); + CordRep* nocrc = RemoveCrcNode(rep); + EXPECT_THAT(nocrc, Eq(rep)); + CordRep::Unref(nocrc); +} + +TEST(CordRepCrc, RemoveCrcNotShared) { + CordRep* rep = cordrep_testing::MakeFlat("Hello world"); + CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRep* nocrc = RemoveCrcNode(crc); + EXPECT_THAT(nocrc, Eq(rep)); + EXPECT_TRUE(rep->refcount.IsOne()); + CordRep::Unref(nocrc); +} + +TEST(CordRepCrc, RemoveCrcShared) { + CordRep* rep = cordrep_testing::MakeFlat("Hello world"); + CordRepCrc* crc = CordRepCrc::New(rep, 12345); + CordRep::Ref(crc); + CordRep* nocrc = RemoveCrcNode(crc); + EXPECT_THAT(nocrc, Eq(rep)); + EXPECT_FALSE(rep->refcount.IsOne()); + CordRep::Unref(nocrc); + CordRep::Unref(crc); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cord_rep_flat.h b/absl/strings/internal/cord_rep_flat.h index a98aa9df..e3e27fcd 100644 --- a/absl/strings/internal/cord_rep_flat.h +++ b/absl/strings/internal/cord_rep_flat.h @@ -20,6 +20,8 @@ #include <cstdint> #include <memory> +#include "absl/base/config.h" +#include "absl/base/macros.h" #include "absl/strings/internal/cord_internal.h" namespace absl { @@ -42,23 +44,45 @@ static constexpr size_t kMinFlatSize = 32; static constexpr size_t kMaxFlatSize = 4096; static constexpr size_t kMaxFlatLength = kMaxFlatSize - kFlatOverhead; static constexpr size_t kMinFlatLength = kMinFlatSize - kFlatOverhead; +static constexpr size_t kMaxLargeFlatSize = 256 * 1024; +static constexpr size_t kMaxLargeFlatLength = kMaxLargeFlatSize - kFlatOverhead; +// kTagBase should make the Size <--> Tag computation resilient +// against changes to the value of FLAT when we add a new tag.. +static constexpr uint8_t kTagBase = FLAT - 4; + +// Converts the provided rounded size to the corresponding tag constexpr uint8_t AllocatedSizeToTagUnchecked(size_t size) { - return static_cast<uint8_t>((size <= 1024) ? size / 8 - : 128 + size / 32 - 1024 / 32); + return static_cast<uint8_t>(size <= 512 ? kTagBase + size / 8 + : size <= 8192 + ? kTagBase + 512 / 8 + size / 64 - 512 / 64 + : kTagBase + 512 / 8 + ((8192 - 512) / 64) + + size / 4096 - 8192 / 4096); +} + +// Converts the provided tag to the corresponding allocated size +constexpr size_t TagToAllocatedSize(uint8_t tag) { + return (tag <= kTagBase + 512 / 8) ? tag * 8 - kTagBase * 8 + : (tag <= kTagBase + (512 / 8) + ((8192 - 512) / 64)) + ? 512 + tag * 64 - kTagBase * 64 - 512 / 8 * 64 + : 8192 + tag * 4096 - kTagBase * 4096 - + ((512 / 8) + ((8192 - 512) / 64)) * 4096; } -static_assert(kMinFlatSize / 8 >= FLAT, ""); -static_assert(AllocatedSizeToTagUnchecked(kMaxFlatSize) <= MAX_FLAT_TAG, ""); +static_assert(AllocatedSizeToTagUnchecked(kMinFlatSize) == FLAT, ""); +static_assert(AllocatedSizeToTagUnchecked(kMaxLargeFlatSize) == MAX_FLAT_TAG, + ""); -// Helper functions for rounded div, and rounding to exact sizes. -constexpr size_t DivUp(size_t n, size_t m) { return (n + m - 1) / m; } -constexpr size_t RoundUp(size_t n, size_t m) { return DivUp(n, m) * m; } +// RoundUp logically performs `((n + m - 1) / m) * m` to round up to the nearest +// multiple of `m`, optimized for the invariant that `m` is a power of 2. +constexpr size_t RoundUp(size_t n, size_t m) { + return (n + m - 1) & (0 - m); +} // Returns the size to the nearest equal or larger value that can be // expressed exactly as a tag value. inline size_t RoundUpForTag(size_t size) { - return RoundUp(size, (size <= 1024) ? 8 : 32); + return RoundUp(size, (size <= 512) ? 8 : (size <= 8192 ? 64 : 4096)); } // Converts the allocated size to a tag, rounding down if the size @@ -71,26 +95,26 @@ inline uint8_t AllocatedSizeToTag(size_t size) { return tag; } -// Converts the provided tag to the corresponding allocated size -constexpr size_t TagToAllocatedSize(uint8_t tag) { - return (tag <= 128) ? (tag * 8) : (1024 + (tag - 128) * 32); -} - // Converts the provided tag to the corresponding available data length constexpr size_t TagToLength(uint8_t tag) { return TagToAllocatedSize(tag) - kFlatOverhead; } // Enforce that kMaxFlatSize maps to a well-known exact tag value. -static_assert(TagToAllocatedSize(224) == kMaxFlatSize, "Bad tag logic"); +static_assert(TagToAllocatedSize(MAX_FLAT_TAG) == kMaxLargeFlatSize, + "Bad tag logic"); struct CordRepFlat : public CordRep { + // Tag for explicit 'large flat' allocation + struct Large {}; + // Creates a new flat node. - static CordRepFlat* New(size_t len) { + template <size_t max_flat_size, typename... Args> + static CordRepFlat* NewImpl(size_t len, Args... args ABSL_ATTRIBUTE_UNUSED) { if (len <= kMinFlatLength) { len = kMinFlatLength; - } else if (len > kMaxFlatLength) { - len = kMaxFlatLength; + } else if (len > max_flat_size - kFlatOverhead) { + len = max_flat_size - kFlatOverhead; } // Round size up so it matches a size we can exactly express in a tag. @@ -101,6 +125,12 @@ struct CordRepFlat : public CordRep { return rep; } + static CordRepFlat* New(size_t len) { return NewImpl<kMaxFlatSize>(len); } + + static CordRepFlat* New(Large, size_t len) { + return NewImpl<kMaxLargeFlatSize>(len); + } + // Deletes a CordRepFlat instance created previously through a call to New(). // Flat CordReps are allocated and constructed with raw ::operator new and // placement new, and must be destructed and deallocated accordingly. @@ -117,9 +147,20 @@ struct CordRepFlat : public CordRep { #endif } + // Create a CordRepFlat containing `data`, with an optional additional + // extra capacity of up to `extra` bytes. Requires that `data.size()` + // is less than kMaxFlatLength. + static CordRepFlat* Create(absl::string_view data, size_t extra = 0) { + assert(data.size() <= kMaxFlatLength); + CordRepFlat* flat = New(data.size() + (std::min)(extra, kMaxFlatLength)); + memcpy(flat->Data(), data.data(), data.size()); + flat->length = data.size(); + return flat; + } + // Returns a pointer to the data inside this flat rep. - char* Data() { return storage; } - const char* Data() const { return storage; } + char* Data() { return reinterpret_cast<char*>(storage); } + const char* Data() const { return reinterpret_cast<const char*>(storage); } // Returns the maximum capacity (payload size) of this instance. size_t Capacity() const { return TagToLength(tag); } diff --git a/absl/strings/internal/cord_rep_ring.cc b/absl/strings/internal/cord_rep_ring.cc index 4d31d1d9..af2fc768 100644 --- a/absl/strings/internal/cord_rep_ring.cc +++ b/absl/strings/internal/cord_rep_ring.cc @@ -26,21 +26,13 @@ #include "absl/base/macros.h" #include "absl/container/inlined_vector.h" #include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_consume.h" #include "absl/strings/internal/cord_rep_flat.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -// See https://bugs.llvm.org/show_bug.cgi?id=48477 -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wshadow" -#if __has_warning("-Wshadow-field") -#pragma clang diagnostic ignored "-Wshadow-field" -#endif -#endif - namespace { using index_type = CordRepRing::index_type; @@ -48,7 +40,7 @@ using index_type = CordRepRing::index_type; enum class Direction { kForward, kReversed }; inline bool IsFlatOrExternal(CordRep* rep) { - return rep->tag >= FLAT || rep->tag == EXTERNAL; + return rep->IsFlat() || rep->IsExternal(); } // Verifies that n + extra <= kMaxCapacity: throws std::length_error otherwise. @@ -58,14 +50,6 @@ inline void CheckCapacity(size_t n, size_t extra) { } } -// Removes a reference from `rep` only. -// Asserts that the refcount after decrement is not zero. -inline bool UnrefNeverOne(CordRep* rep) { - bool result = rep->refcount.Decrement(); - assert(result); - return result; -} - // Creates a flat from the provided string data, allocating up to `extra` // capacity in the returned flat depending on kMaxFlatLength limitations. // Requires `len` to be less or equal to `kMaxFlatLength` @@ -77,40 +61,6 @@ CordRepFlat* CreateFlat(const char* s, size_t n, size_t extra = 0) { // NOLINT return rep; } -// Unrefs the provided `substring`, and returns `substring->child` -// Adds or assumes a reference on `substring->child` -CordRep* ClipSubstring(CordRepSubstring* substring) { - CordRep* child = substring->child; - if (substring->refcount.IsOne()) { - delete substring; - } else { - CordRep::Ref(child); - if (ABSL_PREDICT_FALSE(!substring->refcount.Decrement())) { - UnrefNeverOne(child); - delete substring; - } - } - return child; -} - -// Unrefs the provided `concat`, and returns `{concat->left, concat->right}` -// Adds or assumes a reference on `concat->left` and `concat->right`. -std::pair<CordRep*, CordRep*> ClipConcat(CordRepConcat* concat) { - auto result = std::make_pair(concat->left, concat->right); - if (concat->refcount.IsOne()) { - delete concat; - } else { - CordRep::Ref(result.first); - CordRep::Ref(result.second); - if (ABSL_PREDICT_FALSE(!concat->refcount.Decrement())) { - UnrefNeverOne(result.first); - UnrefNeverOne(result.second); - delete concat; - } - } - return result; -} - // Unrefs the entries in `[head, tail)`. // Requires all entries to be a FLAT or EXTERNAL node. void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { @@ -126,79 +76,6 @@ void UnrefEntries(const CordRepRing* rep, index_type head, index_type tail) { }); } -template <typename F> -void Consume(Direction direction, CordRep* rep, F&& fn) { - size_t offset = 0; - size_t length = rep->length; - struct Entry { - CordRep* rep; - size_t offset; - size_t length; - }; - absl::InlinedVector<Entry, 40> stack; - - for (;;) { - if (rep->tag >= FLAT || rep->tag == EXTERNAL || rep->tag == RING) { - fn(rep, offset, length); - if (stack.empty()) return; - - rep = stack.back().rep; - offset = stack.back().offset; - length = stack.back().length; - stack.pop_back(); - } else if (rep->tag == SUBSTRING) { - offset += rep->substring()->start; - rep = ClipSubstring(rep->substring()); - } else if (rep->tag == CONCAT) { - auto res = ClipConcat(rep->concat()); - CordRep* left = res.first; - CordRep* right = res.second; - - if (left->length <= offset) { - // Don't need left node - offset -= left->length; - CordRep::Unref(left); - rep = right; - continue; - } - - size_t length_left = left->length - offset; - if (length_left >= length) { - // Don't need right node - CordRep::Unref(right); - rep = left; - continue; - } - - // Need both nodes - size_t length_right = length - length_left; - if (direction == Direction::kReversed) { - stack.push_back({left, offset, length_left}); - rep = right; - offset = 0; - length = length_right; - } else { - stack.push_back({right, 0, length_right}); - rep = left; - length = length_left; - } - } else { - assert("Valid tag" == nullptr); - return; - } - } -} - -template <typename F> -void Consume(CordRep* rep, F&& fn) { - return Consume(Direction::kForward, rep, std::forward<F>(fn)); -} - -template <typename F> -void RConsume(CordRep* rep, F&& fn) { - return Consume(Direction::kReversed, rep, std::forward<F>(fn)); -} - } // namespace std::ostream& operator<<(std::ostream& s, const CordRepRing& rep) { @@ -252,7 +129,9 @@ class CordRepRing::Filler { index_type pos_; }; -constexpr size_t CordRepRing::kMaxCapacity; // NOLINT: needed for c++11 +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL +constexpr size_t CordRepRing::kMaxCapacity; +#endif bool CordRepRing::IsValid(std::ostream& output) const { if (capacity_ == 0) { @@ -301,7 +180,7 @@ bool CordRepRing::IsValid(std::ostream& output) const { if (offset >= child->length || entry_length > child->length - offset) { output << "entry[" << head << "] has offset " << offset << " and entry length " << entry_length - << " which are outside of the childs length of " << child->length; + << " which are outside of the child's length of " << child->length; return false; } @@ -352,7 +231,7 @@ void CordRepRing::SetCapacityForTesting(size_t capacity) { } void CordRepRing::Delete(CordRepRing* rep) { - assert(rep != nullptr && rep->tag == RING); + assert(rep != nullptr && rep->IsRing()); #if defined(__cpp_sized_deallocation) size_t size = AllocSize(rep->capacity_); rep->~CordRepRing(); @@ -400,10 +279,11 @@ CordRepRing* CordRepRing::Mutable(CordRepRing* rep, size_t extra) { // Get current number of entries, and check for max capacity. size_t entries = rep->entries(); - size_t min_extra = (std::max)(extra, rep->capacity() * 2 - entries); if (!rep->refcount.IsOne()) { - return Copy(rep, rep->head(), rep->tail(), min_extra); + return Copy(rep, rep->head(), rep->tail(), extra); } else if (entries + extra > rep->capacity()) { + const size_t min_grow = rep->capacity() + rep->capacity() / 2; + const size_t min_extra = (std::max)(extra, min_grow - entries); CordRepRing* newrep = CordRepRing::New(entries, min_extra); newrep->Fill<false>(rep, rep->head(), rep->tail()); CordRepRing::Delete(rep); @@ -449,12 +329,12 @@ Span<char> CordRepRing::GetPrependBuffer(size_t size) { } CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset, - size_t length, size_t extra) { + size_t len, size_t extra) { CordRepRing* rep = CordRepRing::New(1, extra); rep->head_ = 0; rep->tail_ = rep->advance(0); - rep->length = length; - rep->entry_end_pos()[0] = length; + rep->length = len; + rep->entry_end_pos()[0] = len; rep->entry_child()[0] = child; rep->entry_data_offset()[0] = static_cast<offset_type>(offset); return Validate(rep); @@ -462,16 +342,16 @@ CordRepRing* CordRepRing::CreateFromLeaf(CordRep* child, size_t offset, CordRepRing* CordRepRing::CreateSlow(CordRep* child, size_t extra) { CordRepRing* rep = nullptr; - Consume(child, [&](CordRep* child, size_t offset, size_t length) { - if (IsFlatOrExternal(child)) { - rep = rep ? AppendLeaf(rep, child, offset, length) - : CreateFromLeaf(child, offset, length, extra); + Consume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = rep ? AppendLeaf(rep, child_arg, offset, len) + : CreateFromLeaf(child_arg, offset, len, extra); } else if (rep) { - rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length); - } else if (offset == 0 && child->length == length) { - rep = Mutable(child->ring(), extra); + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); + } else if (offset == 0 && child_arg->length == len) { + rep = Mutable(child_arg->ring(), extra); } else { - rep = SubRing(child->ring(), offset, length, extra); + rep = SubRing(child_arg->ring(), offset, len, extra); } }); return Validate(rep, nullptr, __LINE__); @@ -482,7 +362,7 @@ CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) { if (IsFlatOrExternal(child)) { return CreateFromLeaf(child, 0, length, extra); } - if (child->tag == RING) { + if (child->IsRing()) { return Mutable(child->ring(), extra); } return CreateSlow(child, extra); @@ -490,18 +370,18 @@ CordRepRing* CordRepRing::Create(CordRep* child, size_t extra) { template <CordRepRing::AddMode mode> CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring, - size_t offset, size_t length) { + size_t offset, size_t len) { assert(offset < ring->length); constexpr bool append = mode == AddMode::kAppend; Position head = ring->Find(offset); - Position tail = ring->FindTail(head.index, offset + length); + Position tail = ring->FindTail(head.index, offset + len); const index_type entries = ring->entries(head.index, tail.index); rep = Mutable(rep, entries); // The delta for making ring[head].end_pos into 'len - offset' const pos_type delta_length = - (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - length) - + (append ? rep->begin_pos_ + rep->length : rep->begin_pos_ - len) - ring->entry_begin_pos(head.index) - head.offset; // Start filling at `tail`, or `entries` before `head` @@ -542,36 +422,36 @@ CordRepRing* CordRepRing::AddRing(CordRepRing* rep, CordRepRing* ring, } // Commit changes - rep->length += length; + rep->length += len; if (append) { rep->tail_ = filler.pos(); } else { rep->head_ = filler.head(); - rep->begin_pos_ -= length; + rep->begin_pos_ -= len; } return Validate(rep); } CordRepRing* CordRepRing::AppendSlow(CordRepRing* rep, CordRep* child) { - Consume(child, [&rep](CordRep* child, size_t offset, size_t length) { - if (child->tag == RING) { - rep = AddRing<AddMode::kAppend>(rep, child->ring(), offset, length); + Consume(child, [&rep](CordRep* child_arg, size_t offset, size_t len) { + if (child_arg->IsRing()) { + rep = AddRing<AddMode::kAppend>(rep, child_arg->ring(), offset, len); } else { - rep = AppendLeaf(rep, child, offset, length); + rep = AppendLeaf(rep, child_arg, offset, len); } }); return rep; } CordRepRing* CordRepRing::AppendLeaf(CordRepRing* rep, CordRep* child, - size_t offset, size_t length) { + size_t offset, size_t len) { rep = Mutable(rep, 1); index_type back = rep->tail_; const pos_type begin_pos = rep->begin_pos_ + rep->length; rep->tail_ = rep->advance(rep->tail_); - rep->length += length; - rep->entry_end_pos()[back] = begin_pos + length; + rep->length += len; + rep->entry_end_pos()[back] = begin_pos + len; rep->entry_child()[back] = child; rep->entry_data_offset()[back] = static_cast<offset_type>(offset); return Validate(rep, nullptr, __LINE__); @@ -582,31 +462,31 @@ CordRepRing* CordRepRing::Append(CordRepRing* rep, CordRep* child) { if (IsFlatOrExternal(child)) { return AppendLeaf(rep, child, 0, length); } - if (child->tag == RING) { + if (child->IsRing()) { return AddRing<AddMode::kAppend>(rep, child->ring(), 0, length); } return AppendSlow(rep, child); } CordRepRing* CordRepRing::PrependSlow(CordRepRing* rep, CordRep* child) { - RConsume(child, [&](CordRep* child, size_t offset, size_t length) { - if (IsFlatOrExternal(child)) { - rep = PrependLeaf(rep, child, offset, length); + ReverseConsume(child, [&](CordRep* child_arg, size_t offset, size_t len) { + if (IsFlatOrExternal(child_arg)) { + rep = PrependLeaf(rep, child_arg, offset, len); } else { - rep = AddRing<AddMode::kPrepend>(rep, child->ring(), offset, length); + rep = AddRing<AddMode::kPrepend>(rep, child_arg->ring(), offset, len); } }); return Validate(rep); } CordRepRing* CordRepRing::PrependLeaf(CordRepRing* rep, CordRep* child, - size_t offset, size_t length) { + size_t offset, size_t len) { rep = Mutable(rep, 1); index_type head = rep->retreat(rep->head_); pos_type end_pos = rep->begin_pos_; rep->head_ = head; - rep->length += length; - rep->begin_pos_ -= length; + rep->length += len; + rep->begin_pos_ -= len; rep->entry_end_pos()[head] = end_pos; rep->entry_child()[head] = child; rep->entry_data_offset()[head] = static_cast<offset_type>(offset); @@ -618,7 +498,7 @@ CordRepRing* CordRepRing::Prepend(CordRepRing* rep, CordRep* child) { if (IsFlatOrExternal(child)) { return PrependLeaf(rep, child, 0, length); } - if (child->tag == RING) { + if (child->IsRing()) { return AddRing<AddMode::kPrepend>(rep, child->ring(), 0, length); } return PrependSlow(rep, child); @@ -786,18 +666,18 @@ char CordRepRing::GetCharacter(size_t offset) const { } CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, - size_t length, size_t extra) { + size_t len, size_t extra) { assert(offset <= rep->length); - assert(offset <= rep->length - length); + assert(offset <= rep->length - len); - if (length == 0) { + if (len == 0) { CordRep::Unref(rep); return nullptr; } // Find position of first byte Position head = rep->Find(offset); - Position tail = rep->FindTail(head.index, offset + length); + Position tail = rep->FindTail(head.index, offset + len); const size_t new_entries = rep->entries(head.index, tail.index); if (rep->refcount.IsOne() && extra <= (rep->capacity() - new_entries)) { @@ -814,7 +694,7 @@ CordRepRing* CordRepRing::SubRing(CordRepRing* rep, size_t offset, } // Adjust begin_pos and length - rep->length = length; + rep->length = len; rep->begin_pos_ += offset; // Adjust head and tail blocks @@ -888,10 +768,6 @@ CordRepRing* CordRepRing::RemoveSuffix(CordRepRing* rep, size_t len, return Validate(rep); } -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - } // namespace cord_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/cord_rep_ring.h b/absl/strings/internal/cord_rep_ring.h index c74d3353..2000e21e 100644 --- a/absl/strings/internal/cord_rep_ring.h +++ b/absl/strings/internal/cord_rep_ring.h @@ -30,15 +30,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace cord_internal { -// See https://bugs.llvm.org/show_bug.cgi?id=48477 -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wshadow" -#if __has_warning("-Wshadow-field") -#pragma clang diagnostic ignored "-Wshadow-field" -#endif -#endif - // All operations modifying a ring buffer are implemented as static methods // requiring a CordRepRing instance with a reference adopted by the method. // @@ -210,23 +201,23 @@ class CordRepRing : public CordRep { // referencing up to `size` capacity directly before the existing data. Span<char> GetPrependBuffer(size_t size); - // Returns a cord ring buffer containing `length` bytes of data starting at + // Returns a cord ring buffer containing `len` bytes of data starting at // `offset`. If the input is not shared, this function will remove all head // and tail child nodes outside of the requested range, and adjust the new // head and tail nodes as required. If the input is shared, this function // returns a new instance sharing some or all of the nodes from the input. - static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t length, + static CordRepRing* SubRing(CordRepRing* r, size_t offset, size_t len, size_t extra = 0); - // Returns a cord ring buffer with the first `length` bytes removed. + // Returns a cord ring buffer with the first `len` bytes removed. // If the input is not shared, this function will remove all head child nodes // fully inside the first `length` bytes, and adjust the new head as required. // If the input is shared, this function returns a new instance sharing some // or all of the nodes from the input. - static CordRepRing* RemoveSuffix(CordRepRing* r, size_t length, + static CordRepRing* RemoveSuffix(CordRepRing* r, size_t len, size_t extra = 0); - // Returns a cord ring buffer with the last `length` bytes removed. + // Returns a cord ring buffer with the last `len` bytes removed. // If the input is not shared, this function will remove all head child nodes // fully inside the first `length` bytes, and adjust the new head as required. // If the input is shared, this function returns a new instance sharing some @@ -237,6 +228,18 @@ class CordRepRing : public CordRep { // Returns the character at `offset`. Requires that `offset < length`. char GetCharacter(size_t offset) const; + // Returns true if this instance manages a single contiguous buffer, in which + // case the (optional) output parameter `fragment` is set. Otherwise, the + // function returns false, and `fragment` is left unchanged. + bool IsFlat(absl::string_view* fragment) const; + + // Returns true if the data starting at `offset` with length `len` is + // managed by this instance inside a single contiguous buffer, in which case + // the (optional) output parameter `fragment` is set to the contiguous memory + // starting at offset `offset` with length `length`. Otherwise, the function + // returns false, and `fragment` is left unchanged. + bool IsFlat(size_t offset, size_t len, absl::string_view* fragment) const; + // Testing only: set capacity to requested capacity. void SetCapacityForTesting(size_t capacity); @@ -380,8 +383,8 @@ class CordRepRing : public CordRep { // Destroys the provided ring buffer, decrementing the reference count of all // contained child CordReps. The provided 1\`rep` should have a ref count of - // one (pre decrement destroy call observing `refcount.IsOne()`) or zero (post - // decrement destroy call observing `!refcount.Decrement()`). + // one (pre decrement destroy call observing `refcount.IsOne()`) or zero + // (post decrement destroy call observing `!refcount.Decrement()`). static void Destroy(CordRepRing* rep); // Returns a mutable reference to the logical end position array. @@ -461,10 +464,10 @@ class CordRepRing : public CordRep { size_t length, size_t extra); // Appends or prepends (depending on AddMode) the ring buffer in `ring' to - // `rep` starting at `offset` with length `length`. + // `rep` starting at `offset` with length `len`. template <AddMode mode> static CordRepRing* AddRing(CordRepRing* rep, CordRepRing* ring, - size_t offset, size_t length); + size_t offset, size_t len); // Increases the data offset for entry `index` by `n`. void AddDataOffset(index_type index, size_t n); @@ -567,20 +570,35 @@ inline CordRepRing::Position CordRepRing::FindTail(index_type head, // Now that CordRepRing is defined, we can define CordRep's helper casts: inline CordRepRing* CordRep::ring() { - assert(tag == RING); + assert(IsRing()); return static_cast<CordRepRing*>(this); } inline const CordRepRing* CordRep::ring() const { - assert(tag == RING); + assert(IsRing()); return static_cast<const CordRepRing*>(this); } -std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); +inline bool CordRepRing::IsFlat(absl::string_view* fragment) const { + if (entries() == 1) { + if (fragment) *fragment = entry_data(head()); + return true; + } + return false; +} -#ifdef __clang__ -#pragma clang diagnostic pop -#endif +inline bool CordRepRing::IsFlat(size_t offset, size_t len, + absl::string_view* fragment) const { + const Position pos = Find(offset); + const absl::string_view data = entry_data(pos.index); + if (data.length() >= len && data.length() - len >= pos.offset) { + if (fragment) *fragment = data.substr(pos.offset, len); + return true; + } + return false; +} + +std::ostream& operator<<(std::ostream& s, const CordRepRing& rep); } // namespace cord_internal ABSL_NAMESPACE_END diff --git a/absl/strings/internal/cord_rep_ring_reader.h b/absl/strings/internal/cord_rep_ring_reader.h index 396c0e2c..7ceeaa00 100644 --- a/absl/strings/internal/cord_rep_ring_reader.h +++ b/absl/strings/internal/cord_rep_ring_reader.h @@ -40,6 +40,10 @@ class CordRepRingReader { // The returned value is undefined if this instance is empty. CordRepRing::index_type index() const { return index_; } + // Returns the current node inside the ring buffer for this instance. + // The returned value is undefined if this instance is empty. + CordRep* node() const { return ring_->entry_child(index_); } + // Returns the length of the referenced ring buffer. // Requires the current instance to be non empty. size_t length() const { diff --git a/absl/strings/internal/cord_rep_test_util.h b/absl/strings/internal/cord_rep_test_util.h new file mode 100644 index 00000000..18a0a195 --- /dev/null +++ b/absl/strings/internal/cord_rep_test_util.h @@ -0,0 +1,205 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ +#define ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ + +#include <cassert> +#include <memory> +#include <random> +#include <string> +#include <vector> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/string_view.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cordrep_testing { + +inline cord_internal::CordRepSubstring* MakeSubstring( + size_t start, size_t len, cord_internal::CordRep* rep) { + auto* sub = new cord_internal::CordRepSubstring; + sub->tag = cord_internal::SUBSTRING; + sub->start = start; + sub->length = len <= 0 ? rep->length - start + len : len; + sub->child = rep; + return sub; +} + +inline cord_internal::CordRepFlat* MakeFlat(absl::string_view value) { + assert(value.length() <= cord_internal::kMaxFlatLength); + auto* flat = cord_internal::CordRepFlat::New(value.length()); + flat->length = value.length(); + memcpy(flat->Data(), value.data(), value.length()); + return flat; +} + +// Creates an external node for testing +inline cord_internal::CordRepExternal* MakeExternal(absl::string_view s) { + struct Rep : public cord_internal::CordRepExternal { + std::string s; + explicit Rep(absl::string_view sv) : s(sv) { + this->tag = cord_internal::EXTERNAL; + this->base = s.data(); + this->length = s.length(); + this->releaser_invoker = [](cord_internal::CordRepExternal* self) { + delete static_cast<Rep*>(self); + }; + } + }; + return new Rep(s); +} + +inline std::string CreateRandomString(size_t n) { + absl::string_view data = + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789~!@#$%^&*()_+=-<>?:\"{}[]|"; + std::minstd_rand rnd; + std::uniform_int_distribution<size_t> dist(0, data.size() - 1); + std::string s(n, ' '); + for (size_t i = 0; i < n; ++i) { + s[i] = data[dist(rnd)]; + } + return s; +} + +// Creates an array of flats from the provided string, chopping +// the provided string up into flats of size `chunk_size` characters +// resulting in roughly `data.size() / chunk_size` total flats. +inline std::vector<cord_internal::CordRep*> CreateFlatsFromString( + absl::string_view data, size_t chunk_size) { + assert(chunk_size > 0); + std::vector<cord_internal::CordRep*> flats; + for (absl::string_view s = data; !s.empty(); s.remove_prefix(chunk_size)) { + flats.push_back(MakeFlat(s.substr(0, chunk_size))); + } + return flats; +} + +inline cord_internal::CordRepBtree* CordRepBtreeFromFlats( + absl::Span<cord_internal::CordRep* const> flats) { + assert(!flats.empty()); + auto* node = cord_internal::CordRepBtree::Create(flats[0]); + for (size_t i = 1; i < flats.size(); ++i) { + node = cord_internal::CordRepBtree::Append(node, flats[i]); + } + return node; +} + +template <typename Fn> +inline void CordVisitReps(cord_internal::CordRep* rep, Fn&& fn) { + fn(rep); + while (rep->tag == cord_internal::SUBSTRING) { + rep = rep->substring()->child; + fn(rep); + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordVisitReps(edge, fn); + } + } +} + +template <typename Predicate> +inline std::vector<cord_internal::CordRep*> CordCollectRepsIf( + Predicate&& predicate, cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + CordVisitReps(rep, [&reps, &predicate](cord_internal::CordRep* rep) { + if (predicate(rep)) reps.push_back(rep); + }); + return reps; +} + +inline std::vector<cord_internal::CordRep*> CordCollectReps( + cord_internal::CordRep* rep) { + std::vector<cord_internal::CordRep*> reps; + auto fn = [&reps](cord_internal::CordRep* rep) { reps.push_back(rep); }; + CordVisitReps(rep, fn); + return reps; +} + +inline void CordToString(cord_internal::CordRep* rep, std::string& s) { + size_t offset = 0; + size_t length = rep->length; + while (rep->tag == cord_internal::SUBSTRING) { + offset += rep->substring()->start; + rep = rep->substring()->child; + } + if (rep->tag == cord_internal::BTREE) { + for (cord_internal::CordRep* edge : rep->btree()->Edges()) { + CordToString(edge, s); + } + } else if (rep->tag >= cord_internal::FLAT) { + s.append(rep->flat()->Data() + offset, length); + } else if (rep->tag == cord_internal::EXTERNAL) { + s.append(rep->external()->base + offset, length); + } else { + ABSL_RAW_LOG(FATAL, "Unsupported tag %d", rep->tag); + } +} + +inline std::string CordToString(cord_internal::CordRep* rep) { + std::string s; + s.reserve(rep->length); + CordToString(rep, s); + return s; +} + +// RAII Helper class to automatically unref reps on destruction. +class AutoUnref { + public: + ~AutoUnref() { + for (CordRep* rep : unrefs_) CordRep::Unref(rep); + } + + // Adds `rep` to the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Add(CordRepType* rep) { + unrefs_.push_back(rep); + return rep; + } + + // Increments the reference count of `rep` by one, and adds it to + // the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* Ref(CordRepType* rep) { + unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + // Increments the reference count of `rep` by one if `condition` is true, + // and adds it to the list of reps to be unreffed at destruction. + template <typename CordRepType> + CordRepType* RefIf(bool condition, CordRepType* rep) { + if (condition) unrefs_.push_back(CordRep::Ref(rep)); + return rep; + } + + private: + using CordRep = absl::cord_internal::CordRep; + + std::vector<CordRep*> unrefs_; +}; + +} // namespace cordrep_testing +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORD_REP_TEST_UTIL_H_ diff --git a/absl/strings/internal/cordz_functions.cc b/absl/strings/internal/cordz_functions.cc new file mode 100644 index 00000000..20d314f0 --- /dev/null +++ b/absl/strings/internal/cordz_functions.cc @@ -0,0 +1,96 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_functions.h" + +#include <atomic> +#include <cmath> +#include <limits> +#include <random> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/profiling/internal/exponential_biased.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +// The average interval until the next sample. A value of 0 disables profiling +// while a value of 1 will profile all Cords. +std::atomic<int> g_cordz_mean_interval(50000); + +} // namespace + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// Special negative 'not initialized' per thread value for cordz_next_sample. +static constexpr int64_t kInitCordzNextSample = -1; + +ABSL_CONST_INIT thread_local int64_t cordz_next_sample = kInitCordzNextSample; + +// kIntervalIfDisabled is the number of profile-eligible events need to occur +// before the code will confirm that cordz is still disabled. +constexpr int64_t kIntervalIfDisabled = 1 << 16; + +ABSL_ATTRIBUTE_NOINLINE bool cordz_should_profile_slow() { + + thread_local absl::profiling_internal::ExponentialBiased + exponential_biased_generator; + int32_t mean_interval = get_cordz_mean_interval(); + + // Check if we disabled profiling. If so, set the next sample to a "large" + // number to minimize the overhead of the should_profile codepath. + if (mean_interval <= 0) { + cordz_next_sample = kIntervalIfDisabled; + return false; + } + + // Check if we're always sampling. + if (mean_interval == 1) { + cordz_next_sample = 1; + return true; + } + + if (cordz_next_sample <= 0) { + // If first check on current thread, check cordz_should_profile() + // again using the created (initial) stride in cordz_next_sample. + const bool initialized = cordz_next_sample != kInitCordzNextSample; + cordz_next_sample = exponential_biased_generator.GetStride(mean_interval); + return initialized || cordz_should_profile(); + } + + --cordz_next_sample; + return false; +} + +void cordz_set_next_sample_for_testing(int64_t next_sample) { + cordz_next_sample = next_sample; +} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +int32_t get_cordz_mean_interval() { + return g_cordz_mean_interval.load(std::memory_order_acquire); +} + +void set_cordz_mean_interval(int32_t mean_interval) { + g_cordz_mean_interval.store(mean_interval, std::memory_order_release); +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_functions.h b/absl/strings/internal/cordz_functions.h new file mode 100644 index 00000000..c9ba1450 --- /dev/null +++ b/absl/strings/internal/cordz_functions.h @@ -0,0 +1,85 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_FUNCTIONS_H_ +#define ABSL_STRINGS_CORDZ_FUNCTIONS_H_ + +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/optimization.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Returns the current sample rate. This represents the average interval +// between samples. +int32_t get_cordz_mean_interval(); + +// Sets the sample rate with the average interval between samples. +void set_cordz_mean_interval(int32_t mean_interval); + +// Enable cordz unless any of the following applies: +// - no thread local support +// - MSVC build +// - Android build +// - Apple build +// - DLL build +// Hashtablez is turned off completely in opensource builds. +// MSVC's static atomics are dynamically initialized in debug mode, which breaks +// sampling. +#if defined(ABSL_HAVE_THREAD_LOCAL) && !defined(_MSC_VER) && \ + !defined(ABSL_BUILD_DLL) && !defined(ABSL_CONSUME_DLL) && \ + !defined(__ANDROID__) && !defined(__APPLE__) +#define ABSL_INTERNAL_CORDZ_ENABLED 1 +#endif + +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +// cordz_next_sample is the number of events until the next sample event. If +// the value is 1 or less, the code will check on the next event if cordz is +// enabled, and if so, will sample the Cord. cordz is only enabled when we can +// use thread locals. +ABSL_CONST_INIT extern thread_local int64_t cordz_next_sample; + +// Determines if the next sample should be profiled. If it is, the value pointed +// at by next_sample will be set with the interval until the next sample. +bool cordz_should_profile_slow(); + +// Returns true if the next cord should be sampled. +inline bool cordz_should_profile() { + if (ABSL_PREDICT_TRUE(cordz_next_sample > 1)) { + cordz_next_sample--; + return false; + } + return cordz_should_profile_slow(); +} + +// Sets the interval until the next sample (for testing only) +void cordz_set_next_sample_for_testing(int64_t next_sample); + +#else // ABSL_INTERNAL_CORDZ_ENABLED + +inline bool cordz_should_profile() { return false; } +inline void cordz_set_next_sample_for_testing(int64_t) {} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_FUNCTIONS_H_ diff --git a/absl/strings/internal/cordz_functions_test.cc b/absl/strings/internal/cordz_functions_test.cc new file mode 100644 index 00000000..350623c1 --- /dev/null +++ b/absl/strings/internal/cordz_functions_test.cc @@ -0,0 +1,149 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_functions.h" + +#include <thread> // NOLINT we need real clean new threads + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::Eq; +using ::testing::Ge; +using ::testing::Le; + +TEST(CordzFunctionsTest, SampleRate) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + int32_t expected_sample_rate = 123; + set_cordz_mean_interval(expected_sample_rate); + EXPECT_THAT(get_cordz_mean_interval(), Eq(expected_sample_rate)); + set_cordz_mean_interval(orig_sample_rate); +} + +// Cordz is disabled when we don't have thread_local. All calls to +// should_profile will return false when cordz is diabled, so we might want to +// avoid those tests. +#ifdef ABSL_INTERNAL_CORDZ_ENABLED + +TEST(CordzFunctionsTest, ShouldProfileDisable) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(0); + cordz_set_next_sample_for_testing(0); + EXPECT_FALSE(cordz_should_profile()); + // 1 << 16 is from kIntervalIfDisabled in cordz_functions.cc. + EXPECT_THAT(cordz_next_sample, Eq(1 << 16)); + + set_cordz_mean_interval(orig_sample_rate); +} + +TEST(CordzFunctionsTest, ShouldProfileAlways) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(1); + cordz_set_next_sample_for_testing(1); + EXPECT_TRUE(cordz_should_profile()); + EXPECT_THAT(cordz_next_sample, Le(1)); + + set_cordz_mean_interval(orig_sample_rate); +} + +TEST(CordzFunctionsTest, DoesNotAlwaysSampleFirstCord) { + // Set large enough interval such that the chance of 'tons' of threads + // randomly sampling the first call is infinitely small. + set_cordz_mean_interval(10000); + int tries = 0; + bool sampled = false; + do { + ++tries; + ASSERT_THAT(tries, Le(1000)); + std::thread thread([&sampled] { + sampled = cordz_should_profile(); + }); + thread.join(); + } while (sampled); +} + +TEST(CordzFunctionsTest, ShouldProfileRate) { + static constexpr int kDesiredMeanInterval = 1000; + static constexpr int kSamples = 10000; + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(kDesiredMeanInterval); + + int64_t sum_of_intervals = 0; + for (int i = 0; i < kSamples; i++) { + // Setting next_sample to 0 will force cordz_should_profile to generate a + // new value for next_sample each iteration. + cordz_set_next_sample_for_testing(0); + cordz_should_profile(); + sum_of_intervals += cordz_next_sample; + } + + // The sum of independent exponential variables is an Erlang distribution, + // which is a gamma distribution where the shape parameter is equal to the + // number of summands. The distribution used for cordz_should_profile is + // actually floor(Exponential(1/mean)) which introduces bias. However, we can + // apply the squint-really-hard correction factor. That is, when mean is + // large, then if we squint really hard the shape of the distribution between + // N and N+1 looks like a uniform distribution. On average, each value for + // next_sample will be about 0.5 lower than we would expect from an + // exponential distribution. This squint-really-hard correction approach won't + // work when mean is smaller than about 10 but works fine when mean is 1000. + // + // We can use R to calculate a confidence interval. This + // shows how to generate a confidence interval with a false positive rate of + // one in a billion. + // + // $ R -q + // > mean = 1000 + // > kSamples = 10000 + // > errorRate = 1e-9 + // > correction = -kSamples / 2 + // > low = qgamma(errorRate/2, kSamples, 1/mean) + correction + // > high = qgamma(1 - errorRate/2, kSamples, 1/mean) + correction + // > low + // [1] 9396115 + // > high + // [1] 10618100 + EXPECT_THAT(sum_of_intervals, Ge(9396115)); + EXPECT_THAT(sum_of_intervals, Le(10618100)); + + set_cordz_mean_interval(orig_sample_rate); +} + +#else // ABSL_INTERNAL_CORDZ_ENABLED + +TEST(CordzFunctionsTest, ShouldProfileDisabled) { + int32_t orig_sample_rate = get_cordz_mean_interval(); + + set_cordz_mean_interval(1); + cordz_set_next_sample_for_testing(0); + EXPECT_FALSE(cordz_should_profile()); + + set_cordz_mean_interval(orig_sample_rate); +} + +#endif // ABSL_INTERNAL_CORDZ_ENABLED + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_handle.cc b/absl/strings/internal/cordz_handle.cc new file mode 100644 index 00000000..a73fefed --- /dev/null +++ b/absl/strings/internal/cordz_handle.cc @@ -0,0 +1,139 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cordz_handle.h" + +#include <atomic> + +#include "absl/base/internal/raw_logging.h" // For ABSL_RAW_CHECK +#include "absl/base/internal/spinlock.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::absl::base_internal::SpinLockHolder; + +ABSL_CONST_INIT CordzHandle::Queue CordzHandle::global_queue_(absl::kConstInit); + +CordzHandle::CordzHandle(bool is_snapshot) : is_snapshot_(is_snapshot) { + if (is_snapshot) { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* dq_tail = queue_->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + dq_prev_ = dq_tail; + dq_tail->dq_next_ = this; + } + queue_->dq_tail.store(this, std::memory_order_release); + } +} + +CordzHandle::~CordzHandle() { + ODRCheck(); + if (is_snapshot_) { + std::vector<CordzHandle*> to_delete; + { + SpinLockHolder lock(&queue_->mutex); + CordzHandle* next = dq_next_; + if (dq_prev_ == nullptr) { + // We were head of the queue, delete every CordzHandle until we reach + // either the end of the list, or a snapshot handle. + while (next && !next->is_snapshot_) { + to_delete.push_back(next); + next = next->dq_next_; + } + } else { + // Another CordzHandle existed before this one, don't delete anything. + dq_prev_->dq_next_ = next; + } + if (next) { + next->dq_prev_ = dq_prev_; + } else { + queue_->dq_tail.store(dq_prev_, std::memory_order_release); + } + } + for (CordzHandle* handle : to_delete) { + delete handle; + } + } +} + +bool CordzHandle::SafeToDelete() const { + return is_snapshot_ || queue_->IsEmpty(); +} + +void CordzHandle::Delete(CordzHandle* handle) { + assert(handle); + if (handle) { + handle->ODRCheck(); + Queue* const queue = handle->queue_; + if (!handle->SafeToDelete()) { + SpinLockHolder lock(&queue->mutex); + CordzHandle* dq_tail = queue->dq_tail.load(std::memory_order_acquire); + if (dq_tail != nullptr) { + handle->dq_prev_ = dq_tail; + dq_tail->dq_next_ = handle; + queue->dq_tail.store(handle, std::memory_order_release); + return; + } + } + delete handle; + } +} + +std::vector<const CordzHandle*> CordzHandle::DiagnosticsGetDeleteQueue() { + std::vector<const CordzHandle*> handles; + SpinLockHolder lock(&global_queue_.mutex); + CordzHandle* dq_tail = global_queue_.dq_tail.load(std::memory_order_acquire); + for (const CordzHandle* p = dq_tail; p; p = p->dq_prev_) { + handles.push_back(p); + } + return handles; +} + +bool CordzHandle::DiagnosticsHandleIsSafeToInspect( + const CordzHandle* handle) const { + ODRCheck(); + if (!is_snapshot_) return false; + if (handle == nullptr) return true; + if (handle->is_snapshot_) return false; + bool snapshot_found = false; + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = queue_->dq_tail; p; p = p->dq_prev_) { + if (p == handle) return !snapshot_found; + if (p == this) snapshot_found = true; + } + ABSL_ASSERT(snapshot_found); // Assert that 'this' is in delete queue. + return true; +} + +std::vector<const CordzHandle*> +CordzHandle::DiagnosticsGetSafeToInspectDeletedHandles() { + ODRCheck(); + std::vector<const CordzHandle*> handles; + if (!is_snapshot()) { + return handles; + } + + SpinLockHolder lock(&queue_->mutex); + for (const CordzHandle* p = dq_next_; p != nullptr; p = p->dq_next_) { + if (!p->is_snapshot()) { + handles.push_back(p); + } + } + return handles; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_handle.h b/absl/strings/internal/cordz_handle.h new file mode 100644 index 00000000..5df53c78 --- /dev/null +++ b/absl/strings/internal/cordz_handle.h @@ -0,0 +1,131 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_HANDLE_H_ +#define ABSL_STRINGS_CORDZ_HANDLE_H_ + +#include <atomic> +#include <vector> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/spinlock.h" +#include "absl/synchronization/mutex.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// This base class allows multiple types of object (CordzInfo and +// CordzSampleToken) to exist simultaneously on the delete queue (pointed to by +// global_dq_tail and traversed using dq_prev_ and dq_next_). The +// delete queue guarantees that once a profiler creates a CordzSampleToken and +// has gained visibility into a CordzInfo object, that CordzInfo object will not +// be deleted prematurely. This allows the profiler to inspect all CordzInfo +// objects that are alive without needing to hold a global lock. +class CordzHandle { + public: + CordzHandle() : CordzHandle(false) {} + + bool is_snapshot() const { return is_snapshot_; } + + // Returns true if this instance is safe to be deleted because it is either a + // snapshot, which is always safe to delete, or not included in the global + // delete queue and thus not included in any snapshot. + // Callers are responsible for making sure this instance can not be newly + // discovered by other threads. For example, CordzInfo instances first de-list + // themselves from the global CordzInfo list before determining if they are + // safe to be deleted directly. + // If SafeToDelete returns false, callers MUST use the Delete() method to + // safely queue CordzHandle instances for deletion. + bool SafeToDelete() const; + + // Deletes the provided instance, or puts it on the delete queue to be deleted + // once there are no more sample tokens (snapshot) instances potentially + // referencing the instance. `handle` should not be null. + static void Delete(CordzHandle* handle); + + // Returns the current entries in the delete queue in LIFO order. + static std::vector<const CordzHandle*> DiagnosticsGetDeleteQueue(); + + // Returns true if the provided handle is nullptr or guarded by this handle. + // Since the CordzSnapshot token is itself a CordzHandle, this method will + // allow tests to check if that token is keeping an arbitrary CordzHandle + // alive. + bool DiagnosticsHandleIsSafeToInspect(const CordzHandle* handle) const; + + // Returns the current entries in the delete queue, in LIFO order, that are + // protected by this. CordzHandle objects are only placed on the delete queue + // after CordzHandle::Delete is called with them as an argument. Only + // CordzHandle objects that are not also CordzSnapshot objects will be + // included in the return vector. For each of the handles in the return + // vector, the earliest that their memory can be freed is when this + // CordzSnapshot object is deleted. + std::vector<const CordzHandle*> DiagnosticsGetSafeToInspectDeletedHandles(); + + protected: + explicit CordzHandle(bool is_snapshot); + virtual ~CordzHandle(); + + private: + // Global queue data. CordzHandle stores a pointer to the global queue + // instance to harden against ODR violations. + struct Queue { + constexpr explicit Queue(absl::ConstInitType) + : mutex(absl::kConstInit, + absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + + absl::base_internal::SpinLock mutex; + std::atomic<CordzHandle*> dq_tail ABSL_GUARDED_BY(mutex){nullptr}; + + // Returns true if this delete queue is empty. This method does not acquire + // the lock, but does a 'load acquire' observation on the delete queue tail. + // It is used inside Delete() to check for the presence of a delete queue + // without holding the lock. The assumption is that the caller is in the + // state of 'being deleted', and can not be newly discovered by a concurrent + // 'being constructed' snapshot instance. Practically, this means that any + // such discovery (`find`, 'first' or 'next', etc) must have proper 'happens + // before / after' semantics and atomic fences. + bool IsEmpty() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return dq_tail.load(std::memory_order_acquire) == nullptr; + } + }; + + void ODRCheck() const { +#ifndef NDEBUG + ABSL_RAW_CHECK(queue_ == &global_queue_, "ODR violation in Cord"); +#endif + } + + ABSL_CONST_INIT static Queue global_queue_; + Queue* const queue_ = &global_queue_; + const bool is_snapshot_; + + // dq_prev_ and dq_next_ require the global queue mutex to be held. + // Unfortunately we can't use thread annotations such that the thread safety + // analysis understands that queue_ and global_queue_ are one and the same. + CordzHandle* dq_prev_ = nullptr; + CordzHandle* dq_next_ = nullptr; +}; + +class CordzSnapshot : public CordzHandle { + public: + CordzSnapshot() : CordzHandle(true) {} +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_HANDLE_H_ diff --git a/absl/strings/internal/cordz_handle_test.cc b/absl/strings/internal/cordz_handle_test.cc new file mode 100644 index 00000000..fd68e06b --- /dev/null +++ b/absl/strings/internal/cordz_handle_test.cc @@ -0,0 +1,265 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "absl/strings/internal/cordz_handle.h" + +#include <random> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Gt; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +// Local less verbose helper +std::vector<const CordzHandle*> DeleteQueue() { + return CordzHandle::DiagnosticsGetDeleteQueue(); +} + +struct CordzHandleDeleteTracker : public CordzHandle { + bool* deleted; + explicit CordzHandleDeleteTracker(bool* deleted) : deleted(deleted) {} + ~CordzHandleDeleteTracker() override { *deleted = true; } +}; + +TEST(CordzHandleTest, DeleteQueueIsEmpty) { + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzHandleTest, CordzHandleCreateDelete) { + bool deleted = false; + auto* handle = new CordzHandleDeleteTracker(&deleted); + EXPECT_FALSE(handle->is_snapshot()); + EXPECT_TRUE(handle->SafeToDelete()); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + + CordzHandle::Delete(handle); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_TRUE(deleted); +} + +TEST(CordzHandleTest, CordzSnapshotCreateDelete) { + auto* snapshot = new CordzSnapshot(); + EXPECT_TRUE(snapshot->is_snapshot()); + EXPECT_TRUE(snapshot->SafeToDelete()); + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot)); + delete snapshot; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzHandleTest, CordzHandleCreateDeleteWithSnapshot) { + bool deleted = false; + auto* snapshot = new CordzSnapshot(); + auto* handle = new CordzHandleDeleteTracker(&deleted); + EXPECT_FALSE(handle->SafeToDelete()); + + CordzHandle::Delete(handle); + EXPECT_THAT(DeleteQueue(), ElementsAre(handle, snapshot)); + EXPECT_FALSE(deleted); + EXPECT_FALSE(handle->SafeToDelete()); + + delete snapshot; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_TRUE(deleted); +} + +TEST(CordzHandleTest, MultiSnapshot) { + bool deleted[3] = {false, false, false}; + + CordzSnapshot* snapshot[3]; + CordzHandleDeleteTracker* handle[3]; + for (int i = 0; i < 3; ++i) { + snapshot[i] = new CordzSnapshot(); + handle[i] = new CordzHandleDeleteTracker(&deleted[i]); + CordzHandle::Delete(handle[i]); + } + + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1], + snapshot[1], handle[0], snapshot[0])); + EXPECT_THAT(deleted, ElementsAre(false, false, false)); + + delete snapshot[1]; + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2], handle[1], + handle[0], snapshot[0])); + EXPECT_THAT(deleted, ElementsAre(false, false, false)); + + delete snapshot[0]; + EXPECT_THAT(DeleteQueue(), ElementsAre(handle[2], snapshot[2])); + EXPECT_THAT(deleted, ElementsAre(true, true, false)); + + delete snapshot[2]; + EXPECT_THAT(DeleteQueue(), SizeIs(0)); + EXPECT_THAT(deleted, ElementsAre(true, true, deleted)); +} + +TEST(CordzHandleTest, DiagnosticsHandleIsSafeToInspect) { + CordzSnapshot snapshot1; + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(nullptr)); + + auto* handle1 = new CordzHandle(); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + + CordzHandle::Delete(handle1); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + + CordzSnapshot snapshot2; + auto* handle2 = new CordzHandle(); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle2)); + EXPECT_FALSE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle1)); + EXPECT_TRUE(snapshot2.DiagnosticsHandleIsSafeToInspect(handle2)); + + CordzHandle::Delete(handle2); + EXPECT_TRUE(snapshot1.DiagnosticsHandleIsSafeToInspect(handle1)); +} + +TEST(CordzHandleTest, DiagnosticsGetSafeToInspectDeletedHandles) { + EXPECT_THAT(DeleteQueue(), IsEmpty()); + + auto* handle = new CordzHandle(); + auto* snapshot1 = new CordzSnapshot(); + + // snapshot1 should be able to see handle. + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot1)); + EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle)); + EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(), + IsEmpty()); + + // This handle will be safe to inspect as long as snapshot1 is alive. However, + // since only snapshot1 can prove that it's alive, it will be hidden from + // snapshot2. + CordzHandle::Delete(handle); + + // This snapshot shouldn't be able to see handle because handle was already + // sent to Delete. + auto* snapshot2 = new CordzSnapshot(); + + // DeleteQueue elements are LIFO order. + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2, handle, snapshot1)); + + EXPECT_TRUE(snapshot1->DiagnosticsHandleIsSafeToInspect(handle)); + EXPECT_FALSE(snapshot2->DiagnosticsHandleIsSafeToInspect(handle)); + + EXPECT_THAT(snapshot1->DiagnosticsGetSafeToInspectDeletedHandles(), + ElementsAre(handle)); + EXPECT_THAT(snapshot2->DiagnosticsGetSafeToInspectDeletedHandles(), + IsEmpty()); + + CordzHandle::Delete(snapshot1); + EXPECT_THAT(DeleteQueue(), ElementsAre(snapshot2)); + + CordzHandle::Delete(snapshot2); + EXPECT_THAT(DeleteQueue(), IsEmpty()); +} + +// Create and delete CordzHandle and CordzSnapshot objects in multiple threads +// so that tsan has some time to chew on it and look for memory problems. +TEST(CordzHandleTest, MultiThreaded) { + Notification stop; + static constexpr int kNumThreads = 4; + // Keep the number of handles relatively small so that the test will naturally + // transition to an empty delete queue during the test. If there are, say, 100 + // handles, that will virtually never happen. With 10 handles and around 50k + // iterations in each of 4 threads, the delete queue appears to become empty + // around 200 times. + static constexpr int kNumHandles = 10; + + // Each thread is going to pick a random index and atomically swap its + // CordzHandle with one in handles. This way, each thread can avoid + // manipulating a CordzHandle that might be operated upon in another thread. + std::vector<std::atomic<CordzHandle*>> handles(kNumHandles); + + // global bool which is set when any thread did get some 'safe to inspect' + // handles. On some platforms and OSS tests, we might risk that some pool + // threads are starved, stalled, or just got a few unlikely random 'handle' + // coin tosses, so we satisfy this test with simply observing 'some' thread + // did something meaningful, which should minimize the potential for flakes. + std::atomic<bool> found_safe_to_inspect(false); + + { + absl::synchronization_internal::ThreadPool pool(kNumThreads); + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&stop, &handles, &found_safe_to_inspect]() { + std::minstd_rand gen; + std::uniform_int_distribution<int> dist_type(0, 2); + std::uniform_int_distribution<int> dist_handle(0, kNumHandles - 1); + + while (!stop.HasBeenNotified()) { + CordzHandle* handle; + switch (dist_type(gen)) { + case 0: + handle = new CordzHandle(); + break; + case 1: + handle = new CordzSnapshot(); + break; + default: + handle = nullptr; + break; + } + CordzHandle* old_handle = handles[dist_handle(gen)].exchange(handle); + if (old_handle != nullptr) { + std::vector<const CordzHandle*> safe_to_inspect = + old_handle->DiagnosticsGetSafeToInspectDeletedHandles(); + for (const CordzHandle* handle : safe_to_inspect) { + // We're in a tight loop, so don't generate too many error + // messages. + ASSERT_FALSE(handle->is_snapshot()); + } + if (!safe_to_inspect.empty()) { + found_safe_to_inspect.store(true); + } + CordzHandle::Delete(old_handle); + } + } + + // Have each thread attempt to clean up everything. Some thread will be + // the last to reach this cleanup code, and it will be guaranteed to + // clean up everything because nothing remains to create new handles. + for (auto& h : handles) { + if (CordzHandle* handle = h.exchange(nullptr)) { + CordzHandle::Delete(handle); + } + } + }); + } + + // The threads will hammer away. Give it a little bit of time for tsan to + // spot errors. + absl::SleepFor(absl::Seconds(3)); + stop.Notify(); + } + + // Confirm that the test did *something*. This check will be satisfied as + // long as any thread has deleted a CordzSnapshot object and a non-snapshot + // CordzHandle was deleted after the CordzSnapshot was created. + // See also comments on `found_safe_to_inspect` + EXPECT_TRUE(found_safe_to_inspect.load()); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_info.cc b/absl/strings/internal/cordz_info.cc new file mode 100644 index 00000000..dac3fd8b --- /dev/null +++ b/absl/strings/internal/cordz_info.cc @@ -0,0 +1,418 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_info.h" + +#include "absl/base/config.h" +#include "absl/base/internal/spinlock.h" +#include "absl/container/inlined_vector.h" +#include "absl/debugging/stacktrace.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_crc.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +using ::absl::base_internal::SpinLockHolder; + +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL +constexpr int CordzInfo::kMaxStackDepth; +#endif + +ABSL_CONST_INIT CordzInfo::List CordzInfo::global_list_{absl::kConstInit}; + +namespace { + +// CordRepAnalyzer performs the analysis of a cord. +// +// It computes absolute node counts and total memory usage, and an 'estimated +// fair share memory usage` statistic. +// Conceptually, it divides the 'memory usage' at each location in the 'cord +// graph' by the cumulative reference count of that location. The cumulative +// reference count is the factored total of all edges leading into that node. +// +// The top level node is treated specially: we assume the current thread +// (typically called from the CordzHandler) to hold a reference purely to +// perform a safe analysis, and not being part of the application. So we +// substract 1 from the reference count of the top node to compute the +// 'application fair share' excluding the reference of the current thread. +// +// An example of fair sharing, and why we multiply reference counts: +// Assume we have 2 CordReps, both being a Substring referencing a Flat: +// CordSubstring A (refcount = 5) --> child Flat C (refcount = 2) +// CordSubstring B (refcount = 9) --> child Flat C (refcount = 2) +// +// Flat C has 2 incoming edges from the 2 substrings (refcount = 2) and is not +// referenced directly anywhere else. Translated into a 'fair share', we then +// attribute 50% of the memory (memory / refcount = 2) to each incoming edge. +// Rep A has a refcount of 5, so we attribute each incoming edge 1 / 5th of the +// memory cost below it, i.e.: the fair share of Rep A of the memory used by C +// is then 'memory C / (refcount C * refcount A) + (memory A / refcount A)'. +// It is also easy to see how all incoming edges add up to 100%. +class CordRepAnalyzer { + public: + // Creates an analyzer instance binding to `statistics`. + explicit CordRepAnalyzer(CordzStatistics& statistics) + : statistics_(statistics) {} + + // Analyzes the memory statistics and node counts for the provided `rep`, and + // adds the results to `statistics`. Note that node counts and memory sizes + // are not initialized, computed values are added to any existing values. + void AnalyzeCordRep(const CordRep* rep) { + // Process all linear nodes. + // As per the class comments, use refcout - 1 on the top level node, as the + // top level node is assumed to be referenced only for analysis purposes. + size_t refcount = rep->refcount.Get(); + RepRef repref{rep, (refcount > 1) ? refcount - 1 : 1}; + + // Process the top level CRC node, if present. + if (repref.rep->tag == CRC) { + statistics_.node_count++; + statistics_.node_counts.crc++; + memory_usage_.Add(sizeof(CordRepCrc), repref.refcount); + repref = repref.Child(repref.rep->crc()->child); + } + + // Process all top level linear nodes (substrings and flats). + repref = CountLinearReps(repref, memory_usage_); + + if (repref.rep != nullptr) { + if (repref.rep->tag == RING) { + AnalyzeRing(repref); + } else if (repref.rep->tag == BTREE) { + AnalyzeBtree(repref); + } else { + // We should have either a concat, btree, or ring node if not null. + assert(false); + } + } + + // Adds values to output + statistics_.estimated_memory_usage += memory_usage_.total; + statistics_.estimated_fair_share_memory_usage += + static_cast<size_t>(memory_usage_.fair_share); + } + + private: + // RepRef identifies a CordRep* inside the Cord tree with its cumulative + // refcount including itself. For example, a tree consisting of a substring + // with a refcount of 3 and a child flat with a refcount of 4 will have RepRef + // refcounts of 3 and 12 respectively. + struct RepRef { + const CordRep* rep; + size_t refcount; + + // Returns a 'child' RepRef which contains the cumulative reference count of + // this instance multiplied by the child's reference count. + RepRef Child(const CordRep* child) const { + return RepRef{child, refcount * child->refcount.Get()}; + } + }; + + // Memory usage values + struct MemoryUsage { + size_t total = 0; + double fair_share = 0.0; + + // Adds 'size` memory usage to this class, with a cumulative (recursive) + // reference count of `refcount` + void Add(size_t size, size_t refcount) { + total += size; + fair_share += static_cast<double>(size) / refcount; + } + }; + + // Counts a flat of the provide allocated size + void CountFlat(size_t size) { + statistics_.node_count++; + statistics_.node_counts.flat++; + if (size <= 64) { + statistics_.node_counts.flat_64++; + } else if (size <= 128) { + statistics_.node_counts.flat_128++; + } else if (size <= 256) { + statistics_.node_counts.flat_256++; + } else if (size <= 512) { + statistics_.node_counts.flat_512++; + } else if (size <= 1024) { + statistics_.node_counts.flat_1k++; + } + } + + // Processes 'linear' reps (substring, flat, external) not requiring iteration + // or recursion. Returns RefRep{null} if all reps were processed, else returns + // the top-most non-linear concat or ring cordrep. + // Node counts are updated into `statistics_`, memory usage is update into + // `memory_usage`, which typically references `memory_usage_` except for ring + // buffers where we count children unrounded. + RepRef CountLinearReps(RepRef rep, MemoryUsage& memory_usage) { + // Consume all substrings + while (rep.rep->tag == SUBSTRING) { + statistics_.node_count++; + statistics_.node_counts.substring++; + memory_usage.Add(sizeof(CordRepSubstring), rep.refcount); + rep = rep.Child(rep.rep->substring()->child); + } + + // Consume possible FLAT + if (rep.rep->tag >= FLAT) { + size_t size = rep.rep->flat()->AllocatedSize(); + CountFlat(size); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + // Consume possible external + if (rep.rep->tag == EXTERNAL) { + statistics_.node_count++; + statistics_.node_counts.external++; + size_t size = rep.rep->length + sizeof(CordRepExternalImpl<intptr_t>); + memory_usage.Add(size, rep.refcount); + return RepRef{nullptr, 0}; + } + + return rep; + } + + // Analyzes the provided ring. + void AnalyzeRing(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.ring++; + const CordRepRing* ring = rep.rep->ring(); + memory_usage_.Add(CordRepRing::AllocSize(ring->capacity()), rep.refcount); + ring->ForEach([&](CordRepRing::index_type pos) { + CountLinearReps(rep.Child(ring->entry_child(pos)), memory_usage_); + }); + } + + // Analyzes the provided btree. + void AnalyzeBtree(RepRef rep) { + statistics_.node_count++; + statistics_.node_counts.btree++; + memory_usage_.Add(sizeof(CordRepBtree), rep.refcount); + const CordRepBtree* tree = rep.rep->btree(); + if (tree->height() > 0) { + for (CordRep* edge : tree->Edges()) { + AnalyzeBtree(rep.Child(edge)); + } + } else { + for (CordRep* edge : tree->Edges()) { + CountLinearReps(rep.Child(edge), memory_usage_); + } + } + } + + CordzStatistics& statistics_; + MemoryUsage memory_usage_; +}; + +} // namespace + +CordzInfo* CordzInfo::Head(const CordzSnapshot& snapshot) { + ABSL_ASSERT(snapshot.is_snapshot()); + + // We can do an 'unsafe' load of 'head', as we are guaranteed that the + // instance it points to is kept alive by the provided CordzSnapshot, so we + // can simply return the current value using an acquire load. + // We do enforce in DEBUG builds that the 'head' value is present in the + // delete queue: ODR violations may lead to 'snapshot' and 'global_list_' + // being in different libraries / modules. + CordzInfo* head = global_list_.head.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(head)); + return head; +} + +CordzInfo* CordzInfo::Next(const CordzSnapshot& snapshot) const { + ABSL_ASSERT(snapshot.is_snapshot()); + + // Similar to the 'Head()' function, we do not need a mutex here. + CordzInfo* next = ci_next_.load(std::memory_order_acquire); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(this)); + ABSL_ASSERT(snapshot.DiagnosticsHandleIsSafeToInspect(next)); + return next; +} + +void CordzInfo::TrackCord(InlineData& cord, MethodIdentifier method) { + assert(cord.is_tree()); + assert(!cord.is_profiled()); + CordzInfo* cordz_info = new CordzInfo(cord.as_tree(), nullptr, method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::TrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + assert(cord.is_tree()); + assert(src.is_tree()); + + // Unsample current as we the current cord is being replaced with 'src', + // so any method history is no longer relevant. + CordzInfo* cordz_info = cord.cordz_info(); + if (cordz_info != nullptr) cordz_info->Untrack(); + + // Start new cord sample + cordz_info = new CordzInfo(cord.as_tree(), src.cordz_info(), method); + cord.set_cordz_info(cordz_info); + cordz_info->Track(); +} + +void CordzInfo::MaybeTrackCordImpl(InlineData& cord, const InlineData& src, + MethodIdentifier method) { + if (src.is_profiled()) { + TrackCord(cord, src, method); + } else if (cord.is_profiled()) { + cord.cordz_info()->Untrack(); + cord.clear_cordz_info(); + } +} + +CordzInfo::MethodIdentifier CordzInfo::GetParentMethod(const CordzInfo* src) { + if (src == nullptr) return MethodIdentifier::kUnknown; + return src->parent_method_ != MethodIdentifier::kUnknown ? src->parent_method_ + : src->method_; +} + +int CordzInfo::FillParentStack(const CordzInfo* src, void** stack) { + assert(stack); + if (src == nullptr) return 0; + if (src->parent_stack_depth_) { + memcpy(stack, src->parent_stack_, src->parent_stack_depth_ * sizeof(void*)); + return src->parent_stack_depth_; + } + memcpy(stack, src->stack_, src->stack_depth_ * sizeof(void*)); + return src->stack_depth_; +} + +CordzInfo::CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method) + : rep_(rep), + stack_depth_(absl::GetStackTrace(stack_, /*max_depth=*/kMaxStackDepth, + /*skip_count=*/1)), + parent_stack_depth_(FillParentStack(src, parent_stack_)), + method_(method), + parent_method_(GetParentMethod(src)), + create_time_(absl::Now()) { + update_tracker_.LossyAdd(method); + if (src) { + // Copy parent counters. + update_tracker_.LossyAdd(src->update_tracker_); + } +} + +CordzInfo::~CordzInfo() { + // `rep_` is potentially kept alive if CordzInfo is included + // in a collection snapshot (which should be rare). + if (ABSL_PREDICT_FALSE(rep_)) { + CordRep::Unref(rep_); + } +} + +void CordzInfo::Track() { + SpinLockHolder l(&list_->mutex); + + CordzInfo* const head = list_->head.load(std::memory_order_acquire); + if (head != nullptr) { + head->ci_prev_.store(this, std::memory_order_release); + } + ci_next_.store(head, std::memory_order_release); + list_->head.store(this, std::memory_order_release); +} + +void CordzInfo::Untrack() { + ODRCheck(); + { + SpinLockHolder l(&list_->mutex); + + CordzInfo* const head = list_->head.load(std::memory_order_acquire); + CordzInfo* const next = ci_next_.load(std::memory_order_acquire); + CordzInfo* const prev = ci_prev_.load(std::memory_order_acquire); + + if (next) { + ABSL_ASSERT(next->ci_prev_.load(std::memory_order_acquire) == this); + next->ci_prev_.store(prev, std::memory_order_release); + } + if (prev) { + ABSL_ASSERT(head != this); + ABSL_ASSERT(prev->ci_next_.load(std::memory_order_acquire) == this); + prev->ci_next_.store(next, std::memory_order_release); + } else { + ABSL_ASSERT(head == this); + list_->head.store(next, std::memory_order_release); + } + } + + // We can no longer be discovered: perform a fast path check if we are not + // listed on any delete queue, so we can directly delete this instance. + if (SafeToDelete()) { + UnsafeSetCordRep(nullptr); + delete this; + return; + } + + // We are likely part of a snapshot, extend the life of the CordRep + { + absl::MutexLock lock(&mutex_); + if (rep_) CordRep::Ref(rep_); + } + CordzHandle::Delete(this); +} + +void CordzInfo::Lock(MethodIdentifier method) + ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_) { + mutex_.Lock(); + update_tracker_.LossyAdd(method); + assert(rep_); +} + +void CordzInfo::Unlock() ABSL_UNLOCK_FUNCTION(mutex_) { + bool tracked = rep_ != nullptr; + mutex_.Unlock(); + if (!tracked) { + Untrack(); + } +} + +absl::Span<void* const> CordzInfo::GetStack() const { + return absl::MakeConstSpan(stack_, stack_depth_); +} + +absl::Span<void* const> CordzInfo::GetParentStack() const { + return absl::MakeConstSpan(parent_stack_, parent_stack_depth_); +} + +CordzStatistics CordzInfo::GetCordzStatistics() const { + CordzStatistics stats; + stats.method = method_; + stats.parent_method = parent_method_; + stats.update_tracker = update_tracker_; + if (CordRep* rep = RefCordRep()) { + stats.size = rep->length; + CordRepAnalyzer analyzer(stats); + analyzer.AnalyzeCordRep(rep); + CordRep::Unref(rep); + } + return stats; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_info.h b/absl/strings/internal/cordz_info.h new file mode 100644 index 00000000..026d5b99 --- /dev/null +++ b/absl/strings/internal/cordz_info.h @@ -0,0 +1,298 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_CORDZ_INFO_H_ +#define ABSL_STRINGS_CORDZ_INFO_H_ + +#include <atomic> +#include <cstdint> +#include <functional> + +#include "absl/base/config.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_functions.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/mutex.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzInfo tracks a profiled Cord. Each of these objects can be in two places. +// If a Cord is alive, the CordzInfo will be in the global_cordz_infos map, and +// can also be retrieved via the linked list starting with +// global_cordz_infos_head and continued via the cordz_info_next() method. When +// a Cord has reached the end of its lifespan, the CordzInfo object will be +// migrated out of the global_cordz_infos list and the global_cordz_infos_map, +// and will either be deleted or appended to the global_delete_queue. If it is +// placed on the global_delete_queue, the CordzInfo object will be cleaned in +// the destructor of a CordzSampleToken object. +class ABSL_LOCKABLE CordzInfo : public CordzHandle { + public: + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // TrackCord creates a CordzInfo instance which tracks important metrics of + // a sampled cord, and stores the created CordzInfo instance into `cord'. All + // CordzInfo instances are placed in a global list which is used to discover + // and snapshot all actively tracked cords. Callers are responsible for + // calling UntrackCord() before the tracked Cord instance is deleted, or to + // stop tracking the sampled Cord. Callers are also responsible for guarding + // changes to the 'tree' value of a Cord (InlineData.tree) through the Lock() + // and Unlock() calls. Any change resulting in a new tree value for the cord + // requires a call to SetCordRep() before the old tree has been unreffed + // and/or deleted. `method` identifies the Cord public API method initiating + // the cord to be sampled. + // Requires `cord` to hold a tree, and `cord.cordz_info()` to be null. + static void TrackCord(InlineData& cord, MethodIdentifier method); + + // Identical to TrackCord(), except that this function fills the + // `parent_stack` and `parent_method` properties of the returned CordzInfo + // instance from the provided `src` instance if `src` is sampled. + // This function should be used for sampling 'copy constructed' and 'copy + // assigned' cords. This function allows 'cord` to be already sampled, in + // which case the CordzInfo will be newly created from `src`. + static void TrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + // Maybe sample the cord identified by 'cord' for method 'method'. + // Uses `cordz_should_profile` to randomly pick cords to be sampled, and if + // so, invokes `TrackCord` to start sampling `cord`. + static void MaybeTrackCord(InlineData& cord, MethodIdentifier method); + + // Maybe sample the cord identified by 'cord' for method 'method'. + // `src` identifies a 'parent' cord which is assigned to `cord`, typically the + // input cord for a copy constructor, or an assign method such as `operator=` + // `cord` will be sampled if (and only if) `src` is sampled. + // If `cord` is currently being sampled and `src` is not being sampled, then + // this function will stop sampling the cord and reset the cord's cordz_info. + // + // Previously this function defined that `cord` will be sampled if either + // `src` is sampled, or if `cord` is randomly picked for sampling. However, + // this can cause issues, as there may be paths where some cord is assigned an + // indirect copy of it's own value. As such a 'string of copies' would then + // remain sampled (`src.is_profiled`), then assigning such a cord back to + // 'itself' creates a cycle where the cord will converge to 'always sampled`. + // + // For example: + // + // Cord x; + // for (...) { + // // Copy ctor --> y.is_profiled := x.is_profiled | random(...) + // Cord y = x; + // ... + // // Assign x = y --> x.is_profiled = y.is_profiled | random(...) + // // ==> x.is_profiled |= random(...) + // // ==> x converges to 'always profiled' + // x = y; + // } + static void MaybeTrackCord(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + // Stops tracking changes for a sampled cord, and deletes the provided info. + // This function must be called before the sampled cord instance is deleted, + // and before the root cordrep of the sampled cord is unreffed. + // This function may extend the lifetime of the cordrep in cases where the + // CordInfo instance is being held by a concurrent collection thread. + void Untrack(); + + // Invokes UntrackCord() on `info` if `info` is not null. + static void MaybeUntrackCord(CordzInfo* info); + + CordzInfo() = delete; + CordzInfo(const CordzInfo&) = delete; + CordzInfo& operator=(const CordzInfo&) = delete; + + // Retrieves the oldest existing CordzInfo. + static CordzInfo* Head(const CordzSnapshot& snapshot) + ABSL_NO_THREAD_SAFETY_ANALYSIS; + + // Retrieves the next oldest existing CordzInfo older than 'this' instance. + CordzInfo* Next(const CordzSnapshot& snapshot) const + ABSL_NO_THREAD_SAFETY_ANALYSIS; + + // Locks this instance for the update identified by `method`. + // Increases the count for `method` in `update_tracker`. + void Lock(MethodIdentifier method) ABSL_EXCLUSIVE_LOCK_FUNCTION(mutex_); + + // Unlocks this instance. If the contained `rep` has been set to null + // indicating the Cord has been cleared or is otherwise no longer sampled, + // then this method will delete this CordzInfo instance. + void Unlock() ABSL_UNLOCK_FUNCTION(mutex_); + + // Asserts that this CordzInfo instance is locked. + void AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_); + + // Updates the `rep` property of this instance. This methods is invoked by + // Cord logic each time the root node of a sampled Cord changes, and before + // the old root reference count is deleted. This guarantees that collection + // code can always safely take a reference on the tracked cord. + // Requires a lock to be held through the `Lock()` method. + // TODO(b/117940323): annotate with ABSL_EXCLUSIVE_LOCKS_REQUIRED once all + // Cord code is in a state where this can be proven true by the compiler. + void SetCordRep(CordRep* rep); + + // Returns the current `rep` property of this instance with a reference + // added, or null if this instance represents a cord that has since been + // deleted or untracked. + CordRep* RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_); + + // Returns the current value of `rep_` for testing purposes only. + CordRep* GetCordRepForTesting() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return rep_; + } + + // Sets the current value of `rep_` for testing purposes only. + void SetCordRepForTesting(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS { + rep_ = rep; + } + + // Returns the stack trace for where the cord was first sampled. Cords are + // potentially sampled when they promote from an inlined cord to a tree or + // ring representation, which is not necessarily the location where the cord + // was first created. Some cords are created as inlined cords, and only as + // data is added do they become a non-inlined cord. However, typically the + // location represents reasonably well where the cord is 'created'. + absl::Span<void* const> GetStack() const; + + // Returns the stack trace for a sampled cord's 'parent stack trace'. This + // value may be set if the cord is sampled (promoted) after being created + // from, or being assigned the value of an existing (sampled) cord. + absl::Span<void* const> GetParentStack() const; + + // Retrieves the CordzStatistics associated with this Cord. The statistics + // are only updated when a Cord goes through a mutation, such as an Append + // or RemovePrefix. + CordzStatistics GetCordzStatistics() const; + + private: + using SpinLock = absl::base_internal::SpinLock; + using SpinLockHolder = ::absl::base_internal::SpinLockHolder; + + // Global cordz info list. CordzInfo stores a pointer to the global list + // instance to harden against ODR violations. + struct List { + constexpr explicit List(absl::ConstInitType) + : mutex(absl::kConstInit, + absl::base_internal::SCHEDULE_COOPERATIVE_AND_KERNEL) {} + + SpinLock mutex; + std::atomic<CordzInfo*> head ABSL_GUARDED_BY(mutex){nullptr}; + }; + + static constexpr int kMaxStackDepth = 64; + + explicit CordzInfo(CordRep* rep, const CordzInfo* src, + MethodIdentifier method); + ~CordzInfo() override; + + // Sets `rep_` without holding a lock. + void UnsafeSetCordRep(CordRep* rep) ABSL_NO_THREAD_SAFETY_ANALYSIS; + + void Track(); + + // Returns the parent method from `src`, which is either `parent_method_` or + // `method_` depending on `parent_method_` being kUnknown. + // Returns kUnknown if `src` is null. + static MethodIdentifier GetParentMethod(const CordzInfo* src); + + // Fills the provided stack from `src`, copying either `parent_stack_` or + // `stack_` depending on `parent_stack_` being empty, returning the size of + // the parent stack. + // Returns 0 if `src` is null. + static int FillParentStack(const CordzInfo* src, void** stack); + + void ODRCheck() const { +#ifndef NDEBUG + ABSL_RAW_CHECK(list_ == &global_list_, "ODR violation in Cord"); +#endif + } + + // Non-inlined implementation of `MaybeTrackCord`, which is executed if + // either `src` is sampled or `cord` is sampled, and either untracks or + // tracks `cord` as documented per `MaybeTrackCord`. + static void MaybeTrackCordImpl(InlineData& cord, const InlineData& src, + MethodIdentifier method); + + ABSL_CONST_INIT static List global_list_; + List* const list_ = &global_list_; + + // ci_prev_ and ci_next_ require the global list mutex to be held. + // Unfortunately we can't use thread annotations such that the thread safety + // analysis understands that list_ and global_list_ are one and the same. + std::atomic<CordzInfo*> ci_prev_{nullptr}; + std::atomic<CordzInfo*> ci_next_{nullptr}; + + mutable absl::Mutex mutex_; + CordRep* rep_ ABSL_GUARDED_BY(mutex_); + + void* stack_[kMaxStackDepth]; + void* parent_stack_[kMaxStackDepth]; + const int stack_depth_; + const int parent_stack_depth_; + const MethodIdentifier method_; + const MethodIdentifier parent_method_; + CordzUpdateTracker update_tracker_; + const absl::Time create_time_; +}; + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( + InlineData& cord, MethodIdentifier method) { + if (ABSL_PREDICT_FALSE(cordz_should_profile())) { + TrackCord(cord, method); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeTrackCord( + InlineData& cord, const InlineData& src, MethodIdentifier method) { + if (ABSL_PREDICT_FALSE(InlineData::is_either_profiled(cord, src))) { + MaybeTrackCordImpl(cord, src, method); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void CordzInfo::MaybeUntrackCord( + CordzInfo* info) { + if (ABSL_PREDICT_FALSE(info)) { + info->Untrack(); + } +} + +inline void CordzInfo::AssertHeld() ABSL_ASSERT_EXCLUSIVE_LOCK(mutex_) { +#ifndef NDEBUG + mutex_.AssertHeld(); +#endif +} + +inline void CordzInfo::SetCordRep(CordRep* rep) { + AssertHeld(); + rep_ = rep; +} + +inline void CordzInfo::UnsafeSetCordRep(CordRep* rep) { rep_ = rep; } + +inline CordRep* CordzInfo::RefCordRep() const ABSL_LOCKS_EXCLUDED(mutex_) { + MutexLock lock(&mutex_); + return rep_ ? CordRep::Ref(rep_) : nullptr; +} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_INFO_H_ diff --git a/absl/strings/internal/cordz_info_statistics_test.cc b/absl/strings/internal/cordz_info_statistics_test.cc new file mode 100644 index 00000000..476c38d2 --- /dev/null +++ b/absl/strings/internal/cordz_info_statistics_test.cc @@ -0,0 +1,555 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <iostream> +#include <random> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/cord.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cord_rep_btree.h" +#include "absl/strings/internal/cord_rep_crc.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cord_rep_ring.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_sample_token.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_scope.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// Do not print statistics contents, the matcher prints them as needed. +inline void PrintTo(const CordzStatistics& stats, std::ostream* s) { + if (s) *s << "CordzStatistics{...}"; +} + +namespace { + +using ::testing::Ge; + +// Creates a flat of the specified allocated size +CordRepFlat* Flat(size_t size) { + // Round up to a tag size, as we are going to poke an exact tag size back into + // the allocated flat. 'size returning allocators' could grant us more than we + // wanted, but we are ok to poke the 'requested' size in the tag, even in the + // presence of sized deletes, so we need to make sure the size rounds + // perfectly to a tag value. + assert(size >= kMinFlatSize); + size = RoundUpForTag(size); + CordRepFlat* flat = CordRepFlat::New(size - kFlatOverhead); + flat->tag = AllocatedSizeToTag(size); + flat->length = size - kFlatOverhead; + return flat; +} + +// Creates an external of the specified length +CordRepExternal* External(int length = 512) { + return static_cast<CordRepExternal*>( + NewExternalRep(absl::string_view("", length), [](absl::string_view) {})); +} + +// Creates a substring on the provided rep of length - 1 +CordRepSubstring* Substring(CordRep* rep) { + auto* substring = new CordRepSubstring; + substring->length = rep->length - 1; + substring->tag = SUBSTRING; + substring->child = rep; + return substring; +} + +// Reference count helper +struct RefHelper { + std::vector<CordRep*> refs; + + ~RefHelper() { + for (CordRep* rep : refs) { + CordRep::Unref(rep); + } + } + + // Invokes CordRep::Unref() on `rep` when this instance is destroyed. + template <typename T> + T* NeedsUnref(T* rep) { + refs.push_back(rep); + return rep; + } + + // Adds `n` reference counts to `rep` which will be unreffed when this + // instance is destroyed. + template <typename T> + T* Ref(T* rep, size_t n = 1) { + while (n--) { + NeedsUnref(CordRep::Ref(rep)); + } + return rep; + } +}; + +// Sizeof helper. Returns the allocated size of `p`, excluding any child +// elements for substring, concat and ring cord reps. +template <typename T> +size_t SizeOf(const T* rep) { + return sizeof(T); +} + +template <> +size_t SizeOf(const CordRepFlat* rep) { + return rep->AllocatedSize(); +} + +template <> +size_t SizeOf(const CordRepExternal* rep) { + // See cord.cc + return sizeof(CordRepExternalImpl<intptr_t>) + rep->length; +} + +template <> +size_t SizeOf(const CordRepRing* rep) { + return CordRepRing::AllocSize(rep->capacity()); +} + +// Computes fair share memory used in a naive 'we dare to recurse' way. +double FairShareImpl(CordRep* rep, size_t ref) { + double self = 0.0, children = 0.0; + ref *= rep->refcount.Get(); + if (rep->tag >= FLAT) { + self = SizeOf(rep->flat()); + } else if (rep->tag == EXTERNAL) { + self = SizeOf(rep->external()); + } else if (rep->tag == SUBSTRING) { + self = SizeOf(rep->substring()); + children = FairShareImpl(rep->substring()->child, ref); + } else if (rep->tag == BTREE) { + self = SizeOf(rep->btree()); + for (CordRep*edge : rep->btree()->Edges()) { + children += FairShareImpl(edge, ref); + } + } else if (rep->tag == RING) { + self = SizeOf(rep->ring()); + rep->ring()->ForEach([&](CordRepRing::index_type i) { + self += FairShareImpl(rep->ring()->entry_child(i), 1); + }); + } else { + assert(false); + } + return self / ref + children; +} + +// Returns the fair share memory size from `ShareFhareImpl()` as a size_t. +size_t FairShare(CordRep* rep, size_t ref = 1) { + return static_cast<size_t>(FairShareImpl(rep, ref)); +} + +// Samples the cord and returns CordzInfo::GetStatistics() +CordzStatistics SampleCord(CordRep* rep) { + InlineData cord(rep); + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + CordzStatistics stats = cord.cordz_info()->GetCordzStatistics(); + cord.cordz_info()->Untrack(); + return stats; +} + +MATCHER_P(EqStatistics, stats, "Statistics equal expected values") { + bool ok = true; + +#define STATS_MATCHER_EXPECT_EQ(member) \ + if (stats.member != arg.member) { \ + *result_listener << "\n stats." << #member \ + << ": actual = " << arg.member << ", expected " \ + << stats.member; \ + ok = false; \ + } + + STATS_MATCHER_EXPECT_EQ(size); + STATS_MATCHER_EXPECT_EQ(node_count); + STATS_MATCHER_EXPECT_EQ(node_counts.flat); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_64); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_128); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_256); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_512); + STATS_MATCHER_EXPECT_EQ(node_counts.flat_1k); + STATS_MATCHER_EXPECT_EQ(node_counts.external); + STATS_MATCHER_EXPECT_EQ(node_counts.concat); + STATS_MATCHER_EXPECT_EQ(node_counts.substring); + STATS_MATCHER_EXPECT_EQ(node_counts.ring); + STATS_MATCHER_EXPECT_EQ(node_counts.btree); + STATS_MATCHER_EXPECT_EQ(estimated_memory_usage); + STATS_MATCHER_EXPECT_EQ(estimated_fair_share_memory_usage); + +#undef STATS_MATCHER_EXPECT_EQ + + return ok; +} + +TEST(CordzInfoStatisticsTest, Flat) { + RefHelper ref; + auto* flat = ref.NeedsUnref(Flat(512)); + + CordzStatistics expected; + expected.size = flat->length; + expected.estimated_memory_usage = SizeOf(flat); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 1; + expected.node_counts.flat = 1; + expected.node_counts.flat_512 = 1; + + EXPECT_THAT(SampleCord(flat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedFlat) { + RefHelper ref; + auto* flat = ref.Ref(ref.NeedsUnref(Flat(64))); + + CordzStatistics expected; + expected.size = flat->length; + expected.estimated_memory_usage = SizeOf(flat); + expected.estimated_fair_share_memory_usage = SizeOf(flat) / 2; + expected.node_count = 1; + expected.node_counts.flat = 1; + expected.node_counts.flat_64 = 1; + + EXPECT_THAT(SampleCord(flat), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, External) { + RefHelper ref; + auto* external = ref.NeedsUnref(External()); + + CordzStatistics expected; + expected.size = external->length; + expected.estimated_memory_usage = SizeOf(external); + expected.estimated_fair_share_memory_usage = SizeOf(external); + expected.node_count = 1; + expected.node_counts.external = 1; + + EXPECT_THAT(SampleCord(external), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedExternal) { + RefHelper ref; + auto* external = ref.Ref(ref.NeedsUnref(External())); + + CordzStatistics expected; + expected.size = external->length; + expected.estimated_memory_usage = SizeOf(external); + expected.estimated_fair_share_memory_usage = SizeOf(external) / 2; + expected.node_count = 1; + expected.node_counts.external = 1; + + EXPECT_THAT(SampleCord(external), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Substring) { + RefHelper ref; + auto* flat = Flat(1024); + auto* substring = ref.NeedsUnref(Substring(flat)); + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(substring) + SizeOf(flat); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 2; + expected.node_counts.flat = 1; + expected.node_counts.flat_1k = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedSubstring) { + RefHelper ref; + auto* flat = ref.Ref(Flat(511), 2); + auto* substring = ref.Ref(ref.NeedsUnref(Substring(flat))); + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(flat) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = + SizeOf(substring) / 2 + SizeOf(flat) / 6; + expected.node_count = 2; + expected.node_counts.flat = 1; + expected.node_counts.flat_512 = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + + +TEST(CordzInfoStatisticsTest, Ring) { + RefHelper ref; + auto* flat1 = Flat(240); + auto* flat2 = Flat(2000); + auto* flat3 = Flat(70); + auto* external = External(3000); + CordRepRing* ring = CordRepRing::Create(flat1); + ring = CordRepRing::Append(ring, flat2); + ring = CordRepRing::Append(ring, flat3); + ring = ref.NeedsUnref(CordRepRing::Append(ring, external)); + + CordzStatistics expected; + expected.size = ring->length; + expected.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 5; + expected.node_counts.flat = 3; + expected.node_counts.flat_128 = 1; + expected.node_counts.flat_256 = 1; + expected.node_counts.external = 1; + expected.node_counts.ring = 1; + + EXPECT_THAT(SampleCord(ring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, SharedSubstringRing) { + RefHelper ref; + auto* flat1 = ref.Ref(Flat(240)); + auto* flat2 = Flat(200); + auto* flat3 = Flat(70); + auto* external = ref.Ref(External(3000), 5); + CordRepRing* ring = CordRepRing::Create(flat1); + ring = CordRepRing::Append(ring, flat2); + ring = CordRepRing::Append(ring, flat3); + ring = ref.Ref(CordRepRing::Append(ring, external), 4); + auto* substring = ref.Ref(ref.NeedsUnref(Substring(ring))); + + + CordzStatistics expected; + expected.size = substring->length; + expected.estimated_memory_usage = SizeOf(ring) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(flat3) + + SizeOf(external) + SizeOf(substring); + expected.estimated_fair_share_memory_usage = FairShare(substring); + expected.node_count = 6; + expected.node_counts.flat = 3; + expected.node_counts.flat_128 = 1; + expected.node_counts.flat_256 = 2; + expected.node_counts.external = 1; + expected.node_counts.ring = 1; + expected.node_counts.substring = 1; + + EXPECT_THAT(SampleCord(substring), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, BtreeLeaf) { + ASSERT_THAT(CordRepBtree::kMaxCapacity, Ge(3)); + RefHelper ref; + auto* flat1 = Flat(2000); + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + auto* external = External(3000); + + CordRepBtree* tree = CordRepBtree::Create(flat1); + tree = CordRepBtree::Append(tree, substr); + tree = CordRepBtree::Append(tree, external); + size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + size_t flat3_size = 0; + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + flat3_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + ref.NeedsUnref(tree); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + SizeOf(flat1) + + SizeOf(flat2) + SizeOf(substr) + + flat3_size + SizeOf(external); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 1 + 3 + 1 + flat3_count; + expected.node_counts.flat = 2 + flat3_count; + expected.node_counts.flat_128 = flat3_count; + expected.node_counts.flat_256 = 1; + expected.node_counts.external = 1; + expected.node_counts.substring = 1; + expected.node_counts.btree = 1; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, BtreeNodeShared) { + RefHelper ref; + static constexpr int leaf_count = 3; + const size_t flat3_count = CordRepBtree::kMaxCapacity - 3; + ASSERT_THAT(flat3_count, Ge(0)); + + CordRepBtree* tree = nullptr; + size_t mem_size = 0; + for (int i = 0; i < leaf_count; ++i) { + auto* flat1 = ref.Ref(Flat(2000), 9); + mem_size += SizeOf(flat1); + if (i == 0) { + tree = CordRepBtree::Create(flat1); + } else { + tree = CordRepBtree::Append(tree, flat1); + } + + auto* flat2 = Flat(200); + auto* substr = Substring(flat2); + mem_size += SizeOf(flat2) + SizeOf(substr); + tree = CordRepBtree::Append(tree, substr); + + auto* external = External(30); + mem_size += SizeOf(external); + tree = CordRepBtree::Append(tree, external); + + for (size_t i = 0; i < flat3_count; ++i) { + auto* flat3 = Flat(70); + mem_size += SizeOf(flat3); + tree = CordRepBtree::Append(tree, flat3); + } + + if (i == 0) { + mem_size += SizeOf(tree); + } else { + mem_size += SizeOf(tree->Edges().back()->btree()); + } + } + ref.NeedsUnref(tree); + + // Ref count: 2 for top (add 1), 5 for leaf 0 (add 4). + ref.Ref(tree, 1); + ref.Ref(tree->Edges().front(), 4); + + CordzStatistics expected; + expected.size = tree->length; + expected.estimated_memory_usage = SizeOf(tree) + mem_size; + expected.estimated_fair_share_memory_usage = FairShare(tree); + + expected.node_count = 1 + leaf_count * (1 + 3 + 1 + flat3_count); + expected.node_counts.flat = leaf_count * (2 + flat3_count); + expected.node_counts.flat_128 = leaf_count * flat3_count; + expected.node_counts.flat_256 = leaf_count; + expected.node_counts.external = leaf_count; + expected.node_counts.substring = leaf_count; + expected.node_counts.btree = 1 + leaf_count; + + EXPECT_THAT(SampleCord(tree), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, Crc) { + RefHelper ref; + auto* left = Flat(1000); + auto* crc = ref.NeedsUnref(CordRepCrc::New(left, 12345)); + + CordzStatistics expected; + expected.size = left->length; + expected.estimated_memory_usage = SizeOf(crc) + SizeOf(left); + expected.estimated_fair_share_memory_usage = expected.estimated_memory_usage; + expected.node_count = 2; + expected.node_counts.flat = 1; + expected.node_counts.flat_1k = 1; + expected.node_counts.crc = 1; + + EXPECT_THAT(SampleCord(crc), EqStatistics(expected)); +} + +TEST(CordzInfoStatisticsTest, ThreadSafety) { + Notification stop; + static constexpr int kNumThreads = 8; + int64_t sampled_node_count = 0; + + { + absl::synchronization_internal::ThreadPool pool(kNumThreads); + + // Run analyzer thread emulating a CordzHandler collection. + pool.Schedule([&]() { + while (!stop.HasBeenNotified()) { + // Run every 10us (about 100K total collections). + absl::SleepFor(absl::Microseconds(10)); + CordzSampleToken token; + for (const CordzInfo& cord_info : token) { + CordzStatistics stats = cord_info.GetCordzStatistics(); + sampled_node_count += stats.node_count; + } + } + }); + + // Run 'application threads' + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&]() { + // Track 0 - 2 cordz infos at a time, providing permutations of 0, 1 + // and 2 CordzHandle and CordzInfo queues being active, with plenty of + // 'empty to non empty' transitions. + InlineData cords[2]; + std::minstd_rand gen; + std::uniform_int_distribution<int> coin_toss(0, 1); + + while (!stop.HasBeenNotified()) { + for (InlineData& cord : cords) { + // 50/50 flip the state of the cord + if (coin_toss(gen) != 0) { + if (cord.is_tree()) { + // 50/50 simulate delete (untrack) or 'edit to empty' + if (coin_toss(gen) != 0) { + CordzInfo::MaybeUntrackCord(cord.cordz_info()); + } else { + CordzUpdateScope scope(cord.cordz_info(), + CordzUpdateTracker::kUnknown); + scope.SetCordRep(nullptr); + } + CordRep::Unref(cord.as_tree()); + cord.set_inline_size(0); + } else { + // Coin toss to 25% ring, 25% btree, and 50% flat. + CordRep* rep = Flat(256); + if (coin_toss(gen) != 0) { + if (coin_toss(gen) != 0) { + rep = CordRepRing::Create(rep); + } else { + rep = CordRepBtree::Create(rep); + } + } + cord.make_tree(rep); + + // 50/50 sample + if (coin_toss(gen) != 0) { + CordzInfo::TrackCord(cord, CordzUpdateTracker::kUnknown); + } + } + } + } + } + for (InlineData& cord : cords) { + if (cord.is_tree()) { + CordzInfo::MaybeUntrackCord(cord.cordz_info()); + CordRep::Unref(cord.as_tree()); + } + } + }); + } + + // Run for 1 second to give memory and thread safety analyzers plenty of + // time to detect any mishaps or undefined behaviors. + absl::SleepFor(absl::Seconds(1)); + stop.Notify(); + } + + std::cout << "Sampled " << sampled_node_count << " nodes\n"; +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_info_test.cc b/absl/strings/internal/cordz_info_test.cc new file mode 100644 index 00000000..b98343ae --- /dev/null +++ b/absl/strings/internal/cordz_info_test.cc @@ -0,0 +1,341 @@ +// Copyright 2019 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_info.h" + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/debugging/stacktrace.h" +#include "absl/debugging/symbolize.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_statistics.h" +#include "absl/strings/internal/cordz_update_tracker.h" +#include "absl/strings/str_cat.h" +#include "absl/types/span.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::HasSubstr; +using ::testing::Ne; +using ::testing::SizeIs; + +// Used test values +auto constexpr kUnknownMethod = CordzUpdateTracker::kUnknown; +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; +auto constexpr kChildMethod = CordzUpdateTracker::kConstructorCord; +auto constexpr kUpdateMethod = CordzUpdateTracker::kAppendString; + +// Local less verbose helper +std::vector<const CordzHandle*> DeleteQueue() { + return CordzHandle::DiagnosticsGetDeleteQueue(); +} + +std::string FormatStack(absl::Span<void* const> raw_stack) { + static constexpr size_t buf_size = 1 << 14; + std::unique_ptr<char[]> buf(new char[buf_size]); + std::string output; + for (void* stackp : raw_stack) { + if (absl::Symbolize(stackp, buf.get(), buf_size)) { + absl::StrAppend(&output, " ", buf.get(), "\n"); + } + } + return output; +} + +TEST(CordzInfoTest, TrackCord) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + ASSERT_THAT(info, Ne(nullptr)); + EXPECT_FALSE(info->is_snapshot()); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info)); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep)); + info->Untrack(); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSampling) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithSampling) { + CordzSamplingIntervalHelper sample_all(1); + TestCordData parent, child; + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingParentSampled) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::TrackCord(parent.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + CordzInfo* parent_info = parent.data.cordz_info(); + CordzInfo* child_info = child.data.cordz_info(); + ASSERT_THAT(child_info, Ne(nullptr)); + EXPECT_THAT(child_info->GetCordRepForTesting(), Eq(child.rep.rep)); + EXPECT_THAT(child_info->GetParentStack(), parent_info->GetStack()); + parent_info->Untrack(); + child_info->Untrack(); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithoutSamplingChildSampled) { + CordzSamplingIntervalHelper sample_none(99999); + TestCordData parent, child; + CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, MaybeTrackChildCordWithSamplingChildSampled) { + CordzSamplingIntervalHelper sample_all(1); + TestCordData parent, child; + CordzInfo::TrackCord(child.data, kTrackCordMethod); + CordzInfo::MaybeTrackCord(child.data, parent.data, kTrackCordMethod); + EXPECT_THAT(child.data.cordz_info(), Eq(nullptr)); +} + +TEST(CordzInfoTest, UntrackCord) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + info->Untrack(); + EXPECT_THAT(DeleteQueue(), SizeIs(0)); +} + +TEST(CordzInfoTest, UntrackCordWithSnapshot) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + CordzSnapshot snapshot; + info->Untrack(); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr)); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(data.rep.rep)); + EXPECT_THAT(DeleteQueue(), ElementsAre(info, &snapshot)); +} + +TEST(CordzInfoTest, SetCordRep) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + TestCordRep rep; + info->Lock(CordzUpdateTracker::kAppendCord); + info->SetCordRep(rep.rep); + info->Unlock(); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(rep.rep)); + + info->Untrack(); +} + +TEST(CordzInfoTest, SetCordRepNullUntracksCordOnUnlock) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + info->Lock(CordzUpdateTracker::kAppendString); + info->SetCordRep(nullptr); + EXPECT_THAT(info->GetCordRepForTesting(), Eq(nullptr)); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(info)); + + info->Unlock(); + EXPECT_THAT(CordzInfo::Head(CordzSnapshot()), Eq(nullptr)); +} + +TEST(CordzInfoTest, RefCordRep) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + + size_t refcount = data.rep.rep->refcount.Get(); + EXPECT_THAT(info->RefCordRep(), Eq(data.rep.rep)); + EXPECT_THAT(data.rep.rep->refcount.Get(), Eq(refcount + 1)); + CordRep::Unref(data.rep.rep); + info->Untrack(); +} + +#if GTEST_HAS_DEATH_TEST + +TEST(CordzInfoTest, SetCordRepRequiresMutex) { + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + TestCordRep rep; + EXPECT_DEBUG_DEATH(info->SetCordRep(rep.rep), ".*"); + info->Untrack(); +} + +#endif // GTEST_HAS_DEATH_TEST + +TEST(CordzInfoTest, TrackUntrackHeadFirstV2) { + CordzSnapshot snapshot; + EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); + + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info1 = data.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + TestCordData data2; + CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo* info2 = data2.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info2->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info1->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); +} + +TEST(CordzInfoTest, TrackUntrackTailFirstV2) { + CordzSnapshot snapshot; + EXPECT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); + + TestCordData data; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info1 = data.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + TestCordData data2; + CordzInfo::TrackCord(data2.data, kTrackCordMethod); + CordzInfo* info2 = data2.data.cordz_info(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(info1)); + EXPECT_THAT(info1->Next(snapshot), Eq(nullptr)); + + info1->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(info2)); + EXPECT_THAT(info2->Next(snapshot), Eq(nullptr)); + + info2->Untrack(); + ASSERT_THAT(CordzInfo::Head(snapshot), Eq(nullptr)); +} + +TEST(CordzInfoTest, StackV2) { + TestCordData data; + // kMaxStackDepth is intentionally less than 64 (which is the max depth that + // Cordz will record) because if the actual stack depth is over 64 + // (which it is on Apple platforms) then the expected_stack will end up + // catching a few frames at the end that the actual_stack didn't get and + // it will no longer be subset. At the time of this writing 58 is the max + // that will allow this test to pass (with a minimum os version of iOS 9), so + // rounded down to 50 to hopefully not run into this in the future if Apple + // makes small modifications to its testing stack. 50 is sufficient to prove + // that we got a decent stack. + static constexpr int kMaxStackDepth = 50; + CordzInfo::TrackCord(data.data, kTrackCordMethod); + CordzInfo* info = data.data.cordz_info(); + std::vector<void*> local_stack; + local_stack.resize(kMaxStackDepth); + // In some environments we don't get stack traces. For example in Android + // absl::GetStackTrace will return 0 indicating it didn't find any stack. The + // resultant formatted stack will be "", but that still equals the stack + // recorded in CordzInfo, which is also empty. The skip_count is 1 so that the + // line number of the current stack isn't included in the HasSubstr check. + local_stack.resize(absl::GetStackTrace(local_stack.data(), kMaxStackDepth, + /*skip_count=*/1)); + + std::string got_stack = FormatStack(info->GetStack()); + std::string expected_stack = FormatStack(local_stack); + // If TrackCord is inlined, got_stack should match expected_stack. If it isn't + // inlined, got_stack should include an additional frame not present in + // expected_stack. Either way, expected_stack should be a substring of + // got_stack. + EXPECT_THAT(got_stack, HasSubstr(expected_stack)); + + info->Untrack(); +} + +// Local helper functions to get different stacks for child and parent. +CordzInfo* TrackChildCord(InlineData& data, const InlineData& parent) { + CordzInfo::TrackCord(data, parent, kChildMethod); + return data.cordz_info(); +} +CordzInfo* TrackParentCord(InlineData& data) { + CordzInfo::TrackCord(data, kTrackCordMethod); + return data.cordz_info(); +} + +TEST(CordzInfoTest, GetStatistics) { + TestCordData data; + CordzInfo* info = TrackParentCord(data.data); + + CordzStatistics statistics = info->GetCordzStatistics(); + EXPECT_THAT(statistics.size, Eq(data.rep.rep->length)); + EXPECT_THAT(statistics.method, Eq(kTrackCordMethod)); + EXPECT_THAT(statistics.parent_method, Eq(kUnknownMethod)); + EXPECT_THAT(statistics.update_tracker.Value(kTrackCordMethod), Eq(1)); + + info->Untrack(); +} + +TEST(CordzInfoTest, LockCountsMethod) { + TestCordData data; + CordzInfo* info = TrackParentCord(data.data); + + info->Lock(kUpdateMethod); + info->Unlock(); + info->Lock(kUpdateMethod); + info->Unlock(); + + CordzStatistics statistics = info->GetCordzStatistics(); + EXPECT_THAT(statistics.update_tracker.Value(kUpdateMethod), Eq(2)); + + info->Untrack(); +} + +TEST(CordzInfoTest, FromParent) { + TestCordData parent; + TestCordData child; + CordzInfo* info_parent = TrackParentCord(parent.data); + CordzInfo* info_child = TrackChildCord(child.data, parent.data); + + std::string stack = FormatStack(info_parent->GetStack()); + std::string parent_stack = FormatStack(info_child->GetParentStack()); + EXPECT_THAT(stack, Eq(parent_stack)); + + CordzStatistics statistics = info_child->GetCordzStatistics(); + EXPECT_THAT(statistics.size, Eq(child.rep.rep->length)); + EXPECT_THAT(statistics.method, Eq(kChildMethod)); + EXPECT_THAT(statistics.parent_method, Eq(kTrackCordMethod)); + EXPECT_THAT(statistics.update_tracker.Value(kChildMethod), Eq(1)); + + info_parent->Untrack(); + info_child->Untrack(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_sample_token.cc b/absl/strings/internal/cordz_sample_token.cc new file mode 100644 index 00000000..ba1270d8 --- /dev/null +++ b/absl/strings/internal/cordz_sample_token.cc @@ -0,0 +1,64 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_sample_token.h" + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +CordzSampleToken::Iterator& CordzSampleToken::Iterator::operator++() { + if (current_) { + current_ = current_->Next(*token_); + } + return *this; +} + +CordzSampleToken::Iterator CordzSampleToken::Iterator::operator++(int) { + Iterator it(*this); + operator++(); + return it; +} + +bool operator==(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return lhs.current_ == rhs.current_ && + (lhs.current_ == nullptr || lhs.token_ == rhs.token_); +} + +bool operator!=(const CordzSampleToken::Iterator& lhs, + const CordzSampleToken::Iterator& rhs) { + return !(lhs == rhs); +} + +CordzSampleToken::Iterator::reference CordzSampleToken::Iterator::operator*() + const { + return *current_; +} + +CordzSampleToken::Iterator::pointer CordzSampleToken::Iterator::operator->() + const { + return current_; +} + +CordzSampleToken::Iterator::Iterator(const CordzSampleToken* token) + : token_(token), current_(CordzInfo::Head(*token)) {} + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_sample_token.h b/absl/strings/internal/cordz_sample_token.h new file mode 100644 index 00000000..28a1d70c --- /dev/null +++ b/absl/strings/internal/cordz_sample_token.h @@ -0,0 +1,97 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" + +#ifndef ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ +#define ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// The existence of a CordzSampleToken guarantees that a reader can traverse the +// global_cordz_infos_head linked-list without needing to hold a mutex. When a +// CordzSampleToken exists, all CordzInfo objects that would be destroyed are +// instead appended to a deletion queue. When the CordzSampleToken is destroyed, +// it will also clean up any of these CordzInfo objects. +// +// E.g., ST are CordzSampleToken objects and CH are CordzHandle objects. +// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- global_delete_queue_tail +// +// This list tracks that CH1 and CH2 were created after ST1, so the thread +// holding ST1 might have a referece to CH1, CH2, ST2, and CH3. However, ST2 was +// created later, so the thread holding the ST2 token cannot have a reference to +// ST1, CH1, or CH2. If ST1 is cleaned up first, that thread will delete ST1, +// CH1, and CH2. If instead ST2 is cleaned up first, that thread will only +// delete ST2. +// +// If ST1 is cleaned up first, the new list will be: +// ST2 <- CH3 <- global_delete_queue_tail +// +// If ST2 is cleaned up first, the new list will be: +// ST1 <- CH1 <- CH2 <- CH3 <- global_delete_queue_tail +// +// All new CordzHandle objects are appended to the list, so if a new thread +// comes along before either ST1 or ST2 are cleaned up, the new list will be: +// ST1 <- CH1 <- CH2 <- ST2 <- CH3 <- ST3 <- global_delete_queue_tail +// +// A thread must hold the global_delete_queue_mu mutex whenever it's altering +// this list. +// +// It is safe for thread that holds a CordzSampleToken to read +// global_cordz_infos at any time since the objects it is able to retrieve will +// not be deleted while the CordzSampleToken exists. +class CordzSampleToken : public CordzSnapshot { + public: + class Iterator { + public: + using iterator_category = std::input_iterator_tag; + using value_type = const CordzInfo&; + using difference_type = ptrdiff_t; + using pointer = const CordzInfo*; + using reference = value_type; + + Iterator() = default; + + Iterator& operator++(); + Iterator operator++(int); + friend bool operator==(const Iterator& lhs, const Iterator& rhs); + friend bool operator!=(const Iterator& lhs, const Iterator& rhs); + reference operator*() const; + pointer operator->() const; + + private: + friend class CordzSampleToken; + explicit Iterator(const CordzSampleToken* token); + + const CordzSampleToken* token_ = nullptr; + pointer current_ = nullptr; + }; + + CordzSampleToken() = default; + CordzSampleToken(const CordzSampleToken&) = delete; + CordzSampleToken& operator=(const CordzSampleToken&) = delete; + + Iterator begin() { return Iterator(this); } + Iterator end() { return Iterator(); } +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_CORDZ_SAMPLE_TOKEN_H_ diff --git a/absl/strings/internal/cordz_sample_token_test.cc b/absl/strings/internal/cordz_sample_token_test.cc new file mode 100644 index 00000000..6be1770d --- /dev/null +++ b/absl/strings/internal/cordz_sample_token_test.cc @@ -0,0 +1,208 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_sample_token.h" + +#include <memory> +#include <type_traits> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/memory/memory.h" +#include "absl/random/random.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_handle.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/synchronization/internal/thread_pool.h" +#include "absl/synchronization/notification.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Ne; + +// Used test values +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; + +TEST(CordzSampleTokenTest, IteratorTraits) { + static_assert(std::is_copy_constructible<CordzSampleToken::Iterator>::value, + ""); + static_assert(std::is_copy_assignable<CordzSampleToken::Iterator>::value, ""); + static_assert(std::is_move_constructible<CordzSampleToken::Iterator>::value, + ""); + static_assert(std::is_move_assignable<CordzSampleToken::Iterator>::value, ""); + static_assert( + std::is_same< + std::iterator_traits<CordzSampleToken::Iterator>::iterator_category, + std::input_iterator_tag>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::value_type, + const CordzInfo&>::value, + ""); + static_assert( + std::is_same< + std::iterator_traits<CordzSampleToken::Iterator>::difference_type, + ptrdiff_t>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::pointer, + const CordzInfo*>::value, + ""); + static_assert( + std::is_same<std::iterator_traits<CordzSampleToken::Iterator>::reference, + const CordzInfo&>::value, + ""); +} + +TEST(CordzSampleTokenTest, IteratorEmpty) { + CordzSampleToken token; + EXPECT_THAT(token.begin(), Eq(token.end())); +} + +TEST(CordzSampleTokenTest, Iterator) { + TestCordData cord1, cord2, cord3; + CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo* info1 = cord1.data.cordz_info(); + CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo* info2 = cord2.data.cordz_info(); + CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo* info3 = cord3.data.cordz_info(); + + CordzSampleToken token; + std::vector<const CordzInfo*> found; + for (const CordzInfo& cord_info : token) { + found.push_back(&cord_info); + } + + EXPECT_THAT(found, ElementsAre(info3, info2, info1)); + + info1->Untrack(); + info2->Untrack(); + info3->Untrack(); +} + +TEST(CordzSampleTokenTest, IteratorEquality) { + TestCordData cord1; + TestCordData cord2; + TestCordData cord3; + CordzInfo::TrackCord(cord1.data, kTrackCordMethod); + CordzInfo* info1 = cord1.data.cordz_info(); + + CordzSampleToken token1; + // lhs starts with the CordzInfo corresponding to cord1 at the head. + CordzSampleToken::Iterator lhs = token1.begin(); + + CordzInfo::TrackCord(cord2.data, kTrackCordMethod); + CordzInfo* info2 = cord2.data.cordz_info(); + + CordzSampleToken token2; + // rhs starts with the CordzInfo corresponding to cord2 at the head. + CordzSampleToken::Iterator rhs = token2.begin(); + + CordzInfo::TrackCord(cord3.data, kTrackCordMethod); + CordzInfo* info3 = cord3.data.cordz_info(); + + // lhs is on cord1 while rhs is on cord2. + EXPECT_THAT(lhs, Ne(rhs)); + + rhs++; + // lhs and rhs are both on cord1, but they didn't come from the same + // CordzSampleToken. + EXPECT_THAT(lhs, Ne(rhs)); + + lhs++; + rhs++; + // Both lhs and rhs are done, so they are on nullptr. + EXPECT_THAT(lhs, Eq(rhs)); + + info1->Untrack(); + info2->Untrack(); + info3->Untrack(); +} + +TEST(CordzSampleTokenTest, MultiThreaded) { + Notification stop; + static constexpr int kNumThreads = 4; + static constexpr int kNumCords = 3; + static constexpr int kNumTokens = 3; + absl::synchronization_internal::ThreadPool pool(kNumThreads); + + for (int i = 0; i < kNumThreads; ++i) { + pool.Schedule([&stop]() { + absl::BitGen gen; + TestCordData cords[kNumCords]; + std::unique_ptr<CordzSampleToken> tokens[kNumTokens]; + + while (!stop.HasBeenNotified()) { + // Randomly perform one of five actions: + // 1) Untrack + // 2) Track + // 3) Iterate over Cords visible to a token. + // 4) Unsample + // 5) Sample + int index = absl::Uniform(gen, 0, kNumCords); + if (absl::Bernoulli(gen, 0.5)) { + TestCordData& cord = cords[index]; + // Track/untrack. + if (cord.data.is_profiled()) { + // 1) Untrack + cord.data.cordz_info()->Untrack(); + cord.data.clear_cordz_info(); + } else { + // 2) Track + CordzInfo::TrackCord(cord.data, kTrackCordMethod); + } + } else { + std::unique_ptr<CordzSampleToken>& token = tokens[index]; + if (token) { + if (absl::Bernoulli(gen, 0.5)) { + // 3) Iterate over Cords visible to a token. + for (const CordzInfo& info : *token) { + // This is trivial work to allow us to compile the loop. + EXPECT_THAT(info.Next(*token), Ne(&info)); + } + } else { + // 4) Unsample + token = nullptr; + } + } else { + // 5) Sample + token = absl::make_unique<CordzSampleToken>(); + } + } + } + for (TestCordData& cord : cords) { + CordzInfo::MaybeUntrackCord(cord.data.cordz_info()); + } + }); + } + // The threads will hammer away. Give it a little bit of time for tsan to + // spot errors. + absl::SleepFor(absl::Seconds(3)); + stop.Notify(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/cordz_statistics.h b/absl/strings/internal/cordz_statistics.h new file mode 100644 index 00000000..57071905 --- /dev/null +++ b/absl/strings/internal/cordz_statistics.h @@ -0,0 +1,88 @@ +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ + +#include <cstdint> + +#include "absl/base/config.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzStatistics captures some meta information about a Cord's shape. +struct CordzStatistics { + using MethodIdentifier = CordzUpdateTracker::MethodIdentifier; + + // Node counts information + struct NodeCounts { + size_t flat = 0; // #flats + size_t flat_64 = 0; // #flats up to 64 bytes + size_t flat_128 = 0; // #flats up to 128 bytes + size_t flat_256 = 0; // #flats up to 256 bytes + size_t flat_512 = 0; // #flats up to 512 bytes + size_t flat_1k = 0; // #flats up to 1K bytes + size_t external = 0; // #external reps + size_t substring = 0; // #substring reps + size_t concat = 0; // #concat reps + size_t ring = 0; // #ring buffer reps + size_t btree = 0; // #btree reps + size_t crc = 0; // #crc reps + }; + + // The size of the cord in bytes. This matches the result of Cord::size(). + int64_t size = 0; + + // The estimated memory used by the sampled cord. This value matches the + // value as reported by Cord::EstimatedMemoryUsage(). + // A value of 0 implies the property has not been recorded. + int64_t estimated_memory_usage = 0; + + // The effective memory used by the sampled cord, inversely weighted by the + // effective indegree of each allocated node. This is a representation of the + // fair share of memory usage that should be attributed to the sampled cord. + // This value is more useful for cases where one or more nodes are referenced + // by multiple Cord instances, and for cases where a Cord includes the same + // node multiple times (either directly or indirectly). + // A value of 0 implies the property has not been recorded. + int64_t estimated_fair_share_memory_usage = 0; + + // The total number of nodes referenced by this cord. + // For ring buffer Cords, this includes the 'ring buffer' node. + // For btree Cords, this includes all 'CordRepBtree' tree nodes as well as all + // the substring, flat and external nodes referenced by the tree. + // A value of 0 implies the property has not been recorded. + int64_t node_count = 0; + + // Detailed node counts per type + NodeCounts node_counts; + + // The cord method responsible for sampling the cord. + MethodIdentifier method = MethodIdentifier::kUnknown; + + // The cord method responsible for sampling the parent cord if applicable. + MethodIdentifier parent_method = MethodIdentifier::kUnknown; + + // Update tracker tracking invocation count per cord method. + CordzUpdateTracker update_tracker; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_STATISTICS_H_ diff --git a/absl/strings/internal/cordz_update_scope.h b/absl/strings/internal/cordz_update_scope.h new file mode 100644 index 00000000..57ba75de --- /dev/null +++ b/absl/strings/internal/cordz_update_scope.h @@ -0,0 +1,71 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ + +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/strings/internal/cord_internal.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateScope scopes an update to the provided CordzInfo. +// The class invokes `info->Lock(method)` and `info->Unlock()` to guard +// cordrep updates. This class does nothing if `info` is null. +// See also the 'Lock`, `Unlock` and `SetCordRep` methods in `CordzInfo`. +class ABSL_SCOPED_LOCKABLE CordzUpdateScope { + public: + CordzUpdateScope(CordzInfo* info, CordzUpdateTracker::MethodIdentifier method) + ABSL_EXCLUSIVE_LOCK_FUNCTION(info) + : info_(info) { + if (ABSL_PREDICT_FALSE(info_)) { + info->Lock(method); + } + } + + // CordzUpdateScope can not be copied or assigned to. + CordzUpdateScope(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope(const CordzUpdateScope&) = delete; + CordzUpdateScope& operator=(CordzUpdateScope&& rhs) = delete; + CordzUpdateScope& operator=(const CordzUpdateScope&) = delete; + + ~CordzUpdateScope() ABSL_UNLOCK_FUNCTION() { + if (ABSL_PREDICT_FALSE(info_)) { + info_->Unlock(); + } + } + + void SetCordRep(CordRep* rep) const { + if (ABSL_PREDICT_FALSE(info_)) { + info_->SetCordRep(rep); + } + } + + CordzInfo* info() const { return info_; } + + private: + CordzInfo* info_; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_SCOPE_H_ diff --git a/absl/strings/internal/cordz_update_scope_test.cc b/absl/strings/internal/cordz_update_scope_test.cc new file mode 100644 index 00000000..3d08c622 --- /dev/null +++ b/absl/strings/internal/cordz_update_scope_test.cc @@ -0,0 +1,49 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_update_scope.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/config.h" +#include "absl/strings/cordz_test_helpers.h" +#include "absl/strings/internal/cord_rep_flat.h" +#include "absl/strings/internal/cordz_info.h" +#include "absl/strings/internal/cordz_update_tracker.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +namespace { + +// Used test values +auto constexpr kTrackCordMethod = CordzUpdateTracker::kConstructorString; + +TEST(CordzUpdateScopeTest, ScopeNullptr) { + CordzUpdateScope scope(nullptr, kTrackCordMethod); +} + +TEST(CordzUpdateScopeTest, ScopeSampledCord) { + TestCordData cord; + CordzInfo::TrackCord(cord.data, kTrackCordMethod); + CordzUpdateScope scope(cord.data.cordz_info(), kTrackCordMethod); + cord.data.cordz_info()->SetCordRep(nullptr); +} + +} // namespace +ABSL_NAMESPACE_END +} // namespace cord_internal + +} // namespace absl diff --git a/absl/strings/internal/cordz_update_tracker.h b/absl/strings/internal/cordz_update_tracker.h new file mode 100644 index 00000000..c5170662 --- /dev/null +++ b/absl/strings/internal/cordz_update_tracker.h @@ -0,0 +1,123 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ +#define ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ + +#include <atomic> +#include <cstdint> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { + +// CordzUpdateTracker tracks counters for Cord update methods. +// +// The purpose of CordzUpdateTracker is to track the number of calls to methods +// updating Cord data for sampled cords. The class internally uses 'lossy' +// atomic operations: Cord is thread-compatible, so there is no need to +// synchronize updates. However, Cordz collection threads may call 'Value()' at +// any point, so the class needs to provide thread safe access. +// +// This class is thread-safe. But as per above comments, all non-const methods +// should be used single-threaded only: updates are thread-safe but lossy. +class CordzUpdateTracker { + public: + // Tracked update methods. + enum MethodIdentifier { + kUnknown, + kAppendCord, + kAppendCordBuffer, + kAppendExternalMemory, + kAppendString, + kAssignCord, + kAssignString, + kClear, + kConstructorCord, + kConstructorString, + kCordReader, + kFlatten, + kGetAppendBuffer, + kGetAppendRegion, + kMakeCordFromExternal, + kMoveAppendCord, + kMoveAssignCord, + kMovePrependCord, + kPrependCord, + kPrependCordBuffer, + kPrependString, + kRemovePrefix, + kRemoveSuffix, + kSetExpectedChecksum, + kSubCord, + + // kNumMethods defines the number of entries: must be the last entry. + kNumMethods, + }; + + // Constructs a new instance. All counters are zero-initialized. + constexpr CordzUpdateTracker() noexcept : values_{} {} + + // Copy constructs a new instance. + CordzUpdateTracker(const CordzUpdateTracker& rhs) noexcept { *this = rhs; } + + // Assigns the provided value to this instance. + CordzUpdateTracker& operator=(const CordzUpdateTracker& rhs) noexcept { + for (int i = 0; i < kNumMethods; ++i) { + values_[i].store(rhs.values_[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + } + return *this; + } + + // Returns the value for the specified method. + int64_t Value(MethodIdentifier method) const { + return values_[method].load(std::memory_order_relaxed); + } + + // Increases the value for the specified method by `n` + void LossyAdd(MethodIdentifier method, int64_t n = 1) { + auto& value = values_[method]; + value.store(value.load(std::memory_order_relaxed) + n, + std::memory_order_relaxed); + } + + // Adds all the values from `src` to this instance + void LossyAdd(const CordzUpdateTracker& src) { + for (int i = 0; i < kNumMethods; ++i) { + MethodIdentifier method = static_cast<MethodIdentifier>(i); + if (int64_t value = src.Value(method)) { + LossyAdd(method, value); + } + } + } + + private: + // Until C++20 std::atomic is not constexpr default-constructible, so we need + // a wrapper for this class to be constexpr constructible. + class Counter : public std::atomic<int64_t> { + public: + constexpr Counter() noexcept : std::atomic<int64_t>(0) {} + }; + + Counter values_[kNumMethods]; +}; + +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_CORDZ_UPDATE_TRACKER_H_ diff --git a/absl/strings/internal/cordz_update_tracker_test.cc b/absl/strings/internal/cordz_update_tracker_test.cc new file mode 100644 index 00000000..9b1f7986 --- /dev/null +++ b/absl/strings/internal/cordz_update_tracker_test.cc @@ -0,0 +1,147 @@ +// Copyright 2021 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/cordz_update_tracker.h" + +#include <array> +#include <thread> // NOLINT + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/synchronization/notification.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace cord_internal { +namespace { + +using ::testing::AnyOf; +using ::testing::Eq; + +using Method = CordzUpdateTracker::MethodIdentifier; +using Methods = std::array<Method, Method::kNumMethods>; + +// Returns an array of all methods defined in `MethodIdentifier` +Methods AllMethods() { + return Methods{Method::kUnknown, + Method::kAppendCord, + Method::kAppendCordBuffer, + Method::kAppendExternalMemory, + Method::kAppendString, + Method::kAssignCord, + Method::kAssignString, + Method::kClear, + Method::kConstructorCord, + Method::kConstructorString, + Method::kCordReader, + Method::kFlatten, + Method::kGetAppendBuffer, + Method::kGetAppendRegion, + Method::kMakeCordFromExternal, + Method::kMoveAppendCord, + Method::kMoveAssignCord, + Method::kMovePrependCord, + Method::kPrependCord, + Method::kPrependCordBuffer, + Method::kPrependString, + Method::kRemovePrefix, + Method::kRemoveSuffix, + Method::kSetExpectedChecksum, + Method::kSubCord}; +} + +TEST(CordzUpdateTracker, IsConstExprAndInitializesToZero) { + constexpr CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + ASSERT_THAT(tracker.Value(method), Eq(0)); + } +} + +TEST(CordzUpdateTracker, LossyAdd) { + int64_t n = 1; + CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + tracker.LossyAdd(method, n); + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, CopyConstructor) { + int64_t n = 1; + CordzUpdateTracker src; + for (Method method : AllMethods()) { + src.LossyAdd(method, n); + n += 2; + } + + n = 1; + CordzUpdateTracker tracker(src); + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, OperatorAssign) { + int64_t n = 1; + CordzUpdateTracker src; + CordzUpdateTracker tracker; + for (Method method : AllMethods()) { + src.LossyAdd(method, n); + n += 2; + } + + n = 1; + tracker = src; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } +} + +TEST(CordzUpdateTracker, ThreadSanitizedValueCheck) { + absl::Notification done; + CordzUpdateTracker tracker; + + std::thread reader([&done, &tracker] { + while (!done.HasBeenNotified()) { + int n = 1; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), AnyOf(Eq(n), Eq(0))); + n += 2; + } + } + int n = 1; + for (Method method : AllMethods()) { + EXPECT_THAT(tracker.Value(method), Eq(n)); + n += 2; + } + }); + + int64_t n = 1; + for (Method method : AllMethods()) { + tracker.LossyAdd(method, n); + n += 2; + } + done.Notify(); + reader.join(); +} + +} // namespace +} // namespace cord_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc index c5271286..cfea0961 100644 --- a/absl/strings/internal/escaping.cc +++ b/absl/strings/internal/escaping.cc @@ -21,7 +21,7 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { -const char kBase64Chars[] = +ABSL_CONST_INIT const char kBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { @@ -102,8 +102,8 @@ size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, } } // To save time, we didn't update szdest or szsrc in the loop. So do it now. - szdest = limit_dest - cur_dest; - szsrc = limit_src - cur_src; + szdest = static_cast<size_t>(limit_dest - cur_dest); + szsrc = static_cast<size_t>(limit_src - cur_src); /* now deal with the tail (<=3 bytes) */ switch (szsrc) { @@ -154,7 +154,8 @@ size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, // the loop because the loop above always reads 4 bytes, and the fourth // byte is past the end of the input. if (szdest < 4) return 0; - uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); + uint32_t in = + (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1); cur_dest[0] = base64[in >> 18]; in &= 0x3FFFF; cur_dest[1] = base64[in >> 12]; @@ -172,7 +173,7 @@ size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); break; } - return (cur_dest - dest); + return static_cast<size_t>(cur_dest - dest); } } // namespace strings_internal diff --git a/absl/strings/internal/ostringstream.cc b/absl/strings/internal/ostringstream.cc index 05324c78..dc6cfe16 100644 --- a/absl/strings/internal/ostringstream.cc +++ b/absl/strings/internal/ostringstream.cc @@ -27,7 +27,7 @@ OStringStream::Buf::int_type OStringStream::overflow(int c) { std::streamsize OStringStream::xsputn(const char* s, std::streamsize n) { assert(s_); - s_->append(s, n); + s_->append(s, static_cast<size_t>(n)); return n; } diff --git a/absl/strings/internal/resize_uninitialized.h b/absl/strings/internal/resize_uninitialized.h index e42628e3..49859dcc 100644 --- a/absl/strings/internal/resize_uninitialized.h +++ b/absl/strings/internal/resize_uninitialized.h @@ -17,6 +17,7 @@ #ifndef ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ #define ABSL_STRINGS_INTERNAL_RESIZE_UNINITIALIZED_H_ +#include <algorithm> #include <string> #include <type_traits> #include <utility> @@ -28,8 +29,9 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { -// Is a subclass of true_type or false_type, depending on whether or not -// T has a __resize_default_init member. +// In this type trait, we look for a __resize_default_init member function, and +// we use it if available, otherwise, we use resize. We provide HasMember to +// indicate whether __resize_default_init is present. template <typename string_type, typename = void> struct ResizeUninitializedTraits { using HasMember = std::false_type; @@ -66,6 +68,50 @@ inline void STLStringResizeUninitialized(string_type* s, size_t new_size) { ResizeUninitializedTraits<string_type>::Resize(s, new_size); } +// Used to ensure exponential growth so that the amortized complexity of +// increasing the string size by a small amount is O(1), in contrast to +// O(str->size()) in the case of precise growth. +template <typename string_type> +void STLStringReserveAmortized(string_type* s, size_t new_size) { + const size_t cap = s->capacity(); + if (new_size > cap) { + // Make sure to always grow by at least a factor of 2x. + s->reserve((std::max)(new_size, 2 * cap)); + } +} + +// In this type trait, we look for an __append_default_init member function, and +// we use it if available, otherwise, we use append. +template <typename string_type, typename = void> +struct AppendUninitializedTraits { + static void Append(string_type* s, size_t n) { + s->append(n, typename string_type::value_type()); + } +}; + +template <typename string_type> +struct AppendUninitializedTraits< + string_type, absl::void_t<decltype(std::declval<string_type&>() + .__append_default_init(237))> > { + static void Append(string_type* s, size_t n) { + s->__append_default_init(n); + } +}; + +// Like STLStringResizeUninitialized(str, new_size), except guaranteed to use +// exponential growth so that the amortized complexity of increasing the string +// size by a small amount is O(1), in contrast to O(str->size()) in the case of +// precise growth. +template <typename string_type> +void STLStringResizeUninitializedAmortized(string_type* s, size_t new_size) { + const size_t size = s->size(); + if (new_size > size) { + AppendUninitializedTraits<string_type>::Append(s, new_size - size); + } else { + s->erase(new_size); + } +} + } // namespace strings_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/internal/resize_uninitialized_test.cc b/absl/strings/internal/resize_uninitialized_test.cc index 0f8b3c2a..ad1b9c58 100644 --- a/absl/strings/internal/resize_uninitialized_test.cc +++ b/absl/strings/internal/resize_uninitialized_test.cc @@ -19,64 +19,115 @@ namespace { int resize_call_count = 0; +int append_call_count = 0; // A mock string class whose only purpose is to track how many times its -// resize() method has been called. +// resize()/append() methods have been called. struct resizable_string { + using value_type = char; size_t size() const { return 0; } + size_t capacity() const { return 0; } char& operator[](size_t) { static char c = '\0'; return c; } void resize(size_t) { resize_call_count += 1; } + void append(size_t, value_type) { append_call_count += 1; } + void reserve(size_t) {} + resizable_string& erase(size_t = 0, size_t = 0) { return *this; } }; int resize_default_init_call_count = 0; +int append_default_init_call_count = 0; // A mock string class whose only purpose is to track how many times its -// resize() and __resize_default_init() methods have been called. -struct resize_default_init_string { +// resize()/__resize_default_init()/append()/__append_default_init() methods +// have been called. +struct default_init_string { size_t size() const { return 0; } + size_t capacity() const { return 0; } char& operator[](size_t) { static char c = '\0'; return c; } void resize(size_t) { resize_call_count += 1; } void __resize_default_init(size_t) { resize_default_init_call_count += 1; } + void __append_default_init(size_t) { append_default_init_call_count += 1; } + void reserve(size_t) {} + default_init_string& erase(size_t = 0, size_t = 0) { return *this; } }; TEST(ResizeUninit, WithAndWithout) { resize_call_count = 0; + append_call_count = 0; resize_default_init_call_count = 0; + append_default_init_call_count = 0; { resizable_string rs; EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); EXPECT_FALSE( absl::strings_internal::STLStringSupportsNontrashingResize(&rs)); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); absl::strings_internal::STLStringResizeUninitialized(&rs, 237); EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitializedAmortized(&rs, 1000); + EXPECT_EQ(resize_call_count, 1); + EXPECT_EQ(append_call_count, 1); + EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); } resize_call_count = 0; + append_call_count = 0; resize_default_init_call_count = 0; + append_default_init_call_count = 0; { - resize_default_init_string rus; + default_init_string rus; EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); EXPECT_TRUE( absl::strings_internal::STLStringSupportsNontrashingResize(&rus)); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 0); + EXPECT_EQ(append_default_init_call_count, 0); absl::strings_internal::STLStringResizeUninitialized(&rus, 237); EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); + EXPECT_EQ(resize_default_init_call_count, 1); + EXPECT_EQ(append_default_init_call_count, 0); + absl::strings_internal::STLStringResizeUninitializedAmortized(&rus, 1000); + EXPECT_EQ(resize_call_count, 0); + EXPECT_EQ(append_call_count, 0); EXPECT_EQ(resize_default_init_call_count, 1); + EXPECT_EQ(append_default_init_call_count, 1); + } +} + +TEST(ResizeUninit, Amortized) { + std::string str; + size_t prev_cap = str.capacity(); + int cap_increase_count = 0; + for (int i = 0; i < 1000; ++i) { + absl::strings_internal::STLStringResizeUninitializedAmortized(&str, i); + size_t new_cap = str.capacity(); + if (new_cap > prev_cap) ++cap_increase_count; + prev_cap = new_cap; } + EXPECT_LT(cap_increase_count, 50); } } // namespace diff --git a/absl/strings/internal/str_format/arg.cc b/absl/strings/internal/str_format/arg.cc index e28a29b1..02aeeebe 100644 --- a/absl/strings/internal/str_format/arg.cc +++ b/absl/strings/internal/str_format/arg.cc @@ -320,7 +320,7 @@ bool ConvertIntArg(T v, const FormatConversionSpecImpl conv, return ConvertFloatImpl(static_cast<double>(v), conv, sink); default: - ABSL_INTERNAL_ASSUME(false); + ABSL_ASSUME(false); } if (conv.is_basic()) { diff --git a/absl/strings/internal/str_format/arg.h b/absl/strings/internal/str_format/arg.h index 7040c866..b9dda909 100644 --- a/absl/strings/internal/str_format/arg.h +++ b/absl/strings/internal/str_format/arg.h @@ -122,6 +122,14 @@ StringConvertResult FormatConvertImpl(const std::string& v, StringConvertResult FormatConvertImpl(string_view v, FormatConversionSpecImpl conv, FormatSinkImpl* sink); +#if defined(ABSL_HAVE_STD_STRING_VIEW) && !defined(ABSL_USES_STD_STRING_VIEW) +inline StringConvertResult FormatConvertImpl(std::string_view v, + FormatConversionSpecImpl conv, + FormatSinkImpl* sink) { + return FormatConvertImpl(absl::string_view(v.data(), v.size()), conv, sink); +} +#endif // ABSL_HAVE_STD_STRING_VIEW && !ABSL_USES_STD_STRING_VIEW + ArgConvertResult<FormatConversionCharSetUnion( FormatConversionCharSetInternal::s, FormatConversionCharSetInternal::p)> FormatConvertImpl(const char* v, const FormatConversionSpecImpl conv, @@ -136,7 +144,7 @@ StringConvertResult FormatConvertImpl(const AbslCord& value, size_t space_remaining = 0; int width = conv.width(); - if (width >= 0) space_remaining = width; + if (width >= 0) space_remaining = static_cast<size_t>(width); size_t to_write = value.size(); diff --git a/absl/strings/internal/str_format/bind.cc b/absl/strings/internal/str_format/bind.cc index 4e68b90b..c988ba8f 100644 --- a/absl/strings/internal/str_format/bind.cc +++ b/absl/strings/internal/str_format/bind.cc @@ -58,7 +58,7 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, if (static_cast<size_t>(arg_position - 1) >= pack_.size()) return false; arg = &pack_[arg_position - 1]; // 1-based - if (!unbound->flags.basic) { + if (unbound->flags != Flags::kBasic) { int width = unbound->width.value(); bool force_left = false; if (unbound->width.is_from_arg()) { @@ -84,9 +84,8 @@ inline bool ArgContext::Bind(const UnboundConversion* unbound, FormatConversionSpecImplFriend::SetPrecision(precision, bound); if (force_left) { - Flags flags = unbound->flags; - flags.left = true; - FormatConversionSpecImplFriend::SetFlags(flags, bound); + FormatConversionSpecImplFriend::SetFlags(unbound->flags | Flags::kLeft, + bound); } else { FormatConversionSpecImplFriend::SetFlags(unbound->flags, bound); } diff --git a/absl/strings/internal/str_format/bind.h b/absl/strings/internal/str_format/bind.h index 267cc0ef..80f29654 100644 --- a/absl/strings/internal/str_format/bind.h +++ b/absl/strings/internal/str_format/bind.h @@ -25,6 +25,7 @@ #include "absl/strings/internal/str_format/checker.h" #include "absl/strings/internal/str_format/parser.h" #include "absl/types/span.h" +#include "absl/utility/utility.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -87,6 +88,36 @@ class FormatSpecTemplate : public MakeDependent<UntypedFormatSpec, Args...>::type { using Base = typename MakeDependent<UntypedFormatSpec, Args...>::type; + template <bool res> + struct ErrorMaker { + constexpr bool operator()(int) const { return res; } + }; + + template <int i, int j> + static constexpr bool CheckArity(ErrorMaker<true> SpecifierCount = {}, + ErrorMaker<i == j> ParametersPassed = {}) { + static_assert(SpecifierCount(i) == ParametersPassed(j), + "Number of arguments passed must match the number of " + "conversion specifiers."); + return true; + } + + template <FormatConversionCharSet specified, FormatConversionCharSet passed, + int arg> + static constexpr bool CheckMatch( + ErrorMaker<Contains(specified, passed)> MismatchedArgumentNumber = {}) { + static_assert(MismatchedArgumentNumber(arg), + "Passed argument must match specified format."); + return true; + } + + template <FormatConversionCharSet... C, size_t... I> + static bool CheckMatches(absl::index_sequence<I...>) { + bool res[] = {true, CheckMatch<Args, C, I + 1>()...}; + (void)res; + return true; + } + public: #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER @@ -100,7 +131,7 @@ class FormatSpecTemplate // We use the 'unavailable' attribute to give a better compiler error than // just 'method is deleted'. // To avoid checking the format twice, we just check that the format is - // constexpr. If is it valid, then the overload below will kick in. + // constexpr. If it is valid, then the overload below will kick in. // We add the template here to make this overload have lower priority. template <typename = void> FormatSpecTemplate(const char* s) // NOLINT @@ -112,7 +143,8 @@ class FormatSpecTemplate template <typename T = void> FormatSpecTemplate(string_view s) // NOLINT __attribute__((enable_if(str_format_internal::EnsureConstexpr(s), - "constexpr trap"))) { + "constexpr trap"))) + : Base("to avoid noise in the compiler error") { static_assert(sizeof(T*) == 0, "Format specified does not match the arguments passed."); } @@ -133,13 +165,12 @@ class FormatSpecTemplate #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER - template < - FormatConversionCharSet... C, - typename = typename std::enable_if<sizeof...(C) == sizeof...(Args)>::type, - typename = typename std::enable_if<AllOf(Contains(Args, - C)...)>::type> + template <FormatConversionCharSet... C> FormatSpecTemplate(const ExtendedParsedFormat<C...>& pc) // NOLINT - : Base(&pc) {} + : Base(&pc) { + CheckArity<sizeof...(C), sizeof...(Args)>(); + CheckMatches<C...>(absl::make_index_sequence<sizeof...(C)>{}); + } }; class Streamable { diff --git a/absl/strings/internal/str_format/checker.h b/absl/strings/internal/str_format/checker.h index 2a2601ec..4fd19d13 100644 --- a/absl/strings/internal/str_format/checker.h +++ b/absl/strings/internal/str_format/checker.h @@ -22,9 +22,14 @@ // Compile time check support for entry points. #ifndef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER -#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +// We disable format checker under vscode intellisense compilation. +// See https://github.com/microsoft/vscode-cpptools/issues/3683 for +// more details. +#if ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) && \ + !defined(__INTELLISENSE__) #define ABSL_INTERNAL_ENABLE_FORMAT_CHECKER 1 -#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) +#endif // ABSL_HAVE_ATTRIBUTE(enable_if) && !defined(__native_client__) && + // !defined(__INTELLISENSE__) #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER namespace absl { diff --git a/absl/strings/internal/str_format/convert_test.cc b/absl/strings/internal/str_format/convert_test.cc index 926283cf..300612b7 100644 --- a/absl/strings/internal/str_format/convert_test.cc +++ b/absl/strings/internal/str_format/convert_test.cc @@ -24,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/base/attributes.h" #include "absl/base/internal/raw_logging.h" #include "absl/strings/internal/str_format/bind.h" #include "absl/strings/match.h" @@ -124,6 +125,7 @@ void StrAppendV(std::string *dst, const char *format, va_list ap) { delete[] buf; } +void StrAppend(std::string *, const char *, ...) ABSL_PRINTF_ATTRIBUTE(2, 3); void StrAppend(std::string *out, const char *format, ...) { va_list ap; va_start(ap, format); @@ -131,6 +133,7 @@ void StrAppend(std::string *out, const char *format, ...) { va_end(ap); } +std::string StrPrint(const char *, ...) ABSL_PRINTF_ATTRIBUTE(1, 2); std::string StrPrint(const char *format, ...) { va_list ap; va_start(ap, format); @@ -229,6 +232,9 @@ TEST_F(FormatConvertTest, BasicString) { TestStringConvert(static_cast<const char*>("hello")); TestStringConvert(std::string("hello")); TestStringConvert(string_view("hello")); +#if defined(ABSL_HAVE_STD_STRING_VIEW) + TestStringConvert(std::string_view("hello")); +#endif // ABSL_HAVE_STD_STRING_VIEW } TEST_F(FormatConvertTest, NullString) { @@ -452,25 +458,36 @@ TYPED_TEST_P(TypedFormatConvertTest, AllIntsWithFlags) { } TYPED_TEST_P(TypedFormatConvertTest, Char) { + // Pass a bunch of values of type TypeParam to both FormatPack and libc's + // vsnprintf("%c", ...) (wrapped in StrPrint) to make sure we get the same + // value. typedef TypeParam T; using remove_volatile_t = typename std::remove_volatile<T>::type; - static const T kMin = std::numeric_limits<remove_volatile_t>::min(); - static const T kMax = std::numeric_limits<remove_volatile_t>::max(); - T kVals[] = { - remove_volatile_t(1), remove_volatile_t(2), remove_volatile_t(10), - remove_volatile_t(-1), remove_volatile_t(-2), remove_volatile_t(-10), - remove_volatile_t(0), - kMin + remove_volatile_t(1), kMin, - kMax - remove_volatile_t(1), kMax + std::vector<remove_volatile_t> vals = { + remove_volatile_t(1), remove_volatile_t(2), remove_volatile_t(10), // + remove_volatile_t(-1), remove_volatile_t(-2), remove_volatile_t(-10), // + remove_volatile_t(0), }; - for (const T &c : kVals) { + + // We'd like to test values near std::numeric_limits::min() and + // std::numeric_limits::max(), too, but vsnprintf("%c", ...) can't handle + // anything larger than an int. Add in the most extreme values we can without + // exceeding that range. + static const T kMin = + static_cast<remove_volatile_t>(std::numeric_limits<int>::min()); + static const T kMax = + static_cast<remove_volatile_t>(std::numeric_limits<int>::max()); + vals.insert(vals.end(), {kMin + 1, kMin, kMax - 1, kMax}); + + for (const T c : vals) { const FormatArgImpl args[] = {FormatArgImpl(c)}; UntypedFormatSpecImpl format("%c"); - EXPECT_EQ(StrPrint("%c", c), FormatPack(format, absl::MakeSpan(args))); + EXPECT_EQ(StrPrint("%c", static_cast<int>(c)), + FormatPack(format, absl::MakeSpan(args))); } } -REGISTER_TYPED_TEST_CASE_P(TypedFormatConvertTest, AllIntsWithFlags, Char); +REGISTER_TYPED_TEST_SUITE_P(TypedFormatConvertTest, AllIntsWithFlags, Char); typedef ::testing::Types< int, unsigned, volatile int, @@ -479,8 +496,8 @@ typedef ::testing::Types< long long, unsigned long long, signed char, unsigned char, char> AllIntTypes; -INSTANTIATE_TYPED_TEST_CASE_P(TypedFormatConvertTestWithAllIntTypes, - TypedFormatConvertTest, AllIntTypes); +INSTANTIATE_TYPED_TEST_SUITE_P(TypedFormatConvertTestWithAllIntTypes, + TypedFormatConvertTest, AllIntTypes); TEST_F(FormatConvertTest, VectorBool) { // Make sure vector<bool>'s values behave as bools. std::vector<bool> v = {true, false}; diff --git a/absl/strings/internal/str_format/extension.cc b/absl/strings/internal/str_format/extension.cc index bb0d96cf..f93153d5 100644 --- a/absl/strings/internal/str_format/extension.cc +++ b/absl/strings/internal/str_format/extension.cc @@ -23,16 +23,18 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace str_format_internal { -std::string Flags::ToString() const { +std::string FlagsToString(Flags v) { std::string s; - s.append(left ? "-" : ""); - s.append(show_pos ? "+" : ""); - s.append(sign_col ? " " : ""); - s.append(alt ? "#" : ""); - s.append(zero ? "0" : ""); + s.append(FlagsContains(v, Flags::kLeft) ? "-" : ""); + s.append(FlagsContains(v, Flags::kShowPos) ? "+" : ""); + s.append(FlagsContains(v, Flags::kSignCol) ? " " : ""); + s.append(FlagsContains(v, Flags::kAlt) ? "#" : ""); + s.append(FlagsContains(v, Flags::kZero) ? "0" : ""); return s; } +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL + #define ABSL_INTERNAL_X_VAL(id) \ constexpr absl::FormatConversionChar FormatConversionCharInternal::id; ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_X_VAL, ) @@ -45,17 +47,14 @@ constexpr absl::FormatConversionChar FormatConversionCharInternal::kNone; ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(ABSL_INTERNAL_CHAR_SET_CASE, ) #undef ABSL_INTERNAL_CHAR_SET_CASE -// NOLINTNEXTLINE(readability-redundant-declaration) constexpr FormatConversionCharSet FormatConversionCharSetInternal::kStar; -// NOLINTNEXTLINE(readability-redundant-declaration) constexpr FormatConversionCharSet FormatConversionCharSetInternal::kIntegral; -// NOLINTNEXTLINE(readability-redundant-declaration) constexpr FormatConversionCharSet FormatConversionCharSetInternal::kFloating; -// NOLINTNEXTLINE(readability-redundant-declaration) constexpr FormatConversionCharSet FormatConversionCharSetInternal::kNumeric; -// NOLINTNEXTLINE(readability-redundant-declaration) constexpr FormatConversionCharSet FormatConversionCharSetInternal::kPointer; +#endif // ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL + bool FormatSinkImpl::PutPaddedString(string_view value, int width, int precision, bool left) { size_t space_remaining = 0; diff --git a/absl/strings/internal/str_format/extension.h b/absl/strings/internal/str_format/extension.h index a9b9e137..55e8ac88 100644 --- a/absl/strings/internal/str_format/extension.h +++ b/absl/strings/internal/str_format/extension.h @@ -19,6 +19,7 @@ #include <limits.h> #include <cstddef> +#include <cstdint> #include <cstring> #include <ostream> @@ -70,7 +71,7 @@ class FormatSinkImpl { ~FormatSinkImpl() { Flush(); } void Flush() { - raw_.Write(string_view(buf_, pos_ - buf_)); + raw_.Write(string_view(buf_, static_cast<size_t>(pos_ - buf_))); pos_ = buf_; } @@ -120,7 +121,9 @@ class FormatSinkImpl { } private: - size_t Avail() const { return buf_ + sizeof(buf_) - pos_; } + size_t Avail() const { + return static_cast<size_t>(buf_ + sizeof(buf_) - pos_); + } FormatRawSinkImpl raw_; size_t size_ = 0; @@ -128,19 +131,33 @@ class FormatSinkImpl { char buf_[1024]; }; -struct Flags { - bool basic : 1; // fastest conversion: no flags, width, or precision - bool left : 1; // "-" - bool show_pos : 1; // "+" - bool sign_col : 1; // " " - bool alt : 1; // "#" - bool zero : 1; // "0" - std::string ToString() const; - friend std::ostream& operator<<(std::ostream& os, const Flags& v) { - return os << v.ToString(); - } +enum class Flags : uint8_t { + kBasic = 0, + kLeft = 1 << 0, + kShowPos = 1 << 1, + kSignCol = 1 << 2, + kAlt = 1 << 3, + kZero = 1 << 4, + // This is not a real flag. It just exists to turn off kBasic when no other + // flags are set. This is for when width/precision are specified. + kNonBasic = 1 << 5, }; +constexpr Flags operator|(Flags a, Flags b) { + return static_cast<Flags>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b)); +} + +constexpr bool FlagsContains(Flags haystack, Flags needle) { + return (static_cast<uint8_t>(haystack) & static_cast<uint8_t>(needle)) == + static_cast<uint8_t>(needle); +} + +std::string FlagsToString(Flags v); + +inline std::ostream& operator<<(std::ostream& os, Flags v) { + return os << FlagsToString(v); +} + // clang-format off #define ABSL_INTERNAL_CONVERSION_CHARS_EXPAND_(X_VAL, X_SEP) \ /* text */ \ @@ -257,12 +274,16 @@ struct FormatConversionSpecImplFriend; class FormatConversionSpecImpl { public: // Width and precison are not specified, no flags are set. - bool is_basic() const { return flags_.basic; } - bool has_left_flag() const { return flags_.left; } - bool has_show_pos_flag() const { return flags_.show_pos; } - bool has_sign_col_flag() const { return flags_.sign_col; } - bool has_alt_flag() const { return flags_.alt; } - bool has_zero_flag() const { return flags_.zero; } + bool is_basic() const { return flags_ == Flags::kBasic; } + bool has_left_flag() const { return FlagsContains(flags_, Flags::kLeft); } + bool has_show_pos_flag() const { + return FlagsContains(flags_, Flags::kShowPos); + } + bool has_sign_col_flag() const { + return FlagsContains(flags_, Flags::kSignCol); + } + bool has_alt_flag() const { return FlagsContains(flags_, Flags::kAlt); } + bool has_zero_flag() const { return FlagsContains(flags_, Flags::kZero); } FormatConversionChar conversion_char() const { // Keep this field first in the struct . It generates better code when @@ -306,7 +327,7 @@ struct FormatConversionSpecImplFriend final { conv->precision_ = p; } static std::string FlagsToString(const FormatConversionSpecImpl& spec) { - return spec.flags_.ToString(); + return str_format_internal::FlagsToString(spec.flags_); } }; diff --git a/absl/strings/internal/str_format/output.h b/absl/strings/internal/str_format/output.h index 8030dae0..15e751ab 100644 --- a/absl/strings/internal/str_format/output.h +++ b/absl/strings/internal/str_format/output.h @@ -22,6 +22,7 @@ #define ABSL_STRINGS_INTERNAL_STR_FORMAT_OUTPUT_H_ #include <cstdio> +#include <ios> #include <ostream> #include <string> @@ -71,7 +72,7 @@ inline void AbslFormatFlush(std::string* out, string_view s) { out->append(s.data(), s.size()); } inline void AbslFormatFlush(std::ostream* out, string_view s) { - out->write(s.data(), s.size()); + out->write(s.data(), static_cast<std::streamsize>(s.size())); } inline void AbslFormatFlush(FILERawSink* sink, string_view v) { diff --git a/absl/strings/internal/str_format/parser.cc b/absl/strings/internal/str_format/parser.cc index f308d023..2c9c07da 100644 --- a/absl/strings/internal/str_format/parser.cc +++ b/absl/strings/internal/str_format/parser.cc @@ -34,60 +34,67 @@ namespace str_format_internal { using CC = FormatConversionCharInternal; using LM = LengthMod; +// Abbreviations to fit in the table below. +constexpr auto f_sign = Flags::kSignCol; +constexpr auto f_alt = Flags::kAlt; +constexpr auto f_pos = Flags::kShowPos; +constexpr auto f_left = Flags::kLeft; +constexpr auto f_zero = Flags::kZero; + ABSL_CONST_INIT const ConvTag kTags[256] = { - {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 - {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f - {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 - {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f - {}, {}, {}, {}, {}, {}, {}, {}, // 20-27 - {}, {}, {}, {}, {}, {}, {}, {}, // 28-2f - {}, {}, {}, {}, {}, {}, {}, {}, // 30-37 - {}, {}, {}, {}, {}, {}, {}, {}, // 38-3f - {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG - {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO - {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW - CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ - {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg - LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno - CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw - CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! - {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 - {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f - {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 - {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f - {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 - {}, {}, {}, {}, {}, {}, {}, {}, // a8-af - {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 - {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf - {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 - {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf - {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 - {}, {}, {}, {}, {}, {}, {}, {}, // d8-df - {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 - {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef - {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 - {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff + {}, {}, {}, {}, {}, {}, {}, {}, // 00-07 + {}, {}, {}, {}, {}, {}, {}, {}, // 08-0f + {}, {}, {}, {}, {}, {}, {}, {}, // 10-17 + {}, {}, {}, {}, {}, {}, {}, {}, // 18-1f + f_sign, {}, {}, f_alt, {}, {}, {}, {}, // !"#$%&' + {}, {}, {}, f_pos, {}, f_left, {}, {}, // ()*+,-./ + f_zero, {}, {}, {}, {}, {}, {}, {}, // 01234567 + {}, {}, {}, {}, {}, {}, {}, {}, // 89:;<=>? + {}, CC::A, {}, {}, {}, CC::E, CC::F, CC::G, // @ABCDEFG + {}, {}, {}, {}, LM::L, {}, {}, {}, // HIJKLMNO + {}, {}, {}, {}, {}, {}, {}, {}, // PQRSTUVW + CC::X, {}, {}, {}, {}, {}, {}, {}, // XYZ[\]^_ + {}, CC::a, {}, CC::c, CC::d, CC::e, CC::f, CC::g, // `abcdefg + LM::h, CC::i, LM::j, {}, LM::l, {}, CC::n, CC::o, // hijklmno + CC::p, LM::q, {}, CC::s, LM::t, CC::u, {}, {}, // pqrstuvw + CC::x, {}, LM::z, {}, {}, {}, {}, {}, // xyz{|}! + {}, {}, {}, {}, {}, {}, {}, {}, // 80-87 + {}, {}, {}, {}, {}, {}, {}, {}, // 88-8f + {}, {}, {}, {}, {}, {}, {}, {}, // 90-97 + {}, {}, {}, {}, {}, {}, {}, {}, // 98-9f + {}, {}, {}, {}, {}, {}, {}, {}, // a0-a7 + {}, {}, {}, {}, {}, {}, {}, {}, // a8-af + {}, {}, {}, {}, {}, {}, {}, {}, // b0-b7 + {}, {}, {}, {}, {}, {}, {}, {}, // b8-bf + {}, {}, {}, {}, {}, {}, {}, {}, // c0-c7 + {}, {}, {}, {}, {}, {}, {}, {}, // c8-cf + {}, {}, {}, {}, {}, {}, {}, {}, // d0-d7 + {}, {}, {}, {}, {}, {}, {}, {}, // d8-df + {}, {}, {}, {}, {}, {}, {}, {}, // e0-e7 + {}, {}, {}, {}, {}, {}, {}, {}, // e8-ef + {}, {}, {}, {}, {}, {}, {}, {}, // f0-f7 + {}, {}, {}, {}, {}, {}, {}, {}, // f8-ff }; namespace { bool CheckFastPathSetting(const UnboundConversion& conv) { - bool should_be_basic = !conv.flags.left && // - !conv.flags.show_pos && // - !conv.flags.sign_col && // - !conv.flags.alt && // - !conv.flags.zero && // - (conv.width.value() == -1) && - (conv.precision.value() == -1); - if (should_be_basic != conv.flags.basic) { + bool width_precision_needed = + conv.width.value() >= 0 || conv.precision.value() >= 0; + if (width_precision_needed && conv.flags == Flags::kBasic) { fprintf(stderr, "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d " "width=%d precision=%d\n", - conv.flags.basic, conv.flags.left, conv.flags.show_pos, - conv.flags.sign_col, conv.flags.alt, conv.flags.zero, - conv.width.value(), conv.precision.value()); + conv.flags == Flags::kBasic ? 1 : 0, + FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0, + FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0, + FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0, + FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0, + FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(), + conv.precision.value()); + return false; } - return should_be_basic == conv.flags.basic; + return true; } template <bool is_positional> @@ -131,40 +138,21 @@ const char *ConsumeConversion(const char *pos, const char *const end, ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); // We should start with the basic flag on. - assert(conv->flags.basic); + assert(conv->flags == Flags::kBasic); // Any non alpha character makes this conversion not basic. // This includes flags (-+ #0), width (1-9, *) or precision (.). // All conversion characters and length modifiers are alpha characters. if (c < 'A') { - conv->flags.basic = false; - - for (; c <= '0';) { - // FIXME: We might be able to speed this up reusing the lookup table from - // above. It might require changing Flags to be a plain integer where we - // can |= a value. - switch (c) { - case '-': - conv->flags.left = true; - break; - case '+': - conv->flags.show_pos = true; - break; - case ' ': - conv->flags.sign_col = true; - break; - case '#': - conv->flags.alt = true; - break; - case '0': - conv->flags.zero = true; - break; - default: - goto flags_done; + while (c <= '0') { + auto tag = GetTagForChar(c); + if (tag.is_flags()) { + conv->flags = conv->flags | tag.as_flags(); + ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); + } else { + break; } - ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); } -flags_done: if (c <= '9') { if (c >= '0') { @@ -173,12 +161,12 @@ flags_done: if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr; // Positional conversion. *next_arg = -1; - conv->flags = Flags(); - conv->flags.basic = true; return ConsumeConversion<true>(original_pos, end, conv, next_arg); } + conv->flags = conv->flags | Flags::kNonBasic; conv->width.set_value(maybe_width); } else if (c == '*') { + conv->flags = conv->flags | Flags::kNonBasic; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (is_positional) { if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr; @@ -192,6 +180,7 @@ flags_done: } if (c == '.') { + conv->flags = conv->flags | Flags::kNonBasic; ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR(); if (std::isdigit(c)) { conv->precision.set_value(parse_digits()); diff --git a/absl/strings/internal/str_format/parser.h b/absl/strings/internal/str_format/parser.h index 6504dd3d..32b91d03 100644 --- a/absl/strings/internal/str_format/parser.h +++ b/absl/strings/internal/str_format/parser.h @@ -41,10 +41,7 @@ std::string LengthModToString(LengthMod v); // The analyzed properties of a single specified conversion. struct UnboundConversion { - UnboundConversion() - : flags() /* This is required to zero all the fields of flags. */ { - flags.basic = true; - } + UnboundConversion() {} class InputValue { public: @@ -79,7 +76,7 @@ struct UnboundConversion { InputValue width; InputValue precision; - Flags flags; + Flags flags = Flags::kBasic; LengthMod length_mod = LengthMod::none; FormatConversionChar conv = FormatConversionCharInternal::kNone; }; @@ -93,32 +90,43 @@ const char* ConsumeUnboundConversion(const char* p, const char* end, UnboundConversion* conv, int* next_arg); // Helper tag class for the table below. -// It allows fast `char -> ConversionChar/LengthMod` checking and +// It allows fast `char -> ConversionChar/LengthMod/Flags` checking and // conversions. class ConvTag { public: constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT - : tag_(static_cast<int8_t>(conversion_char)) {} - // We invert the length modifiers to make them negative so that we can easily - // test for them. + : tag_(static_cast<uint8_t>(conversion_char)) {} constexpr ConvTag(LengthMod length_mod) // NOLINT - : tag_(~static_cast<std::int8_t>(length_mod)) {} - // Everything else is -128, which is negative to make is_conv() simpler. - constexpr ConvTag() : tag_(-128) {} + : tag_(0x80 | static_cast<uint8_t>(length_mod)) {} + constexpr ConvTag(Flags flags) // NOLINT + : tag_(0xc0 | static_cast<uint8_t>(flags)) {} + constexpr ConvTag() : tag_(0xFF) {} + + bool is_conv() const { return (tag_ & 0x80) == 0; } + bool is_length() const { return (tag_ & 0xC0) == 0x80; } + bool is_flags() const { return (tag_ & 0xE0) == 0xC0; } - bool is_conv() const { return tag_ >= 0; } - bool is_length() const { return tag_ < 0 && tag_ != -128; } FormatConversionChar as_conv() const { assert(is_conv()); + assert(!is_length()); + assert(!is_flags()); return static_cast<FormatConversionChar>(tag_); } LengthMod as_length() const { + assert(!is_conv()); assert(is_length()); - return static_cast<LengthMod>(~tag_); + assert(!is_flags()); + return static_cast<LengthMod>(tag_ & 0x3F); + } + Flags as_flags() const { + assert(!is_conv()); + assert(!is_length()); + assert(is_flags()); + return static_cast<Flags>(tag_ & 0x1F); } private: - std::int8_t tag_; + uint8_t tag_; }; extern const ConvTag kTags[256]; @@ -143,7 +151,8 @@ bool ParseFormatString(string_view src, Consumer consumer) { const char* p = src.data(); const char* const end = p + src.size(); while (p != end) { - const char* percent = static_cast<const char*>(memchr(p, '%', end - p)); + const char* percent = + static_cast<const char*>(memchr(p, '%', static_cast<size_t>(end - p))); if (!percent) { // We found the last substring. return consumer.Append(string_view(p, end - p)); @@ -234,7 +243,8 @@ class ParsedFormatBase { string_view text(base, 0); for (const auto& item : items_) { const char* const end = text.data() + text.size(); - text = string_view(end, (base + item.text_end) - end); + text = + string_view(end, static_cast<size_t>((base + item.text_end) - end)); if (item.is_conversion) { if (!consumer.ConvertOne(item.conv, text)) return false; } else { diff --git a/absl/strings/internal/str_format/parser_test.cc b/absl/strings/internal/str_format/parser_test.cc index a5fa1c79..fe0d2963 100644 --- a/absl/strings/internal/str_format/parser_test.cc +++ b/absl/strings/internal/str_format/parser_test.cc @@ -270,15 +270,22 @@ TEST_F(ConsumeUnboundConversionTest, Flags) { for (int k = 0; k < kNumFlags; ++k) if ((i >> k) & 1) fmt += kAllFlags[k]; // flag order shouldn't matter - if (rev == 1) { std::reverse(fmt.begin(), fmt.end()); } + if (rev == 1) { + std::reverse(fmt.begin(), fmt.end()); + } fmt += 'd'; SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt.c_str())); - EXPECT_EQ(fmt.find('-') == std::string::npos, !o.flags.left); - EXPECT_EQ(fmt.find('+') == std::string::npos, !o.flags.show_pos); - EXPECT_EQ(fmt.find(' ') == std::string::npos, !o.flags.sign_col); - EXPECT_EQ(fmt.find('#') == std::string::npos, !o.flags.alt); - EXPECT_EQ(fmt.find('0') == std::string::npos, !o.flags.zero); + EXPECT_EQ(fmt.find('-') == std::string::npos, + !FlagsContains(o.flags, Flags::kLeft)); + EXPECT_EQ(fmt.find('+') == std::string::npos, + !FlagsContains(o.flags, Flags::kShowPos)); + EXPECT_EQ(fmt.find(' ') == std::string::npos, + !FlagsContains(o.flags, Flags::kSignCol)); + EXPECT_EQ(fmt.find('#') == std::string::npos, + !FlagsContains(o.flags, Flags::kAlt)); + EXPECT_EQ(fmt.find('0') == std::string::npos, + !FlagsContains(o.flags, Flags::kZero)); } } } @@ -288,14 +295,14 @@ TEST_F(ConsumeUnboundConversionTest, BasicFlag) { for (const char* fmt : {"d", "llx", "G", "1$X"}) { SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt)); - EXPECT_TRUE(o.flags.basic); + EXPECT_EQ(o.flags, Flags::kBasic); } // Flag is off for (const char* fmt : {"3d", ".llx", "-G", "1$#X"}) { SCOPED_TRACE(fmt); EXPECT_TRUE(Run(fmt)); - EXPECT_FALSE(o.flags.basic); + EXPECT_NE(o.flags, Flags::kBasic); } } diff --git a/absl/strings/internal/str_join_internal.h b/absl/strings/internal/str_join_internal.h index 31dbf672..d97d5033 100644 --- a/absl/strings/internal/str_join_internal.h +++ b/absl/strings/internal/str_join_internal.h @@ -229,10 +229,11 @@ std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, std::string result; if (start != end) { // Sums size - size_t result_size = start->size(); + auto&& start_value = *start; + size_t result_size = start_value.size(); for (Iterator it = start; ++it != end;) { result_size += s.size(); - result_size += it->size(); + result_size += (*it).size(); } if (result_size > 0) { @@ -240,13 +241,15 @@ std::string JoinAlgorithm(Iterator start, Iterator end, absl::string_view s, // Joins strings char* result_buf = &*result.begin(); - memcpy(result_buf, start->data(), start->size()); - result_buf += start->size(); + + memcpy(result_buf, start_value.data(), start_value.size()); + result_buf += start_value.size(); for (Iterator it = start; ++it != end;) { memcpy(result_buf, s.data(), s.size()); result_buf += s.size(); - memcpy(result_buf, it->data(), it->size()); - result_buf += it->size(); + auto&& value = *it; + memcpy(result_buf, value.data(), value.size()); + result_buf += value.size(); } } } diff --git a/absl/strings/internal/str_split_internal.h b/absl/strings/internal/str_split_internal.h index a2f41c15..e7664216 100644 --- a/absl/strings/internal/str_split_internal.h +++ b/absl/strings/internal/str_split_internal.h @@ -32,7 +32,7 @@ #include <array> #include <initializer_list> #include <iterator> -#include <map> +#include <tuple> #include <type_traits> #include <utility> #include <vector> @@ -64,7 +64,7 @@ class ConvertibleToStringView { ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) : value_(s) {} - // Matches rvalue strings and moves their data to a member. + // Disable conversion from rvalue strings. ConvertibleToStringView(std::string&& s) = delete; ConvertibleToStringView(const std::string&& s) = delete; @@ -182,6 +182,13 @@ template <typename T> struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> : std::true_type {}; +// HasEmplace<T>::value is true iff there exists a method T::emplace(). +template <typename T, typename = void> +struct HasEmplace : std::false_type {}; +template <typename T> +struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>> + : std::true_type {}; + // IsInitializerList<T>::value is true iff T is an std::initializer_list. More // details below in Splitter<> where this is used. std::false_type IsInitializerListDispatch(...); // default: No @@ -372,50 +379,43 @@ class Splitter { // value. template <typename Container, typename First, typename Second> struct ConvertToContainer<Container, std::pair<const First, Second>, true> { + using iterator = typename Container::iterator; + Container operator()(const Splitter& splitter) const { Container m; - typename Container::iterator it; + iterator it; bool insert = true; - for (const auto& sp : splitter) { + for (const absl::string_view sv : splitter) { if (insert) { - it = Inserter<Container>::Insert(&m, First(sp), Second()); + it = InsertOrEmplace(&m, sv); } else { - it->second = Second(sp); + it->second = Second(sv); } insert = !insert; } return m; } - // Inserts the key and value into the given map, returning an iterator to - // the inserted item. Specialized for std::map and std::multimap to use - // emplace() and adapt emplace()'s return value. - template <typename Map> - struct Inserter { - using M = Map; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->insert(std::make_pair(std::forward<Args>(args)...)).first; - } - }; - - template <typename... Ts> - struct Inserter<std::map<Ts...>> { - using M = std::map<Ts...>; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; - } - }; - - template <typename... Ts> - struct Inserter<std::multimap<Ts...>> { - using M = std::multimap<Ts...>; - template <typename... Args> - static typename M::iterator Insert(M* m, Args&&... args) { - return m->emplace(std::make_pair(std::forward<Args>(args)...)); - } - }; + // Inserts the key and an empty value into the map, returning an iterator to + // the inserted item. We use emplace() if available, otherwise insert(). + template <typename M> + static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, absl::string_view key) { + // Use piecewise_construct to support old versions of gcc in which pair + // constructor can't otherwise construct string from string_view. + return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key), + std::tuple<>())); + } + template <typename M> + static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace( + M* m, absl::string_view key) { + return ToIter(m->insert(std::make_pair(First(key), Second("")))); + } + + static iterator ToIter(std::pair<iterator, bool> pair) { + return pair.first; + } + static iterator ToIter(iterator iter) { return iter; } }; StringType text_; diff --git a/absl/strings/internal/string_constant.h b/absl/strings/internal/string_constant.h index a11336b7..f68b17d7 100644 --- a/absl/strings/internal/string_constant.h +++ b/absl/strings/internal/string_constant.h @@ -35,17 +35,25 @@ namespace strings_internal { // below. template <typename T> struct StringConstant { + private: + static constexpr bool TryConstexprEval(absl::string_view view) { + return view.empty() || 2 * view[0] != 1; + } + + public: static constexpr absl::string_view value = T{}(); constexpr absl::string_view operator()() const { return value; } // Check to be sure `view` points to constant data. // Otherwise, it can't be constant evaluated. - static_assert(value.empty() || 2 * value[0] != 1, + static_assert(TryConstexprEval(value), "The input string_view must point to constant data."); }; +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL template <typename T> -constexpr absl::string_view StringConstant<T>::value; // NOLINT +constexpr absl::string_view StringConstant<T>::value; +#endif // Factory function for `StringConstant` instances. // It supports callables that have a constexpr default constructor and a diff --git a/absl/strings/internal/utf8.cc b/absl/strings/internal/utf8.cc index 8fd8edc1..7ecb93df 100644 --- a/absl/strings/internal/utf8.cc +++ b/absl/strings/internal/utf8.cc @@ -25,25 +25,25 @@ size_t EncodeUTF8Char(char *buffer, char32_t utf8_char) { *buffer = static_cast<char>(utf8_char); return 1; } else if (utf8_char <= 0x7FF) { - buffer[1] = 0x80 | (utf8_char & 0x3F); + buffer[1] = static_cast<char>(0x80 | (utf8_char & 0x3F)); utf8_char >>= 6; - buffer[0] = 0xC0 | utf8_char; + buffer[0] = static_cast<char>(0xC0 | utf8_char); return 2; } else if (utf8_char <= 0xFFFF) { - buffer[2] = 0x80 | (utf8_char & 0x3F); + buffer[2] = static_cast<char>(0x80 | (utf8_char & 0x3F)); utf8_char >>= 6; - buffer[1] = 0x80 | (utf8_char & 0x3F); + buffer[1] = static_cast<char>(0x80 | (utf8_char & 0x3F)); utf8_char >>= 6; - buffer[0] = 0xE0 | utf8_char; + buffer[0] = static_cast<char>(0xE0 | utf8_char); return 3; } else { - buffer[3] = 0x80 | (utf8_char & 0x3F); + buffer[3] = static_cast<char>(0x80 | (utf8_char & 0x3F)); utf8_char >>= 6; - buffer[2] = 0x80 | (utf8_char & 0x3F); + buffer[2] = static_cast<char>(0x80 | (utf8_char & 0x3F)); utf8_char >>= 6; - buffer[1] = 0x80 | (utf8_char & 0x3F); + buffer[1] = static_cast<char>(0x80 | (utf8_char & 0x3F)); utf8_char >>= 6; - buffer[0] = 0xF0 | utf8_char; + buffer[0] = static_cast<char>(0xF0 | utf8_char); return 4; } } diff --git a/absl/strings/numbers.cc b/absl/strings/numbers.cc index 966d94bd..e798fc69 100644 --- a/absl/strings/numbers.cc +++ b/absl/strings/numbers.cc @@ -505,7 +505,7 @@ size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) { *out++ = '-'; d = -d; } - if (std::isinf(d)) { + if (d > std::numeric_limits<double>::max()) { strcpy(out, "inf"); // NOLINT(runtime/printf) return out + 3 - buffer; } @@ -757,8 +757,8 @@ struct LookupTables { // // uint128& operator/=(uint128) is not constexpr, so hardcode the resulting // array to avoid a static initializer. -template<> -const uint128 LookupTables<uint128>::kVmaxOverBase[] = { +template <> +ABSL_CONST_INIT const uint128 LookupTables<uint128>::kVmaxOverBase[] = { 0, 0, MakeUint128(9223372036854775807u, 18446744073709551615u), @@ -809,8 +809,8 @@ const uint128 LookupTables<uint128>::kVmaxOverBase[] = { // // int128& operator/=(int128) is not constexpr, so hardcode the resulting array // to avoid a static initializer. -template<> -const int128 LookupTables<int128>::kVmaxOverBase[] = { +template <> +ABSL_CONST_INIT const int128 LookupTables<int128>::kVmaxOverBase[] = { 0, 0, MakeInt128(4611686018427387903, 18446744073709551615u), @@ -862,8 +862,8 @@ const int128 LookupTables<int128>::kVmaxOverBase[] = { // // int128& operator/=(int128) is not constexpr, so hardcode the resulting array // to avoid a static initializer. -template<> -const int128 LookupTables<int128>::kVminOverBase[] = { +template <> +ABSL_CONST_INIT const int128 LookupTables<int128>::kVminOverBase[] = { 0, 0, MakeInt128(-4611686018427387904, 0u), @@ -904,11 +904,11 @@ const int128 LookupTables<int128>::kVminOverBase[] = { }; template <typename IntType> -const IntType LookupTables<IntType>::kVmaxOverBase[] = +ABSL_CONST_INIT const IntType LookupTables<IntType>::kVmaxOverBase[] = X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max()); template <typename IntType> -const IntType LookupTables<IntType>::kVminOverBase[] = +ABSL_CONST_INIT const IntType LookupTables<IntType>::kVminOverBase[] = X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min()); #undef X_OVER_BASE_INITIALIZER diff --git a/absl/strings/numbers.h b/absl/strings/numbers.h index 1780bb44..86c84ed3 100644 --- a/absl/strings/numbers.h +++ b/absl/strings/numbers.h @@ -23,8 +23,12 @@ #ifndef ABSL_STRINGS_NUMBERS_H_ #define ABSL_STRINGS_NUMBERS_H_ -#ifdef __SSE4_2__ -#include <x86intrin.h> +#ifdef __SSSE3__ +#include <tmmintrin.h> +#endif + +#ifdef _MSC_VER +#include <intrin.h> #endif #include <cstddef> @@ -36,14 +40,7 @@ #include <type_traits> #include "absl/base/config.h" -#ifdef __SSE4_2__ -// TODO(jorg): Remove this when we figure out the right way -// to swap bytes on SSE 4.2 that works with the compilers -// we claim to support. Also, add tests for the compiler -// that doesn't support the Intel _bswap64 intrinsic but -// does support all the SSE 4.2 intrinsics #include "absl/base/internal/endian.h" -#endif #include "absl/base/macros.h" #include "absl/base/port.h" #include "absl/numeric/bits.h" @@ -96,6 +93,25 @@ ABSL_MUST_USE_RESULT bool SimpleAtod(absl::string_view str, double* out); // unspecified state. ABSL_MUST_USE_RESULT bool SimpleAtob(absl::string_view str, bool* out); +// SimpleHexAtoi() +// +// Converts a hexadecimal string (optionally followed or preceded by ASCII +// whitespace) to an integer, returning `true` if successful. Only valid base-16 +// hexadecimal integers whose value falls within the range of the integer type +// (optionally preceded by a `+` or `-`) can be converted. A valid hexadecimal +// value may include both upper and lowercase character symbols, and may +// optionally include a leading "0x" (or "0X") number prefix, which is ignored +// by this function. If any errors are encountered, this function returns +// `false`, leaving `out` in an unspecified state. +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out); + +// Overloads of SimpleHexAtoi() for 128 bit integers. +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out); +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out); + ABSL_NAMESPACE_END } // namespace absl @@ -162,16 +178,19 @@ char* FastIntToBuffer(int_type i, char* buffer) { // TODO(jorg): This signed-ness check is used because it works correctly // with enums, and it also serves to check that int_type is not a pointer. // If one day something like std::is_signed<enum E> works, switch to it. - if (static_cast<int_type>(1) - 2 < 0) { // Signed - if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit + // These conditions are constexpr bools to suppress MSVC warning C4127. + constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0; + constexpr bool kUse64Bit = sizeof(i) > 32 / 8; + if (kIsSigned) { + if (kUse64Bit) { return FastIntToBuffer(static_cast<int64_t>(i), buffer); - } else { // 32-bit or less + } else { return FastIntToBuffer(static_cast<int32_t>(i), buffer); } - } else { // Unsigned - if (sizeof(i) > 32 / 8) { // 33-bit to 64-bit + } else { + if (kUse64Bit) { return FastIntToBuffer(static_cast<uint64_t>(i), buffer); - } else { // 32-bit or less + } else { return FastIntToBuffer(static_cast<uint32_t>(i), buffer); } } @@ -190,22 +209,25 @@ ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out, // TODO(jorg): This signed-ness check is used because it works correctly // with enums, and it also serves to check that int_type is not a pointer. // If one day something like std::is_signed<enum E> works, switch to it. - if (static_cast<int_type>(1) - 2 < 0) { // Signed - if (sizeof(*out) == 64 / 8) { // 64-bit + // These conditions are constexpr bools to suppress MSVC warning C4127. + constexpr bool kIsSigned = static_cast<int_type>(1) - 2 < 0; + constexpr bool kUse64Bit = sizeof(*out) == 64 / 8; + if (kIsSigned) { + if (kUse64Bit) { int64_t val; parsed = numbers_internal::safe_strto64_base(s, &val, base); *out = static_cast<int_type>(val); - } else { // 32-bit + } else { int32_t val; parsed = numbers_internal::safe_strto32_base(s, &val, base); *out = static_cast<int_type>(val); } - } else { // Unsigned - if (sizeof(*out) == 64 / 8) { // 64-bit + } else { + if (kUse64Bit) { uint64_t val; parsed = numbers_internal::safe_strtou64_base(s, &val, base); *out = static_cast<int_type>(val); - } else { // 32-bit + } else { uint32_t val; parsed = numbers_internal::safe_strtou32_base(s, &val, base); *out = static_cast<int_type>(val); @@ -221,7 +243,7 @@ ABSL_MUST_USE_RESULT bool safe_strtoi_base(absl::string_view s, int_type* out, // Returns the number of non-pad digits of the output (it can never be zero // since 0 has one digit). inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { -#ifdef __SSE4_2__ +#ifdef ABSL_INTERNAL_HAVE_SSSE3 uint64_t be = absl::big_endian::FromHost64(val); const auto kNibbleMask = _mm_set1_epi8(0xf); const auto kHexDigits = _mm_setr_epi8('0', '1', '2', '3', '4', '5', '6', '7', @@ -240,7 +262,7 @@ inline size_t FastHexToBufferZeroPad16(uint64_t val, char* out) { } #endif // | 0x1 so that even 0 has 1 digit. - return 16 - countl_zero(val | 0x1) / 4; + return 16 - static_cast<size_t>(countl_zero(val | 0x1) / 4); } } // namespace numbers_internal @@ -260,6 +282,21 @@ ABSL_MUST_USE_RESULT inline bool SimpleAtoi(absl::string_view str, return numbers_internal::safe_strtou128_base(str, out, 10); } +template <typename int_type> +ABSL_MUST_USE_RESULT bool SimpleHexAtoi(absl::string_view str, int_type* out) { + return numbers_internal::safe_strtoi_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::int128* out) { + return numbers_internal::safe_strto128_base(str, out, 16); +} + +ABSL_MUST_USE_RESULT inline bool SimpleHexAtoi(absl::string_view str, + absl::uint128* out) { + return numbers_internal::safe_strtou128_base(str, out, 16); +} + ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/numbers_test.cc b/absl/strings/numbers_test.cc index f3103106..498c210d 100644 --- a/absl/strings/numbers_test.cc +++ b/absl/strings/numbers_test.cc @@ -47,6 +47,7 @@ namespace { using absl::SimpleAtoi; +using absl::SimpleHexAtoi; using absl::numbers_internal::kSixDigitsToBufferSize; using absl::numbers_internal::safe_strto32_base; using absl::numbers_internal::safe_strto64_base; @@ -468,6 +469,148 @@ TEST(NumbersTest, Atoenum) { VerifySimpleAtoiGood<E_biguint>(E_biguint_max32, E_biguint_max32); } +template <typename int_type, typename in_val_type> +void VerifySimpleHexAtoiGood(in_val_type in_value, int_type exp_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. + strm << "-" << std::hex << -absl::uint128(in_value); + } + int_type x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleHexAtoi(s, &x)) + << "in_value=" << std::hex << in_value << " s=" << s << " x=" << x; + EXPECT_EQ(exp_value, x); + x = static_cast<int_type>(~exp_value); + EXPECT_TRUE(SimpleHexAtoi( + s.c_str(), &x)); // NOLINT: readability-redundant-string-conversions + EXPECT_EQ(exp_value, x); +} + +template <typename int_type, typename in_val_type> +void VerifySimpleHexAtoiBad(in_val_type in_value) { + std::string s; + // uint128 can be streamed but not StrCat'd + absl::strings_internal::OStringStream strm(&s); + if (in_value >= 0) { + strm << std::hex << in_value; + } else { + // Inefficient for small integers, but works with all integral types. + strm << "-" << std::hex << -absl::uint128(in_value); + } + int_type x; + EXPECT_FALSE(SimpleHexAtoi(s, &x)); + EXPECT_FALSE(SimpleHexAtoi( + s.c_str(), &x)); // NOLINT: readability-redundant-string-conversions +} + +TEST(NumbersTest, HexAtoi) { + // SimpleHexAtoi(absl::string_view, int32_t) + VerifySimpleHexAtoiGood<int32_t>(0, 0); + VerifySimpleHexAtoiGood<int32_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42); + + VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<int32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + + // SimpleHexAtoi(absl::string_view, uint32_t) + VerifySimpleHexAtoiGood<uint32_t>(0, 0); + VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42); + VerifySimpleHexAtoiBad<uint32_t>(-0x42); + + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<uint32_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiBad<uint32_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, int64_t) + VerifySimpleHexAtoiGood<int64_t>(0, 0); + VerifySimpleHexAtoiGood<int64_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42); + + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::min(), + std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::min(), + std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<int64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiBad<int64_t>(std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, uint64_t) + VerifySimpleHexAtoiGood<uint64_t>(0, 0); + VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42); + VerifySimpleHexAtoiBad<uint64_t>(-0x42); + + VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<uint64_t>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiGood<uint64_t>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + + // SimpleHexAtoi(absl::string_view, absl::uint128) + VerifySimpleHexAtoiGood<absl::uint128>(0, 0); + VerifySimpleHexAtoiGood<absl::uint128>(0x42, 0x42); + VerifySimpleHexAtoiBad<absl::uint128>(-0x42); + + VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int32_t>::min()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int32_t>::max(), + std::numeric_limits<int32_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint32_t>::max(), + std::numeric_limits<uint32_t>::max()); + VerifySimpleHexAtoiBad<absl::uint128>(std::numeric_limits<int64_t>::min()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>(std::numeric_limits<uint64_t>::max(), + std::numeric_limits<uint64_t>::max()); + VerifySimpleHexAtoiGood<absl::uint128>( + std::numeric_limits<absl::uint128>::max(), + std::numeric_limits<absl::uint128>::max()); + + // Some other types + VerifySimpleHexAtoiGood<int>(-0x42, -0x42); + VerifySimpleHexAtoiGood<int32_t>(-0x42, -0x42); + VerifySimpleHexAtoiGood<uint32_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<unsigned int>(0x42, 0x42); + VerifySimpleHexAtoiGood<int64_t>(-0x42, -0x42); + VerifySimpleHexAtoiGood<long>(-0x42, -0x42); // NOLINT: runtime-int + VerifySimpleHexAtoiGood<uint64_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<size_t>(0x42, 0x42); + VerifySimpleHexAtoiGood<std::string::size_type>(0x42, 0x42); + + // Number prefix + int32_t value; + EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("0X34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + // ASCII whitespace + EXPECT_TRUE(safe_strto32_base(" \t\n 34234324", &value, 16)); + EXPECT_EQ(0x34234324, value); + + EXPECT_TRUE(safe_strto32_base("34234324 \t\n ", &value, 16)); + EXPECT_EQ(0x34234324, value); +} + TEST(stringtest, safe_strto32_base) { int32_t value; EXPECT_TRUE(safe_strto32_base("0x34234324", &value, 16)); diff --git a/absl/strings/str_cat.cc b/absl/strings/str_cat.cc index dd5d25b0..f4a77493 100644 --- a/absl/strings/str_cat.cc +++ b/absl/strings/str_cat.cc @@ -174,7 +174,7 @@ void AppendPieces(std::string* dest, ASSERT_NO_OVERLAP(*dest, piece); total_size += piece.size(); } - strings_internal::STLStringResizeUninitialized(dest, total_size); + strings_internal::STLStringResizeUninitializedAmortized(dest, total_size); char* const begin = &(*dest)[0]; char* out = begin + old_size; @@ -199,7 +199,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b) { ASSERT_NO_OVERLAP(*dest, a); ASSERT_NO_OVERLAP(*dest, b); std::string::size_type old_size = dest->size(); - strings_internal::STLStringResizeUninitialized( + strings_internal::STLStringResizeUninitializedAmortized( dest, old_size + a.size() + b.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; @@ -214,7 +214,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, ASSERT_NO_OVERLAP(*dest, b); ASSERT_NO_OVERLAP(*dest, c); std::string::size_type old_size = dest->size(); - strings_internal::STLStringResizeUninitialized( + strings_internal::STLStringResizeUninitializedAmortized( dest, old_size + a.size() + b.size() + c.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; @@ -231,7 +231,7 @@ void StrAppend(std::string* dest, const AlphaNum& a, const AlphaNum& b, ASSERT_NO_OVERLAP(*dest, c); ASSERT_NO_OVERLAP(*dest, d); std::string::size_type old_size = dest->size(); - strings_internal::STLStringResizeUninitialized( + strings_internal::STLStringResizeUninitializedAmortized( dest, old_size + a.size() + b.size() + c.size() + d.size()); char* const begin = &(*dest)[0]; char* out = begin + old_size; diff --git a/absl/strings/str_cat.h b/absl/strings/str_cat.h index a8a85c73..a94bc5df 100644 --- a/absl/strings/str_cat.h +++ b/absl/strings/str_cat.h @@ -214,23 +214,29 @@ class AlphaNum { // A bool ctor would also convert incoming pointers (bletch). AlphaNum(int x) // NOLINT(runtime/explicit) - : piece_(digits_, - numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + : piece_(digits_, static_cast<size_t>( + numbers_internal::FastIntToBuffer(x, digits_) - + &digits_[0])) {} AlphaNum(unsigned int x) // NOLINT(runtime/explicit) - : piece_(digits_, - numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + : piece_(digits_, static_cast<size_t>( + numbers_internal::FastIntToBuffer(x, digits_) - + &digits_[0])) {} AlphaNum(long x) // NOLINT(*) - : piece_(digits_, - numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + : piece_(digits_, static_cast<size_t>( + numbers_internal::FastIntToBuffer(x, digits_) - + &digits_[0])) {} AlphaNum(unsigned long x) // NOLINT(*) - : piece_(digits_, - numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + : piece_(digits_, static_cast<size_t>( + numbers_internal::FastIntToBuffer(x, digits_) - + &digits_[0])) {} AlphaNum(long long x) // NOLINT(*) - : piece_(digits_, - numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + : piece_(digits_, static_cast<size_t>( + numbers_internal::FastIntToBuffer(x, digits_) - + &digits_[0])) {} AlphaNum(unsigned long long x) // NOLINT(*) - : piece_(digits_, - numbers_internal::FastIntToBuffer(x, digits_) - &digits_[0]) {} + : piece_(digits_, static_cast<size_t>( + numbers_internal::FastIntToBuffer(x, digits_) - + &digits_[0])) {} AlphaNum(float f) // NOLINT(runtime/explicit) : piece_(digits_, numbers_internal::SixDigitsToBuffer(f, digits_)) {} @@ -245,7 +251,8 @@ class AlphaNum { const strings_internal::AlphaNumBuffer<size>& buf) : piece_(&buf.data[0], buf.size) {} - AlphaNum(const char* c_str) : piece_(c_str) {} // NOLINT(runtime/explicit) + AlphaNum(const char* c_str) // NOLINT(runtime/explicit) + : piece_(NullSafeStringView(c_str)) {} // NOLINT(runtime/explicit) AlphaNum(absl::string_view pc) : piece_(pc) {} // NOLINT(runtime/explicit) template <typename Allocator> diff --git a/absl/strings/str_cat_test.cc b/absl/strings/str_cat_test.cc index f3770dc0..69df2502 100644 --- a/absl/strings/str_cat_test.cc +++ b/absl/strings/str_cat_test.cc @@ -210,6 +210,11 @@ TEST(StrCat, CornerCases) { EXPECT_EQ(result, ""); } +TEST(StrCat, NullConstCharPtr) { + const char* null = nullptr; + EXPECT_EQ(absl::StrCat("mon", null, "key"), "monkey"); +} + // A minimal allocator that uses malloc(). template <typename T> struct Mallocator { diff --git a/absl/strings/str_format.h b/absl/strings/str_format.h index 01465107..4b05c70c 100644 --- a/absl/strings/str_format.h +++ b/absl/strings/str_format.h @@ -536,8 +536,7 @@ using FormatArg = str_format_internal::FormatArgImpl; // The arguments are provided in an `absl::Span<const absl::FormatArg>`. // Each `absl::FormatArg` object binds to a single argument and keeps a // reference to it. The values used to create the `FormatArg` objects must -// outlive this function call. (See `str_format_arg.h` for information on -// the `FormatArg` class.)_ +// outlive this function call. // // Example: // diff --git a/absl/strings/str_format_test.cc b/absl/strings/str_format_test.cc index c60027ad..804e6c22 100644 --- a/absl/strings/str_format_test.cc +++ b/absl/strings/str_format_test.cc @@ -719,6 +719,7 @@ TEST_F(FormatWrapperTest, ParsedFormat) { ABSL_NAMESPACE_END } // namespace absl +namespace { using FormatExtensionTest = ::testing::Test; struct Point { @@ -750,6 +751,7 @@ TEST_F(FormatExtensionTest, AbslFormatConvertExample) { // FormatUntyped will return false for bad character. EXPECT_FALSE(absl::FormatUntyped(&actual, f1, {absl::FormatArg(p)})); } +} // namespace // Some codegen thunks that we can use to easily dump the generated assembly for // different StrFormat calls. diff --git a/absl/strings/str_join.h b/absl/strings/str_join.h index 33534536..ee5ae7ef 100644 --- a/absl/strings/str_join.h +++ b/absl/strings/str_join.h @@ -72,21 +72,15 @@ ABSL_NAMESPACE_BEGIN // functions. You may provide your own Formatter to enable `absl::StrJoin()` to // work with arbitrary types. // -// The following is an example of a custom Formatter that simply uses -// `std::to_string()` to format an integer as a std::string. -// -// struct MyFormatter { -// void operator()(std::string* out, int i) const { -// out->append(std::to_string(i)); -// } -// }; -// -// You would use the above formatter by passing an instance of it as the final -// argument to `absl::StrJoin()`: -// -// std::vector<int> v = {1, 2, 3, 4}; -// std::string s = absl::StrJoin(v, "-", MyFormatter()); -// EXPECT_EQ("1-2-3-4", s); +// The following is an example of a custom Formatter that uses +// `absl::FormatDuration` to join a list of `absl::Duration`s. +// +// std::vector<absl::Duration> v = {absl::Seconds(1), absl::Milliseconds(10)}; +// std::string s = +// absl::StrJoin(v, ", ", [](std::string* out, absl::Duration dur) { +// absl::StrAppend(out, absl::FormatDuration(dur)); +// }); +// EXPECT_EQ("1s, 10ms", s); // // The following standard formatters are provided within this file: // diff --git a/absl/strings/str_join_test.cc b/absl/strings/str_join_test.cc index 2be6256e..c986e863 100644 --- a/absl/strings/str_join_test.cc +++ b/absl/strings/str_join_test.cc @@ -21,6 +21,7 @@ #include <cstdio> #include <functional> #include <initializer_list> +#include <iterator> #include <map> #include <memory> #include <ostream> @@ -33,6 +34,7 @@ #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" namespace { @@ -471,4 +473,136 @@ TEST(StrJoin, Tuple) { "-", absl::DereferenceFormatter(TestFormatter()))); } +// A minimal value type for `StrJoin` inputs. +// Used to ensure we do not excessively require more a specific type, such as a +// `string_view`. +// +// Anything that can be `data()` and `size()` is OK. +class TestValue { + public: + TestValue(const char* data, size_t size) : data_(data), size_(size) {} + const char* data() const { return data_; } + size_t size() const { return size_; } + + private: + const char* data_; + size_t size_; +}; + +// A minimal C++20 forward iterator, used to test that we do not impose +// excessive requirements on StrJoin inputs. +// +// The 2 main differences between pre-C++20 LegacyForwardIterator and the +// C++20 ForwardIterator are: +// 1. `operator->` is not required in C++20. +// 2. `operator*` result does not need to be an lvalue (a reference). +// +// The `operator->` requirement was removed on page 17 in: +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p1037r0.pdf +// +// See the `[iterator.requirements]` section of the C++ standard. +// +// The value type is a template parameter so that we can test the behaviour +// of `StrJoin` specializations, e.g. the NoFormatter specialization for +// `string_view`. +template <typename ValueT> +class TestIterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = ValueT; + using pointer = void; + using reference = const value_type&; + using difference_type = int; + + // `data` must outlive the result. + static TestIterator begin(const std::vector<absl::string_view>& data) { + return TestIterator(&data, 0); + } + + static TestIterator end(const std::vector<absl::string_view>& data) { + return TestIterator(nullptr, data.size()); + } + + bool operator==(const TestIterator& other) const { + return pos_ == other.pos_; + } + bool operator!=(const TestIterator& other) const { + return pos_ != other.pos_; + } + + // This deliberately returns a `prvalue`. + // The requirement to return a reference was removed in C++20. + value_type operator*() const { + return ValueT((*data_)[pos_].data(), (*data_)[pos_].size()); + } + + // `operator->()` is deliberately omitted. + // The requirement to provide it was removed in C++20. + + TestIterator& operator++() { + ++pos_; + return *this; + } + + TestIterator operator++(int) { + TestIterator result = *this; + ++(*this); + return result; + } + + TestIterator& operator--() { + --pos_; + return *this; + } + + TestIterator operator--(int) { + TestIterator result = *this; + --(*this); + return result; + } + + private: + TestIterator(const std::vector<absl::string_view>* data, size_t pos) + : data_(data), pos_(pos) {} + + const std::vector<absl::string_view>* data_; + size_t pos_; +}; + +template <typename ValueT> +class TestIteratorRange { + public: + // `data` must be non-null and must outlive the result. + explicit TestIteratorRange(const std::vector<absl::string_view>& data) + : begin_(TestIterator<ValueT>::begin(data)), + end_(TestIterator<ValueT>::end(data)) {} + + const TestIterator<ValueT>& begin() const { return begin_; } + const TestIterator<ValueT>& end() const { return end_; } + + private: + TestIterator<ValueT> begin_; + TestIterator<ValueT> end_; +}; + +TEST(StrJoin, TestIteratorRequirementsNoFormatter) { + const std::vector<absl::string_view> a = {"a", "b", "c"}; + + // When the value type is string-like (`std::string` or `string_view`), + // the NoFormatter template specialization is used internally. + EXPECT_EQ("a-b-c", + absl::StrJoin(TestIteratorRange<absl::string_view>(a), "-")); +} + +TEST(StrJoin, TestIteratorRequirementsCustomFormatter) { + const std::vector<absl::string_view> a = {"a", "b", "c"}; + EXPECT_EQ("a-b-c", + absl::StrJoin(TestIteratorRange<TestValue>(a), "-", + [](std::string* out, const TestValue& value) { + absl::StrAppend( + out, + absl::string_view(value.data(), value.size())); + })); +} + } // namespace diff --git a/absl/strings/str_split.h b/absl/strings/str_split.h index bfbca422..7bbb68a3 100644 --- a/absl/strings/str_split.h +++ b/absl/strings/str_split.h @@ -461,8 +461,7 @@ using EnableSplitIfString = // first two split strings become the `std::pair` `.first` and `.second` // members, respectively. The remaining split substrings are discarded. If there // are less than two split substrings, the empty string is used for the -// corresponding -// `std::pair` member. +// corresponding `std::pair` member. // // Example: // diff --git a/absl/strings/str_split_test.cc b/absl/strings/str_split_test.cc index 7f7c097f..1b4427b8 100644 --- a/absl/strings/str_split_test.cc +++ b/absl/strings/str_split_test.cc @@ -29,6 +29,8 @@ #include "gtest/gtest.h" #include "absl/base/dynamic_annotations.h" #include "absl/base/macros.h" +#include "absl/container/btree_map.h" +#include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" #include "absl/container/node_hash_map.h" #include "absl/strings/numbers.h" @@ -405,6 +407,10 @@ TEST(Splitter, ConversionOperator) { TestConversionOperator<std::set<std::string>>(splitter); TestConversionOperator<std::multiset<absl::string_view>>(splitter); TestConversionOperator<std::multiset<std::string>>(splitter); + TestConversionOperator<absl::btree_set<absl::string_view>>(splitter); + TestConversionOperator<absl::btree_set<std::string>>(splitter); + TestConversionOperator<absl::btree_multiset<absl::string_view>>(splitter); + TestConversionOperator<absl::btree_multiset<std::string>>(splitter); TestConversionOperator<std::unordered_set<std::string>>(splitter); // Tests conversion to map-like objects. @@ -421,6 +427,22 @@ TEST(Splitter, ConversionOperator) { TestMapConversionOperator<std::multimap<std::string, absl::string_view>>( splitter); TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); + TestMapConversionOperator< + absl::btree_map<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator<absl::btree_map<absl::string_view, std::string>>( + splitter); + TestMapConversionOperator<absl::btree_map<std::string, absl::string_view>>( + splitter); + TestMapConversionOperator<absl::btree_map<std::string, std::string>>( + splitter); + TestMapConversionOperator< + absl::btree_multimap<absl::string_view, absl::string_view>>(splitter); + TestMapConversionOperator< + absl::btree_multimap<absl::string_view, std::string>>(splitter); + TestMapConversionOperator< + absl::btree_multimap<std::string, absl::string_view>>(splitter); + TestMapConversionOperator<absl::btree_multimap<std::string, std::string>>( + splitter); TestMapConversionOperator<std::unordered_map<std::string, std::string>>( splitter); TestMapConversionOperator< @@ -921,8 +943,14 @@ TEST(Delimiter, ByLength) { } TEST(Split, WorksWithLargeStrings) { +#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ + defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER) + constexpr size_t kSize = (uint32_t{1} << 26) + 1; // 64M + 1 byte +#else + constexpr size_t kSize = (uint32_t{1} << 31) + 1; // 2G + 1 byte +#endif if (sizeof(size_t) > 4) { - std::string s((uint32_t{1} << 31) + 1, 'x'); // 2G + 1 byte + std::string s(kSize, 'x'); s.back() = '-'; std::vector<absl::string_view> v = absl::StrSplit(s, '-'); EXPECT_EQ(2, v.size()); diff --git a/absl/strings/string_view.cc b/absl/strings/string_view.cc index c5f5de93..adce3be9 100644 --- a/absl/strings/string_view.cc +++ b/absl/strings/string_view.cc @@ -78,8 +78,8 @@ std::ostream& operator<<(std::ostream& o, string_view piece) { return o; } -string_view::size_type string_view::find(string_view s, size_type pos) const - noexcept { +string_view::size_type string_view::find(string_view s, + size_type pos) const noexcept { if (empty() || pos > length_) { if (empty() && pos == 0 && s.empty()) return 0; return npos; @@ -98,8 +98,8 @@ string_view::size_type string_view::find(char c, size_type pos) const noexcept { return result != nullptr ? result - ptr_ : npos; } -string_view::size_type string_view::rfind(string_view s, size_type pos) const - noexcept { +string_view::size_type string_view::rfind(string_view s, + size_type pos) const noexcept { if (length_ < s.length_) return npos; if (s.empty()) return std::min(length_, pos); const char* last = ptr_ + std::min(length_ - s.length_, pos) + s.length_; @@ -108,8 +108,8 @@ string_view::size_type string_view::rfind(string_view s, size_type pos) const } // Search range is [0..pos] inclusive. If pos == npos, search everything. -string_view::size_type string_view::rfind(char c, size_type pos) const - noexcept { +string_view::size_type string_view::rfind(char c, + size_type pos) const noexcept { // Note: memrchr() is not available on Windows. if (empty()) return npos; for (size_type i = std::min(pos, length_ - 1);; --i) { @@ -121,9 +121,8 @@ string_view::size_type string_view::rfind(char c, size_type pos) const return npos; } -string_view::size_type string_view::find_first_of(string_view s, - size_type pos) const - noexcept { +string_view::size_type string_view::find_first_of( + string_view s, size_type pos) const noexcept { if (empty() || s.empty()) { return npos; } @@ -138,9 +137,8 @@ string_view::size_type string_view::find_first_of(string_view s, return npos; } -string_view::size_type string_view::find_first_not_of(string_view s, - size_type pos) const - noexcept { +string_view::size_type string_view::find_first_not_of( + string_view s, size_type pos) const noexcept { if (empty()) return npos; // Avoid the cost of LookupTable() for a single-character search. if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); @@ -153,9 +151,8 @@ string_view::size_type string_view::find_first_not_of(string_view s, return npos; } -string_view::size_type string_view::find_first_not_of(char c, - size_type pos) const - noexcept { +string_view::size_type string_view::find_first_not_of( + char c, size_type pos) const noexcept { if (empty()) return npos; for (; pos < length_; ++pos) { if (ptr_[pos] != c) { @@ -180,9 +177,8 @@ string_view::size_type string_view::find_last_of(string_view s, return npos; } -string_view::size_type string_view::find_last_not_of(string_view s, - size_type pos) const - noexcept { +string_view::size_type string_view::find_last_not_of( + string_view s, size_type pos) const noexcept { if (empty()) return npos; size_type i = std::min(pos, length_ - 1); if (s.empty()) return i; @@ -198,9 +194,8 @@ string_view::size_type string_view::find_last_not_of(string_view s, return npos; } -string_view::size_type string_view::find_last_not_of(char c, - size_type pos) const - noexcept { +string_view::size_type string_view::find_last_not_of( + char c, size_type pos) const noexcept { if (empty()) return npos; size_type i = std::min(pos, length_ - 1); for (;; --i) { @@ -212,22 +207,11 @@ string_view::size_type string_view::find_last_not_of(char c, return npos; } -// MSVC has non-standard behavior that implicitly creates definitions for static -// const members. These implicit definitions conflict with explicit out-of-class -// member definitions that are required by the C++ standard, resulting in -// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks -// MSVC to choose only one definition for the symbol it decorates. See details -// at https://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx -#ifdef _MSC_VER -#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany) -#else -#define ABSL_STRING_VIEW_SELECTANY -#endif -ABSL_STRING_VIEW_SELECTANY +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL constexpr string_view::size_type string_view::npos; -ABSL_STRING_VIEW_SELECTANY constexpr string_view::size_type string_view::kMaxSize; +#endif ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 5260b5b7..e3239f57 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -36,6 +36,7 @@ #include <limits> #include <string> +#include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/throw_delegate.h" #include "absl/base/macros.h" @@ -54,13 +55,20 @@ ABSL_NAMESPACE_END #else // ABSL_USES_STD_STRING_VIEW -#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \ - (defined(__GNUC__) && !defined(__clang__)) +#if ABSL_HAVE_BUILTIN(__builtin_memcmp) || \ + (defined(__GNUC__) && !defined(__clang__)) || \ + (defined(_MSC_VER) && _MSC_VER >= 1928) #define ABSL_INTERNAL_STRING_VIEW_MEMCMP __builtin_memcmp #else // ABSL_HAVE_BUILTIN(__builtin_memcmp) #define ABSL_INTERNAL_STRING_VIEW_MEMCMP memcmp #endif // ABSL_HAVE_BUILTIN(__builtin_memcmp) +#if defined(__cplusplus) && __cplusplus >= 201402L +#define ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR constexpr +#else +#define ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR +#endif + namespace absl { ABSL_NAMESPACE_BEGIN @@ -180,18 +188,20 @@ class string_view { template <typename Allocator> string_view( // NOLINT(runtime/explicit) - const std::basic_string<char, std::char_traits<char>, Allocator>& - str) noexcept + const std::basic_string<char, std::char_traits<char>, Allocator>& str + ABSL_ATTRIBUTE_LIFETIME_BOUND) noexcept // This is implemented in terms of `string_view(p, n)` so `str.size()` // doesn't need to be reevaluated after `ptr_` is set. - : string_view(str.data(), str.size()) {} + // The length check is also skipped since it is unnecessary and causes + // code bloat. + : string_view(str.data(), str.size(), SkipCheckLengthTag{}) {} // Implicit constructor of a `string_view` from NUL-terminated `str`. When // accepting possibly null strings, use `absl::NullSafeStringView(str)` // instead (see below). + // The length check is skipped since it is unnecessary and causes code bloat. constexpr string_view(const char* str) // NOLINT(runtime/explicit) - : ptr_(str), - length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {} + : ptr_(str), length_(str ? StrlenInternal(str) : 0) {} // Implicit constructor of a `string_view` from a `const char*` and length. constexpr string_view(const char* data, size_type len) @@ -264,9 +274,7 @@ class string_view { // string_view::size() // // Returns the number of characters in the `string_view`. - constexpr size_type size() const noexcept { - return length_; - } + constexpr size_type size() const noexcept { return length_; } // string_view::length() // @@ -333,7 +341,7 @@ class string_view { // // Removes the first `n` characters from the `string_view`. Note that the // underlying string is not changed, only the view. - void remove_prefix(size_type n) { + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void remove_prefix(size_type n) { ABSL_HARDENING_ASSERT(n <= length_); ptr_ += n; length_ -= n; @@ -343,7 +351,7 @@ class string_view { // // Removes the last `n` characters from the `string_view`. Note that the // underlying string is not changed, only the view. - void remove_suffix(size_type n) { + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void remove_suffix(size_type n) { ABSL_HARDENING_ASSERT(n <= length_); length_ -= n; } @@ -351,7 +359,7 @@ class string_view { // string_view::swap() // // Swaps this `string_view` with another `string_view`. - void swap(string_view& s) noexcept { + ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR void swap(string_view& s) noexcept { auto t = *this; *this = s; s = t; @@ -388,7 +396,7 @@ class string_view { // `n`) as another string_view. This function throws `std::out_of_bounds` if // `pos > size`. // Use absl::ClippedSubstr if you need a truncating substr operation. - constexpr string_view substr(size_type pos, size_type n = npos) const { + constexpr string_view substr(size_type pos = 0, size_type n = npos) const { return ABSL_PREDICT_FALSE(pos > length_) ? (base_internal::ThrowStdOutOfRange( "absl::string_view::substr"), @@ -398,12 +406,10 @@ class string_view { // string_view::compare() // - // Performs a lexicographical comparison between the `string_view` and - // another `absl::string_view`, returning -1 if `this` is less than, 0 if - // `this` is equal to, and 1 if `this` is greater than the passed string - // view. Note that in the case of data equality, a further comparison is made - // on the respective sizes of the two `string_view`s to determine which is - // smaller, equal, or greater. + // Performs a lexicographical comparison between this `string_view` and + // another `string_view` `x`, returning a negative value if `*this` is less + // than `x`, 0 if `*this` is equal to `x`, and a positive value if `*this` + // is greater than `x`. constexpr int compare(string_view x) const noexcept { return CompareImpl(length_, x.length_, Min(length_, x.length_) == 0 @@ -414,31 +420,31 @@ class string_view { // Overload of `string_view::compare()` for comparing a substring of the // 'string_view` and another `absl::string_view`. - int compare(size_type pos1, size_type count1, string_view v) const { + constexpr int compare(size_type pos1, size_type count1, string_view v) const { return substr(pos1, count1).compare(v); } // Overload of `string_view::compare()` for comparing a substring of the // `string_view` and a substring of another `absl::string_view`. - int compare(size_type pos1, size_type count1, string_view v, size_type pos2, - size_type count2) const { + constexpr int compare(size_type pos1, size_type count1, string_view v, + size_type pos2, size_type count2) const { return substr(pos1, count1).compare(v.substr(pos2, count2)); } // Overload of `string_view::compare()` for comparing a `string_view` and a - // a different C-style string `s`. - int compare(const char* s) const { return compare(string_view(s)); } + // a different C-style string `s`. + constexpr int compare(const char* s) const { return compare(string_view(s)); } // Overload of `string_view::compare()` for comparing a substring of the // `string_view` and a different string C-style string `s`. - int compare(size_type pos1, size_type count1, const char* s) const { + constexpr int compare(size_type pos1, size_type count1, const char* s) const { return substr(pos1, count1).compare(string_view(s)); } // Overload of `string_view::compare()` for comparing a substring of the // `string_view` and a substring of a different C-style string `s`. - int compare(size_type pos1, size_type count1, const char* s, - size_type count2) const { + constexpr int compare(size_type pos1, size_type count1, const char* s, + size_type count2) const { return substr(pos1, count1).compare(string_view(s, count2)); } @@ -455,48 +461,92 @@ class string_view { // within the `string_view`. size_type find(char c, size_type pos = 0) const noexcept; + // Overload of `string_view::find()` for finding a substring of a different + // C-style string `s` within the `string_view`. + size_type find(const char* s, size_type pos, size_type count) const { + return find(string_view(s, count), pos); + } + + // Overload of `string_view::find()` for finding a different C-style string + // `s` within the `string_view`. + size_type find(const char* s, size_type pos = 0) const { + return find(string_view(s), pos); + } + // string_view::rfind() // // Finds the last occurrence of a substring `s` within the `string_view`, // returning the position of the first character's match, or `npos` if no // match was found. - size_type rfind(string_view s, size_type pos = npos) const - noexcept; + size_type rfind(string_view s, size_type pos = npos) const noexcept; // Overload of `string_view::rfind()` for finding the last given character `c` // within the `string_view`. size_type rfind(char c, size_type pos = npos) const noexcept; + // Overload of `string_view::rfind()` for finding a substring of a different + // C-style string `s` within the `string_view`. + size_type rfind(const char* s, size_type pos, size_type count) const { + return rfind(string_view(s, count), pos); + } + + // Overload of `string_view::rfind()` for finding a different C-style string + // `s` within the `string_view`. + size_type rfind(const char* s, size_type pos = npos) const { + return rfind(string_view(s), pos); + } + // string_view::find_first_of() // // Finds the first occurrence of any of the characters in `s` within the // `string_view`, returning the start position of the match, or `npos` if no // match was found. - size_type find_first_of(string_view s, size_type pos = 0) const - noexcept; + size_type find_first_of(string_view s, size_type pos = 0) const noexcept; // Overload of `string_view::find_first_of()` for finding a character `c` // within the `string_view`. - size_type find_first_of(char c, size_type pos = 0) const - noexcept { + size_type find_first_of(char c, size_type pos = 0) const noexcept { return find(c, pos); } + // Overload of `string_view::find_first_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_first_of(const char* s, size_type pos, + size_type count) const { + return find_first_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_first_of()` for finding a different C-style + // string `s` within the `string_view`. + size_type find_first_of(const char* s, size_type pos = 0) const { + return find_first_of(string_view(s), pos); + } + // string_view::find_last_of() // // Finds the last occurrence of any of the characters in `s` within the // `string_view`, returning the start position of the match, or `npos` if no // match was found. - size_type find_last_of(string_view s, size_type pos = npos) const - noexcept; + size_type find_last_of(string_view s, size_type pos = npos) const noexcept; // Overload of `string_view::find_last_of()` for finding a character `c` // within the `string_view`. - size_type find_last_of(char c, size_type pos = npos) const - noexcept { + size_type find_last_of(char c, size_type pos = npos) const noexcept { return rfind(c, pos); } + // Overload of `string_view::find_last_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_last_of(const char* s, size_type pos, size_type count) const { + return find_last_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_last_of()` for finding a different C-style + // string `s` within the `string_view`. + size_type find_last_of(const char* s, size_type pos = npos) const { + return find_last_of(string_view(s), pos); + } + // string_view::find_first_not_of() // // Finds the first occurrence of any of the characters not in `s` within the @@ -508,20 +558,51 @@ class string_view { // that is not `c` within the `string_view`. size_type find_first_not_of(char c, size_type pos = 0) const noexcept; + // Overload of `string_view::find_first_not_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_first_not_of(const char* s, size_type pos, + size_type count) const { + return find_first_not_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_first_not_of()` for finding a different + // C-style string `s` within the `string_view`. + size_type find_first_not_of(const char* s, size_type pos = 0) const { + return find_first_not_of(string_view(s), pos); + } + // string_view::find_last_not_of() // // Finds the last occurrence of any of the characters not in `s` within the // `string_view`, returning the start position of the last non-match, or // `npos` if no non-match was found. size_type find_last_not_of(string_view s, - size_type pos = npos) const noexcept; + size_type pos = npos) const noexcept; // Overload of `string_view::find_last_not_of()` for finding a character // that is not `c` within the `string_view`. - size_type find_last_not_of(char c, size_type pos = npos) const - noexcept; + size_type find_last_not_of(char c, size_type pos = npos) const noexcept; + + // Overload of `string_view::find_last_not_of()` for finding a substring of a + // different C-style string `s` within the `string_view`. + size_type find_last_not_of(const char* s, size_type pos, + size_type count) const { + return find_last_not_of(string_view(s, count), pos); + } + + // Overload of `string_view::find_last_not_of()` for finding a different + // C-style string `s` within the `string_view`. + size_type find_last_not_of(const char* s, size_type pos = npos) const { + return find_last_not_of(string_view(s), pos); + } private: + // The constructor from std::string delegates to this constructor. + // See the comment on that constructor for the rationale. + struct SkipCheckLengthTag {}; + string_view(const char* data, size_type len, SkipCheckLengthTag) noexcept + : ptr_(data), length_(len) {} + static constexpr size_type kMaxSize = (std::numeric_limits<difference_type>::max)(); @@ -597,6 +678,7 @@ std::ostream& operator<<(std::ostream& o, string_view piece); ABSL_NAMESPACE_END } // namespace absl +#undef ABSL_INTERNAL_STRING_VIEW_CXX14_CONSTEXPR #undef ABSL_INTERNAL_STRING_VIEW_MEMCMP #endif // ABSL_USES_STD_STRING_VIEW diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 643af8f8..9d5463a1 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -449,6 +449,24 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(d.find('x', 4), absl::string_view::npos); EXPECT_EQ(e.find('x', 7), absl::string_view::npos); + EXPECT_EQ(a.find(b.data(), 1, 0), 1); + EXPECT_EQ(a.find(c.data(), 9, 0), 9); + EXPECT_EQ(a.find(c.data(), absl::string_view::npos, 0), + absl::string_view::npos); + EXPECT_EQ(b.find(c.data(), absl::string_view::npos, 0), + absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(d.find(b.data(), 4, 0), absl::string_view::npos); + EXPECT_EQ(e.find(b.data(), 7, 0), absl::string_view::npos); + + EXPECT_EQ(a.find(b.data(), 1), absl::string_view::npos); + EXPECT_EQ(a.find(c.data(), 9), 23); + EXPECT_EQ(a.find(c.data(), absl::string_view::npos), absl::string_view::npos); + EXPECT_EQ(b.find(c.data(), absl::string_view::npos), absl::string_view::npos); + // empty string nonsense + EXPECT_EQ(d.find(b.data(), 4), absl::string_view::npos); + EXPECT_EQ(e.find(b.data(), 7), absl::string_view::npos); + EXPECT_EQ(a.rfind(b), 0); EXPECT_EQ(a.rfind(b, 1), 0); EXPECT_EQ(a.rfind(c), 23); @@ -490,6 +508,14 @@ TEST(StringViewTest, STL2) { EXPECT_EQ(e.rfind('o'), absl::string_view::npos); EXPECT_EQ(d.rfind('o', 4), absl::string_view::npos); EXPECT_EQ(e.rfind('o', 7), absl::string_view::npos); + + EXPECT_EQ(a.rfind(b.data(), 1, 0), 1); + EXPECT_EQ(a.rfind(c.data(), 22, 0), 22); + EXPECT_EQ(a.rfind(c.data(), 1, 0), 1); + EXPECT_EQ(a.rfind(c.data(), 0, 0), 0); + EXPECT_EQ(b.rfind(c.data(), 0, 0), 0); + EXPECT_EQ(d.rfind(b.data(), 4, 0), 0); + EXPECT_EQ(e.rfind(b.data(), 7, 0), 0); } // Continued from STL2 @@ -678,6 +704,7 @@ TEST(StringViewTest, STL2Substr) { EXPECT_EQ(a.substr(23, 3), c); EXPECT_EQ(a.substr(23, 99), c); EXPECT_EQ(a.substr(0), a); + EXPECT_EQ(a.substr(), a); EXPECT_EQ(a.substr(3, 2), "de"); // empty string nonsense EXPECT_EQ(d.substr(0, 99), e); @@ -1087,7 +1114,24 @@ TEST(StringViewTest, ConstexprCompiles) { EXPECT_EQ(sp_npos, -1); } -TEST(StringViewTest, ConstexprSubstr) { +constexpr char ConstexprMethodsHelper() { +#if defined(__cplusplus) && __cplusplus >= 201402L + absl::string_view str("123", 3); + str.remove_prefix(1); + str.remove_suffix(1); + absl::string_view bar; + str.swap(bar); + return bar.front(); +#else + return '2'; +#endif +} + +TEST(StringViewTest, ConstexprMethods) { + // remove_prefix, remove_suffix, swap + static_assert(ConstexprMethodsHelper() == '2', ""); + + // substr constexpr absl::string_view foobar("foobar", 6); constexpr absl::string_view foo = foobar.substr(0, 3); constexpr absl::string_view bar = foobar.substr(3); @@ -1145,7 +1189,7 @@ TEST(ComparisonOpsTest, StringCompareNotAmbiguous) { EXPECT_LT("hello", std::string("world")); } -TEST(ComparisonOpsTest, HeterogenousStringViewEquals) { +TEST(ComparisonOpsTest, HeterogeneousStringViewEquals) { EXPECT_EQ(absl::string_view("hello"), std::string("hello")); EXPECT_EQ("hello", absl::string_view("hello")); } diff --git a/absl/strings/strip.h b/absl/strings/strip.h index 111872ca..341e66fc 100644 --- a/absl/strings/strip.h +++ b/absl/strings/strip.h @@ -34,8 +34,9 @@ ABSL_NAMESPACE_BEGIN // ConsumePrefix() // -// Strips the `expected` prefix from the start of the given string, returning -// `true` if the strip operation succeeded or false otherwise. +// Strips the `expected` prefix, if found, from the start of `str`. +// If the operation succeeded, `true` is returned. If not, `false` +// is returned and `str` is not modified. // // Example: // @@ -49,8 +50,9 @@ inline bool ConsumePrefix(absl::string_view* str, absl::string_view expected) { } // ConsumeSuffix() // -// Strips the `expected` suffix from the end of the given string, returning -// `true` if the strip operation succeeded or false otherwise. +// Strips the `expected` suffix, if found, from the end of `str`. +// If the operation succeeded, `true` is returned. If not, `false` +// is returned and `str` is not modified. // // Example: // @@ -65,7 +67,7 @@ inline bool ConsumeSuffix(absl::string_view* str, absl::string_view expected) { // StripPrefix() // -// Returns a view into the input string 'str' with the given 'prefix' removed, +// Returns a view into the input string `str` with the given `prefix` removed, // but leaving the original string intact. If the prefix does not match at the // start of the string, returns the original string instead. ABSL_MUST_USE_RESULT inline absl::string_view StripPrefix( @@ -76,7 +78,7 @@ ABSL_MUST_USE_RESULT inline absl::string_view StripPrefix( // StripSuffix() // -// Returns a view into the input string 'str' with the given 'suffix' removed, +// Returns a view into the input string `str` with the given `suffix` removed, // but leaving the original string intact. If the suffix does not match at the // end of the string, returns the original string instead. ABSL_MUST_USE_RESULT inline absl::string_view StripSuffix( diff --git a/absl/strings/substitute.cc b/absl/strings/substitute.cc index 1f3c7409..8980b198 100644 --- a/absl/strings/substitute.cc +++ b/absl/strings/substitute.cc @@ -75,7 +75,8 @@ void SubstituteAndAppendArray(std::string* output, absl::string_view format, // Build the string. size_t original_size = output->size(); - strings_internal::STLStringResizeUninitialized(output, original_size + size); + strings_internal::STLStringResizeUninitializedAmortized(output, + original_size + size); char* target = &(*output)[original_size]; for (size_t i = 0; i < format.size(); i++) { if (format[i] == '$') { diff --git a/absl/strings/substitute.h b/absl/strings/substitute.h index c6da4dc6..6d2b08ab 100644 --- a/absl/strings/substitute.h +++ b/absl/strings/substitute.h @@ -159,8 +159,8 @@ class Arg { Arg(Hex hex); // NOLINT(runtime/explicit) Arg(Dec dec); // NOLINT(runtime/explicit) - // vector<bool>::reference and const_reference require special help to - // convert to `AlphaNum` because it requires two user defined conversions. + // vector<bool>::reference and const_reference require special help to convert + // to `Arg` because it requires two user defined conversions. template <typename T, absl::enable_if_t< std::is_class<T>::value && @@ -174,6 +174,14 @@ class Arg { // "0x<hex value>". However, in the case of `nullptr`, "NULL" is printed. Arg(const void* value); // NOLINT(runtime/explicit) + // Normal enums are already handled by the integer formatters. + // This overload matches only scoped enums. + template <typename T, + typename = typename std::enable_if< + std::is_enum<T>{} && !std::is_convertible<T, int>{}>::type> + Arg(T value) // NOLINT(google-explicit-constructor) + : Arg(static_cast<typename std::underlying_type<T>::type>(value)) {} + Arg(const Arg&) = delete; Arg& operator=(const Arg&) = delete; @@ -361,43 +369,49 @@ inline void SubstituteAndAppend( // This body of functions catches cases where the number of placeholders // doesn't match the number of data arguments. void SubstituteAndAppend(std::string* output, const char* format) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, - "There were no substitution arguments " - "but this format string has a $[0-9] in it"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 0, + "There were no substitution arguments " + "but this format string either has a $[0-9] in it or contains " + "an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, "There was 1 substitution argument given, but " - "this format string is either missing its $0, or " - "contains one of $1-$9"); + "this format string is missing its $0, contains " + "one of $1-$9, or contains an unescaped $ character (use " + "$$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, - "There were 2 substitution arguments given, but " - "this format string is either missing its $0/$1, or " - "contains one of $2-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but this format string is " + "missing its $0/$1, contains one of $2-$9, or contains an " + "unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, - "There were 3 substitution arguments given, but " - "this format string is either missing its $0/$1/$2, or " - "contains one of $3-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format string is missing its $0/$1/$2, contains one of " + "$3-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, - "There were 4 substitution arguments given, but " - "this format string is either missing its $0-$3, or " - "contains one of $4-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format string is missing its $0-$3, contains one of " + "$4-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -405,10 +419,11 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, - "There were 5 substitution arguments given, but " - "this format string is either missing its $0-$4, or " - "contains one of $5-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format string is missing its $0-$4, contains one of " + "$5-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -417,20 +432,22 @@ void SubstituteAndAppend(std::string* output, const char* format, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, - "There were 6 substitution arguments given, but " - "this format string is either missing its $0-$5, or " - "contains one of $6-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format string is missing its $0-$5, contains one of " + "$6-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, - "There were 7 substitution arguments given, but " - "this format string is either missing its $0-$6, or " - "contains one of $7-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format string is missing its $0-$6, contains one of " + "$7-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -438,10 +455,11 @@ void SubstituteAndAppend( const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, - "There were 8 substitution arguments given, but " - "this format string is either missing its $0-$7, or " - "contains one of $8-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format string is missing its $0-$7, contains one of " + "$8-$9, or contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -452,7 +470,8 @@ void SubstituteAndAppend( ABSL_BAD_CALL_IF( substitute_internal::PlaceholderBitmask(format) != 511, "There were 9 substitution arguments given, but " - "this format string is either missing its $0-$8, or contains a $9"); + "this format string is missing its $0-$8, contains a $9, or " + "contains an unescaped $ character (use $$ instead)"); void SubstituteAndAppend( std::string* output, const char* format, const substitute_internal::Arg& a0, @@ -461,9 +480,11 @@ void SubstituteAndAppend( const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, - "There were 10 substitution arguments given, but this " - "format string doesn't contain all of $0 through $9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format string either doesn't contain all of $0 through $9 or " + "contains an unescaped $ character (use $$ instead)"); #endif // ABSL_BAD_CALL_IF // Substitute() @@ -589,47 +610,53 @@ ABSL_MUST_USE_RESULT inline std::string Substitute( std::string Substitute(const char* format) ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 0, "There were no substitution arguments " - "but this format string has a $[0-9] in it"); + "but this format string either has a $[0-9] in it or " + "contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1, - "There was 1 substitution argument given, but " - "this format string is either missing its $0, or " - "contains one of $1-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1, + "There was 1 substitution argument given, but " + "this format string is missing its $0, contains one of $1-$9, " + "or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 3, - "There were 2 substitution arguments given, but " - "this format string is either missing its $0/$1, or " - "contains one of $2-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 3, + "There were 2 substitution arguments given, but " + "this format string is missing its $0/$1, contains one of " + "$2-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 7, - "There were 3 substitution arguments given, but " - "this format string is either missing its $0/$1/$2, or " - "contains one of $3-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 7, + "There were 3 substitution arguments given, but " + "this format string is missing its $0/$1/$2, contains one of " + "$3-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 15, - "There were 4 substitution arguments given, but " - "this format string is either missing its $0-$3, or " - "contains one of $4-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 15, + "There were 4 substitution arguments given, but " + "this format string is missing its $0-$3, contains one of " + "$4-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, const substitute_internal::Arg& a2, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 31, - "There were 5 substitution arguments given, but " - "this format string is either missing its $0-$4, or " - "contains one of $5-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 31, + "There were 5 substitution arguments given, but " + "this format string is missing its $0-$4, contains one of " + "$5-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, @@ -637,10 +664,11 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a3, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 63, - "There were 6 substitution arguments given, but " - "this format string is either missing its $0-$5, or " - "contains one of $6-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 63, + "There were 6 substitution arguments given, but " + "this format string is missing its $0-$5, contains one of " + "$6-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, @@ -649,10 +677,11 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a4, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 127, - "There were 7 substitution arguments given, but " - "this format string is either missing its $0-$6, or " - "contains one of $7-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 127, + "There were 7 substitution arguments given, but " + "this format string is missing its $0-$6, contains one of " + "$7-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a1, @@ -662,10 +691,11 @@ std::string Substitute(const char* format, const substitute_internal::Arg& a0, const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 255, - "There were 8 substitution arguments given, but " - "this format string is either missing its $0-$7, or " - "contains one of $8-$9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 255, + "There were 8 substitution arguments given, but " + "this format string is missing its $0-$7, contains one of " + "$8-$9, or contains an unescaped $ character (use $$ instead)"); std::string Substitute( const char* format, const substitute_internal::Arg& a0, @@ -676,7 +706,8 @@ std::string Substitute( ABSL_BAD_CALL_IF( substitute_internal::PlaceholderBitmask(format) != 511, "There were 9 substitution arguments given, but " - "this format string is either missing its $0-$8, or contains a $9"); + "this format string is missing its $0-$8, contains a $9, or " + "contains an unescaped $ character (use $$ instead)"); std::string Substitute( const char* format, const substitute_internal::Arg& a0, @@ -685,9 +716,11 @@ std::string Substitute( const substitute_internal::Arg& a5, const substitute_internal::Arg& a6, const substitute_internal::Arg& a7, const substitute_internal::Arg& a8, const substitute_internal::Arg& a9) - ABSL_BAD_CALL_IF(substitute_internal::PlaceholderBitmask(format) != 1023, - "There were 10 substitution arguments given, but this " - "format string doesn't contain all of $0 through $9"); + ABSL_BAD_CALL_IF( + substitute_internal::PlaceholderBitmask(format) != 1023, + "There were 10 substitution arguments given, but this " + "format string either doesn't contain all of $0 through $9 or " + "contains an unescaped $ character (use $$ instead)"); #endif // ABSL_BAD_CALL_IF ABSL_NAMESPACE_END diff --git a/absl/strings/substitute_test.cc b/absl/strings/substitute_test.cc index 442c9215..9e6b9403 100644 --- a/absl/strings/substitute_test.cc +++ b/absl/strings/substitute_test.cc @@ -184,6 +184,54 @@ TEST(SubstituteTest, VectorBoolRef) { EXPECT_EQ("Logic be like: true false true false", str); } +TEST(SubstituteTest, Enums) { + enum UnscopedEnum { kEnum0 = 0, kEnum1 = 1 }; + EXPECT_EQ("0 1", absl::Substitute("$0 $1", UnscopedEnum::kEnum0, + UnscopedEnum::kEnum1)); + + enum class ScopedEnum { kEnum0 = 0, kEnum1 = 1 }; + EXPECT_EQ("0 1", + absl::Substitute("$0 $1", ScopedEnum::kEnum0, ScopedEnum::kEnum1)); + + enum class ScopedEnumInt32 : int32_t { kEnum0 = 989, kEnum1 = INT32_MIN }; + EXPECT_EQ("989 -2147483648", + absl::Substitute("$0 $1", ScopedEnumInt32::kEnum0, + ScopedEnumInt32::kEnum1)); + + enum class ScopedEnumUInt32 : uint32_t { kEnum0 = 1, kEnum1 = UINT32_MAX }; + EXPECT_EQ("1 4294967295", absl::Substitute("$0 $1", ScopedEnumUInt32::kEnum0, + ScopedEnumUInt32::kEnum1)); + + enum class ScopedEnumInt64 : int64_t { kEnum0 = -1, kEnum1 = 42949672950 }; + EXPECT_EQ("-1 42949672950", absl::Substitute("$0 $1", ScopedEnumInt64::kEnum0, + ScopedEnumInt64::kEnum1)); + + enum class ScopedEnumUInt64 : uint64_t { kEnum0 = 1, kEnum1 = 42949672950 }; + EXPECT_EQ("1 42949672950", absl::Substitute("$0 $1", ScopedEnumUInt64::kEnum0, + ScopedEnumUInt64::kEnum1)); + + enum class ScopedEnumChar : signed char { kEnum0 = -1, kEnum1 = 1 }; + EXPECT_EQ("-1 1", absl::Substitute("$0 $1", ScopedEnumChar::kEnum0, + ScopedEnumChar::kEnum1)); + + enum class ScopedEnumUChar : unsigned char { + kEnum0 = 0, + kEnum1 = 1, + kEnumMax = 255 + }; + EXPECT_EQ("0 1 255", absl::Substitute("$0 $1 $2", ScopedEnumUChar::kEnum0, + ScopedEnumUChar::kEnum1, + ScopedEnumUChar::kEnumMax)); + + enum class ScopedEnumInt16 : int16_t { kEnum0 = -100, kEnum1 = 10000 }; + EXPECT_EQ("-100 10000", absl::Substitute("$0 $1", ScopedEnumInt16::kEnum0, + ScopedEnumInt16::kEnum1)); + + enum class ScopedEnumUInt16 : uint16_t { kEnum0 = 0, kEnum1 = 10000 }; + EXPECT_EQ("0 10000", absl::Substitute("$0 $1", ScopedEnumUInt16::kEnum0, + ScopedEnumUInt16::kEnum1)); +} + #ifdef GTEST_HAS_DEATH_TEST TEST(SubstituteDeathTest, SubstituteDeath) { |