From b3aaac8a37c467a1125c794196caa90d0957bdc3 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 9 Jan 2020 09:58:48 -0800 Subject: Export of internal Abseil changes -- 9beb68204986a015c9cb065b9fae4f9a8879a788 by Abseil Team : Move Base64EscapeInternal and CalculateBase64EscapedLenInternal to an internal header. PiperOrigin-RevId: 288917378 -- 90acfbe03b3f9f6de3ffa49c39343dfaa2c5d38c by Greg Falcon : Update macos CI script to support the ALTERNATE_OPTIONS environment variable. PiperOrigin-RevId: 288913564 -- f1572e870678cdcda6b48cb39780d1ad984e4c1b by Derek Mauro : Makes absl::NullSafeStringView constexpr Fixes https://github.com/abseil/abseil-cpp/issues/583 PiperOrigin-RevId: 288906940 -- d28a8471e32c10caa64bfffe6d6d4d0a8d144013 by Abseil Team : absl::GetFlag is lock free for small trivially copyable types. PiperOrigin-RevId: 288768172 -- 2643b8ed1a1dc836b38ab9e46538a1af129ffd67 by Gennadiy Rozental : Eliminate call to callback from flag initialization. We do not need to have this invocation inside FlagImpl::Init since SetCallback performs invocation anyways. Calling InitCallback from inside of Init complicates separation of value initialization from data guard initialization, which is about to happen. PiperOrigin-RevId: 288732526 -- 22caa880b7a4cb6da34e16a2e064a473c99e880b by Abseil Team : Fix the documentation on how to create a null string_view. PiperOrigin-RevId: 288727968 -- 10727c5cadc561837141176f4c9b9717cec9233a by Greg Falcon : Change CI scripts for gcc to use the ALTERNATE_OPTIONS file as well. PiperOrigin-RevId: 288718855 -- 5d1e2dd6c7fb12af8aa4337a0f61872f5f0c5992 by Greg Falcon : Add an option for using inline namespaces in Abseil. 
PiperOrigin-RevId: 288614491 GitOrigin-RevId: 9beb68204986a015c9cb065b9fae4f9a8879a788 Change-Id: If9acd46301e3df8cb231b4c16f7ed651bf4fb3c3 --- absl/base/config.h | 34 ++++++ absl/base/options.h | 26 +++++ absl/flags/config.h | 11 ++ absl/flags/flag.cc | 22 ---- absl/flags/flag.h | 45 +++++--- absl/flags/internal/commandlineflag.h | 32 +++--- absl/flags/internal/flag.cc | 15 ++- absl/flags/internal/flag.h | 95 +++++++++++++++-- absl/flags/parse.cc | 4 +- absl/strings/BUILD.bazel | 2 + absl/strings/CMakeLists.txt | 2 + absl/strings/escaping.cc | 188 +++------------------------------- absl/strings/internal/escaping.cc | 180 ++++++++++++++++++++++++++++++++ absl/strings/internal/escaping.h | 58 +++++++++++ absl/strings/string_view.h | 4 +- absl/strings/string_view_test.cc | 25 +++++ 16 files changed, 502 insertions(+), 241 deletions(-) create mode 100644 absl/strings/internal/escaping.cc create mode 100644 absl/strings/internal/escaping.h (limited to 'absl') diff --git a/absl/base/config.h b/absl/base/config.h index 87f5b4a..edbf224 100644 --- a/absl/base/config.h +++ b/absl/base/config.h @@ -90,8 +90,42 @@ // not support forward declarations of its own types, nor does it support // user-provided specialization of Abseil templates. Code that violates these // rules may be broken without warning.) +#if !defined(ABSL_OPTION_USE_INLINE_NAMESPACE) || \ + !defined(ABSL_OPTION_INLINE_NAMESPACE_NAME) +#error options.h is misconfigured. 
+#endif + +// Check that ABSL_OPTION_INLINE_NAMESPACE_NAME is neither "head" nor "" +#if defined(__cplusplus) && ABSL_OPTION_USE_INLINE_NAMESPACE == 1 + +#define ABSL_INTERNAL_DO_TOKEN_STR(x) #x +#define ABSL_INTERNAL_TOKEN_STR(x) ABSL_INTERNAL_DO_TOKEN_STR(x) +#define ABSL_INTERNAL_INLINE_NAMESPACE_STR \ + ABSL_INTERNAL_TOKEN_STR(ABSL_OPTION_INLINE_NAMESPACE_NAME) + +static_assert(ABSL_INTERNAL_INLINE_NAMESPACE_STR[0] != '\0', + "options.h misconfigured: ABSL_OPTION_INLINE_NAMESPACE_NAME must " + "not be empty."); +static_assert(ABSL_INTERNAL_INLINE_NAMESPACE_STR[0] != 'h' || + ABSL_INTERNAL_INLINE_NAMESPACE_STR[1] != 'e' || + ABSL_INTERNAL_INLINE_NAMESPACE_STR[2] != 'a' || + ABSL_INTERNAL_INLINE_NAMESPACE_STR[3] != 'd' || + ABSL_INTERNAL_INLINE_NAMESPACE_STR[4] != '\0', + "options.h misconfigured: ABSL_OPTION_INLINE_NAMESPACE_NAME must " + "be changed to a new, unique identifier name."); + +#endif + +#if ABSL_OPTION_USE_INLINE_NAMESPACE == 0 #define ABSL_NAMESPACE_BEGIN #define ABSL_NAMESPACE_END +#elif ABSL_OPTION_USE_INLINE_NAMESPACE == 1 +#define ABSL_NAMESPACE_BEGIN \ + inline namespace ABSL_OPTION_INLINE_NAMESPACE_NAME { +#define ABSL_NAMESPACE_END } +#else +#error options.h is misconfigured. +#endif // ----------------------------------------------------------------------------- // Compiler Feature Checks diff --git a/absl/base/options.h b/absl/base/options.h index 3961e63..592b33b 100644 --- a/absl/base/options.h +++ b/absl/base/options.h @@ -185,4 +185,30 @@ #define ABSL_OPTION_USE_STD_VARIANT 2 + +// ABSL_OPTION_USE_INLINE_NAMESPACE +// ABSL_OPTION_INLINE_NAMESPACE_NAME +// +// These options control whether all entities in the absl namespace are +// contained within an inner inline namespace. This does not affect the +// user-visible API of Abseil, but it changes the mangled names of all symbols. +// +// This can be useful as a version tag if you are distributing Abseil in +// precompiled form. 
This will prevent a binary library build of Abseil with +// one inline namespace being used with headers configured with a different +// inline namespace name. Binary packagers are reminded that Abseil does not +// guarantee any ABI stability in Abseil, so any update of Abseil or +// configuration change in such a binary package should be combined with a +// new, unique value for the inline namespace name. +// +// A value of 0 means not to use inline namespaces. +// +// A value of 1 means to use an inline namespace with the given name inside +// namespace absl. If this is set, ABSL_OPTION_INLINE_NAMESPACE_NAME must also +// be changed to a new, unique identifier name. In particular "head" is not +// allowed. + +#define ABSL_OPTION_USE_INLINE_NAMESPACE 0 +#define ABSL_OPTION_INLINE_NAMESPACE_NAME head + #endif // ABSL_BASE_OPTIONS_H_ diff --git a/absl/flags/config.h b/absl/flags/config.h index a9fd97a..fbe3496 100644 --- a/absl/flags/config.h +++ b/absl/flags/config.h @@ -45,4 +45,15 @@ #define ABSL_FLAGS_STRIP_HELP ABSL_FLAGS_STRIP_NAMES #endif +// ABSL_FLAGS_INTERNAL_ATOMIC_DOUBLE_WORD macro is used for using atomics with +// double words, e.g. absl::Duration. +// For reasons in bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80878, modern +// versions of GCC do not support cmpxchg16b instruction in standard atomics. +#ifdef ABSL_FLAGS_INTERNAL_ATOMIC_DOUBLE_WORD +#error "ABSL_FLAGS_INTERNAL_ATOMIC_DOUBLE_WORD should not be defined." +#elif defined(__clang__) && defined(__x86_64__) && \ + defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) +#define ABSL_FLAGS_INTERNAL_ATOMIC_DOUBLE_WORD 1 +#endif + #endif // ABSL_FLAGS_CONFIG_H_ diff --git a/absl/flags/flag.cc b/absl/flags/flag.cc index 7faa7ad..491a66b 100644 --- a/absl/flags/flag.cc +++ b/absl/flags/flag.cc @@ -20,28 +20,6 @@ namespace absl { ABSL_NAMESPACE_BEGIN -// We want to validate the type mismatch between type definition and -// declaration. 
The lock-free implementation does not allow us to do it, -// so in debug builds we always use the slower implementation, which always -// validates the type. -#ifndef NDEBUG -#define ABSL_FLAGS_ATOMIC_GET(T) \ - T GetFlag(const absl::Flag& flag) { return flag.Get(); } -#else -#define ABSL_FLAGS_ATOMIC_GET(T) \ - T GetFlag(const absl::Flag& flag) { \ - T result; \ - if (flag.AtomicGet(&result)) { \ - return result; \ - } \ - return flag.Get(); \ - } -#endif - -ABSL_FLAGS_INTERNAL_FOR_EACH_LOCK_FREE(ABSL_FLAGS_ATOMIC_GET) - -#undef ABSL_FLAGS_ATOMIC_GET - // This global nutex protects on-demand construction of flag objects in MSVC // builds. #if defined(_MSC_VER) && !defined(__clang__) diff --git a/absl/flags/flag.h b/absl/flags/flag.h index 326fb8e..62e73f8 100644 --- a/absl/flags/flag.h +++ b/absl/flags/flag.h @@ -29,6 +29,8 @@ #ifndef ABSL_FLAGS_FLAG_H_ #define ABSL_FLAGS_FLAG_H_ +#include + #include "absl/base/attributes.h" #include "absl/base/casts.h" #include "absl/flags/config.h" @@ -181,23 +183,42 @@ class Flag { // // // FLAGS_firstname is a Flag of type `std::string` // std::string first_name = absl::GetFlag(FLAGS_firstname); -template +template ::value, int>::type = 0> ABSL_MUST_USE_RESULT T GetFlag(const absl::Flag& flag) { -#define ABSL_FLAGS_INTERNAL_LOCK_FREE_VALIDATE(BIT) \ - static_assert( \ - !std::is_same::value, \ - "Do not specify explicit template parameters to absl::GetFlag"); - ABSL_FLAGS_INTERNAL_FOR_EACH_LOCK_FREE(ABSL_FLAGS_INTERNAL_LOCK_FREE_VALIDATE) -#undef ABSL_FLAGS_INTERNAL_LOCK_FREE_VALIDATE - return flag.Get(); } +// We want to validate the type mismatch between type definition and +// declaration. The lock-free implementation does not allow us to do it, +// so in debug builds we always use the slower implementation, which always +// validates the type. 
+#ifndef NDEBUG +template ::value, int>::type = 0> +ABSL_MUST_USE_RESULT T GetFlag(const absl::Flag& flag) { + return flag.Get(); +} +#else // Overload for `GetFlag()` for types that support lock-free reads. -#define ABSL_FLAGS_INTERNAL_LOCK_FREE_EXPORT(T) \ - ABSL_MUST_USE_RESULT T GetFlag(const absl::Flag& flag); -ABSL_FLAGS_INTERNAL_FOR_EACH_LOCK_FREE(ABSL_FLAGS_INTERNAL_LOCK_FREE_EXPORT) -#undef ABSL_FLAGS_INTERNAL_LOCK_FREE_EXPORT +template ::value, int>::type = 0> +ABSL_MUST_USE_RESULT T GetFlag(const absl::Flag& flag) { + // T might not be default constructible. + union U { + T value; + U() {} + }; + U result; + if (flag.AtomicGet(&result.value)) { + return result.value; + } + return flag.Get(); +} +#endif // SetFlag() // diff --git a/absl/flags/internal/commandlineflag.h b/absl/flags/internal/commandlineflag.h index a0c18e8..1862306 100644 --- a/absl/flags/internal/commandlineflag.h +++ b/absl/flags/internal/commandlineflag.h @@ -259,22 +259,22 @@ class CommandLineFlag { virtual void Read(void* dst) const = 0; }; -// This macro is the "source of truth" for the list of supported flag types we -// expect to perform lock free operations on. Specifically it generates code, -// a one argument macro operating on a type, supplied as a macro argument, for -// each type in the list. -#define ABSL_FLAGS_INTERNAL_FOR_EACH_LOCK_FREE(A) \ - A(bool) \ - A(short) \ - A(unsigned short) \ - A(int) \ - A(unsigned int) \ - A(long) \ - A(unsigned long) \ - A(long long) \ - A(unsigned long long) \ - A(double) \ - A(float) +// This macro is the "source of truth" for the list of supported flag built-in +// types. 
+#define ABSL_FLAGS_INTERNAL_BUILTIN_TYPES(A) \ + A(bool) \ + A(short) \ + A(unsigned short) \ + A(int) \ + A(unsigned int) \ + A(long) \ + A(unsigned long) \ + A(long long) \ + A(unsigned long long) \ + A(double) \ + A(float) \ + A(std::string) \ + A(std::vector) } // namespace flags_internal ABSL_NAMESPACE_END diff --git a/absl/flags/internal/flag.cc b/absl/flags/internal/flag.cc index bb9a98f..6979dc4 100644 --- a/absl/flags/internal/flag.cc +++ b/absl/flags/internal/flag.cc @@ -16,6 +16,7 @@ #include "absl/flags/internal/flag.h" #include "absl/base/optimization.h" +#include "absl/flags/config.h" #include "absl/flags/usage_config.h" #include "absl/synchronization/mutex.h" @@ -35,9 +36,7 @@ namespace { bool ShouldValidateFlagValue(FlagOpFn flag_type_id) { #define DONT_VALIDATE(T) \ if (flag_type_id == &flags_internal::FlagOps) return false; - ABSL_FLAGS_INTERNAL_FOR_EACH_LOCK_FREE(DONT_VALIDATE) - DONT_VALIDATE(std::string) - DONT_VALIDATE(std::vector) + ABSL_FLAGS_INTERNAL_BUILTIN_TYPES(DONT_VALIDATE) #undef DONT_VALIDATE return true; @@ -85,7 +84,6 @@ void FlagImpl::Init() { cur_ = MakeInitValue().release(); StoreAtomic(); inited_.store(true, std::memory_order_release); - InvokeCallback(); } } @@ -264,8 +262,15 @@ void FlagImpl::StoreAtomic() { if (data_size <= sizeof(int64_t)) { int64_t t = 0; std::memcpy(&t, cur_, data_size); - atomic_.store(t, std::memory_order_release); + atomics_.small_atomic.store(t, std::memory_order_release); } +#if defined(ABSL_FLAGS_INTERNAL_ATOMIC_DOUBLE_WORD) + else if (data_size <= sizeof(FlagsInternalTwoWordsType)) { + FlagsInternalTwoWordsType t{0, 0}; + std::memcpy(&t, cur_, data_size); + atomics_.big_atomic.store(t, std::memory_order_release); + } +#endif } void FlagImpl::Write(const void* src, const flags_internal::FlagOpFn src_op) { diff --git a/absl/flags/internal/flag.h b/absl/flags/internal/flag.h index 7d5271c..a5edfd1 100644 --- a/absl/flags/internal/flag.h +++ b/absl/flags/internal/flag.h @@ -20,6 +20,7 @@ #include 
#include "absl/base/thread_annotations.h" +#include "absl/flags/config.h" #include "absl/flags/internal/commandlineflag.h" #include "absl/flags/internal/registry.h" #include "absl/memory/memory.h" @@ -30,7 +31,61 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace flags_internal { -constexpr int64_t AtomicInit() { return 0xababababababababll; } +// The minimum atomic size we believe to generate lock free code, i.e. all +// trivially copyable types not bigger than this size generate lock free code. +static constexpr int kMinLockFreeAtomicSize = 8; + +// The same as kMinLockFreeAtomicSize but maximum atomic size. As double words +// might use two registers, we want to dispatch the logic for them. +#if defined(ABSL_FLAGS_INTERNAL_ATOMIC_DOUBLE_WORD) +static constexpr int kMaxLockFreeAtomicSize = 16; +#else +static constexpr int kMaxLockFreeAtomicSize = 8; +#endif + +// We can use atomic in cases when it fits in the register, trivially copyable +// in order to make memcpy operations. +template +struct IsAtomicFlagTypeTrait { + static constexpr bool value = + (sizeof(T) <= kMaxLockFreeAtomicSize && + type_traits_internal::is_trivially_copyable::value); +}; + +// Clang does not always produce cmpxchg16b instruction when alignment of a 16 +// bytes type is not 16. 
+struct alignas(16) FlagsInternalTwoWordsType { + int64_t first; + int64_t second; +}; + +constexpr bool operator==(const FlagsInternalTwoWordsType& that, + const FlagsInternalTwoWordsType& other) { + return that.first == other.first && that.second == other.second; +} +constexpr bool operator!=(const FlagsInternalTwoWordsType& that, + const FlagsInternalTwoWordsType& other) { + return !(that == other); +} + +constexpr int64_t SmallAtomicInit() { return 0xababababababababll; } + +template +struct BestAtomicType { + using type = int64_t; + static constexpr int64_t AtomicInit() { return SmallAtomicInit(); } +}; + +template +struct BestAtomicType< + T, typename std::enable_if<(kMinLockFreeAtomicSize < sizeof(T) && + sizeof(T) <= kMaxLockFreeAtomicSize), + void>::type> { + using type = FlagsInternalTwoWordsType; + static constexpr FlagsInternalTwoWordsType AtomicInit() { + return {SmallAtomicInit(), SmallAtomicInit()}; + } +}; template class Flag; @@ -182,14 +237,15 @@ class FlagImpl { // it replaces `dst` with the new value. bool TryParse(void** dst, absl::string_view value, std::string* err) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(*DataGuard()); + template bool AtomicGet(T* v) const { - const int64_t r = atomic_.load(std::memory_order_acquire); - if (r != flags_internal::AtomicInit()) { - std::memcpy(v, &r, sizeof(T)); + using U = flags_internal::BestAtomicType; + const typename U::type r = atomics_.template load(); + if (r != U::AtomicInit()) { + std::memcpy(static_cast(v), &r, sizeof(T)); return true; } - return false; } @@ -271,7 +327,34 @@ class FlagImpl { int64_t counter_ ABSL_GUARDED_BY(*DataGuard()) = 0; // For some types, a copy of the current value is kept in an atomically // accessible field. - std::atomic atomic_{flags_internal::AtomicInit()}; + union Atomics { + // Using small atomic for small types. 
+ std::atomic small_atomic; + template ::type> + int64_t load() const { + return small_atomic.load(std::memory_order_acquire); + } + +#if defined(ABSL_FLAGS_INTERNAL_ATOMIC_DOUBLE_WORD) + // Using big atomics for big types. + std::atomic big_atomic; + template ::type> + FlagsInternalTwoWordsType load() const { + return big_atomic.load(std::memory_order_acquire); + } + constexpr Atomics() + : big_atomic{FlagsInternalTwoWordsType{SmallAtomicInit(), + SmallAtomicInit()}} {} +#else + constexpr Atomics() : small_atomic{SmallAtomicInit()} {} +#endif + }; + Atomics atomics_{}; struct CallbackData { FlagCallback func; diff --git a/absl/flags/parse.cc b/absl/flags/parse.cc index 2f07725..a288ace 100644 --- a/absl/flags/parse.cc +++ b/absl/flags/parse.cc @@ -280,9 +280,7 @@ void CheckDefaultValuesParsingRoundtrip() { #define IGNORE_TYPE(T) \ if (flag->IsOfType()) return; - ABSL_FLAGS_INTERNAL_FOR_EACH_LOCK_FREE(IGNORE_TYPE) - IGNORE_TYPE(std::string) - IGNORE_TYPE(std::vector) + ABSL_FLAGS_INTERNAL_BUILTIN_TYPES(IGNORE_TYPE) #undef IGNORE_TYPE flag->CheckDefaultValueParsingRoundtrip(); diff --git a/absl/strings/BUILD.bazel b/absl/strings/BUILD.bazel index 8d0a6b6..dc7e1bf 100644 --- a/absl/strings/BUILD.bazel +++ b/absl/strings/BUILD.bazel @@ -37,6 +37,7 @@ cc_library( "internal/charconv_bigint.h", "internal/charconv_parse.cc", "internal/charconv_parse.h", + "internal/escaping.cc", "internal/memutil.cc", "internal/memutil.h", "internal/stl_type_traits.h", @@ -54,6 +55,7 @@ cc_library( "ascii.h", "charconv.h", "escaping.h", + "internal/escaping.h", "match.h", "numbers.h", "str_cat.h", diff --git a/absl/strings/CMakeLists.txt b/absl/strings/CMakeLists.txt index 9810157..36702f7 100644 --- a/absl/strings/CMakeLists.txt +++ b/absl/strings/CMakeLists.txt @@ -38,6 +38,8 @@ absl_cc_library( "internal/charconv_bigint.h" "internal/charconv_parse.cc" "internal/charconv_parse.h" + "internal/escaping.cc" + "internal/escaping.h" "internal/memutil.cc" "internal/memutil.h" 
"internal/stl_type_traits.h" diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index d2fcd9c..7adc1b6 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -26,6 +26,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/base/internal/unaligned_access.h" #include "absl/strings/internal/char_map.h" +#include "absl/strings/internal/escaping.h" #include "absl/strings/internal/resize_uninitialized.h" #include "absl/strings/internal/utf8.h" #include "absl/strings/str_cat.h" @@ -764,176 +765,9 @@ constexpr signed char kUnWebSafeBase64[] = { }; /* clang-format on */ -size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { - // Base64 encodes three bytes of input at a time. If the input is not - // divisible by three, we pad as appropriate. - // - // (from https://tools.ietf.org/html/rfc3548) - // Special processing is performed if fewer than 24 bits are available - // at the end of the data being encoded. A full encoding quantum is - // always completed at the end of a quantity. When fewer than 24 input - // bits are available in an input group, zero bits are added (on the - // right) to form an integral number of 6-bit groups. Padding at the - // end of the data is performed using the '=' character. Since all base - // 64 input is an integral number of octets, only the following cases - // can arise: - - // Base64 encodes each three bytes of input into four bytes of output. 
- size_t len = (input_len / 3) * 4; - - if (input_len % 3 == 0) { - // (from https://tools.ietf.org/html/rfc3548) - // (1) the final quantum of encoding input is an integral multiple of 24 - // bits; here, the final unit of encoded output will be an integral - // multiple of 4 characters with no "=" padding, - } else if (input_len % 3 == 1) { - // (from https://tools.ietf.org/html/rfc3548) - // (2) the final quantum of encoding input is exactly 8 bits; here, the - // final unit of encoded output will be two characters followed by two - // "=" padding characters, or - len += 2; - if (do_padding) { - len += 2; - } - } else { // (input_len % 3 == 2) - // (from https://tools.ietf.org/html/rfc3548) - // (3) the final quantum of encoding input is exactly 16 bits; here, the - // final unit of encoded output will be three characters followed by one - // "=" padding character. - len += 3; - if (do_padding) { - len += 1; - } - } - - assert(len >= input_len); // make sure we didn't overflow - return len; -} - -size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, - size_t szdest, const char* base64, - bool do_padding) { - static const char kPad64 = '='; - - if (szsrc * 4 > szdest * 3) return 0; - - char* cur_dest = dest; - const unsigned char* cur_src = src; - - char* const limit_dest = dest + szdest; - const unsigned char* const limit_src = src + szsrc; - - // Three bytes of data encodes to four characters of cyphertext. - // So we can pump through three-byte chunks atomically. - if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. - while (cur_src < limit_src - 3) { // While we have >= 32 bits. - uint32_t in = absl::big_endian::Load32(cur_src) >> 8; - - cur_dest[0] = base64[in >> 18]; - in &= 0x3FFFF; - cur_dest[1] = base64[in >> 12]; - in &= 0xFFF; - cur_dest[2] = base64[in >> 6]; - in &= 0x3F; - cur_dest[3] = base64[in]; - - cur_dest += 4; - cur_src += 3; - } - } - // To save time, we didn't update szdest or szsrc in the loop. So do it now. 
- szdest = limit_dest - cur_dest; - szsrc = limit_src - cur_src; - - /* now deal with the tail (<=3 bytes) */ - switch (szsrc) { - case 0: - // Nothing left; nothing more to do. - break; - case 1: { - // One byte left: this encodes to two characters, and (optionally) - // two pad characters to round out the four-character cypherblock. - if (szdest < 2) return 0; - uint32_t in = cur_src[0]; - cur_dest[0] = base64[in >> 2]; - in &= 0x3; - cur_dest[1] = base64[in << 4]; - cur_dest += 2; - szdest -= 2; - if (do_padding) { - if (szdest < 2) return 0; - cur_dest[0] = kPad64; - cur_dest[1] = kPad64; - cur_dest += 2; - szdest -= 2; - } - break; - } - case 2: { - // Two bytes left: this encodes to three characters, and (optionally) - // one pad character to round out the four-character cypherblock. - if (szdest < 3) return 0; - uint32_t in = absl::big_endian::Load16(cur_src); - cur_dest[0] = base64[in >> 10]; - in &= 0x3FF; - cur_dest[1] = base64[in >> 4]; - in &= 0x00F; - cur_dest[2] = base64[in << 2]; - cur_dest += 3; - szdest -= 3; - if (do_padding) { - if (szdest < 1) return 0; - cur_dest[0] = kPad64; - cur_dest += 1; - szdest -= 1; - } - break; - } - case 3: { - // Three bytes left: same as in the big loop above. We can't do this in - // the loop because the loop above always reads 4 bytes, and the fourth - // byte is past the end of the input. - if (szdest < 4) return 0; - uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); - cur_dest[0] = base64[in >> 18]; - in &= 0x3FFFF; - cur_dest[1] = base64[in >> 12]; - in &= 0xFFF; - cur_dest[2] = base64[in >> 6]; - in &= 0x3F; - cur_dest[3] = base64[in]; - cur_dest += 4; - szdest -= 4; - break; - } - default: - // Should not be reached: blocks of 4 bytes are handled - // in the while loop before this switch statement. - ABSL_RAW_LOG(FATAL, "Logic problem? 
szsrc = %zu", szsrc); - break; - } - return (cur_dest - dest); -} - -constexpr char kBase64Chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - constexpr char kWebSafeBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; -template -void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, - bool do_padding, const char* base64_chars) { - const size_t calc_escaped_size = - CalculateBase64EscapedLenInternal(szsrc, do_padding); - strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size); - - const size_t escaped_len = Base64EscapeInternal( - src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); - assert(calc_escaped_size == escaped_len); - dest->erase(escaped_len); -} - template bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { @@ -1068,26 +902,30 @@ bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { } void Base64Escape(absl::string_view src, std::string* dest) { - Base64EscapeInternal(reinterpret_cast(src.data()), - src.size(), dest, true, kBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast(src.data()), src.size(), dest, + true, strings_internal::kBase64Chars); } void WebSafeBase64Escape(absl::string_view src, std::string* dest) { - Base64EscapeInternal(reinterpret_cast(src.data()), - src.size(), dest, false, kWebSafeBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast(src.data()), src.size(), dest, + false, kWebSafeBase64Chars); } std::string Base64Escape(absl::string_view src) { std::string dest; - Base64EscapeInternal(reinterpret_cast(src.data()), - src.size(), &dest, true, kBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast(src.data()), src.size(), &dest, + true, strings_internal::kBase64Chars); return dest; } std::string WebSafeBase64Escape(absl::string_view src) { std::string dest; - 
Base64EscapeInternal(reinterpret_cast(src.data()), - src.size(), &dest, false, kWebSafeBase64Chars); + strings_internal::Base64EscapeInternal( + reinterpret_cast(src.data()), src.size(), &dest, + false, kWebSafeBase64Chars); return dest; } diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc new file mode 100644 index 0000000..c527128 --- /dev/null +++ b/absl/strings/internal/escaping.cc @@ -0,0 +1,180 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/internal/escaping.h" + +#include "absl/base/internal/endian.h" +#include "absl/base/internal/raw_logging.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +const char kBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { + // Base64 encodes three bytes of input at a time. If the input is not + // divisible by three, we pad as appropriate. + // + // (from https://tools.ietf.org/html/rfc3548) + // Special processing is performed if fewer than 24 bits are available + // at the end of the data being encoded. A full encoding quantum is + // always completed at the end of a quantity. When fewer than 24 input + // bits are available in an input group, zero bits are added (on the + // right) to form an integral number of 6-bit groups. 
Padding at the + // end of the data is performed using the '=' character. Since all base + // 64 input is an integral number of octets, only the following cases + // can arise: + + // Base64 encodes each three bytes of input into four bytes of output. + size_t len = (input_len / 3) * 4; + + if (input_len % 3 == 0) { + // (from https://tools.ietf.org/html/rfc3548) + // (1) the final quantum of encoding input is an integral multiple of 24 + // bits; here, the final unit of encoded output will be an integral + // multiple of 4 characters with no "=" padding, + } else if (input_len % 3 == 1) { + // (from https://tools.ietf.org/html/rfc3548) + // (2) the final quantum of encoding input is exactly 8 bits; here, the + // final unit of encoded output will be two characters followed by two + // "=" padding characters, or + len += 2; + if (do_padding) { + len += 2; + } + } else { // (input_len % 3 == 2) + // (from https://tools.ietf.org/html/rfc3548) + // (3) the final quantum of encoding input is exactly 16 bits; here, the + // final unit of encoded output will be three characters followed by one + // "=" padding character. + len += 3; + if (do_padding) { + len += 1; + } + } + + assert(len >= input_len); // make sure we didn't overflow + return len; +} + +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, + bool do_padding) { + static const char kPad64 = '='; + + if (szsrc * 4 > szdest * 3) return 0; + + char* cur_dest = dest; + const unsigned char* cur_src = src; + + char* const limit_dest = dest + szdest; + const unsigned char* const limit_src = src + szsrc; + + // Three bytes of data encodes to four characters of cyphertext. + // So we can pump through three-byte chunks atomically. + if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. + while (cur_src < limit_src - 3) { // While we have >= 32 bits. 
+ uint32_t in = absl::big_endian::Load32(cur_src) >> 8; + + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + + cur_dest += 4; + cur_src += 3; + } + } + // To save time, we didn't update szdest or szsrc in the loop. So do it now. + szdest = limit_dest - cur_dest; + szsrc = limit_src - cur_src; + + /* now deal with the tail (<=3 bytes) */ + switch (szsrc) { + case 0: + // Nothing left; nothing more to do. + break; + case 1: { + // One byte left: this encodes to two characters, and (optionally) + // two pad characters to round out the four-character cypherblock. + if (szdest < 2) return 0; + uint32_t in = cur_src[0]; + cur_dest[0] = base64[in >> 2]; + in &= 0x3; + cur_dest[1] = base64[in << 4]; + cur_dest += 2; + szdest -= 2; + if (do_padding) { + if (szdest < 2) return 0; + cur_dest[0] = kPad64; + cur_dest[1] = kPad64; + cur_dest += 2; + szdest -= 2; + } + break; + } + case 2: { + // Two bytes left: this encodes to three characters, and (optionally) + // one pad character to round out the four-character cypherblock. + if (szdest < 3) return 0; + uint32_t in = absl::big_endian::Load16(cur_src); + cur_dest[0] = base64[in >> 10]; + in &= 0x3FF; + cur_dest[1] = base64[in >> 4]; + in &= 0x00F; + cur_dest[2] = base64[in << 2]; + cur_dest += 3; + szdest -= 3; + if (do_padding) { + if (szdest < 1) return 0; + cur_dest[0] = kPad64; + cur_dest += 1; + szdest -= 1; + } + break; + } + case 3: { + // Three bytes left: same as in the big loop above. We can't do this in + // the loop because the loop above always reads 4 bytes, and the fourth + // byte is past the end of the input. 
+ if (szdest < 4) return 0; + uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); + cur_dest[0] = base64[in >> 18]; + in &= 0x3FFFF; + cur_dest[1] = base64[in >> 12]; + in &= 0xFFF; + cur_dest[2] = base64[in >> 6]; + in &= 0x3F; + cur_dest[3] = base64[in]; + cur_dest += 4; + szdest -= 4; + break; + } + default: + // Should not be reached: blocks of 4 bytes are handled + // in the while loop before this switch statement. + ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc); + break; + } + return (cur_dest - dest); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h new file mode 100644 index 0000000..6a9ce60 --- /dev/null +++ b/absl/strings/internal/escaping.h @@ -0,0 +1,58 @@ +// Copyright 2020 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_ +#define ABSL_STRINGS_INTERNAL_ESCAPING_H_ + +#include + +#include "absl/strings/internal/resize_uninitialized.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace strings_internal { + +ABSL_CONST_INIT extern const char kBase64Chars[]; + +// Calculates how long a string will be when it is base64 encoded given its +// length and whether or not the result should be padded. 
+size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 4. Returns the length of `dest`. +size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, + size_t szdest, const char* base64, bool do_padding); + +// Base64-encodes `src` using the alphabet provided in `base64` and writes the +// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars +// until its length is a multiple of 4. +template +void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, + bool do_padding, const char* base64_chars) { + const size_t calc_escaped_size = + CalculateBase64EscapedLenInternal(szsrc, do_padding); + STLStringResizeUninitialized(dest, calc_escaped_size); + + const size_t escaped_len = Base64EscapeInternal( + src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); + assert(calc_escaped_size == escaped_len); + dest->erase(escaped_len); +} + +} // namespace strings_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_ diff --git a/absl/strings/string_view.h b/absl/strings/string_view.h index 4b34e56..4f7dd6b 100644 --- a/absl/strings/string_view.h +++ b/absl/strings/string_view.h @@ -122,7 +122,7 @@ ABSL_NAMESPACE_BEGIN // // You may create a null `string_view` in two ways: // -// absl::string_view sv(); +// absl::string_view sv; // absl::string_view sv(nullptr, 0); // // For the above, `sv.data() == nullptr`, `sv.length() == 0`, and @@ -605,7 +605,7 @@ inline string_view ClippedSubstr(string_view s, size_t pos, // Creates an `absl::string_view` from a pointer `p` even if it's null-valued. // This function should be used where an `absl::string_view` can be created from // a possibly-null pointer. 
-inline string_view NullSafeStringView(const char* p) { +constexpr string_view NullSafeStringView(const char* p) { return p ? string_view(p) : string_view(); } diff --git a/absl/strings/string_view_test.cc b/absl/strings/string_view_test.cc index 96dacdf..c4fbd16 100644 --- a/absl/strings/string_view_test.cc +++ b/absl/strings/string_view_test.cc @@ -931,6 +931,31 @@ TEST(StringViewTest, NullSafeStringView) { } } +TEST(StringViewTest, ConstexprNullSafeStringView) { + { + constexpr absl::string_view s = absl::NullSafeStringView(nullptr); + EXPECT_EQ(nullptr, s.data()); + EXPECT_EQ(0, s.size()); + EXPECT_EQ(absl::string_view(), s); + } +#if !defined(_MSC_VER) || _MSC_VER >= 1910 + // MSVC 2017+ is required for good constexpr string_view support. + // See the implementation of `absl::string_view::StrlenInternal()`. + { + static constexpr char kHi[] = "hi"; + absl::string_view s = absl::NullSafeStringView(kHi); + EXPECT_EQ(kHi, s.data()); + EXPECT_EQ(strlen(kHi), s.size()); + EXPECT_EQ(absl::string_view("hi"), s); + } + { + constexpr absl::string_view s = absl::NullSafeStringView("hello"); + EXPECT_EQ(s.size(), 5); + EXPECT_EQ("hello", s); + } +#endif +} + TEST(StringViewTest, ConstexprCompiles) { constexpr absl::string_view sp; #ifdef ABSL_HAVE_STRING_VIEW_FROM_NULLPTR -- cgit v1.2.3