diff options
author | Abseil Team <absl-team@google.com> | 2020-09-25 10:29:25 -0700 |
---|---|---|
committer | Andy Getz <durandal@google.com> | 2020-09-28 23:47:58 -0400 |
commit | d1de75bf540f091b4dfc860713d556e578c0f158 (patch) | |
tree | 91669db9a8381dd2c89c548e8e5a008064bee2b3 /absl/strings | |
parent | cad3f30b44c2bfac54ee82c6fc9e49ba49078620 (diff) |
Export of internal Abseil changes
--
f50d25c8f8491ef7031cbbcad78edd15f98c2bd1 by Abseil Team <absl-team@google.com>:
Add myriad2 to HAVE_MMAP
Remove mutex_nonprod and associated defines.
PiperOrigin-RevId: 333759830
--
25ef4c577ea983aa3fcd6cfe2af6cdc62a06f520 by Samuel Benzaquen <sbenza@google.com>:
Internal refactor.
Represent the data with a union to allow for better constexpr support in the future.
PiperOrigin-RevId: 333756733
GitOrigin-RevId: f50d25c8f8491ef7031cbbcad78edd15f98c2bd1
Change-Id: Ieecd2c47cb20de638726eb3f9fc2e5682d05dcca
Diffstat (limited to 'absl/strings')
-rw-r--r-- | absl/strings/cord.cc | 85 | ||||
-rw-r--r-- | absl/strings/cord.h | 95 | ||||
-rw-r--r-- | absl/strings/internal/cord_internal.h | 39 |
3 files changed, 125 insertions, 94 deletions
diff --git a/absl/strings/cord.cc b/absl/strings/cord.cc index 763dcc45..5092fd14 100644 --- a/absl/strings/cord.cc +++ b/absl/strings/cord.cc @@ -50,16 +50,10 @@ using ::absl::cord_internal::CordRepConcat; using ::absl::cord_internal::CordRepExternal; using ::absl::cord_internal::CordRepSubstring; -// Various representations that we allow -enum CordRepKind { - CONCAT = 0, - EXTERNAL = 1, - SUBSTRING = 2, - - // We have different tags for different sized flat arrays, - // starting with FLAT - FLAT = 3, -}; +using ::absl::cord_internal::CONCAT; +using ::absl::cord_internal::EXTERNAL; +using ::absl::cord_internal::FLAT; +using ::absl::cord_internal::SUBSTRING; namespace cord_internal { @@ -447,48 +441,49 @@ inline void Cord::InlineRep::set_data(const char* data, size_t n, bool nullify_tail) { static_assert(kMaxInline == 15, "set_data is hard-coded for a length of 15"); - cord_internal::SmallMemmove(data_, data, n, nullify_tail); - data_[kMaxInline] = static_cast<char>(n); + cord_internal::SmallMemmove(data_.as_chars, data, n, nullify_tail); + set_tagged_size(static_cast<char>(n)); } inline char* Cord::InlineRep::set_data(size_t n) { assert(n <= kMaxInline); - memset(data_, 0, sizeof(data_)); - data_[kMaxInline] = static_cast<char>(n); - return data_; + ResetToEmpty(); + set_tagged_size(static_cast<char>(n)); + return data_.as_chars; } inline CordRep* Cord::InlineRep::force_tree(size_t extra_hint) { - size_t len = data_[kMaxInline]; - CordRep* result; + size_t len = tagged_size(); if (len > kMaxInline) { - memcpy(&result, data_, sizeof(result)); - } else { - result = NewFlat(len + extra_hint); - result->length = len; - memcpy(result->data, data_, len); - set_tree(result); + return data_.as_tree.rep; } + + CordRep* result = NewFlat(len + extra_hint); + result->length = len; + static_assert(kMinFlatLength >= sizeof(data_.as_chars), ""); + memcpy(result->data, data_.as_chars, sizeof(data_.as_chars)); + set_tree(result); return result; } inline void Cord::InlineRep::reduce_size(size_t n) { - size_t tag = data_[kMaxInline]; + size_t tag = tagged_size(); assert(tag <= kMaxInline); assert(tag >= n); tag -= n; - memset(data_ + tag, 0, n); - data_[kMaxInline] = static_cast<char>(tag); + memset(data_.as_chars + tag, 0, n); + set_tagged_size(static_cast<char>(tag)); } inline void Cord::InlineRep::remove_prefix(size_t n) { - cord_internal::SmallMemmove(data_, data_ + n, data_[kMaxInline] - n); + cord_internal::SmallMemmove(data_.as_chars, data_.as_chars + n, + tagged_size() - n); reduce_size(n); } void Cord::InlineRep::AppendTree(CordRep* tree) { if (tree == nullptr) return; - size_t len = data_[kMaxInline]; + size_t len = tagged_size(); if (len == 0) { set_tree(tree); } else { @@ -498,7 +493,7 @@ void Cord::InlineRep::AppendTree(CordRep* tree) { void Cord::InlineRep::PrependTree(CordRep* tree) { assert(tree != nullptr); - size_t len = data_[kMaxInline]; + size_t len = tagged_size(); if (len == 0) { set_tree(tree); } else { @@ -554,11 +549,11 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size, } // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; + size_t inline_length = tagged_size(); if (inline_length < kMaxInline && max_length <= kMaxInline - inline_length) { - *region = data_ + inline_length; + *region = data_.as_chars + inline_length; *size = max_length; - data_[kMaxInline] = static_cast<char>(inline_length + max_length); + set_tagged_size(static_cast<char>(inline_length + max_length)); return; } @@ -582,11 +577,11 @@ void Cord::InlineRep::GetAppendRegion(char** region, size_t* size) { const size_t max_length = std::numeric_limits<size_t>::max(); // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; + size_t inline_length = tagged_size(); if (inline_length < kMaxInline) { - *region = data_ + inline_length; + *region = data_.as_chars + inline_length; *size = kMaxInline - inline_length; - data_[kMaxInline] = kMaxInline; + set_tagged_size(kMaxInline); return; } @@ -621,7 +616,7 @@ static bool RepMemoryUsageLeaf(const CordRep* rep, size_t* total_mem_usage) { void Cord::InlineRep::AssignSlow(const Cord::InlineRep& src) { ClearSlow(); - memcpy(data_, src.data_, sizeof(data_)); + data_ = src.data_; if (is_tree()) { Ref(tree()); } @@ -631,7 +626,7 @@ void Cord::InlineRep::ClearSlow() { if (is_tree()) { Unref(tree()); } - memset(data_, 0, sizeof(data_)); + ResetToEmpty(); } // -------------------------------------------------------------------- @@ -735,11 +730,11 @@ template Cord& Cord::operator=(std::string&& src); void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { if (src_size == 0) return; // memcpy(_, nullptr, 0) is undefined. // Try to fit in the inline buffer if possible. - size_t inline_length = data_[kMaxInline]; + size_t inline_length = tagged_size(); if (inline_length < kMaxInline && src_size <= kMaxInline - inline_length) { // Append new data to embedded array - data_[kMaxInline] = static_cast<char>(inline_length + src_size); - memcpy(data_ + inline_length, src_data, src_size); + set_tagged_size(static_cast<char>(inline_length + src_size)); + memcpy(data_.as_chars + inline_length, src_data, src_size); return; } @@ -762,7 +757,7 @@ void Cord::InlineRep::AppendArray(const char* src_data, size_t src_size) { const size_t size2 = inline_length + src_size / 10; root = NewFlat(std::max<size_t>(size1, size2)); appended = std::min(src_size, TagToLength(root->tag) - inline_length); - memcpy(root->data, data_, inline_length); + memcpy(root->data, data_.as_chars, inline_length); memcpy(root->data + inline_length, src_data, appended); root->length = inline_length + appended; set_tree(root); @@ -1071,7 +1066,7 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { } else if (new_size <= InlineRep::kMaxInline) { Cord::ChunkIterator it = chunk_begin(); it.AdvanceBytes(pos); - char* dest = sub_cord.contents_.data_; + char* dest = sub_cord.contents_.data_.as_chars; size_t remaining_size = new_size; while (remaining_size > it->size()) { cord_internal::SmallMemmove(dest, it->data(), it->size()); @@ -1080,7 +1075,7 @@ Cord Cord::Subcord(size_t pos, size_t new_size) const { ++it; } cord_internal::SmallMemmove(dest, it->data(), remaining_size); - sub_cord.contents_.data_[InlineRep::kMaxInline] = new_size; + sub_cord.contents_.set_tagged_size(new_size); } else { sub_cord.contents_.set_tree(NewSubRange(tree, pos, new_size)); } @@ -1269,9 +1264,9 @@ bool ComputeCompareResult<bool>(int memcmp_res) { // Helper routine. Locates the first flat chunk of the Cord without // initializing the iterator. inline absl::string_view Cord::InlineRep::FindFlatStartPiece() const { - size_t n = data_[kMaxInline]; + size_t n = tagged_size(); if (n <= kMaxInline) { - return absl::string_view(data_, n); + return absl::string_view(data_.as_chars, n); } CordRep* node = tree(); diff --git a/absl/strings/cord.h b/absl/strings/cord.h index b8b251b0..653a1181 100644 --- a/absl/strings/cord.h +++ b/absl/strings/cord.h @@ -644,14 +644,12 @@ class Cord { // InlineRep holds either a tree pointer, or an array of kMaxInline bytes. class InlineRep { public: - static constexpr unsigned char kMaxInline = 15; + static constexpr unsigned char kMaxInline = cord_internal::kMaxInline; static_assert(kMaxInline >= sizeof(absl::cord_internal::CordRep*), ""); - // Tag byte & kMaxInline means we are storing a pointer. - static constexpr unsigned char kTreeFlag = 1 << 4; - // Tag byte & kProfiledFlag means we are profiling the Cord. - static constexpr unsigned char kProfiledFlag = 1 << 5; + static constexpr unsigned char kTreeFlag = cord_internal::kTreeFlag; + static constexpr unsigned char kProfiledFlag = cord_internal::kProfiledFlag; - constexpr InlineRep() : data_{} {} + constexpr InlineRep() : data_() {} InlineRep(const InlineRep& src); InlineRep(InlineRep&& src); InlineRep& operator=(const InlineRep& src); @@ -684,16 +682,16 @@ class Cord { void GetAppendRegion(char** region, size_t* size, size_t max_length); void GetAppendRegion(char** region, size_t* size); bool IsSame(const InlineRep& other) const { - return memcmp(data_, other.data_, sizeof(data_)) == 0; + return memcmp(&data_, &other.data_, sizeof(data_)) == 0; } int BitwiseCompare(const InlineRep& other) const { uint64_t x, y; - // Use memcpy to avoid anti-aliasing issues. - memcpy(&x, data_, sizeof(x)); - memcpy(&y, other.data_, sizeof(y)); + // Use memcpy to avoid aliasing issues. + memcpy(&x, &data_, sizeof(x)); + memcpy(&y, &other.data_, sizeof(y)); if (x == y) { - memcpy(&x, data_ + 8, sizeof(x)); - memcpy(&y, other.data_ + 8, sizeof(y)); + memcpy(&x, reinterpret_cast<const char*>(&data_) + 8, sizeof(x)); + memcpy(&y, reinterpret_cast<const char*>(&other.data_) + 8, sizeof(y)); if (x == y) return 0; } return absl::big_endian::FromHost64(x) < absl::big_endian::FromHost64(y) @@ -706,16 +704,16 @@ class Cord { // to 15 bytes does not cause a memory allocation. absl::strings_internal::STLStringResizeUninitialized(dst, sizeof(data_) - 1); - memcpy(&(*dst)[0], data_, sizeof(data_) - 1); + memcpy(&(*dst)[0], &data_, sizeof(data_) - 1); // erase is faster than resize because the logic for memory allocation is // not needed. - dst->erase(data_[kMaxInline]); + dst->erase(tagged_size()); } // Copies the inline contents into `dst`. Assumes the cord is not empty. void CopyToArray(char* dst) const; - bool is_tree() const { return data_[kMaxInline] > kMaxInline; } + bool is_tree() const { return tagged_size() > kMaxInline; } private: friend class Cord; @@ -724,10 +722,18 @@ class Cord { // Unrefs the tree, stops profiling, and zeroes the contents void ClearSlow(); - // If the data has length <= kMaxInline, we store it in data_[0..len-1], - // and store the length in data_[kMaxInline]. Else we store it in a tree - // and store a pointer to that tree in data_[0..sizeof(CordRep*)-1]. - alignas(absl::cord_internal::CordRep*) char data_[kMaxInline + 1]; + void ResetToEmpty() { data_ = {}; } + + // This uses reinterpret_cast instead of the union to avoid accessing the + // inactive union element. The tagged size is not a common prefix. + void set_tagged_size(char new_tag) { + reinterpret_cast<char*>(&data_)[kMaxInline] = new_tag; + } + char tagged_size() const { + return reinterpret_cast<const char*>(&data_)[kMaxInline]; + } + + cord_internal::InlineData data_; }; InlineRep contents_; @@ -879,12 +885,12 @@ Cord MakeCordFromExternal(absl::string_view data, Releaser&& releaser) { } inline Cord::InlineRep::InlineRep(const Cord::InlineRep& src) { - cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); + data_ = src.data_; } inline Cord::InlineRep::InlineRep(Cord::InlineRep&& src) { - memcpy(data_, src.data_, sizeof(data_)); - memset(src.data_, 0, sizeof(data_)); + data_ = src.data_; + src.ResetToEmpty(); } inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { @@ -892,7 +898,7 @@ inline Cord::InlineRep& Cord::InlineRep::operator=(const Cord::InlineRep& src) { return *this; } if (!is_tree() && !src.is_tree()) { - cord_internal::SmallMemmove(data_, src.data_, sizeof(data_)); + data_ = src.data_; return *this; } AssignSlow(src); @@ -904,8 +910,8 @@ inline Cord::InlineRep& Cord::InlineRep::operator=( if (is_tree()) { ClearSlow(); } - memcpy(data_, src.data_, sizeof(data_)); - memset(src.data_, 0, sizeof(data_)); + data_ = src.data_; + src.ResetToEmpty(); return *this; } @@ -914,43 +920,39 @@ inline void Cord::InlineRep::Swap(Cord::InlineRep* rhs) { return; } - Cord::InlineRep tmp; - cord_internal::SmallMemmove(tmp.data_, data_, sizeof(data_)); - cord_internal::SmallMemmove(data_, rhs->data_, sizeof(data_)); - cord_internal::SmallMemmove(rhs->data_, tmp.data_, sizeof(data_)); + std::swap(data_, rhs->data_); } inline const char* Cord::InlineRep::data() const { - return is_tree() ? nullptr : data_; + return is_tree() ? nullptr : data_.as_chars; } inline absl::cord_internal::CordRep* Cord::InlineRep::tree() const { if (is_tree()) { - absl::cord_internal::CordRep* rep; - memcpy(&rep, data_, sizeof(rep)); - return rep; + return data_.as_tree.rep; } else { return nullptr; } } -inline bool Cord::InlineRep::empty() const { return data_[kMaxInline] == 0; } +inline bool Cord::InlineRep::empty() const { return tagged_size() == 0; } inline size_t Cord::InlineRep::size() const { - const char tag = data_[kMaxInline]; + const char tag = tagged_size(); if (tag <= kMaxInline) return tag; return static_cast<size_t>(tree()->length); } inline void Cord::InlineRep::set_tree(absl::cord_internal::CordRep* rep) { if (rep == nullptr) { - memset(data_, 0, sizeof(data_)); + ResetToEmpty(); } else { bool was_tree = is_tree(); - memcpy(data_, &rep, sizeof(rep)); - memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); + data_.as_tree = {rep, {}, tagged_size()}; if (!was_tree) { - data_[kMaxInline] = kTreeFlag; + // If we were not a tree already, set the tag. + // Otherwise, leave it alone because it might have the profile bit on. + set_tagged_size(kTreeFlag); } } } @@ -961,25 +963,20 @@ inline void Cord::InlineRep::replace_tree(absl::cord_internal::CordRep* rep) { set_tree(rep); return; } - memcpy(data_, &rep, sizeof(rep)); - memset(data_ + sizeof(rep), 0, sizeof(data_) - sizeof(rep) - 1); + data_.as_tree = {rep, {}, tagged_size()}; } inline absl::cord_internal::CordRep* Cord::InlineRep::clear() { - const char tag = data_[kMaxInline]; - absl::cord_internal::CordRep* result = nullptr; - if (tag > kMaxInline) { - memcpy(&result, data_, sizeof(result)); - } - memset(data_, 0, sizeof(data_)); // Clear the cord + absl::cord_internal::CordRep* result = tree(); + ResetToEmpty(); return result; } inline void Cord::InlineRep::CopyToArray(char* dst) const { assert(!is_tree()); - size_t n = data_[kMaxInline]; + size_t n = tagged_size(); assert(n != 0); - cord_internal::SmallMemmove(dst, data_, n); + cord_internal::SmallMemmove(dst, data_.as_chars, n); } constexpr inline Cord::Cord() noexcept {} diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h index d456eef8..00b9baa9 100644 --- a/absl/strings/internal/cord_internal.h +++ b/absl/strings/internal/cord_internal.h @@ -85,6 +85,17 @@ struct CordRepConcat; struct CordRepSubstring; struct CordRepExternal; +// Various representations that we allow +enum CordRepKind { + CONCAT = 0, + EXTERNAL = 1, + SUBSTRING = 2, + + // We have different tags for different sized flat arrays, + // starting with FLAT + FLAT = 3, +}; + struct CordRep { // The following three fields have to be less than 32 bytes since // that is the smallest supported flat node size. @@ -167,6 +178,34 @@ struct CordRepExternalImpl } }; +enum { + kMaxInline = 15, + // Tag byte & kMaxInline means we are storing a pointer. + kTreeFlag = 1 << 4, + // Tag byte & kProfiledFlag means we are profiling the Cord. + kProfiledFlag = 1 << 5 +}; + +// If the data has length <= kMaxInline, we store it in `as_chars`, and +// store the size in `tagged_size`. +// Else we store it in a tree and store a pointer to that tree in +// `as_tree.rep` and store a tag in `tagged_size`. +struct AsTree { + absl::cord_internal::CordRep* rep; + char padding[kMaxInline + 1 - sizeof(absl::cord_internal::CordRep*) - 1]; + char tagged_size; +}; +union InlineData { + constexpr InlineData() : as_chars{} {} + explicit constexpr InlineData(AsTree tree) : as_tree(tree) {} + + AsTree as_tree; + char as_chars[kMaxInline + 1]; +}; +static_assert(sizeof(InlineData) == kMaxInline + 1, ""); +static_assert(sizeof(AsTree) == sizeof(InlineData), ""); +static_assert(offsetof(AsTree, tagged_size) == kMaxInline, ""); + } // namespace cord_internal ABSL_NAMESPACE_END } // namespace absl |