summaryrefslogtreecommitdiff
path: root/absl/strings/internal
diff options
context:
space:
mode:
authorGravatar Martijn Vels <mvels@google.com>2022-12-07 07:14:42 -0800
committerGravatar Copybara-Service <copybara-worker@google.com>2022-12-07 07:15:19 -0800
commit5736d76ae60c1746cf6fe60466df655c91bf7a7b (patch)
treef2a307cbb31cc2de71ba424784626d3b3a2568f8 /absl/strings/internal
parentc96db73c09dbb528eca6d19a50bd258b37e9fd5e (diff)
Remove possible UB from CopyRaw()
This change removes the 'create by single memcpy' CopyRaw() logic to avoid potential UB from the atomic refcount data being included in the memcpy. While we know this works on all supported platforms, it is officially UB, and we should change it to something else. This change changes the CopyRaw() method to explicitly create a (default initialized) instance, initialize `length` and `refcount`, and then memcpy the remaining contents which are trivial uint8_t and CordRep* values. PiperOrigin-RevId: 493596072 Change-Id: I46618883eb1c7c9ed9eb083f4d3e7fc501f23df5
Diffstat (limited to 'absl/strings/internal')
-rw-r--r--absl/strings/internal/cord_internal.h7
-rw-r--r--absl/strings/internal/cord_rep_btree.cc2
-rw-r--r--absl/strings/internal/cord_rep_btree.h35
3 files changed, 30 insertions, 14 deletions
diff --git a/absl/strings/internal/cord_internal.h b/absl/strings/internal/cord_internal.h
index fcca3a28..6022c1df 100644
--- a/absl/strings/internal/cord_internal.h
+++ b/absl/strings/internal/cord_internal.h
@@ -225,7 +225,11 @@ struct CordRep {
: length(l), refcount(immortal), tag(EXTERNAL), storage{} {}
// The following three fields have to be less than 32 bytes since
- // that is the smallest supported flat node size.
+ // that is the smallest supported flat node size. Some code optimizations rely
+ // on the specific layout of these fields. Notably: the non-trivial field
+ // `refcount` being preceeded by `length`, and being tailed by POD data
+ // members only.
+ // # LINT.IfChange
size_t length;
RefcountAndFlags refcount;
// If tag < FLAT, it represents CordRepKind and indicates the type of node.
@@ -241,6 +245,7 @@ struct CordRep {
// allocate room for these in the derived class, as not all compilers reuse
// padding space from the base class (clang and gcc do, MSVC does not, etc)
uint8_t storage[3];
+ // # LINT.ThenChange(cord_rep_btree.h:copy_raw)
// Returns true if this instance's tag matches the requested type.
constexpr bool IsRing() const { return tag == RING; }
diff --git a/absl/strings/internal/cord_rep_btree.cc b/absl/strings/internal/cord_rep_btree.cc
index 7ce36128..985f0724 100644
--- a/absl/strings/internal/cord_rep_btree.cc
+++ b/absl/strings/internal/cord_rep_btree.cc
@@ -502,7 +502,7 @@ OpResult CordRepBtree::SetEdge(bool owned, CordRep* edge, size_t delta) {
// open interval [begin, back) or [begin + 1, end) depending on `edge_type`.
// We conveniently cover both case using a constexpr `shift` being 0 or 1
// as `end :== back + 1`.
- result = {CopyRaw(), kCopied};
+ result = {CopyRaw(length), kCopied};
constexpr int shift = edge_type == kFront ? 1 : 0;
for (CordRep* r : Edges(begin() + shift, back() + shift)) {
CordRep::Ref(r);
diff --git a/absl/strings/internal/cord_rep_btree.h b/absl/strings/internal/cord_rep_btree.h
index eed5609e..4209e512 100644
--- a/absl/strings/internal/cord_rep_btree.h
+++ b/absl/strings/internal/cord_rep_btree.h
@@ -446,9 +446,9 @@ class CordRepBtree : public CordRep {
template <EdgeType edge_type>
static CordRepBtree* NewLeaf(absl::string_view data, size_t extra);
- // Creates a raw copy of this Btree node, copying all properties, but
- // without adding any references to existing edges.
- CordRepBtree* CopyRaw() const;
+ // Creates a raw copy of this Btree node with the specified length, copying
+ // all properties, but without adding any references to existing edges.
+ CordRepBtree* CopyRaw(size_t new_length) const;
// Creates a full copy of this Btree node, adding a reference on all edges.
CordRepBtree* Copy() const;
@@ -666,15 +666,28 @@ inline void CordRepBtree::Unref(absl::Span<CordRep* const> edges) {
}
}
-inline CordRepBtree* CordRepBtree::CopyRaw() const {
- auto* tree = static_cast<CordRepBtree*>(::operator new(sizeof(CordRepBtree)));
- memcpy(static_cast<void*>(tree), this, sizeof(CordRepBtree));
- new (&tree->refcount) RefcountAndFlags;
+inline CordRepBtree* CordRepBtree::CopyRaw(size_t new_length) const {
+ CordRepBtree* tree = new CordRepBtree;
+
+ // `length` and `refcount` are the first members of `CordRepBtree`.
+ // We initialize `length` using the given length, have `refcount` be set to
+ // ref = 1 through its default constructor, and copy all data beyond
+ // 'refcount' which starts with `tag` using a single memcpy: all contents
+ // except `refcount` is trivially copyable, and the compiler does not
+ // efficiently coalesce member-wise copy of these members.
+ // See https://gcc.godbolt.org/z/qY8zsca6z
+ // # LINT.IfChange(copy_raw)
+ tree->length = new_length;
+ uint8_t* dst = &tree->tag;
+ const uint8_t* src = &tag;
+ const ptrdiff_t offset = src - reinterpret_cast<const uint8_t*>(this);
+ memcpy(dst, src, sizeof(CordRepBtree) - static_cast<size_t>(offset));
return tree;
+ // # LINT.ThenChange()
}
inline CordRepBtree* CordRepBtree::Copy() const {
- CordRepBtree* tree = CopyRaw();
+ CordRepBtree* tree = CopyRaw(length);
for (CordRep* rep : Edges()) CordRep::Ref(rep);
return tree;
}
@@ -683,8 +696,7 @@ inline CordRepBtree* CordRepBtree::CopyToEndFrom(size_t begin,
size_t new_length) const {
assert(begin >= this->begin());
assert(begin <= this->end());
- CordRepBtree* tree = CopyRaw();
- tree->length = new_length;
+ CordRepBtree* tree = CopyRaw(new_length);
tree->set_begin(begin);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;
@@ -694,8 +706,7 @@ inline CordRepBtree* CordRepBtree::CopyBeginTo(size_t end,
size_t new_length) const {
assert(end <= capacity());
assert(end >= this->begin());
- CordRepBtree* tree = CopyRaw();
- tree->length = new_length;
+ CordRepBtree* tree = CopyRaw(new_length);
tree->set_end(end);
for (CordRep* edge : tree->Edges()) CordRep::Ref(edge);
return tree;