summaryrefslogtreecommitdiff
path: root/absl/container
diff options
context:
space:
mode:
authorGravatar Connal de Souza <connaldesouza@google.com>2024-05-28 16:52:45 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2024-05-28 16:55:16 -0700
commit40b2776ef9a2f0cff6e7c6fc4ab79c1ebd87c554 (patch)
treea407c6a279751c4d83e02eb9215f358ee0c8eda3 /absl/container
parentcf071bb3277e3277c246d5ef0aeaa13b89c4d228 (diff)
Optimize GrowIntoSingleGroupShuffleControlBytes.
This implementation is designed to avoid needing to copy to an intermediate buffer and then read from it again, which is an expensive Read-after-Write hazard. PiperOrigin-RevId: 638071429 Change-Id: I390b4d38b8c1bd7fffba3d403baba6f1511555b0
Diffstat (limited to 'absl/container')
-rw-r--r--absl/container/internal/raw_hash_set.cc150
1 files changed, 100 insertions, 50 deletions
diff --git a/absl/container/internal/raw_hash_set.cc b/absl/container/internal/raw_hash_set.cc
index 9d399a1b..1cae0381 100644
--- a/absl/container/internal/raw_hash_set.cc
+++ b/absl/container/internal/raw_hash_set.cc
@@ -23,6 +23,7 @@
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/dynamic_annotations.h"
+#include "absl/base/internal/endian.h"
#include "absl/base/optimization.h"
#include "absl/container/internal/container_memory.h"
#include "absl/container/internal/hashtablez_sampler.h"
@@ -342,77 +343,126 @@ void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
}
void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
- ctrl_t* new_ctrl, size_t new_capacity) const {
+ ctrl_t* __restrict new_ctrl, size_t new_capacity) const {
assert(is_single_group(new_capacity));
constexpr size_t kHalfWidth = Group::kWidth / 2;
+ constexpr size_t kQuarterWidth = Group::kWidth / 4;
assert(old_capacity_ < kHalfWidth);
+ static_assert(sizeof(uint64_t) >= kHalfWidth,
+ "Group size is too large. The ctrl bytes for half a group must "
+ "fit into a uint64_t for this implementation.");
+ static_assert(sizeof(uint64_t) <= Group::kWidth,
+ "Group size is too small. The ctrl bytes for a group must "
+ "cover a uint64_t for this implementation.");
const size_t half_old_capacity = old_capacity_ / 2;
// NOTE: operations are done with compile time known size = kHalfWidth.
// Compiler optimizes that into single ASM operation.
- // Copy second half of bytes to the beginning.
- // We potentially copy more bytes in order to have compile time known size.
- // Mirrored bytes from the old_ctrl() will also be copied.
- // In case of old_capacity_ == 3, we will copy 1st element twice.
+ // Load the bytes from half_old_capacity + 1. This contains the last half of
+ // old_ctrl bytes, followed by the sentinel byte, and then the first half of
+ // the cloned bytes. This effectively shuffles the control bytes.
+ uint64_t copied_bytes = 0;
+ copied_bytes =
+ absl::little_endian::Load64(old_ctrl() + half_old_capacity + 1);
+
+ // We change the sentinel byte to kEmpty before storing to both the start of
+ // the new_ctrl, and past the end of the new_ctrl later for the new cloned
+ // bytes. Note that this is faster than setting the sentinel byte to kEmpty
+ // after the copy directly in new_ctrl because we are limited on store
+ // bandwidth.
+ constexpr uint64_t kEmptyXorSentinel =
+ static_cast<uint8_t>(ctrl_t::kEmpty) ^
+ static_cast<uint8_t>(ctrl_t::kSentinel);
+ const uint64_t mask_convert_old_sentinel_to_empty =
+ kEmptyXorSentinel << (half_old_capacity * 8);
+ copied_bytes ^= mask_convert_old_sentinel_to_empty;
+
+ // Copy second half of bytes to the beginning. This correctly sets the bytes
+ // [0, old_capacity]. We potentially copy more bytes in order to have compile
+ // time known size. Mirrored bytes from the old_ctrl() will also be copied. In
+ // case of old_capacity_ == 3, we will copy 1st element twice.
// Examples:
+ // (old capacity = 1)
// old_ctrl = 0S0EEEEEEE...
- // new_ctrl = S0EEEEEEEE...
+ // new_ctrl = E0EEEEEE??...
//
- // old_ctrl = 01S01EEEEE...
- // new_ctrl = 1S01EEEEEE...
+ // (old capacity = 3)
+ // old_ctrl = 012S012EEEEE...
+ // new_ctrl = 12E012EE????...
//
+ // (old capacity = 7)
// old_ctrl = 0123456S0123456EE...
- // new_ctrl = 456S0123?????????...
- std::memcpy(new_ctrl, old_ctrl() + half_old_capacity + 1, kHalfWidth);
- // Clean up copied kSentinel from old_ctrl.
- new_ctrl[half_old_capacity] = ctrl_t::kEmpty;
-
- // Clean up damaged or uninitialized bytes.
-
- // Clean bytes after the intended size of the copy.
- // Example:
- // new_ctrl = 1E01EEEEEEE????
- // *new_ctrl= 1E0EEEEEEEE????
- // position /
- std::memset(new_ctrl + old_capacity_ + 1, static_cast<int8_t>(ctrl_t::kEmpty),
- kHalfWidth);
- // Clean non-mirrored bytes that are not initialized.
- // For small old_capacity that may be inside of mirrored bytes zone.
+ // new_ctrl = 456E0123?????????...
+ absl::little_endian::Store64(new_ctrl, copied_bytes);
+
+ // Set the space [old_capacity + 1, new_capacity] to empty as these bytes will
+ // not be written again. This is safe because
+ // NumControlBytes = new_capacity + kWidth and new_capacity >=
+ // old_capacity+1.
// Examples:
- // new_ctrl = 1E0EEEEEEEE??????????....
- // *new_ctrl= 1E0EEEEEEEEEEEEE?????....
- // position /
+ // (old_capacity = 3, new_capacity = 15)
+ // new_ctrl = 12E012EE?????????????...??
+ // *new_ctrl = 12E0EEEEEEEEEEEEEEEE?...??
+ // position / S
//
- // new_ctrl = 456E0123???????????...
- // *new_ctrl= 456E0123EEEEEEEE???...
- // position /
- std::memset(new_ctrl + kHalfWidth, static_cast<int8_t>(ctrl_t::kEmpty),
- kHalfWidth);
- // Clean last mirrored bytes that are not initialized
- // and will not be overwritten by mirroring.
+ // (old_capacity = 7, new_capacity = 15)
+ // new_ctrl = 456E0123?????????????????...??
+ // *new_ctrl = 456E0123EEEEEEEEEEEEEEEE?...??
+ // position / S
+ std::memset(new_ctrl + old_capacity_ + 1, static_cast<int8_t>(ctrl_t::kEmpty),
+ Group::kWidth);
+
+ // Set the last kHalfWidth bytes to empty, to ensure the bytes all the way to
+ // the end are initialized.
// Examples:
- // new_ctrl = 1E0EEEEEEEEEEEEE????????
- // *new_ctrl= 1E0EEEEEEEEEEEEEEEEEEEEE
- // position S /
+ // new_ctrl = 12E0EEEEEEEEEEEEEEEE?...???????
+ // *new_ctrl = 12E0EEEEEEEEEEEEEEEE???EEEEEEEE
+ // position S /
//
- // new_ctrl = 456E0123EEEEEEEE???????????????
- // *new_ctrl= 456E0123EEEEEEEE???????EEEEEEEE
- // position S /
- std::memset(new_ctrl + new_capacity + kHalfWidth,
+ // new_ctrl = 456E0123EEEEEEEEEEEEEEEE???????
+ // *new_ctrl = 456E0123EEEEEEEEEEEEEEEEEEEEEEE
+ // position S /
+ std::memset(new_ctrl + NumControlBytes(new_capacity) - kHalfWidth,
static_cast<int8_t>(ctrl_t::kEmpty), kHalfWidth);
- // Create mirrored bytes. old_capacity_ < kHalfWidth
- // Example:
- // new_ctrl = 456E0123EEEEEEEE???????EEEEEEEE
- // *new_ctrl= 456E0123EEEEEEEE456E0123EEEEEEE
- // position S/
- ctrl_t g[kHalfWidth];
- std::memcpy(g, new_ctrl, kHalfWidth);
- std::memcpy(new_ctrl + new_capacity + 1, g, kHalfWidth);
+ // Copy the first bytes to the end (starting at new_capacity +1) to set the
+ // cloned bytes. Note that we use the already copied bytes from old_ctrl here
+ // rather than copying from new_ctrl to avoid a Read-after-Write hazard, since
+ // new_ctrl was just written to. The first old_capacity-1 bytes are set
+ // correctly. Then there may be up to old_capacity bytes that need to be
+ // overwritten, and any remaining bytes will be correctly set to empty. This
+ // sets [new_capacity + 1, new_capacity +1 + old_capacity] correctly.
+ // Examples:
+ // new_ctrl = 12E0EEEEEEEEEEEEEEEE?...???????
+ // *new_ctrl = 12E0EEEEEEEEEEEE12E012EEEEEEEEE
+ // position S/
+ //
+ // new_ctrl = 456E0123EEEEEEEE?...???EEEEEEEE
+ // *new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE
+ // position S/
+ absl::little_endian::Store64(new_ctrl + new_capacity + 1, copied_bytes);
+
+ // Set The remaining bytes at the end past the cloned bytes to empty. The
+ // incorrectly set bytes are [new_capacity + old_capacity + 2,
+ // min(new_capacity + 1 + kHalfWidth, new_capacity + old_capacity + 2 +
+ // half_old_capacity)]. Taking the difference, we need to set min(kHalfWidth -
+ // (old_capacity + 1), half_old_capacity)]. Since old_capacity < kHalfWidth,
+ // half_old_capacity < kQuarterWidth, so we set kQuarterWidth beginning at
+ // new_capacity + old_capacity + 2 to kEmpty.
+ // Examples:
+ // new_ctrl = 12E0EEEEEEEEEEEE12E012EEEEEEEEE
+ // *new_ctrl = 12E0EEEEEEEEEEEE12E0EEEEEEEEEEE
+ // position S /
+ //
+ // new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE
+ // *new_ctrl = 456E0123EEEEEEEE456E0123EEEEEEE (no change)
+ // position S /
+ std::memset(new_ctrl + new_capacity + old_capacity_ + 2,
+ static_cast<int8_t>(ctrl_t::kEmpty), kQuarterWidth);
- // Finally set sentinel to its place.
+ // Finally, we set the new sentinel byte.
new_ctrl[new_capacity] = ctrl_t::kSentinel;
}