author     Abseil Team <absl-team@google.com>        2021-07-08 15:41:34 -0700
committer  Derek Mauro <dmauro@google.com>           2021-07-09 09:27:22 -0400
commit     b06e719ee985ecd63e0dffbc68499549216f817f (patch)
tree       8d53095e0ee1b45e7c2920a296aeb337f909bc95 /absl/random/internal/randen_hwaes.cc
parent     58e042da9210710dc4ac3b320e48b54e2449521e (diff)
Export of internal Abseil changes
--
007ce045d5d38a727ededdb5bf06e64785fd73bd by Martijn Vels <mvels@google.com>:
Add `cord_enable_btree` feature flag (default false).
PiperOrigin-RevId: 383729939
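[Editor's note: for illustration only, a minimal sketch of how a default-off flag like this could be declared with Abseil's public flags API. The flag name is taken from the message above; the description string is invented, and the real internal feature-flag wiring may differ.]

    // Sketch only: a default-false feature flag via the public flags API.
    #include "absl/flags/flag.h"

    ABSL_FLAG(bool, cord_enable_btree, false,
              "If true, enable the btree representation for cords.");

    // Callers would then branch on it at runtime:
    //   if (absl::GetFlag(FLAGS_cord_enable_btree)) { /* btree path */ }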
--
98e7dc6a0407b0fd7b8713d883cdb3a766e0583d by Benjamin Barenblat <bbaren@google.com>:
Eliminate some byte swapping from randen_slow
Stop swapping bytes when serializing randen_slow’s Vector128 into and
out of memory. Instead, simply index different bytes in the AES round
function. This requires byte-swapping the te{0..3} lookup tables, but it
produces an 8% speedup on my Xeon W-2135.
PiperOrigin-RevId: 383689402
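[Editor's note: for illustration only, a minimal sketch of the table-swap idea; the names are hypothetical, not the actual randen_slow code. It contrasts paying a byte swap on every lookup with swapping the table once so each lookup becomes a plain load.]

    #include <cstdint>

    // Hypothetical stand-in for one of the te{0..3} AES lookup tables.
    static uint32_t kTe0[256];

    inline uint32_t ByteSwap32(uint32_t v) {
      return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
             ((v << 8) & 0x00FF0000u) | (v << 24);
    }

    // Before: every access pays a swap when state is serialized this way.
    inline uint32_t LookupWithSwap(uint8_t i) { return ByteSwap32(kTe0[i]); }

    // After: swap the table once; the round function then indexes the
    // appropriate byte directly, and each lookup is a plain load.
    void ByteSwapTableOnce() {
      for (int i = 0; i < 256; ++i) kTe0[i] = ByteSwap32(kTe0[i]);
    }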
--
180b6bf45049188840d439b16a28e6b968669340 by Evan Brown <ezb@google.com>:
Minor simplification in drop_deletes_without_resize(): save probe_offset outside the lambda.
Also, add some consts, avoid an auto, and use lambda capture by value instead of by reference.
The compiler can already perform this optimization (https://godbolt.org/z/Wxd9c4TfK), but I think this way makes the code a bit clearer.
PiperOrigin-RevId: 383646658
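[Editor's note: for illustration only, a minimal sketch of the pattern described; hypothetical code, not the actual raw_hash_set internals. The value is computed once outside the lambda and captured by value.]

    #include <cstddef>

    // probe_offset is computed once, outside the lambda, and captured by
    // value, so the closure holds plain copies rather than references into
    // surrounding state.
    inline size_t ExampleProbe(size_t capacity, size_t hash, size_t i) {
      const size_t probe_offset = hash % capacity;  // saved outside
      const auto probe_index = [probe_offset, capacity](size_t offset) {
        return (probe_offset + offset) % capacity;
      };
      return probe_index(i);
    }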
--
781706a974c4dc1c0abbb6b801fca0550229e883 by Martijn Vels <mvels@google.com>:
Change storage to contain 3 bytes.
As per the comments in the code, this allows us to utilize all available space in CordRep that might otherwise be 'lost' to padding in derived classes. For the upcoming CordRepBtree class, we want a strong guarantee of a 64-byte-aligned implementation.
PiperOrigin-RevId: 383633963
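[Editor's note: for illustration only, a hedged sketch of the padding idea; the field names and sizes are hypothetical, not the real CordRep layout. The base class exposes spare trailing bytes that derived classes can reuse instead of losing them to alignment padding, and the size guarantee is pinned with a static_assert.]

    #include <cstddef>
    #include <cstdint>

    // Hypothetical base rep: storage[3] reuses bytes that alignment
    // padding would otherwise waste in derived classes.
    struct RepBase {
      size_t length;
      int32_t refcount;
      uint8_t tag;
      uint8_t storage[3];
    };

    // A derived node that needs a hard 64-byte guarantee can assert it.
    struct alignas(64) BtreeNode : RepBase {
      uint8_t payload[64 - sizeof(RepBase)];
    };
    static_assert(sizeof(BtreeNode) == 64, "must stay 64 bytes");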
--
8fe22ecf92492fa6649938a2215934ebfe01c714 by Derek Mauro <dmauro@google.com>:
Remove reference to str_format_arg.h, which no longer exists
PiperOrigin-RevId: 383517865
--
79397f3b18f18c1e2d7aea993b687329d626ce64 by Benjamin Barenblat <bbaren@google.com>:
Use absl::uint128 for AES random number generator
Replace randen’s internal 128-bit integer struct, u64x2, with
absl::uint128. This eliminates some code and improves support for
big-endian platforms.
PiperOrigin-RevId: 383475671
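[Editor's note: for illustration only, a minimal sketch of the substitution using absl::uint128's public accessors; this is not the randen code itself.]

    #include <cstdint>
    #include "absl/numeric/int128.h"

    // A 128-bit block that previously needed struct { uint64_t data[2]; }
    // can be carried as absl::uint128, with portable accessors per half.
    static_assert(sizeof(absl::uint128) == 16, "one 16-byte block");

    inline absl::uint128 MakeBlock(uint64_t hi, uint64_t lo) {
      return absl::MakeUint128(hi, lo);
    }
    inline uint64_t Hi(absl::uint128 b) { return absl::Uint128High64(b); }
    inline uint64_t Lo(absl::uint128 b) { return absl::Uint128Low64(b); }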
GitOrigin-RevId: 007ce045d5d38a727ededdb5bf06e64785fd73bd
Change-Id: Ia9d9c40de557221f1744fb0d6d4d6ca7ac569070
Diffstat (limited to 'absl/random/internal/randen_hwaes.cc')
-rw-r--r--  absl/random/internal/randen_hwaes.cc  29
1 file changed, 14 insertions, 15 deletions
diff --git a/absl/random/internal/randen_hwaes.cc b/absl/random/internal/randen_hwaes.cc
index 3738cc38..ab51e4a3 100644
--- a/absl/random/internal/randen_hwaes.cc
+++ b/absl/random/internal/randen_hwaes.cc
@@ -23,6 +23,7 @@
 #include <cstring>
 
 #include "absl/base/attributes.h"
+#include "absl/numeric/int128.h"
 #include "absl/random/internal/platform.h"
 #include "absl/random/internal/randen_traits.h"
 
@@ -120,11 +121,6 @@ namespace {
 
 using absl::random_internal::RandenTraits;
 
-// Randen operates on 128-bit vectors.
-struct alignas(16) u64x2 {
-  uint64_t data[2];
-};
-
 }  // namespace
 
 // TARGET_CRYPTO defines a crypto attribute for each architecture.
@@ -186,7 +182,7 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state,
 }
 
 // Enables native loads in the round loop by pre-swapping.
-inline ABSL_TARGET_CRYPTO void SwapEndian(u64x2* state) {
+inline ABSL_TARGET_CRYPTO void SwapEndian(absl::uint128* state) {
   for (uint32_t block = 0; block < RandenTraits::kFeistelBlocks; ++block) {
     Vector128Store(ReverseBytes(Vector128Load(state + block)), state + block);
   }
@@ -327,7 +323,7 @@ namespace {
 
 // Block shuffles applies a shuffle to the entire state between AES rounds.
 // Improved odd-even shuffle from "New criterion for diffusion property".
-inline ABSL_TARGET_CRYPTO void BlockShuffle(u64x2* state) {
+inline ABSL_TARGET_CRYPTO void BlockShuffle(absl::uint128* state) {
   static_assert(RandenTraits::kFeistelBlocks == 16,
                 "Expecting 16 FeistelBlocks.");
 
@@ -374,8 +370,9 @@ inline ABSL_TARGET_CRYPTO void BlockShuffle(u64x2* state) {
 // per 16 bytes (vs. 10 for AES-CTR). Computing eight round functions in
 // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel
 // XORs are 'free' (included in the second AES instruction).
-inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound(
-    u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
+inline ABSL_TARGET_CRYPTO const absl::uint128* FeistelRound(
+    absl::uint128* state,
+    const absl::uint128* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
   static_assert(RandenTraits::kFeistelBlocks == 16,
                 "Expecting 16 FeistelBlocks.");
 
@@ -436,7 +433,8 @@ inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound(
 // 2^64 queries if the round function is a PRF. This is similar to the b=8 case
 // of Simpira v2, but more efficient than its generic construction for b=16.
 inline ABSL_TARGET_CRYPTO void Permute(
-    u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
+    absl::uint128* state,
+    const absl::uint128* ABSL_RANDOM_INTERNAL_RESTRICT keys) {
   // (Successfully unrolled; the first iteration jumps into the second half)
 #ifdef __clang__
 #pragma clang loop unroll_count(2)
@@ -473,10 +471,11 @@ void ABSL_TARGET_CRYPTO RandenHwAes::Absorb(const void* seed_void,
   static_assert(RandenTraits::kStateBytes / sizeof(Vector128) == 16,
                 "Unexpected Randen kStateBlocks");
 
-  auto* state =
-      reinterpret_cast<u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(state_void);
+  auto* state = reinterpret_cast<absl::uint128 * ABSL_RANDOM_INTERNAL_RESTRICT>(
+      state_void);
   const auto* seed =
-      reinterpret_cast<const u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(seed_void);
+      reinterpret_cast<const absl::uint128 * ABSL_RANDOM_INTERNAL_RESTRICT>(
+          seed_void);
 
   Vector128 b1 = Vector128Load(state + 1);
   b1 ^= Vector128Load(seed + 0);
@@ -545,8 +544,8 @@ void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys_void,
   static_assert(RandenTraits::kCapacityBytes == sizeof(Vector128),
                 "Capacity mismatch");
 
-  auto* state = reinterpret_cast<u64x2*>(state_void);
-  const auto* keys = reinterpret_cast<const u64x2*>(keys_void);
+  auto* state = reinterpret_cast<absl::uint128*>(state_void);
+  const auto* keys = reinterpret_cast<const absl::uint128*>(keys_void);
 
   const Vector128 prev_inner = Vector128Load(state);