diff options
Diffstat (limited to 'absl/random/internal/randen_hwaes.cc')
-rw-r--r-- | absl/random/internal/randen_hwaes.cc | 128 |
1 files changed, 39 insertions, 89 deletions
diff --git a/absl/random/internal/randen_hwaes.cc b/absl/random/internal/randen_hwaes.cc index 6b82d1d0..7d5b2b74 100644 --- a/absl/random/internal/randen_hwaes.cc +++ b/absl/random/internal/randen_hwaes.cc @@ -22,39 +22,9 @@ #include <cstdint> #include <cstring> +#include "absl/base/attributes.h" #include "absl/random/internal/platform.h" -// ABSL_HAVE_ATTRIBUTE -#if !defined(ABSL_HAVE_ATTRIBUTE) -#ifdef __has_attribute -#define ABSL_HAVE_ATTRIBUTE(x) __has_attribute(x) -#else -#define ABSL_HAVE_ATTRIBUTE(x) 0 -#endif -#endif - -#if ABSL_HAVE_ATTRIBUTE(always_inline) || \ - (defined(__GNUC__) && !defined(__clang__)) -#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE \ - __attribute__((always_inline)) -#elif defined(_MSC_VER) -// We can achieve something similar to attribute((always_inline)) with MSVC by -// using the __forceinline keyword, however this is not perfect. MSVC is -// much less aggressive about inlining, and even with the __forceinline keyword. -#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __forceinline -#else -#define ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE -#endif - -// ABSL_ATTRIBUTE_FLATTEN enables much more aggressive inlining within -// the indicated function. -#undef ABSL_ATTRIBUTE_FLATTEN -#if ABSL_HAVE_ATTRIBUTE(flatten) || (defined(__GNUC__) && !defined(__clang__)) -#define ABSL_ATTRIBUTE_FLATTEN __attribute__((flatten)) -#else -#define ABSL_ATTRIBUTE_FLATTEN -#endif - // ABSL_RANDEN_HWAES_IMPL indicates whether this file will contain // a hardware accelerated implementation of randen, or whether it // will contain stubs that exit the process. @@ -146,18 +116,6 @@ void RandenHwAes::Generate(const void*, void*) { #include "absl/random/internal/randen_traits.h" -// ABSL_FUNCTION_ALIGN32 defines a 32-byte alignment attribute -// for the functions in this file. -// -// NOTE: Determine whether we actually have any wins from ALIGN32 -// using microbenchmarks. If not, remove. -#undef ABSL_FUNCTION_ALIGN32 -#if ABSL_HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__)) -#define ABSL_FUNCTION_ALIGN32 __attribute__((aligned(32))) -#else -#define ABSL_FUNCTION_ALIGN32 -#endif - // TARGET_CRYPTO defines a crypto attribute for each architecture. // // NOTE: Evaluate whether we should eliminate ABSL_TARGET_CRYPTO. @@ -191,8 +149,7 @@ using Vector128 = __vector unsigned long long; // NOLINT(runtime/int) namespace { -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 -ReverseBytes(const Vector128& v) { +inline ABSL_TARGET_CRYPTO Vector128 ReverseBytes(const Vector128& v) { // Reverses the bytes of the vector. const __vector unsigned char perm = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; @@ -202,26 +159,26 @@ ReverseBytes(const Vector128& v) { // WARNING: these load/store in native byte order. It is OK to load and then // store an unchanged vector, but interpreting the bits as a number or input // to AES will have undefined results. -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO Vector128 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { return vec_vsx_ld(0, reinterpret_cast<const Vector128*>(from)); } -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void -Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { +inline ABSL_TARGET_CRYPTO void Vector128Store( + const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { vec_vsx_st(v, 0, reinterpret_cast<Vector128*>(to)); } // One round of AES. "round_key" is a public constant for breaking the // symmetry of AES (ensures previously equal columns differ afterwards). -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 -AesRound(const Vector128& state, const Vector128& round_key) { +inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, + const Vector128& round_key) { return Vector128(__builtin_crypto_vcipher(state, round_key)); } // Enables native loads in the round loop by pre-swapping. -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void -SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { +inline ABSL_TARGET_CRYPTO void SwapEndian( + uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { using absl::random_internal::RandenTraits; constexpr size_t kLanes = 2; constexpr size_t kFeistelBlocks = RandenTraits::kFeistelBlocks; @@ -273,20 +230,20 @@ using Vector128 = uint8x16_t; namespace { -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO Vector128 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { return vld1q_u8(reinterpret_cast<const uint8_t*>(from)); } -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void -Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { +inline ABSL_TARGET_CRYPTO void Vector128Store( + const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { vst1q_u8(reinterpret_cast<uint8_t*>(to), v); } // One round of AES. "round_key" is a public constant for breaking the // symmetry of AES (ensures previously equal columns differ afterwards). -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 -AesRound(const Vector128& state, const Vector128& round_key) { +inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, + const Vector128& round_key) { // It is important to always use the full round function - omitting the // final MixColumns reduces security [https://eprint.iacr.org/2010/041.pdf] // and does not help because we never decrypt. @@ -297,8 +254,8 @@ AesRound(const Vector128& state, const Vector128& round_key) { return vaesmcq_u8(vaeseq_u8(state, uint8x16_t{})) ^ round_key; } -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void -SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} +inline ABSL_TARGET_CRYPTO void SwapEndian( + uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} } // namespace @@ -313,16 +270,11 @@ namespace { class Vector128 { public: // Convert from/to intrinsics. - inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE explicit Vector128( - const __m128i& Vector128) - : data_(Vector128) {} + inline explicit Vector128(const __m128i& Vector128) : data_(Vector128) {} - inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE __m128i data() const { - return data_; - } + inline __m128i data() const { return data_; } - inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=( - const Vector128& other) { + inline Vector128& operator^=(const Vector128& other) { data_ = _mm_xor_si128(data_, other.data()); return *this; } @@ -331,29 +283,29 @@ class Vector128 { __m128i data_; }; -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 +inline ABSL_TARGET_CRYPTO Vector128 Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { return Vector128(_mm_load_si128(reinterpret_cast<const __m128i*>(from))); } -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void -Vector128Store(const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { +inline ABSL_TARGET_CRYPTO void Vector128Store( + const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { _mm_store_si128(reinterpret_cast<__m128i * ABSL_RANDOM_INTERNAL_RESTRICT>(to), v.data()); } // One round of AES. "round_key" is a public constant for breaking the // symmetry of AES (ensures previously equal columns differ afterwards). -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 -AesRound(const Vector128& state, const Vector128& round_key) { +inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, + const Vector128& round_key) { // It is important to always use the full round function - omitting the // final MixColumns reduces security [https://eprint.iacr.org/2010/041.pdf] // and does not help because we never decrypt. return Vector128(_mm_aesenc_si128(state.data(), round_key.data())); } -inline ABSL_TARGET_CRYPTO ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void -SwapEndian(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} +inline ABSL_TARGET_CRYPTO void SwapEndian( + uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT) {} } // namespace @@ -450,8 +402,8 @@ constexpr size_t kLanes = 2; // Block shuffles applies a shuffle to the entire state between AES rounds. // Improved odd-even shuffle from "New criterion for diffusion property". -inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void -BlockShuffle(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { +inline ABSL_TARGET_CRYPTO void BlockShuffle( + uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); constexpr size_t shuffle[kFeistelBlocks] = {7, 2, 13, 4, 11, 8, 3, 6, @@ -499,10 +451,9 @@ BlockShuffle(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { // per 16 bytes (vs. 10 for AES-CTR). Computing eight round functions in // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel // XORs are 'free' (included in the second AES instruction). -inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO const - u64x2* - FeistelRound(uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, - const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { +inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound( + uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, + const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); // MSVC does a horrible job at unrolling loops. @@ -561,9 +512,9 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO const // Indistinguishable from ideal by chosen-ciphertext adversaries using less than // 2^64 queries if the round function is a PRF. This is similar to the b=8 case // of Simpira v2, but more efficient than its generic construction for b=16. -inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE ABSL_TARGET_CRYPTO void -Permute(const void* ABSL_RANDOM_INTERNAL_RESTRICT keys, - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { +inline ABSL_TARGET_CRYPTO void Permute( + const void* ABSL_RANDOM_INTERNAL_RESTRICT keys, + uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 = static_cast<const u64x2*>(keys); @@ -584,16 +535,15 @@ namespace random_internal { bool HasRandenHwAesImplementation() { return true; } -const void* ABSL_TARGET_CRYPTO ABSL_FUNCTION_ALIGN32 ABSL_ATTRIBUTE_FLATTEN -RandenHwAes::GetKeys() { +const void* ABSL_TARGET_CRYPTO RandenHwAes::GetKeys() { // Round keys for one AES per Feistel round and branch. // The canonical implementation uses first digits of Pi. return round_keys; } // NOLINTNEXTLINE -void ABSL_TARGET_CRYPTO ABSL_FUNCTION_ALIGN32 ABSL_ATTRIBUTE_FLATTEN -RandenHwAes::Absorb(const void* seed_void, void* state_void) { +void ABSL_TARGET_CRYPTO RandenHwAes::Absorb(const void* seed_void, + void* state_void) { uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state = reinterpret_cast<uint64_t*>(state_void); const uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT seed = @@ -669,8 +619,8 @@ RandenHwAes::Absorb(const void* seed_void, void* state_void) { } // NOLINTNEXTLINE -void ABSL_TARGET_CRYPTO ABSL_FUNCTION_ALIGN32 ABSL_ATTRIBUTE_FLATTEN -RandenHwAes::Generate(const void* keys, void* state_void) { +void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys, + void* state_void) { static_assert(kCapacityBytes == sizeof(Vector128), "Capacity mismatch"); uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state = |