diff options
Diffstat (limited to 'absl/random/internal')
24 files changed, 1622 insertions, 1066 deletions
diff --git a/absl/random/internal/BUILD.bazel b/absl/random/internal/BUILD.bazel index d7ad4efe..8485e28b 100644 --- a/absl/random/internal/BUILD.bazel +++ b/absl/random/internal/BUILD.bazel @@ -30,16 +30,13 @@ package(default_visibility = [ "//absl/random:__pkg__", ]) -licenses(["notice"]) # Apache 2.0 +licenses(["notice"]) cc_library( name = "traits", hdrs = ["traits.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = [ - "//absl/random:__pkg__", - ], deps = ["//absl/base:config"], ) @@ -48,24 +45,10 @@ cc_library( hdrs = ["distribution_caller.h"], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = [ - "//absl/random:__pkg__", - ], - deps = ["//absl/base:config"], -) - -cc_library( - name = "distributions", - hdrs = ["distributions.h"], - copts = ABSL_DEFAULT_COPTS, - linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ - ":distribution_caller", - ":traits", - ":uniform_helper", - "//absl/base", - "//absl/meta:type_traits", - "//absl/strings", + "//absl/base:config", + "//absl/base:fast_type_id", + "//absl/utility", ], ) @@ -76,10 +59,10 @@ cc_library( ], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, - visibility = [ - "//absl/random:__pkg__", + deps = [ + "//absl/base:config", + "//absl/meta:type_traits", ], - deps = ["//absl/base:config"], ) cc_library( @@ -116,6 +99,7 @@ cc_library( copts = ABSL_DEFAULT_COPTS, linkopts = select({ "//absl:windows": [], + "//absl:wasm": [], "//conditions:default": ["-pthread"], }) + ABSL_DEFAULT_LINKOPTS, deps = [ @@ -230,7 +214,6 @@ cc_library( ":seed_material", "//absl/base:core_headers", "//absl/meta:type_traits", - "//absl/strings", "//absl/types:optional", "//absl/types:span", ], @@ -264,13 +247,15 @@ cc_library( cc_library( name = "platform", + srcs = [ + "randen_round_keys.cc", + ], hdrs = [ "randen_traits.h", ], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, textual_hdrs = [ - "randen-keys.inc", "platform.h", ], deps = ["//absl/base:config"], @@ -338,10 +323,6 @@ cc_library( "//absl:windows": [], "//conditions:default": ["-Wno-pass-failed"], }), - # copts in RANDEN_HWAES_COPTS can make this target unusable as a module - # leading to a Clang diagnostic. Furthermore, it only has a private header - # anyway and thus there wouldn't be any gain from using it as a module. - features = ["-header_modules"], linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ ":platform", @@ -504,12 +485,11 @@ cc_test( ) cc_library( - name = "mocking_bit_gen_base", - hdrs = ["mocking_bit_gen_base.h"], - linkopts = ABSL_DEFAULT_LINKOPTS, + name = "mock_helpers", + hdrs = ["mock_helpers.h"], deps = [ - "//absl/random", - "//absl/strings", + "//absl/base:fast_type_id", + "//absl/types:optional", ], ) @@ -517,10 +497,8 @@ cc_library( name = "mock_overload_set", testonly = 1, hdrs = ["mock_overload_set.h"], - visibility = [ - "//absl/random:__pkg__", - ], deps = [ + ":mock_helpers", "//absl/random:mocking_bit_gen", "@com_google_googletest//:gtest", ], @@ -625,6 +603,7 @@ cc_test( copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ + ":platform", ":randen_slow", "@com_google_googletest//:gtest_main", ], @@ -669,6 +648,7 @@ cc_library( deps = [ ":platform", ":randen_engine", + "//absl/base:config", "//absl/base:core_headers", "//absl/base:raw_logging_internal", ], @@ -680,6 +660,8 @@ cc_library( copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ + ":traits", + "//absl/base:config", "//absl/meta:type_traits", ], ) @@ -705,6 +687,7 @@ cc_test( cc_test( name = "randen_benchmarks", size = "medium", + timeout = "long", srcs = ["randen_benchmarks.cc"], copts = ABSL_TEST_COPTS + ABSL_RANDOM_RANDEN_COPTS, flaky = 1, @@ -733,3 +716,15 @@ cc_test( "@com_google_googletest//:gtest_main", ], ) + +cc_test( + name = "uniform_helper_test", + size = "small", + srcs = ["uniform_helper_test.cc"], + copts = ABSL_TEST_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":uniform_helper", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/absl/random/internal/distribution_caller.h b/absl/random/internal/distribution_caller.h index 02603cf8..fc81b787 100644 --- a/absl/random/internal/distribution_caller.h +++ b/absl/random/internal/distribution_caller.h @@ -20,6 +20,8 @@ #include <utility> #include "absl/base/config.h" +#include "absl/base/internal/fast_type_id.h" +#include "absl/utility/utility.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -30,27 +32,57 @@ namespace random_internal { // to intercept such calls. template <typename URBG> struct DistributionCaller { - // Call the provided distribution type. The parameters are expected - // to be explicitly specified. - // DistrT is the distribution type. - // FormatT is the formatter type: + // SFINAE to detect whether the URBG type includes a member matching + // bool InvokeMock(base_internal::FastTypeIdType, void*, void*). // - // struct FormatT { - // using result_type = distribution_t::result_type; - // static std::string FormatCall( - // const distribution_t& distr, - // absl::Span<const result_type>); - // - // static std::string FormatExpectation( - // absl::string_view match_args, - // absl::Span<const result_t> results); - // } - // - template <typename DistrT, typename FormatT, typename... Args> - static typename DistrT::result_type Call(URBG* urbg, Args&&... args) { + // These live inside BitGenRef so that they have friend access + // to MockingBitGen. (see similar methods in DistributionCaller). + template <template <class...> class Trait, class AlwaysVoid, class... Args> + struct detector : std::false_type {}; + template <template <class...> class Trait, class... Args> + struct detector<Trait, absl::void_t<Trait<Args...>>, Args...> + : std::true_type {}; + + template <class T> + using invoke_mock_t = decltype(std::declval<T*>()->InvokeMock( + std::declval<::absl::base_internal::FastTypeIdType>(), + std::declval<void*>(), std::declval<void*>())); + + using HasInvokeMock = typename detector<invoke_mock_t, void, URBG>::type; + + // Default implementation of distribution caller. + template <typename DistrT, typename... Args> + static typename DistrT::result_type Impl(std::false_type, URBG* urbg, + Args&&... args) { DistrT dist(std::forward<Args>(args)...); return dist(*urbg); } + + // Mock implementation of distribution caller. + // The underlying KeyT must match the KeyT constructed by MockOverloadSet. + template <typename DistrT, typename... Args> + static typename DistrT::result_type Impl(std::true_type, URBG* urbg, + Args&&... args) { + using ResultT = typename DistrT::result_type; + using ArgTupleT = std::tuple<absl::decay_t<Args>...>; + using KeyT = ResultT(DistrT, ArgTupleT); + + ArgTupleT arg_tuple(std::forward<Args>(args)...); + ResultT result; + if (!urbg->InvokeMock(::absl::base_internal::FastTypeId<KeyT>(), &arg_tuple, + &result)) { + auto dist = absl::make_from_tuple<DistrT>(arg_tuple); + result = dist(*urbg); + } + return result; + } + + // Default implementation of distribution caller. + template <typename DistrT, typename... Args> + static typename DistrT::result_type Call(URBG* urbg, Args&&... args) { + return Impl<DistrT, Args...>(HasInvokeMock{}, urbg, + std::forward<Args>(args)...); + } }; } // namespace random_internal diff --git a/absl/random/internal/distributions.h b/absl/random/internal/distributions.h deleted file mode 100644 index d7e3c016..00000000 --- a/absl/random/internal/distributions.h +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright 2019 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ABSL_RANDOM_INTERNAL_DISTRIBUTIONS_H_ -#define ABSL_RANDOM_INTERNAL_DISTRIBUTIONS_H_ - -#include <type_traits> - -#include "absl/meta/type_traits.h" -#include "absl/random/internal/distribution_caller.h" -#include "absl/random/internal/traits.h" -#include "absl/random/internal/uniform_helper.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace random_internal { - -// In the absence of an explicitly provided return-type, the template -// "uniform_inferred_return_t<A, B>" is used to derive a suitable type, based on -// the data-types of the endpoint-arguments {A lo, B hi}. -// -// Given endpoints {A lo, B hi}, one of {A, B} will be chosen as the -// return-type, if one type can be implicitly converted into the other, in a -// lossless way. The template "is_widening_convertible" implements the -// compile-time logic for deciding if such a conversion is possible. -// -// If no such conversion between {A, B} exists, then the overload for -// absl::Uniform() will be discarded, and the call will be ill-formed. -// Return-type for absl::Uniform() when the return-type is inferred. -template <typename A, typename B> -using uniform_inferred_return_t = - absl::enable_if_t<absl::disjunction<is_widening_convertible<A, B>, - is_widening_convertible<B, A>>::value, - typename std::conditional< - is_widening_convertible<A, B>::value, B, A>::type>; - -} // namespace random_internal -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_RANDOM_INTERNAL_DISTRIBUTIONS_H_ diff --git a/absl/random/internal/fast_uniform_bits.h b/absl/random/internal/fast_uniform_bits.h index f13c8729..425aaf7d 100644 --- a/absl/random/internal/fast_uniform_bits.h +++ b/absl/random/internal/fast_uniform_bits.h @@ -21,6 +21,7 @@ #include <type_traits> #include "absl/base/config.h" +#include "absl/meta/type_traits.h" namespace absl { ABSL_NAMESPACE_BEGIN @@ -38,28 +39,17 @@ constexpr bool IsPowerOfTwoOrZero(UIntType n) { template <typename URBG> constexpr typename URBG::result_type RangeSize() { using result_type = typename URBG::result_type; + static_assert((URBG::max)() != (URBG::min)(), "URBG range cannot be 0."); return ((URBG::max)() == (std::numeric_limits<result_type>::max)() && (URBG::min)() == std::numeric_limits<result_type>::lowest()) ? result_type{0} - : (URBG::max)() - (URBG::min)() + result_type{1}; -} - -template <typename UIntType> -constexpr UIntType LargestPowerOfTwoLessThanOrEqualTo(UIntType n) { - return n < 2 ? n : 2 * LargestPowerOfTwoLessThanOrEqualTo(n / 2); -} - -// Given a URBG generating values in the closed interval [Lo, Hi], returns the -// largest power of two less than or equal to `Hi - Lo + 1`. -template <typename URBG> -constexpr typename URBG::result_type PowerOfTwoSubRangeSize() { - return LargestPowerOfTwoLessThanOrEqualTo(RangeSize<URBG>()); + : ((URBG::max)() - (URBG::min)() + result_type{1}); } // Computes the floor of the log. (i.e., std::floor(std::log2(N)); template <typename UIntType> constexpr UIntType IntegerLog2(UIntType n) { - return (n <= 1) ? 0 : 1 + IntegerLog2(n / 2); + return (n <= 1) ? 0 : 1 + IntegerLog2(n >> 1); } // Returns the number of bits of randomness returned through @@ -68,18 +58,23 @@ template <typename URBG> constexpr size_t NumBits() { return RangeSize<URBG>() == 0 ? std::numeric_limits<typename URBG::result_type>::digits - : IntegerLog2(PowerOfTwoSubRangeSize<URBG>()); + : IntegerLog2(RangeSize<URBG>()); } // Given a shift value `n`, constructs a mask with exactly the low `n` bits set. // If `n == 0`, all bits are set. template <typename UIntType> -constexpr UIntType MaskFromShift(UIntType n) { +constexpr UIntType MaskFromShift(size_t n) { return ((n % std::numeric_limits<UIntType>::digits) == 0) ? ~UIntType{0} : (UIntType{1} << n) - UIntType{1}; } +// Tags used to dispatch FastUniformBits::generate to the simple or more complex +// entropy extraction algorithm. +struct SimplifiedLoopTag {}; +struct RejectionLoopTag {}; + // FastUniformBits implements a fast path to acquire uniform independent bits // from a type which conforms to the [rand.req.urbg] concept. // Parameterized by: @@ -107,50 +102,16 @@ class FastUniformBits { "Class-template FastUniformBits<> must be parameterized using " "an unsigned type."); - // PowerOfTwoVariate() generates a single random variate, always returning a - // value in the half-open interval `[0, PowerOfTwoSubRangeSize<URBG>())`. If - // the URBG already generates values in a power-of-two range, the generator - // itself is used. Otherwise, we use rejection sampling on the largest - // possible power-of-two-sized subrange. - struct PowerOfTwoTag {}; - struct RejectionSamplingTag {}; - template <typename URBG> - static typename URBG::result_type PowerOfTwoVariate( - URBG& g) { // NOLINT(runtime/references) - using tag = - typename std::conditional<IsPowerOfTwoOrZero(RangeSize<URBG>()), - PowerOfTwoTag, RejectionSamplingTag>::type; - return PowerOfTwoVariate(g, tag{}); - } - - template <typename URBG> - static typename URBG::result_type PowerOfTwoVariate( - URBG& g, // NOLINT(runtime/references) - PowerOfTwoTag) { - return g() - (URBG::min)(); - } - - template <typename URBG> - static typename URBG::result_type PowerOfTwoVariate( - URBG& g, // NOLINT(runtime/references) - RejectionSamplingTag) { - // Use rejection sampling to ensure uniformity across the range. - typename URBG::result_type u; - do { - u = g() - (URBG::min)(); - } while (u >= PowerOfTwoSubRangeSize<URBG>()); - return u; - } - // Generate() generates a random value, dispatched on whether - // the underlying URBG must loop over multiple calls or not. + // the underlying URBG must use rejection sampling to generate a value, + // or whether a simplified loop will suffice. template <typename URBG> result_type Generate(URBG& g, // NOLINT(runtime/references) - std::true_type /* avoid_looping */); + SimplifiedLoopTag); template <typename URBG> result_type Generate(URBG& g, // NOLINT(runtime/references) - std::false_type /* avoid_looping */); + RejectionLoopTag); }; template <typename UIntType> @@ -162,31 +123,47 @@ FastUniformBits<UIntType>::operator()(URBG& g) { // NOLINT(runtime/references) // Y = (2 ^ kRange) - 1 static_assert((URBG::max)() > (URBG::min)(), "URBG::max and URBG::min may not be equal."); - using urbg_result_type = typename URBG::result_type; - constexpr urbg_result_type kRangeMask = - RangeSize<URBG>() == 0 - ? (std::numeric_limits<urbg_result_type>::max)() - : static_cast<urbg_result_type>(PowerOfTwoSubRangeSize<URBG>() - 1); - return Generate(g, std::integral_constant<bool, (kRangeMask >= (max)())>{}); + + using tag = absl::conditional_t<IsPowerOfTwoOrZero(RangeSize<URBG>()), + SimplifiedLoopTag, RejectionLoopTag>; + return Generate(g, tag{}); } template <typename UIntType> template <typename URBG> typename FastUniformBits<UIntType>::result_type FastUniformBits<UIntType>::Generate(URBG& g, // NOLINT(runtime/references) - std::true_type /* avoid_looping */) { - // The width of the result_type is less than than the width of the random bits - // provided by URBG. Thus, generate a single value and then simply mask off - // the required bits. + SimplifiedLoopTag) { + // The simplified version of FastUniformBits works only on URBGs that have + // a range that is a power of 2. In this case we simply loop and shift without + // attempting to balance the bits across calls. + static_assert(IsPowerOfTwoOrZero(RangeSize<URBG>()), + "incorrect Generate tag for URBG instance"); + + static constexpr size_t kResultBits = + std::numeric_limits<result_type>::digits; + static constexpr size_t kUrbgBits = NumBits<URBG>(); + static constexpr size_t kIters = + (kResultBits / kUrbgBits) + (kResultBits % kUrbgBits != 0); + static constexpr size_t kShift = (kIters == 1) ? 0 : kUrbgBits; + static constexpr auto kMin = (URBG::min)(); - return PowerOfTwoVariate(g) & (max)(); + result_type r = static_cast<result_type>(g() - kMin); + for (size_t n = 1; n < kIters; ++n) { + r = (r << kShift) + static_cast<result_type>(g() - kMin); + } + return r; } template <typename UIntType> template <typename URBG> typename FastUniformBits<UIntType>::result_type FastUniformBits<UIntType>::Generate(URBG& g, // NOLINT(runtime/references) - std::false_type /* avoid_looping */) { + RejectionLoopTag) { + static_assert(!IsPowerOfTwoOrZero(RangeSize<URBG>()), + "incorrect Generate tag for URBG instance"); + using urbg_result_type = typename URBG::result_type; + // See [rand.adapt.ibits] for more details on the constants calculated below. // // It is preferable to use roughly the same number of bits from each generator @@ -199,21 +176,44 @@ FastUniformBits<UIntType>::Generate(URBG& g, // NOLINT(runtime/references) // `kSmallIters` and `kLargeIters` times respectively such // that // - // `kTotalWidth == kSmallIters * kSmallWidth - // + kLargeIters * kLargeWidth` + // `kResultBits == kSmallIters * kSmallBits + // + kLargeIters * kLargeBits` // - // where `kTotalWidth` is the total number of bits in `result_type`. + // where `kResultBits` is the total number of bits in `result_type`. // - constexpr size_t kTotalWidth = std::numeric_limits<result_type>::digits; - constexpr size_t kUrbgWidth = NumBits<URBG>(); - constexpr size_t kTotalIters = - kTotalWidth / kUrbgWidth + (kTotalWidth % kUrbgWidth != 0); - constexpr size_t kSmallWidth = kTotalWidth / kTotalIters; - constexpr size_t kLargeWidth = kSmallWidth + 1; + static constexpr size_t kResultBits = + std::numeric_limits<result_type>::digits; // w + static constexpr urbg_result_type kUrbgRange = RangeSize<URBG>(); // R + static constexpr size_t kUrbgBits = NumBits<URBG>(); // m + + // compute the initial estimate of the bits used. + // [rand.adapt.ibits] 2 (c) + static constexpr size_t kA = // ceil(w/m) + (kResultBits / kUrbgBits) + ((kResultBits % kUrbgBits) != 0); // n' + + static constexpr size_t kABits = kResultBits / kA; // w0' + static constexpr urbg_result_type kARejection = + ((kUrbgRange >> kABits) << kABits); // y0' + + // refine the selection to reduce the rejection frequency. + static constexpr size_t kTotalIters = + ((kUrbgRange - kARejection) <= (kARejection / kA)) ? kA : (kA + 1); // n + + // [rand.adapt.ibits] 2 (b) + static constexpr size_t kSmallIters = + kTotalIters - (kResultBits % kTotalIters); // n0 + static constexpr size_t kSmallBits = kResultBits / kTotalIters; // w0 + static constexpr urbg_result_type kSmallRejection = + ((kUrbgRange >> kSmallBits) << kSmallBits); // y0 + + static constexpr size_t kLargeBits = kSmallBits + 1; // w0+1 + static constexpr urbg_result_type kLargeRejection = + ((kUrbgRange >> kLargeBits) << kLargeBits); // y1 + // - // Because `kLargeWidth == kSmallWidth + 1`, it follows that + // Because `kLargeBits == kSmallBits + 1`, it follows that // - // `kTotalWidth == kTotalIters * kSmallWidth + kLargeIters` + // `kResultBits == kSmallIters * kSmallBits + kLargeIters` // // and therefore // @@ -224,36 +224,40 @@ FastUniformBits<UIntType>::Generate(URBG& g, // NOLINT(runtime/references) // mentioned above, if the URBG width is a divisor of `kTotalWidth`, then // there would be no need for any large iterations (i.e., one loop would // suffice), and indeed, in this case, `kLargeIters` would be zero. - constexpr size_t kLargeIters = kTotalWidth % kSmallWidth; - constexpr size_t kSmallIters = - (kTotalWidth - (kLargeWidth * kLargeIters)) / kSmallWidth; + static_assert(kResultBits == kSmallIters * kSmallBits + + (kTotalIters - kSmallIters) * kLargeBits, + "Error in looping constant calculations."); - static_assert( - kTotalWidth == kSmallIters * kSmallWidth + kLargeIters * kLargeWidth, - "Error in looping constant calculations."); + // The small shift is essentially small bits, but due to the potential + // of generating a smaller result_type from a larger urbg type, the actual + // shift might be 0. + static constexpr size_t kSmallShift = kSmallBits % kResultBits; + static constexpr auto kSmallMask = + MaskFromShift<urbg_result_type>(kSmallShift); + static constexpr size_t kLargeShift = kLargeBits % kResultBits; + static constexpr auto kLargeMask = + MaskFromShift<urbg_result_type>(kLargeShift); - result_type s = 0; + static constexpr auto kMin = (URBG::min)(); - constexpr size_t kSmallShift = kSmallWidth % kTotalWidth; - constexpr result_type kSmallMask = MaskFromShift(result_type{kSmallShift}); + result_type s = 0; for (size_t n = 0; n < kSmallIters; ++n) { - s = (s << kSmallShift) + - (static_cast<result_type>(PowerOfTwoVariate(g)) & kSmallMask); - } + urbg_result_type v; + do { + v = g() - kMin; + } while (v >= kSmallRejection); - constexpr size_t kLargeShift = kLargeWidth % kTotalWidth; - constexpr result_type kLargeMask = MaskFromShift(result_type{kLargeShift}); - for (size_t n = 0; n < kLargeIters; ++n) { - s = (s << kLargeShift) + - (static_cast<result_type>(PowerOfTwoVariate(g)) & kLargeMask); + s = (s << kSmallShift) + static_cast<result_type>(v & kSmallMask); } - static_assert( - kLargeShift == kSmallShift + 1 || - (kLargeShift == 0 && - kSmallShift == std::numeric_limits<result_type>::digits - 1), - "Error in looping constant calculations"); + for (size_t n = kSmallIters; n < kTotalIters; ++n) { + urbg_result_type v; + do { + v = g() - kMin; + } while (v >= kLargeRejection); + s = (s << kLargeShift) + static_cast<result_type>(v & kLargeMask); + } return s; } diff --git a/absl/random/internal/fast_uniform_bits_test.cc b/absl/random/internal/fast_uniform_bits_test.cc index f5b837e5..cee702df 100644 --- a/absl/random/internal/fast_uniform_bits_test.cc +++ b/absl/random/internal/fast_uniform_bits_test.cc @@ -34,8 +34,8 @@ TYPED_TEST(FastUniformBitsTypedTest, BasicTest) { using Limits = std::numeric_limits<TypeParam>; using FastBits = FastUniformBits<TypeParam>; - EXPECT_EQ(0, FastBits::min()); - EXPECT_EQ(Limits::max(), FastBits::max()); + EXPECT_EQ(0, (FastBits::min)()); + EXPECT_EQ((Limits::max)(), (FastBits::max)()); constexpr int kIters = 10000; std::random_device rd; @@ -43,8 +43,8 @@ TYPED_TEST(FastUniformBitsTypedTest, BasicTest) { FastBits fast; for (int i = 0; i < kIters; i++) { const auto v = fast(gen); - EXPECT_LE(v, FastBits::max()); - EXPECT_GE(v, FastBits::min()); + EXPECT_LE(v, (FastBits::max)()); + EXPECT_GE(v, (FastBits::min)()); } } @@ -52,21 +52,26 @@ template <typename UIntType, UIntType Lo, UIntType Hi, UIntType Val = Lo> struct FakeUrbg { using result_type = UIntType; + FakeUrbg() = default; + explicit FakeUrbg(bool r) : reject(r) {} + static constexpr result_type(max)() { return Hi; } static constexpr result_type(min)() { return Lo; } - result_type operator()() { return Val; } -}; + result_type operator()() { + // when reject is set, return Hi half the time. + return ((++calls % 2) == 1 && reject) ? Hi : Val; + } -using UrngOddbits = FakeUrbg<uint8_t, 1, 0xfe, 0x73>; -using Urng4bits = FakeUrbg<uint8_t, 1, 0x10, 2>; -using Urng31bits = FakeUrbg<uint32_t, 1, 0xfffffffe, 0x60070f03>; -using Urng32bits = FakeUrbg<uint32_t, 0, 0xffffffff, 0x74010f01>; + bool reject = false; + size_t calls = 0; +}; TEST(FastUniformBitsTest, IsPowerOfTwoOrZero) { EXPECT_TRUE(IsPowerOfTwoOrZero(uint8_t{0})); EXPECT_TRUE(IsPowerOfTwoOrZero(uint8_t{1})); EXPECT_TRUE(IsPowerOfTwoOrZero(uint8_t{2})); EXPECT_FALSE(IsPowerOfTwoOrZero(uint8_t{3})); + EXPECT_TRUE(IsPowerOfTwoOrZero(uint8_t{4})); EXPECT_TRUE(IsPowerOfTwoOrZero(uint8_t{16})); EXPECT_FALSE(IsPowerOfTwoOrZero(uint8_t{17})); EXPECT_FALSE(IsPowerOfTwoOrZero((std::numeric_limits<uint8_t>::max)())); @@ -75,6 +80,7 @@ TEST(FastUniformBitsTest, IsPowerOfTwoOrZero) { EXPECT_TRUE(IsPowerOfTwoOrZero(uint16_t{1})); EXPECT_TRUE(IsPowerOfTwoOrZero(uint16_t{2})); EXPECT_FALSE(IsPowerOfTwoOrZero(uint16_t{3})); + EXPECT_TRUE(IsPowerOfTwoOrZero(uint16_t{4})); EXPECT_TRUE(IsPowerOfTwoOrZero(uint16_t{16})); EXPECT_FALSE(IsPowerOfTwoOrZero(uint16_t{17})); EXPECT_FALSE(IsPowerOfTwoOrZero((std::numeric_limits<uint16_t>::max)())); @@ -91,181 +97,237 @@ TEST(FastUniformBitsTest, IsPowerOfTwoOrZero) { EXPECT_TRUE(IsPowerOfTwoOrZero(uint64_t{1})); EXPECT_TRUE(IsPowerOfTwoOrZero(uint64_t{2})); EXPECT_FALSE(IsPowerOfTwoOrZero(uint64_t{3})); + EXPECT_TRUE(IsPowerOfTwoOrZero(uint64_t{4})); EXPECT_TRUE(IsPowerOfTwoOrZero(uint64_t{64})); EXPECT_FALSE(IsPowerOfTwoOrZero(uint64_t{17})); EXPECT_FALSE(IsPowerOfTwoOrZero((std::numeric_limits<uint64_t>::max)())); } TEST(FastUniformBitsTest, IntegerLog2) { - EXPECT_EQ(IntegerLog2(uint16_t{0}), 0); - EXPECT_EQ(IntegerLog2(uint16_t{1}), 0); - EXPECT_EQ(IntegerLog2(uint16_t{2}), 1); - EXPECT_EQ(IntegerLog2(uint16_t{3}), 1); - EXPECT_EQ(IntegerLog2(uint16_t{4}), 2); - EXPECT_EQ(IntegerLog2(uint16_t{5}), 2); - EXPECT_EQ(IntegerLog2(std::numeric_limits<uint64_t>::max()), 63); + EXPECT_EQ(0, IntegerLog2(uint16_t{0})); + EXPECT_EQ(0, IntegerLog2(uint16_t{1})); + EXPECT_EQ(1, IntegerLog2(uint16_t{2})); + EXPECT_EQ(1, IntegerLog2(uint16_t{3})); + EXPECT_EQ(2, IntegerLog2(uint16_t{4})); + EXPECT_EQ(2, IntegerLog2(uint16_t{5})); + EXPECT_EQ(2, IntegerLog2(uint16_t{7})); + EXPECT_EQ(3, IntegerLog2(uint16_t{8})); + EXPECT_EQ(63, IntegerLog2((std::numeric_limits<uint64_t>::max)())); } TEST(FastUniformBitsTest, RangeSize) { - EXPECT_EQ((RangeSize<FakeUrbg<uint8_t, 0, 3>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint8_t, 2, 2>>()), 1); - EXPECT_EQ((RangeSize<FakeUrbg<uint8_t, 2, 5>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint8_t, 2, 6>>()), 5); - EXPECT_EQ((RangeSize<FakeUrbg<uint8_t, 2, 10>>()), 9); + EXPECT_EQ(2, (RangeSize<FakeUrbg<uint8_t, 0, 1>>())); + EXPECT_EQ(3, (RangeSize<FakeUrbg<uint8_t, 0, 2>>())); + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint8_t, 0, 3>>())); + // EXPECT_EQ(0, (RangeSize<FakeUrbg<uint8_t, 2, 2>>())); + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint8_t, 2, 5>>())); + EXPECT_EQ(5, (RangeSize<FakeUrbg<uint8_t, 2, 6>>())); + EXPECT_EQ(9, (RangeSize<FakeUrbg<uint8_t, 2, 10>>())); EXPECT_EQ( - (RangeSize<FakeUrbg<uint8_t, 0, std::numeric_limits<uint8_t>::max()>>()), - 0); - - EXPECT_EQ((RangeSize<FakeUrbg<uint16_t, 0, 3>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint16_t, 2, 2>>()), 1); - EXPECT_EQ((RangeSize<FakeUrbg<uint16_t, 2, 5>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint16_t, 2, 6>>()), 5); - EXPECT_EQ((RangeSize<FakeUrbg<uint16_t, 1000, 1017>>()), 18); - EXPECT_EQ((RangeSize< - FakeUrbg<uint16_t, 0, std::numeric_limits<uint16_t>::max()>>()), - 0); - - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 0, 3>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 2, 2>>()), 1); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 2, 5>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 2, 6>>()), 5); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 1000, 1017>>()), 18); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 0, 0xffffffff>>()), 0); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 1, 0xffffffff>>()), 0xffffffff); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 1, 0xfffffffe>>()), 0xfffffffe); - EXPECT_EQ((RangeSize<FakeUrbg<uint32_t, 2, 0xfffffffe>>()), 0xfffffffd); - EXPECT_EQ((RangeSize< - FakeUrbg<uint32_t, 0, std::numeric_limits<uint32_t>::max()>>()), - 0); - - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 0, 3>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 2, 2>>()), 1); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 2, 5>>()), 4); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 2, 6>>()), 5); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 1000, 1017>>()), 18); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 0, 0xffffffff>>()), 0x100000000ull); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 1, 0xffffffff>>()), 0xffffffffull); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 1, 0xfffffffe>>()), 0xfffffffeull); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 2, 0xfffffffe>>()), 0xfffffffdull); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 0, 0xffffffffffffffffull>>()), 0ull); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 1, 0xffffffffffffffffull>>()), - 0xffffffffffffffffull); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 1, 0xfffffffffffffffeull>>()), - 0xfffffffffffffffeull); - EXPECT_EQ((RangeSize<FakeUrbg<uint64_t, 2, 0xfffffffffffffffeull>>()), - 0xfffffffffffffffdull); - EXPECT_EQ((RangeSize< - FakeUrbg<uint64_t, 0, std::numeric_limits<uint64_t>::max()>>()), - 0); -} + 0, (RangeSize< + FakeUrbg<uint8_t, 0, (std::numeric_limits<uint8_t>::max)()>>())); -TEST(FastUniformBitsTest, PowerOfTwoSubRangeSize) { - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint8_t, 0, 3>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint8_t, 2, 2>>()), 1); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint8_t, 2, 5>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint8_t, 2, 6>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint8_t, 2, 10>>()), 8); - EXPECT_EQ((PowerOfTwoSubRangeSize< - FakeUrbg<uint8_t, 0, std::numeric_limits<uint8_t>::max()>>()), - 0); - - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint16_t, 0, 3>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint16_t, 2, 2>>()), 1); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint16_t, 2, 5>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint16_t, 2, 6>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint16_t, 1000, 1017>>()), 16); - EXPECT_EQ((PowerOfTwoSubRangeSize< - FakeUrbg<uint16_t, 0, std::numeric_limits<uint16_t>::max()>>()), - 0); - - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 0, 3>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 2, 2>>()), 1); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 2, 5>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 2, 6>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 1000, 1017>>()), 16); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 0, 0xffffffff>>()), 0); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 1, 0xffffffff>>()), - 0x80000000); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint32_t, 1, 0xfffffffe>>()), - 0x80000000); - EXPECT_EQ((PowerOfTwoSubRangeSize< - FakeUrbg<uint32_t, 0, std::numeric_limits<uint32_t>::max()>>()), - 0); - - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 0, 3>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 2, 2>>()), 1); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 2, 5>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 2, 6>>()), 4); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 1000, 1017>>()), 16); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 0, 0xffffffff>>()), - 0x100000000ull); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 1, 0xffffffff>>()), - 0x80000000ull); - EXPECT_EQ((PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 1, 0xfffffffe>>()), - 0x80000000ull); + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint16_t, 0, 3>>())); + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint16_t, 2, 5>>())); + EXPECT_EQ(5, (RangeSize<FakeUrbg<uint16_t, 2, 6>>())); + EXPECT_EQ(18, (RangeSize<FakeUrbg<uint16_t, 1000, 1017>>())); EXPECT_EQ( - (PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 0, 0xffffffffffffffffull>>()), - 0); + 0, (RangeSize< + FakeUrbg<uint16_t, 0, (std::numeric_limits<uint16_t>::max)()>>())); + + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint32_t, 0, 3>>())); + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint32_t, 2, 5>>())); + EXPECT_EQ(5, (RangeSize<FakeUrbg<uint32_t, 2, 6>>())); + EXPECT_EQ(18, (RangeSize<FakeUrbg<uint32_t, 1000, 1017>>())); + EXPECT_EQ(0, (RangeSize<FakeUrbg<uint32_t, 0, 0xffffffff>>())); + EXPECT_EQ(0xffffffff, (RangeSize<FakeUrbg<uint32_t, 1, 0xffffffff>>())); + EXPECT_EQ(0xfffffffe, (RangeSize<FakeUrbg<uint32_t, 1, 0xfffffffe>>())); + EXPECT_EQ(0xfffffffd, (RangeSize<FakeUrbg<uint32_t, 2, 0xfffffffe>>())); EXPECT_EQ( - (PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 1, 0xffffffffffffffffull>>()), - 0x8000000000000000ull); + 0, (RangeSize< + FakeUrbg<uint32_t, 0, (std::numeric_limits<uint32_t>::max)()>>())); + + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint64_t, 0, 3>>())); + EXPECT_EQ(4, (RangeSize<FakeUrbg<uint64_t, 2, 5>>())); + EXPECT_EQ(5, (RangeSize<FakeUrbg<uint64_t, 2, 6>>())); + EXPECT_EQ(18, (RangeSize<FakeUrbg<uint64_t, 1000, 1017>>())); + EXPECT_EQ(0x100000000, (RangeSize<FakeUrbg<uint64_t, 0, 0xffffffff>>())); + EXPECT_EQ(0xffffffff, (RangeSize<FakeUrbg<uint64_t, 1, 0xffffffff>>())); + EXPECT_EQ(0xfffffffe, (RangeSize<FakeUrbg<uint64_t, 1, 0xfffffffe>>())); + EXPECT_EQ(0xfffffffd, (RangeSize<FakeUrbg<uint64_t, 2, 0xfffffffe>>())); + EXPECT_EQ(0, (RangeSize<FakeUrbg<uint64_t, 0, 0xffffffffffffffff>>())); + EXPECT_EQ(0xffffffffffffffff, + (RangeSize<FakeUrbg<uint64_t, 1, 0xffffffffffffffff>>())); + EXPECT_EQ(0xfffffffffffffffe, + (RangeSize<FakeUrbg<uint64_t, 1, 0xfffffffffffffffe>>())); + EXPECT_EQ(0xfffffffffffffffd, + (RangeSize<FakeUrbg<uint64_t, 2, 0xfffffffffffffffe>>())); EXPECT_EQ( - (PowerOfTwoSubRangeSize<FakeUrbg<uint64_t, 1, 0xfffffffffffffffeull>>()), - 0x8000000000000000ull); - EXPECT_EQ((PowerOfTwoSubRangeSize< - FakeUrbg<uint64_t, 0, std::numeric_limits<uint64_t>::max()>>()), - 0); + 0, (RangeSize< + FakeUrbg<uint64_t, 0, (std::numeric_limits<uint64_t>::max)()>>())); } -TEST(FastUniformBitsTest, Urng4_VariousOutputs) { +// The constants need to be choosen so that an infinite rejection loop doesn't +// happen... +using Urng1_5bit = FakeUrbg<uint8_t, 0, 2, 0>; // ~1.5 bits (range 3) +using Urng4bits = FakeUrbg<uint8_t, 1, 0x10, 2>; +using Urng22bits = FakeUrbg<uint32_t, 0, 0x3fffff, 0x301020>; +using Urng31bits = FakeUrbg<uint32_t, 1, 0xfffffffe, 0x60070f03>; // ~31.9 bits +using Urng32bits = FakeUrbg<uint32_t, 0, 0xffffffff, 0x74010f01>; +using Urng33bits = + FakeUrbg<uint64_t, 1, 0x1ffffffff, 0x013301033>; // ~32.9 bits +using Urng63bits = FakeUrbg<uint64_t, 1, 0xfffffffffffffffe, + 0xfedcba9012345678>; // ~63.9 bits +using Urng64bits = + FakeUrbg<uint64_t, 0, 0xffffffffffffffff, 0x123456780fedcba9>; + +TEST(FastUniformBitsTest, OutputsUpTo32Bits) { // Tests that how values are composed; the single-bit deltas should be spread // across each invocation. + Urng1_5bit urng1_5; Urng4bits urng4; + Urng22bits urng22; Urng31bits urng31; Urng32bits urng32; + Urng33bits urng33; + Urng63bits urng63; + Urng64bits urng64; // 8-bit types { FastUniformBits<uint8_t> fast8; + EXPECT_EQ(0x0, fast8(urng1_5)); EXPECT_EQ(0x11, fast8(urng4)); + EXPECT_EQ(0x20, fast8(urng22)); EXPECT_EQ(0x2, fast8(urng31)); EXPECT_EQ(0x1, fast8(urng32)); + EXPECT_EQ(0x32, fast8(urng33)); + EXPECT_EQ(0x77, fast8(urng63)); + EXPECT_EQ(0xa9, fast8(urng64)); } // 16-bit types { FastUniformBits<uint16_t> fast16; + EXPECT_EQ(0x0, fast16(urng1_5)); EXPECT_EQ(0x1111, fast16(urng4)); - EXPECT_EQ(0xf02, fast16(urng31)); - EXPECT_EQ(0xf01, fast16(urng32)); + EXPECT_EQ(0x1020, fast16(urng22)); + EXPECT_EQ(0x0f02, fast16(urng31)); + EXPECT_EQ(0x0f01, fast16(urng32)); + EXPECT_EQ(0x1032, fast16(urng33)); + EXPECT_EQ(0x5677, fast16(urng63)); + EXPECT_EQ(0xcba9, fast16(urng64)); } // 32-bit types { FastUniformBits<uint32_t> fast32; + EXPECT_EQ(0x0, fast32(urng1_5)); EXPECT_EQ(0x11111111, fast32(urng4)); + EXPECT_EQ(0x08301020, fast32(urng22)); EXPECT_EQ(0x0f020f02, fast32(urng31)); EXPECT_EQ(0x74010f01, fast32(urng32)); + EXPECT_EQ(0x13301032, fast32(urng33)); + EXPECT_EQ(0x12345677, fast32(urng63)); + EXPECT_EQ(0x0fedcba9, fast32(urng64)); } +} + +TEST(FastUniformBitsTest, Outputs64Bits) { + // Tests that how values are composed; the single-bit deltas should be spread + // across each invocation. + FastUniformBits<uint64_t> fast64; - // 64-bit types { - FastUniformBits<uint64_t> fast64; + FakeUrbg<uint8_t, 0, 1, 0> urng0; + FakeUrbg<uint8_t, 0, 1, 1> urng1; + Urng4bits urng4; + Urng22bits urng22; + Urng31bits urng31; + Urng32bits urng32; + Urng33bits urng33; + Urng63bits urng63; + Urng64bits urng64; + + // somewhat degenerate cases only create a single bit. + EXPECT_EQ(0x0, fast64(urng0)); + EXPECT_EQ(64, urng0.calls); + EXPECT_EQ(0xffffffffffffffff, fast64(urng1)); + EXPECT_EQ(64, urng1.calls); + + // less degenerate cases. EXPECT_EQ(0x1111111111111111, fast64(urng4)); + EXPECT_EQ(16, urng4.calls); + EXPECT_EQ(0x01020c0408301020, fast64(urng22)); + EXPECT_EQ(3, urng22.calls); EXPECT_EQ(0x387811c3c0870f02, fast64(urng31)); + EXPECT_EQ(3, urng31.calls); EXPECT_EQ(0x74010f0174010f01, fast64(urng32)); + EXPECT_EQ(2, urng32.calls); + EXPECT_EQ(0x808194040cb01032, fast64(urng33)); + EXPECT_EQ(3, urng33.calls); + EXPECT_EQ(0x1234567712345677, fast64(urng63)); + EXPECT_EQ(2, urng63.calls); + EXPECT_EQ(0x123456780fedcba9, fast64(urng64)); + EXPECT_EQ(1, urng64.calls); + } + + // The 1.5 bit case is somewhat interesting in that the algorithm refinement + // causes one extra small sample. Comments here reference the names used in + // [rand.adapt.ibits] that correspond to this case. + { + Urng1_5bit urng1_5; + + // w = 64 + // R = 3 + // m = 1 + // n' = 64 + // w0' = 1 + // y0' = 2 + // n = (1 <= 0) > 64 : 65 = 65 + // n0 = 65 - (64%65) = 1 + // n1 = 64 + // w0 = 0 + // y0 = 3 + // w1 = 1 + // y1 = 2 + EXPECT_EQ(0x0, fast64(urng1_5)); + EXPECT_EQ(65, urng1_5.calls); + } + + // Validate rejections for non-power-of-2 cases. + { + Urng1_5bit urng1_5(true); + Urng31bits urng31(true); + Urng33bits urng33(true); + Urng63bits urng63(true); + + // For 1.5 bits, there would be 1+2*64, except the first + // value was accepted and shifted off the end. + EXPECT_EQ(0, fast64(urng1_5)); + EXPECT_EQ(128, urng1_5.calls); + EXPECT_EQ(0x387811c3c0870f02, fast64(urng31)); + EXPECT_EQ(6, urng31.calls); + EXPECT_EQ(0x808194040cb01032, fast64(urng33)); + EXPECT_EQ(6, urng33.calls); + EXPECT_EQ(0x1234567712345677, fast64(urng63)); + EXPECT_EQ(4, urng63.calls); } } TEST(FastUniformBitsTest, URBG32bitRegression) { // Validate with deterministic 32-bit std::minstd_rand // to ensure that operator() performs as expected. + + EXPECT_EQ(2147483646, RangeSize<std::minstd_rand>()); + EXPECT_EQ(30, IntegerLog2(RangeSize<std::minstd_rand>())); + std::minstd_rand gen(1); FastUniformBits<uint64_t> fast64; - EXPECT_EQ(0x05e47095f847c122ull, fast64(gen)); - EXPECT_EQ(0x8f82c1ba30b64d22ull, fast64(gen)); - EXPECT_EQ(0x3b971a3558155039ull, fast64(gen)); + EXPECT_EQ(0x05e47095f8791f45, fast64(gen)); + EXPECT_EQ(0x028be17e3c07c122, fast64(gen)); + EXPECT_EQ(0x55d2847c1626e8c2, fast64(gen)); } } // namespace diff --git a/absl/random/internal/gaussian_distribution_gentables.cc b/absl/random/internal/gaussian_distribution_gentables.cc index a2bf0394..a95333d5 100644 --- a/absl/random/internal/gaussian_distribution_gentables.cc +++ b/absl/random/internal/gaussian_distribution_gentables.cc @@ -111,12 +111,9 @@ void TableGenerator::Print(std::ostream* os) { "\n" "#include \"absl/random/gaussian_distribution.h\"\n" "\n" - // "namespace " and "absl" are broken apart so as not to conflict with - // script that adds the LTS inline namespace. - "namespace " - "absl {\n" - "namespace " - "random_internal {\n" + "namespace absl {\n" + "ABSL_NAMESPACE_BEGIN\n" + "namespace random_internal {\n" "\n" "const gaussian_distribution_base::Tables\n" " gaussian_distribution_base::zg_ = {\n"; @@ -125,10 +122,9 @@ void TableGenerator::Print(std::ostream* os) { FormatArrayContents(os, tables_.f); *os << "};\n" "\n" - "} // namespace " - "random_internal\n" - "} // namespace " - "absl\n" + "} // namespace random_internal\n" + "ABSL_NAMESPACE_END\n" + "} // namespace absl\n" "\n" "// clang-format on\n" "// END GENERATED CODE"; diff --git a/absl/random/internal/iostream_state_saver.h b/absl/random/internal/iostream_state_saver.h index 7378829a..e6e242ee 100644 --- a/absl/random/internal/iostream_state_saver.h +++ b/absl/random/internal/iostream_state_saver.h @@ -192,8 +192,8 @@ struct stream_u128_helper<absl::uint128> { template <typename OStream> inline void write(absl::uint128 val, OStream& out) { - uint64_t h = Uint128High64(val); - uint64_t l = Uint128Low64(val); + uint64_t h = absl::Uint128High64(val); + uint64_t l = absl::Uint128Low64(val); out << h << out.fill() << l; } }; diff --git a/absl/random/internal/mock_helpers.h b/absl/random/internal/mock_helpers.h new file mode 100644 index 00000000..9af27ab3 --- /dev/null +++ b/absl/random/internal/mock_helpers.h @@ -0,0 +1,127 @@ +// +// Copyright 2019 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_RANDOM_INTERNAL_MOCK_HELPERS_H_ +#define ABSL_RANDOM_INTERNAL_MOCK_HELPERS_H_ + +#include <tuple> +#include <type_traits> + +#include "absl/base/internal/fast_type_id.h" +#include "absl/types/optional.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace random_internal { + +// MockHelpers works in conjunction with MockOverloadSet, MockingBitGen, and +// BitGenRef to enable the mocking capability for absl distribution functions. +// +// MockingBitGen registers mocks based on the typeid of a mock signature, KeyT, +// which is used to generate a unique id. +// +// KeyT is a signature of the form: +// result_type(discriminator_type, std::tuple<args...>) +// The mocked function signature will be composed from KeyT as: +// result_type(args...) +// +class MockHelpers { + using IdType = ::absl::base_internal::FastTypeIdType; + + // Given a key signature type used to index the mock, extract the components. + // KeyT is expected to have the form: + // result_type(discriminator_type, arg_tuple_type) + template <typename KeyT> + struct KeySignature; + + template <typename ResultT, typename DiscriminatorT, typename ArgTupleT> + struct KeySignature<ResultT(DiscriminatorT, ArgTupleT)> { + using result_type = ResultT; + using discriminator_type = DiscriminatorT; + using arg_tuple_type = ArgTupleT; + }; + + // Detector for InvokeMock. + template <class T> + using invoke_mock_t = decltype(std::declval<T*>()->InvokeMock( + std::declval<IdType>(), std::declval<void*>(), std::declval<void*>())); + + // Empty implementation of InvokeMock. + template <typename KeyT, typename ReturnT, typename ArgTupleT, typename URBG, + typename... Args> + static absl::optional<ReturnT> InvokeMockImpl(char, URBG*, Args&&...) { + return absl::nullopt; + } + + // Non-empty implementation of InvokeMock. + template <typename KeyT, typename ReturnT, typename ArgTupleT, typename URBG, + typename = invoke_mock_t<URBG>, typename... Args> + static absl::optional<ReturnT> InvokeMockImpl(int, URBG* urbg, + Args&&... args) { + ArgTupleT arg_tuple(std::forward<Args>(args)...); + ReturnT result; + if (urbg->InvokeMock(::absl::base_internal::FastTypeId<KeyT>(), &arg_tuple, + &result)) { + return result; + } + return absl::nullopt; + } + + public: + // Invoke a mock for the KeyT (may or may not be a signature). + // + // KeyT is used to generate a typeid-based lookup key for the mock. + // KeyT is a signature of the form: + // result_type(discriminator_type, std::tuple<args...>) + // The mocked function signature will be composed from KeyT as: + // result_type(args...) + // + // An instance of arg_tuple_type must be constructable from Args..., since + // the underlying mechanism requires a pointer to an argument tuple. + template <typename KeyT, typename URBG, typename... Args> + static auto MaybeInvokeMock(URBG* urbg, Args&&... args) + -> absl::optional<typename KeySignature<KeyT>::result_type> { + // Use function overloading to dispatch to the implemenation since + // more modern patterns (e.g. require + constexpr) are not supported in all + // compiler configurations. + return InvokeMockImpl<KeyT, typename KeySignature<KeyT>::result_type, + typename KeySignature<KeyT>::arg_tuple_type, URBG>( + 0, urbg, std::forward<Args>(args)...); + } + + // Acquire a mock for the KeyT (may or may not be a signature). + // + // KeyT is used to generate a typeid-based lookup for the mock. + // KeyT is a signature of the form: + // result_type(discriminator_type, std::tuple<args...>) + // The mocked function signature will be composed from KeyT as: + // result_type(args...) + template <typename KeyT, typename MockURBG> + static auto MockFor(MockURBG& m) -> decltype( + std::declval<MockURBG>() + .template RegisterMock<typename KeySignature<KeyT>::result_type, + typename KeySignature<KeyT>::arg_tuple_type>( + std::declval<IdType>())) { + return m.template RegisterMock<typename KeySignature<KeyT>::result_type, + typename KeySignature<KeyT>::arg_tuple_type>( + ::absl::base_internal::FastTypeId<KeyT>()); + } +}; + +} // namespace random_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_RANDOM_INTERNAL_MOCK_HELPERS_H_ diff --git a/absl/random/internal/mock_overload_set.h b/absl/random/internal/mock_overload_set.h index c2a30d89..dccc6cee 100644 --- a/absl/random/internal/mock_overload_set.h +++ b/absl/random/internal/mock_overload_set.h @@ -20,6 +20,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/random/internal/mock_helpers.h" #include "absl/random/mocking_bit_gen.h" namespace absl { @@ -35,17 +36,20 @@ struct MockSingleOverload; // EXPECT_CALL(mock_single_overload, Call(...))` will expand to a call to // `mock_single_overload.gmock_Call(...)`. Because expectations are stored on // the MockingBitGen (an argument passed inside `Call(...)`), this forwards to -// arguments to Mocking::Register. +// arguments to MockingBitGen::Register. +// +// The underlying KeyT must match the KeyT constructed by DistributionCaller. template <typename DistrT, typename Ret, typename... Args> struct MockSingleOverload<DistrT, Ret(MockingBitGen&, Args...)> { static_assert(std::is_same<typename DistrT::result_type, Ret>::value, "Overload signature must have return type matching the " - "distributions result type."); + "distribution result_type."); + using KeyT = Ret(DistrT, std::tuple<Args...>); auto gmock_Call( absl::MockingBitGen& gen, // NOLINT(google-runtime-references) - const ::testing::Matcher<Args>&... args) - -> decltype(gen.Register<DistrT, Args...>(args...)) { - return gen.Register<DistrT, Args...>(args...); + const ::testing::Matcher<Args>&... matchers) + -> decltype(MockHelpers::MockFor<KeyT>(gen).gmock_Call(matchers...)) { + return MockHelpers::MockFor<KeyT>(gen).gmock_Call(matchers...); } }; @@ -53,13 +57,15 @@ template <typename DistrT, typename Ret, typename Arg, typename... Args> struct MockSingleOverload<DistrT, Ret(Arg, MockingBitGen&, Args...)> { static_assert(std::is_same<typename DistrT::result_type, Ret>::value, "Overload signature must have return type matching the " - "distributions result type."); + "distribution result_type."); + using KeyT = Ret(DistrT, std::tuple<Arg, Args...>); auto gmock_Call( - const ::testing::Matcher<Arg>& arg, + const ::testing::Matcher<Arg>& matcher, absl::MockingBitGen& gen, // NOLINT(google-runtime-references) - const ::testing::Matcher<Args>&... args) - -> decltype(gen.Register<DistrT, Arg, Args...>(arg, args...)) { - return gen.Register<DistrT, Arg, Args...>(arg, args...); + const ::testing::Matcher<Args>&... matchers) + -> decltype(MockHelpers::MockFor<KeyT>(gen).gmock_Call(matcher, + matchers...)) { + return MockHelpers::MockFor<KeyT>(gen).gmock_Call(matcher, matchers...); } }; diff --git a/absl/random/internal/mocking_bit_gen_base.h b/absl/random/internal/mocking_bit_gen_base.h deleted file mode 100644 index eeeae9d2..00000000 --- a/absl/random/internal/mocking_bit_gen_base.h +++ /dev/null @@ -1,120 +0,0 @@ -// -// Copyright 2018 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#ifndef ABSL_RANDOM_INTERNAL_MOCKING_BIT_GEN_BASE_H_ -#define ABSL_RANDOM_INTERNAL_MOCKING_BIT_GEN_BASE_H_ - -#include <atomic> -#include <deque> -#include <string> -#include <typeinfo> - -#include "absl/random/random.h" -#include "absl/strings/str_cat.h" - -namespace absl { -ABSL_NAMESPACE_BEGIN -namespace random_internal { - -// MockingBitGenExpectationFormatter is invoked to format unsatisfied mocks -// and remaining results into a description string. -template <typename DistrT, typename FormatT> -struct MockingBitGenExpectationFormatter { - std::string operator()(absl::string_view args) { - return absl::StrCat(FormatT::FunctionName(), "(", args, ")"); - } -}; - -// MockingBitGenCallFormatter is invoked to format each distribution call -// into a description string for the mock log. -template <typename DistrT, typename FormatT> -struct MockingBitGenCallFormatter { - std::string operator()(const DistrT& dist, - const typename DistrT::result_type& result) { - return absl::StrCat( - FormatT::FunctionName(), "(", FormatT::FormatArgs(dist), ") => {", - FormatT::FormatResults(absl::MakeSpan(&result, 1)), "}"); - } -}; - -class MockingBitGenBase { - template <typename> - friend struct DistributionCaller; - using generator_type = absl::BitGen; - - public: - // URBG interface - using result_type = generator_type::result_type; - static constexpr result_type(min)() { return (generator_type::min)(); } - static constexpr result_type(max)() { return (generator_type::max)(); } - result_type operator()() { return gen_(); } - - MockingBitGenBase() : gen_(), observed_call_log_() {} - virtual ~MockingBitGenBase() = default; - - protected: - const std::deque<std::string>& observed_call_log() { - return observed_call_log_; - } - - // CallImpl is the type-erased virtual dispatch. - // The type of dist is always distribution<T>, - // The type of result is always distribution<T>::result_type. - virtual bool CallImpl(const std::type_info& distr_type, void* dist_args, - void* result) = 0; - - template <typename DistrT, typename ArgTupleT> - static const std::type_info& GetTypeId() { - return typeid(std::pair<absl::decay_t<DistrT>, absl::decay_t<ArgTupleT>>); - } - - // Call the generating distribution function. - // Invoked by DistributionCaller<>::Call<DistT, FormatT>. - // DistT is the distribution type. - // FormatT is the distribution formatter traits type. - template <typename DistrT, typename FormatT, typename... Args> - typename DistrT::result_type Call(Args&&... args) { - using distr_result_type = typename DistrT::result_type; - using ArgTupleT = std::tuple<absl::decay_t<Args>...>; - - ArgTupleT arg_tuple(std::forward<Args>(args)...); - auto dist = absl::make_from_tuple<DistrT>(arg_tuple); - - distr_result_type result{}; - bool found_match = - CallImpl(GetTypeId<DistrT, ArgTupleT>(), &arg_tuple, &result); - - if (!found_match) { - result = dist(gen_); - } - - // TODO(asoffer): Forwarding the args through means we no longer need to - // extract them from the from the distribution in formatter traits. We can - // just StrJoin them. - observed_call_log_.push_back( - MockingBitGenCallFormatter<DistrT, FormatT>{}(dist, result)); - return result; - } - - private: - generator_type gen_; - std::deque<std::string> observed_call_log_; -}; // namespace random_internal - -} // namespace random_internal -ABSL_NAMESPACE_END -} // namespace absl - -#endif // ABSL_RANDOM_INTERNAL_MOCKING_BIT_GEN_BASE_H_ diff --git a/absl/random/internal/nanobenchmark.cc b/absl/random/internal/nanobenchmark.cc index 8fee77fc..c9181813 100644 --- a/absl/random/internal/nanobenchmark.cc +++ b/absl/random/internal/nanobenchmark.cc @@ -101,7 +101,7 @@ std::string BrandString() { char brand_string[49]; uint32_t abcd[4]; - // Check if brand std::string is supported (it is on all reasonable Intel/AMD) + // Check if brand string is supported (it is on all reasonable Intel/AMD) Cpuid(0x80000000U, 0, abcd); if (abcd[0] < 0x80000004U) { return std::string(); diff --git a/absl/random/internal/nanobenchmark_test.cc b/absl/random/internal/nanobenchmark_test.cc index ab824ef5..f1571e26 100644 --- a/absl/random/internal/nanobenchmark_test.cc +++ b/absl/random/internal/nanobenchmark_test.cc @@ -53,7 +53,7 @@ void RunAll(const int argc, char* argv[]) { // Avoid migrating between cores - important on multi-socket systems. int cpu = -1; if (argc == 2) { - if (!SimpleAtoi(argv[1], &cpu)) { + if (!absl::SimpleAtoi(argv[1], &cpu)) { ABSL_RAW_LOG(FATAL, "The optional argument must be a CPU number >= 0.\n"); } } diff --git a/absl/random/internal/randen-keys.inc b/absl/random/internal/randen-keys.inc deleted file mode 100644 index fa4b1668..00000000 --- a/absl/random/internal/randen-keys.inc +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ABSL_RANDOM_INTERNAL_RANDEN_KEYS_INC_ -#define ABSL_RANDOM_INTERNAL_RANDEN_KEYS_INC_ - -// Textual header to include the randen_keys where necessary. -// REQUIRES: struct u64x2{} -// -// PROVIDES: kKeys -// PROVIDES: round_keys[] - -// "Nothing up my sleeve" numbers from the first hex digits of Pi, obtained -// from http://hexpi.sourceforge.net/. The array was generated by following -// Python script: -/* -python << EOF -"""Generates Randen round keys array from pi-hex.62500.txt file.""" -import binascii - -KEYS = 136 - -def chunks(l, n): - """Yield successive n-sized chunks from l.""" - for i in range(0, len(l), n): - yield l[i:i + n] - -def pairwise(t): - """Transforms sequence into sequence of pairs.""" - it = iter(t) - return zip(it,it) - -def digits_from_pi(): - """Reads digits from hexpi.sourceforge.net file.""" - with open("pi-hex.62500.txt") as file: - return file.read() - -def digits_from_urandom(): - """Reads digits from /dev/urandom.""" - with open("/dev/urandom") as file: - return binascii.hexlify(file.read(KEYS * 16)) - -digits = digits_from_pi() -print("static constexpr const size_t kRoundKeys = {0};\n".format(KEYS)) -print("alignas(16) constexpr const u64x2 round_keys[kRoundKeys] = {") - -for i, (hi, lo) in zip(range(KEYS), pairwise(chunks(digits, 16))): - hi = "0x{0}ull".format(hi) - lo = "0x{0}ull".format(lo) - print(" u64x2({0}, {1}){2}".format(hi, lo, ',' if i+1 < KEYS else '')) - -print("};") -EOF -*/ - -static constexpr const size_t kRoundKeys = 136; - -alignas(16) constexpr u64x2 round_keys[kRoundKeys] = { - u64x2(0x243F6A8885A308D3ull, 0x13198A2E03707344ull), - u64x2(0xA4093822299F31D0ull, 0x082EFA98EC4E6C89ull), - u64x2(0x452821E638D01377ull, 0xBE5466CF34E90C6Cull), - u64x2(0xC0AC29B7C97C50DDull, 0x3F84D5B5B5470917ull), - u64x2(0x9216D5D98979FB1Bull, 0xD1310BA698DFB5ACull), - u64x2(0x2FFD72DBD01ADFB7ull, 0xB8E1AFED6A267E96ull), - u64x2(0xBA7C9045F12C7F99ull, 0x24A19947B3916CF7ull), - u64x2(0x0801F2E2858EFC16ull, 0x636920D871574E69ull), - u64x2(0xA458FEA3F4933D7Eull, 0x0D95748F728EB658ull), - u64x2(0x718BCD5882154AEEull, 0x7B54A41DC25A59B5ull), - u64x2(0x9C30D5392AF26013ull, 0xC5D1B023286085F0ull), - u64x2(0xCA417918B8DB38EFull, 0x8E79DCB0603A180Eull), - u64x2(0x6C9E0E8BB01E8A3Eull, 0xD71577C1BD314B27ull), - u64x2(0x78AF2FDA55605C60ull, 0xE65525F3AA55AB94ull), - u64x2(0x5748986263E81440ull, 0x55CA396A2AAB10B6ull), - u64x2(0xB4CC5C341141E8CEull, 0xA15486AF7C72E993ull), - u64x2(0xB3EE1411636FBC2Aull, 0x2BA9C55D741831F6ull), - u64x2(0xCE5C3E169B87931Eull, 0xAFD6BA336C24CF5Cull), - u64x2(0x7A32538128958677ull, 0x3B8F48986B4BB9AFull), - u64x2(0xC4BFE81B66282193ull, 0x61D809CCFB21A991ull), - u64x2(0x487CAC605DEC8032ull, 0xEF845D5DE98575B1ull), - u64x2(0xDC262302EB651B88ull, 0x23893E81D396ACC5ull), - u64x2(0x0F6D6FF383F44239ull, 0x2E0B4482A4842004ull), - u64x2(0x69C8F04A9E1F9B5Eull, 0x21C66842F6E96C9Aull), - u64x2(0x670C9C61ABD388F0ull, 0x6A51A0D2D8542F68ull), - u64x2(0x960FA728AB5133A3ull, 0x6EEF0B6C137A3BE4ull), - u64x2(0xBA3BF0507EFB2A98ull, 0xA1F1651D39AF0176ull), - u64x2(0x66CA593E82430E88ull, 0x8CEE8619456F9FB4ull), - u64x2(0x7D84A5C33B8B5EBEull, 0xE06F75D885C12073ull), - u64x2(0x401A449F56C16AA6ull, 0x4ED3AA62363F7706ull), - u64x2(0x1BFEDF72429B023Dull, 0x37D0D724D00A1248ull), - u64x2(0xDB0FEAD349F1C09Bull, 0x075372C980991B7Bull), - u64x2(0x25D479D8F6E8DEF7ull, 0xE3FE501AB6794C3Bull), - u64x2(0x976CE0BD04C006BAull, 0xC1A94FB6409F60C4ull), - u64x2(0x5E5C9EC2196A2463ull, 0x68FB6FAF3E6C53B5ull), - u64x2(0x1339B2EB3B52EC6Full, 0x6DFC511F9B30952Cull), - u64x2(0xCC814544AF5EBD09ull, 0xBEE3D004DE334AFDull), - u64x2(0x660F2807192E4BB3ull, 0xC0CBA85745C8740Full), - u64x2(0xD20B5F39B9D3FBDBull, 0x5579C0BD1A60320Aull), - u64x2(0xD6A100C6402C7279ull, 0x679F25FEFB1FA3CCull), - u64x2(0x8EA5E9F8DB3222F8ull, 0x3C7516DFFD616B15ull), - u64x2(0x2F501EC8AD0552ABull, 0x323DB5FAFD238760ull), - u64x2(0x53317B483E00DF82ull, 0x9E5C57BBCA6F8CA0ull), - u64x2(0x1A87562EDF1769DBull, 0xD542A8F6287EFFC3ull), - u64x2(0xAC6732C68C4F5573ull, 0x695B27B0BBCA58C8ull), - u64x2(0xE1FFA35DB8F011A0ull, 0x10FA3D98FD2183B8ull), - u64x2(0x4AFCB56C2DD1D35Bull, 0x9A53E479B6F84565ull), - u64x2(0xD28E49BC4BFB9790ull, 0xE1DDF2DAA4CB7E33ull), - u64x2(0x62FB1341CEE4C6E8ull, 0xEF20CADA36774C01ull), - u64x2(0xD07E9EFE2BF11FB4ull, 0x95DBDA4DAE909198ull), - u64x2(0xEAAD8E716B93D5A0ull, 0xD08ED1D0AFC725E0ull), - u64x2(0x8E3C5B2F8E7594B7ull, 0x8FF6E2FBF2122B64ull), - u64x2(0x8888B812900DF01Cull, 0x4FAD5EA0688FC31Cull), - u64x2(0xD1CFF191B3A8C1ADull, 0x2F2F2218BE0E1777ull), - u64x2(0xEA752DFE8B021FA1ull, 0xE5A0CC0FB56F74E8ull), - u64x2(0x18ACF3D6CE89E299ull, 0xB4A84FE0FD13E0B7ull), - u64x2(0x7CC43B81D2ADA8D9ull, 0x165FA26680957705ull), - u64x2(0x93CC7314211A1477ull, 0xE6AD206577B5FA86ull), - u64x2(0xC75442F5FB9D35CFull, 0xEBCDAF0C7B3E89A0ull), - u64x2(0xD6411BD3AE1E7E49ull, 0x00250E2D2071B35Eull), - u64x2(0x226800BB57B8E0AFull, 0x2464369BF009B91Eull), - u64x2(0x5563911D59DFA6AAull, 0x78C14389D95A537Full), - u64x2(0x207D5BA202E5B9C5ull, 0x832603766295CFA9ull), - u64x2(0x11C819684E734A41ull, 0xB3472DCA7B14A94Aull), - u64x2(0x1B5100529A532915ull, 0xD60F573FBC9BC6E4ull), - u64x2(0x2B60A47681E67400ull, 0x08BA6FB5571BE91Full), - u64x2(0xF296EC6B2A0DD915ull, 0xB6636521E7B9F9B6ull), - u64x2(0xFF34052EC5855664ull, 0x53B02D5DA99F8FA1ull), - u64x2(0x08BA47996E85076Aull, 0x4B7A70E9B5B32944ull), - u64x2(0xDB75092EC4192623ull, 0xAD6EA6B049A7DF7Dull), - u64x2(0x9CEE60B88FEDB266ull, 0xECAA8C71699A18FFull), - u64x2(0x5664526CC2B19EE1ull, 0x193602A575094C29ull), - u64x2(0xA0591340E4183A3Eull, 0x3F54989A5B429D65ull), - u64x2(0x6B8FE4D699F73FD6ull, 0xA1D29C07EFE830F5ull), - u64x2(0x4D2D38E6F0255DC1ull, 0x4CDD20868470EB26ull), - u64x2(0x6382E9C6021ECC5Eull, 0x09686B3F3EBAEFC9ull), - u64x2(0x3C9718146B6A70A1ull, 0x687F358452A0E286ull), - u64x2(0xB79C5305AA500737ull, 0x3E07841C7FDEAE5Cull), - u64x2(0x8E7D44EC5716F2B8ull, 0xB03ADA37F0500C0Dull), - u64x2(0xF01C1F040200B3FFull, 0xAE0CF51A3CB574B2ull), - u64x2(0x25837A58DC0921BDull, 0xD19113F97CA92FF6ull), - u64x2(0x9432477322F54701ull, 0x3AE5E58137C2DADCull), - u64x2(0xC8B576349AF3DDA7ull, 0xA94461460FD0030Eull), - u64x2(0xECC8C73EA4751E41ull, 0xE238CD993BEA0E2Full), - u64x2(0x3280BBA1183EB331ull, 0x4E548B384F6DB908ull), - u64x2(0x6F420D03F60A04BFull, 0x2CB8129024977C79ull), - u64x2(0x5679B072BCAF89AFull, 0xDE9A771FD9930810ull), - u64x2(0xB38BAE12DCCF3F2Eull, 0x5512721F2E6B7124ull), - u64x2(0x501ADDE69F84CD87ull, 0x7A5847187408DA17ull), - u64x2(0xBC9F9ABCE94B7D8Cull, 0xEC7AEC3ADB851DFAull), - u64x2(0x63094366C464C3D2ull, 0xEF1C18473215D808ull), - u64x2(0xDD433B3724C2BA16ull, 0x12A14D432A65C451ull), - u64x2(0x50940002133AE4DDull, 0x71DFF89E10314E55ull), - u64x2(0x81AC77D65F11199Bull, 0x043556F1D7A3C76Bull), - u64x2(0x3C11183B5924A509ull, 0xF28FE6ED97F1FBFAull), - u64x2(0x9EBABF2C1E153C6Eull, 0x86E34570EAE96FB1ull), - u64x2(0x860E5E0A5A3E2AB3ull, 0x771FE71C4E3D06FAull), - u64x2(0x2965DCB999E71D0Full, 0x803E89D65266C825ull), - u64x2(0x2E4CC9789C10B36Aull, 0xC6150EBA94E2EA78ull), - u64x2(0xA6FC3C531E0A2DF4ull, 0xF2F74EA7361D2B3Dull), - u64x2(0x1939260F19C27960ull, 0x5223A708F71312B6ull), - u64x2(0xEBADFE6EEAC31F66ull, 0xE3BC4595A67BC883ull), - u64x2(0xB17F37D1018CFF28ull, 0xC332DDEFBE6C5AA5ull), - u64x2(0x6558218568AB9702ull, 0xEECEA50FDB2F953Bull), - u64x2(0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull), - u64x2(0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull), - u64x2(0x0334FE1EAA0363CFull, 0xB5735C904C70A239ull), - u64x2(0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull), - u64x2(0x9CAB5CABB2F3846Eull, 0x648B1EAF19BDF0CAull), - u64x2(0xA02369B9655ABB50ull, 0x40685A323C2AB4B3ull), - u64x2(0x319EE9D5C021B8F7ull, 0x9B540B19875FA099ull), - u64x2(0x95F7997E623D7DA8ull, 0xF837889A97E32D77ull), - u64x2(0x11ED935F16681281ull, 0x0E358829C7E61FD6ull), - u64x2(0x96DEDFA17858BA99ull, 0x57F584A51B227263ull), - u64x2(0x9B83C3FF1AC24696ull, 0xCDB30AEB532E3054ull), - u64x2(0x8FD948E46DBC3128ull, 0x58EBF2EF34C6FFEAull), - u64x2(0xFE28ED61EE7C3C73ull, 0x5D4A14D9E864B7E3ull), - u64x2(0x42105D14203E13E0ull, 0x45EEE2B6A3AAABEAull), - u64x2(0xDB6C4F15FACB4FD0ull, 0xC742F442EF6ABBB5ull), - u64x2(0x654F3B1D41CD2105ull, 0xD81E799E86854DC7ull), - u64x2(0xE44B476A3D816250ull, 0xCF62A1F25B8D2646ull), - u64x2(0xFC8883A0C1C7B6A3ull, 0x7F1524C369CB7492ull), - u64x2(0x47848A0B5692B285ull, 0x095BBF00AD19489Dull), - u64x2(0x1462B17423820D00ull, 0x58428D2A0C55F5EAull), - u64x2(0x1DADF43E233F7061ull, 0x3372F0928D937E41ull), - u64x2(0xD65FECF16C223BDBull, 0x7CDE3759CBEE7460ull), - u64x2(0x4085F2A7CE77326Eull, 0xA607808419F8509Eull), - u64x2(0xE8EFD85561D99735ull, 0xA969A7AAC50C06C2ull), - u64x2(0x5A04ABFC800BCADCull, 0x9E447A2EC3453484ull), - u64x2(0xFDD567050E1E9EC9ull, 0xDB73DBD3105588CDull), - u64x2(0x675FDA79E3674340ull, 0xC5C43465713E38D8ull), - u64x2(0x3D28F89EF16DFF20ull, 0x153E21E78FB03D4Aull), - u64x2(0xE6E39F2BDB83ADF7ull, 0xE93D5A68948140F7ull), - u64x2(0xF64C261C94692934ull, 0x411520F77602D4F7ull), - u64x2(0xBCF46B2ED4A10068ull, 0xD40824713320F46Aull), - u64x2(0x43B7D4B7500061AFull, 0x1E39F62E97244546ull)}; - -#endif // ABSL_RANDOM_INTERNAL_RANDEN_KEYS_INC_ diff --git a/absl/random/internal/randen_hwaes.cc b/absl/random/internal/randen_hwaes.cc index e23844f1..b5a3f90a 100644 --- a/absl/random/internal/randen_hwaes.cc +++ b/absl/random/internal/randen_hwaes.cc @@ -24,6 +24,7 @@ #include "absl/base/attributes.h" #include "absl/random/internal/platform.h" +#include "absl/random/internal/randen_traits.h" // ABSL_RANDEN_HWAES_IMPL indicates whether this file will contain // a hardware accelerated implementation of randen, or whether it @@ -115,8 +116,16 @@ ABSL_NAMESPACE_END // Accelerated implementations are supported. // We need the per-architecture includes and defines. // +namespace { -#include "absl/random/internal/randen_traits.h" +using absl::random_internal::RandenTraits; + +// Randen operates on 128-bit vectors. +struct alignas(16) u64x2 { + uint64_t data[2]; +}; + +} // namespace // TARGET_CRYPTO defines a crypto attribute for each architecture. // @@ -141,6 +150,7 @@ ABSL_NAMESPACE_END #include <altivec.h> // <altivec.h> #defines vector __vector; in C++, this is bad form. #undef vector +#undef bool // Rely on the PowerPC AltiVec vector operations for accelerated AES // instructions. GCC support of the PPC vector types is described in: @@ -150,7 +160,6 @@ ABSL_NAMESPACE_END using Vector128 = __vector unsigned long long; // NOLINT(runtime/int) namespace { - inline ABSL_TARGET_CRYPTO Vector128 ReverseBytes(const Vector128& v) { // Reverses the bytes of the vector. const __vector unsigned char perm = {15, 14, 13, 12, 11, 10, 9, 8, @@ -177,14 +186,9 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, } // Enables native loads in the round loop by pre-swapping. -inline ABSL_TARGET_CRYPTO void SwapEndian(uint64_t* state) { - using absl::random_internal::RandenTraits; - constexpr size_t kLanes = 2; - constexpr size_t kFeistelBlocks = RandenTraits::kFeistelBlocks; - - for (uint32_t branch = 0; branch < kFeistelBlocks; ++branch) { - const Vector128 v = ReverseBytes(Vector128Load(state + kLanes * branch)); - Vector128Store(v, state + kLanes * branch); +inline ABSL_TARGET_CRYPTO void SwapEndian(u64x2* state) { + for (uint32_t block = 0; block < RandenTraits::kFeistelBlocks; ++block) { + Vector128Store(ReverseBytes(Vector128Load(state + block)), state + block); } } @@ -251,7 +255,7 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, return vaesmcq_u8(vaeseq_u8(state, uint8x16_t{})) ^ round_key; } -inline ABSL_TARGET_CRYPTO void SwapEndian(uint64_t*) {} +inline ABSL_TARGET_CRYPTO void SwapEndian(void*) {} } // namespace @@ -297,39 +301,12 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, return Vector128(_mm_aesenc_si128(state.data(), round_key.data())); } -inline ABSL_TARGET_CRYPTO void SwapEndian(uint64_t*) {} +inline ABSL_TARGET_CRYPTO void SwapEndian(void*) {} } // namespace #endif -namespace { - -// u64x2 is a 128-bit, (2 x uint64_t lanes) struct used to store -// the randen_keys. -struct alignas(16) u64x2 { - constexpr u64x2(uint64_t hi, uint64_t lo) -#if defined(ABSL_ARCH_PPC) - // This has been tested with PPC running in little-endian mode; - // We byte-swap the u64x2 structure from little-endian to big-endian - // because altivec always runs in big-endian mode. - : v{__builtin_bswap64(hi), __builtin_bswap64(lo)} { -#else - : v{lo, hi} { -#endif - } - - constexpr bool operator==(const u64x2& other) const { - return v[0] == other.v[0] && v[1] == other.v[1]; - } - - constexpr bool operator!=(const u64x2& other) const { - return !(*this == other); - } - - uint64_t v[2]; -}; // namespace - #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunknown-pragmas" @@ -338,7 +315,6 @@ struct alignas(16) u64x2 { // At this point, all of the platform-specific features have been defined / // implemented. // -// REQUIRES: using u64x2 = ... // REQUIRES: using Vector128 = ... // REQUIRES: Vector128 Vector128Load(void*) {...} // REQUIRES: void Vector128Store(Vector128, void*) {...} @@ -347,94 +323,50 @@ struct alignas(16) u64x2 { // // PROVIDES: absl::random_internal::RandenHwAes::Absorb // PROVIDES: absl::random_internal::RandenHwAes::Generate - -// RANDen = RANDom generator or beetroots in Swiss German. -// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random -// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32. -// -// High-level summary: -// 1) Reverie (see "A Robust and Sponge-Like PRNG with Improved Efficiency") is -// a sponge-like random generator that requires a cryptographic permutation. -// It improves upon "Provably Robust Sponge-Based PRNGs and KDFs" by -// achieving backtracking resistance with only one Permute() per buffer. -// -// 2) "Simpira v2: A Family of Efficient Permutations Using the AES Round -// Function" constructs up to 1024-bit permutations using an improved -// Generalized Feistel network with 2-round AES-128 functions. This Feistel -// block shuffle achieves diffusion faster and is less vulnerable to -// sliced-biclique attacks than the Type-2 cyclic shuffle. -// -// 3) "Improving the Generalized Feistel" and "New criterion for diffusion -// property" extends the same kind of improved Feistel block shuffle to 16 -// branches, which enables a 2048-bit permutation. -// -// We combine these three ideas and also change Simpira's subround keys from -// structured/low-entropy counters to digits of Pi. - -// Randen constants. -using absl::random_internal::RandenTraits; -constexpr size_t kStateBytes = RandenTraits::kStateBytes; -constexpr size_t kCapacityBytes = RandenTraits::kCapacityBytes; -constexpr size_t kFeistelBlocks = RandenTraits::kFeistelBlocks; -constexpr size_t kFeistelRounds = RandenTraits::kFeistelRounds; -constexpr size_t kFeistelFunctions = RandenTraits::kFeistelFunctions; - -// Independent keys (272 = 2.1 KiB) for the first AES subround of each function. -constexpr size_t kKeys = kFeistelRounds * kFeistelFunctions; - -// INCLUDE keys. -#include "absl/random/internal/randen-keys.inc" - -static_assert(kKeys == kRoundKeys, "kKeys and kRoundKeys must be equal"); -static_assert(round_keys[kKeys - 1] != u64x2(0, 0), - "Too few round_keys initializers"); - -// Number of uint64_t lanes per 128-bit vector; -constexpr size_t kLanes = 2; +namespace { // Block shuffles applies a shuffle to the entire state between AES rounds. // Improved odd-even shuffle from "New criterion for diffusion property". -inline ABSL_TARGET_CRYPTO void BlockShuffle(uint64_t* state) { - static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); - - constexpr size_t shuffle[kFeistelBlocks] = {7, 2, 13, 4, 11, 8, 3, 6, - 15, 0, 9, 10, 1, 14, 5, 12}; - - // The fully unrolled loop without the memcpy improves the speed by about - // 30% over the equivalent loop. - const Vector128 v0 = Vector128Load(state + kLanes * shuffle[0]); - const Vector128 v1 = Vector128Load(state + kLanes * shuffle[1]); - const Vector128 v2 = Vector128Load(state + kLanes * shuffle[2]); - const Vector128 v3 = Vector128Load(state + kLanes * shuffle[3]); - const Vector128 v4 = Vector128Load(state + kLanes * shuffle[4]); - const Vector128 v5 = Vector128Load(state + kLanes * shuffle[5]); - const Vector128 v6 = Vector128Load(state + kLanes * shuffle[6]); - const Vector128 v7 = Vector128Load(state + kLanes * shuffle[7]); - const Vector128 w0 = Vector128Load(state + kLanes * shuffle[8]); - const Vector128 w1 = Vector128Load(state + kLanes * shuffle[9]); - const Vector128 w2 = Vector128Load(state + kLanes * shuffle[10]); - const Vector128 w3 = Vector128Load(state + kLanes * shuffle[11]); - const Vector128 w4 = Vector128Load(state + kLanes * shuffle[12]); - const Vector128 w5 = Vector128Load(state + kLanes * shuffle[13]); - const Vector128 w6 = Vector128Load(state + kLanes * shuffle[14]); - const Vector128 w7 = Vector128Load(state + kLanes * shuffle[15]); - - Vector128Store(v0, state + kLanes * 0); - Vector128Store(v1, state + kLanes * 1); - Vector128Store(v2, state + kLanes * 2); - Vector128Store(v3, state + kLanes * 3); - Vector128Store(v4, state + kLanes * 4); - Vector128Store(v5, state + kLanes * 5); - Vector128Store(v6, state + kLanes * 6); - Vector128Store(v7, state + kLanes * 7); - Vector128Store(w0, state + kLanes * 8); - Vector128Store(w1, state + kLanes * 9); - Vector128Store(w2, state + kLanes * 10); - Vector128Store(w3, state + kLanes * 11); - Vector128Store(w4, state + kLanes * 12); - Vector128Store(w5, state + kLanes * 13); - Vector128Store(w6, state + kLanes * 14); - Vector128Store(w7, state + kLanes * 15); +inline ABSL_TARGET_CRYPTO void BlockShuffle(u64x2* state) { + static_assert(RandenTraits::kFeistelBlocks == 16, + "Expecting 16 FeistelBlocks."); + + constexpr size_t shuffle[RandenTraits::kFeistelBlocks] = { + 7, 2, 13, 4, 11, 8, 3, 6, 15, 0, 9, 10, 1, 14, 5, 12}; + + const Vector128 v0 = Vector128Load(state + shuffle[0]); + const Vector128 v1 = Vector128Load(state + shuffle[1]); + const Vector128 v2 = Vector128Load(state + shuffle[2]); + const Vector128 v3 = Vector128Load(state + shuffle[3]); + const Vector128 v4 = Vector128Load(state + shuffle[4]); + const Vector128 v5 = Vector128Load(state + shuffle[5]); + const Vector128 v6 = Vector128Load(state + shuffle[6]); + const Vector128 v7 = Vector128Load(state + shuffle[7]); + const Vector128 w0 = Vector128Load(state + shuffle[8]); + const Vector128 w1 = Vector128Load(state + shuffle[9]); + const Vector128 w2 = Vector128Load(state + shuffle[10]); + const Vector128 w3 = Vector128Load(state + shuffle[11]); + const Vector128 w4 = Vector128Load(state + shuffle[12]); + const Vector128 w5 = Vector128Load(state + shuffle[13]); + const Vector128 w6 = Vector128Load(state + shuffle[14]); + const Vector128 w7 = Vector128Load(state + shuffle[15]); + + Vector128Store(v0, state + 0); + Vector128Store(v1, state + 1); + Vector128Store(v2, state + 2); + Vector128Store(v3, state + 3); + Vector128Store(v4, state + 4); + Vector128Store(v5, state + 5); + Vector128Store(v6, state + 6); + Vector128Store(v7, state + 7); + Vector128Store(w0, state + 8); + Vector128Store(w1, state + 9); + Vector128Store(w2, state + 10); + Vector128Store(w3, state + 11); + Vector128Store(w4, state + 12); + Vector128Store(w5, state + 13); + Vector128Store(w6, state + 14); + Vector128Store(w7, state + 15); } // Feistel round function using two AES subrounds. Very similar to F() @@ -443,27 +375,28 @@ inline ABSL_TARGET_CRYPTO void BlockShuffle(uint64_t* state) { // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel // XORs are 'free' (included in the second AES instruction). inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound( - uint64_t* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { - static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); + u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { + static_assert(RandenTraits::kFeistelBlocks == 16, + "Expecting 16 FeistelBlocks."); // MSVC does a horrible job at unrolling loops. // So we unroll the loop by hand to improve the performance. - const Vector128 s0 = Vector128Load(state + kLanes * 0); - const Vector128 s1 = Vector128Load(state + kLanes * 1); - const Vector128 s2 = Vector128Load(state + kLanes * 2); - const Vector128 s3 = Vector128Load(state + kLanes * 3); - const Vector128 s4 = Vector128Load(state + kLanes * 4); - const Vector128 s5 = Vector128Load(state + kLanes * 5); - const Vector128 s6 = Vector128Load(state + kLanes * 6); - const Vector128 s7 = Vector128Load(state + kLanes * 7); - const Vector128 s8 = Vector128Load(state + kLanes * 8); - const Vector128 s9 = Vector128Load(state + kLanes * 9); - const Vector128 s10 = Vector128Load(state + kLanes * 10); - const Vector128 s11 = Vector128Load(state + kLanes * 11); - const Vector128 s12 = Vector128Load(state + kLanes * 12); - const Vector128 s13 = Vector128Load(state + kLanes * 13); - const Vector128 s14 = Vector128Load(state + kLanes * 14); - const Vector128 s15 = Vector128Load(state + kLanes * 15); + const Vector128 s0 = Vector128Load(state + 0); + const Vector128 s1 = Vector128Load(state + 1); + const Vector128 s2 = Vector128Load(state + 2); + const Vector128 s3 = Vector128Load(state + 3); + const Vector128 s4 = Vector128Load(state + 4); + const Vector128 s5 = Vector128Load(state + 5); + const Vector128 s6 = Vector128Load(state + 6); + const Vector128 s7 = Vector128Load(state + 7); + const Vector128 s8 = Vector128Load(state + 8); + const Vector128 s9 = Vector128Load(state + 9); + const Vector128 s10 = Vector128Load(state + 10); + const Vector128 s11 = Vector128Load(state + 11); + const Vector128 s12 = Vector128Load(state + 12); + const Vector128 s13 = Vector128Load(state + 13); + const Vector128 s14 = Vector128Load(state + 14); + const Vector128 s15 = Vector128Load(state + 15); // Encode even blocks with keys. const Vector128 e0 = AesRound(s0, Vector128Load(keys + 0)); @@ -486,14 +419,14 @@ inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound( const Vector128 o15 = AesRound(e14, s15); // Store odd blocks. (These will be shuffled later). - Vector128Store(o1, state + kLanes * 1); - Vector128Store(o3, state + kLanes * 3); - Vector128Store(o5, state + kLanes * 5); - Vector128Store(o7, state + kLanes * 7); - Vector128Store(o9, state + kLanes * 9); - Vector128Store(o11, state + kLanes * 11); - Vector128Store(o13, state + kLanes * 13); - Vector128Store(o15, state + kLanes * 15); + Vector128Store(o1, state + 1); + Vector128Store(o3, state + 3); + Vector128Store(o5, state + 5); + Vector128Store(o7, state + 7); + Vector128Store(o9, state + 9); + Vector128Store(o11, state + 11); + Vector128Store(o13, state + 13); + Vector128Store(o15, state + 15); return keys + 8; } @@ -503,16 +436,13 @@ inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound( // 2^64 queries if the round function is a PRF. This is similar to the b=8 case // of Simpira v2, but more efficient than its generic construction for b=16. inline ABSL_TARGET_CRYPTO void Permute( - const void* ABSL_RANDOM_INTERNAL_RESTRICT keys, uint64_t* state) { - const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 = - static_cast<const u64x2*>(keys); - + u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { // (Successfully unrolled; the first iteration jumps into the second half) #ifdef __clang__ #pragma clang loop unroll_count(2) #endif - for (size_t round = 0; round < kFeistelRounds; ++round) { - keys128 = FeistelRound(state, keys128); + for (size_t round = 0; round < RandenTraits::kFeistelRounds; ++round) { + keys = FeistelRound(state, keys); BlockShuffle(state); } } @@ -528,96 +458,101 @@ bool HasRandenHwAesImplementation() { return true; } const void* ABSL_TARGET_CRYPTO RandenHwAes::GetKeys() { // Round keys for one AES per Feistel round and branch. // The canonical implementation uses first digits of Pi. - return round_keys; +#if defined(ABSL_ARCH_PPC) + return kRandenRoundKeysBE; +#else + return kRandenRoundKeys; +#endif } // NOLINTNEXTLINE void ABSL_TARGET_CRYPTO RandenHwAes::Absorb(const void* seed_void, void* state_void) { - auto* state = static_cast<uint64_t*>(state_void); - const auto* seed = static_cast<const uint64_t*>(seed_void); - - constexpr size_t kCapacityBlocks = kCapacityBytes / sizeof(Vector128); - constexpr size_t kStateBlocks = kStateBytes / sizeof(Vector128); - - static_assert(kCapacityBlocks * sizeof(Vector128) == kCapacityBytes, - "Not i*V"); - static_assert(kCapacityBlocks == 1, "Unexpected Randen kCapacityBlocks"); - static_assert(kStateBlocks == 16, "Unexpected Randen kStateBlocks"); - - Vector128 b1 = Vector128Load(state + kLanes * 1); - b1 ^= Vector128Load(seed + kLanes * 0); - Vector128Store(b1, state + kLanes * 1); - - Vector128 b2 = Vector128Load(state + kLanes * 2); - b2 ^= Vector128Load(seed + kLanes * 1); - Vector128Store(b2, state + kLanes * 2); - - Vector128 b3 = Vector128Load(state + kLanes * 3); - b3 ^= Vector128Load(seed + kLanes * 2); - Vector128Store(b3, state + kLanes * 3); - - Vector128 b4 = Vector128Load(state + kLanes * 4); - b4 ^= Vector128Load(seed + kLanes * 3); - Vector128Store(b4, state + kLanes * 4); - - Vector128 b5 = Vector128Load(state + kLanes * 5); - b5 ^= Vector128Load(seed + kLanes * 4); - Vector128Store(b5, state + kLanes * 5); - - Vector128 b6 = Vector128Load(state + kLanes * 6); - b6 ^= Vector128Load(seed + kLanes * 5); - Vector128Store(b6, state + kLanes * 6); - - Vector128 b7 = Vector128Load(state + kLanes * 7); - b7 ^= Vector128Load(seed + kLanes * 6); - Vector128Store(b7, state + kLanes * 7); - - Vector128 b8 = Vector128Load(state + kLanes * 8); - b8 ^= Vector128Load(seed + kLanes * 7); - Vector128Store(b8, state + kLanes * 8); - - Vector128 b9 = Vector128Load(state + kLanes * 9); - b9 ^= Vector128Load(seed + kLanes * 8); - Vector128Store(b9, state + kLanes * 9); - - Vector128 b10 = Vector128Load(state + kLanes * 10); - b10 ^= Vector128Load(seed + kLanes * 9); - Vector128Store(b10, state + kLanes * 10); - - Vector128 b11 = Vector128Load(state + kLanes * 11); - b11 ^= Vector128Load(seed + kLanes * 10); - Vector128Store(b11, state + kLanes * 11); - - Vector128 b12 = Vector128Load(state + kLanes * 12); - b12 ^= Vector128Load(seed + kLanes * 11); - Vector128Store(b12, state + kLanes * 12); - - Vector128 b13 = Vector128Load(state + kLanes * 13); - b13 ^= Vector128Load(seed + kLanes * 12); - Vector128Store(b13, state + kLanes * 13); - - Vector128 b14 = Vector128Load(state + kLanes * 14); - b14 ^= Vector128Load(seed + kLanes * 13); - Vector128Store(b14, state + kLanes * 14); - - Vector128 b15 = Vector128Load(state + kLanes * 15); - b15 ^= Vector128Load(seed + kLanes * 14); - Vector128Store(b15, state + kLanes * 15); + static_assert(RandenTraits::kCapacityBytes / sizeof(Vector128) == 1, + "Unexpected Randen kCapacityBlocks"); + static_assert(RandenTraits::kStateBytes / sizeof(Vector128) == 16, + "Unexpected Randen kStateBlocks"); + + auto* state = + reinterpret_cast<u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(state_void); + const auto* seed = + reinterpret_cast<const u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(seed_void); + + Vector128 b1 = Vector128Load(state + 1); + b1 ^= Vector128Load(seed + 0); + Vector128Store(b1, state + 1); + + Vector128 b2 = Vector128Load(state + 2); + b2 ^= Vector128Load(seed + 1); + Vector128Store(b2, state + 2); + + Vector128 b3 = Vector128Load(state + 3); + b3 ^= Vector128Load(seed + 2); + Vector128Store(b3, state + 3); + + Vector128 b4 = Vector128Load(state + 4); + b4 ^= Vector128Load(seed + 3); + Vector128Store(b4, state + 4); + + Vector128 b5 = Vector128Load(state + 5); + b5 ^= Vector128Load(seed + 4); + Vector128Store(b5, state + 5); + + Vector128 b6 = Vector128Load(state + 6); + b6 ^= Vector128Load(seed + 5); + Vector128Store(b6, state + 6); + + Vector128 b7 = Vector128Load(state + 7); + b7 ^= Vector128Load(seed + 6); + Vector128Store(b7, state + 7); + + Vector128 b8 = Vector128Load(state + 8); + b8 ^= Vector128Load(seed + 7); + Vector128Store(b8, state + 8); + + Vector128 b9 = Vector128Load(state + 9); + b9 ^= Vector128Load(seed + 8); + Vector128Store(b9, state + 9); + + Vector128 b10 = Vector128Load(state + 10); + b10 ^= Vector128Load(seed + 9); + Vector128Store(b10, state + 10); + + Vector128 b11 = Vector128Load(state + 11); + b11 ^= Vector128Load(seed + 10); + Vector128Store(b11, state + 11); + + Vector128 b12 = Vector128Load(state + 12); + b12 ^= Vector128Load(seed + 11); + Vector128Store(b12, state + 12); + + Vector128 b13 = Vector128Load(state + 13); + b13 ^= Vector128Load(seed + 12); + Vector128Store(b13, state + 13); + + Vector128 b14 = Vector128Load(state + 14); + b14 ^= Vector128Load(seed + 13); + Vector128Store(b14, state + 14); + + Vector128 b15 = Vector128Load(state + 15); + b15 ^= Vector128Load(seed + 14); + Vector128Store(b15, state + 15); } // NOLINTNEXTLINE -void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys, +void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys_void, void* state_void) { - static_assert(kCapacityBytes == sizeof(Vector128), "Capacity mismatch"); + static_assert(RandenTraits::kCapacityBytes == sizeof(Vector128), + "Capacity mismatch"); - auto* state = static_cast<uint64_t*>(state_void); + auto* state = reinterpret_cast<u64x2*>(state_void); + const auto* keys = reinterpret_cast<const u64x2*>(keys_void); const Vector128 prev_inner = Vector128Load(state); SwapEndian(state); - Permute(keys, state); + Permute(state, keys); SwapEndian(state); diff --git a/absl/random/internal/randen_hwaes_test.cc b/absl/random/internal/randen_hwaes_test.cc index a7cbd46b..66ddb43f 100644 --- a/absl/random/internal/randen_hwaes_test.cc +++ b/absl/random/internal/randen_hwaes_test.cc @@ -27,12 +27,14 @@ namespace { using absl::random_internal::RandenHwAes; using absl::random_internal::RandenTraits; -struct randen { - static constexpr size_t kStateSizeT = - RandenTraits::kStateBytes / sizeof(uint64_t); +// Local state parameters. +constexpr size_t kSeedBytes = + RandenTraits::kStateBytes - RandenTraits::kCapacityBytes; +constexpr size_t kStateSizeT = RandenTraits::kStateBytes / sizeof(uint64_t); +constexpr size_t kSeedSizeT = kSeedBytes / sizeof(uint32_t); + +struct alignas(16) randen { uint64_t state[kStateSizeT]; - static constexpr size_t kSeedSizeT = - RandenTraits::kSeedBytes / sizeof(uint32_t); uint32_t seed[kSeedSizeT]; }; diff --git a/absl/random/internal/randen_round_keys.cc b/absl/random/internal/randen_round_keys.cc new file mode 100644 index 00000000..5fb3ca55 --- /dev/null +++ b/absl/random/internal/randen_round_keys.cc @@ -0,0 +1,462 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/random/internal/randen_traits.h" + +// This file contains only the round keys for randen. +// +// "Nothing up my sleeve" numbers from the first hex digits of Pi, obtained +// from http://hexpi.sourceforge.net/. The array was generated by following +// Python script: + +/* +python >tmp.cc << EOF +"""Generates Randen round keys array from pi-hex.62500.txt file.""" +import binascii + +KEYS = 17 * 8 + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def pairwise(t): + """Transforms sequence into sequence of pairs.""" + it = iter(t) + return zip(it,it) + +def digits_from_pi(): + """Reads digits from hexpi.sourceforge.net file.""" + with open("pi-hex.62500.txt") as file: + return file.read() + +def digits_from_urandom(): + """Reads digits from /dev/urandom.""" + with open("/dev/urandom") as file: + return binascii.hexlify(file.read(KEYS * 16)) + +def print_row(b) + print(" 0x{0}, 0x{1}, 0x{2}, 0x{3}, 0x{4}, 0x{5}, 0x{6}, 0x{7}, 0x{8}, 0x{9}, +0x{10}, 0x{11}, 0x{12}, 0x{13}, 0x{14}, 0x{15},".format(*b)) + + +digits = digits_from_pi() +#digits = digits_from_urandom() + +print("namespace {") +print("static constexpr size_t kKeyBytes = {0};\n".format(KEYS * 16)) +print("}") + +print("alignas(16) const unsigned char kRandenRoundKeysBE[kKeyBytes] = {") + +for i, u16 in zip(range(KEYS), chunks(digits, 32)): + b = list(chunks(u16, 2)) + print_row(b) + +print("};") + +print("alignas(16) const unsigned char kRandenRoundKeys[kKeyBytes] = {") + +for i, u16 in zip(range(KEYS), chunks(digits, 32)): + b = list(chunks(u16, 2)) + b.reverse() + print_row(b) + +print("};") + +EOF + +*/ + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace random_internal { +namespace { +static constexpr size_t kKeyBytes = 2176; +} + +alignas(16) const unsigned char kRandenRoundKeysBE[kKeyBytes] = { + 0x24, 0x3F, 0x6A, 0x88, 0x85, 0xA3, 0x08, 0xD3, 0x13, 0x19, 0x8A, 0x2E, + 0x03, 0x70, 0x73, 0x44, 0xA4, 0x09, 0x38, 0x22, 0x29, 0x9F, 0x31, 0xD0, + 0x08, 0x2E, 0xFA, 0x98, 0xEC, 0x4E, 0x6C, 0x89, 0x45, 0x28, 0x21, 0xE6, + 0x38, 0xD0, 0x13, 0x77, 0xBE, 0x54, 0x66, 0xCF, 0x34, 0xE9, 0x0C, 0x6C, + 0xC0, 0xAC, 0x29, 0xB7, 0xC9, 0x7C, 0x50, 0xDD, 0x3F, 0x84, 0xD5, 0xB5, + 0xB5, 0x47, 0x09, 0x17, 0x92, 0x16, 0xD5, 0xD9, 0x89, 0x79, 0xFB, 0x1B, + 0xD1, 0x31, 0x0B, 0xA6, 0x98, 0xDF, 0xB5, 0xAC, 0x2F, 0xFD, 0x72, 0xDB, + 0xD0, 0x1A, 0xDF, 0xB7, 0xB8, 0xE1, 0xAF, 0xED, 0x6A, 0x26, 0x7E, 0x96, + 0xBA, 0x7C, 0x90, 0x45, 0xF1, 0x2C, 0x7F, 0x99, 0x24, 0xA1, 0x99, 0x47, + 0xB3, 0x91, 0x6C, 0xF7, 0x08, 0x01, 0xF2, 0xE2, 0x85, 0x8E, 0xFC, 0x16, + 0x63, 0x69, 0x20, 0xD8, 0x71, 0x57, 0x4E, 0x69, 0xA4, 0x58, 0xFE, 0xA3, + 0xF4, 0x93, 0x3D, 0x7E, 0x0D, 0x95, 0x74, 0x8F, 0x72, 0x8E, 0xB6, 0x58, + 0x71, 0x8B, 0xCD, 0x58, 0x82, 0x15, 0x4A, 0xEE, 0x7B, 0x54, 0xA4, 0x1D, + 0xC2, 0x5A, 0x59, 0xB5, 0x9C, 0x30, 0xD5, 0x39, 0x2A, 0xF2, 0x60, 0x13, + 0xC5, 0xD1, 0xB0, 0x23, 0x28, 0x60, 0x85, 0xF0, 0xCA, 0x41, 0x79, 0x18, + 0xB8, 0xDB, 0x38, 0xEF, 0x8E, 0x79, 0xDC, 0xB0, 0x60, 0x3A, 0x18, 0x0E, + 0x6C, 0x9E, 0x0E, 0x8B, 0xB0, 0x1E, 0x8A, 0x3E, 0xD7, 0x15, 0x77, 0xC1, + 0xBD, 0x31, 0x4B, 0x27, 0x78, 0xAF, 0x2F, 0xDA, 0x55, 0x60, 0x5C, 0x60, + 0xE6, 0x55, 0x25, 0xF3, 0xAA, 0x55, 0xAB, 0x94, 0x57, 0x48, 0x98, 0x62, + 0x63, 0xE8, 0x14, 0x40, 0x55, 0xCA, 0x39, 0x6A, 0x2A, 0xAB, 0x10, 0xB6, + 0xB4, 0xCC, 0x5C, 0x34, 0x11, 0x41, 0xE8, 0xCE, 0xA1, 0x54, 0x86, 0xAF, + 0x7C, 0x72, 0xE9, 0x93, 0xB3, 0xEE, 0x14, 0x11, 0x63, 0x6F, 0xBC, 0x2A, + 0x2B, 0xA9, 0xC5, 0x5D, 0x74, 0x18, 0x31, 0xF6, 0xCE, 0x5C, 0x3E, 0x16, + 0x9B, 0x87, 0x93, 0x1E, 0xAF, 0xD6, 0xBA, 0x33, 0x6C, 0x24, 0xCF, 0x5C, + 0x7A, 0x32, 0x53, 0x81, 0x28, 0x95, 0x86, 0x77, 0x3B, 0x8F, 0x48, 0x98, + 0x6B, 0x4B, 0xB9, 0xAF, 0xC4, 0xBF, 0xE8, 0x1B, 0x66, 0x28, 0x21, 0x93, + 0x61, 0xD8, 0x09, 0xCC, 0xFB, 0x21, 0xA9, 0x91, 0x48, 0x7C, 0xAC, 0x60, + 0x5D, 0xEC, 0x80, 0x32, 0xEF, 0x84, 0x5D, 0x5D, 0xE9, 0x85, 0x75, 0xB1, + 0xDC, 0x26, 0x23, 0x02, 0xEB, 0x65, 0x1B, 0x88, 0x23, 0x89, 0x3E, 0x81, + 0xD3, 0x96, 0xAC, 0xC5, 0x0F, 0x6D, 0x6F, 0xF3, 0x83, 0xF4, 0x42, 0x39, + 0x2E, 0x0B, 0x44, 0x82, 0xA4, 0x84, 0x20, 0x04, 0x69, 0xC8, 0xF0, 0x4A, + 0x9E, 0x1F, 0x9B, 0x5E, 0x21, 0xC6, 0x68, 0x42, 0xF6, 0xE9, 0x6C, 0x9A, + 0x67, 0x0C, 0x9C, 0x61, 0xAB, 0xD3, 0x88, 0xF0, 0x6A, 0x51, 0xA0, 0xD2, + 0xD8, 0x54, 0x2F, 0x68, 0x96, 0x0F, 0xA7, 0x28, 0xAB, 0x51, 0x33, 0xA3, + 0x6E, 0xEF, 0x0B, 0x6C, 0x13, 0x7A, 0x3B, 0xE4, 0xBA, 0x3B, 0xF0, 0x50, + 0x7E, 0xFB, 0x2A, 0x98, 0xA1, 0xF1, 0x65, 0x1D, 0x39, 0xAF, 0x01, 0x76, + 0x66, 0xCA, 0x59, 0x3E, 0x82, 0x43, 0x0E, 0x88, 0x8C, 0xEE, 0x86, 0x19, + 0x45, 0x6F, 0x9F, 0xB4, 0x7D, 0x84, 0xA5, 0xC3, 0x3B, 0x8B, 0x5E, 0xBE, + 0xE0, 0x6F, 0x75, 0xD8, 0x85, 0xC1, 0x20, 0x73, 0x40, 0x1A, 0x44, 0x9F, + 0x56, 0xC1, 0x6A, 0xA6, 0x4E, 0xD3, 0xAA, 0x62, 0x36, 0x3F, 0x77, 0x06, + 0x1B, 0xFE, 0xDF, 0x72, 0x42, 0x9B, 0x02, 0x3D, 0x37, 0xD0, 0xD7, 0x24, + 0xD0, 0x0A, 0x12, 0x48, 0xDB, 0x0F, 0xEA, 0xD3, 0x49, 0xF1, 0xC0, 0x9B, + 0x07, 0x53, 0x72, 0xC9, 0x80, 0x99, 0x1B, 0x7B, 0x25, 0xD4, 0x79, 0xD8, + 0xF6, 0xE8, 0xDE, 0xF7, 0xE3, 0xFE, 0x50, 0x1A, 0xB6, 0x79, 0x4C, 0x3B, + 0x97, 0x6C, 0xE0, 0xBD, 0x04, 0xC0, 0x06, 0xBA, 0xC1, 0xA9, 0x4F, 0xB6, + 0x40, 0x9F, 0x60, 0xC4, 0x5E, 0x5C, 0x9E, 0xC2, 0x19, 0x6A, 0x24, 0x63, + 0x68, 0xFB, 0x6F, 0xAF, 0x3E, 0x6C, 0x53, 0xB5, 0x13, 0x39, 0xB2, 0xEB, + 0x3B, 0x52, 0xEC, 0x6F, 0x6D, 0xFC, 0x51, 0x1F, 0x9B, 0x30, 0x95, 0x2C, + 0xCC, 0x81, 0x45, 0x44, 0xAF, 0x5E, 0xBD, 0x09, 0xBE, 0xE3, 0xD0, 0x04, + 0xDE, 0x33, 0x4A, 0xFD, 0x66, 0x0F, 0x28, 0x07, 0x19, 0x2E, 0x4B, 0xB3, + 0xC0, 0xCB, 0xA8, 0x57, 0x45, 0xC8, 0x74, 0x0F, 0xD2, 0x0B, 0x5F, 0x39, + 0xB9, 0xD3, 0xFB, 0xDB, 0x55, 0x79, 0xC0, 0xBD, 0x1A, 0x60, 0x32, 0x0A, + 0xD6, 0xA1, 0x00, 0xC6, 0x40, 0x2C, 0x72, 0x79, 0x67, 0x9F, 0x25, 0xFE, + 0xFB, 0x1F, 0xA3, 0xCC, 0x8E, 0xA5, 0xE9, 0xF8, 0xDB, 0x32, 0x22, 0xF8, + 0x3C, 0x75, 0x16, 0xDF, 0xFD, 0x61, 0x6B, 0x15, 0x2F, 0x50, 0x1E, 0xC8, + 0xAD, 0x05, 0x52, 0xAB, 0x32, 0x3D, 0xB5, 0xFA, 0xFD, 0x23, 0x87, 0x60, + 0x53, 0x31, 0x7B, 0x48, 0x3E, 0x00, 0xDF, 0x82, 0x9E, 0x5C, 0x57, 0xBB, + 0xCA, 0x6F, 0x8C, 0xA0, 0x1A, 0x87, 0x56, 0x2E, 0xDF, 0x17, 0x69, 0xDB, + 0xD5, 0x42, 0xA8, 0xF6, 0x28, 0x7E, 0xFF, 0xC3, 0xAC, 0x67, 0x32, 0xC6, + 0x8C, 0x4F, 0x55, 0x73, 0x69, 0x5B, 0x27, 0xB0, 0xBB, 0xCA, 0x58, 0xC8, + 0xE1, 0xFF, 0xA3, 0x5D, 0xB8, 0xF0, 0x11, 0xA0, 0x10, 0xFA, 0x3D, 0x98, + 0xFD, 0x21, 0x83, 0xB8, 0x4A, 0xFC, 0xB5, 0x6C, 0x2D, 0xD1, 0xD3, 0x5B, + 0x9A, 0x53, 0xE4, 0x79, 0xB6, 0xF8, 0x45, 0x65, 0xD2, 0x8E, 0x49, 0xBC, + 0x4B, 0xFB, 0x97, 0x90, 0xE1, 0xDD, 0xF2, 0xDA, 0xA4, 0xCB, 0x7E, 0x33, + 0x62, 0xFB, 0x13, 0x41, 0xCE, 0xE4, 0xC6, 0xE8, 0xEF, 0x20, 0xCA, 0xDA, + 0x36, 0x77, 0x4C, 0x01, 0xD0, 0x7E, 0x9E, 0xFE, 0x2B, 0xF1, 0x1F, 0xB4, + 0x95, 0xDB, 0xDA, 0x4D, 0xAE, 0x90, 0x91, 0x98, 0xEA, 0xAD, 0x8E, 0x71, + 0x6B, 0x93, 0xD5, 0xA0, 0xD0, 0x8E, 0xD1, 0xD0, 0xAF, 0xC7, 0x25, 0xE0, + 0x8E, 0x3C, 0x5B, 0x2F, 0x8E, 0x75, 0x94, 0xB7, 0x8F, 0xF6, 0xE2, 0xFB, + 0xF2, 0x12, 0x2B, 0x64, 0x88, 0x88, 0xB8, 0x12, 0x90, 0x0D, 0xF0, 0x1C, + 0x4F, 0xAD, 0x5E, 0xA0, 0x68, 0x8F, 0xC3, 0x1C, 0xD1, 0xCF, 0xF1, 0x91, + 0xB3, 0xA8, 0xC1, 0xAD, 0x2F, 0x2F, 0x22, 0x18, 0xBE, 0x0E, 0x17, 0x77, + 0xEA, 0x75, 0x2D, 0xFE, 0x8B, 0x02, 0x1F, 0xA1, 0xE5, 0xA0, 0xCC, 0x0F, + 0xB5, 0x6F, 0x74, 0xE8, 0x18, 0xAC, 0xF3, 0xD6, 0xCE, 0x89, 0xE2, 0x99, + 0xB4, 0xA8, 0x4F, 0xE0, 0xFD, 0x13, 0xE0, 0xB7, 0x7C, 0xC4, 0x3B, 0x81, + 0xD2, 0xAD, 0xA8, 0xD9, 0x16, 0x5F, 0xA2, 0x66, 0x80, 0x95, 0x77, 0x05, + 0x93, 0xCC, 0x73, 0x14, 0x21, 0x1A, 0x14, 0x77, 0xE6, 0xAD, 0x20, 0x65, + 0x77, 0xB5, 0xFA, 0x86, 0xC7, 0x54, 0x42, 0xF5, 0xFB, 0x9D, 0x35, 0xCF, + 0xEB, 0xCD, 0xAF, 0x0C, 0x7B, 0x3E, 0x89, 0xA0, 0xD6, 0x41, 0x1B, 0xD3, + 0xAE, 0x1E, 0x7E, 0x49, 0x00, 0x25, 0x0E, 0x2D, 0x20, 0x71, 0xB3, 0x5E, + 0x22, 0x68, 0x00, 0xBB, 0x57, 0xB8, 0xE0, 0xAF, 0x24, 0x64, 0x36, 0x9B, + 0xF0, 0x09, 0xB9, 0x1E, 0x55, 0x63, 0x91, 0x1D, 0x59, 0xDF, 0xA6, 0xAA, + 0x78, 0xC1, 0x43, 0x89, 0xD9, 0x5A, 0x53, 0x7F, 0x20, 0x7D, 0x5B, 0xA2, + 0x02, 0xE5, 0xB9, 0xC5, 0x83, 0x26, 0x03, 0x76, 0x62, 0x95, 0xCF, 0xA9, + 0x11, 0xC8, 0x19, 0x68, 0x4E, 0x73, 0x4A, 0x41, 0xB3, 0x47, 0x2D, 0xCA, + 0x7B, 0x14, 0xA9, 0x4A, 0x1B, 0x51, 0x00, 0x52, 0x9A, 0x53, 0x29, 0x15, + 0xD6, 0x0F, 0x57, 0x3F, 0xBC, 0x9B, 0xC6, 0xE4, 0x2B, 0x60, 0xA4, 0x76, + 0x81, 0xE6, 0x74, 0x00, 0x08, 0xBA, 0x6F, 0xB5, 0x57, 0x1B, 0xE9, 0x1F, + 0xF2, 0x96, 0xEC, 0x6B, 0x2A, 0x0D, 0xD9, 0x15, 0xB6, 0x63, 0x65, 0x21, + 0xE7, 0xB9, 0xF9, 0xB6, 0xFF, 0x34, 0x05, 0x2E, 0xC5, 0x85, 0x56, 0x64, + 0x53, 0xB0, 0x2D, 0x5D, 0xA9, 0x9F, 0x8F, 0xA1, 0x08, 0xBA, 0x47, 0x99, + 0x6E, 0x85, 0x07, 0x6A, 0x4B, 0x7A, 0x70, 0xE9, 0xB5, 0xB3, 0x29, 0x44, + 0xDB, 0x75, 0x09, 0x2E, 0xC4, 0x19, 0x26, 0x23, 0xAD, 0x6E, 0xA6, 0xB0, + 0x49, 0xA7, 0xDF, 0x7D, 0x9C, 0xEE, 0x60, 0xB8, 0x8F, 0xED, 0xB2, 0x66, + 0xEC, 0xAA, 0x8C, 0x71, 0x69, 0x9A, 0x18, 0xFF, 0x56, 0x64, 0x52, 0x6C, + 0xC2, 0xB1, 0x9E, 0xE1, 0x19, 0x36, 0x02, 0xA5, 0x75, 0x09, 0x4C, 0x29, + 0xA0, 0x59, 0x13, 0x40, 0xE4, 0x18, 0x3A, 0x3E, 0x3F, 0x54, 0x98, 0x9A, + 0x5B, 0x42, 0x9D, 0x65, 0x6B, 0x8F, 0xE4, 0xD6, 0x99, 0xF7, 0x3F, 0xD6, + 0xA1, 0xD2, 0x9C, 0x07, 0xEF, 0xE8, 0x30, 0xF5, 0x4D, 0x2D, 0x38, 0xE6, + 0xF0, 0x25, 0x5D, 0xC1, 0x4C, 0xDD, 0x20, 0x86, 0x84, 0x70, 0xEB, 0x26, + 0x63, 0x82, 0xE9, 0xC6, 0x02, 0x1E, 0xCC, 0x5E, 0x09, 0x68, 0x6B, 0x3F, + 0x3E, 0xBA, 0xEF, 0xC9, 0x3C, 0x97, 0x18, 0x14, 0x6B, 0x6A, 0x70, 0xA1, + 0x68, 0x7F, 0x35, 0x84, 0x52, 0xA0, 0xE2, 0x86, 0xB7, 0x9C, 0x53, 0x05, + 0xAA, 0x50, 0x07, 0x37, 0x3E, 0x07, 0x84, 0x1C, 0x7F, 0xDE, 0xAE, 0x5C, + 0x8E, 0x7D, 0x44, 0xEC, 0x57, 0x16, 0xF2, 0xB8, 0xB0, 0x3A, 0xDA, 0x37, + 0xF0, 0x50, 0x0C, 0x0D, 0xF0, 0x1C, 0x1F, 0x04, 0x02, 0x00, 0xB3, 0xFF, + 0xAE, 0x0C, 0xF5, 0x1A, 0x3C, 0xB5, 0x74, 0xB2, 0x25, 0x83, 0x7A, 0x58, + 0xDC, 0x09, 0x21, 0xBD, 0xD1, 0x91, 0x13, 0xF9, 0x7C, 0xA9, 0x2F, 0xF6, + 0x94, 0x32, 0x47, 0x73, 0x22, 0xF5, 0x47, 0x01, 0x3A, 0xE5, 0xE5, 0x81, + 0x37, 0xC2, 0xDA, 0xDC, 0xC8, 0xB5, 0x76, 0x34, 0x9A, 0xF3, 0xDD, 0xA7, + 0xA9, 0x44, 0x61, 0x46, 0x0F, 0xD0, 0x03, 0x0E, 0xEC, 0xC8, 0xC7, 0x3E, + 0xA4, 0x75, 0x1E, 0x41, 0xE2, 0x38, 0xCD, 0x99, 0x3B, 0xEA, 0x0E, 0x2F, + 0x32, 0x80, 0xBB, 0xA1, 0x18, 0x3E, 0xB3, 0x31, 0x4E, 0x54, 0x8B, 0x38, + 0x4F, 0x6D, 0xB9, 0x08, 0x6F, 0x42, 0x0D, 0x03, 0xF6, 0x0A, 0x04, 0xBF, + 0x2C, 0xB8, 0x12, 0x90, 0x24, 0x97, 0x7C, 0x79, 0x56, 0x79, 0xB0, 0x72, + 0xBC, 0xAF, 0x89, 0xAF, 0xDE, 0x9A, 0x77, 0x1F, 0xD9, 0x93, 0x08, 0x10, + 0xB3, 0x8B, 0xAE, 0x12, 0xDC, 0xCF, 0x3F, 0x2E, 0x55, 0x12, 0x72, 0x1F, + 0x2E, 0x6B, 0x71, 0x24, 0x50, 0x1A, 0xDD, 0xE6, 0x9F, 0x84, 0xCD, 0x87, + 0x7A, 0x58, 0x47, 0x18, 0x74, 0x08, 0xDA, 0x17, 0xBC, 0x9F, 0x9A, 0xBC, + 0xE9, 0x4B, 0x7D, 0x8C, 0xEC, 0x7A, 0xEC, 0x3A, 0xDB, 0x85, 0x1D, 0xFA, + 0x63, 0x09, 0x43, 0x66, 0xC4, 0x64, 0xC3, 0xD2, 0xEF, 0x1C, 0x18, 0x47, + 0x32, 0x15, 0xD8, 0x08, 0xDD, 0x43, 0x3B, 0x37, 0x24, 0xC2, 0xBA, 0x16, + 0x12, 0xA1, 0x4D, 0x43, 0x2A, 0x65, 0xC4, 0x51, 0x50, 0x94, 0x00, 0x02, + 0x13, 0x3A, 0xE4, 0xDD, 0x71, 0xDF, 0xF8, 0x9E, 0x10, 0x31, 0x4E, 0x55, + 0x81, 0xAC, 0x77, 0xD6, 0x5F, 0x11, 0x19, 0x9B, 0x04, 0x35, 0x56, 0xF1, + 0xD7, 0xA3, 0xC7, 0x6B, 0x3C, 0x11, 0x18, 0x3B, 0x59, 0x24, 0xA5, 0x09, + 0xF2, 0x8F, 0xE6, 0xED, 0x97, 0xF1, 0xFB, 0xFA, 0x9E, 0xBA, 0xBF, 0x2C, + 0x1E, 0x15, 0x3C, 0x6E, 0x86, 0xE3, 0x45, 0x70, 0xEA, 0xE9, 0x6F, 0xB1, + 0x86, 0x0E, 0x5E, 0x0A, 0x5A, 0x3E, 0x2A, 0xB3, 0x77, 0x1F, 0xE7, 0x1C, + 0x4E, 0x3D, 0x06, 0xFA, 0x29, 0x65, 0xDC, 0xB9, 0x99, 0xE7, 0x1D, 0x0F, + 0x80, 0x3E, 0x89, 0xD6, 0x52, 0x66, 0xC8, 0x25, 0x2E, 0x4C, 0xC9, 0x78, + 0x9C, 0x10, 0xB3, 0x6A, 0xC6, 0x15, 0x0E, 0xBA, 0x94, 0xE2, 0xEA, 0x78, + 0xA6, 0xFC, 0x3C, 0x53, 0x1E, 0x0A, 0x2D, 0xF4, 0xF2, 0xF7, 0x4E, 0xA7, + 0x36, 0x1D, 0x2B, 0x3D, 0x19, 0x39, 0x26, 0x0F, 0x19, 0xC2, 0x79, 0x60, + 0x52, 0x23, 0xA7, 0x08, 0xF7, 0x13, 0x12, 0xB6, 0xEB, 0xAD, 0xFE, 0x6E, + 0xEA, 0xC3, 0x1F, 0x66, 0xE3, 0xBC, 0x45, 0x95, 0xA6, 0x7B, 0xC8, 0x83, + 0xB1, 0x7F, 0x37, 0xD1, 0x01, 0x8C, 0xFF, 0x28, 0xC3, 0x32, 0xDD, 0xEF, + 0xBE, 0x6C, 0x5A, 0xA5, 0x65, 0x58, 0x21, 0x85, 0x68, 0xAB, 0x97, 0x02, + 0xEE, 0xCE, 0xA5, 0x0F, 0xDB, 0x2F, 0x95, 0x3B, 0x2A, 0xEF, 0x7D, 0xAD, + 0x5B, 0x6E, 0x2F, 0x84, 0x15, 0x21, 0xB6, 0x28, 0x29, 0x07, 0x61, 0x70, + 0xEC, 0xDD, 0x47, 0x75, 0x61, 0x9F, 0x15, 0x10, 0x13, 0xCC, 0xA8, 0x30, + 0xEB, 0x61, 0xBD, 0x96, 0x03, 0x34, 0xFE, 0x1E, 0xAA, 0x03, 0x63, 0xCF, + 0xB5, 0x73, 0x5C, 0x90, 0x4C, 0x70, 0xA2, 0x39, 0xD5, 0x9E, 0x9E, 0x0B, + 0xCB, 0xAA, 0xDE, 0x14, 0xEE, 0xCC, 0x86, 0xBC, 0x60, 0x62, 0x2C, 0xA7, + 0x9C, 0xAB, 0x5C, 0xAB, 0xB2, 0xF3, 0x84, 0x6E, 0x64, 0x8B, 0x1E, 0xAF, + 0x19, 0xBD, 0xF0, 0xCA, 0xA0, 0x23, 0x69, 0xB9, 0x65, 0x5A, 0xBB, 0x50, + 0x40, 0x68, 0x5A, 0x32, 0x3C, 0x2A, 0xB4, 0xB3, 0x31, 0x9E, 0xE9, 0xD5, + 0xC0, 0x21, 0xB8, 0xF7, 0x9B, 0x54, 0x0B, 0x19, 0x87, 0x5F, 0xA0, 0x99, + 0x95, 0xF7, 0x99, 0x7E, 0x62, 0x3D, 0x7D, 0xA8, 0xF8, 0x37, 0x88, 0x9A, + 0x97, 0xE3, 0x2D, 0x77, 0x11, 0xED, 0x93, 0x5F, 0x16, 0x68, 0x12, 0x81, + 0x0E, 0x35, 0x88, 0x29, 0xC7, 0xE6, 0x1F, 0xD6, 0x96, 0xDE, 0xDF, 0xA1, + 0x78, 0x58, 0xBA, 0x99, 0x57, 0xF5, 0x84, 0xA5, 0x1B, 0x22, 0x72, 0x63, + 0x9B, 0x83, 0xC3, 0xFF, 0x1A, 0xC2, 0x46, 0x96, 0xCD, 0xB3, 0x0A, 0xEB, + 0x53, 0x2E, 0x30, 0x54, 0x8F, 0xD9, 0x48, 0xE4, 0x6D, 0xBC, 0x31, 0x28, + 0x58, 0xEB, 0xF2, 0xEF, 0x34, 0xC6, 0xFF, 0xEA, 0xFE, 0x28, 0xED, 0x61, + 0xEE, 0x7C, 0x3C, 0x73, 0x5D, 0x4A, 0x14, 0xD9, 0xE8, 0x64, 0xB7, 0xE3, + 0x42, 0x10, 0x5D, 0x14, 0x20, 0x3E, 0x13, 0xE0, 0x45, 0xEE, 0xE2, 0xB6, + 0xA3, 0xAA, 0xAB, 0xEA, 0xDB, 0x6C, 0x4F, 0x15, 0xFA, 0xCB, 0x4F, 0xD0, + 0xC7, 0x42, 0xF4, 0x42, 0xEF, 0x6A, 0xBB, 0xB5, 0x65, 0x4F, 0x3B, 0x1D, + 0x41, 0xCD, 0x21, 0x05, 0xD8, 0x1E, 0x79, 0x9E, 0x86, 0x85, 0x4D, 0xC7, + 0xE4, 0x4B, 0x47, 0x6A, 0x3D, 0x81, 0x62, 0x50, 0xCF, 0x62, 0xA1, 0xF2, + 0x5B, 0x8D, 0x26, 0x46, 0xFC, 0x88, 0x83, 0xA0, 0xC1, 0xC7, 0xB6, 0xA3, + 0x7F, 0x15, 0x24, 0xC3, 0x69, 0xCB, 0x74, 0x92, 0x47, 0x84, 0x8A, 0x0B, + 0x56, 0x92, 0xB2, 0x85, 0x09, 0x5B, 0xBF, 0x00, 0xAD, 0x19, 0x48, 0x9D, + 0x14, 0x62, 0xB1, 0x74, 0x23, 0x82, 0x0D, 0x00, 0x58, 0x42, 0x8D, 0x2A, + 0x0C, 0x55, 0xF5, 0xEA, 0x1D, 0xAD, 0xF4, 0x3E, 0x23, 0x3F, 0x70, 0x61, + 0x33, 0x72, 0xF0, 0x92, 0x8D, 0x93, 0x7E, 0x41, 0xD6, 0x5F, 0xEC, 0xF1, + 0x6C, 0x22, 0x3B, 0xDB, 0x7C, 0xDE, 0x37, 0x59, 0xCB, 0xEE, 0x74, 0x60, + 0x40, 0x85, 0xF2, 0xA7, 0xCE, 0x77, 0x32, 0x6E, 0xA6, 0x07, 0x80, 0x84, + 0x19, 0xF8, 0x50, 0x9E, 0xE8, 0xEF, 0xD8, 0x55, 0x61, 0xD9, 0x97, 0x35, + 0xA9, 0x69, 0xA7, 0xAA, 0xC5, 0x0C, 0x06, 0xC2, 0x5A, 0x04, 0xAB, 0xFC, + 0x80, 0x0B, 0xCA, 0xDC, 0x9E, 0x44, 0x7A, 0x2E, 0xC3, 0x45, 0x34, 0x84, + 0xFD, 0xD5, 0x67, 0x05, 0x0E, 0x1E, 0x9E, 0xC9, 0xDB, 0x73, 0xDB, 0xD3, + 0x10, 0x55, 0x88, 0xCD, 0x67, 0x5F, 0xDA, 0x79, 0xE3, 0x67, 0x43, 0x40, + 0xC5, 0xC4, 0x34, 0x65, 0x71, 0x3E, 0x38, 0xD8, 0x3D, 0x28, 0xF8, 0x9E, + 0xF1, 0x6D, 0xFF, 0x20, 0x15, 0x3E, 0x21, 0xE7, 0x8F, 0xB0, 0x3D, 0x4A, + 0xE6, 0xE3, 0x9F, 0x2B, 0xDB, 0x83, 0xAD, 0xF7, 0xE9, 0x3D, 0x5A, 0x68, + 0x94, 0x81, 0x40, 0xF7, 0xF6, 0x4C, 0x26, 0x1C, 0x94, 0x69, 0x29, 0x34, + 0x41, 0x15, 0x20, 0xF7, 0x76, 0x02, 0xD4, 0xF7, 0xBC, 0xF4, 0x6B, 0x2E, + 0xD4, 0xA1, 0x00, 0x68, 0xD4, 0x08, 0x24, 0x71, 0x33, 0x20, 0xF4, 0x6A, + 0x43, 0xB7, 0xD4, 0xB7, 0x50, 0x00, 0x61, 0xAF, 0x1E, 0x39, 0xF6, 0x2E, + 0x97, 0x24, 0x45, 0x46, +}; + +alignas(16) const unsigned char kRandenRoundKeys[kKeyBytes] = { + 0x44, 0x73, 0x70, 0x03, 0x2E, 0x8A, 0x19, 0x13, 0xD3, 0x08, 0xA3, 0x85, + 0x88, 0x6A, 0x3F, 0x24, 0x89, 0x6C, 0x4E, 0xEC, 0x98, 0xFA, 0x2E, 0x08, + 0xD0, 0x31, 0x9F, 0x29, 0x22, 0x38, 0x09, 0xA4, 0x6C, 0x0C, 0xE9, 0x34, + 0xCF, 0x66, 0x54, 0xBE, 0x77, 0x13, 0xD0, 0x38, 0xE6, 0x21, 0x28, 0x45, + 0x17, 0x09, 0x47, 0xB5, 0xB5, 0xD5, 0x84, 0x3F, 0xDD, 0x50, 0x7C, 0xC9, + 0xB7, 0x29, 0xAC, 0xC0, 0xAC, 0xB5, 0xDF, 0x98, 0xA6, 0x0B, 0x31, 0xD1, + 0x1B, 0xFB, 0x79, 0x89, 0xD9, 0xD5, 0x16, 0x92, 0x96, 0x7E, 0x26, 0x6A, + 0xED, 0xAF, 0xE1, 0xB8, 0xB7, 0xDF, 0x1A, 0xD0, 0xDB, 0x72, 0xFD, 0x2F, + 0xF7, 0x6C, 0x91, 0xB3, 0x47, 0x99, 0xA1, 0x24, 0x99, 0x7F, 0x2C, 0xF1, + 0x45, 0x90, 0x7C, 0xBA, 0x69, 0x4E, 0x57, 0x71, 0xD8, 0x20, 0x69, 0x63, + 0x16, 0xFC, 0x8E, 0x85, 0xE2, 0xF2, 0x01, 0x08, 0x58, 0xB6, 0x8E, 0x72, + 0x8F, 0x74, 0x95, 0x0D, 0x7E, 0x3D, 0x93, 0xF4, 0xA3, 0xFE, 0x58, 0xA4, + 0xB5, 0x59, 0x5A, 0xC2, 0x1D, 0xA4, 0x54, 0x7B, 0xEE, 0x4A, 0x15, 0x82, + 0x58, 0xCD, 0x8B, 0x71, 0xF0, 0x85, 0x60, 0x28, 0x23, 0xB0, 0xD1, 0xC5, + 0x13, 0x60, 0xF2, 0x2A, 0x39, 0xD5, 0x30, 0x9C, 0x0E, 0x18, 0x3A, 0x60, + 0xB0, 0xDC, 0x79, 0x8E, 0xEF, 0x38, 0xDB, 0xB8, 0x18, 0x79, 0x41, 0xCA, + 0x27, 0x4B, 0x31, 0xBD, 0xC1, 0x77, 0x15, 0xD7, 0x3E, 0x8A, 0x1E, 0xB0, + 0x8B, 0x0E, 0x9E, 0x6C, 0x94, 0xAB, 0x55, 0xAA, 0xF3, 0x25, 0x55, 0xE6, + 0x60, 0x5C, 0x60, 0x55, 0xDA, 0x2F, 0xAF, 0x78, 0xB6, 0x10, 0xAB, 0x2A, + 0x6A, 0x39, 0xCA, 0x55, 0x40, 0x14, 0xE8, 0x63, 0x62, 0x98, 0x48, 0x57, + 0x93, 0xE9, 0x72, 0x7C, 0xAF, 0x86, 0x54, 0xA1, 0xCE, 0xE8, 0x41, 0x11, + 0x34, 0x5C, 0xCC, 0xB4, 0xF6, 0x31, 0x18, 0x74, 0x5D, 0xC5, 0xA9, 0x2B, + 0x2A, 0xBC, 0x6F, 0x63, 0x11, 0x14, 0xEE, 0xB3, 0x5C, 0xCF, 0x24, 0x6C, + 0x33, 0xBA, 0xD6, 0xAF, 0x1E, 0x93, 0x87, 0x9B, 0x16, 0x3E, 0x5C, 0xCE, + 0xAF, 0xB9, 0x4B, 0x6B, 0x98, 0x48, 0x8F, 0x3B, 0x77, 0x86, 0x95, 0x28, + 0x81, 0x53, 0x32, 0x7A, 0x91, 0xA9, 0x21, 0xFB, 0xCC, 0x09, 0xD8, 0x61, + 0x93, 0x21, 0x28, 0x66, 0x1B, 0xE8, 0xBF, 0xC4, 0xB1, 0x75, 0x85, 0xE9, + 0x5D, 0x5D, 0x84, 0xEF, 0x32, 0x80, 0xEC, 0x5D, 0x60, 0xAC, 0x7C, 0x48, + 0xC5, 0xAC, 0x96, 0xD3, 0x81, 0x3E, 0x89, 0x23, 0x88, 0x1B, 0x65, 0xEB, + 0x02, 0x23, 0x26, 0xDC, 0x04, 0x20, 0x84, 0xA4, 0x82, 0x44, 0x0B, 0x2E, + 0x39, 0x42, 0xF4, 0x83, 0xF3, 0x6F, 0x6D, 0x0F, 0x9A, 0x6C, 0xE9, 0xF6, + 0x42, 0x68, 0xC6, 0x21, 0x5E, 0x9B, 0x1F, 0x9E, 0x4A, 0xF0, 0xC8, 0x69, + 0x68, 0x2F, 0x54, 0xD8, 0xD2, 0xA0, 0x51, 0x6A, 0xF0, 0x88, 0xD3, 0xAB, + 0x61, 0x9C, 0x0C, 0x67, 0xE4, 0x3B, 0x7A, 0x13, 0x6C, 0x0B, 0xEF, 0x6E, + 0xA3, 0x33, 0x51, 0xAB, 0x28, 0xA7, 0x0F, 0x96, 0x76, 0x01, 0xAF, 0x39, + 0x1D, 0x65, 0xF1, 0xA1, 0x98, 0x2A, 0xFB, 0x7E, 0x50, 0xF0, 0x3B, 0xBA, + 0xB4, 0x9F, 0x6F, 0x45, 0x19, 0x86, 0xEE, 0x8C, 0x88, 0x0E, 0x43, 0x82, + 0x3E, 0x59, 0xCA, 0x66, 0x73, 0x20, 0xC1, 0x85, 0xD8, 0x75, 0x6F, 0xE0, + 0xBE, 0x5E, 0x8B, 0x3B, 0xC3, 0xA5, 0x84, 0x7D, 0x06, 0x77, 0x3F, 0x36, + 0x62, 0xAA, 0xD3, 0x4E, 0xA6, 0x6A, 0xC1, 0x56, 0x9F, 0x44, 0x1A, 0x40, + 0x48, 0x12, 0x0A, 0xD0, 0x24, 0xD7, 0xD0, 0x37, 0x3D, 0x02, 0x9B, 0x42, + 0x72, 0xDF, 0xFE, 0x1B, 0x7B, 0x1B, 0x99, 0x80, 0xC9, 0x72, 0x53, 0x07, + 0x9B, 0xC0, 0xF1, 0x49, 0xD3, 0xEA, 0x0F, 0xDB, 0x3B, 0x4C, 0x79, 0xB6, + 0x1A, 0x50, 0xFE, 0xE3, 0xF7, 0xDE, 0xE8, 0xF6, 0xD8, 0x79, 0xD4, 0x25, + 0xC4, 0x60, 0x9F, 0x40, 0xB6, 0x4F, 0xA9, 0xC1, 0xBA, 0x06, 0xC0, 0x04, + 0xBD, 0xE0, 0x6C, 0x97, 0xB5, 0x53, 0x6C, 0x3E, 0xAF, 0x6F, 0xFB, 0x68, + 0x63, 0x24, 0x6A, 0x19, 0xC2, 0x9E, 0x5C, 0x5E, 0x2C, 0x95, 0x30, 0x9B, + 0x1F, 0x51, 0xFC, 0x6D, 0x6F, 0xEC, 0x52, 0x3B, 0xEB, 0xB2, 0x39, 0x13, + 0xFD, 0x4A, 0x33, 0xDE, 0x04, 0xD0, 0xE3, 0xBE, 0x09, 0xBD, 0x5E, 0xAF, + 0x44, 0x45, 0x81, 0xCC, 0x0F, 0x74, 0xC8, 0x45, 0x57, 0xA8, 0xCB, 0xC0, + 0xB3, 0x4B, 0x2E, 0x19, 0x07, 0x28, 0x0F, 0x66, 0x0A, 0x32, 0x60, 0x1A, + 0xBD, 0xC0, 0x79, 0x55, 0xDB, 0xFB, 0xD3, 0xB9, 0x39, 0x5F, 0x0B, 0xD2, + 0xCC, 0xA3, 0x1F, 0xFB, 0xFE, 0x25, 0x9F, 0x67, 0x79, 0x72, 0x2C, 0x40, + 0xC6, 0x00, 0xA1, 0xD6, 0x15, 0x6B, 0x61, 0xFD, 0xDF, 0x16, 0x75, 0x3C, + 0xF8, 0x22, 0x32, 0xDB, 0xF8, 0xE9, 0xA5, 0x8E, 0x60, 0x87, 0x23, 0xFD, + 0xFA, 0xB5, 0x3D, 0x32, 0xAB, 0x52, 0x05, 0xAD, 0xC8, 0x1E, 0x50, 0x2F, + 0xA0, 0x8C, 0x6F, 0xCA, 0xBB, 0x57, 0x5C, 0x9E, 0x82, 0xDF, 0x00, 0x3E, + 0x48, 0x7B, 0x31, 0x53, 0xC3, 0xFF, 0x7E, 0x28, 0xF6, 0xA8, 0x42, 0xD5, + 0xDB, 0x69, 0x17, 0xDF, 0x2E, 0x56, 0x87, 0x1A, 0xC8, 0x58, 0xCA, 0xBB, + 0xB0, 0x27, 0x5B, 0x69, 0x73, 0x55, 0x4F, 0x8C, 0xC6, 0x32, 0x67, 0xAC, + 0xB8, 0x83, 0x21, 0xFD, 0x98, 0x3D, 0xFA, 0x10, 0xA0, 0x11, 0xF0, 0xB8, + 0x5D, 0xA3, 0xFF, 0xE1, 0x65, 0x45, 0xF8, 0xB6, 0x79, 0xE4, 0x53, 0x9A, + 0x5B, 0xD3, 0xD1, 0x2D, 0x6C, 0xB5, 0xFC, 0x4A, 0x33, 0x7E, 0xCB, 0xA4, + 0xDA, 0xF2, 0xDD, 0xE1, 0x90, 0x97, 0xFB, 0x4B, 0xBC, 0x49, 0x8E, 0xD2, + 0x01, 0x4C, 0x77, 0x36, 0xDA, 0xCA, 0x20, 0xEF, 0xE8, 0xC6, 0xE4, 0xCE, + 0x41, 0x13, 0xFB, 0x62, 0x98, 0x91, 0x90, 0xAE, 0x4D, 0xDA, 0xDB, 0x95, + 0xB4, 0x1F, 0xF1, 0x2B, 0xFE, 0x9E, 0x7E, 0xD0, 0xE0, 0x25, 0xC7, 0xAF, + 0xD0, 0xD1, 0x8E, 0xD0, 0xA0, 0xD5, 0x93, 0x6B, 0x71, 0x8E, 0xAD, 0xEA, + 0x64, 0x2B, 0x12, 0xF2, 0xFB, 0xE2, 0xF6, 0x8F, 0xB7, 0x94, 0x75, 0x8E, + 0x2F, 0x5B, 0x3C, 0x8E, 0x1C, 0xC3, 0x8F, 0x68, 0xA0, 0x5E, 0xAD, 0x4F, + 0x1C, 0xF0, 0x0D, 0x90, 0x12, 0xB8, 0x88, 0x88, 0x77, 0x17, 0x0E, 0xBE, + 0x18, 0x22, 0x2F, 0x2F, 0xAD, 0xC1, 0xA8, 0xB3, 0x91, 0xF1, 0xCF, 0xD1, + 0xE8, 0x74, 0x6F, 0xB5, 0x0F, 0xCC, 0xA0, 0xE5, 0xA1, 0x1F, 0x02, 0x8B, + 0xFE, 0x2D, 0x75, 0xEA, 0xB7, 0xE0, 0x13, 0xFD, 0xE0, 0x4F, 0xA8, 0xB4, + 0x99, 0xE2, 0x89, 0xCE, 0xD6, 0xF3, 0xAC, 0x18, 0x05, 0x77, 0x95, 0x80, + 0x66, 0xA2, 0x5F, 0x16, 0xD9, 0xA8, 0xAD, 0xD2, 0x81, 0x3B, 0xC4, 0x7C, + 0x86, 0xFA, 0xB5, 0x77, 0x65, 0x20, 0xAD, 0xE6, 0x77, 0x14, 0x1A, 0x21, + 0x14, 0x73, 0xCC, 0x93, 0xA0, 0x89, 0x3E, 0x7B, 0x0C, 0xAF, 0xCD, 0xEB, + 0xCF, 0x35, 0x9D, 0xFB, 0xF5, 0x42, 0x54, 0xC7, 0x5E, 0xB3, 0x71, 0x20, + 0x2D, 0x0E, 0x25, 0x00, 0x49, 0x7E, 0x1E, 0xAE, 0xD3, 0x1B, 0x41, 0xD6, + 0x1E, 0xB9, 0x09, 0xF0, 0x9B, 0x36, 0x64, 0x24, 0xAF, 0xE0, 0xB8, 0x57, + 0xBB, 0x00, 0x68, 0x22, 0x7F, 0x53, 0x5A, 0xD9, 0x89, 0x43, 0xC1, 0x78, + 0xAA, 0xA6, 0xDF, 0x59, 0x1D, 0x91, 0x63, 0x55, 0xA9, 0xCF, 0x95, 0x62, + 0x76, 0x03, 0x26, 0x83, 0xC5, 0xB9, 0xE5, 0x02, 0xA2, 0x5B, 0x7D, 0x20, + 0x4A, 0xA9, 0x14, 0x7B, 0xCA, 0x2D, 0x47, 0xB3, 0x41, 0x4A, 0x73, 0x4E, + 0x68, 0x19, 0xC8, 0x11, 0xE4, 0xC6, 0x9B, 0xBC, 0x3F, 0x57, 0x0F, 0xD6, + 0x15, 0x29, 0x53, 0x9A, 0x52, 0x00, 0x51, 0x1B, 0x1F, 0xE9, 0x1B, 0x57, + 0xB5, 0x6F, 0xBA, 0x08, 0x00, 0x74, 0xE6, 0x81, 0x76, 0xA4, 0x60, 0x2B, + 0xB6, 0xF9, 0xB9, 0xE7, 0x21, 0x65, 0x63, 0xB6, 0x15, 0xD9, 0x0D, 0x2A, + 0x6B, 0xEC, 0x96, 0xF2, 0xA1, 0x8F, 0x9F, 0xA9, 0x5D, 0x2D, 0xB0, 0x53, + 0x64, 0x56, 0x85, 0xC5, 0x2E, 0x05, 0x34, 0xFF, 0x44, 0x29, 0xB3, 0xB5, + 0xE9, 0x70, 0x7A, 0x4B, 0x6A, 0x07, 0x85, 0x6E, 0x99, 0x47, 0xBA, 0x08, + 0x7D, 0xDF, 0xA7, 0x49, 0xB0, 0xA6, 0x6E, 0xAD, 0x23, 0x26, 0x19, 0xC4, + 0x2E, 0x09, 0x75, 0xDB, 0xFF, 0x18, 0x9A, 0x69, 0x71, 0x8C, 0xAA, 0xEC, + 0x66, 0xB2, 0xED, 0x8F, 0xB8, 0x60, 0xEE, 0x9C, 0x29, 0x4C, 0x09, 0x75, + 0xA5, 0x02, 0x36, 0x19, 0xE1, 0x9E, 0xB1, 0xC2, 0x6C, 0x52, 0x64, 0x56, + 0x65, 0x9D, 0x42, 0x5B, 0x9A, 0x98, 0x54, 0x3F, 0x3E, 0x3A, 0x18, 0xE4, + 0x40, 0x13, 0x59, 0xA0, 0xF5, 0x30, 0xE8, 0xEF, 0x07, 0x9C, 0xD2, 0xA1, + 0xD6, 0x3F, 0xF7, 0x99, 0xD6, 0xE4, 0x8F, 0x6B, 0x26, 0xEB, 0x70, 0x84, + 0x86, 0x20, 0xDD, 0x4C, 0xC1, 0x5D, 0x25, 0xF0, 0xE6, 0x38, 0x2D, 0x4D, + 0xC9, 0xEF, 0xBA, 0x3E, 0x3F, 0x6B, 0x68, 0x09, 0x5E, 0xCC, 0x1E, 0x02, + 0xC6, 0xE9, 0x82, 0x63, 0x86, 0xE2, 0xA0, 0x52, 0x84, 0x35, 0x7F, 0x68, + 0xA1, 0x70, 0x6A, 0x6B, 0x14, 0x18, 0x97, 0x3C, 0x5C, 0xAE, 0xDE, 0x7F, + 0x1C, 0x84, 0x07, 0x3E, 0x37, 0x07, 0x50, 0xAA, 0x05, 0x53, 0x9C, 0xB7, + 0x0D, 0x0C, 0x50, 0xF0, 0x37, 0xDA, 0x3A, 0xB0, 0xB8, 0xF2, 0x16, 0x57, + 0xEC, 0x44, 0x7D, 0x8E, 0xB2, 0x74, 0xB5, 0x3C, 0x1A, 0xF5, 0x0C, 0xAE, + 0xFF, 0xB3, 0x00, 0x02, 0x04, 0x1F, 0x1C, 0xF0, 0xF6, 0x2F, 0xA9, 0x7C, + 0xF9, 0x13, 0x91, 0xD1, 0xBD, 0x21, 0x09, 0xDC, 0x58, 0x7A, 0x83, 0x25, + 0xDC, 0xDA, 0xC2, 0x37, 0x81, 0xE5, 0xE5, 0x3A, 0x01, 0x47, 0xF5, 0x22, + 0x73, 0x47, 0x32, 0x94, 0x0E, 0x03, 0xD0, 0x0F, 0x46, 0x61, 0x44, 0xA9, + 0xA7, 0xDD, 0xF3, 0x9A, 0x34, 0x76, 0xB5, 0xC8, 0x2F, 0x0E, 0xEA, 0x3B, + 0x99, 0xCD, 0x38, 0xE2, 0x41, 0x1E, 0x75, 0xA4, 0x3E, 0xC7, 0xC8, 0xEC, + 0x08, 0xB9, 0x6D, 0x4F, 0x38, 0x8B, 0x54, 0x4E, 0x31, 0xB3, 0x3E, 0x18, + 0xA1, 0xBB, 0x80, 0x32, 0x79, 0x7C, 0x97, 0x24, 0x90, 0x12, 0xB8, 0x2C, + 0xBF, 0x04, 0x0A, 0xF6, 0x03, 0x0D, 0x42, 0x6F, 0x10, 0x08, 0x93, 0xD9, + 0x1F, 0x77, 0x9A, 0xDE, 0xAF, 0x89, 0xAF, 0xBC, 0x72, 0xB0, 0x79, 0x56, + 0x24, 0x71, 0x6B, 0x2E, 0x1F, 0x72, 0x12, 0x55, 0x2E, 0x3F, 0xCF, 0xDC, + 0x12, 0xAE, 0x8B, 0xB3, 0x17, 0xDA, 0x08, 0x74, 0x18, 0x47, 0x58, 0x7A, + 0x87, 0xCD, 0x84, 0x9F, 0xE6, 0xDD, 0x1A, 0x50, 0xFA, 0x1D, 0x85, 0xDB, + 0x3A, 0xEC, 0x7A, 0xEC, 0x8C, 0x7D, 0x4B, 0xE9, 0xBC, 0x9A, 0x9F, 0xBC, + 0x08, 0xD8, 0x15, 0x32, 0x47, 0x18, 0x1C, 0xEF, 0xD2, 0xC3, 0x64, 0xC4, + 0x66, 0x43, 0x09, 0x63, 0x51, 0xC4, 0x65, 0x2A, 0x43, 0x4D, 0xA1, 0x12, + 0x16, 0xBA, 0xC2, 0x24, 0x37, 0x3B, 0x43, 0xDD, 0x55, 0x4E, 0x31, 0x10, + 0x9E, 0xF8, 0xDF, 0x71, 0xDD, 0xE4, 0x3A, 0x13, 0x02, 0x00, 0x94, 0x50, + 0x6B, 0xC7, 0xA3, 0xD7, 0xF1, 0x56, 0x35, 0x04, 0x9B, 0x19, 0x11, 0x5F, + 0xD6, 0x77, 0xAC, 0x81, 0xFA, 0xFB, 0xF1, 0x97, 0xED, 0xE6, 0x8F, 0xF2, + 0x09, 0xA5, 0x24, 0x59, 0x3B, 0x18, 0x11, 0x3C, 0xB1, 0x6F, 0xE9, 0xEA, + 0x70, 0x45, 0xE3, 0x86, 0x6E, 0x3C, 0x15, 0x1E, 0x2C, 0xBF, 0xBA, 0x9E, + 0xFA, 0x06, 0x3D, 0x4E, 0x1C, 0xE7, 0x1F, 0x77, 0xB3, 0x2A, 0x3E, 0x5A, + 0x0A, 0x5E, 0x0E, 0x86, 0x25, 0xC8, 0x66, 0x52, 0xD6, 0x89, 0x3E, 0x80, + 0x0F, 0x1D, 0xE7, 0x99, 0xB9, 0xDC, 0x65, 0x29, 0x78, 0xEA, 0xE2, 0x94, + 0xBA, 0x0E, 0x15, 0xC6, 0x6A, 0xB3, 0x10, 0x9C, 0x78, 0xC9, 0x4C, 0x2E, + 0x3D, 0x2B, 0x1D, 0x36, 0xA7, 0x4E, 0xF7, 0xF2, 0xF4, 0x2D, 0x0A, 0x1E, + 0x53, 0x3C, 0xFC, 0xA6, 0xB6, 0x12, 0x13, 0xF7, 0x08, 0xA7, 0x23, 0x52, + 0x60, 0x79, 0xC2, 0x19, 0x0F, 0x26, 0x39, 0x19, 0x83, 0xC8, 0x7B, 0xA6, + 0x95, 0x45, 0xBC, 0xE3, 0x66, 0x1F, 0xC3, 0xEA, 0x6E, 0xFE, 0xAD, 0xEB, + 0xA5, 0x5A, 0x6C, 0xBE, 0xEF, 0xDD, 0x32, 0xC3, 0x28, 0xFF, 0x8C, 0x01, + 0xD1, 0x37, 0x7F, 0xB1, 0x3B, 0x95, 0x2F, 0xDB, 0x0F, 0xA5, 0xCE, 0xEE, + 0x02, 0x97, 0xAB, 0x68, 0x85, 0x21, 0x58, 0x65, 0x70, 0x61, 0x07, 0x29, + 0x28, 0xB6, 0x21, 0x15, 0x84, 0x2F, 0x6E, 0x5B, 0xAD, 0x7D, 0xEF, 0x2A, + 0x96, 0xBD, 0x61, 0xEB, 0x30, 0xA8, 0xCC, 0x13, 0x10, 0x15, 0x9F, 0x61, + 0x75, 0x47, 0xDD, 0xEC, 0x39, 0xA2, 0x70, 0x4C, 0x90, 0x5C, 0x73, 0xB5, + 0xCF, 0x63, 0x03, 0xAA, 0x1E, 0xFE, 0x34, 0x03, 0xA7, 0x2C, 0x62, 0x60, + 0xBC, 0x86, 0xCC, 0xEE, 0x14, 0xDE, 0xAA, 0xCB, 0x0B, 0x9E, 0x9E, 0xD5, + 0xCA, 0xF0, 0xBD, 0x19, 0xAF, 0x1E, 0x8B, 0x64, 0x6E, 0x84, 0xF3, 0xB2, + 0xAB, 0x5C, 0xAB, 0x9C, 0xB3, 0xB4, 0x2A, 0x3C, 0x32, 0x5A, 0x68, 0x40, + 0x50, 0xBB, 0x5A, 0x65, 0xB9, 0x69, 0x23, 0xA0, 0x99, 0xA0, 0x5F, 0x87, + 0x19, 0x0B, 0x54, 0x9B, 0xF7, 0xB8, 0x21, 0xC0, 0xD5, 0xE9, 0x9E, 0x31, + 0x77, 0x2D, 0xE3, 0x97, 0x9A, 0x88, 0x37, 0xF8, 0xA8, 0x7D, 0x3D, 0x62, + 0x7E, 0x99, 0xF7, 0x95, 0xD6, 0x1F, 0xE6, 0xC7, 0x29, 0x88, 0x35, 0x0E, + 0x81, 0x12, 0x68, 0x16, 0x5F, 0x93, 0xED, 0x11, 0x63, 0x72, 0x22, 0x1B, + 0xA5, 0x84, 0xF5, 0x57, 0x99, 0xBA, 0x58, 0x78, 0xA1, 0xDF, 0xDE, 0x96, + 0x54, 0x30, 0x2E, 0x53, 0xEB, 0x0A, 0xB3, 0xCD, 0x96, 0x46, 0xC2, 0x1A, + 0xFF, 0xC3, 0x83, 0x9B, 0xEA, 0xFF, 0xC6, 0x34, 0xEF, 0xF2, 0xEB, 0x58, + 0x28, 0x31, 0xBC, 0x6D, 0xE4, 0x48, 0xD9, 0x8F, 0xE3, 0xB7, 0x64, 0xE8, + 0xD9, 0x14, 0x4A, 0x5D, 0x73, 0x3C, 0x7C, 0xEE, 0x61, 0xED, 0x28, 0xFE, + 0xEA, 0xAB, 0xAA, 0xA3, 0xB6, 0xE2, 0xEE, 0x45, 0xE0, 0x13, 0x3E, 0x20, + 0x14, 0x5D, 0x10, 0x42, 0xB5, 0xBB, 0x6A, 0xEF, 0x42, 0xF4, 0x42, 0xC7, + 0xD0, 0x4F, 0xCB, 0xFA, 0x15, 0x4F, 0x6C, 0xDB, 0xC7, 0x4D, 0x85, 0x86, + 0x9E, 0x79, 0x1E, 0xD8, 0x05, 0x21, 0xCD, 0x41, 0x1D, 0x3B, 0x4F, 0x65, + 0x46, 0x26, 0x8D, 0x5B, 0xF2, 0xA1, 0x62, 0xCF, 0x50, 0x62, 0x81, 0x3D, + 0x6A, 0x47, 0x4B, 0xE4, 0x92, 0x74, 0xCB, 0x69, 0xC3, 0x24, 0x15, 0x7F, + 0xA3, 0xB6, 0xC7, 0xC1, 0xA0, 0x83, 0x88, 0xFC, 0x9D, 0x48, 0x19, 0xAD, + 0x00, 0xBF, 0x5B, 0x09, 0x85, 0xB2, 0x92, 0x56, 0x0B, 0x8A, 0x84, 0x47, + 0xEA, 0xF5, 0x55, 0x0C, 0x2A, 0x8D, 0x42, 0x58, 0x00, 0x0D, 0x82, 0x23, + 0x74, 0xB1, 0x62, 0x14, 0x41, 0x7E, 0x93, 0x8D, 0x92, 0xF0, 0x72, 0x33, + 0x61, 0x70, 0x3F, 0x23, 0x3E, 0xF4, 0xAD, 0x1D, 0x60, 0x74, 0xEE, 0xCB, + 0x59, 0x37, 0xDE, 0x7C, 0xDB, 0x3B, 0x22, 0x6C, 0xF1, 0xEC, 0x5F, 0xD6, + 0x9E, 0x50, 0xF8, 0x19, 0x84, 0x80, 0x07, 0xA6, 0x6E, 0x32, 0x77, 0xCE, + 0xA7, 0xF2, 0x85, 0x40, 0xC2, 0x06, 0x0C, 0xC5, 0xAA, 0xA7, 0x69, 0xA9, + 0x35, 0x97, 0xD9, 0x61, 0x55, 0xD8, 0xEF, 0xE8, 0x84, 0x34, 0x45, 0xC3, + 0x2E, 0x7A, 0x44, 0x9E, 0xDC, 0xCA, 0x0B, 0x80, 0xFC, 0xAB, 0x04, 0x5A, + 0xCD, 0x88, 0x55, 0x10, 0xD3, 0xDB, 0x73, 0xDB, 0xC9, 0x9E, 0x1E, 0x0E, + 0x05, 0x67, 0xD5, 0xFD, 0xD8, 0x38, 0x3E, 0x71, 0x65, 0x34, 0xC4, 0xC5, + 0x40, 0x43, 0x67, 0xE3, 0x79, 0xDA, 0x5F, 0x67, 0x4A, 0x3D, 0xB0, 0x8F, + 0xE7, 0x21, 0x3E, 0x15, 0x20, 0xFF, 0x6D, 0xF1, 0x9E, 0xF8, 0x28, 0x3D, + 0xF7, 0x40, 0x81, 0x94, 0x68, 0x5A, 0x3D, 0xE9, 0xF7, 0xAD, 0x83, 0xDB, + 0x2B, 0x9F, 0xE3, 0xE6, 0xF7, 0xD4, 0x02, 0x76, 0xF7, 0x20, 0x15, 0x41, + 0x34, 0x29, 0x69, 0x94, 0x1C, 0x26, 0x4C, 0xF6, 0x6A, 0xF4, 0x20, 0x33, + 0x71, 0x24, 0x08, 0xD4, 0x68, 0x00, 0xA1, 0xD4, 0x2E, 0x6B, 0xF4, 0xBC, + 0x46, 0x45, 0x24, 0x97, 0x2E, 0xF6, 0x39, 0x1E, 0xAF, 0x61, 0x00, 0x50, + 0xB7, 0xD4, 0xB7, 0x43, +}; + +} // namespace random_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/random/internal/randen_slow.cc b/absl/random/internal/randen_slow.cc index 8d074582..4e5f3dc1 100644 --- a/absl/random/internal/randen_slow.cc +++ b/absl/random/internal/randen_slow.cc @@ -20,6 +20,7 @@ #include "absl/base/attributes.h" #include "absl/random/internal/platform.h" +#include "absl/random/internal/randen_traits.h" #if ABSL_HAVE_ATTRIBUTE(always_inline) || \ (defined(__GNUC__) && !defined(__clang__)) @@ -225,35 +226,16 @@ constexpr uint32_t te3[256] = { 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, }; -struct alignas(16) u64x2 { - constexpr u64x2() : v{0, 0} {}; - constexpr u64x2(uint64_t hi, uint64_t lo) : v{lo, hi} {} - - uint64_t v[2]; -}; - // Software implementation of the Vector128 class, using uint32_t // as an underlying vector register. -// -struct Vector128 { - inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=( - const Vector128& other) { - s[0] ^= other.s[0]; - s[1] ^= other.s[1]; - s[2] ^= other.s[2]; - s[3] ^= other.s[3]; - return *this; - } - +struct alignas(16) Vector128 { uint32_t s[4]; }; inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 -Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { +Vector128Load(const void* from) { Vector128 result; - const uint8_t* ABSL_RANDOM_INTERNAL_RESTRICT src = - reinterpret_cast<const uint8_t*>(from); - + const uint8_t* src = reinterpret_cast<const uint8_t*>(from); result.s[0] = static_cast<uint32_t>(src[0]) << 24 | static_cast<uint32_t>(src[1]) << 16 | static_cast<uint32_t>(src[2]) << 8 | @@ -274,7 +256,7 @@ Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { } inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( - const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { + const Vector128& v, void* to) { uint8_t* dst = reinterpret_cast<uint8_t*>(to); dst[0] = static_cast<uint8_t>(v.s[0] >> 24); dst[1] = static_cast<uint8_t>(v.s[0] >> 16); @@ -298,91 +280,57 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( // symmetry of AES (ensures previously equal columns differ afterwards). inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 AesRound(const Vector128& state, const Vector128& round_key) { - // clang-format off Vector128 result; - result.s[0] = round_key.s[0] ^ - te0[uint8_t(state.s[0] >> 24)] ^ - te1[uint8_t(state.s[1] >> 16)] ^ - te2[uint8_t(state.s[2] >> 8)] ^ + result.s[0] = round_key.s[0] ^ // + te0[uint8_t(state.s[0] >> 24)] ^ // + te1[uint8_t(state.s[1] >> 16)] ^ // + te2[uint8_t(state.s[2] >> 8)] ^ // te3[uint8_t(state.s[3])]; - result.s[1] = round_key.s[1] ^ - te0[uint8_t(state.s[1] >> 24)] ^ - te1[uint8_t(state.s[2] >> 16)] ^ - te2[uint8_t(state.s[3] >> 8)] ^ + result.s[1] = round_key.s[1] ^ // + te0[uint8_t(state.s[1] >> 24)] ^ // + te1[uint8_t(state.s[2] >> 16)] ^ // + te2[uint8_t(state.s[3] >> 8)] ^ // te3[uint8_t(state.s[0])]; - result.s[2] = round_key.s[2] ^ - te0[uint8_t(state.s[2] >> 24)] ^ - te1[uint8_t(state.s[3] >> 16)] ^ - te2[uint8_t(state.s[0] >> 8)] ^ + result.s[2] = round_key.s[2] ^ // + te0[uint8_t(state.s[2] >> 24)] ^ // + te1[uint8_t(state.s[3] >> 16)] ^ // + te2[uint8_t(state.s[0] >> 8)] ^ // te3[uint8_t(state.s[1])]; - result.s[3] = round_key.s[3] ^ - te0[uint8_t(state.s[3] >> 24)] ^ - te1[uint8_t(state.s[0] >> 16)] ^ - te2[uint8_t(state.s[1] >> 8)] ^ + result.s[3] = round_key.s[3] ^ // + te0[uint8_t(state.s[3] >> 24)] ^ // + te1[uint8_t(state.s[0] >> 16)] ^ // + te2[uint8_t(state.s[1] >> 8)] ^ // te3[uint8_t(state.s[2])]; return result; - // clang-format on } -// RANDen = RANDom generator or beetroots in Swiss German. -// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random -// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32. -// -// High-level summary: -// 1) Reverie (see "A Robust and Sponge-Like PRNG with Improved Efficiency") is -// a sponge-like random generator that requires a cryptographic permutation. -// It improves upon "Provably Robust Sponge-Based PRNGs and KDFs" by -// achieving backtracking resistance with only one Permute() per buffer. -// -// 2) "Simpira v2: A Family of Efficient Permutations Using the AES Round -// Function" constructs up to 1024-bit permutations using an improved -// Generalized Feistel network with 2-round AES-128 functions. This Feistel -// block shuffle achieves diffusion faster and is less vulnerable to -// sliced-biclique attacks than the Type-2 cyclic shuffle. -// -// 3) "Improving the Generalized Feistel" and "New criterion for diffusion -// property" extends the same kind of improved Feistel block shuffle to 16 -// branches, which enables a 2048-bit permutation. -// -// Combine these three ideas and also change Simpira's subround keys from -// structured/low-entropy counters to digits of Pi. - -// Randen constants. -constexpr size_t kFeistelBlocks = 16; -constexpr size_t kFeistelFunctions = kFeistelBlocks / 2; // = 8 -constexpr size_t kFeistelRounds = 16 + 1; // > 4 * log2(kFeistelBlocks) -constexpr size_t kKeys = kFeistelRounds * kFeistelFunctions; - -// INCLUDE keys. -#include "absl/random/internal/randen-keys.inc" - -static_assert(kKeys == kRoundKeys, "kKeys and kRoundKeys must be equal"); +using ::absl::random_internal::RandenTraits; -// 2 uint64_t lanes per Vector128 -static constexpr size_t kLanes = 2; +// Randen operates on 128-bit vectors. +struct alignas(16) u64x2 { + uint64_t data[2]; +}; // The improved Feistel block shuffle function for 16 blocks. inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state_u64) { - static_assert(kFeistelBlocks == 16, + u64x2* state) { + static_assert(RandenTraits::kFeistelBlocks == 16, "Feistel block shuffle only works for 16 blocks."); - constexpr size_t shuffle[kFeistelBlocks] = {7, 2, 13, 4, 11, 8, 3, 6, - 15, 0, 9, 10, 1, 14, 5, 12}; - - u64x2* ABSL_RANDOM_INTERNAL_RESTRICT state = - reinterpret_cast<u64x2*>(state_u64); + constexpr size_t shuffle[RandenTraits::kFeistelBlocks] = { + 7, 2, 13, 4, 11, 8, 3, 6, 15, 0, 9, 10, 1, 14, 5, 12}; // The fully unrolled loop without the memcpy improves the speed by about - // 30% over the equivalent (leaving code here as a comment): - if (false) { - u64x2 source[kFeistelBlocks]; - std::memcpy(source, state, sizeof(source)); - for (size_t i = 0; i < kFeistelBlocks; i++) { - const u64x2 v0 = source[shuffle[i]]; - state[i] = v0; - } + // 30% over the equivalent: +#if 0 + u64x2 source[RandenTraits::kFeistelBlocks]; + std::memcpy(source, state, sizeof(source)); + for (size_t i = 0; i < RandenTraits::kFeistelBlocks; i++) { + const u64x2 v0 = source[shuffle[i]]; + state[i] = v0; } + return; +#endif const u64x2 v0 = state[shuffle[0]]; const u64x2 v1 = state[shuffle[1]]; @@ -424,23 +372,23 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle( // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel // XORs are 'free' (included in the second AES instruction). inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, + u64x2* ABSL_RANDOM_INTERNAL_RESTRICT state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { - for (size_t branch = 0; branch < kFeistelBlocks; branch += 4) { - const Vector128 s0 = Vector128Load(state + kLanes * branch); - const Vector128 s1 = Vector128Load(state + kLanes * (branch + 1)); + for (size_t branch = 0; branch < RandenTraits::kFeistelBlocks; branch += 4) { + const Vector128 s0 = Vector128Load(state + branch); + const Vector128 s1 = Vector128Load(state + branch + 1); const Vector128 f0 = AesRound(s0, Vector128Load(keys)); keys++; const Vector128 o1 = AesRound(f0, s1); - Vector128Store(o1, state + kLanes * (branch + 1)); + Vector128Store(o1, state + branch + 1); // Manually unroll this loop once. about 10% better than not unrolled. - const Vector128 s2 = Vector128Load(state + kLanes * (branch + 2)); - const Vector128 s3 = Vector128Load(state + kLanes * (branch + 3)); + const Vector128 s2 = Vector128Load(state + branch + 2); + const Vector128 s3 = Vector128Load(state + branch + 3); const Vector128 f2 = AesRound(s2, Vector128Load(keys)); keys++; const Vector128 o3 = AesRound(f2, s3); - Vector128Store(o3, state + kLanes * (branch + 3)); + Vector128Store(o3, state + branch + 3); } return keys; } @@ -450,11 +398,9 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound( // 2^64 queries if the round function is a PRF. This is similar to the b=8 case // of Simpira v2, but more efficient than its generic construction for b=16. inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Permute( - const void* keys, uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { - const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 = - static_cast<const u64x2*>(keys); - for (size_t round = 0; round < kFeistelRounds; ++round) { - keys128 = FeistelRound(state, keys128); + u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { + for (size_t round = 0; round < RandenTraits::kFeistelRounds; ++round) { + keys = FeistelRound(state, keys); BlockShuffle(state); } } @@ -468,37 +414,42 @@ namespace random_internal { const void* RandenSlow::GetKeys() { // Round keys for one AES per Feistel round and branch. // The canonical implementation uses first digits of Pi. - return round_keys; + return kRandenRoundKeys; } void RandenSlow::Absorb(const void* seed_void, void* state_void) { - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state = - reinterpret_cast<uint64_t*>(state_void); - const uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT seed = - reinterpret_cast<const uint64_t*>(seed_void); - - constexpr size_t kCapacityBlocks = kCapacityBytes / sizeof(uint64_t); - static_assert(kCapacityBlocks * sizeof(uint64_t) == kCapacityBytes, - "Not i*V"); - for (size_t i = kCapacityBlocks; i < kStateBytes / sizeof(uint64_t); ++i) { + auto* state = + reinterpret_cast<uint64_t * ABSL_RANDOM_INTERNAL_RESTRICT>(state_void); + const auto* seed = + reinterpret_cast<const uint64_t * ABSL_RANDOM_INTERNAL_RESTRICT>( + seed_void); + + constexpr size_t kCapacityBlocks = + RandenTraits::kCapacityBytes / sizeof(uint64_t); + static_assert( + kCapacityBlocks * sizeof(uint64_t) == RandenTraits::kCapacityBytes, + "Not i*V"); + + for (size_t i = kCapacityBlocks; + i < RandenTraits::kStateBytes / sizeof(uint64_t); ++i) { state[i] ^= seed[i - kCapacityBlocks]; } } -void RandenSlow::Generate(const void* keys, void* state_void) { - static_assert(kCapacityBytes == sizeof(Vector128), "Capacity mismatch"); +void RandenSlow::Generate(const void* keys_void, void* state_void) { + static_assert(RandenTraits::kCapacityBytes == sizeof(u64x2), + "Capacity mismatch"); - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state = - reinterpret_cast<uint64_t*>(state_void); + auto* state = reinterpret_cast<u64x2*>(state_void); + const auto* keys = reinterpret_cast<const u64x2*>(keys_void); - const Vector128 prev_inner = Vector128Load(state); + const u64x2 prev_inner = state[0]; - Permute(keys, state); + Permute(state, keys); // Ensure backtracking resistance. - Vector128 inner = Vector128Load(state); - inner ^= prev_inner; - Vector128Store(inner, state); + state[0].data[0] ^= prev_inner.data[0]; + state[0].data[1] ^= prev_inner.data[1]; } } // namespace random_internal diff --git a/absl/random/internal/randen_slow.h b/absl/random/internal/randen_slow.h index 72f92b54..b6f137eb 100644 --- a/absl/random/internal/randen_slow.h +++ b/absl/random/internal/randen_slow.h @@ -28,13 +28,6 @@ namespace random_internal { // architectures lacking AES hardware acceleration intrinsics. class RandenSlow { public: - // Size of the entire sponge / state for the randen PRNG. - static constexpr size_t kStateBytes = 256; // 2048-bit - - // Size of the 'inner' (inaccessible) part of the sponge. Larger values would - // require more frequent calls to RandenGenerate. - static constexpr size_t kCapacityBytes = 16; // 128-bit - static void Generate(const void* keys, void* state_void); static void Absorb(const void* seed_void, void* state_void); static const void* GetKeys(); diff --git a/absl/random/internal/randen_slow_test.cc b/absl/random/internal/randen_slow_test.cc index c07155d8..4a535837 100644 --- a/absl/random/internal/randen_slow_test.cc +++ b/absl/random/internal/randen_slow_test.cc @@ -17,18 +17,20 @@ #include <cstring> #include "gtest/gtest.h" +#include "absl/random/internal/randen_traits.h" namespace { using absl::random_internal::RandenSlow; +using absl::random_internal::RandenTraits; // Local state parameters. constexpr size_t kSeedBytes = - RandenSlow::kStateBytes - RandenSlow::kCapacityBytes; -constexpr size_t kStateSizeT = RandenSlow::kStateBytes / sizeof(uint64_t); + RandenTraits::kStateBytes - RandenTraits::kCapacityBytes; +constexpr size_t kStateSizeT = RandenTraits::kStateBytes / sizeof(uint64_t); constexpr size_t kSeedSizeT = kSeedBytes / sizeof(uint32_t); -struct randen { +struct alignas(16) randen { uint64_t state[kStateSizeT]; uint32_t seed[kSeedSizeT]; }; diff --git a/absl/random/internal/randen_traits.h b/absl/random/internal/randen_traits.h index 2b8bbe73..53caa936 100644 --- a/absl/random/internal/randen_traits.h +++ b/absl/random/internal/randen_traits.h @@ -32,6 +32,25 @@ namespace random_internal { // 'Strong' (well-distributed, unpredictable, backtracking-resistant) random // generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32. // +// High-level summary: +// 1) Reverie (see "A Robust and Sponge-Like PRNG with Improved Efficiency") is +// a sponge-like random generator that requires a cryptographic permutation. +// It improves upon "Provably Robust Sponge-Based PRNGs and KDFs" by +// achieving backtracking resistance with only one Permute() per buffer. +// +// 2) "Simpira v2: A Family of Efficient Permutations Using the AES Round +// Function" constructs up to 1024-bit permutations using an improved +// Generalized Feistel network with 2-round AES-128 functions. This Feistel +// block shuffle achieves diffusion faster and is less vulnerable to +// sliced-biclique attacks than the Type-2 cyclic shuffle. +// +// 3) "Improving the Generalized Feistel" and "New criterion for diffusion +// property" extends the same kind of improved Feistel block shuffle to 16 +// branches, which enables a 2048-bit permutation. +// +// Combine these three ideas and also change Simpira's subround keys from +// structured/low-entropy counters to digits of Pi (or other random source). + // RandenTraits contains the basic algorithm traits, such as the size of the // state, seed, sponge, etc. struct RandenTraits { @@ -45,17 +64,23 @@ struct RandenTraits { // Size of the default seed consumed by the sponge. static constexpr size_t kSeedBytes = kStateBytes - kCapacityBytes; + // Assuming 128-bit blocks, the number of blocks in the state. // Largest size for which security proofs are known. static constexpr size_t kFeistelBlocks = 16; - // Type-2 generalized Feistel => one round function for every two blocks. - static constexpr size_t kFeistelFunctions = kFeistelBlocks / 2; // = 8 - // Ensures SPRP security and two full subblock diffusions. // Must be > 4 * log2(kFeistelBlocks). static constexpr size_t kFeistelRounds = 16 + 1; + + // Size of the key. A 128-bit key block is used for every-other + // feistel block (Type-2 generalized Feistel network) in each round. + static constexpr size_t kKeyBytes = 16 * kFeistelRounds * kFeistelBlocks / 2; }; +// Randen key arrays. In randen_round_keys.cc +extern const unsigned char kRandenRoundKeys[RandenTraits::kKeyBytes]; +extern const unsigned char kRandenRoundKeysBE[RandenTraits::kKeyBytes]; + } // namespace random_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/random/internal/uniform_helper.h b/absl/random/internal/uniform_helper.h index 663107cb..1243bc1c 100644 --- a/absl/random/internal/uniform_helper.h +++ b/absl/random/internal/uniform_helper.h @@ -19,10 +19,13 @@ #include <limits> #include <type_traits> +#include "absl/base/config.h" #include "absl/meta/type_traits.h" +#include "absl/random/internal/traits.h" namespace absl { ABSL_NAMESPACE_BEGIN + template <typename IntType> class uniform_int_distribution; @@ -58,6 +61,26 @@ struct IntervalOpenOpenTag : public random_internal::TagTypeCompare<IntervalOpenOpenTag> {}; namespace random_internal { + +// In the absence of an explicitly provided return-type, the template +// "uniform_inferred_return_t<A, B>" is used to derive a suitable type, based on +// the data-types of the endpoint-arguments {A lo, B hi}. +// +// Given endpoints {A lo, B hi}, one of {A, B} will be chosen as the +// return-type, if one type can be implicitly converted into the other, in a +// lossless way. The template "is_widening_convertible" implements the +// compile-time logic for deciding if such a conversion is possible. +// +// If no such conversion between {A, B} exists, then the overload for +// absl::Uniform() will be discarded, and the call will be ill-formed. +// Return-type for absl::Uniform() when the return-type is inferred. +template <typename A, typename B> +using uniform_inferred_return_t = + absl::enable_if_t<absl::disjunction<is_widening_convertible<A, B>, + is_widening_convertible<B, A>>::value, + typename std::conditional< + is_widening_convertible<A, B>::value, B, A>::type>; + // The functions // uniform_lower_bound(tag, a, b) // and @@ -82,7 +105,7 @@ typename absl::enable_if_t< std::is_same<Tag, IntervalOpenOpenTag>>>::value, IntType> uniform_lower_bound(Tag, IntType a, IntType) { - return a + 1; + return a < (std::numeric_limits<IntType>::max)() ? (a + 1) : a; } template <typename FloatType, typename Tag> @@ -113,7 +136,7 @@ typename absl::enable_if_t< std::is_same<Tag, IntervalOpenOpenTag>>>::value, IntType> uniform_upper_bound(Tag, IntType, IntType b) { - return b - 1; + return b > (std::numeric_limits<IntType>::min)() ? (b - 1) : b; } template <typename FloatType, typename Tag> @@ -149,12 +172,53 @@ uniform_upper_bound(Tag, FloatType, FloatType b) { return std::nextafter(b, (std::numeric_limits<FloatType>::max)()); } +// Returns whether the bounds are valid for the underlying distribution. +// Inputs must have already been resolved via uniform_*_bound calls. +// +// The c++ standard constraints in [rand.dist.uni.int] are listed as: +// requires: lo <= hi. +// +// In the uniform_int_distrubtion, {lo, hi} are closed, closed. Thus: +// [0, 0] is legal. +// [0, 0) is not legal, but [0, 1) is, which translates to [0, 0]. +// (0, 1) is not legal, but (0, 2) is, which translates to [1, 1]. +// (0, 0] is not legal, but (0, 1] is, which translates to [1, 1]. +// +// The c++ standard constraints in [rand.dist.uni.real] are listed as: +// requires: lo <= hi. +// requires: (hi - lo) <= numeric_limits<T>::max() +// +// In the uniform_real_distribution, {lo, hi} are closed, open, Thus: +// [0, 0] is legal, which is [0, 0+epsilon). +// [0, 0) is legal. +// (0, 0) is not legal, but (0-epsilon, 0+epsilon) is. +// (0, 0] is not legal, but (0, 0+epsilon] is. +// +template <typename FloatType> +absl::enable_if_t<std::is_floating_point<FloatType>::value, bool> +is_uniform_range_valid(FloatType a, FloatType b) { + return a <= b && std::isfinite(b - a); +} + +template <typename IntType> +absl::enable_if_t<std::is_integral<IntType>::value, bool> +is_uniform_range_valid(IntType a, IntType b) { + return a <= b; +} + +// UniformDistribution selects either absl::uniform_int_distribution +// or absl::uniform_real_distribution depending on the NumType parameter. template <typename NumType> using UniformDistribution = typename std::conditional<std::is_integral<NumType>::value, absl::uniform_int_distribution<NumType>, absl::uniform_real_distribution<NumType>>::type; +// UniformDistributionWrapper is used as the underlying distribution type +// by the absl::Uniform template function. It selects the proper Abseil +// uniform distribution and provides constructor overloads that match the +// expected parameter order as well as adjusting distribtuion bounds based +// on the tag. template <typename NumType> struct UniformDistributionWrapper : public UniformDistribution<NumType> { template <typename TagType> diff --git a/absl/random/internal/uniform_helper_test.cc b/absl/random/internal/uniform_helper_test.cc new file mode 100644 index 00000000..173c49b0 --- /dev/null +++ b/absl/random/internal/uniform_helper_test.cc @@ -0,0 +1,279 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/random/internal/uniform_helper.h" + +#include <cmath> +#include <cstdint> +#include <random> + +#include "gtest/gtest.h" + +namespace { + +using absl::IntervalClosedClosedTag; +using absl::IntervalClosedOpenTag; +using absl::IntervalOpenClosedTag; +using absl::IntervalOpenOpenTag; +using absl::random_internal::uniform_inferred_return_t; +using absl::random_internal::uniform_lower_bound; +using absl::random_internal::uniform_upper_bound; + +class UniformHelperTest : public testing::Test {}; + +TEST_F(UniformHelperTest, UniformBoundFunctionsGeneral) { + constexpr IntervalClosedClosedTag IntervalClosedClosed; + constexpr IntervalClosedOpenTag IntervalClosedOpen; + constexpr IntervalOpenClosedTag IntervalOpenClosed; + constexpr IntervalOpenOpenTag IntervalOpenOpen; + + // absl::uniform_int_distribution natively assumes IntervalClosedClosed + // absl::uniform_real_distribution natively assumes IntervalClosedOpen + + EXPECT_EQ(uniform_lower_bound(IntervalOpenClosed, 0, 100), 1); + EXPECT_EQ(uniform_lower_bound(IntervalOpenOpen, 0, 100), 1); + EXPECT_GT(uniform_lower_bound<float>(IntervalOpenClosed, 0, 1.0), 0); + EXPECT_GT(uniform_lower_bound<float>(IntervalOpenOpen, 0, 1.0), 0); + EXPECT_GT(uniform_lower_bound<double>(IntervalOpenClosed, 0, 1.0), 0); + EXPECT_GT(uniform_lower_bound<double>(IntervalOpenOpen, 0, 1.0), 0); + + EXPECT_EQ(uniform_lower_bound(IntervalClosedClosed, 0, 100), 0); + EXPECT_EQ(uniform_lower_bound(IntervalClosedOpen, 0, 100), 0); + EXPECT_EQ(uniform_lower_bound<float>(IntervalClosedClosed, 0, 1.0), 0); + EXPECT_EQ(uniform_lower_bound<float>(IntervalClosedOpen, 0, 1.0), 0); + EXPECT_EQ(uniform_lower_bound<double>(IntervalClosedClosed, 0, 1.0), 0); + EXPECT_EQ(uniform_lower_bound<double>(IntervalClosedOpen, 0, 1.0), 0); + + EXPECT_EQ(uniform_upper_bound(IntervalOpenOpen, 0, 100), 99); + EXPECT_EQ(uniform_upper_bound(IntervalClosedOpen, 0, 100), 99); + EXPECT_EQ(uniform_upper_bound<float>(IntervalOpenOpen, 0, 1.0), 1.0); + EXPECT_EQ(uniform_upper_bound<float>(IntervalClosedOpen, 0, 1.0), 1.0); + EXPECT_EQ(uniform_upper_bound<double>(IntervalOpenOpen, 0, 1.0), 1.0); + EXPECT_EQ(uniform_upper_bound<double>(IntervalClosedOpen, 0, 1.0), 1.0); + + EXPECT_EQ(uniform_upper_bound(IntervalOpenClosed, 0, 100), 100); + EXPECT_EQ(uniform_upper_bound(IntervalClosedClosed, 0, 100), 100); + EXPECT_GT(uniform_upper_bound<float>(IntervalOpenClosed, 0, 1.0), 1.0); + EXPECT_GT(uniform_upper_bound<float>(IntervalClosedClosed, 0, 1.0), 1.0); + EXPECT_GT(uniform_upper_bound<double>(IntervalOpenClosed, 0, 1.0), 1.0); + EXPECT_GT(uniform_upper_bound<double>(IntervalClosedClosed, 0, 1.0), 1.0); + + // Negative value tests + EXPECT_EQ(uniform_lower_bound(IntervalOpenClosed, -100, -1), -99); + EXPECT_EQ(uniform_lower_bound(IntervalOpenOpen, -100, -1), -99); + EXPECT_GT(uniform_lower_bound<float>(IntervalOpenClosed, -2.0, -1.0), -2.0); + EXPECT_GT(uniform_lower_bound<float>(IntervalOpenOpen, -2.0, -1.0), -2.0); + EXPECT_GT(uniform_lower_bound<double>(IntervalOpenClosed, -2.0, -1.0), -2.0); + EXPECT_GT(uniform_lower_bound<double>(IntervalOpenOpen, -2.0, -1.0), -2.0); + + EXPECT_EQ(uniform_lower_bound(IntervalClosedClosed, -100, -1), -100); + EXPECT_EQ(uniform_lower_bound(IntervalClosedOpen, -100, -1), -100); + EXPECT_EQ(uniform_lower_bound<float>(IntervalClosedClosed, -2.0, -1.0), -2.0); + EXPECT_EQ(uniform_lower_bound<float>(IntervalClosedOpen, -2.0, -1.0), -2.0); + EXPECT_EQ(uniform_lower_bound<double>(IntervalClosedClosed, -2.0, -1.0), + -2.0); + EXPECT_EQ(uniform_lower_bound<double>(IntervalClosedOpen, -2.0, -1.0), -2.0); + + EXPECT_EQ(uniform_upper_bound(IntervalOpenOpen, -100, -1), -2); + EXPECT_EQ(uniform_upper_bound(IntervalClosedOpen, -100, -1), -2); + EXPECT_EQ(uniform_upper_bound<float>(IntervalOpenOpen, -2.0, -1.0), -1.0); + EXPECT_EQ(uniform_upper_bound<float>(IntervalClosedOpen, -2.0, -1.0), -1.0); + EXPECT_EQ(uniform_upper_bound<double>(IntervalOpenOpen, -2.0, -1.0), -1.0); + EXPECT_EQ(uniform_upper_bound<double>(IntervalClosedOpen, -2.0, -1.0), -1.0); + + EXPECT_EQ(uniform_upper_bound(IntervalOpenClosed, -100, -1), -1); + EXPECT_EQ(uniform_upper_bound(IntervalClosedClosed, -100, -1), -1); + EXPECT_GT(uniform_upper_bound<float>(IntervalOpenClosed, -2.0, -1.0), -1.0); + EXPECT_GT(uniform_upper_bound<float>(IntervalClosedClosed, -2.0, -1.0), -1.0); + EXPECT_GT(uniform_upper_bound<double>(IntervalOpenClosed, -2.0, -1.0), -1.0); + EXPECT_GT(uniform_upper_bound<double>(IntervalClosedClosed, -2.0, -1.0), + -1.0); + + EXPECT_GT(uniform_lower_bound(IntervalOpenClosed, 1.0, 2.0), 1.0); + EXPECT_LT(uniform_lower_bound(IntervalOpenClosed, 1.0, +0.0), 1.0); + EXPECT_LT(uniform_lower_bound(IntervalOpenClosed, 1.0, -0.0), 1.0); + EXPECT_LT(uniform_lower_bound(IntervalOpenClosed, 1.0, -1.0), 1.0); +} + +TEST_F(UniformHelperTest, UniformBoundFunctionsIntBounds) { + // Verifies the saturating nature of uniform_lower_bound and + // uniform_upper_bound + constexpr IntervalOpenOpenTag IntervalOpenOpen; + + // uint max. + constexpr auto m = (std::numeric_limits<uint64_t>::max)(); + + EXPECT_EQ(1, uniform_lower_bound(IntervalOpenOpen, 0u, 0u)); + EXPECT_EQ(m, uniform_lower_bound(IntervalOpenOpen, m, m)); + EXPECT_EQ(m, uniform_lower_bound(IntervalOpenOpen, m - 1, m - 1)); + EXPECT_EQ(0, uniform_upper_bound(IntervalOpenOpen, 0u, 0u)); + EXPECT_EQ(m - 1, uniform_upper_bound(IntervalOpenOpen, m, m)); + + // int min/max + constexpr auto l = (std::numeric_limits<int64_t>::min)(); + constexpr auto r = (std::numeric_limits<int64_t>::max)(); + EXPECT_EQ(1, uniform_lower_bound(IntervalOpenOpen, 0, 0)); + EXPECT_EQ(l + 1, uniform_lower_bound(IntervalOpenOpen, l, l)); + EXPECT_EQ(r, uniform_lower_bound(IntervalOpenOpen, r - 1, r - 1)); + EXPECT_EQ(r, uniform_lower_bound(IntervalOpenOpen, r, r)); + EXPECT_EQ(-1, uniform_upper_bound(IntervalOpenOpen, 0, 0)); + EXPECT_EQ(l, uniform_upper_bound(IntervalOpenOpen, l, l)); + EXPECT_EQ(r - 1, uniform_upper_bound(IntervalOpenOpen, r, r)); +} + +TEST_F(UniformHelperTest, UniformBoundFunctionsRealBounds) { + // absl::uniform_real_distribution natively assumes IntervalClosedOpen; + // use the inverse here so each bound has to change. + constexpr IntervalOpenClosedTag IntervalOpenClosed; + + // Edge cases: the next value toward itself is itself. + EXPECT_EQ(1.0, uniform_lower_bound(IntervalOpenClosed, 1.0, 1.0)); + EXPECT_EQ(1.0f, uniform_lower_bound(IntervalOpenClosed, 1.0f, 1.0f)); + + // rightmost and leftmost finite values. + constexpr auto r = (std::numeric_limits<double>::max)(); + const auto re = std::nexttoward(r, 0.0); + constexpr auto l = -r; + const auto le = std::nexttoward(l, 0.0); + + EXPECT_EQ(l, uniform_lower_bound(IntervalOpenClosed, l, l)); // (l,l) + EXPECT_EQ(r, uniform_lower_bound(IntervalOpenClosed, r, r)); // (r,r) + EXPECT_EQ(le, uniform_lower_bound(IntervalOpenClosed, l, r)); // (l,r) + EXPECT_EQ(le, uniform_lower_bound(IntervalOpenClosed, l, 0.0)); // (l, 0) + EXPECT_EQ(le, uniform_lower_bound(IntervalOpenClosed, l, le)); // (l, le) + EXPECT_EQ(r, uniform_lower_bound(IntervalOpenClosed, re, r)); // (re, r) + + EXPECT_EQ(le, uniform_upper_bound(IntervalOpenClosed, l, l)); // (l,l) + EXPECT_EQ(r, uniform_upper_bound(IntervalOpenClosed, r, r)); // (r,r) + EXPECT_EQ(r, uniform_upper_bound(IntervalOpenClosed, l, r)); // (l,r) + EXPECT_EQ(r, uniform_upper_bound(IntervalOpenClosed, l, re)); // (l,re) + EXPECT_EQ(r, uniform_upper_bound(IntervalOpenClosed, 0.0, r)); // (0, r) + EXPECT_EQ(r, uniform_upper_bound(IntervalOpenClosed, re, r)); // (re, r) + EXPECT_EQ(r, uniform_upper_bound(IntervalOpenClosed, le, re)); // (le, re) + + const double e = std::nextafter(1.0, 2.0); // 1 + epsilon + const double f = std::nextafter(1.0, 0.0); // 1 - epsilon + + // (1.0, 1.0 + epsilon) + EXPECT_EQ(e, uniform_lower_bound(IntervalOpenClosed, 1.0, e)); + EXPECT_EQ(std::nextafter(e, 2.0), + uniform_upper_bound(IntervalOpenClosed, 1.0, e)); + + // (1.0-epsilon, 1.0) + EXPECT_EQ(1.0, uniform_lower_bound(IntervalOpenClosed, f, 1.0)); + EXPECT_EQ(e, uniform_upper_bound(IntervalOpenClosed, f, 1.0)); + + // denorm cases. + const double g = std::numeric_limits<double>::denorm_min(); + const double h = std::nextafter(g, 1.0); + + // (0, denorm_min) + EXPECT_EQ(g, uniform_lower_bound(IntervalOpenClosed, 0.0, g)); + EXPECT_EQ(h, uniform_upper_bound(IntervalOpenClosed, 0.0, g)); + + // (denorm_min, 1.0) + EXPECT_EQ(h, uniform_lower_bound(IntervalOpenClosed, g, 1.0)); + EXPECT_EQ(e, uniform_upper_bound(IntervalOpenClosed, g, 1.0)); + + // Edge cases: invalid bounds. + EXPECT_EQ(f, uniform_lower_bound(IntervalOpenClosed, 1.0, -1.0)); +} + +struct Invalid {}; + +template <typename A, typename B> +auto InferredUniformReturnT(int) -> uniform_inferred_return_t<A, B>; + +template <typename, typename> +Invalid InferredUniformReturnT(...); + +// Given types <A, B, Expect>, CheckArgsInferType() verifies that +// +// uniform_inferred_return_t<A, B> and +// uniform_inferred_return_t<B, A> +// +// returns the type "Expect". +// +// This interface can also be used to assert that a given inferred return types +// are invalid. Writing: +// +// CheckArgsInferType<float, int, Invalid>() +// +// will assert that this overload does not exist. +template <typename A, typename B, typename Expect> +void CheckArgsInferType() { + static_assert( + absl::conjunction< + std::is_same<Expect, decltype(InferredUniformReturnT<A, B>(0))>, + std::is_same<Expect, + decltype(InferredUniformReturnT<B, A>(0))>>::value, + ""); +} + +TEST_F(UniformHelperTest, UniformTypeInference) { + // Infers common types. + CheckArgsInferType<uint16_t, uint16_t, uint16_t>(); + CheckArgsInferType<uint32_t, uint32_t, uint32_t>(); + CheckArgsInferType<uint64_t, uint64_t, uint64_t>(); + CheckArgsInferType<int16_t, int16_t, int16_t>(); + CheckArgsInferType<int32_t, int32_t, int32_t>(); + CheckArgsInferType<int64_t, int64_t, int64_t>(); + CheckArgsInferType<float, float, float>(); + CheckArgsInferType<double, double, double>(); + + // Properly promotes uint16_t. + CheckArgsInferType<uint16_t, uint32_t, uint32_t>(); + CheckArgsInferType<uint16_t, uint64_t, uint64_t>(); + CheckArgsInferType<uint16_t, int32_t, int32_t>(); + CheckArgsInferType<uint16_t, int64_t, int64_t>(); + CheckArgsInferType<uint16_t, float, float>(); + CheckArgsInferType<uint16_t, double, double>(); + + // Properly promotes int16_t. + CheckArgsInferType<int16_t, int32_t, int32_t>(); + CheckArgsInferType<int16_t, int64_t, int64_t>(); + CheckArgsInferType<int16_t, float, float>(); + CheckArgsInferType<int16_t, double, double>(); + + // Invalid (u)int16_t-pairings do not compile. + // See "CheckArgsInferType" comments above, for how this is achieved. + CheckArgsInferType<uint16_t, int16_t, Invalid>(); + CheckArgsInferType<int16_t, uint32_t, Invalid>(); + CheckArgsInferType<int16_t, uint64_t, Invalid>(); + + // Properly promotes uint32_t. + CheckArgsInferType<uint32_t, uint64_t, uint64_t>(); + CheckArgsInferType<uint32_t, int64_t, int64_t>(); + CheckArgsInferType<uint32_t, double, double>(); + + // Properly promotes int32_t. + CheckArgsInferType<int32_t, int64_t, int64_t>(); + CheckArgsInferType<int32_t, double, double>(); + + // Invalid (u)int32_t-pairings do not compile. + CheckArgsInferType<uint32_t, int32_t, Invalid>(); + CheckArgsInferType<int32_t, uint64_t, Invalid>(); + CheckArgsInferType<int32_t, float, Invalid>(); + CheckArgsInferType<uint32_t, float, Invalid>(); + + // Invalid (u)int64_t-pairings do not compile. + CheckArgsInferType<uint64_t, int64_t, Invalid>(); + CheckArgsInferType<int64_t, float, Invalid>(); + CheckArgsInferType<int64_t, double, Invalid>(); + + // Properly promotes float. + CheckArgsInferType<float, double, double>(); +} + +} // namespace diff --git a/absl/random/internal/wide_multiply.h b/absl/random/internal/wide_multiply.h index 6e4cf1be..0afcbe08 100644 --- a/absl/random/internal/wide_multiply.h +++ b/absl/random/internal/wide_multiply.h @@ -38,9 +38,9 @@ namespace random_internal { // MultiplyU64ToU128 multiplies two 64-bit values to a 128-bit value. // If an intrinsic is available, it is used, otherwise use native 32-bit // multiplies to construct the result. -inline uint128 MultiplyU64ToU128(uint64_t a, uint64_t b) { +inline absl::uint128 MultiplyU64ToU128(uint64_t a, uint64_t b) { #if defined(ABSL_HAVE_INTRINSIC_INT128) - return uint128(static_cast<__uint128_t>(a) * b); + return absl::uint128(static_cast<__uint128_t>(a) * b); #elif defined(ABSL_INTERNAL_USE_UMUL128) // uint64_t * uint64_t => uint128 multiply using imul intrinsic on MSVC. uint64_t high = 0; @@ -93,14 +93,14 @@ struct wide_multiply { template <> struct wide_multiply<uint64_t> { using input_type = uint64_t; - using result_type = uint128; + using result_type = absl::uint128; static result_type multiply(uint64_t a, uint64_t b) { return MultiplyU64ToU128(a, b); } - static uint64_t hi(result_type r) { return Uint128High64(r); } - static uint64_t lo(result_type r) { return Uint128Low64(r); } + static uint64_t hi(result_type r) { return absl::Uint128High64(r); } + static uint64_t lo(result_type r) { return absl::Uint128Low64(r); } }; #endif diff --git a/absl/random/internal/wide_multiply_test.cc b/absl/random/internal/wide_multiply_test.cc index 922603f2..ca8ce923 100644 --- a/absl/random/internal/wide_multiply_test.cc +++ b/absl/random/internal/wide_multiply_test.cc @@ -28,7 +28,7 @@ TEST(WideMultiplyTest, MultiplyU64ToU128Test) { EXPECT_EQ(absl::uint128(0), MultiplyU64ToU128(0, 0)); - // Max uint64 + // Max uint64_t EXPECT_EQ(MultiplyU64ToU128(kMax, kMax), absl::MakeUint128(0xfffffffffffffffe, 0x0000000000000001)); EXPECT_EQ(absl::MakeUint128(0, kMax), MultiplyU64ToU128(kMax, 1)); |