diff options
author | Abseil Team <absl-team@google.com> | 2020-05-18 17:53:14 -0700 |
---|---|---|
committer | Mark Barolak <mbar@google.com> | 2020-05-19 10:59:49 -0400 |
commit | 768eb2ca2857342673fcd462792ce04b8bac3fa3 (patch) | |
tree | 7c7951bde3dd27933b1d85621d7abf01179c0377 /absl/random | |
parent | 3f347c46272886a099852a4cd303ecf37a054de8 (diff) |
Export of internal Abseil changes
--
f012012ef78234a6a4585321b67d7b7c92ebc266 by Laramie Leavitt <lar@google.com>:
Slight restructuring of absl/random/internal randen implementation.
Convert round-keys.inc into randen_round_keys.cc file.
Consistently use a 128-bit pointer type for internal method parameters. This allows simpler pointer arithmetic in C++ & permits removal of some constants and casts.
Remove some redundancy in comments & constexpr variables. Specifically, all references to Randen algorithm parameters use RandenTraits; duplication in RandenSlow removed.
PiperOrigin-RevId: 312190313
--
dc8b42e054046741e9ed65335bfdface997c6063 by Abseil Team <absl-team@google.com>:
Internal change.
PiperOrigin-RevId: 312167304
--
f13d248fafaf206492c1362c3574031aea3abaf7 by Matthew Brown <matthewbr@google.com>:
Cleanup StrFormat extensions a little.
PiperOrigin-RevId: 312166336
--
9d9117589667afe2332bb7ad42bc967ca7c54502 by Derek Mauro <dmauro@google.com>:
Internal change
PiperOrigin-RevId: 312105213
--
9a12b9b3aa0e59b8ee6cf9408ed0029045543a9b by Abseil Team <absl-team@google.com>:
Complete IGNORE_TYPE macro renaming.
PiperOrigin-RevId: 311999699
--
64756f20d61021d999bd0d4c15e9ad3857382f57 by Gennadiy Rozental <rogeeff@google.com>:
Switch to fixed bytes specific default value.
This fixes the Abseil Flags for big endian platforms.
PiperOrigin-RevId: 311844448
--
bdbe6b5b29791dbc3816ada1828458b3010ff1e9 by Laramie Leavitt <lar@google.com>:
Change many distribution tests to use pcg_engine as a deterministic source of entropy.
It's reasonable to test that the BitGen itself has good entropy, however when testing the cross product of all random distributions x all the architecture variations x all submitted changes results in a large number of tests. In order to account for these failures while still using good entropy requires that our allowed sigma need to account for all of these independent tests.
Our current sigma values are too restrictive, and we see a lot of failures, so we have to either relax the sigma values or convert some of the statistical tests to use deterministic values.
This changelist does the latter.
PiperOrigin-RevId: 311840096
GitOrigin-RevId: f012012ef78234a6a4585321b67d7b7c92ebc266
Change-Id: Ic84886f38ff30d7d72c126e9b63c9a61eb729a1a
Diffstat (limited to 'absl/random')
21 files changed, 838 insertions, 603 deletions
diff --git a/absl/random/BUILD.bazel b/absl/random/BUILD.bazel index e61d31b5..9ba75b52 100644 --- a/absl/random/BUILD.bazel +++ b/absl/random/BUILD.bazel @@ -168,6 +168,7 @@ cc_test( deps = [ ":distributions", ":random", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "@com_google_googletest//:gtest_main", ], @@ -186,6 +187,7 @@ cc_test( ":random", "//absl/base:raw_logging_internal", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "//absl/strings:str_format", @@ -233,9 +235,9 @@ cc_test( deps = [ ":distributions", ":random", - "//absl/base:core_headers", "//absl/base:raw_logging_internal", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "//absl/strings:str_format", @@ -256,6 +258,7 @@ cc_test( ":random", "//absl/base:raw_logging_internal", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "@com_google_googletest//:gtest_main", @@ -283,6 +286,7 @@ cc_test( "//absl/base:raw_logging_internal", "//absl/container:flat_hash_map", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "//absl/strings:str_format", @@ -302,6 +306,7 @@ cc_test( "//absl/base:core_headers", "//absl/base:raw_logging_internal", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "//absl/strings:str_format", @@ -345,6 +350,7 @@ cc_test( ":random", "//absl/base:raw_logging_internal", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "@com_google_googletest//:gtest_main", @@ -369,6 +375,7 @@ cc_test( ":random", "//absl/base:raw_logging_internal", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "@com_google_googletest//:gtest_main", @@ -388,6 +395,7 @@ cc_test( ":random", "//absl/base:raw_logging_internal", "//absl/random/internal:distribution_test_util", + "//absl/random/internal:pcg_engine", "//absl/random/internal:sequence_urbg", "//absl/strings", "@com_google_googletest//:gtest_main", diff --git a/absl/random/CMakeLists.txt b/absl/random/CMakeLists.txt index 69bedbd6..ec616dd9 100644 --- a/absl/random/CMakeLists.txt +++ b/absl/random/CMakeLists.txt @@ -244,6 +244,7 @@ absl_cc_test( absl::random_distributions absl::random_random absl::random_internal_sequence_urbg + absl::random_internal_pcg_engine gmock gtest_main ) @@ -262,6 +263,7 @@ absl_cc_test( absl::random_random absl::random_internal_distribution_test_util absl::random_internal_sequence_urbg + absl::random_internal_pcg_engine absl::raw_logging_internal absl::strings absl::str_format @@ -311,9 +313,9 @@ absl_cc_test( ${ABSL_TEST_COPTS} LINKOPTS ${ABSL_DEFAULT_LINKOPTS} - absl::core_headers absl::random_distributions absl::random_internal_distribution_test_util + absl::random_internal_pcg_engine absl::random_internal_sequence_urbg absl::random_random absl::raw_logging_internal @@ -335,6 +337,7 @@ absl_cc_test( DEPS absl::random_distributions absl::random_internal_distribution_test_util + absl::random_internal_pcg_engine absl::random_internal_sequence_urbg absl::random_random absl::raw_logging_internal @@ -358,6 +361,7 @@ absl_cc_test( absl::core_headers absl::flat_hash_map absl::random_internal_distribution_test_util + absl::random_internal_pcg_engine absl::random_internal_sequence_urbg absl::raw_logging_internal absl::strings @@ -379,6 +383,7 @@ absl_cc_test( absl::core_headers absl::random_distributions absl::random_internal_distribution_test_util + absl::random_internal_pcg_engine absl::random_internal_sequence_urbg absl::random_random absl::raw_logging_internal @@ -422,6 +427,7 @@ absl_cc_test( DEPS absl::random_distributions absl::random_internal_distribution_test_util + absl::random_internal_pcg_engine absl::random_internal_sequence_urbg absl::random_random absl::raw_logging_internal @@ -442,6 +448,7 @@ absl_cc_test( DEPS absl::random_distributions absl::random_internal_distribution_test_util + absl::random_internal_pcg_engine absl::random_internal_sequence_urbg absl::random_random absl::strings @@ -461,6 +468,7 @@ absl_cc_test( DEPS absl::random_distributions absl::random_internal_distribution_test_util + absl::random_internal_pcg_engine absl::random_internal_sequence_urbg absl::random_random absl::raw_logging_internal @@ -782,8 +790,9 @@ absl_cc_library( random_internal_platform HDRS "internal/randen_traits.h" - "internal/randen-keys.inc" "internal/platform.h" + SRCS + "internal/randen_round_keys.cc" COPTS ${ABSL_DEFAULT_COPTS} LINKOPTS diff --git a/absl/random/bernoulli_distribution_test.cc b/absl/random/bernoulli_distribution_test.cc index 5581af50..b250f878 100644 --- a/absl/random/bernoulli_distribution_test.cc +++ b/absl/random/bernoulli_distribution_test.cc @@ -21,6 +21,7 @@ #include <utility> #include "gtest/gtest.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" @@ -63,7 +64,10 @@ TEST_P(BernoulliTest, Accuracy) { size_t trials = para.second; double p = para.first; - absl::InsecureBitGen rng; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng(0x2B7E151628AED2A6); size_t yes = 0; absl::bernoulli_distribution dist(p); diff --git a/absl/random/beta_distribution_test.cc b/absl/random/beta_distribution_test.cc index d0111b3e..277e4dc6 100644 --- a/absl/random/beta_distribution_test.cc +++ b/absl/random/beta_distribution_test.cc @@ -29,6 +29,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/random/internal/chi_square.h" #include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -159,8 +160,12 @@ TYPED_TEST(BetaDistributionInterfaceTest, SerializeTest) { } TYPED_TEST(BetaDistributionInterfaceTest, DegenerateCases) { + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng(0x2B7E151628AED2A6); + // Extreme cases when the params are abnormal. - absl::InsecureBitGen gen; constexpr int kCount = 1000; const TypeParam kSmallValues[] = { std::numeric_limits<TypeParam>::min(), @@ -186,7 +191,7 @@ TYPED_TEST(BetaDistributionInterfaceTest, DegenerateCases) { int ones = 0; absl::beta_distribution<TypeParam> d(alpha, beta); for (int i = 0; i < kCount; ++i) { - TypeParam x = d(gen); + TypeParam x = d(rng); if (x == 0.0) { zeros++; } else if (x == 1.0) { @@ -212,7 +217,7 @@ TYPED_TEST(BetaDistributionInterfaceTest, DegenerateCases) { for (TypeParam beta : kLargeValues) { absl::beta_distribution<TypeParam> d(alpha, beta); for (int i = 0; i < kCount; ++i) { - EXPECT_EQ(d(gen), 0.0); + EXPECT_EQ(d(rng), 0.0); } } } @@ -227,7 +232,7 @@ TYPED_TEST(BetaDistributionInterfaceTest, DegenerateCases) { for (TypeParam beta : kSmallValues) { absl::beta_distribution<TypeParam> d(alpha, beta); for (int i = 0; i < kCount; ++i) { - EXPECT_EQ(d(gen), 1.0); + EXPECT_EQ(d(rng), 1.0); } } } @@ -237,7 +242,7 @@ TYPED_TEST(BetaDistributionInterfaceTest, DegenerateCases) { absl::beta_distribution<TypeParam> d(std::numeric_limits<TypeParam>::max(), std::numeric_limits<TypeParam>::max()); for (int i = 0; i < kCount; ++i) { - EXPECT_EQ(d(gen), 0.5); + EXPECT_EQ(d(rng), 0.5); } } { @@ -246,7 +251,7 @@ TYPED_TEST(BetaDistributionInterfaceTest, DegenerateCases) { std::numeric_limits<TypeParam>::max(), std::numeric_limits<TypeParam>::max() * 0.9999); for (int i = 0; i < kCount; ++i) { - TypeParam x = d(gen); + TypeParam x = d(rng); EXPECT_NE(x, 0.5f); EXPECT_FLOAT_EQ(x, 0.500025f); } diff --git a/absl/random/discrete_distribution_test.cc b/absl/random/discrete_distribution_test.cc index 7296f0ac..6d007006 100644 --- a/absl/random/discrete_distribution_test.cc +++ b/absl/random/discrete_distribution_test.cc @@ -29,6 +29,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/random/internal/chi_square.h" #include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -156,7 +157,10 @@ TEST(DiscreteDistributionTest, ChiSquaredTest50) { std::iota(std::begin(weights), std::end(weights), 1); absl::discrete_distribution<int> dist(std::begin(weights), std::end(weights)); - absl::InsecureBitGen rng; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng(0x2B7E151628AED2A6); std::vector<int32_t> counts(kBuckets, 0); for (size_t i = 0; i < kTrials; i++) { diff --git a/absl/random/exponential_distribution_test.cc b/absl/random/exponential_distribution_test.cc index f3cfd764..8e9e69b6 100644 --- a/absl/random/exponential_distribution_test.cc +++ b/absl/random/exponential_distribution_test.cc @@ -32,6 +32,7 @@ #include "absl/base/macros.h" #include "absl/random/internal/chi_square.h" #include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -205,7 +206,10 @@ class ExponentialDistributionTests : public testing::TestWithParam<Param>, template <typename D> double SingleChiSquaredTest(); - absl::InsecureBitGen rng_; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6}; }; template <typename D> diff --git a/absl/random/gaussian_distribution_test.cc b/absl/random/gaussian_distribution_test.cc index 398f0131..02ac578a 100644 --- a/absl/random/gaussian_distribution_test.cc +++ b/absl/random/gaussian_distribution_test.cc @@ -216,7 +216,10 @@ class GaussianDistributionTests : public testing::TestWithParam<Param>, template <typename D> double SingleChiSquaredTest(); - absl::InsecureBitGen rng_; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6}; }; template <typename D> diff --git a/absl/random/internal/BUILD.bazel b/absl/random/internal/BUILD.bazel index dc452816..85d1fb81 100644 --- a/absl/random/internal/BUILD.bazel +++ b/absl/random/internal/BUILD.bazel @@ -255,13 +255,15 @@ cc_library( cc_library( name = "platform", + srcs = [ + "randen_round_keys.cc", + ], hdrs = [ "randen_traits.h", ], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, textual_hdrs = [ - "randen-keys.inc", "platform.h", ], deps = ["//absl/base:config"], @@ -613,6 +615,7 @@ cc_test( copts = ABSL_TEST_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, deps = [ + ":platform", ":randen_slow", "@com_google_googletest//:gtest_main", ], diff --git a/absl/random/internal/randen-keys.inc b/absl/random/internal/randen-keys.inc deleted file mode 100644 index fa4b1668..00000000 --- a/absl/random/internal/randen-keys.inc +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef ABSL_RANDOM_INTERNAL_RANDEN_KEYS_INC_ -#define ABSL_RANDOM_INTERNAL_RANDEN_KEYS_INC_ - -// Textual header to include the randen_keys where necessary. -// REQUIRES: struct u64x2{} -// -// PROVIDES: kKeys -// PROVIDES: round_keys[] - -// "Nothing up my sleeve" numbers from the first hex digits of Pi, obtained -// from http://hexpi.sourceforge.net/. The array was generated by following -// Python script: -/* -python << EOF -"""Generates Randen round keys array from pi-hex.62500.txt file.""" -import binascii - -KEYS = 136 - -def chunks(l, n): - """Yield successive n-sized chunks from l.""" - for i in range(0, len(l), n): - yield l[i:i + n] - -def pairwise(t): - """Transforms sequence into sequence of pairs.""" - it = iter(t) - return zip(it,it) - -def digits_from_pi(): - """Reads digits from hexpi.sourceforge.net file.""" - with open("pi-hex.62500.txt") as file: - return file.read() - -def digits_from_urandom(): - """Reads digits from /dev/urandom.""" - with open("/dev/urandom") as file: - return binascii.hexlify(file.read(KEYS * 16)) - -digits = digits_from_pi() -print("static constexpr const size_t kRoundKeys = {0};\n".format(KEYS)) -print("alignas(16) constexpr const u64x2 round_keys[kRoundKeys] = {") - -for i, (hi, lo) in zip(range(KEYS), pairwise(chunks(digits, 16))): - hi = "0x{0}ull".format(hi) - lo = "0x{0}ull".format(lo) - print(" u64x2({0}, {1}){2}".format(hi, lo, ',' if i+1 < KEYS else '')) - -print("};") -EOF -*/ - -static constexpr const size_t kRoundKeys = 136; - -alignas(16) constexpr u64x2 round_keys[kRoundKeys] = { - u64x2(0x243F6A8885A308D3ull, 0x13198A2E03707344ull), - u64x2(0xA4093822299F31D0ull, 0x082EFA98EC4E6C89ull), - u64x2(0x452821E638D01377ull, 0xBE5466CF34E90C6Cull), - u64x2(0xC0AC29B7C97C50DDull, 0x3F84D5B5B5470917ull), - u64x2(0x9216D5D98979FB1Bull, 0xD1310BA698DFB5ACull), - u64x2(0x2FFD72DBD01ADFB7ull, 0xB8E1AFED6A267E96ull), - u64x2(0xBA7C9045F12C7F99ull, 0x24A19947B3916CF7ull), - u64x2(0x0801F2E2858EFC16ull, 0x636920D871574E69ull), - u64x2(0xA458FEA3F4933D7Eull, 0x0D95748F728EB658ull), - u64x2(0x718BCD5882154AEEull, 0x7B54A41DC25A59B5ull), - u64x2(0x9C30D5392AF26013ull, 0xC5D1B023286085F0ull), - u64x2(0xCA417918B8DB38EFull, 0x8E79DCB0603A180Eull), - u64x2(0x6C9E0E8BB01E8A3Eull, 0xD71577C1BD314B27ull), - u64x2(0x78AF2FDA55605C60ull, 0xE65525F3AA55AB94ull), - u64x2(0x5748986263E81440ull, 0x55CA396A2AAB10B6ull), - u64x2(0xB4CC5C341141E8CEull, 0xA15486AF7C72E993ull), - u64x2(0xB3EE1411636FBC2Aull, 0x2BA9C55D741831F6ull), - u64x2(0xCE5C3E169B87931Eull, 0xAFD6BA336C24CF5Cull), - u64x2(0x7A32538128958677ull, 0x3B8F48986B4BB9AFull), - u64x2(0xC4BFE81B66282193ull, 0x61D809CCFB21A991ull), - u64x2(0x487CAC605DEC8032ull, 0xEF845D5DE98575B1ull), - u64x2(0xDC262302EB651B88ull, 0x23893E81D396ACC5ull), - u64x2(0x0F6D6FF383F44239ull, 0x2E0B4482A4842004ull), - u64x2(0x69C8F04A9E1F9B5Eull, 0x21C66842F6E96C9Aull), - u64x2(0x670C9C61ABD388F0ull, 0x6A51A0D2D8542F68ull), - u64x2(0x960FA728AB5133A3ull, 0x6EEF0B6C137A3BE4ull), - u64x2(0xBA3BF0507EFB2A98ull, 0xA1F1651D39AF0176ull), - u64x2(0x66CA593E82430E88ull, 0x8CEE8619456F9FB4ull), - u64x2(0x7D84A5C33B8B5EBEull, 0xE06F75D885C12073ull), - u64x2(0x401A449F56C16AA6ull, 0x4ED3AA62363F7706ull), - u64x2(0x1BFEDF72429B023Dull, 0x37D0D724D00A1248ull), - u64x2(0xDB0FEAD349F1C09Bull, 0x075372C980991B7Bull), - u64x2(0x25D479D8F6E8DEF7ull, 0xE3FE501AB6794C3Bull), - u64x2(0x976CE0BD04C006BAull, 0xC1A94FB6409F60C4ull), - u64x2(0x5E5C9EC2196A2463ull, 0x68FB6FAF3E6C53B5ull), - u64x2(0x1339B2EB3B52EC6Full, 0x6DFC511F9B30952Cull), - u64x2(0xCC814544AF5EBD09ull, 0xBEE3D004DE334AFDull), - u64x2(0x660F2807192E4BB3ull, 0xC0CBA85745C8740Full), - u64x2(0xD20B5F39B9D3FBDBull, 0x5579C0BD1A60320Aull), - u64x2(0xD6A100C6402C7279ull, 0x679F25FEFB1FA3CCull), - u64x2(0x8EA5E9F8DB3222F8ull, 0x3C7516DFFD616B15ull), - u64x2(0x2F501EC8AD0552ABull, 0x323DB5FAFD238760ull), - u64x2(0x53317B483E00DF82ull, 0x9E5C57BBCA6F8CA0ull), - u64x2(0x1A87562EDF1769DBull, 0xD542A8F6287EFFC3ull), - u64x2(0xAC6732C68C4F5573ull, 0x695B27B0BBCA58C8ull), - u64x2(0xE1FFA35DB8F011A0ull, 0x10FA3D98FD2183B8ull), - u64x2(0x4AFCB56C2DD1D35Bull, 0x9A53E479B6F84565ull), - u64x2(0xD28E49BC4BFB9790ull, 0xE1DDF2DAA4CB7E33ull), - u64x2(0x62FB1341CEE4C6E8ull, 0xEF20CADA36774C01ull), - u64x2(0xD07E9EFE2BF11FB4ull, 0x95DBDA4DAE909198ull), - u64x2(0xEAAD8E716B93D5A0ull, 0xD08ED1D0AFC725E0ull), - u64x2(0x8E3C5B2F8E7594B7ull, 0x8FF6E2FBF2122B64ull), - u64x2(0x8888B812900DF01Cull, 0x4FAD5EA0688FC31Cull), - u64x2(0xD1CFF191B3A8C1ADull, 0x2F2F2218BE0E1777ull), - u64x2(0xEA752DFE8B021FA1ull, 0xE5A0CC0FB56F74E8ull), - u64x2(0x18ACF3D6CE89E299ull, 0xB4A84FE0FD13E0B7ull), - u64x2(0x7CC43B81D2ADA8D9ull, 0x165FA26680957705ull), - u64x2(0x93CC7314211A1477ull, 0xE6AD206577B5FA86ull), - u64x2(0xC75442F5FB9D35CFull, 0xEBCDAF0C7B3E89A0ull), - u64x2(0xD6411BD3AE1E7E49ull, 0x00250E2D2071B35Eull), - u64x2(0x226800BB57B8E0AFull, 0x2464369BF009B91Eull), - u64x2(0x5563911D59DFA6AAull, 0x78C14389D95A537Full), - u64x2(0x207D5BA202E5B9C5ull, 0x832603766295CFA9ull), - u64x2(0x11C819684E734A41ull, 0xB3472DCA7B14A94Aull), - u64x2(0x1B5100529A532915ull, 0xD60F573FBC9BC6E4ull), - u64x2(0x2B60A47681E67400ull, 0x08BA6FB5571BE91Full), - u64x2(0xF296EC6B2A0DD915ull, 0xB6636521E7B9F9B6ull), - u64x2(0xFF34052EC5855664ull, 0x53B02D5DA99F8FA1ull), - u64x2(0x08BA47996E85076Aull, 0x4B7A70E9B5B32944ull), - u64x2(0xDB75092EC4192623ull, 0xAD6EA6B049A7DF7Dull), - u64x2(0x9CEE60B88FEDB266ull, 0xECAA8C71699A18FFull), - u64x2(0x5664526CC2B19EE1ull, 0x193602A575094C29ull), - u64x2(0xA0591340E4183A3Eull, 0x3F54989A5B429D65ull), - u64x2(0x6B8FE4D699F73FD6ull, 0xA1D29C07EFE830F5ull), - u64x2(0x4D2D38E6F0255DC1ull, 0x4CDD20868470EB26ull), - u64x2(0x6382E9C6021ECC5Eull, 0x09686B3F3EBAEFC9ull), - u64x2(0x3C9718146B6A70A1ull, 0x687F358452A0E286ull), - u64x2(0xB79C5305AA500737ull, 0x3E07841C7FDEAE5Cull), - u64x2(0x8E7D44EC5716F2B8ull, 0xB03ADA37F0500C0Dull), - u64x2(0xF01C1F040200B3FFull, 0xAE0CF51A3CB574B2ull), - u64x2(0x25837A58DC0921BDull, 0xD19113F97CA92FF6ull), - u64x2(0x9432477322F54701ull, 0x3AE5E58137C2DADCull), - u64x2(0xC8B576349AF3DDA7ull, 0xA94461460FD0030Eull), - u64x2(0xECC8C73EA4751E41ull, 0xE238CD993BEA0E2Full), - u64x2(0x3280BBA1183EB331ull, 0x4E548B384F6DB908ull), - u64x2(0x6F420D03F60A04BFull, 0x2CB8129024977C79ull), - u64x2(0x5679B072BCAF89AFull, 0xDE9A771FD9930810ull), - u64x2(0xB38BAE12DCCF3F2Eull, 0x5512721F2E6B7124ull), - u64x2(0x501ADDE69F84CD87ull, 0x7A5847187408DA17ull), - u64x2(0xBC9F9ABCE94B7D8Cull, 0xEC7AEC3ADB851DFAull), - u64x2(0x63094366C464C3D2ull, 0xEF1C18473215D808ull), - u64x2(0xDD433B3724C2BA16ull, 0x12A14D432A65C451ull), - u64x2(0x50940002133AE4DDull, 0x71DFF89E10314E55ull), - u64x2(0x81AC77D65F11199Bull, 0x043556F1D7A3C76Bull), - u64x2(0x3C11183B5924A509ull, 0xF28FE6ED97F1FBFAull), - u64x2(0x9EBABF2C1E153C6Eull, 0x86E34570EAE96FB1ull), - u64x2(0x860E5E0A5A3E2AB3ull, 0x771FE71C4E3D06FAull), - u64x2(0x2965DCB999E71D0Full, 0x803E89D65266C825ull), - u64x2(0x2E4CC9789C10B36Aull, 0xC6150EBA94E2EA78ull), - u64x2(0xA6FC3C531E0A2DF4ull, 0xF2F74EA7361D2B3Dull), - u64x2(0x1939260F19C27960ull, 0x5223A708F71312B6ull), - u64x2(0xEBADFE6EEAC31F66ull, 0xE3BC4595A67BC883ull), - u64x2(0xB17F37D1018CFF28ull, 0xC332DDEFBE6C5AA5ull), - u64x2(0x6558218568AB9702ull, 0xEECEA50FDB2F953Bull), - u64x2(0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull), - u64x2(0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull), - u64x2(0x0334FE1EAA0363CFull, 0xB5735C904C70A239ull), - u64x2(0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull), - u64x2(0x9CAB5CABB2F3846Eull, 0x648B1EAF19BDF0CAull), - u64x2(0xA02369B9655ABB50ull, 0x40685A323C2AB4B3ull), - u64x2(0x319EE9D5C021B8F7ull, 0x9B540B19875FA099ull), - u64x2(0x95F7997E623D7DA8ull, 0xF837889A97E32D77ull), - u64x2(0x11ED935F16681281ull, 0x0E358829C7E61FD6ull), - u64x2(0x96DEDFA17858BA99ull, 0x57F584A51B227263ull), - u64x2(0x9B83C3FF1AC24696ull, 0xCDB30AEB532E3054ull), - u64x2(0x8FD948E46DBC3128ull, 0x58EBF2EF34C6FFEAull), - u64x2(0xFE28ED61EE7C3C73ull, 0x5D4A14D9E864B7E3ull), - u64x2(0x42105D14203E13E0ull, 0x45EEE2B6A3AAABEAull), - u64x2(0xDB6C4F15FACB4FD0ull, 0xC742F442EF6ABBB5ull), - u64x2(0x654F3B1D41CD2105ull, 0xD81E799E86854DC7ull), - u64x2(0xE44B476A3D816250ull, 0xCF62A1F25B8D2646ull), - u64x2(0xFC8883A0C1C7B6A3ull, 0x7F1524C369CB7492ull), - u64x2(0x47848A0B5692B285ull, 0x095BBF00AD19489Dull), - u64x2(0x1462B17423820D00ull, 0x58428D2A0C55F5EAull), - u64x2(0x1DADF43E233F7061ull, 0x3372F0928D937E41ull), - u64x2(0xD65FECF16C223BDBull, 0x7CDE3759CBEE7460ull), - u64x2(0x4085F2A7CE77326Eull, 0xA607808419F8509Eull), - u64x2(0xE8EFD85561D99735ull, 0xA969A7AAC50C06C2ull), - u64x2(0x5A04ABFC800BCADCull, 0x9E447A2EC3453484ull), - u64x2(0xFDD567050E1E9EC9ull, 0xDB73DBD3105588CDull), - u64x2(0x675FDA79E3674340ull, 0xC5C43465713E38D8ull), - u64x2(0x3D28F89EF16DFF20ull, 0x153E21E78FB03D4Aull), - u64x2(0xE6E39F2BDB83ADF7ull, 0xE93D5A68948140F7ull), - u64x2(0xF64C261C94692934ull, 0x411520F77602D4F7ull), - u64x2(0xBCF46B2ED4A10068ull, 0xD40824713320F46Aull), - u64x2(0x43B7D4B7500061AFull, 0x1E39F62E97244546ull)}; - -#endif // ABSL_RANDOM_INTERNAL_RANDEN_KEYS_INC_ diff --git a/absl/random/internal/randen_hwaes.cc b/absl/random/internal/randen_hwaes.cc index e23844f1..9966486f 100644 --- a/absl/random/internal/randen_hwaes.cc +++ b/absl/random/internal/randen_hwaes.cc @@ -24,6 +24,7 @@ #include "absl/base/attributes.h" #include "absl/random/internal/platform.h" +#include "absl/random/internal/randen_traits.h" // ABSL_RANDEN_HWAES_IMPL indicates whether this file will contain // a hardware accelerated implementation of randen, or whether it @@ -115,8 +116,16 @@ ABSL_NAMESPACE_END // Accelerated implementations are supported. // We need the per-architecture includes and defines. // +namespace { -#include "absl/random/internal/randen_traits.h" +using absl::random_internal::RandenTraits; + +// Randen operates on 128-bit vectors. +struct alignas(16) u64x2 { + uint64_t data[2]; +}; + +} // namespace // TARGET_CRYPTO defines a crypto attribute for each architecture. // @@ -150,7 +159,6 @@ ABSL_NAMESPACE_END using Vector128 = __vector unsigned long long; // NOLINT(runtime/int) namespace { - inline ABSL_TARGET_CRYPTO Vector128 ReverseBytes(const Vector128& v) { // Reverses the bytes of the vector. const __vector unsigned char perm = {15, 14, 13, 12, 11, 10, 9, 8, @@ -177,14 +185,9 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, } // Enables native loads in the round loop by pre-swapping. -inline ABSL_TARGET_CRYPTO void SwapEndian(uint64_t* state) { - using absl::random_internal::RandenTraits; - constexpr size_t kLanes = 2; - constexpr size_t kFeistelBlocks = RandenTraits::kFeistelBlocks; - - for (uint32_t branch = 0; branch < kFeistelBlocks; ++branch) { - const Vector128 v = ReverseBytes(Vector128Load(state + kLanes * branch)); - Vector128Store(v, state + kLanes * branch); +inline ABSL_TARGET_CRYPTO void SwapEndian(u64x2* state) { + for (uint32_t block = 0; block < RandenTraits::kFeistelBlocks; ++block) { + Vector128Store(ReverseBytes(Vector128Load(state + block)), state + block); } } @@ -251,7 +254,7 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, return vaesmcq_u8(vaeseq_u8(state, uint8x16_t{})) ^ round_key; } -inline ABSL_TARGET_CRYPTO void SwapEndian(uint64_t*) {} +inline ABSL_TARGET_CRYPTO void SwapEndian(void*) {} } // namespace @@ -297,39 +300,12 @@ inline ABSL_TARGET_CRYPTO Vector128 AesRound(const Vector128& state, return Vector128(_mm_aesenc_si128(state.data(), round_key.data())); } -inline ABSL_TARGET_CRYPTO void SwapEndian(uint64_t*) {} +inline ABSL_TARGET_CRYPTO void SwapEndian(void*) {} } // namespace #endif -namespace { - -// u64x2 is a 128-bit, (2 x uint64_t lanes) struct used to store -// the randen_keys. -struct alignas(16) u64x2 { - constexpr u64x2(uint64_t hi, uint64_t lo) -#if defined(ABSL_ARCH_PPC) - // This has been tested with PPC running in little-endian mode; - // We byte-swap the u64x2 structure from little-endian to big-endian - // because altivec always runs in big-endian mode. - : v{__builtin_bswap64(hi), __builtin_bswap64(lo)} { -#else - : v{lo, hi} { -#endif - } - - constexpr bool operator==(const u64x2& other) const { - return v[0] == other.v[0] && v[1] == other.v[1]; - } - - constexpr bool operator!=(const u64x2& other) const { - return !(*this == other); - } - - uint64_t v[2]; -}; // namespace - #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunknown-pragmas" @@ -338,7 +314,6 @@ struct alignas(16) u64x2 { // At this point, all of the platform-specific features have been defined / // implemented. // -// REQUIRES: using u64x2 = ... // REQUIRES: using Vector128 = ... // REQUIRES: Vector128 Vector128Load(void*) {...} // REQUIRES: void Vector128Store(Vector128, void*) {...} @@ -347,94 +322,50 @@ struct alignas(16) u64x2 { // // PROVIDES: absl::random_internal::RandenHwAes::Absorb // PROVIDES: absl::random_internal::RandenHwAes::Generate - -// RANDen = RANDom generator or beetroots in Swiss German. -// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random -// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32. -// -// High-level summary: -// 1) Reverie (see "A Robust and Sponge-Like PRNG with Improved Efficiency") is -// a sponge-like random generator that requires a cryptographic permutation. -// It improves upon "Provably Robust Sponge-Based PRNGs and KDFs" by -// achieving backtracking resistance with only one Permute() per buffer. -// -// 2) "Simpira v2: A Family of Efficient Permutations Using the AES Round -// Function" constructs up to 1024-bit permutations using an improved -// Generalized Feistel network with 2-round AES-128 functions. This Feistel -// block shuffle achieves diffusion faster and is less vulnerable to -// sliced-biclique attacks than the Type-2 cyclic shuffle. -// -// 3) "Improving the Generalized Feistel" and "New criterion for diffusion -// property" extends the same kind of improved Feistel block shuffle to 16 -// branches, which enables a 2048-bit permutation. -// -// We combine these three ideas and also change Simpira's subround keys from -// structured/low-entropy counters to digits of Pi. - -// Randen constants. -using absl::random_internal::RandenTraits; -constexpr size_t kStateBytes = RandenTraits::kStateBytes; -constexpr size_t kCapacityBytes = RandenTraits::kCapacityBytes; -constexpr size_t kFeistelBlocks = RandenTraits::kFeistelBlocks; -constexpr size_t kFeistelRounds = RandenTraits::kFeistelRounds; -constexpr size_t kFeistelFunctions = RandenTraits::kFeistelFunctions; - -// Independent keys (272 = 2.1 KiB) for the first AES subround of each function. -constexpr size_t kKeys = kFeistelRounds * kFeistelFunctions; - -// INCLUDE keys. -#include "absl/random/internal/randen-keys.inc" - -static_assert(kKeys == kRoundKeys, "kKeys and kRoundKeys must be equal"); -static_assert(round_keys[kKeys - 1] != u64x2(0, 0), - "Too few round_keys initializers"); - -// Number of uint64_t lanes per 128-bit vector; -constexpr size_t kLanes = 2; +namespace { // Block shuffles applies a shuffle to the entire state between AES rounds. // Improved odd-even shuffle from "New criterion for diffusion property". -inline ABSL_TARGET_CRYPTO void BlockShuffle(uint64_t* state) { - static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); - - constexpr size_t shuffle[kFeistelBlocks] = {7, 2, 13, 4, 11, 8, 3, 6, - 15, 0, 9, 10, 1, 14, 5, 12}; - - // The fully unrolled loop without the memcpy improves the speed by about - // 30% over the equivalent loop. - const Vector128 v0 = Vector128Load(state + kLanes * shuffle[0]); - const Vector128 v1 = Vector128Load(state + kLanes * shuffle[1]); - const Vector128 v2 = Vector128Load(state + kLanes * shuffle[2]); - const Vector128 v3 = Vector128Load(state + kLanes * shuffle[3]); - const Vector128 v4 = Vector128Load(state + kLanes * shuffle[4]); - const Vector128 v5 = Vector128Load(state + kLanes * shuffle[5]); - const Vector128 v6 = Vector128Load(state + kLanes * shuffle[6]); - const Vector128 v7 = Vector128Load(state + kLanes * shuffle[7]); - const Vector128 w0 = Vector128Load(state + kLanes * shuffle[8]); - const Vector128 w1 = Vector128Load(state + kLanes * shuffle[9]); - const Vector128 w2 = Vector128Load(state + kLanes * shuffle[10]); - const Vector128 w3 = Vector128Load(state + kLanes * shuffle[11]); - const Vector128 w4 = Vector128Load(state + kLanes * shuffle[12]); - const Vector128 w5 = Vector128Load(state + kLanes * shuffle[13]); - const Vector128 w6 = Vector128Load(state + kLanes * shuffle[14]); - const Vector128 w7 = Vector128Load(state + kLanes * shuffle[15]); - - Vector128Store(v0, state + kLanes * 0); - Vector128Store(v1, state + kLanes * 1); - Vector128Store(v2, state + kLanes * 2); - Vector128Store(v3, state + kLanes * 3); - Vector128Store(v4, state + kLanes * 4); - Vector128Store(v5, state + kLanes * 5); - Vector128Store(v6, state + kLanes * 6); - Vector128Store(v7, state + kLanes * 7); - Vector128Store(w0, state + kLanes * 8); - Vector128Store(w1, state + kLanes * 9); - Vector128Store(w2, state + kLanes * 10); - Vector128Store(w3, state + kLanes * 11); - Vector128Store(w4, state + kLanes * 12); - Vector128Store(w5, state + kLanes * 13); - Vector128Store(w6, state + kLanes * 14); - Vector128Store(w7, state + kLanes * 15); +inline ABSL_TARGET_CRYPTO void BlockShuffle(u64x2* state) { + static_assert(RandenTraits::kFeistelBlocks == 16, + "Expecting 16 FeistelBlocks."); + + constexpr size_t shuffle[RandenTraits::kFeistelBlocks] = { + 7, 2, 13, 4, 11, 8, 3, 6, 15, 0, 9, 10, 1, 14, 5, 12}; + + const Vector128 v0 = Vector128Load(state + shuffle[0]); + const Vector128 v1 = Vector128Load(state + shuffle[1]); + const Vector128 v2 = Vector128Load(state + shuffle[2]); + const Vector128 v3 = Vector128Load(state + shuffle[3]); + const Vector128 v4 = Vector128Load(state + shuffle[4]); + const Vector128 v5 = Vector128Load(state + shuffle[5]); + const Vector128 v6 = Vector128Load(state + shuffle[6]); + const Vector128 v7 = Vector128Load(state + shuffle[7]); + const Vector128 w0 = Vector128Load(state + shuffle[8]); + const Vector128 w1 = Vector128Load(state + shuffle[9]); + const Vector128 w2 = Vector128Load(state + shuffle[10]); + const Vector128 w3 = Vector128Load(state + shuffle[11]); + const Vector128 w4 = Vector128Load(state + shuffle[12]); + const Vector128 w5 = Vector128Load(state + shuffle[13]); + const Vector128 w6 = Vector128Load(state + shuffle[14]); + const Vector128 w7 = Vector128Load(state + shuffle[15]); + + Vector128Store(v0, state + 0); + Vector128Store(v1, state + 1); + Vector128Store(v2, state + 2); + Vector128Store(v3, state + 3); + Vector128Store(v4, state + 4); + Vector128Store(v5, state + 5); + Vector128Store(v6, state + 6); + Vector128Store(v7, state + 7); + Vector128Store(w0, state + 8); + Vector128Store(w1, state + 9); + Vector128Store(w2, state + 10); + Vector128Store(w3, state + 11); + Vector128Store(w4, state + 12); + Vector128Store(w5, state + 13); + Vector128Store(w6, state + 14); + Vector128Store(w7, state + 15); } // Feistel round function using two AES subrounds. Very similar to F() @@ -443,27 +374,28 @@ inline ABSL_TARGET_CRYPTO void BlockShuffle(uint64_t* state) { // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel // XORs are 'free' (included in the second AES instruction). inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound( - uint64_t* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { - static_assert(kFeistelBlocks == 16, "Expecting 16 FeistelBlocks."); + u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { + static_assert(RandenTraits::kFeistelBlocks == 16, + "Expecting 16 FeistelBlocks."); // MSVC does a horrible job at unrolling loops. // So we unroll the loop by hand to improve the performance. - const Vector128 s0 = Vector128Load(state + kLanes * 0); - const Vector128 s1 = Vector128Load(state + kLanes * 1); - const Vector128 s2 = Vector128Load(state + kLanes * 2); - const Vector128 s3 = Vector128Load(state + kLanes * 3); - const Vector128 s4 = Vector128Load(state + kLanes * 4); - const Vector128 s5 = Vector128Load(state + kLanes * 5); - const Vector128 s6 = Vector128Load(state + kLanes * 6); - const Vector128 s7 = Vector128Load(state + kLanes * 7); - const Vector128 s8 = Vector128Load(state + kLanes * 8); - const Vector128 s9 = Vector128Load(state + kLanes * 9); - const Vector128 s10 = Vector128Load(state + kLanes * 10); - const Vector128 s11 = Vector128Load(state + kLanes * 11); - const Vector128 s12 = Vector128Load(state + kLanes * 12); - const Vector128 s13 = Vector128Load(state + kLanes * 13); - const Vector128 s14 = Vector128Load(state + kLanes * 14); - const Vector128 s15 = Vector128Load(state + kLanes * 15); + const Vector128 s0 = Vector128Load(state + 0); + const Vector128 s1 = Vector128Load(state + 1); + const Vector128 s2 = Vector128Load(state + 2); + const Vector128 s3 = Vector128Load(state + 3); + const Vector128 s4 = Vector128Load(state + 4); + const Vector128 s5 = Vector128Load(state + 5); + const Vector128 s6 = Vector128Load(state + 6); + const Vector128 s7 = Vector128Load(state + 7); + const Vector128 s8 = Vector128Load(state + 8); + const Vector128 s9 = Vector128Load(state + 9); + const Vector128 s10 = Vector128Load(state + 10); + const Vector128 s11 = Vector128Load(state + 11); + const Vector128 s12 = Vector128Load(state + 12); + const Vector128 s13 = Vector128Load(state + 13); + const Vector128 s14 = Vector128Load(state + 14); + const Vector128 s15 = Vector128Load(state + 15); // Encode even blocks with keys. const Vector128 e0 = AesRound(s0, Vector128Load(keys + 0)); @@ -486,14 +418,14 @@ inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound( const Vector128 o15 = AesRound(e14, s15); // Store odd blocks. (These will be shuffled later). - Vector128Store(o1, state + kLanes * 1); - Vector128Store(o3, state + kLanes * 3); - Vector128Store(o5, state + kLanes * 5); - Vector128Store(o7, state + kLanes * 7); - Vector128Store(o9, state + kLanes * 9); - Vector128Store(o11, state + kLanes * 11); - Vector128Store(o13, state + kLanes * 13); - Vector128Store(o15, state + kLanes * 15); + Vector128Store(o1, state + 1); + Vector128Store(o3, state + 3); + Vector128Store(o5, state + 5); + Vector128Store(o7, state + 7); + Vector128Store(o9, state + 9); + Vector128Store(o11, state + 11); + Vector128Store(o13, state + 13); + Vector128Store(o15, state + 15); return keys + 8; } @@ -503,16 +435,13 @@ inline ABSL_TARGET_CRYPTO const u64x2* FeistelRound( // 2^64 queries if the round function is a PRF. This is similar to the b=8 case // of Simpira v2, but more efficient than its generic construction for b=16. inline ABSL_TARGET_CRYPTO void Permute( - const void* ABSL_RANDOM_INTERNAL_RESTRICT keys, uint64_t* state) { - const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 = - static_cast<const u64x2*>(keys); - + u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { // (Successfully unrolled; the first iteration jumps into the second half) #ifdef __clang__ #pragma clang loop unroll_count(2) #endif - for (size_t round = 0; round < kFeistelRounds; ++round) { - keys128 = FeistelRound(state, keys128); + for (size_t round = 0; round < RandenTraits::kFeistelRounds; ++round) { + keys = FeistelRound(state, keys); BlockShuffle(state); } } @@ -528,96 +457,101 @@ bool HasRandenHwAesImplementation() { return true; } const void* ABSL_TARGET_CRYPTO RandenHwAes::GetKeys() { // Round keys for one AES per Feistel round and branch. // The canonical implementation uses first digits of Pi. - return round_keys; +#if defined(ABSL_ARCH_PPC) + return kRandenRoundKeysBE; +#else + return kRandenRoundKeys; +#endif } // NOLINTNEXTLINE void ABSL_TARGET_CRYPTO RandenHwAes::Absorb(const void* seed_void, void* state_void) { - auto* state = static_cast<uint64_t*>(state_void); - const auto* seed = static_cast<const uint64_t*>(seed_void); - - constexpr size_t kCapacityBlocks = kCapacityBytes / sizeof(Vector128); - constexpr size_t kStateBlocks = kStateBytes / sizeof(Vector128); - - static_assert(kCapacityBlocks * sizeof(Vector128) == kCapacityBytes, - "Not i*V"); - static_assert(kCapacityBlocks == 1, "Unexpected Randen kCapacityBlocks"); - static_assert(kStateBlocks == 16, "Unexpected Randen kStateBlocks"); - - Vector128 b1 = Vector128Load(state + kLanes * 1); - b1 ^= Vector128Load(seed + kLanes * 0); - Vector128Store(b1, state + kLanes * 1); - - Vector128 b2 = Vector128Load(state + kLanes * 2); - b2 ^= Vector128Load(seed + kLanes * 1); - Vector128Store(b2, state + kLanes * 2); - - Vector128 b3 = Vector128Load(state + kLanes * 3); - b3 ^= Vector128Load(seed + kLanes * 2); - Vector128Store(b3, state + kLanes * 3); - - Vector128 b4 = Vector128Load(state + kLanes * 4); - b4 ^= Vector128Load(seed + kLanes * 3); - Vector128Store(b4, state + kLanes * 4); - - Vector128 b5 = Vector128Load(state + kLanes * 5); - b5 ^= Vector128Load(seed + kLanes * 4); - Vector128Store(b5, state + kLanes * 5); - - Vector128 b6 = Vector128Load(state + kLanes * 6); - b6 ^= Vector128Load(seed + kLanes * 5); - Vector128Store(b6, state + kLanes * 6); - - Vector128 b7 = Vector128Load(state + kLanes * 7); - b7 ^= Vector128Load(seed + kLanes * 6); - Vector128Store(b7, state + kLanes * 7); - - Vector128 b8 = Vector128Load(state + kLanes * 8); - b8 ^= Vector128Load(seed + kLanes * 7); - Vector128Store(b8, state + kLanes * 8); - - Vector128 b9 = Vector128Load(state + kLanes * 9); - b9 ^= Vector128Load(seed + kLanes * 8); - Vector128Store(b9, state + kLanes * 9); - - Vector128 b10 = Vector128Load(state + kLanes * 10); - b10 ^= Vector128Load(seed + kLanes * 9); - Vector128Store(b10, state + kLanes * 10); - - Vector128 b11 = Vector128Load(state + kLanes * 11); - b11 ^= Vector128Load(seed + kLanes * 10); - Vector128Store(b11, state + kLanes * 11); - - Vector128 b12 = Vector128Load(state + kLanes * 12); - b12 ^= Vector128Load(seed + kLanes * 11); - Vector128Store(b12, state + kLanes * 12); - - Vector128 b13 = Vector128Load(state + kLanes * 13); - b13 ^= Vector128Load(seed + kLanes * 12); - Vector128Store(b13, state + kLanes * 13); - - Vector128 b14 = Vector128Load(state + kLanes * 14); - b14 ^= Vector128Load(seed + kLanes * 13); - Vector128Store(b14, state + kLanes * 14); - - Vector128 b15 = Vector128Load(state + kLanes * 15); - b15 ^= Vector128Load(seed + kLanes * 14); - Vector128Store(b15, state + kLanes * 15); + static_assert(RandenTraits::kCapacityBytes / sizeof(Vector128) == 1, + "Unexpected Randen kCapacityBlocks"); + static_assert(RandenTraits::kStateBytes / sizeof(Vector128) == 16, + "Unexpected Randen kStateBlocks"); + + auto* state = + reinterpret_cast<u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(state_void); + const auto* seed = + reinterpret_cast<const u64x2 * ABSL_RANDOM_INTERNAL_RESTRICT>(seed_void); + + Vector128 b1 = Vector128Load(state + 1); + b1 ^= Vector128Load(seed + 0); + Vector128Store(b1, state + 1); + + Vector128 b2 = Vector128Load(state + 2); + b2 ^= Vector128Load(seed + 1); + Vector128Store(b2, state + 2); + + Vector128 b3 = Vector128Load(state + 3); + b3 ^= Vector128Load(seed + 2); + Vector128Store(b3, state + 3); + + Vector128 b4 = Vector128Load(state + 4); + b4 ^= Vector128Load(seed + 3); + Vector128Store(b4, state + 4); + + Vector128 b5 = Vector128Load(state + 5); + b5 ^= Vector128Load(seed + 4); + Vector128Store(b5, state + 5); + + Vector128 b6 = Vector128Load(state + 6); + b6 ^= Vector128Load(seed + 5); + Vector128Store(b6, state + 6); + + Vector128 b7 = Vector128Load(state + 7); + b7 ^= Vector128Load(seed + 6); + Vector128Store(b7, state + 7); + + Vector128 b8 = Vector128Load(state + 8); + b8 ^= Vector128Load(seed + 7); + Vector128Store(b8, state + 8); + + Vector128 b9 = Vector128Load(state + 9); + b9 ^= Vector128Load(seed + 8); + Vector128Store(b9, state + 9); + + Vector128 b10 = Vector128Load(state + 10); + b10 ^= Vector128Load(seed + 9); + Vector128Store(b10, state + 10); + + Vector128 b11 = Vector128Load(state + 11); + b11 ^= Vector128Load(seed + 10); + Vector128Store(b11, state + 11); + + Vector128 b12 = Vector128Load(state + 12); + b12 ^= Vector128Load(seed + 11); + Vector128Store(b12, state + 12); + + Vector128 b13 = Vector128Load(state + 13); + b13 ^= Vector128Load(seed + 12); + Vector128Store(b13, state + 13); + + Vector128 b14 = Vector128Load(state + 14); + b14 ^= Vector128Load(seed + 13); + Vector128Store(b14, state + 14); + + Vector128 b15 = Vector128Load(state + 15); + b15 ^= Vector128Load(seed + 14); + Vector128Store(b15, state + 15); } // NOLINTNEXTLINE -void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys, +void ABSL_TARGET_CRYPTO RandenHwAes::Generate(const void* keys_void, void* state_void) { - static_assert(kCapacityBytes == sizeof(Vector128), "Capacity mismatch"); + static_assert(RandenTraits::kCapacityBytes == sizeof(Vector128), + "Capacity mismatch"); - auto* state = static_cast<uint64_t*>(state_void); + auto* state = reinterpret_cast<u64x2*>(state_void); + const auto* keys = reinterpret_cast<const u64x2*>(keys_void); const Vector128 prev_inner = Vector128Load(state); SwapEndian(state); - Permute(keys, state); + Permute(state, keys); SwapEndian(state); diff --git a/absl/random/internal/randen_hwaes_test.cc b/absl/random/internal/randen_hwaes_test.cc index a7cbd46b..66ddb43f 100644 --- a/absl/random/internal/randen_hwaes_test.cc +++ b/absl/random/internal/randen_hwaes_test.cc @@ -27,12 +27,14 @@ namespace { using absl::random_internal::RandenHwAes; using absl::random_internal::RandenTraits; -struct randen { - static constexpr size_t kStateSizeT = - RandenTraits::kStateBytes / sizeof(uint64_t); +// Local state parameters. +constexpr size_t kSeedBytes = + RandenTraits::kStateBytes - RandenTraits::kCapacityBytes; +constexpr size_t kStateSizeT = RandenTraits::kStateBytes / sizeof(uint64_t); +constexpr size_t kSeedSizeT = kSeedBytes / sizeof(uint32_t); + +struct alignas(16) randen { uint64_t state[kStateSizeT]; - static constexpr size_t kSeedSizeT = - RandenTraits::kSeedBytes / sizeof(uint32_t); uint32_t seed[kSeedSizeT]; }; diff --git a/absl/random/internal/randen_round_keys.cc b/absl/random/internal/randen_round_keys.cc new file mode 100644 index 00000000..5fb3ca55 --- /dev/null +++ b/absl/random/internal/randen_round_keys.cc @@ -0,0 +1,462 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/random/internal/randen_traits.h" + +// This file contains only the round keys for randen. +// +// "Nothing up my sleeve" numbers from the first hex digits of Pi, obtained +// from http://hexpi.sourceforge.net/. The array was generated by following +// Python script: + +/* +python >tmp.cc << EOF +"""Generates Randen round keys array from pi-hex.62500.txt file.""" +import binascii + +KEYS = 17 * 8 + +def chunks(l, n): + """Yield successive n-sized chunks from l.""" + for i in range(0, len(l), n): + yield l[i:i + n] + +def pairwise(t): + """Transforms sequence into sequence of pairs.""" + it = iter(t) + return zip(it,it) + +def digits_from_pi(): + """Reads digits from hexpi.sourceforge.net file.""" + with open("pi-hex.62500.txt") as file: + return file.read() + +def digits_from_urandom(): + """Reads digits from /dev/urandom.""" + with open("/dev/urandom") as file: + return binascii.hexlify(file.read(KEYS * 16)) + +def print_row(b) + print(" 0x{0}, 0x{1}, 0x{2}, 0x{3}, 0x{4}, 0x{5}, 0x{6}, 0x{7}, 0x{8}, 0x{9}, +0x{10}, 0x{11}, 0x{12}, 0x{13}, 0x{14}, 0x{15},".format(*b)) + + +digits = digits_from_pi() +#digits = digits_from_urandom() + +print("namespace {") +print("static constexpr size_t kKeyBytes = {0};\n".format(KEYS * 16)) +print("}") + +print("alignas(16) const unsigned char kRandenRoundKeysBE[kKeyBytes] = {") + +for i, u16 in zip(range(KEYS), chunks(digits, 32)): + b = list(chunks(u16, 2)) + print_row(b) + +print("};") + +print("alignas(16) const unsigned char kRandenRoundKeys[kKeyBytes] = {") + +for i, u16 in zip(range(KEYS), chunks(digits, 32)): + b = list(chunks(u16, 2)) + b.reverse() + print_row(b) + +print("};") + +EOF + +*/ + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace random_internal { +namespace { +static constexpr size_t kKeyBytes = 2176; +} + +alignas(16) const unsigned char kRandenRoundKeysBE[kKeyBytes] = { + 0x24, 0x3F, 0x6A, 0x88, 0x85, 0xA3, 0x08, 0xD3, 0x13, 0x19, 0x8A, 0x2E, + 0x03, 0x70, 0x73, 0x44, 0xA4, 0x09, 0x38, 0x22, 0x29, 0x9F, 0x31, 0xD0, + 0x08, 0x2E, 0xFA, 0x98, 0xEC, 0x4E, 0x6C, 0x89, 0x45, 0x28, 0x21, 0xE6, + 0x38, 0xD0, 0x13, 0x77, 0xBE, 0x54, 0x66, 0xCF, 0x34, 0xE9, 0x0C, 0x6C, + 0xC0, 0xAC, 0x29, 0xB7, 0xC9, 0x7C, 0x50, 0xDD, 0x3F, 0x84, 0xD5, 0xB5, + 0xB5, 0x47, 0x09, 0x17, 0x92, 0x16, 0xD5, 0xD9, 0x89, 0x79, 0xFB, 0x1B, + 0xD1, 0x31, 0x0B, 0xA6, 0x98, 0xDF, 0xB5, 0xAC, 0x2F, 0xFD, 0x72, 0xDB, + 0xD0, 0x1A, 0xDF, 0xB7, 0xB8, 0xE1, 0xAF, 0xED, 0x6A, 0x26, 0x7E, 0x96, + 0xBA, 0x7C, 0x90, 0x45, 0xF1, 0x2C, 0x7F, 0x99, 0x24, 0xA1, 0x99, 0x47, + 0xB3, 0x91, 0x6C, 0xF7, 0x08, 0x01, 0xF2, 0xE2, 0x85, 0x8E, 0xFC, 0x16, + 0x63, 0x69, 0x20, 0xD8, 0x71, 0x57, 0x4E, 0x69, 0xA4, 0x58, 0xFE, 0xA3, + 0xF4, 0x93, 0x3D, 0x7E, 0x0D, 0x95, 0x74, 0x8F, 0x72, 0x8E, 0xB6, 0x58, + 0x71, 0x8B, 0xCD, 0x58, 0x82, 0x15, 0x4A, 0xEE, 0x7B, 0x54, 0xA4, 0x1D, + 0xC2, 0x5A, 0x59, 0xB5, 0x9C, 0x30, 0xD5, 0x39, 0x2A, 0xF2, 0x60, 0x13, + 0xC5, 0xD1, 0xB0, 0x23, 0x28, 0x60, 0x85, 0xF0, 0xCA, 0x41, 0x79, 0x18, + 0xB8, 0xDB, 0x38, 0xEF, 0x8E, 0x79, 0xDC, 0xB0, 0x60, 0x3A, 0x18, 0x0E, + 0x6C, 0x9E, 0x0E, 0x8B, 0xB0, 0x1E, 0x8A, 0x3E, 0xD7, 0x15, 0x77, 0xC1, + 0xBD, 0x31, 0x4B, 0x27, 0x78, 0xAF, 0x2F, 0xDA, 0x55, 0x60, 0x5C, 0x60, + 0xE6, 0x55, 0x25, 0xF3, 0xAA, 0x55, 0xAB, 0x94, 0x57, 0x48, 0x98, 0x62, + 0x63, 0xE8, 0x14, 0x40, 0x55, 0xCA, 0x39, 0x6A, 0x2A, 0xAB, 0x10, 0xB6, + 0xB4, 0xCC, 0x5C, 0x34, 0x11, 0x41, 0xE8, 0xCE, 0xA1, 0x54, 0x86, 0xAF, + 0x7C, 0x72, 0xE9, 0x93, 0xB3, 0xEE, 0x14, 0x11, 0x63, 0x6F, 0xBC, 0x2A, + 0x2B, 0xA9, 0xC5, 0x5D, 0x74, 0x18, 0x31, 0xF6, 0xCE, 0x5C, 0x3E, 0x16, + 0x9B, 0x87, 0x93, 0x1E, 0xAF, 0xD6, 0xBA, 0x33, 0x6C, 0x24, 0xCF, 0x5C, + 0x7A, 0x32, 0x53, 0x81, 0x28, 0x95, 0x86, 0x77, 0x3B, 0x8F, 0x48, 0x98, + 0x6B, 0x4B, 0xB9, 0xAF, 0xC4, 0xBF, 0xE8, 0x1B, 0x66, 0x28, 0x21, 0x93, + 0x61, 0xD8, 0x09, 0xCC, 0xFB, 0x21, 0xA9, 0x91, 0x48, 0x7C, 0xAC, 0x60, + 0x5D, 0xEC, 0x80, 0x32, 0xEF, 0x84, 0x5D, 0x5D, 0xE9, 0x85, 0x75, 0xB1, + 0xDC, 0x26, 0x23, 0x02, 0xEB, 0x65, 0x1B, 0x88, 0x23, 0x89, 0x3E, 0x81, + 0xD3, 0x96, 0xAC, 0xC5, 0x0F, 0x6D, 0x6F, 0xF3, 0x83, 0xF4, 0x42, 0x39, + 0x2E, 0x0B, 0x44, 0x82, 0xA4, 0x84, 0x20, 0x04, 0x69, 0xC8, 0xF0, 0x4A, + 0x9E, 0x1F, 0x9B, 0x5E, 0x21, 0xC6, 0x68, 0x42, 0xF6, 0xE9, 0x6C, 0x9A, + 0x67, 0x0C, 0x9C, 0x61, 0xAB, 0xD3, 0x88, 0xF0, 0x6A, 0x51, 0xA0, 0xD2, + 0xD8, 0x54, 0x2F, 0x68, 0x96, 0x0F, 0xA7, 0x28, 0xAB, 0x51, 0x33, 0xA3, + 0x6E, 0xEF, 0x0B, 0x6C, 0x13, 0x7A, 0x3B, 0xE4, 0xBA, 0x3B, 0xF0, 0x50, + 0x7E, 0xFB, 0x2A, 0x98, 0xA1, 0xF1, 0x65, 0x1D, 0x39, 0xAF, 0x01, 0x76, + 0x66, 0xCA, 0x59, 0x3E, 0x82, 0x43, 0x0E, 0x88, 0x8C, 0xEE, 0x86, 0x19, + 0x45, 0x6F, 0x9F, 0xB4, 0x7D, 0x84, 0xA5, 0xC3, 0x3B, 0x8B, 0x5E, 0xBE, + 0xE0, 0x6F, 0x75, 0xD8, 0x85, 0xC1, 0x20, 0x73, 0x40, 0x1A, 0x44, 0x9F, + 0x56, 0xC1, 0x6A, 0xA6, 0x4E, 0xD3, 0xAA, 0x62, 0x36, 0x3F, 0x77, 0x06, + 0x1B, 0xFE, 0xDF, 0x72, 0x42, 0x9B, 0x02, 0x3D, 0x37, 0xD0, 0xD7, 0x24, + 0xD0, 0x0A, 0x12, 0x48, 0xDB, 0x0F, 0xEA, 0xD3, 0x49, 0xF1, 0xC0, 0x9B, + 0x07, 0x53, 0x72, 0xC9, 0x80, 0x99, 0x1B, 0x7B, 0x25, 0xD4, 0x79, 0xD8, + 0xF6, 0xE8, 0xDE, 0xF7, 0xE3, 0xFE, 0x50, 0x1A, 0xB6, 0x79, 0x4C, 0x3B, + 0x97, 0x6C, 0xE0, 0xBD, 0x04, 0xC0, 0x06, 0xBA, 0xC1, 0xA9, 0x4F, 0xB6, + 0x40, 0x9F, 0x60, 0xC4, 0x5E, 0x5C, 0x9E, 0xC2, 0x19, 0x6A, 0x24, 0x63, + 0x68, 0xFB, 0x6F, 0xAF, 0x3E, 0x6C, 0x53, 0xB5, 0x13, 0x39, 0xB2, 0xEB, + 0x3B, 0x52, 0xEC, 0x6F, 0x6D, 0xFC, 0x51, 0x1F, 0x9B, 0x30, 0x95, 0x2C, + 0xCC, 0x81, 0x45, 0x44, 0xAF, 0x5E, 0xBD, 0x09, 0xBE, 0xE3, 0xD0, 0x04, + 0xDE, 0x33, 0x4A, 0xFD, 0x66, 0x0F, 0x28, 0x07, 0x19, 0x2E, 0x4B, 0xB3, + 0xC0, 0xCB, 0xA8, 0x57, 0x45, 0xC8, 0x74, 0x0F, 0xD2, 0x0B, 0x5F, 0x39, + 0xB9, 0xD3, 0xFB, 0xDB, 0x55, 0x79, 0xC0, 0xBD, 0x1A, 0x60, 0x32, 0x0A, + 0xD6, 0xA1, 0x00, 0xC6, 0x40, 0x2C, 0x72, 0x79, 0x67, 0x9F, 0x25, 0xFE, + 0xFB, 0x1F, 0xA3, 0xCC, 0x8E, 0xA5, 0xE9, 0xF8, 0xDB, 0x32, 0x22, 0xF8, + 0x3C, 0x75, 0x16, 0xDF, 0xFD, 0x61, 0x6B, 0x15, 0x2F, 0x50, 0x1E, 0xC8, + 0xAD, 0x05, 0x52, 0xAB, 0x32, 0x3D, 0xB5, 0xFA, 0xFD, 0x23, 0x87, 0x60, + 0x53, 0x31, 0x7B, 0x48, 0x3E, 0x00, 0xDF, 0x82, 0x9E, 0x5C, 0x57, 0xBB, + 0xCA, 0x6F, 0x8C, 0xA0, 0x1A, 0x87, 0x56, 0x2E, 0xDF, 0x17, 0x69, 0xDB, + 0xD5, 0x42, 0xA8, 0xF6, 0x28, 0x7E, 0xFF, 0xC3, 0xAC, 0x67, 0x32, 0xC6, + 0x8C, 0x4F, 0x55, 0x73, 0x69, 0x5B, 0x27, 0xB0, 0xBB, 0xCA, 0x58, 0xC8, + 0xE1, 0xFF, 0xA3, 0x5D, 0xB8, 0xF0, 0x11, 0xA0, 0x10, 0xFA, 0x3D, 0x98, + 0xFD, 0x21, 0x83, 0xB8, 0x4A, 0xFC, 0xB5, 0x6C, 0x2D, 0xD1, 0xD3, 0x5B, + 0x9A, 0x53, 0xE4, 0x79, 0xB6, 0xF8, 0x45, 0x65, 0xD2, 0x8E, 0x49, 0xBC, + 0x4B, 0xFB, 0x97, 0x90, 0xE1, 0xDD, 0xF2, 0xDA, 0xA4, 0xCB, 0x7E, 0x33, + 0x62, 0xFB, 0x13, 0x41, 0xCE, 0xE4, 0xC6, 0xE8, 0xEF, 0x20, 0xCA, 0xDA, + 0x36, 0x77, 0x4C, 0x01, 0xD0, 0x7E, 0x9E, 0xFE, 0x2B, 0xF1, 0x1F, 0xB4, + 0x95, 0xDB, 0xDA, 0x4D, 0xAE, 0x90, 0x91, 0x98, 0xEA, 0xAD, 0x8E, 0x71, + 0x6B, 0x93, 0xD5, 0xA0, 0xD0, 0x8E, 0xD1, 0xD0, 0xAF, 0xC7, 0x25, 0xE0, + 0x8E, 0x3C, 0x5B, 0x2F, 0x8E, 0x75, 0x94, 0xB7, 0x8F, 0xF6, 0xE2, 0xFB, + 0xF2, 0x12, 0x2B, 0x64, 0x88, 0x88, 0xB8, 0x12, 0x90, 0x0D, 0xF0, 0x1C, + 0x4F, 0xAD, 0x5E, 0xA0, 0x68, 0x8F, 0xC3, 0x1C, 0xD1, 0xCF, 0xF1, 0x91, + 0xB3, 0xA8, 0xC1, 0xAD, 0x2F, 0x2F, 0x22, 0x18, 0xBE, 0x0E, 0x17, 0x77, + 0xEA, 0x75, 0x2D, 0xFE, 0x8B, 0x02, 0x1F, 0xA1, 0xE5, 0xA0, 0xCC, 0x0F, + 0xB5, 0x6F, 0x74, 0xE8, 0x18, 0xAC, 0xF3, 0xD6, 0xCE, 0x89, 0xE2, 0x99, + 0xB4, 0xA8, 0x4F, 0xE0, 0xFD, 0x13, 0xE0, 0xB7, 0x7C, 0xC4, 0x3B, 0x81, + 0xD2, 0xAD, 0xA8, 0xD9, 0x16, 0x5F, 0xA2, 0x66, 0x80, 0x95, 0x77, 0x05, + 0x93, 0xCC, 0x73, 0x14, 0x21, 0x1A, 0x14, 0x77, 0xE6, 0xAD, 0x20, 0x65, + 0x77, 0xB5, 0xFA, 0x86, 0xC7, 0x54, 0x42, 0xF5, 0xFB, 0x9D, 0x35, 0xCF, + 0xEB, 0xCD, 0xAF, 0x0C, 0x7B, 0x3E, 0x89, 0xA0, 0xD6, 0x41, 0x1B, 0xD3, + 0xAE, 0x1E, 0x7E, 0x49, 0x00, 0x25, 0x0E, 0x2D, 0x20, 0x71, 0xB3, 0x5E, + 0x22, 0x68, 0x00, 0xBB, 0x57, 0xB8, 0xE0, 0xAF, 0x24, 0x64, 0x36, 0x9B, + 0xF0, 0x09, 0xB9, 0x1E, 0x55, 0x63, 0x91, 0x1D, 0x59, 0xDF, 0xA6, 0xAA, + 0x78, 0xC1, 0x43, 0x89, 0xD9, 0x5A, 0x53, 0x7F, 0x20, 0x7D, 0x5B, 0xA2, + 0x02, 0xE5, 0xB9, 0xC5, 0x83, 0x26, 0x03, 0x76, 0x62, 0x95, 0xCF, 0xA9, + 0x11, 0xC8, 0x19, 0x68, 0x4E, 0x73, 0x4A, 0x41, 0xB3, 0x47, 0x2D, 0xCA, + 0x7B, 0x14, 0xA9, 0x4A, 0x1B, 0x51, 0x00, 0x52, 0x9A, 0x53, 0x29, 0x15, + 0xD6, 0x0F, 0x57, 0x3F, 0xBC, 0x9B, 0xC6, 0xE4, 0x2B, 0x60, 0xA4, 0x76, + 0x81, 0xE6, 0x74, 0x00, 0x08, 0xBA, 0x6F, 0xB5, 0x57, 0x1B, 0xE9, 0x1F, + 0xF2, 0x96, 0xEC, 0x6B, 0x2A, 0x0D, 0xD9, 0x15, 0xB6, 0x63, 0x65, 0x21, + 0xE7, 0xB9, 0xF9, 0xB6, 0xFF, 0x34, 0x05, 0x2E, 0xC5, 0x85, 0x56, 0x64, + 0x53, 0xB0, 0x2D, 0x5D, 0xA9, 0x9F, 0x8F, 0xA1, 0x08, 0xBA, 0x47, 0x99, + 0x6E, 0x85, 0x07, 0x6A, 0x4B, 0x7A, 0x70, 0xE9, 0xB5, 0xB3, 0x29, 0x44, + 0xDB, 0x75, 0x09, 0x2E, 0xC4, 0x19, 0x26, 0x23, 0xAD, 0x6E, 0xA6, 0xB0, + 0x49, 0xA7, 0xDF, 0x7D, 0x9C, 0xEE, 0x60, 0xB8, 0x8F, 0xED, 0xB2, 0x66, + 0xEC, 0xAA, 0x8C, 0x71, 0x69, 0x9A, 0x18, 0xFF, 0x56, 0x64, 0x52, 0x6C, + 0xC2, 0xB1, 0x9E, 0xE1, 0x19, 0x36, 0x02, 0xA5, 0x75, 0x09, 0x4C, 0x29, + 0xA0, 0x59, 0x13, 0x40, 0xE4, 0x18, 0x3A, 0x3E, 0x3F, 0x54, 0x98, 0x9A, + 0x5B, 0x42, 0x9D, 0x65, 0x6B, 0x8F, 0xE4, 0xD6, 0x99, 0xF7, 0x3F, 0xD6, + 0xA1, 0xD2, 0x9C, 0x07, 0xEF, 0xE8, 0x30, 0xF5, 0x4D, 0x2D, 0x38, 0xE6, + 0xF0, 0x25, 0x5D, 0xC1, 0x4C, 0xDD, 0x20, 0x86, 0x84, 0x70, 0xEB, 0x26, + 0x63, 0x82, 0xE9, 0xC6, 0x02, 0x1E, 0xCC, 0x5E, 0x09, 0x68, 0x6B, 0x3F, + 0x3E, 0xBA, 0xEF, 0xC9, 0x3C, 0x97, 0x18, 0x14, 0x6B, 0x6A, 0x70, 0xA1, + 0x68, 0x7F, 0x35, 0x84, 0x52, 0xA0, 0xE2, 0x86, 0xB7, 0x9C, 0x53, 0x05, + 0xAA, 0x50, 0x07, 0x37, 0x3E, 0x07, 0x84, 0x1C, 0x7F, 0xDE, 0xAE, 0x5C, + 0x8E, 0x7D, 0x44, 0xEC, 0x57, 0x16, 0xF2, 0xB8, 0xB0, 0x3A, 0xDA, 0x37, + 0xF0, 0x50, 0x0C, 0x0D, 0xF0, 0x1C, 0x1F, 0x04, 0x02, 0x00, 0xB3, 0xFF, + 0xAE, 0x0C, 0xF5, 0x1A, 0x3C, 0xB5, 0x74, 0xB2, 0x25, 0x83, 0x7A, 0x58, + 0xDC, 0x09, 0x21, 0xBD, 0xD1, 0x91, 0x13, 0xF9, 0x7C, 0xA9, 0x2F, 0xF6, + 0x94, 0x32, 0x47, 0x73, 0x22, 0xF5, 0x47, 0x01, 0x3A, 0xE5, 0xE5, 0x81, + 0x37, 0xC2, 0xDA, 0xDC, 0xC8, 0xB5, 0x76, 0x34, 0x9A, 0xF3, 0xDD, 0xA7, + 0xA9, 0x44, 0x61, 0x46, 0x0F, 0xD0, 0x03, 0x0E, 0xEC, 0xC8, 0xC7, 0x3E, + 0xA4, 0x75, 0x1E, 0x41, 0xE2, 0x38, 0xCD, 0x99, 0x3B, 0xEA, 0x0E, 0x2F, + 0x32, 0x80, 0xBB, 0xA1, 0x18, 0x3E, 0xB3, 0x31, 0x4E, 0x54, 0x8B, 0x38, + 0x4F, 0x6D, 0xB9, 0x08, 0x6F, 0x42, 0x0D, 0x03, 0xF6, 0x0A, 0x04, 0xBF, + 0x2C, 0xB8, 0x12, 0x90, 0x24, 0x97, 0x7C, 0x79, 0x56, 0x79, 0xB0, 0x72, + 0xBC, 0xAF, 0x89, 0xAF, 0xDE, 0x9A, 0x77, 0x1F, 0xD9, 0x93, 0x08, 0x10, + 0xB3, 0x8B, 0xAE, 0x12, 0xDC, 0xCF, 0x3F, 0x2E, 0x55, 0x12, 0x72, 0x1F, + 0x2E, 0x6B, 0x71, 0x24, 0x50, 0x1A, 0xDD, 0xE6, 0x9F, 0x84, 0xCD, 0x87, + 0x7A, 0x58, 0x47, 0x18, 0x74, 0x08, 0xDA, 0x17, 0xBC, 0x9F, 0x9A, 0xBC, + 0xE9, 0x4B, 0x7D, 0x8C, 0xEC, 0x7A, 0xEC, 0x3A, 0xDB, 0x85, 0x1D, 0xFA, + 0x63, 0x09, 0x43, 0x66, 0xC4, 0x64, 0xC3, 0xD2, 0xEF, 0x1C, 0x18, 0x47, + 0x32, 0x15, 0xD8, 0x08, 0xDD, 0x43, 0x3B, 0x37, 0x24, 0xC2, 0xBA, 0x16, + 0x12, 0xA1, 0x4D, 0x43, 0x2A, 0x65, 0xC4, 0x51, 0x50, 0x94, 0x00, 0x02, + 0x13, 0x3A, 0xE4, 0xDD, 0x71, 0xDF, 0xF8, 0x9E, 0x10, 0x31, 0x4E, 0x55, + 0x81, 0xAC, 0x77, 0xD6, 0x5F, 0x11, 0x19, 0x9B, 0x04, 0x35, 0x56, 0xF1, + 0xD7, 0xA3, 0xC7, 0x6B, 0x3C, 0x11, 0x18, 0x3B, 0x59, 0x24, 0xA5, 0x09, + 0xF2, 0x8F, 0xE6, 0xED, 0x97, 0xF1, 0xFB, 0xFA, 0x9E, 0xBA, 0xBF, 0x2C, + 0x1E, 0x15, 0x3C, 0x6E, 0x86, 0xE3, 0x45, 0x70, 0xEA, 0xE9, 0x6F, 0xB1, + 0x86, 0x0E, 0x5E, 0x0A, 0x5A, 0x3E, 0x2A, 0xB3, 0x77, 0x1F, 0xE7, 0x1C, + 0x4E, 0x3D, 0x06, 0xFA, 0x29, 0x65, 0xDC, 0xB9, 0x99, 0xE7, 0x1D, 0x0F, + 0x80, 0x3E, 0x89, 0xD6, 0x52, 0x66, 0xC8, 0x25, 0x2E, 0x4C, 0xC9, 0x78, + 0x9C, 0x10, 0xB3, 0x6A, 0xC6, 0x15, 0x0E, 0xBA, 0x94, 0xE2, 0xEA, 0x78, + 0xA6, 0xFC, 0x3C, 0x53, 0x1E, 0x0A, 0x2D, 0xF4, 0xF2, 0xF7, 0x4E, 0xA7, + 0x36, 0x1D, 0x2B, 0x3D, 0x19, 0x39, 0x26, 0x0F, 0x19, 0xC2, 0x79, 0x60, + 0x52, 0x23, 0xA7, 0x08, 0xF7, 0x13, 0x12, 0xB6, 0xEB, 0xAD, 0xFE, 0x6E, + 0xEA, 0xC3, 0x1F, 0x66, 0xE3, 0xBC, 0x45, 0x95, 0xA6, 0x7B, 0xC8, 0x83, + 0xB1, 0x7F, 0x37, 0xD1, 0x01, 0x8C, 0xFF, 0x28, 0xC3, 0x32, 0xDD, 0xEF, + 0xBE, 0x6C, 0x5A, 0xA5, 0x65, 0x58, 0x21, 0x85, 0x68, 0xAB, 0x97, 0x02, + 0xEE, 0xCE, 0xA5, 0x0F, 0xDB, 0x2F, 0x95, 0x3B, 0x2A, 0xEF, 0x7D, 0xAD, + 0x5B, 0x6E, 0x2F, 0x84, 0x15, 0x21, 0xB6, 0x28, 0x29, 0x07, 0x61, 0x70, + 0xEC, 0xDD, 0x47, 0x75, 0x61, 0x9F, 0x15, 0x10, 0x13, 0xCC, 0xA8, 0x30, + 0xEB, 0x61, 0xBD, 0x96, 0x03, 0x34, 0xFE, 0x1E, 0xAA, 0x03, 0x63, 0xCF, + 0xB5, 0x73, 0x5C, 0x90, 0x4C, 0x70, 0xA2, 0x39, 0xD5, 0x9E, 0x9E, 0x0B, + 0xCB, 0xAA, 0xDE, 0x14, 0xEE, 0xCC, 0x86, 0xBC, 0x60, 0x62, 0x2C, 0xA7, + 0x9C, 0xAB, 0x5C, 0xAB, 0xB2, 0xF3, 0x84, 0x6E, 0x64, 0x8B, 0x1E, 0xAF, + 0x19, 0xBD, 0xF0, 0xCA, 0xA0, 0x23, 0x69, 0xB9, 0x65, 0x5A, 0xBB, 0x50, + 0x40, 0x68, 0x5A, 0x32, 0x3C, 0x2A, 0xB4, 0xB3, 0x31, 0x9E, 0xE9, 0xD5, + 0xC0, 0x21, 0xB8, 0xF7, 0x9B, 0x54, 0x0B, 0x19, 0x87, 0x5F, 0xA0, 0x99, + 0x95, 0xF7, 0x99, 0x7E, 0x62, 0x3D, 0x7D, 0xA8, 0xF8, 0x37, 0x88, 0x9A, + 0x97, 0xE3, 0x2D, 0x77, 0x11, 0xED, 0x93, 0x5F, 0x16, 0x68, 0x12, 0x81, + 0x0E, 0x35, 0x88, 0x29, 0xC7, 0xE6, 0x1F, 0xD6, 0x96, 0xDE, 0xDF, 0xA1, + 0x78, 0x58, 0xBA, 0x99, 0x57, 0xF5, 0x84, 0xA5, 0x1B, 0x22, 0x72, 0x63, + 0x9B, 0x83, 0xC3, 0xFF, 0x1A, 0xC2, 0x46, 0x96, 0xCD, 0xB3, 0x0A, 0xEB, + 0x53, 0x2E, 0x30, 0x54, 0x8F, 0xD9, 0x48, 0xE4, 0x6D, 0xBC, 0x31, 0x28, + 0x58, 0xEB, 0xF2, 0xEF, 0x34, 0xC6, 0xFF, 0xEA, 0xFE, 0x28, 0xED, 0x61, + 0xEE, 0x7C, 0x3C, 0x73, 0x5D, 0x4A, 0x14, 0xD9, 0xE8, 0x64, 0xB7, 0xE3, + 0x42, 0x10, 0x5D, 0x14, 0x20, 0x3E, 0x13, 0xE0, 0x45, 0xEE, 0xE2, 0xB6, + 0xA3, 0xAA, 0xAB, 0xEA, 0xDB, 0x6C, 0x4F, 0x15, 0xFA, 0xCB, 0x4F, 0xD0, + 0xC7, 0x42, 0xF4, 0x42, 0xEF, 0x6A, 0xBB, 0xB5, 0x65, 0x4F, 0x3B, 0x1D, + 0x41, 0xCD, 0x21, 0x05, 0xD8, 0x1E, 0x79, 0x9E, 0x86, 0x85, 0x4D, 0xC7, + 0xE4, 0x4B, 0x47, 0x6A, 0x3D, 0x81, 0x62, 0x50, 0xCF, 0x62, 0xA1, 0xF2, + 0x5B, 0x8D, 0x26, 0x46, 0xFC, 0x88, 0x83, 0xA0, 0xC1, 0xC7, 0xB6, 0xA3, + 0x7F, 0x15, 0x24, 0xC3, 0x69, 0xCB, 0x74, 0x92, 0x47, 0x84, 0x8A, 0x0B, + 0x56, 0x92, 0xB2, 0x85, 0x09, 0x5B, 0xBF, 0x00, 0xAD, 0x19, 0x48, 0x9D, + 0x14, 0x62, 0xB1, 0x74, 0x23, 0x82, 0x0D, 0x00, 0x58, 0x42, 0x8D, 0x2A, + 0x0C, 0x55, 0xF5, 0xEA, 0x1D, 0xAD, 0xF4, 0x3E, 0x23, 0x3F, 0x70, 0x61, + 0x33, 0x72, 0xF0, 0x92, 0x8D, 0x93, 0x7E, 0x41, 0xD6, 0x5F, 0xEC, 0xF1, + 0x6C, 0x22, 0x3B, 0xDB, 0x7C, 0xDE, 0x37, 0x59, 0xCB, 0xEE, 0x74, 0x60, + 0x40, 0x85, 0xF2, 0xA7, 0xCE, 0x77, 0x32, 0x6E, 0xA6, 0x07, 0x80, 0x84, + 0x19, 0xF8, 0x50, 0x9E, 0xE8, 0xEF, 0xD8, 0x55, 0x61, 0xD9, 0x97, 0x35, + 0xA9, 0x69, 0xA7, 0xAA, 0xC5, 0x0C, 0x06, 0xC2, 0x5A, 0x04, 0xAB, 0xFC, + 0x80, 0x0B, 0xCA, 0xDC, 0x9E, 0x44, 0x7A, 0x2E, 0xC3, 0x45, 0x34, 0x84, + 0xFD, 0xD5, 0x67, 0x05, 0x0E, 0x1E, 0x9E, 0xC9, 0xDB, 0x73, 0xDB, 0xD3, + 0x10, 0x55, 0x88, 0xCD, 0x67, 0x5F, 0xDA, 0x79, 0xE3, 0x67, 0x43, 0x40, + 0xC5, 0xC4, 0x34, 0x65, 0x71, 0x3E, 0x38, 0xD8, 0x3D, 0x28, 0xF8, 0x9E, + 0xF1, 0x6D, 0xFF, 0x20, 0x15, 0x3E, 0x21, 0xE7, 0x8F, 0xB0, 0x3D, 0x4A, + 0xE6, 0xE3, 0x9F, 0x2B, 0xDB, 0x83, 0xAD, 0xF7, 0xE9, 0x3D, 0x5A, 0x68, + 0x94, 0x81, 0x40, 0xF7, 0xF6, 0x4C, 0x26, 0x1C, 0x94, 0x69, 0x29, 0x34, + 0x41, 0x15, 0x20, 0xF7, 0x76, 0x02, 0xD4, 0xF7, 0xBC, 0xF4, 0x6B, 0x2E, + 0xD4, 0xA1, 0x00, 0x68, 0xD4, 0x08, 0x24, 0x71, 0x33, 0x20, 0xF4, 0x6A, + 0x43, 0xB7, 0xD4, 0xB7, 0x50, 0x00, 0x61, 0xAF, 0x1E, 0x39, 0xF6, 0x2E, + 0x97, 0x24, 0x45, 0x46, +}; + +alignas(16) const unsigned char kRandenRoundKeys[kKeyBytes] = { + 0x44, 0x73, 0x70, 0x03, 0x2E, 0x8A, 0x19, 0x13, 0xD3, 0x08, 0xA3, 0x85, + 0x88, 0x6A, 0x3F, 0x24, 0x89, 0x6C, 0x4E, 0xEC, 0x98, 0xFA, 0x2E, 0x08, + 0xD0, 0x31, 0x9F, 0x29, 0x22, 0x38, 0x09, 0xA4, 0x6C, 0x0C, 0xE9, 0x34, + 0xCF, 0x66, 0x54, 0xBE, 0x77, 0x13, 0xD0, 0x38, 0xE6, 0x21, 0x28, 0x45, + 0x17, 0x09, 0x47, 0xB5, 0xB5, 0xD5, 0x84, 0x3F, 0xDD, 0x50, 0x7C, 0xC9, + 0xB7, 0x29, 0xAC, 0xC0, 0xAC, 0xB5, 0xDF, 0x98, 0xA6, 0x0B, 0x31, 0xD1, + 0x1B, 0xFB, 0x79, 0x89, 0xD9, 0xD5, 0x16, 0x92, 0x96, 0x7E, 0x26, 0x6A, + 0xED, 0xAF, 0xE1, 0xB8, 0xB7, 0xDF, 0x1A, 0xD0, 0xDB, 0x72, 0xFD, 0x2F, + 0xF7, 0x6C, 0x91, 0xB3, 0x47, 0x99, 0xA1, 0x24, 0x99, 0x7F, 0x2C, 0xF1, + 0x45, 0x90, 0x7C, 0xBA, 0x69, 0x4E, 0x57, 0x71, 0xD8, 0x20, 0x69, 0x63, + 0x16, 0xFC, 0x8E, 0x85, 0xE2, 0xF2, 0x01, 0x08, 0x58, 0xB6, 0x8E, 0x72, + 0x8F, 0x74, 0x95, 0x0D, 0x7E, 0x3D, 0x93, 0xF4, 0xA3, 0xFE, 0x58, 0xA4, + 0xB5, 0x59, 0x5A, 0xC2, 0x1D, 0xA4, 0x54, 0x7B, 0xEE, 0x4A, 0x15, 0x82, + 0x58, 0xCD, 0x8B, 0x71, 0xF0, 0x85, 0x60, 0x28, 0x23, 0xB0, 0xD1, 0xC5, + 0x13, 0x60, 0xF2, 0x2A, 0x39, 0xD5, 0x30, 0x9C, 0x0E, 0x18, 0x3A, 0x60, + 0xB0, 0xDC, 0x79, 0x8E, 0xEF, 0x38, 0xDB, 0xB8, 0x18, 0x79, 0x41, 0xCA, + 0x27, 0x4B, 0x31, 0xBD, 0xC1, 0x77, 0x15, 0xD7, 0x3E, 0x8A, 0x1E, 0xB0, + 0x8B, 0x0E, 0x9E, 0x6C, 0x94, 0xAB, 0x55, 0xAA, 0xF3, 0x25, 0x55, 0xE6, + 0x60, 0x5C, 0x60, 0x55, 0xDA, 0x2F, 0xAF, 0x78, 0xB6, 0x10, 0xAB, 0x2A, + 0x6A, 0x39, 0xCA, 0x55, 0x40, 0x14, 0xE8, 0x63, 0x62, 0x98, 0x48, 0x57, + 0x93, 0xE9, 0x72, 0x7C, 0xAF, 0x86, 0x54, 0xA1, 0xCE, 0xE8, 0x41, 0x11, + 0x34, 0x5C, 0xCC, 0xB4, 0xF6, 0x31, 0x18, 0x74, 0x5D, 0xC5, 0xA9, 0x2B, + 0x2A, 0xBC, 0x6F, 0x63, 0x11, 0x14, 0xEE, 0xB3, 0x5C, 0xCF, 0x24, 0x6C, + 0x33, 0xBA, 0xD6, 0xAF, 0x1E, 0x93, 0x87, 0x9B, 0x16, 0x3E, 0x5C, 0xCE, + 0xAF, 0xB9, 0x4B, 0x6B, 0x98, 0x48, 0x8F, 0x3B, 0x77, 0x86, 0x95, 0x28, + 0x81, 0x53, 0x32, 0x7A, 0x91, 0xA9, 0x21, 0xFB, 0xCC, 0x09, 0xD8, 0x61, + 0x93, 0x21, 0x28, 0x66, 0x1B, 0xE8, 0xBF, 0xC4, 0xB1, 0x75, 0x85, 0xE9, + 0x5D, 0x5D, 0x84, 0xEF, 0x32, 0x80, 0xEC, 0x5D, 0x60, 0xAC, 0x7C, 0x48, + 0xC5, 0xAC, 0x96, 0xD3, 0x81, 0x3E, 0x89, 0x23, 0x88, 0x1B, 0x65, 0xEB, + 0x02, 0x23, 0x26, 0xDC, 0x04, 0x20, 0x84, 0xA4, 0x82, 0x44, 0x0B, 0x2E, + 0x39, 0x42, 0xF4, 0x83, 0xF3, 0x6F, 0x6D, 0x0F, 0x9A, 0x6C, 0xE9, 0xF6, + 0x42, 0x68, 0xC6, 0x21, 0x5E, 0x9B, 0x1F, 0x9E, 0x4A, 0xF0, 0xC8, 0x69, + 0x68, 0x2F, 0x54, 0xD8, 0xD2, 0xA0, 0x51, 0x6A, 0xF0, 0x88, 0xD3, 0xAB, + 0x61, 0x9C, 0x0C, 0x67, 0xE4, 0x3B, 0x7A, 0x13, 0x6C, 0x0B, 0xEF, 0x6E, + 0xA3, 0x33, 0x51, 0xAB, 0x28, 0xA7, 0x0F, 0x96, 0x76, 0x01, 0xAF, 0x39, + 0x1D, 0x65, 0xF1, 0xA1, 0x98, 0x2A, 0xFB, 0x7E, 0x50, 0xF0, 0x3B, 0xBA, + 0xB4, 0x9F, 0x6F, 0x45, 0x19, 0x86, 0xEE, 0x8C, 0x88, 0x0E, 0x43, 0x82, + 0x3E, 0x59, 0xCA, 0x66, 0x73, 0x20, 0xC1, 0x85, 0xD8, 0x75, 0x6F, 0xE0, + 0xBE, 0x5E, 0x8B, 0x3B, 0xC3, 0xA5, 0x84, 0x7D, 0x06, 0x77, 0x3F, 0x36, + 0x62, 0xAA, 0xD3, 0x4E, 0xA6, 0x6A, 0xC1, 0x56, 0x9F, 0x44, 0x1A, 0x40, + 0x48, 0x12, 0x0A, 0xD0, 0x24, 0xD7, 0xD0, 0x37, 0x3D, 0x02, 0x9B, 0x42, + 0x72, 0xDF, 0xFE, 0x1B, 0x7B, 0x1B, 0x99, 0x80, 0xC9, 0x72, 0x53, 0x07, + 0x9B, 0xC0, 0xF1, 0x49, 0xD3, 0xEA, 0x0F, 0xDB, 0x3B, 0x4C, 0x79, 0xB6, + 0x1A, 0x50, 0xFE, 0xE3, 0xF7, 0xDE, 0xE8, 0xF6, 0xD8, 0x79, 0xD4, 0x25, + 0xC4, 0x60, 0x9F, 0x40, 0xB6, 0x4F, 0xA9, 0xC1, 0xBA, 0x06, 0xC0, 0x04, + 0xBD, 0xE0, 0x6C, 0x97, 0xB5, 0x53, 0x6C, 0x3E, 0xAF, 0x6F, 0xFB, 0x68, + 0x63, 0x24, 0x6A, 0x19, 0xC2, 0x9E, 0x5C, 0x5E, 0x2C, 0x95, 0x30, 0x9B, + 0x1F, 0x51, 0xFC, 0x6D, 0x6F, 0xEC, 0x52, 0x3B, 0xEB, 0xB2, 0x39, 0x13, + 0xFD, 0x4A, 0x33, 0xDE, 0x04, 0xD0, 0xE3, 0xBE, 0x09, 0xBD, 0x5E, 0xAF, + 0x44, 0x45, 0x81, 0xCC, 0x0F, 0x74, 0xC8, 0x45, 0x57, 0xA8, 0xCB, 0xC0, + 0xB3, 0x4B, 0x2E, 0x19, 0x07, 0x28, 0x0F, 0x66, 0x0A, 0x32, 0x60, 0x1A, + 0xBD, 0xC0, 0x79, 0x55, 0xDB, 0xFB, 0xD3, 0xB9, 0x39, 0x5F, 0x0B, 0xD2, + 0xCC, 0xA3, 0x1F, 0xFB, 0xFE, 0x25, 0x9F, 0x67, 0x79, 0x72, 0x2C, 0x40, + 0xC6, 0x00, 0xA1, 0xD6, 0x15, 0x6B, 0x61, 0xFD, 0xDF, 0x16, 0x75, 0x3C, + 0xF8, 0x22, 0x32, 0xDB, 0xF8, 0xE9, 0xA5, 0x8E, 0x60, 0x87, 0x23, 0xFD, + 0xFA, 0xB5, 0x3D, 0x32, 0xAB, 0x52, 0x05, 0xAD, 0xC8, 0x1E, 0x50, 0x2F, + 0xA0, 0x8C, 0x6F, 0xCA, 0xBB, 0x57, 0x5C, 0x9E, 0x82, 0xDF, 0x00, 0x3E, + 0x48, 0x7B, 0x31, 0x53, 0xC3, 0xFF, 0x7E, 0x28, 0xF6, 0xA8, 0x42, 0xD5, + 0xDB, 0x69, 0x17, 0xDF, 0x2E, 0x56, 0x87, 0x1A, 0xC8, 0x58, 0xCA, 0xBB, + 0xB0, 0x27, 0x5B, 0x69, 0x73, 0x55, 0x4F, 0x8C, 0xC6, 0x32, 0x67, 0xAC, + 0xB8, 0x83, 0x21, 0xFD, 0x98, 0x3D, 0xFA, 0x10, 0xA0, 0x11, 0xF0, 0xB8, + 0x5D, 0xA3, 0xFF, 0xE1, 0x65, 0x45, 0xF8, 0xB6, 0x79, 0xE4, 0x53, 0x9A, + 0x5B, 0xD3, 0xD1, 0x2D, 0x6C, 0xB5, 0xFC, 0x4A, 0x33, 0x7E, 0xCB, 0xA4, + 0xDA, 0xF2, 0xDD, 0xE1, 0x90, 0x97, 0xFB, 0x4B, 0xBC, 0x49, 0x8E, 0xD2, + 0x01, 0x4C, 0x77, 0x36, 0xDA, 0xCA, 0x20, 0xEF, 0xE8, 0xC6, 0xE4, 0xCE, + 0x41, 0x13, 0xFB, 0x62, 0x98, 0x91, 0x90, 0xAE, 0x4D, 0xDA, 0xDB, 0x95, + 0xB4, 0x1F, 0xF1, 0x2B, 0xFE, 0x9E, 0x7E, 0xD0, 0xE0, 0x25, 0xC7, 0xAF, + 0xD0, 0xD1, 0x8E, 0xD0, 0xA0, 0xD5, 0x93, 0x6B, 0x71, 0x8E, 0xAD, 0xEA, + 0x64, 0x2B, 0x12, 0xF2, 0xFB, 0xE2, 0xF6, 0x8F, 0xB7, 0x94, 0x75, 0x8E, + 0x2F, 0x5B, 0x3C, 0x8E, 0x1C, 0xC3, 0x8F, 0x68, 0xA0, 0x5E, 0xAD, 0x4F, + 0x1C, 0xF0, 0x0D, 0x90, 0x12, 0xB8, 0x88, 0x88, 0x77, 0x17, 0x0E, 0xBE, + 0x18, 0x22, 0x2F, 0x2F, 0xAD, 0xC1, 0xA8, 0xB3, 0x91, 0xF1, 0xCF, 0xD1, + 0xE8, 0x74, 0x6F, 0xB5, 0x0F, 0xCC, 0xA0, 0xE5, 0xA1, 0x1F, 0x02, 0x8B, + 0xFE, 0x2D, 0x75, 0xEA, 0xB7, 0xE0, 0x13, 0xFD, 0xE0, 0x4F, 0xA8, 0xB4, + 0x99, 0xE2, 0x89, 0xCE, 0xD6, 0xF3, 0xAC, 0x18, 0x05, 0x77, 0x95, 0x80, + 0x66, 0xA2, 0x5F, 0x16, 0xD9, 0xA8, 0xAD, 0xD2, 0x81, 0x3B, 0xC4, 0x7C, + 0x86, 0xFA, 0xB5, 0x77, 0x65, 0x20, 0xAD, 0xE6, 0x77, 0x14, 0x1A, 0x21, + 0x14, 0x73, 0xCC, 0x93, 0xA0, 0x89, 0x3E, 0x7B, 0x0C, 0xAF, 0xCD, 0xEB, + 0xCF, 0x35, 0x9D, 0xFB, 0xF5, 0x42, 0x54, 0xC7, 0x5E, 0xB3, 0x71, 0x20, + 0x2D, 0x0E, 0x25, 0x00, 0x49, 0x7E, 0x1E, 0xAE, 0xD3, 0x1B, 0x41, 0xD6, + 0x1E, 0xB9, 0x09, 0xF0, 0x9B, 0x36, 0x64, 0x24, 0xAF, 0xE0, 0xB8, 0x57, + 0xBB, 0x00, 0x68, 0x22, 0x7F, 0x53, 0x5A, 0xD9, 0x89, 0x43, 0xC1, 0x78, + 0xAA, 0xA6, 0xDF, 0x59, 0x1D, 0x91, 0x63, 0x55, 0xA9, 0xCF, 0x95, 0x62, + 0x76, 0x03, 0x26, 0x83, 0xC5, 0xB9, 0xE5, 0x02, 0xA2, 0x5B, 0x7D, 0x20, + 0x4A, 0xA9, 0x14, 0x7B, 0xCA, 0x2D, 0x47, 0xB3, 0x41, 0x4A, 0x73, 0x4E, + 0x68, 0x19, 0xC8, 0x11, 0xE4, 0xC6, 0x9B, 0xBC, 0x3F, 0x57, 0x0F, 0xD6, + 0x15, 0x29, 0x53, 0x9A, 0x52, 0x00, 0x51, 0x1B, 0x1F, 0xE9, 0x1B, 0x57, + 0xB5, 0x6F, 0xBA, 0x08, 0x00, 0x74, 0xE6, 0x81, 0x76, 0xA4, 0x60, 0x2B, + 0xB6, 0xF9, 0xB9, 0xE7, 0x21, 0x65, 0x63, 0xB6, 0x15, 0xD9, 0x0D, 0x2A, + 0x6B, 0xEC, 0x96, 0xF2, 0xA1, 0x8F, 0x9F, 0xA9, 0x5D, 0x2D, 0xB0, 0x53, + 0x64, 0x56, 0x85, 0xC5, 0x2E, 0x05, 0x34, 0xFF, 0x44, 0x29, 0xB3, 0xB5, + 0xE9, 0x70, 0x7A, 0x4B, 0x6A, 0x07, 0x85, 0x6E, 0x99, 0x47, 0xBA, 0x08, + 0x7D, 0xDF, 0xA7, 0x49, 0xB0, 0xA6, 0x6E, 0xAD, 0x23, 0x26, 0x19, 0xC4, + 0x2E, 0x09, 0x75, 0xDB, 0xFF, 0x18, 0x9A, 0x69, 0x71, 0x8C, 0xAA, 0xEC, + 0x66, 0xB2, 0xED, 0x8F, 0xB8, 0x60, 0xEE, 0x9C, 0x29, 0x4C, 0x09, 0x75, + 0xA5, 0x02, 0x36, 0x19, 0xE1, 0x9E, 0xB1, 0xC2, 0x6C, 0x52, 0x64, 0x56, + 0x65, 0x9D, 0x42, 0x5B, 0x9A, 0x98, 0x54, 0x3F, 0x3E, 0x3A, 0x18, 0xE4, + 0x40, 0x13, 0x59, 0xA0, 0xF5, 0x30, 0xE8, 0xEF, 0x07, 0x9C, 0xD2, 0xA1, + 0xD6, 0x3F, 0xF7, 0x99, 0xD6, 0xE4, 0x8F, 0x6B, 0x26, 0xEB, 0x70, 0x84, + 0x86, 0x20, 0xDD, 0x4C, 0xC1, 0x5D, 0x25, 0xF0, 0xE6, 0x38, 0x2D, 0x4D, + 0xC9, 0xEF, 0xBA, 0x3E, 0x3F, 0x6B, 0x68, 0x09, 0x5E, 0xCC, 0x1E, 0x02, + 0xC6, 0xE9, 0x82, 0x63, 0x86, 0xE2, 0xA0, 0x52, 0x84, 0x35, 0x7F, 0x68, + 0xA1, 0x70, 0x6A, 0x6B, 0x14, 0x18, 0x97, 0x3C, 0x5C, 0xAE, 0xDE, 0x7F, + 0x1C, 0x84, 0x07, 0x3E, 0x37, 0x07, 0x50, 0xAA, 0x05, 0x53, 0x9C, 0xB7, + 0x0D, 0x0C, 0x50, 0xF0, 0x37, 0xDA, 0x3A, 0xB0, 0xB8, 0xF2, 0x16, 0x57, + 0xEC, 0x44, 0x7D, 0x8E, 0xB2, 0x74, 0xB5, 0x3C, 0x1A, 0xF5, 0x0C, 0xAE, + 0xFF, 0xB3, 0x00, 0x02, 0x04, 0x1F, 0x1C, 0xF0, 0xF6, 0x2F, 0xA9, 0x7C, + 0xF9, 0x13, 0x91, 0xD1, 0xBD, 0x21, 0x09, 0xDC, 0x58, 0x7A, 0x83, 0x25, + 0xDC, 0xDA, 0xC2, 0x37, 0x81, 0xE5, 0xE5, 0x3A, 0x01, 0x47, 0xF5, 0x22, + 0x73, 0x47, 0x32, 0x94, 0x0E, 0x03, 0xD0, 0x0F, 0x46, 0x61, 0x44, 0xA9, + 0xA7, 0xDD, 0xF3, 0x9A, 0x34, 0x76, 0xB5, 0xC8, 0x2F, 0x0E, 0xEA, 0x3B, + 0x99, 0xCD, 0x38, 0xE2, 0x41, 0x1E, 0x75, 0xA4, 0x3E, 0xC7, 0xC8, 0xEC, + 0x08, 0xB9, 0x6D, 0x4F, 0x38, 0x8B, 0x54, 0x4E, 0x31, 0xB3, 0x3E, 0x18, + 0xA1, 0xBB, 0x80, 0x32, 0x79, 0x7C, 0x97, 0x24, 0x90, 0x12, 0xB8, 0x2C, + 0xBF, 0x04, 0x0A, 0xF6, 0x03, 0x0D, 0x42, 0x6F, 0x10, 0x08, 0x93, 0xD9, + 0x1F, 0x77, 0x9A, 0xDE, 0xAF, 0x89, 0xAF, 0xBC, 0x72, 0xB0, 0x79, 0x56, + 0x24, 0x71, 0x6B, 0x2E, 0x1F, 0x72, 0x12, 0x55, 0x2E, 0x3F, 0xCF, 0xDC, + 0x12, 0xAE, 0x8B, 0xB3, 0x17, 0xDA, 0x08, 0x74, 0x18, 0x47, 0x58, 0x7A, + 0x87, 0xCD, 0x84, 0x9F, 0xE6, 0xDD, 0x1A, 0x50, 0xFA, 0x1D, 0x85, 0xDB, + 0x3A, 0xEC, 0x7A, 0xEC, 0x8C, 0x7D, 0x4B, 0xE9, 0xBC, 0x9A, 0x9F, 0xBC, + 0x08, 0xD8, 0x15, 0x32, 0x47, 0x18, 0x1C, 0xEF, 0xD2, 0xC3, 0x64, 0xC4, + 0x66, 0x43, 0x09, 0x63, 0x51, 0xC4, 0x65, 0x2A, 0x43, 0x4D, 0xA1, 0x12, + 0x16, 0xBA, 0xC2, 0x24, 0x37, 0x3B, 0x43, 0xDD, 0x55, 0x4E, 0x31, 0x10, + 0x9E, 0xF8, 0xDF, 0x71, 0xDD, 0xE4, 0x3A, 0x13, 0x02, 0x00, 0x94, 0x50, + 0x6B, 0xC7, 0xA3, 0xD7, 0xF1, 0x56, 0x35, 0x04, 0x9B, 0x19, 0x11, 0x5F, + 0xD6, 0x77, 0xAC, 0x81, 0xFA, 0xFB, 0xF1, 0x97, 0xED, 0xE6, 0x8F, 0xF2, + 0x09, 0xA5, 0x24, 0x59, 0x3B, 0x18, 0x11, 0x3C, 0xB1, 0x6F, 0xE9, 0xEA, + 0x70, 0x45, 0xE3, 0x86, 0x6E, 0x3C, 0x15, 0x1E, 0x2C, 0xBF, 0xBA, 0x9E, + 0xFA, 0x06, 0x3D, 0x4E, 0x1C, 0xE7, 0x1F, 0x77, 0xB3, 0x2A, 0x3E, 0x5A, + 0x0A, 0x5E, 0x0E, 0x86, 0x25, 0xC8, 0x66, 0x52, 0xD6, 0x89, 0x3E, 0x80, + 0x0F, 0x1D, 0xE7, 0x99, 0xB9, 0xDC, 0x65, 0x29, 0x78, 0xEA, 0xE2, 0x94, + 0xBA, 0x0E, 0x15, 0xC6, 0x6A, 0xB3, 0x10, 0x9C, 0x78, 0xC9, 0x4C, 0x2E, + 0x3D, 0x2B, 0x1D, 0x36, 0xA7, 0x4E, 0xF7, 0xF2, 0xF4, 0x2D, 0x0A, 0x1E, + 0x53, 0x3C, 0xFC, 0xA6, 0xB6, 0x12, 0x13, 0xF7, 0x08, 0xA7, 0x23, 0x52, + 0x60, 0x79, 0xC2, 0x19, 0x0F, 0x26, 0x39, 0x19, 0x83, 0xC8, 0x7B, 0xA6, + 0x95, 0x45, 0xBC, 0xE3, 0x66, 0x1F, 0xC3, 0xEA, 0x6E, 0xFE, 0xAD, 0xEB, + 0xA5, 0x5A, 0x6C, 0xBE, 0xEF, 0xDD, 0x32, 0xC3, 0x28, 0xFF, 0x8C, 0x01, + 0xD1, 0x37, 0x7F, 0xB1, 0x3B, 0x95, 0x2F, 0xDB, 0x0F, 0xA5, 0xCE, 0xEE, + 0x02, 0x97, 0xAB, 0x68, 0x85, 0x21, 0x58, 0x65, 0x70, 0x61, 0x07, 0x29, + 0x28, 0xB6, 0x21, 0x15, 0x84, 0x2F, 0x6E, 0x5B, 0xAD, 0x7D, 0xEF, 0x2A, + 0x96, 0xBD, 0x61, 0xEB, 0x30, 0xA8, 0xCC, 0x13, 0x10, 0x15, 0x9F, 0x61, + 0x75, 0x47, 0xDD, 0xEC, 0x39, 0xA2, 0x70, 0x4C, 0x90, 0x5C, 0x73, 0xB5, + 0xCF, 0x63, 0x03, 0xAA, 0x1E, 0xFE, 0x34, 0x03, 0xA7, 0x2C, 0x62, 0x60, + 0xBC, 0x86, 0xCC, 0xEE, 0x14, 0xDE, 0xAA, 0xCB, 0x0B, 0x9E, 0x9E, 0xD5, + 0xCA, 0xF0, 0xBD, 0x19, 0xAF, 0x1E, 0x8B, 0x64, 0x6E, 0x84, 0xF3, 0xB2, + 0xAB, 0x5C, 0xAB, 0x9C, 0xB3, 0xB4, 0x2A, 0x3C, 0x32, 0x5A, 0x68, 0x40, + 0x50, 0xBB, 0x5A, 0x65, 0xB9, 0x69, 0x23, 0xA0, 0x99, 0xA0, 0x5F, 0x87, + 0x19, 0x0B, 0x54, 0x9B, 0xF7, 0xB8, 0x21, 0xC0, 0xD5, 0xE9, 0x9E, 0x31, + 0x77, 0x2D, 0xE3, 0x97, 0x9A, 0x88, 0x37, 0xF8, 0xA8, 0x7D, 0x3D, 0x62, + 0x7E, 0x99, 0xF7, 0x95, 0xD6, 0x1F, 0xE6, 0xC7, 0x29, 0x88, 0x35, 0x0E, + 0x81, 0x12, 0x68, 0x16, 0x5F, 0x93, 0xED, 0x11, 0x63, 0x72, 0x22, 0x1B, + 0xA5, 0x84, 0xF5, 0x57, 0x99, 0xBA, 0x58, 0x78, 0xA1, 0xDF, 0xDE, 0x96, + 0x54, 0x30, 0x2E, 0x53, 0xEB, 0x0A, 0xB3, 0xCD, 0x96, 0x46, 0xC2, 0x1A, + 0xFF, 0xC3, 0x83, 0x9B, 0xEA, 0xFF, 0xC6, 0x34, 0xEF, 0xF2, 0xEB, 0x58, + 0x28, 0x31, 0xBC, 0x6D, 0xE4, 0x48, 0xD9, 0x8F, 0xE3, 0xB7, 0x64, 0xE8, + 0xD9, 0x14, 0x4A, 0x5D, 0x73, 0x3C, 0x7C, 0xEE, 0x61, 0xED, 0x28, 0xFE, + 0xEA, 0xAB, 0xAA, 0xA3, 0xB6, 0xE2, 0xEE, 0x45, 0xE0, 0x13, 0x3E, 0x20, + 0x14, 0x5D, 0x10, 0x42, 0xB5, 0xBB, 0x6A, 0xEF, 0x42, 0xF4, 0x42, 0xC7, + 0xD0, 0x4F, 0xCB, 0xFA, 0x15, 0x4F, 0x6C, 0xDB, 0xC7, 0x4D, 0x85, 0x86, + 0x9E, 0x79, 0x1E, 0xD8, 0x05, 0x21, 0xCD, 0x41, 0x1D, 0x3B, 0x4F, 0x65, + 0x46, 0x26, 0x8D, 0x5B, 0xF2, 0xA1, 0x62, 0xCF, 0x50, 0x62, 0x81, 0x3D, + 0x6A, 0x47, 0x4B, 0xE4, 0x92, 0x74, 0xCB, 0x69, 0xC3, 0x24, 0x15, 0x7F, + 0xA3, 0xB6, 0xC7, 0xC1, 0xA0, 0x83, 0x88, 0xFC, 0x9D, 0x48, 0x19, 0xAD, + 0x00, 0xBF, 0x5B, 0x09, 0x85, 0xB2, 0x92, 0x56, 0x0B, 0x8A, 0x84, 0x47, + 0xEA, 0xF5, 0x55, 0x0C, 0x2A, 0x8D, 0x42, 0x58, 0x00, 0x0D, 0x82, 0x23, + 0x74, 0xB1, 0x62, 0x14, 0x41, 0x7E, 0x93, 0x8D, 0x92, 0xF0, 0x72, 0x33, + 0x61, 0x70, 0x3F, 0x23, 0x3E, 0xF4, 0xAD, 0x1D, 0x60, 0x74, 0xEE, 0xCB, + 0x59, 0x37, 0xDE, 0x7C, 0xDB, 0x3B, 0x22, 0x6C, 0xF1, 0xEC, 0x5F, 0xD6, + 0x9E, 0x50, 0xF8, 0x19, 0x84, 0x80, 0x07, 0xA6, 0x6E, 0x32, 0x77, 0xCE, + 0xA7, 0xF2, 0x85, 0x40, 0xC2, 0x06, 0x0C, 0xC5, 0xAA, 0xA7, 0x69, 0xA9, + 0x35, 0x97, 0xD9, 0x61, 0x55, 0xD8, 0xEF, 0xE8, 0x84, 0x34, 0x45, 0xC3, + 0x2E, 0x7A, 0x44, 0x9E, 0xDC, 0xCA, 0x0B, 0x80, 0xFC, 0xAB, 0x04, 0x5A, + 0xCD, 0x88, 0x55, 0x10, 0xD3, 0xDB, 0x73, 0xDB, 0xC9, 0x9E, 0x1E, 0x0E, + 0x05, 0x67, 0xD5, 0xFD, 0xD8, 0x38, 0x3E, 0x71, 0x65, 0x34, 0xC4, 0xC5, + 0x40, 0x43, 0x67, 0xE3, 0x79, 0xDA, 0x5F, 0x67, 0x4A, 0x3D, 0xB0, 0x8F, + 0xE7, 0x21, 0x3E, 0x15, 0x20, 0xFF, 0x6D, 0xF1, 0x9E, 0xF8, 0x28, 0x3D, + 0xF7, 0x40, 0x81, 0x94, 0x68, 0x5A, 0x3D, 0xE9, 0xF7, 0xAD, 0x83, 0xDB, + 0x2B, 0x9F, 0xE3, 0xE6, 0xF7, 0xD4, 0x02, 0x76, 0xF7, 0x20, 0x15, 0x41, + 0x34, 0x29, 0x69, 0x94, 0x1C, 0x26, 0x4C, 0xF6, 0x6A, 0xF4, 0x20, 0x33, + 0x71, 0x24, 0x08, 0xD4, 0x68, 0x00, 0xA1, 0xD4, 0x2E, 0x6B, 0xF4, 0xBC, + 0x46, 0x45, 0x24, 0x97, 0x2E, 0xF6, 0x39, 0x1E, 0xAF, 0x61, 0x00, 0x50, + 0xB7, 0xD4, 0xB7, 0x43, +}; + +} // namespace random_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/random/internal/randen_slow.cc b/absl/random/internal/randen_slow.cc index 8d074582..4e5f3dc1 100644 --- a/absl/random/internal/randen_slow.cc +++ b/absl/random/internal/randen_slow.cc @@ -20,6 +20,7 @@ #include "absl/base/attributes.h" #include "absl/random/internal/platform.h" +#include "absl/random/internal/randen_traits.h" #if ABSL_HAVE_ATTRIBUTE(always_inline) || \ (defined(__GNUC__) && !defined(__clang__)) @@ -225,35 +226,16 @@ constexpr uint32_t te3[256] = { 0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c, }; -struct alignas(16) u64x2 { - constexpr u64x2() : v{0, 0} {}; - constexpr u64x2(uint64_t hi, uint64_t lo) : v{lo, hi} {} - - uint64_t v[2]; -}; - // Software implementation of the Vector128 class, using uint32_t // as an underlying vector register. -// -struct Vector128 { - inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128& operator^=( - const Vector128& other) { - s[0] ^= other.s[0]; - s[1] ^= other.s[1]; - s[2] ^= other.s[2]; - s[3] ^= other.s[3]; - return *this; - } - +struct alignas(16) Vector128 { uint32_t s[4]; }; inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 -Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { +Vector128Load(const void* from) { Vector128 result; - const uint8_t* ABSL_RANDOM_INTERNAL_RESTRICT src = - reinterpret_cast<const uint8_t*>(from); - + const uint8_t* src = reinterpret_cast<const uint8_t*>(from); result.s[0] = static_cast<uint32_t>(src[0]) << 24 | static_cast<uint32_t>(src[1]) << 16 | static_cast<uint32_t>(src[2]) << 8 | @@ -274,7 +256,7 @@ Vector128Load(const void* ABSL_RANDOM_INTERNAL_RESTRICT from) { } inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( - const Vector128& v, void* ABSL_RANDOM_INTERNAL_RESTRICT to) { + const Vector128& v, void* to) { uint8_t* dst = reinterpret_cast<uint8_t*>(to); dst[0] = static_cast<uint8_t>(v.s[0] >> 24); dst[1] = static_cast<uint8_t>(v.s[0] >> 16); @@ -298,91 +280,57 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Vector128Store( // symmetry of AES (ensures previously equal columns differ afterwards). inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE Vector128 AesRound(const Vector128& state, const Vector128& round_key) { - // clang-format off Vector128 result; - result.s[0] = round_key.s[0] ^ - te0[uint8_t(state.s[0] >> 24)] ^ - te1[uint8_t(state.s[1] >> 16)] ^ - te2[uint8_t(state.s[2] >> 8)] ^ + result.s[0] = round_key.s[0] ^ // + te0[uint8_t(state.s[0] >> 24)] ^ // + te1[uint8_t(state.s[1] >> 16)] ^ // + te2[uint8_t(state.s[2] >> 8)] ^ // te3[uint8_t(state.s[3])]; - result.s[1] = round_key.s[1] ^ - te0[uint8_t(state.s[1] >> 24)] ^ - te1[uint8_t(state.s[2] >> 16)] ^ - te2[uint8_t(state.s[3] >> 8)] ^ + result.s[1] = round_key.s[1] ^ // + te0[uint8_t(state.s[1] >> 24)] ^ // + te1[uint8_t(state.s[2] >> 16)] ^ // + te2[uint8_t(state.s[3] >> 8)] ^ // te3[uint8_t(state.s[0])]; - result.s[2] = round_key.s[2] ^ - te0[uint8_t(state.s[2] >> 24)] ^ - te1[uint8_t(state.s[3] >> 16)] ^ - te2[uint8_t(state.s[0] >> 8)] ^ + result.s[2] = round_key.s[2] ^ // + te0[uint8_t(state.s[2] >> 24)] ^ // + te1[uint8_t(state.s[3] >> 16)] ^ // + te2[uint8_t(state.s[0] >> 8)] ^ // te3[uint8_t(state.s[1])]; - result.s[3] = round_key.s[3] ^ - te0[uint8_t(state.s[3] >> 24)] ^ - te1[uint8_t(state.s[0] >> 16)] ^ - te2[uint8_t(state.s[1] >> 8)] ^ + result.s[3] = round_key.s[3] ^ // + te0[uint8_t(state.s[3] >> 24)] ^ // + te1[uint8_t(state.s[0] >> 16)] ^ // + te2[uint8_t(state.s[1] >> 8)] ^ // te3[uint8_t(state.s[2])]; return result; - // clang-format on } -// RANDen = RANDom generator or beetroots in Swiss German. -// 'Strong' (well-distributed, unpredictable, backtracking-resistant) random -// generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32. -// -// High-level summary: -// 1) Reverie (see "A Robust and Sponge-Like PRNG with Improved Efficiency") is -// a sponge-like random generator that requires a cryptographic permutation. -// It improves upon "Provably Robust Sponge-Based PRNGs and KDFs" by -// achieving backtracking resistance with only one Permute() per buffer. -// -// 2) "Simpira v2: A Family of Efficient Permutations Using the AES Round -// Function" constructs up to 1024-bit permutations using an improved -// Generalized Feistel network with 2-round AES-128 functions. This Feistel -// block shuffle achieves diffusion faster and is less vulnerable to -// sliced-biclique attacks than the Type-2 cyclic shuffle. -// -// 3) "Improving the Generalized Feistel" and "New criterion for diffusion -// property" extends the same kind of improved Feistel block shuffle to 16 -// branches, which enables a 2048-bit permutation. -// -// Combine these three ideas and also change Simpira's subround keys from -// structured/low-entropy counters to digits of Pi. - -// Randen constants. -constexpr size_t kFeistelBlocks = 16; -constexpr size_t kFeistelFunctions = kFeistelBlocks / 2; // = 8 -constexpr size_t kFeistelRounds = 16 + 1; // > 4 * log2(kFeistelBlocks) -constexpr size_t kKeys = kFeistelRounds * kFeistelFunctions; - -// INCLUDE keys. -#include "absl/random/internal/randen-keys.inc" - -static_assert(kKeys == kRoundKeys, "kKeys and kRoundKeys must be equal"); +using ::absl::random_internal::RandenTraits; -// 2 uint64_t lanes per Vector128 -static constexpr size_t kLanes = 2; +// Randen operates on 128-bit vectors. +struct alignas(16) u64x2 { + uint64_t data[2]; +}; // The improved Feistel block shuffle function for 16 blocks. inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state_u64) { - static_assert(kFeistelBlocks == 16, + u64x2* state) { + static_assert(RandenTraits::kFeistelBlocks == 16, "Feistel block shuffle only works for 16 blocks."); - constexpr size_t shuffle[kFeistelBlocks] = {7, 2, 13, 4, 11, 8, 3, 6, - 15, 0, 9, 10, 1, 14, 5, 12}; - - u64x2* ABSL_RANDOM_INTERNAL_RESTRICT state = - reinterpret_cast<u64x2*>(state_u64); + constexpr size_t shuffle[RandenTraits::kFeistelBlocks] = { + 7, 2, 13, 4, 11, 8, 3, 6, 15, 0, 9, 10, 1, 14, 5, 12}; // The fully unrolled loop without the memcpy improves the speed by about - // 30% over the equivalent (leaving code here as a comment): - if (false) { - u64x2 source[kFeistelBlocks]; - std::memcpy(source, state, sizeof(source)); - for (size_t i = 0; i < kFeistelBlocks; i++) { - const u64x2 v0 = source[shuffle[i]]; - state[i] = v0; - } + // 30% over the equivalent: +#if 0 + u64x2 source[RandenTraits::kFeistelBlocks]; + std::memcpy(source, state, sizeof(source)); + for (size_t i = 0; i < RandenTraits::kFeistelBlocks; i++) { + const u64x2 v0 = source[shuffle[i]]; + state[i] = v0; } + return; +#endif const u64x2 v0 = state[shuffle[0]]; const u64x2 v1 = state[shuffle[1]]; @@ -424,23 +372,23 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void BlockShuffle( // parallel hides the 7-cycle AESNI latency on HSW. Note that the Feistel // XORs are 'free' (included in the second AES instruction). inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound( - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state, + u64x2* ABSL_RANDOM_INTERNAL_RESTRICT state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { - for (size_t branch = 0; branch < kFeistelBlocks; branch += 4) { - const Vector128 s0 = Vector128Load(state + kLanes * branch); - const Vector128 s1 = Vector128Load(state + kLanes * (branch + 1)); + for (size_t branch = 0; branch < RandenTraits::kFeistelBlocks; branch += 4) { + const Vector128 s0 = Vector128Load(state + branch); + const Vector128 s1 = Vector128Load(state + branch + 1); const Vector128 f0 = AesRound(s0, Vector128Load(keys)); keys++; const Vector128 o1 = AesRound(f0, s1); - Vector128Store(o1, state + kLanes * (branch + 1)); + Vector128Store(o1, state + branch + 1); // Manually unroll this loop once. about 10% better than not unrolled. - const Vector128 s2 = Vector128Load(state + kLanes * (branch + 2)); - const Vector128 s3 = Vector128Load(state + kLanes * (branch + 3)); + const Vector128 s2 = Vector128Load(state + branch + 2); + const Vector128 s3 = Vector128Load(state + branch + 3); const Vector128 f2 = AesRound(s2, Vector128Load(keys)); keys++; const Vector128 o3 = AesRound(f2, s3); - Vector128Store(o3, state + kLanes * (branch + 3)); + Vector128Store(o3, state + branch + 3); } return keys; } @@ -450,11 +398,9 @@ inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE const u64x2* FeistelRound( // 2^64 queries if the round function is a PRF. This is similar to the b=8 case // of Simpira v2, but more efficient than its generic construction for b=16. inline ABSL_RANDOM_INTERNAL_ATTRIBUTE_ALWAYS_INLINE void Permute( - const void* keys, uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state) { - const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys128 = - static_cast<const u64x2*>(keys); - for (size_t round = 0; round < kFeistelRounds; ++round) { - keys128 = FeistelRound(state, keys128); + u64x2* state, const u64x2* ABSL_RANDOM_INTERNAL_RESTRICT keys) { + for (size_t round = 0; round < RandenTraits::kFeistelRounds; ++round) { + keys = FeistelRound(state, keys); BlockShuffle(state); } } @@ -468,37 +414,42 @@ namespace random_internal { const void* RandenSlow::GetKeys() { // Round keys for one AES per Feistel round and branch. // The canonical implementation uses first digits of Pi. - return round_keys; + return kRandenRoundKeys; } void RandenSlow::Absorb(const void* seed_void, void* state_void) { - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state = - reinterpret_cast<uint64_t*>(state_void); - const uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT seed = - reinterpret_cast<const uint64_t*>(seed_void); - - constexpr size_t kCapacityBlocks = kCapacityBytes / sizeof(uint64_t); - static_assert(kCapacityBlocks * sizeof(uint64_t) == kCapacityBytes, - "Not i*V"); - for (size_t i = kCapacityBlocks; i < kStateBytes / sizeof(uint64_t); ++i) { + auto* state = + reinterpret_cast<uint64_t * ABSL_RANDOM_INTERNAL_RESTRICT>(state_void); + const auto* seed = + reinterpret_cast<const uint64_t * ABSL_RANDOM_INTERNAL_RESTRICT>( + seed_void); + + constexpr size_t kCapacityBlocks = + RandenTraits::kCapacityBytes / sizeof(uint64_t); + static_assert( + kCapacityBlocks * sizeof(uint64_t) == RandenTraits::kCapacityBytes, + "Not i*V"); + + for (size_t i = kCapacityBlocks; + i < RandenTraits::kStateBytes / sizeof(uint64_t); ++i) { state[i] ^= seed[i - kCapacityBlocks]; } } -void RandenSlow::Generate(const void* keys, void* state_void) { - static_assert(kCapacityBytes == sizeof(Vector128), "Capacity mismatch"); +void RandenSlow::Generate(const void* keys_void, void* state_void) { + static_assert(RandenTraits::kCapacityBytes == sizeof(u64x2), + "Capacity mismatch"); - uint64_t* ABSL_RANDOM_INTERNAL_RESTRICT state = - reinterpret_cast<uint64_t*>(state_void); + auto* state = reinterpret_cast<u64x2*>(state_void); + const auto* keys = reinterpret_cast<const u64x2*>(keys_void); - const Vector128 prev_inner = Vector128Load(state); + const u64x2 prev_inner = state[0]; - Permute(keys, state); + Permute(state, keys); // Ensure backtracking resistance. - Vector128 inner = Vector128Load(state); - inner ^= prev_inner; - Vector128Store(inner, state); + state[0].data[0] ^= prev_inner.data[0]; + state[0].data[1] ^= prev_inner.data[1]; } } // namespace random_internal diff --git a/absl/random/internal/randen_slow.h b/absl/random/internal/randen_slow.h index 72f92b54..b6f137eb 100644 --- a/absl/random/internal/randen_slow.h +++ b/absl/random/internal/randen_slow.h @@ -28,13 +28,6 @@ namespace random_internal { // architectures lacking AES hardware acceleration intrinsics. class RandenSlow { public: - // Size of the entire sponge / state for the randen PRNG. - static constexpr size_t kStateBytes = 256; // 2048-bit - - // Size of the 'inner' (inaccessible) part of the sponge. Larger values would - // require more frequent calls to RandenGenerate. - static constexpr size_t kCapacityBytes = 16; // 128-bit - static void Generate(const void* keys, void* state_void); static void Absorb(const void* seed_void, void* state_void); static const void* GetKeys(); diff --git a/absl/random/internal/randen_slow_test.cc b/absl/random/internal/randen_slow_test.cc index c07155d8..4a535837 100644 --- a/absl/random/internal/randen_slow_test.cc +++ b/absl/random/internal/randen_slow_test.cc @@ -17,18 +17,20 @@ #include <cstring> #include "gtest/gtest.h" +#include "absl/random/internal/randen_traits.h" namespace { using absl::random_internal::RandenSlow; +using absl::random_internal::RandenTraits; // Local state parameters. constexpr size_t kSeedBytes = - RandenSlow::kStateBytes - RandenSlow::kCapacityBytes; -constexpr size_t kStateSizeT = RandenSlow::kStateBytes / sizeof(uint64_t); + RandenTraits::kStateBytes - RandenTraits::kCapacityBytes; +constexpr size_t kStateSizeT = RandenTraits::kStateBytes / sizeof(uint64_t); constexpr size_t kSeedSizeT = kSeedBytes / sizeof(uint32_t); -struct randen { +struct alignas(16) randen { uint64_t state[kStateSizeT]; uint32_t seed[kSeedSizeT]; }; diff --git a/absl/random/internal/randen_traits.h b/absl/random/internal/randen_traits.h index 2b8bbe73..53caa936 100644 --- a/absl/random/internal/randen_traits.h +++ b/absl/random/internal/randen_traits.h @@ -32,6 +32,25 @@ namespace random_internal { // 'Strong' (well-distributed, unpredictable, backtracking-resistant) random // generator, faster in some benchmarks than std::mt19937_64 and pcg64_c32. // +// High-level summary: +// 1) Reverie (see "A Robust and Sponge-Like PRNG with Improved Efficiency") is +// a sponge-like random generator that requires a cryptographic permutation. +// It improves upon "Provably Robust Sponge-Based PRNGs and KDFs" by +// achieving backtracking resistance with only one Permute() per buffer. +// +// 2) "Simpira v2: A Family of Efficient Permutations Using the AES Round +// Function" constructs up to 1024-bit permutations using an improved +// Generalized Feistel network with 2-round AES-128 functions. This Feistel +// block shuffle achieves diffusion faster and is less vulnerable to +// sliced-biclique attacks than the Type-2 cyclic shuffle. +// +// 3) "Improving the Generalized Feistel" and "New criterion for diffusion +// property" extends the same kind of improved Feistel block shuffle to 16 +// branches, which enables a 2048-bit permutation. +// +// Combine these three ideas and also change Simpira's subround keys from +// structured/low-entropy counters to digits of Pi (or other random source). + // RandenTraits contains the basic algorithm traits, such as the size of the // state, seed, sponge, etc. struct RandenTraits { @@ -45,17 +64,23 @@ struct RandenTraits { // Size of the default seed consumed by the sponge. static constexpr size_t kSeedBytes = kStateBytes - kCapacityBytes; + // Assuming 128-bit blocks, the number of blocks in the state. // Largest size for which security proofs are known. static constexpr size_t kFeistelBlocks = 16; - // Type-2 generalized Feistel => one round function for every two blocks. - static constexpr size_t kFeistelFunctions = kFeistelBlocks / 2; // = 8 - // Ensures SPRP security and two full subblock diffusions. // Must be > 4 * log2(kFeistelBlocks). static constexpr size_t kFeistelRounds = 16 + 1; + + // Size of the key. A 128-bit key block is used for every-other + // feistel block (Type-2 generalized Feistel network) in each round. + static constexpr size_t kKeyBytes = 16 * kFeistelRounds * kFeistelBlocks / 2; }; +// Randen key arrays. In randen_round_keys.cc +extern const unsigned char kRandenRoundKeys[RandenTraits::kKeyBytes]; +extern const unsigned char kRandenRoundKeysBE[RandenTraits::kKeyBytes]; + } // namespace random_internal ABSL_NAMESPACE_END } // namespace absl diff --git a/absl/random/log_uniform_int_distribution_test.cc b/absl/random/log_uniform_int_distribution_test.cc index 5270531d..5e780d96 100644 --- a/absl/random/log_uniform_int_distribution_test.cc +++ b/absl/random/log_uniform_int_distribution_test.cc @@ -27,6 +27,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/random/internal/chi_square.h" #include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -121,7 +122,10 @@ class LogUniformIntChiSquaredTest // data generated by the log-uniform-int distribution. double ChiSquaredTestImpl(); - absl::InsecureBitGen rng_; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6}; }; double LogUniformIntChiSquaredTest::ChiSquaredTestImpl() { @@ -194,7 +198,6 @@ double LogUniformIntChiSquaredTest::ChiSquaredTestImpl() { TEST_P(LogUniformIntChiSquaredTest, MultiTest) { const int kTrials = 5; - int failures = 0; for (int i = 0; i < kTrials; i++) { double p_value = ChiSquaredTestImpl(); diff --git a/absl/random/poisson_distribution_test.cc b/absl/random/poisson_distribution_test.cc index 9d215fbc..8baabd11 100644 --- a/absl/random/poisson_distribution_test.cc +++ b/absl/random/poisson_distribution_test.cc @@ -30,6 +30,7 @@ #include "absl/container/flat_hash_map.h" #include "absl/random/internal/chi_square.h" #include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -257,7 +258,10 @@ class PoissonDistributionZTest : public testing::TestWithParam<ZParam>, template <typename D> bool SingleZTest(const double p, const size_t samples); - absl::InsecureBitGen rng_; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6}; }; template <typename D> @@ -357,9 +361,13 @@ class PoissonDistributionChiSquaredTest : public testing::TestWithParam<double>, private: void InitChiSquaredTest(const double buckets); - absl::InsecureBitGen rng_; std::vector<size_t> cutoffs_; std::vector<double> expected_; + + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6}; }; void PoissonDistributionChiSquaredTest::InitChiSquaredTest( diff --git a/absl/random/uniform_int_distribution_test.cc b/absl/random/uniform_int_distribution_test.cc index 69537603..276d72ad 100644 --- a/absl/random/uniform_int_distribution_test.cc +++ b/absl/random/uniform_int_distribution_test.cc @@ -26,6 +26,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/random/internal/chi_square.h" #include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -134,7 +135,11 @@ TYPED_TEST(UniformIntDistributionTest, TestMoments) { using param_type = typename absl::uniform_int_distribution<TypeParam>::param_type; - absl::InsecureBitGen rng; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng{0x2B7E151628AED2A6}; + std::vector<double> values(kSize); for (const auto& param : {param_type(0, Limits::max()), param_type(13, 127)}) { @@ -178,7 +183,11 @@ TYPED_TEST(UniformIntDistributionTest, ChiSquaredTest50) { const TypeParam min = std::is_unsigned<TypeParam>::value ? 37 : -37; const TypeParam max = min + kBuckets; - absl::InsecureBitGen rng; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng{0x2B7E151628AED2A6}; + absl::uniform_int_distribution<TypeParam> dist(min, max); std::vector<int32_t> counts(kBuckets + 1, 0); diff --git a/absl/random/uniform_real_distribution_test.cc b/absl/random/uniform_real_distribution_test.cc index a56374a6..be107cdd 100644 --- a/absl/random/uniform_real_distribution_test.cc +++ b/absl/random/uniform_real_distribution_test.cc @@ -27,6 +27,7 @@ #include "absl/base/internal/raw_logging.h" #include "absl/random/internal/chi_square.h" #include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -207,7 +208,11 @@ TYPED_TEST(UniformRealDistributionTest, TestMoments) { constexpr int kSize = 1000000; std::vector<double> values(kSize); - absl::InsecureBitGen rng; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng{0x2B7E151628AED2A6}; + absl::uniform_real_distribution<TypeParam> dist; for (int i = 0; i < kSize; i++) { values[i] = dist(rng); @@ -237,7 +242,11 @@ TYPED_TEST(UniformRealDistributionTest, ChiSquaredTest50) { const int kThreshold = absl::random_internal::ChiSquareValue(kBuckets - 1, 0.999999); - absl::InsecureBitGen rng; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng{0x2B7E151628AED2A6}; + for (const auto& param : {param_type(0, 1), param_type(5, 12), param_type(-5, 13), param_type(-5, -2)}) { const double min_val = param.a(); diff --git a/absl/random/zipf_distribution_test.cc b/absl/random/zipf_distribution_test.cc index 4d4a0fcf..f8cf70e0 100644 --- a/absl/random/zipf_distribution_test.cc +++ b/absl/random/zipf_distribution_test.cc @@ -27,6 +27,7 @@ #include "gtest/gtest.h" #include "absl/base/internal/raw_logging.h" #include "absl/random/internal/chi_square.h" +#include "absl/random/internal/pcg_engine.h" #include "absl/random/internal/sequence_urbg.h" #include "absl/random/random.h" #include "absl/strings/str_cat.h" @@ -213,7 +214,10 @@ class ZipfTest : public testing::TestWithParam<zipf_u64::param_type>, public: ZipfTest() : ZipfModel(GetParam().k(), GetParam().q(), GetParam().v()) {} - absl::InsecureBitGen rng_; + // We use a fixed bit generator for distribution accuracy tests. This allows + // these tests to be deterministic, while still testing the qualify of the + // implementation. + absl::random_internal::pcg64_2018_engine rng_{0x2B7E151628AED2A6}; }; TEST_P(ZipfTest, ChiSquaredTest) { |