Export of internal Abseil changes

-- 4833151c207fac9f57a735efe6d5db4c83368415 by Gennadiy Rozental <rogeeff@google.com>: Import of CCTZ from GitHub. PiperOrigin-RevId: 320398694 -- a1becb36b223230f0a45f204a5fb33b83d2deffe by Gennadiy Rozental <rogeeff@google.com>: Update CMakeLists.txt Import of https://github.com/abseil/abseil-cpp/pull/737 PiperOrigin-RevId: 320391906 -- b529c45856fe7a3447f1f3259286d57e13b1f292 by Abseil Team <absl-team@google.com>: Improves a comment about use of absl::Condition. PiperOrigin-RevId: 320384329 -- c7b1dacda2739c10dc1ccbfb56b07ed7fe2464a4 by Laramie Leavitt <lar@google.com>: Improve FastUniformBits performance for std::minstd_rand. The rejection algorithm was too pessimistic before, and not in line with the [rand.adapt.ibits]. Specifically, when sampling from an URBG with a non power of 2 range, FastUniformBits constructed a rejection threshold with a power-of-2 range that was too restrictive. For example, minstd_rand has a range of [1, 2147483646], which has a range of 2145386495, or about 30.999 bits. Before FastUniformBits rejected values between 1<<30 and 2145386495, which includes approximately 50% of the generated values. However, since a minimum of 3 calls are required to generate a full 64-bit value from an entropy pool of 30.9 bits, the correct value for rejection sampling is the range value which masks 21 (0x7fe00000) or 22 bits and rejects values greater than that. This reduces the probability of rejecting a sample to about 0.1% NOTE: Abseil random does not guarantee sequence stability over time, and this is expected to change sequences in some cases. PiperOrigin-RevId: 320285836 -- 15800a39557a07dd52e0add66a0ab67aed00590b by Gennadiy Rozental <rogeeff@google.com>: Internal change. PiperOrigin-RevId: 320220913 -- ef39348360873f6d19669755fe0b5d09a945a501 by Gennadiy Rozental <rogeeff@google.com>: Internal change PiperOrigin-RevId: 320181729 -- 4f9f6ef8034a24da1832e4c838c72f80fc2ea062 by Gennadiy Rozental <rogeeff@google.com>: Internal change PiperOrigin-RevId: 320176084 -- 6bfc8008462801657d231585bd5c37fc18bb25b6 by Gennadiy Rozental <rogeeff@google.com>: Internal change PiperOrigin-RevId: 320176070 -- b35b055ab1f41e6056031ff0641cabab23530027 by Abseil Team <absl-team@google.com>: Disabling using header module as well as building one for randen_hwaes_impl PiperOrigin-RevId: 320024299 GitOrigin-RevId: 4833151c207fac9f57a735efe6d5db4c83368415 Change-Id: I9cf102dbf46ed07752a508b7cda3ab3858857d0d
author: Abseil Team <absl-team@google.com> 2020-07-09 08:13:12 -0700
committer: Gennadiy Rozental <rogeeff@google.com> 2020-07-09 11:59:40 -0400
commit: d5269a8b6dbe7836049417d0ff2c88b8363cc1fc (patch)
tree: 007496a6f35159aabff38bf1a258e324801faf90 /absl/random/internal/fast_uniform_bits.h
parent: bf655de09b67fd8b924814cbb369cb65ddd0bd24 (diff)
1 files changed, 103 insertions, 99 deletions
diff --git a/absl/random/internal/fast_uniform_bits.h b/absl/random/internal/fast_uniform_bits.h
index f13c8729..425aaf7d 100644
--- a/absl/random/internal/fast_uniform_bits.h
+++ b/absl/random/internal/fast_uniform_bits.h
@@ -21,6 +21,7 @@
 #include <type_traits>
 
 #include "absl/base/config.h"
+#include "absl/meta/type_traits.h"
 
 namespace absl {
 ABSL_NAMESPACE_BEGIN
@@ -38,28 +39,17 @@ constexpr bool IsPowerOfTwoOrZero(UIntType n) {
 template <typename URBG>
 constexpr typename URBG::result_type RangeSize() {
   using result_type = typename URBG::result_type;
+  static_assert((URBG::max)() != (URBG::min)(), "URBG range cannot be 0.");
   return ((URBG::max)() == (std::numeric_limits<result_type>::max)() &&
           (URBG::min)() == std::numeric_limits<result_type>::lowest())
              ? result_type{0}
-             : (URBG::max)() - (URBG::min)() + result_type{1};
-}
-
-template <typename UIntType>
-constexpr UIntType LargestPowerOfTwoLessThanOrEqualTo(UIntType n) {
-  return n < 2 ? n : 2 * LargestPowerOfTwoLessThanOrEqualTo(n / 2);
-}
-
-// Given a URBG generating values in the closed interval [Lo, Hi], returns the
-// largest power of two less than or equal to `Hi - Lo + 1`.
-template <typename URBG>
-constexpr typename URBG::result_type PowerOfTwoSubRangeSize() {
-  return LargestPowerOfTwoLessThanOrEqualTo(RangeSize<URBG>());
+             : ((URBG::max)() - (URBG::min)() + result_type{1});
 }
 
 // Computes the floor of the log. (i.e., std::floor(std::log2(N));
 template <typename UIntType>
 constexpr UIntType IntegerLog2(UIntType n) {
-  return (n <= 1) ? 0 : 1 + IntegerLog2(n / 2);
+  return (n <= 1) ? 0 : 1 + IntegerLog2(n >> 1);
 }
 
 // Returns the number of bits of randomness returned through
@@ -68,18 +58,23 @@ template <typename URBG>
 constexpr size_t NumBits() {
   return RangeSize<URBG>() == 0
              ? std::numeric_limits<typename URBG::result_type>::digits
-             : IntegerLog2(PowerOfTwoSubRangeSize<URBG>());
+             : IntegerLog2(RangeSize<URBG>());
 }
 
 // Given a shift value `n`, constructs a mask with exactly the low `n` bits set.
 // If `n == 0`, all bits are set.
 template <typename UIntType>
-constexpr UIntType MaskFromShift(UIntType n) {
+constexpr UIntType MaskFromShift(size_t n) {
   return ((n % std::numeric_limits<UIntType>::digits) == 0)
              ? ~UIntType{0}
              : (UIntType{1} << n) - UIntType{1};
 }
 
+// Tags used to dispatch FastUniformBits::generate to the simple or more complex
+// entropy extraction algorithm.
+struct SimplifiedLoopTag {};
+struct RejectionLoopTag {};
+
 // FastUniformBits implements a fast path to acquire uniform independent bits
 // from a type which conforms to the [rand.req.urbg] concept.
 // Parameterized by:
@@ -107,50 +102,16 @@ class FastUniformBits {
                 "Class-template FastUniformBits<> must be parameterized using "
                 "an unsigned type.");
 
-  // PowerOfTwoVariate() generates a single random variate, always returning a
-  // value in the half-open interval `[0, PowerOfTwoSubRangeSize<URBG>())`. If
-  // the URBG already generates values in a power-of-two range, the generator
-  // itself is used. Otherwise, we use rejection sampling on the largest
-  // possible power-of-two-sized subrange.
-  struct PowerOfTwoTag {};
-  struct RejectionSamplingTag {};
-  template <typename URBG>
-  static typename URBG::result_type PowerOfTwoVariate(
-      URBG& g) {  // NOLINT(runtime/references)
-    using tag =
-        typename std::conditional<IsPowerOfTwoOrZero(RangeSize<URBG>()),
-                                  PowerOfTwoTag, RejectionSamplingTag>::type;
-    return PowerOfTwoVariate(g, tag{});
-  }
-
-  template <typename URBG>
-  static typename URBG::result_type PowerOfTwoVariate(
-      URBG& g,  // NOLINT(runtime/references)
-      PowerOfTwoTag) {
-    return g() - (URBG::min)();
-  }
-
-  template <typename URBG>
-  static typename URBG::result_type PowerOfTwoVariate(
-      URBG& g,  // NOLINT(runtime/references)
-      RejectionSamplingTag) {
-    // Use rejection sampling to ensure uniformity across the range.
-    typename URBG::result_type u;
-    do {
-      u = g() - (URBG::min)();
-    } while (u >= PowerOfTwoSubRangeSize<URBG>());
-    return u;
-  }
-
   // Generate() generates a random value, dispatched on whether
-  // the underlying URBG must loop over multiple calls or not.
+  // the underlying URBG must use rejection sampling to generate a value,
+  // or whether a simplified loop will suffice.
   template <typename URBG>
   result_type Generate(URBG& g,  // NOLINT(runtime/references)
-                       std::true_type /* avoid_looping */);
+                       SimplifiedLoopTag);
 
   template <typename URBG>
   result_type Generate(URBG& g,  // NOLINT(runtime/references)
-                       std::false_type /* avoid_looping */);
+                       RejectionLoopTag);
 };
 
 template <typename UIntType>
@@ -162,31 +123,47 @@ FastUniformBits<UIntType>::operator()(URBG& g) {  // NOLINT(runtime/references)
   // Y = (2 ^ kRange) - 1
   static_assert((URBG::max)() > (URBG::min)(),
                 "URBG::max and URBG::min may not be equal.");
-  using urbg_result_type = typename URBG::result_type;
-  constexpr urbg_result_type kRangeMask =
-      RangeSize<URBG>() == 0
-          ? (std::numeric_limits<urbg_result_type>::max)()
-          : static_cast<urbg_result_type>(PowerOfTwoSubRangeSize<URBG>() - 1);
-  return Generate(g, std::integral_constant<bool, (kRangeMask >= (max)())>{});
+
+  using tag = absl::conditional_t<IsPowerOfTwoOrZero(RangeSize<URBG>()),
+                                  SimplifiedLoopTag, RejectionLoopTag>;
+  return Generate(g, tag{});
 }
 
 template <typename UIntType>
 template <typename URBG>
 typename FastUniformBits<UIntType>::result_type
 FastUniformBits<UIntType>::Generate(URBG& g,  // NOLINT(runtime/references)
-                                    std::true_type /* avoid_looping */) {
-  // The width of the result_type is less than than the width of the random bits
-  // provided by URBG.  Thus, generate a single value and then simply mask off
-  // the required bits.
+                                    SimplifiedLoopTag) {
+  // The simplified version of FastUniformBits works only on URBGs that have
+  // a range that is a power of 2. In this case we simply loop and shift without
+  // attempting to balance the bits across calls.
+  static_assert(IsPowerOfTwoOrZero(RangeSize<URBG>()),
+                "incorrect Generate tag for URBG instance");
+
+  static constexpr size_t kResultBits =
+      std::numeric_limits<result_type>::digits;
+  static constexpr size_t kUrbgBits = NumBits<URBG>();
+  static constexpr size_t kIters =
+      (kResultBits / kUrbgBits) + (kResultBits % kUrbgBits != 0);
+  static constexpr size_t kShift = (kIters == 1) ? 0 : kUrbgBits;
+  static constexpr auto kMin = (URBG::min)();
 
-  return PowerOfTwoVariate(g) & (max)();
+  result_type r = static_cast<result_type>(g() - kMin);
+  for (size_t n = 1; n < kIters; ++n) {
+    r = (r << kShift) + static_cast<result_type>(g() - kMin);
+  }
+  return r;
 }
 
 template <typename UIntType>
 template <typename URBG>
 typename FastUniformBits<UIntType>::result_type
 FastUniformBits<UIntType>::Generate(URBG& g,  // NOLINT(runtime/references)
-                                    std::false_type /* avoid_looping */) {
+                                    RejectionLoopTag) {
+  static_assert(!IsPowerOfTwoOrZero(RangeSize<URBG>()),
+                "incorrect Generate tag for URBG instance");
+  using urbg_result_type = typename URBG::result_type;
+
   // See [rand.adapt.ibits] for more details on the constants calculated below.
   //
   // It is preferable to use roughly the same number of bits from each generator
@@ -199,21 +176,44 @@ FastUniformBits<UIntType>::Generate(URBG& g,  // NOLINT(runtime/references)
   // `kSmallIters` and `kLargeIters` times respectively such
   // that
   //
-  //    `kTotalWidth == kSmallIters * kSmallWidth
-  //                    + kLargeIters * kLargeWidth`
+  //    `kResultBits == kSmallIters * kSmallBits
+  //                    + kLargeIters * kLargeBits`
   //
-  // where `kTotalWidth` is the total number of bits in `result_type`.
+  // where `kResultBits` is the total number of bits in `result_type`.
   //
-  constexpr size_t kTotalWidth = std::numeric_limits<result_type>::digits;
-  constexpr size_t kUrbgWidth = NumBits<URBG>();
-  constexpr size_t kTotalIters =
-      kTotalWidth / kUrbgWidth + (kTotalWidth % kUrbgWidth != 0);
-  constexpr size_t kSmallWidth = kTotalWidth / kTotalIters;
-  constexpr size_t kLargeWidth = kSmallWidth + 1;
+  static constexpr size_t kResultBits =
+      std::numeric_limits<result_type>::digits;                      // w
+  static constexpr urbg_result_type kUrbgRange = RangeSize<URBG>();  // R
+  static constexpr size_t kUrbgBits = NumBits<URBG>();               // m
+
+  // compute the initial estimate of the bits used.
+  // [rand.adapt.ibits] 2 (c)
+  static constexpr size_t kA =  // ceil(w/m)
+      (kResultBits / kUrbgBits) + ((kResultBits % kUrbgBits) != 0);  // n'
+
+  static constexpr size_t kABits = kResultBits / kA;  // w0'
+  static constexpr urbg_result_type kARejection =
+      ((kUrbgRange >> kABits) << kABits);  // y0'
+
+  // refine the selection to reduce the rejection frequency.
+  static constexpr size_t kTotalIters =
+      ((kUrbgRange - kARejection) <= (kARejection / kA)) ? kA : (kA + 1);  // n
+
+  // [rand.adapt.ibits] 2 (b)
+  static constexpr size_t kSmallIters =
+      kTotalIters - (kResultBits % kTotalIters);                   // n0
+  static constexpr size_t kSmallBits = kResultBits / kTotalIters;  // w0
+  static constexpr urbg_result_type kSmallRejection =
+      ((kUrbgRange >> kSmallBits) << kSmallBits);  // y0
+
+  static constexpr size_t kLargeBits = kSmallBits + 1;  // w0+1
+  static constexpr urbg_result_type kLargeRejection =
+      ((kUrbgRange >> kLargeBits) << kLargeBits);  // y1
+
   //
-  // Because `kLargeWidth == kSmallWidth + 1`, it follows that
+  // Because `kLargeBits == kSmallBits + 1`, it follows that
   //
-  //     `kTotalWidth == kTotalIters * kSmallWidth + kLargeIters`
+  //     `kResultBits == kSmallIters * kSmallBits + kLargeIters`
   //
   // and therefore
   //
@@ -224,36 +224,40 @@ FastUniformBits<UIntType>::Generate(URBG& g,  // NOLINT(runtime/references)
   // mentioned above, if the URBG width is a divisor of `kTotalWidth`, then
   // there would be no need for any large iterations (i.e., one loop would
   // suffice), and indeed, in this case, `kLargeIters` would be zero.
-  constexpr size_t kLargeIters = kTotalWidth % kSmallWidth;
-  constexpr size_t kSmallIters =
-      (kTotalWidth - (kLargeWidth * kLargeIters)) / kSmallWidth;
+  static_assert(kResultBits == kSmallIters * kSmallBits +
+                                   (kTotalIters - kSmallIters) * kLargeBits,
+                "Error in looping constant calculations.");
 
-  static_assert(
-      kTotalWidth == kSmallIters * kSmallWidth + kLargeIters * kLargeWidth,
-      "Error in looping constant calculations.");
+  // The small shift is essentially small bits, but due to the potential
+  // of generating a smaller result_type from a larger urbg type, the actual
+  // shift might be 0.
+  static constexpr size_t kSmallShift = kSmallBits % kResultBits;
+  static constexpr auto kSmallMask =
+      MaskFromShift<urbg_result_type>(kSmallShift);
+  static constexpr size_t kLargeShift = kLargeBits % kResultBits;
+  static constexpr auto kLargeMask =
+      MaskFromShift<urbg_result_type>(kLargeShift);
 
-  result_type s = 0;
+  static constexpr auto kMin = (URBG::min)();
 
-  constexpr size_t kSmallShift = kSmallWidth % kTotalWidth;
-  constexpr result_type kSmallMask = MaskFromShift(result_type{kSmallShift});
+  result_type s = 0;
   for (size_t n = 0; n < kSmallIters; ++n) {
-    s = (s << kSmallShift) +
-        (static_cast<result_type>(PowerOfTwoVariate(g)) & kSmallMask);
-  }
+    urbg_result_type v;
+    do {
+      v = g() - kMin;
+    } while (v >= kSmallRejection);
 
-  constexpr size_t kLargeShift = kLargeWidth % kTotalWidth;
-  constexpr result_type kLargeMask = MaskFromShift(result_type{kLargeShift});
-  for (size_t n = 0; n < kLargeIters; ++n) {
-    s = (s << kLargeShift) +
-        (static_cast<result_type>(PowerOfTwoVariate(g)) & kLargeMask);
+    s = (s << kSmallShift) + static_cast<result_type>(v & kSmallMask);
   }
 
-  static_assert(
-      kLargeShift == kSmallShift + 1 ||
-          (kLargeShift == 0 &&
-           kSmallShift == std::numeric_limits<result_type>::digits - 1),
-      "Error in looping constant calculations");
+  for (size_t n = kSmallIters; n < kTotalIters; ++n) {
+    urbg_result_type v;
+    do {
+      v = g() - kMin;
+    } while (v >= kLargeRejection);
 
+    s = (s << kLargeShift) + static_cast<result_type>(v & kLargeMask);
+  }
   return s;
 }
author	Abseil Team <absl-team@google.com>	2020-07-09 08:13:12 -0700
committer	Gennadiy Rozental <rogeeff@google.com>	2020-07-09 11:59:40 -0400
commit	d5269a8b6dbe7836049417d0ff2c88b8363cc1fc (patch)
tree	007496a6f35159aabff38bf1a258e324801faf90 /absl/random/internal/fast_uniform_bits.h
parent	bf655de09b67fd8b924814cbb369cb65ddd0bd24 (diff)