From 16d9fd58a51c6083234e2e9f8f50e49ed5ed02e4 Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 21 Nov 2019 10:30:22 -0800 Subject: Export of internal Abseil changes -- db8dbd0e8a7b0125a4819dfc81c9bd2496849c71 by Abseil Team : Create GetSkipCount() and GetStride() methods and add rounding bias correction. PiperOrigin-RevId: 281780897 GitOrigin-RevId: db8dbd0e8a7b0125a4819dfc81c9bd2496849c71 Change-Id: I56a97288b1cb38a9357c065747f8d9bc4b187fee --- absl/base/internal/exponential_biased.cc | 45 +++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'absl/base/internal/exponential_biased.cc') diff --git a/absl/base/internal/exponential_biased.cc b/absl/base/internal/exponential_biased.cc index d7ffd18..3007f9b 100644 --- a/absl/base/internal/exponential_biased.cc +++ b/absl/base/internal/exponential_biased.cc @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -26,6 +27,42 @@ namespace absl { namespace base_internal { + +int64_t ExponentialBiased::GetSkipCount(int64_t mean) { + if (ABSL_PREDICT_FALSE(!initialized_)) { + Initialize(); + } + + uint64_t rng = NextRandom(rng_); + rng_ = rng; + + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) + // The uint32_t cast is to prevent a (hard-to-reproduce) NAN + // under piii debug for some binaries. + double q = static_cast(rng >> (kPrngNumBits - 26)) + 1.0; + // Put the computed p-value through the CDF of a geometric. + double interval = bias_ + (std::log2(q) - 26) * (-std::log(2.0) * mean); + // Very large values of interval overflow int64_t. To avoid that, we will + // cheat and clamp any huge values to (int64_t max)/2. This is a potential + // source of bias, but the mean would need to be such a large value that it's + // not likely to come up. For example, with a mean of 1e18, the probability of + // hitting this condition is about 1/1000. For a mean of 1e17, standard + // calculators claim that this event won't happen. + if (interval > static_cast(std::numeric_limits::max() / 2)) { + // Assume huge values are bias neutral, retain bias for next call. + return std::numeric_limits::max() / 2; + } + double value = std::round(interval); + bias_ = interval - value; + return value; +} + +int64_t ExponentialBiased::GetStride(int64_t mean) { + return GetSkipCount(mean - 1) + 1; +} + // The algorithm generates a random number between 0 and 1 and applies the // inverse cumulative distribution function for an exponential. Specifically: // Let m be the inverse of the sample period, then the probability @@ -51,7 +88,7 @@ int64_t ExponentialBiased::Get(int64_t mean) { // under piii debug for some binaries. double q = static_cast(rng >> (kPrngNumBits - 26)) + 1.0; // Put the computed p-value through the CDF of a geometric. - double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean); + double interval = bias_ + (std::log2(q) - 26) * (-std::log(2.0) * mean); // Very large values of interval overflow int64_t. To avoid that, we will cheat // and clamp any huge values to (int64_t max)/2. This is a potential source of // bias, but the mean would need to be such a large value that it's not likely @@ -59,10 +96,12 @@ int64_t ExponentialBiased::Get(int64_t mean) { // this condition is about 1/1000. For a mean of 1e17, standard calculators // claim that this event won't happen. if (interval > static_cast(std::numeric_limits::max() / 2)) { + // Assume huge values are bias neutral, retain bias for next call. return std::numeric_limits::max() / 2; } - - return static_cast(interval); + int64_t value = std::max(1, std::round(interval)); + bias_ = interval - value; + return value; } void ExponentialBiased::Initialize() { -- cgit v1.2.3