From aa844899c937bde5d2b24f276b59997e5b668bde Mon Sep 17 00:00:00 2001 From: Abseil Team Date: Thu, 8 Aug 2019 10:56:58 -0700 Subject: Creation of LTS branch "lts_2019_08_08" - 9ee91d3e430fb33a4590486573792eb0fa146c2d Export of internal Abseil changes by Abseil Team - 8efba58a3b656e9b41fb0471ae6453425a61c520 Export of internal Abseil changes by Abseil Team - b49b8d16b67ec6912899684b732e6367f258cfdb Export of internal Abseil changes by Abseil Team - 67222ffc4c83d918ce8395aa61769eeb77df4c4d Export of internal Abseil changes by Abseil Team - c5c4db4f5191fe5e76cbf68dcc71fb28702f7d2b Export of internal Abseil changes by Abseil Team - 14550beb3b7b97195e483fb74b5efb906395c31e Export of internal Abseil changes. by Abseil Team - 52e88ee56b72cf32bc66534d942c7398ce481331 Export of internal Abseil changes. by Abseil Team - 36d37ab992038f52276ca66b9da80c1cf0f57dc2 Export of internal Abseil changes. by Abseil Team - ad1485c8986246b2ae9105e512738d0e97aec887 Export of internal Abseil changes. by Abseil Team - f3840bc5e33ce4932e35986cf3718450c6f02af2 Export of internal Abseil changes. by Abseil Team - 278b26058c036833a4f7f3047d3f4d9296527f87 Export of internal Abseil changes. by Abseil Team - c6c3c1b498e4ee939b24be59cae29d59c3863be8 Export of internal Abseil changes. by Abseil Team - 44efe96dfca674a17b45ca53fc77fb69f1e29bf4 Export of internal Abseil changes. by Abseil Team - 3c98fcc0461bd2a4b9c149d4748a7373a225cf4b Merge pull request #340 from jtsylve/macos_cxx17_fix by Matt Calabrese <38107210+mattcalabrese-google@users.noreply.github.com> - 74d91756c11bc22f9b0108b94da9326f7f9e376f Export of internal Abseil changes. by Abseil Team - e6b050212c859fbaf67abac76105da10ec348274 Export of internal Abseil changes. by Abseil Team - c964fcffac27bd4a9ff67fe393410dd1146ef8b8 Export of internal Abseil changes. by Abseil Team - 72e09a54d993b192db32be14c65adf7e9bd08c31 Export of internal Abseil changes. by Abseil Team - d65e19dfcd8697076f68598c0131c6930cdcd74d Export of internal Abseil changes. by Abseil Team - 5162fc83d2f3b79a9753ed59594c43966afdd37a Merge pull request #336 from shields/patch-2 by Shaindel Schwartz <31392632+shaindelschwartz@users.noreply.github.com> - 0389f7bf58fa41f35b3ad60be61d32f31e4f8ed6 Merge pull request #335 from shields/patch-1 by Shaindel Schwartz <31392632+shaindelschwartz@users.noreply.github.com> - e9324d926a9189e222741fce6e676f0944661a72 Export of internal Abseil changes. by Abseil Team - 43ef2148c0936ebf7cb4be6b19927a9d9d145b8f Export of internal Abseil changes. by Abseil Team - a13d3df2b3ba68aeead92e2d078fba0510d55024 Merge pull request #323 from gosnik/master by Gennadiy Rozental - 310a11865c97c5cdcc42a4ee2c2e3578423afe69 Merge pull request #324 from RasPat1/patch-1 by Gennadiy Rozental - 8f11724067248acc330b4d1f12f0c76d03f2cfb1 Export of internal Abseil changes. by Abseil Team - b1dd425423380126f6441ce4fbb6f8f6c75b793a Export of internal Abseil changes. by Abseil Team - 361cb8a9db2f2130442389fd80593255be26d681 Export of internal Abseil changes. by Abseil Team - 0238ab0a831f179518c1a814f9584e99da2d75a3 Merge pull request #321 from christoph-cullmann/c4245_fix... by Xiaoyi Zhang - 61c9bf3e3e1c28a4aa6d7f1be4b37fd473bb5529 Export of internal Abseil changes. by Abseil Team - bc9101f9982391019521161a36179b52555ed212 Merge pull request #320 from christoph-cullmann/master by Xiaoyi Zhang - 2f76a9bf50046e396138cc8eeb3cdc17b7a5ac24 Export of internal Abseil changes. by Abseil Team - 4adaf5490921f13028b55018c9f550277de5aebb Export of internal Abseil changes. by Abseil Team - 27c30ec671cb7b5ba84c4e79feff7fd0b0ac6338 Avoid undefined behavior when nullptr is passed to memcpy... by Roman Gershman - ce65f5ac3cbf897bb5e3de1a51d80fd00866abaa Export of internal Abseil changes. by Abseil Team - a18fc7461e7409c2ad64e28537261db1e02e76fa Export of internal Abseil changes. by Abseil Team - 8a394b19c149cab50534b04c5e21d42bc2217a7d Export of internal Abseil changes. by Abseil Team - daf381e8535a1f1f1b8a75966a74e7cca63dee89 Export of internal Abseil changes. by Abseil Team - fa00c321073c7ea40a4fc3dfc8a06309eae3d025 Export of internal Abseil changes. by Abseil Team - 436ba6c4a0ea3a06eca6e055f9c8d296bf3bae12 Export of internal Abseil changes. by Abseil Team - 0cbdc774b97f7e80ab60dbe2ed4eaca3b2e33fc8 Export of internal Abseil changes. by Abseil Team - 27c2f6e2f3b5929fbd322b0f0ca392eb02efd9f8 Export of internal Abseil changes. by Abseil Team - aa468ad75539619b47979911297efbb629c52e44 Export of internal Abseil changes. by Abseil Team - cd86d0d20ab167c33b23d3875db68d1d4bad3a3b Export of internal Abseil changes. by Abseil Team - 33841c5c963aa9c3f096ef8e6c1e71624b941940 Export of internal Abseil changes. by Abseil Team - ca3f87560a0eef716195cadf66dc6b938a579ec6 Export of internal Abseil changes. by Abseil Team - d902eb869bcfacc1bad14933ed9af4bed006d481 Export of internal Abseil changes. by Abseil Team - a02f62f456f2c4a7ecf2be3104fe0c6e16fbad9a Export of internal Abseil changes. by Abseil Team - 0b545b460141b882b244a1efcef7621d59278160 Export of internal Abseil changes. by Abseil Team - dbae8764fbd429bf7d7745e24bcf73962177a7c0 Export of internal Abseil changes. by Abseil Team - 044da8a29c923506af0f0b46bc46f43c1e1300b5 Export of internal Abseil changes. by Abseil Team - 6cc6ac44e065b9e8975fadfd6ccb99cbcf89aac4 Export of internal Abseil changes. by Abseil Team - 666fc1266bccfd8e6eaaa084e7b42580bb8eb199 Export of internal Abseil changes. by Abseil Team - 93dfcf74cb5fccae3da07897d8613ae6cab958a0 Export of internal Abseil changes. by Abseil Team - 2c8421e1c6cef0da9e8a20b01c15256ec9ec116d Export of internal Abseil changes. by Abseil Team - 5b65c4af5107176555b23a638e5947686410ac1f Export of internal Abseil changes. by Abseil Team - eab2078b53c9e3d9d240135c09d27e3393acb50a Export of internal Abseil changes. by Abseil Team - 253eb7416421661873afbaa33828a850db978541 [CMake] Set correct flags for clang-cl (#278) by Loo Rong Jie - e75672f6afc7e8f23ee7b532e86d1b3b9be3984e Export of internal Abseil changes. by Abseil Team - bf29470384a101b307873b26d358433138c857fc Export of internal Abseil changes. by Abseil Team - 6fd827124facd8336981e73218997f9e73029b4f Merge pull request #280 from chiumichael/master by Derek Mauro <761129+derekmauro@users.noreply.github.com> - 7c7754fb3ed9ffb57d35fe8658f3ba4d73a31e72 Export of internal Abseil changes. by Abseil Team - 256be563447a315f2a7993ec669460ba475fa86a Export of internal Abseil changes. by Abseil Team - 88a152ae747c3c42dc9167d46c590929b048d436 Export of internal Abseil changes. by Abseil Team - c1cecb25a94c075725e9d2640f6b978a8f61957b Implement Span::first and Span::last from C++20 (#274) by Girts - 38b704384cd2f17590b3922b97744be0b43622c9 Changed HTTP URLs to HTTPS where possible (#270) by nik7273 - febc5ee6a92d0eb7dac1fceaa6c648cf6521b4dc Export of internal Abseil changes. by Abseil Team - 9fdf5e5b805412cb2a2e624d3e9a11588120465f Export of internal Abseil changes. by Abseil Team - 419f3184f8ebcdb23105295eadd2a569f3351eb9 Export of internal Abseil changes. by Abseil Team - b312c3cb53a0aad75a85ac2bf57c4a614fbd48d4 Export of internal Abseil changes. by Abseil Team - 308ce31528a7edfa39f5f6d36142278a0ae1bf45 Export of internal Abseil changes. by Abseil Team - 93d155bc4414f6c121bb1f19dba9fdb27c8943bc Export of internal Abseil changes. by Abseil Team - 426eaa4aa44e4580418bee46c1bd13911151bfb1 Export of internal Abseil changes. by Abseil Team - 2901ec32a919311384d6ad4194e2d927c06831f7 Export of internal Abseil changes. by Abseil Team - d78310fe5a82f2e0e6e16509ef8079c8d7e4674e Export of internal Abseil changes. by Abseil Team - a4cb1c8ba61531a63f9d309eea01ac1d43d8371d Export of internal Abseil changes. by Abseil Team - 540e2537b92cd4abfae6ceddfe24304345461f32 Export of internal Abseil changes. by Abseil Team - 89ea0c5ff34aaa5855cfc7aa41f323b8a0ef0ede Merge pull request #255 from uilianries/hotfix/conan by ahedberg - 5e0dcf72c64fae912184d2e0de87195fe8f0a425 Export of internal Abseil changes. by Abseil Team - 0dffca4e36791c7beeda04da10460b534283948a Export of internal Abseil changes. by Abseil Team - 6b4201f9ef650637510a21b8d6cbcc3bee4a606f Fix GCC8 warnings by Boris Staletic - 0b1e6d417b414aad9282e32e8c49c719edeb63c1 Export of internal Abseil changes. by Abseil Team - efccc502606bed768e50a6cd5806d8eb13e4e935 Export of internal Abseil changes. by Abseil Team - 5e6a78131f7bd5940218462c07d88cdefdd75dbe Export of internal Abseil changes. by Abseil Team - 5eea0f713c14ac17788b83e496f11903f8e2bbb0 Export of internal Abseil changes. by Abseil Team - 66f9becbb98ecc083f4db349b4b1e0ca9de93b15 Export of internal Abseil changes. by Abseil Team - 018b4db1d73ec8238e6dc4b17fd9e1fd7468d0ed Export of internal Abseil changes. by Abseil Team - 9449ae94397f2fd683851348e25ed8c93f75b3b9 Merge pull request #243 from ThomsonTan/FixIntrinsic by Alex Strelnikov - b16aeb6756bdab08cdf12d40baab5b51f7d15b16 Export of internal Abseil changes. by Abseil Team - 7ffbe09f3d85504bd018783bbe1e2c12992fe47c Export of internal Abseil changes. by Abseil Team - 01b471d9f3ebef27f5aaca14b66509099fa8cd6c Export of internal Abseil changes. by Abseil Team - 7bd8f36c741c7cbe311611d7981bf38ba04c6fef Export of internal Abseil changes. by Abseil Team - 968a34ffdaadd7db062a9621dfbdf8b2d16e05af Export of internal Abseil changes. by Abseil Team - 3e2e9b5557e76d098de4b8a2a659125b98ca519b Merge pull request #231 from uilianries/feature/conan by Mark Barolak - 111ca7060a6ff50115ca85b59f6b5d8c8c5e9105 Export of internal Abseil changes. by Abseil Team - 389ec3f906f018661a5308458d623d01f96d7b23 Export of internal Abseil changes. by Abseil Team - 8fbcdb90952c57828c4a9c2f6d79fcd7cae9088f Export of internal Abseil changes. by Abseil Team - 455dc17ba1af9635f0b60155bc565bc572a1e722 Export of internal Abseil changes. by Abseil Team - f197d7c72a54064cfde5a2058f1513a4a0ee36fb Export of internal Abseil changes. by Abseil Team - 284378a71b32dfb3af4e3661f585e671d1b603a3 Export of internal Abseil changes. by Abseil Team GitOrigin-RevId: 9ee91d3e430fb33a4590486573792eb0fa146c2d Change-Id: Ia06e548bc106cc9d136f6c65714be6645317aced --- absl/random/poisson_distribution_test.cc | 565 +++++++++++++++++++++++++++++++ 1 file changed, 565 insertions(+) create mode 100644 absl/random/poisson_distribution_test.cc (limited to 'absl/random/poisson_distribution_test.cc') diff --git a/absl/random/poisson_distribution_test.cc b/absl/random/poisson_distribution_test.cc new file mode 100644 index 00000000..6d68999a --- /dev/null +++ b/absl/random/poisson_distribution_test.cc @@ -0,0 +1,565 @@ +// Copyright 2017 The Abseil Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/random/poisson_distribution.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/raw_logging.h" +#include "absl/base/macros.h" +#include "absl/container/flat_hash_map.h" +#include "absl/random/internal/chi_square.h" +#include "absl/random/internal/distribution_test_util.h" +#include "absl/random/internal/sequence_urbg.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_replace.h" +#include "absl/strings/strip.h" + +// Notes about generating poisson variates: +// +// It is unlikely that any implementation of std::poisson_distribution +// will be stable over time and across library implementations. For instance +// the three different poisson variate generators listed below all differ: +// +// https://github.com/ampl/gsl/tree/master/randist/poisson.c +// * GSL uses a gamma + binomial + knuth method to compute poisson variates. +// +// https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/random.tcc +// * GCC uses the Devroye rejection algorithm, based on +// Devroye, L. Non-Uniform Random Variates Generation. Springer-Verlag, +// New York, 1986, Ch. X, Sects. 3.3 & 3.4 (+ Errata!), ~p.511 +// http://www.nrbook.com/devroye/ +// +// https://github.com/llvm-mirror/libcxx/blob/master/include/random +// * CLANG uses a different rejection method, which appears to include a +// normal-distribution approximation and an exponential distribution to +// compute the threshold, including a similar factorial approximation to this +// one, but it is unclear where the algorithm comes from, exactly. +// + +namespace { + +using absl::random_internal::kChiSquared; + +// The PoissonDistributionInterfaceTest provides a basic test that +// absl::poisson_distribution conforms to the interface and serialization +// requirements imposed by [rand.req.dist] for the common integer types. + +template +class PoissonDistributionInterfaceTest : public ::testing::Test {}; + +using IntTypes = ::testing::Types; +TYPED_TEST_CASE(PoissonDistributionInterfaceTest, IntTypes); + +TYPED_TEST(PoissonDistributionInterfaceTest, SerializeTest) { + using param_type = typename absl::poisson_distribution::param_type; + const double kMax = + std::min(1e10 /* assertion limit */, + static_cast(std::numeric_limits::max())); + + const double kParams[] = { + // Cases around 1. + 1, // + std::nextafter(1.0, 0.0), // 1 - epsilon + std::nextafter(1.0, 2.0), // 1 + epsilon + // Arbitrary values. + 1e-8, 1e-4, + 0.0000005, // ~7.2e-7 + 0.2, // ~0.2x + 0.5, // 0.72 + 2, // ~2.8 + 20, // 3x ~9.6 + 100, 1e4, 1e8, 1.5e9, 1e20, + // Boundary cases. + std::numeric_limits::max(), + std::numeric_limits::epsilon(), + std::nextafter(std::numeric_limits::min(), + 1.0), // min + epsilon + std::numeric_limits::min(), // smallest normal + std::numeric_limits::denorm_min(), // smallest denorm + std::numeric_limits::min() / 2, // denorm + std::nextafter(std::numeric_limits::min(), + 0.0), // denorm_max + }; + + + constexpr int kCount = 1000; + absl::InsecureBitGen gen; + for (const double m : kParams) { + const double mean = std::min(kMax, m); + const param_type param(mean); + + // Validate parameters. + absl::poisson_distribution before(mean); + EXPECT_EQ(before.mean(), param.mean()); + + { + absl::poisson_distribution via_param(param); + EXPECT_EQ(via_param, before); + EXPECT_EQ(via_param.param(), before.param()); + } + + // Smoke test. + auto sample_min = before.max(); + auto sample_max = before.min(); + for (int i = 0; i < kCount; i++) { + auto sample = before(gen); + EXPECT_GE(sample, before.min()); + EXPECT_LE(sample, before.max()); + if (sample > sample_max) sample_max = sample; + if (sample < sample_min) sample_min = sample; + } + + ABSL_INTERNAL_LOG(INFO, absl::StrCat("Range {", param.mean(), "}: ", + +sample_min, ", ", +sample_max)); + + // Validate stream serialization. + std::stringstream ss; + ss << before; + + absl::poisson_distribution after(3.8); + + EXPECT_NE(before.mean(), after.mean()); + EXPECT_NE(before.param(), after.param()); + EXPECT_NE(before, after); + + ss >> after; + + EXPECT_EQ(before.mean(), after.mean()) // + << ss.str() << " " // + << (ss.good() ? "good " : "") // + << (ss.bad() ? "bad " : "") // + << (ss.eof() ? "eof " : "") // + << (ss.fail() ? "fail " : ""); + } +} + +// See http://www.itl.nist.gov/div898/handbook/eda/section3/eda366j.htm + +class PoissonModel { + public: + explicit PoissonModel(double mean) : mean_(mean) {} + + double mean() const { return mean_; } + double variance() const { return mean_; } + double stddev() const { return std::sqrt(variance()); } + double skew() const { return 1.0 / mean_; } + double kurtosis() const { return 3.0 + 1.0 / mean_; } + + // InitCDF() initializes the CDF for the distribution parameters. + void InitCDF(); + + // The InverseCDF, or the Percent-point function returns x, P(x) < v. + struct CDF { + size_t index; + double pmf; + double cdf; + }; + CDF InverseCDF(double p) { + CDF target{0, 0, p}; + auto it = std::upper_bound( + std::begin(cdf_), std::end(cdf_), target, + [](const CDF& a, const CDF& b) { return a.cdf < b.cdf; }); + return *it; + } + + void LogCDF() { + ABSL_INTERNAL_LOG(INFO, absl::StrCat("CDF (mean = ", mean_, ")")); + for (const auto c : cdf_) { + ABSL_INTERNAL_LOG(INFO, + absl::StrCat(c.index, ": pmf=", c.pmf, " cdf=", c.cdf)); + } + } + + private: + const double mean_; + + std::vector cdf_; +}; + +// The goal is to compute an InverseCDF function, or percent point function for +// the poisson distribution, and use that to partition our output into equal +// range buckets. However there is no closed form solution for the inverse cdf +// for poisson distributions (the closest is the incomplete gamma function). +// Instead, `InitCDF` iteratively computes the PMF and the CDF. This enables +// searching for the bucket points. +void PoissonModel::InitCDF() { + if (!cdf_.empty()) { + // State already initialized. + return; + } + ABSL_ASSERT(mean_ < 201.0); + + const size_t max_i = 50 * stddev() + mean(); + const double e_neg_mean = std::exp(-mean()); + ABSL_ASSERT(e_neg_mean > 0); + + double d = 1; + double last_result = e_neg_mean; + double cumulative = e_neg_mean; + if (e_neg_mean > 1e-10) { + cdf_.push_back({0, e_neg_mean, cumulative}); + } + for (size_t i = 1; i < max_i; i++) { + d *= (mean() / i); + double result = e_neg_mean * d; + cumulative += result; + if (result < 1e-10 && result < last_result && cumulative > 0.999999) { + break; + } + if (result > 1e-7) { + cdf_.push_back({i, result, cumulative}); + } + last_result = result; + } + ABSL_ASSERT(!cdf_.empty()); +} + +// PoissonDistributionZTest implements a z-test for the poisson distribution. + +struct ZParam { + double mean; + double p_fail; // Z-Test probability of failure. + int trials; // Z-Test trials. + size_t samples; // Z-Test samples. +}; + +class PoissonDistributionZTest : public testing::TestWithParam, + public PoissonModel { + public: + PoissonDistributionZTest() : PoissonModel(GetParam().mean) {} + + // ZTestImpl provides a basic z-squared test of the mean vs. expected + // mean for data generated by the poisson distribution. + template + bool SingleZTest(const double p, const size_t samples); + + absl::InsecureBitGen rng_; +}; + +template +bool PoissonDistributionZTest::SingleZTest(const double p, + const size_t samples) { + D dis(mean()); + + absl::flat_hash_map buckets; + std::vector data; + data.reserve(samples); + for (int j = 0; j < samples; j++) { + const auto x = dis(rng_); + buckets[x]++; + data.push_back(x); + } + + // The null-hypothesis is that the distribution is a poisson distribution with + // the provided mean (not estimated from the data). + const auto m = absl::random_internal::ComputeDistributionMoments(data); + const double max_err = absl::random_internal::MaxErrorTolerance(p); + const double z = absl::random_internal::ZScore(mean(), m); + const bool pass = absl::random_internal::Near("z", z, 0.0, max_err); + + if (!pass) { + ABSL_INTERNAL_LOG( + INFO, absl::StrFormat("p=%f max_err=%f\n" + " mean=%f vs. %f\n" + " stddev=%f vs. %f\n" + " skewness=%f vs. %f\n" + " kurtosis=%f vs. %f\n" + " z=%f", + p, max_err, m.mean, mean(), std::sqrt(m.variance), + stddev(), m.skewness, skew(), m.kurtosis, + kurtosis(), z)); + } + return pass; +} + +TEST_P(PoissonDistributionZTest, AbslPoissonDistribution) { + const auto& param = GetParam(); + const int expected_failures = + std::max(1, static_cast(std::ceil(param.trials * param.p_fail))); + const double p = absl::random_internal::RequiredSuccessProbability( + param.p_fail, param.trials); + + int failures = 0; + for (int i = 0; i < param.trials; i++) { + failures += + SingleZTest>(p, param.samples) ? 0 + : 1; + } + EXPECT_LE(failures, expected_failures); +} + +std::vector GetZParams() { + // These values have been adjusted from the "exact" computed values to reduce + // failure rates. + // + // It turns out that the actual values are not as close to the expected values + // as would be ideal. + return std::vector({ + // Knuth method. + ZParam{0.5, 0.01, 100, 1000}, + ZParam{1.0, 0.01, 100, 1000}, + ZParam{10.0, 0.01, 100, 5000}, + // Split-knuth method. + ZParam{20.0, 0.01, 100, 10000}, + ZParam{50.0, 0.01, 100, 10000}, + // Ratio of gaussians method. + ZParam{51.0, 0.01, 100, 10000}, + ZParam{200.0, 0.05, 10, 100000}, + ZParam{100000.0, 0.05, 10, 1000000}, + }); +} + +std::string ZParamName(const ::testing::TestParamInfo& info) { + const auto& p = info.param; + std::string name = absl::StrCat("mean_", absl::SixDigits(p.mean)); + return absl::StrReplaceAll(name, {{"+", "_"}, {"-", "_"}, {".", "_"}}); +} + +INSTANTIATE_TEST_SUITE_P(, PoissonDistributionZTest, + ::testing::ValuesIn(GetZParams()), ZParamName); + +// The PoissonDistributionChiSquaredTest class provides a basic test framework +// for variates generated by a conforming poisson_distribution. +class PoissonDistributionChiSquaredTest : public testing::TestWithParam, + public PoissonModel { + public: + PoissonDistributionChiSquaredTest() : PoissonModel(GetParam()) {} + + // The ChiSquaredTestImpl provides a chi-squared goodness of fit test for data + // generated by the poisson distribution. + template + double ChiSquaredTestImpl(); + + private: + void InitChiSquaredTest(const double buckets); + + absl::InsecureBitGen rng_; + std::vector cutoffs_; + std::vector expected_; +}; + +void PoissonDistributionChiSquaredTest::InitChiSquaredTest( + const double buckets) { + if (!cutoffs_.empty() && !expected_.empty()) { + return; + } + InitCDF(); + + // The code below finds cuttoffs that yield approximately equally-sized + // buckets to the extent that it is possible. However for poisson + // distributions this is particularly challenging for small mean parameters. + // Track the expected proportion of items in each bucket. + double last_cdf = 0; + const double inc = 1.0 / buckets; + for (double p = inc; p <= 1.0; p += inc) { + auto result = InverseCDF(p); + if (!cutoffs_.empty() && cutoffs_.back() == result.index) { + continue; + } + double d = result.cdf - last_cdf; + cutoffs_.push_back(result.index); + expected_.push_back(d); + last_cdf = result.cdf; + } + cutoffs_.push_back(std::numeric_limits::max()); + expected_.push_back(std::max(0.0, 1.0 - last_cdf)); +} + +template +double PoissonDistributionChiSquaredTest::ChiSquaredTestImpl() { + const int kSamples = 2000; + const int kBuckets = 50; + + // The poisson CDF fails for large mean values, since e^-mean exceeds the + // machine precision. For these cases, using a normal approximation would be + // appropriate. + ABSL_ASSERT(mean() <= 200); + InitChiSquaredTest(kBuckets); + + D dis(mean()); + + std::vector counts(cutoffs_.size(), 0); + for (int j = 0; j < kSamples; j++) { + const size_t x = dis(rng_); + auto it = std::lower_bound(std::begin(cutoffs_), std::end(cutoffs_), x); + counts[std::distance(cutoffs_.begin(), it)]++; + } + + // Normalize the counts. + std::vector e(expected_.size(), 0); + for (int i = 0; i < e.size(); i++) { + e[i] = kSamples * expected_[i]; + } + + // The null-hypothesis is that the distribution is a poisson distribution with + // the provided mean (not estimated from the data). + const int dof = static_cast(counts.size()) - 1; + + // The threshold for logging is 1-in-50. + const double threshold = absl::random_internal::ChiSquareValue(dof, 0.98); + + const double chi_square = absl::random_internal::ChiSquare( + std::begin(counts), std::end(counts), std::begin(e), std::end(e)); + + const double p = absl::random_internal::ChiSquarePValue(chi_square, dof); + + // Log if the chi_squared value is above the threshold. + if (chi_square > threshold) { + LogCDF(); + + ABSL_INTERNAL_LOG(INFO, absl::StrCat("VALUES buckets=", counts.size(), + " samples=", kSamples)); + for (size_t i = 0; i < counts.size(); i++) { + ABSL_INTERNAL_LOG( + INFO, absl::StrCat(cutoffs_[i], ": ", counts[i], " vs. E=", e[i])); + } + + ABSL_INTERNAL_LOG( + INFO, + absl::StrCat(kChiSquared, "(data, dof=", dof, ") = ", chi_square, " (", + p, ")\n", " vs.\n", kChiSquared, " @ 0.98 = ", threshold)); + } + return p; +} + +TEST_P(PoissonDistributionChiSquaredTest, AbslPoissonDistribution) { + const int kTrials = 20; + + // Large values are not yet supported -- this requires estimating the cdf + // using the normal distribution instead of the poisson in this case. + ASSERT_LE(mean(), 200.0); + if (mean() > 200.0) { + return; + } + + int failures = 0; + for (int i = 0; i < kTrials; i++) { + double p_value = ChiSquaredTestImpl>(); + if (p_value < 0.005) { + failures++; + } + } + // There is a 0.10% chance of producing at least one failure, so raise the + // failure threshold high enough to allow for a flake rate < 10,000. + EXPECT_LE(failures, 4); +} + +INSTANTIATE_TEST_SUITE_P(, PoissonDistributionChiSquaredTest, + ::testing::Values(0.5, 1.0, 2.0, 10.0, 50.0, 51.0, + 200.0)); + +// NOTE: absl::poisson_distribution is not guaranteed to be stable. +TEST(PoissonDistributionTest, StabilityTest) { + using testing::ElementsAre; + // absl::poisson_distribution stability relies on stability of + // std::exp, std::log, std::sqrt, std::ceil, std::floor, and + // absl::FastUniformBits, absl::StirlingLogFactorial, absl::RandU64ToDouble. + absl::random_internal::sequence_urbg urbg({ + 0x035b0dc7e0a18acfull, 0x06cebe0d2653682eull, 0x0061e9b23861596bull, + 0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull, + 0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull, + 0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull, + 0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull, + 0x4864f22c059bf29eull, 0x247856d8b862665cull, 0xe46e86e9a1337e10ull, + 0xd8c8541f3519b133ull, 0xe75b5162c567b9e4ull, 0xf732e5ded7009c5bull, + 0xb170b98353121eacull, 0x1ec2e8986d2362caull, 0x814c8e35fe9a961aull, + 0x0c3cd59c9b638a02ull, 0xcb3bb6478a07715cull, 0x1224e62c978bbc7full, + 0x671ef2cb04e81f6eull, 0x3c1cbd811eaf1808ull, 0x1bbc23cfa8fac721ull, + 0xa4c2cda65e596a51ull, 0xb77216fad37adf91ull, 0x836d794457c08849ull, + 0xe083df03475f49d7ull, 0xbc9feb512e6b0d6cull, 0xb12d74fdd718c8c5ull, + 0x12ff09653bfbe4caull, 0x8dd03a105bc4ee7eull, 0x5738341045ba0d85ull, + 0xf3fd722dc65ad09eull, 0xfa14fd21ea2a5705ull, 0xffe6ea4d6edb0c73ull, + 0xD07E9EFE2BF11FB4ull, 0x95DBDA4DAE909198ull, 0xEAAD8E716B93D5A0ull, + 0xD08ED1D0AFC725E0ull, 0x8E3C5B2F8E7594B7ull, 0x8FF6E2FBF2122B64ull, + 0x8888B812900DF01Cull, 0x4FAD5EA0688FC31Cull, 0xD1CFF191B3A8C1ADull, + 0x2F2F2218BE0E1777ull, 0xEA752DFE8B021FA1ull, 0xE5A0CC0FB56F74E8ull, + 0x18ACF3D6CE89E299ull, 0xB4A84FE0FD13E0B7ull, 0x7CC43B81D2ADA8D9ull, + 0x165FA26680957705ull, 0x93CC7314211A1477ull, 0xE6AD206577B5FA86ull, + 0xC75442F5FB9D35CFull, 0xEBCDAF0C7B3E89A0ull, 0xD6411BD3AE1E7E49ull, + 0x00250E2D2071B35Eull, 0x226800BB57B8E0AFull, 0x2464369BF009B91Eull, + 0x5563911D59DFA6AAull, 0x78C14389D95A537Full, 0x207D5BA202E5B9C5ull, + 0x832603766295CFA9ull, 0x11C819684E734A41ull, 0xB3472DCA7B14A94Aull, + }); + + std::vector output(10); + + // Method 1. + { + absl::poisson_distribution dist(5); + std::generate(std::begin(output), std::end(output), + [&] { return dist(urbg); }); + } + EXPECT_THAT(output, // mean = 4.2 + ElementsAre(1, 0, 0, 4, 2, 10, 3, 3, 7, 12)); + + // Method 2. + { + urbg.reset(); + absl::poisson_distribution dist(25); + std::generate(std::begin(output), std::end(output), + [&] { return dist(urbg); }); + } + EXPECT_THAT(output, // mean = 19.8 + ElementsAre(9, 35, 18, 10, 35, 18, 10, 35, 18, 10)); + + // Method 3. + { + urbg.reset(); + absl::poisson_distribution dist(121); + std::generate(std::begin(output), std::end(output), + [&] { return dist(urbg); }); + } + EXPECT_THAT(output, // mean = 124.1 + ElementsAre(161, 122, 129, 124, 112, 112, 117, 120, 130, 114)); +} + +TEST(PoissonDistributionTest, AlgorithmExpectedValue_1) { + // This tests small values of the Knuth method. + // The underlying uniform distribution will generate exactly 0.5. + absl::random_internal::sequence_urbg urbg({0x8000000000000001ull}); + absl::poisson_distribution dist(5); + EXPECT_EQ(7, dist(urbg)); +} + +TEST(PoissonDistributionTest, AlgorithmExpectedValue_2) { + // This tests larger values of the Knuth method. + // The underlying uniform distribution will generate exactly 0.5. + absl::random_internal::sequence_urbg urbg({0x8000000000000001ull}); + absl::poisson_distribution dist(25); + EXPECT_EQ(36, dist(urbg)); +} + +TEST(PoissonDistributionTest, AlgorithmExpectedValue_3) { + // This variant uses the ratio of uniforms method. + absl::random_internal::sequence_urbg urbg( + {0x7fffffffffffffffull, 0x8000000000000000ull}); + + absl::poisson_distribution dist(121); + EXPECT_EQ(121, dist(urbg)); +} + +} // namespace -- cgit v1.2.3