author    Alexey Radul <axch@google.com>                    2018-09-25 15:04:25 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>   2018-09-25 15:08:40 -0700
commit    ad27440a79c30a53f9fd2a3171a2c2da6ff37820 (patch)
tree      09a553424ae80697f6cbce3fb2996770288ed341 /tensorflow/contrib/distributions
parent    20c71535c5f1ed1d918d6cc6e327ffbba49ecbd6 (diff)
Move the correlation matrix volumes computation for testing the LKJ distribution from tf/contrib to tfp.
Relevant to tensorflow/tensorflow#21909

PiperOrigin-RevId: 214511101
Diffstat (limited to 'tensorflow/contrib/distributions')
-rw-r--r--  tensorflow/contrib/distributions/python/kernel_tests/util/BUILD                              |  51 -
-rw-r--r--  tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py      |  98 -
-rw-r--r--  tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py  | 323 -
-rw-r--r--  tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py | 150 -
4 files changed, 0 insertions, 622 deletions
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD b/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD
deleted file mode 100644
index 42ecea034d..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD
+++ /dev/null
@@ -1,51 +0,0 @@
-# Description:
-# Internal testing utilities, e.g., computing the correct answer to
-# put in a unit test.
-
-licenses(["notice"]) # Apache 2.0
-
-py_library(
- name = "correlation_matrix_volumes_py",
- srcs = [
- "correlation_matrix_volumes_lib.py",
- ],
- deps = [
- "//tensorflow/contrib/distributions:distributions_py",
- "//tensorflow/python:array_ops",
- "//tensorflow/python:errors",
- "//tensorflow/python:framework",
- "//tensorflow/python:framework_for_generated_wrappers",
- "//tensorflow/python:math_ops",
- "//third_party/py/numpy",
- ],
-)
-
-py_binary(
- name = "correlation_matrix_volumes",
- srcs = [
- "correlation_matrix_volumes.py",
- ],
- deps = [
- ":correlation_matrix_volumes_py",
- ],
-)
-
-py_test(
- name = "correlation_matrix_volumes_test",
- size = "medium",
- srcs = ["correlation_matrix_volumes_test.py"],
- tags = [
- "no_pip",
- "optonly",
- ],
- deps = [
- ":correlation_matrix_volumes_py",
- # For statistical testing
- "//tensorflow/contrib/distributions:distributions_py",
- "//third_party/py/numpy",
- "//tensorflow/python:array_ops",
- "//tensorflow/python:check_ops",
- "//tensorflow/python:client_testlib",
- "//tensorflow/python:framework",
- ],
-)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py
deleted file mode 100644
index 2eab51cd30..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Executable to estimate the volume of various sets of correlation matrices.
-
-See correlation_matrix_volumes_lib.py for purpose and methodology.
-
-Invocation example:
-```
-python correlation_matrix_volumes.py --num_samples 1e7
-```
-
-This will compute 10,000,000-sample confidence intervals for the
-volumes of several sets of correlation matrices. Which sets, and the
-desired statistical significance, are hard-coded in this source file.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pprint
-
-from absl import app
-from absl import flags
-
-from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr
-
-FLAGS = flags.FLAGS
-
-# Float to support giving the number of samples in scientific notation.
-# The production run used for the LKJ test used 1e7 samples.
-flags.DEFINE_float('num_samples', 1e4, 'Number of samples to use.')
-
-
-def ctv_debatched(det_bounds, dim, num_samples, error_rate=1e-6, seed=42):
- # This wrapper undoes the batching in compute_true_volumes, because
- # several 5x5x9x1e7 Tensors of float32 (about 9 GB each) can strain RAM.
- bounds = {}
- for db in det_bounds:
- bounds[db] = corr.compute_true_volumes(
- [db], dim, num_samples, error_rate=error_rate, seed=seed)[db]
- return bounds
-
-
-# The particular bounds in all three of these functions were chosen by
-# a somewhat arbitrary walk through an empirical tradeoff, for the
-# purpose of testing the LKJ distribution. Setting the determinant
-# bound lower
-# - Covers more of the testee's sample space, and
-# - Increases the probability that the rejection sampler will hit, thus
-# - Decreases the relative error (at a fixed sample count) in the
-# rejection-based volume estimate;
-# but also
-# - Increases the variance of the estimator used in the LKJ test.
-# This latter variance is also affected by the dimension and the
-# tested concentration parameter, and can be compensated for with more
-# compute (expensive) or a looser discrepancy limit (unsatisfying).
-# The values here are the projection of the points in that test design
-# space that ended up getting chosen.
-def compute_3x3_volumes(num_samples):
- det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45]
- return ctv_debatched(
- det_bounds, 3, num_samples, error_rate=5e-7, seed=46)
-
-
-def compute_4x4_volumes(num_samples):
- det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45]
- return ctv_debatched(
- det_bounds, 4, num_samples, error_rate=5e-7, seed=47)
-
-
-def compute_5x5_volumes(num_samples):
- det_bounds = [0.01, 0.2, 0.25, 0.3, 0.35, 0.4]
- return ctv_debatched(
- det_bounds, 5, num_samples, error_rate=5e-7, seed=48)
-
-
-def main(_):
- full_bounds = {}
- full_bounds[3] = compute_3x3_volumes(int(FLAGS.num_samples))
- full_bounds[4] = compute_4x4_volumes(int(FLAGS.num_samples))
- full_bounds[5] = compute_5x5_volumes(int(FLAGS.num_samples))
- pprint.pprint(full_bounds)
-
-if __name__ == '__main__':
- app.run(main)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py
deleted file mode 100644
index 455e71f00c..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py
+++ /dev/null
@@ -1,323 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Estimating the volume of the correlation matrices with bounded determinant.
-
-Why? Because lkj_test.py tests the sampler for the LKJ distribution
-by estimating the same volume another way.
-
-How? Rejection sampling. Or, more precisely, importance sampling,
-proposing from the uniform distribution on symmetric matrices with
-diagonal 1s and entries in [-1, 1]. Such a matrix is a correlation
-matrix if and only if it is also positive semi-definite.
-
-The samples can then be converted into a confidence interval on the
-volume in question by the [Clopper-Pearson
-method](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval),
-also implemented here.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import importlib
-import sys
-
-import numpy as np
-
-from tensorflow.python.client import session
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import linalg_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import uniform
-from tensorflow.python.ops.distributions import util
-from tensorflow.python.platform import tf_logging
-
-__all__ = [
- "correlation_matrix_volume_rejection_samples",
- "compute_true_volumes",
-]
-
-
-def try_import(name): # pylint: disable=invalid-name
- module = None
- try:
- module = importlib.import_module(name)
- except ImportError as e:
- tf_logging.warning("Could not import %s: %s" % (name, str(e)))
- return module
-
-optimize = try_import("scipy.optimize")
-stats = try_import("scipy.stats")
-
-
-def _psd_mask(x):
- """Computes whether each square matrix in the input is positive semi-definite.
-
- Args:
- x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`.
-
- Returns:
- mask: A floating-point `Tensor` of shape `[B1, ..., Bn]`. Each
- scalar is 1 if the corresponding matrix was PSD, otherwise 0.
- """
- # Allegedly
- # https://scicomp.stackexchange.com/questions/12979/testing-if-a-matrix-is-positive-semi-definite
- # it is more efficient to test for positive semi-definiteness by
- # trying to compute the Cholesky decomposition -- the matrix is PSD
- # if you succeed and not PSD if you fail. However, TensorFlow's
- # Cholesky raises an exception if _any_ of the input matrices are
- # not PSD, from which I don't know how to extract _which ones_, so I
- # proceed by explicitly computing all the eigenvalues and checking
- # whether they are all positive or not.
- #
- # Also, as was discussed in the answer, it is somewhat dangerous to
- # treat PSD-ness as binary in floating-point arithmetic. Cholesky
- # factorization can complete and 'look' like everything is fine
- # (e.g., O(1) entries and a diagonal of all ones) but the matrix can
- # have an exponential condition number.
- eigenvalues, _ = linalg_ops.self_adjoint_eig(x)
- return math_ops.cast(
- math_ops.reduce_min(eigenvalues, axis=-1) >= 0, dtype=x.dtype)
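(Note: the Cholesky-based test the comment above contemplates could look like the following NumPy sketch. It is illustrative only: it gives up the batch parallelism and, as the comment warns, treats PSD-ness as binary.)
```
import numpy as np

def psd_mask_via_cholesky(x):
  """Sketch of a per-matrix Cholesky-based PSD test (NumPy, not TF).

  np.linalg.cholesky requires strictly positive-definite input, so a
  tiny diagonal jitter is added; this trades exactness at the PSD
  boundary for the speed of Cholesky over a full eigendecomposition.
  """
  jitter = 1e-10 * np.eye(x.shape[-1], dtype=x.dtype)
  mask = np.zeros(x.shape[:-2], dtype=x.dtype)
  for idx in np.ndindex(*x.shape[:-2]):
    try:
      np.linalg.cholesky(x[idx] + jitter)
      mask[idx] = 1.
    except np.linalg.LinAlgError:
      pass  # not PSD; leave the mask entry at 0
  return mask
```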
-
-
-def _det_large_enough_mask(x, det_bounds):
- """Returns whether the input matches the given determinant limit.
-
- Args:
- x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`.
- det_bounds: A floating-point `Tensor` that must broadcast to shape
- `[B1, ..., Bn]`, giving the desired lower bound on the
- determinants in `x`.
-
- Returns:
- mask: A floating-point `Tensor` of shape [B1, ..., Bn]. Each
- scalar is 1 if the corresponding matrix had determinant above
- the corresponding bound, otherwise 0.
- """
- # For the curious: I wonder whether it is possible and desirable to
- # use a Cholesky decomposition-based algorithm for this, since the
- # only matrices whose determinant this code cares about will be PSD.
- # Didn't figure out how to code that in TensorFlow.
- #
- # Expert opinion is that it would be about twice as fast since
- # Cholesky is roughly half the cost of Gaussian Elimination with
- # Partial Pivoting. But this is less of an impact than the switch in
- # _psd_mask.
- return math_ops.cast(
- linalg_ops.matrix_determinant(x) > det_bounds, dtype=x.dtype)
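(Note: the Cholesky-based determinant the comment wonders about is standard for positive-definite matrices: if A = L L^T then det(A) = prod(diag(L))**2. A hypothetical NumPy version, for illustration only:)
```
import numpy as np

def log_det_via_cholesky(x):
  """Sketch: log-determinant of PD matrices via Cholesky (NumPy).

  log det(A) = 2 * sum(log(diag(chol(A)))), computed stably in log space.
  """
  chol = np.linalg.cholesky(x)  # raises LinAlgError unless x is PD
  diag = np.diagonal(chol, axis1=-2, axis2=-1)
  return 2. * np.log(diag).sum(axis=-1)
```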
-
-
-def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
- """Returns a uniformly random `Tensor` of "correlation-like" matrices.
-
- A "correlation-like" matrix is a symmetric square matrix with all entries
- between -1 and 1 (inclusive) and 1s on the main diagonal. Of these,
- the ones that are positive semi-definite are exactly the correlation
- matrices.
-
- Args:
- num_rows: Python `int` dimension of the correlation-like matrices.
- batch_shape: `Tensor` or Python `tuple` of `int` shape of the
- batch to return.
- dtype: `dtype` of the `Tensor` to return.
- seed: Random seed.
-
- Returns:
- matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]`
- and dtype `dtype`. Each entry is in [-1, 1], and each matrix
- along the bottom two dimensions is symmetric and has 1s on the
- main diagonal.
- """
- num_entries = num_rows * (num_rows + 1) // 2  # Integer, since it is used as a shape.
- ones = array_ops.ones(shape=[num_entries], dtype=dtype)
- # It seems wasteful to generate random values for the diagonal since
- # I am going to throw them away, but `fill_triangular` fills the
- # diagonal, so I probably need them.
- # It's not impossible that it would be more efficient to just fill
- # the whole matrix with random values instead of messing with
- # `fill_triangular`. Then one would need to filter almost half out with
- # `matrix_band_part`.
- unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
- tril = util.fill_triangular(unifs)
- symmetric = tril + array_ops.matrix_transpose(tril)
- diagonal_ones = array_ops.ones(
- shape=util.pad(batch_shape, axis=0, back=True, value=num_rows),
- dtype=dtype)
- return array_ops.matrix_set_diag(symmetric, diagonal_ones)
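(Note: the "fill the whole matrix" alternative mentioned in the comment above would look roughly like this NumPy sketch; illustrative only, and not benchmarked against the `fill_triangular` route.)
```
import numpy as np

def uniform_correlation_like(num_rows, batch_shape, seed=42):
  """Sketch: draw a full uniform matrix, keep the strictly lower
  triangle, mirror it, and reset the diagonal to 1s."""
  rng = np.random.RandomState(seed)
  u = rng.uniform(-1., 1., size=tuple(batch_shape) + (num_rows, num_rows))
  tril = np.tril(u, k=-1)  # discard the diagonal and upper triangle
  sym = tril + np.swapaxes(tril, -1, -2)
  i = np.arange(num_rows)
  sym[..., i, i] = 1.  # 1s on the main diagonal
  return sym
```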
-
-
-def correlation_matrix_volume_rejection_samples(
- det_bounds, dim, sample_shape, dtype, seed):
- """Returns rejection samples from trying to get good correlation matrices.
-
- The proposal being rejected from is the uniform distribution on
- "correlation-like" matrices. We say a matrix is "correlation-like"
- if it is a symmetric square matrix with all entries between -1 and 1
- (inclusive) and 1s on the main diagonal. Of these, the ones that
- are positive semi-definite are exactly the correlation matrices.
-
- The rejection algorithm, then, is to sample a `Tensor` of
- `sample_shape` correlation-like matrices of dimensions `dim` by
- `dim`, and check each one for (i) being a correlation matrix (i.e.,
- PSD), and (ii) having determinant at least the corresponding entry
- of `det_bounds`.
-
- Args:
- det_bounds: A `Tensor` of lower bounds on the determinants of
- acceptable matrices. The shape must broadcast with `sample_shape`.
- dim: A Python `int` dimension of correlation matrices to sample.
- sample_shape: Python `tuple` of `int` shape of the samples to
- compute, excluding the two matrix dimensions.
- dtype: The `dtype` in which to do the computation.
- seed: Random seed.
-
- Returns:
- weights: A `Tensor` of shape `sample_shape`. Each entry is 0 if the
- corresponding matrix was not a correlation matrix, or had too
- small of a determinant. Otherwise, the entry is the
- multiplicative inverse of the density of proposing that matrix
- uniformly, i.e., the volume of the set of `dim` by `dim`
- correlation-like matrices.
- volume: The volume of the set of `dim` by `dim` correlation-like
- matrices.
- """
- with ops.name_scope("rejection_sampler"):
- rej_proposals = _uniform_correlation_like_matrix(
- dim, sample_shape, dtype, seed=seed)
- rej_proposal_volume = 2. ** (dim * (dim - 1) / 2.)
- # The density of proposing any given point is 1 / rej_proposal_volume;
- # The weight of that point should be scaled by
- # 1 / density = rej_proposal_volume.
- rej_weights = rej_proposal_volume * _psd_mask(
- rej_proposals) * _det_large_enough_mask(rej_proposals, det_bounds)
- return rej_weights, rej_proposal_volume
-
-
-def _clopper_pearson_confidence_interval(samples, error_rate):
- """Computes a confidence interval for the mean of the given 1-D distribution.
-
- Assumes (and checks) that the given distribution is Bernoulli, i.e.,
- takes only two values. This licenses using the CDF of the binomial
- distribution for the confidence, which is tighter (for extreme
- probabilities) than the DKWM inequality. The method is known as the
- [Clopper-Pearson method]
- (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval).
-
- Assumes:
-
- - The given samples were drawn iid from the distribution of interest.
-
- - The given distribution is a Bernoulli, i.e., supported only on
- `low` and `high`.
-
- Guarantees:
-
- - The probability (over the randomness of drawing the given sample)
- that the true mean is outside the returned interval is no more
- than the given error_rate.
-
- Args:
- samples: `np.ndarray` of samples drawn iid from the distribution
- of interest.
- error_rate: Python `float` admissible rate of mistakes.
-
- Returns:
- low: Lower bound of confidence interval.
- high: Upper bound of confidence interval.
-
- Raises:
- ValueError: If `samples` has rank other than 1 (batch semantics
- are not implemented), or if `samples` contains values other than
- `low` or `high` (as that makes the distribution not Bernoulli).
- """
- # TODO(b/78025336) Migrate this confidence interval function
- # to statistical_testing.py. In order to do that
- # - Get the binomial CDF from the Binomial distribution
- # - Implement scalar root finding in TF. Batch bisection search
- # shouldn't be too hard, and is definitely good enough for this
- # problem. Batching the Brent algorithm (from scipy) that is used
- # here may be more involved, but may also not be necessary---it's
- # only used here because scipy made it convenient. In particular,
- # robustness is more important than speed here, which may make
- # bisection search actively better.
- # - The rest is just a matter of rewriting in the appropriate style.
- if optimize is None or stats is None:
- raise ValueError(
- "Scipy is required for computing Clopper-Pearson confidence intervals")
- if len(samples.shape) != 1:
- raise ValueError("Batch semantics not implemented")
- n = len(samples)
- low = np.amin(samples)
- high = np.amax(samples)
- successes = np.count_nonzero(samples - low)
- failures = np.count_nonzero(samples - high)
- if successes + failures != n:
- uniques = np.unique(samples)
- msg = ("Purportedly Bernoulli distribution had distinct samples"
- " {}, {}, and {}".format(uniques[0], uniques[1], uniques[2]))
- raise ValueError(msg)
- def p_small_enough(p):
- prob = stats.binom.logcdf(successes, n, p)
- return prob - np.log(error_rate / 2.)
- def p_big_enough(p):
- prob = stats.binom.logsf(successes, n, p)
- return prob - np.log(error_rate / 2.)
- high_p = optimize.brentq(
- p_small_enough, float(successes) / n, 1., rtol=1e-9)
- low_p = optimize.brentq(
- p_big_enough, 0., float(successes) / n, rtol=1e-9)
- low_interval = low + (high - low) * low_p
- high_interval = low + (high - low) * high_p
- return (low_interval, high_interval)
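(Note: a hypothetical usage of the function above, on the kind of two-valued weights the rejection sampler produces; the numbers are made up for illustration.)
```
import numpy as np

# Suppose 10,000 draws of 2x2 rejection weights, of which 1,300 were
# accepted (weight 2.0 = the proposal volume) and the rest rejected.
samples = np.concatenate([np.full(1300, 2.), np.zeros(8700)])
low, high = _clopper_pearson_confidence_interval(samples, error_rate=1e-6)
# With probability >= 1 - 1e-6 (over the sampling), the true mean
# weight, i.e. the constrained volume, lies in [low, high].
print(low, high)
```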
-
-
-def compute_true_volumes(
- det_bounds, dim, num_samples, error_rate=1e-6, seed=42):
- """Returns confidence intervals for the desired correlation matrix volumes.
-
- The confidence intervals are computed by the [Clopper-Pearson method]
- (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval).
-
- Args:
- det_bounds: A rank-1 numpy array of lower bounds on the
- determinants of acceptable matrices. Entries must be unique.
- dim: A Python `int` dimension of correlation matrices to sample.
- num_samples: The number of samples to draw.
- error_rate: The statistical significance of the returned
- confidence intervals. The significance is broadcast: Each
- returned interval separately may be incorrect with probability
- (under the sample of correlation-like matrices drawn internally)
- at most `error_rate`.
- seed: Random seed.
-
- Returns:
- bounds: A Python `dict` mapping each determinant bound to the low, high
- tuple giving the confidence interval.
- """
- bounds = {}
- with session.Session() as sess:
- rej_weights, _ = correlation_matrix_volume_rejection_samples(
- det_bounds, dim, [num_samples, len(det_bounds)], np.float32, seed=seed)
- rej_weights = sess.run(rej_weights)
- for rw, det in zip(np.rollaxis(rej_weights, 1), det_bounds):
- template = ("Estimating volume of {}x{} correlation "
- "matrices with determinant >= {}.")
- print(template.format(dim, dim, det))
- sys.stdout.flush()
- bounds[det] = _clopper_pearson_confidence_interval(
- rw, error_rate=error_rate)
- return bounds
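(Note: end to end, the library's entry point is used as sketched below, following the signatures above. The exact 2x2 volume 2*sqrt(1 - 0.25) ~= 1.732 makes a quick sanity check.)
```
from tensorflow.contrib.distributions.python.kernel_tests.util import (
    correlation_matrix_volumes_lib as corr)

bounds = corr.compute_true_volumes(
    det_bounds=[0.25], dim=2, num_samples=int(1e4), error_rate=1e-6, seed=42)
low, high = bounds[0.25]
# Expect low < 1.732... < high, up to the 1e-6 error rate.
```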
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py
deleted file mode 100644
index 8f99300e63..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for correlation_matrix_volumes_lib.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr
-from tensorflow.contrib.distributions.python.ops import statistical_testing as st
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.platform import test
-
-
-# NxN correlation matrices are determined by the N*(N-1)/2
-# lower-triangular entries. In addition to being between -1 and 1,
-# those entries must make the resulting symmetric matrix positive
-# semi-definite; in 2x2 that reduces to the determinant 1 - r**2 being
-# non-negative. In 2x2, we can even analytically compute the volume
-# when the determinant is bounded to > epsilon, as that boils down to
-# the one lower-triangular entry r being less than sqrt(1 - epsilon)
-# in absolute value.
-def two_by_two_volume(det_bound):
- return 2 * np.sqrt(1.0 - det_bound)
-
-
-# The post
-# https://psychometroscar.com/the-volume-of-a-3-x-3-correlation-matrix/
-# derives (with elementary calculus) that the volume (with respect to
-# Lebesgue^3 measure) of the set of 3x3 correlation matrices is
-# pi^2/2. The same result is also obtained by [1].
-def three_by_three_volume():
- return np.pi**2 / 2.
-
-
-# The volume of the unconstrained set of correlation matrices is also
-# the normalization constant of the LKJ distribution from [2]. As
-# part of defining the distribution, that reference derives a general
-# formula for this volume for all dimensions. A TensorFlow
-# computation thereof gave the below result for 4x4:
-def four_by_four_volume():
- # This constant was computed as math_ops.exp(lkj.log_norm_const(4, [1.0]))
- return 11.6973076
-
-# [1] Rousseeuw, P. J., & Molenberghs, G. (1994). "The shape of
-# correlation matrices." The American Statistician, 48(4), 276-279.
-
-# [2] Daniel Lewandowski, Dorota Kurowicka, and Harry Joe, "Generating
-# random correlation matrices based on vines and extended onion
-# method," Journal of Multivariate Analysis 100 (2009), pp 1989-2001.
-
-
-class CorrelationMatrixVolumesTest(test.TestCase):
-
- def testRejection2D(self):
- num_samples = int(1e5) # Chosen for a small min detectable discrepancy
- det_bounds = np.array(
- [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32)
- exact_volumes = two_by_two_volume(det_bounds)
- (rej_weights,
- rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
- det_bounds, 2, [num_samples, 9], dtype=np.float32, seed=43)
- # rej_weights has shape [num_samples, 9] (proposals were [num_samples, 9, 2, 2])
- chk1 = st.assert_true_mean_equal_by_dkwm(
- rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
- false_fail_rate=1e-6)
- chk2 = check_ops.assert_less(
- st.min_discrepancy_of_true_means_detectable_by_dkwm(
- num_samples, low=0., high=rej_proposal_volume,
- # Correct the false fail rate due to different broadcasting
- false_fail_rate=1.1e-7, false_pass_rate=1e-6),
- 0.036)
- with ops.control_dependencies([chk1, chk2]):
- rej_weights = array_ops.identity(rej_weights)
- self.evaluate(rej_weights)
-
- def testRejection3D(self):
- num_samples = int(1e5) # Chosen for a small min detectable discrepancy
- det_bounds = np.array([0.0], dtype=np.float32)
- exact_volumes = np.array([three_by_three_volume()], dtype=np.float32)
- (rej_weights,
- rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
- det_bounds, 3, [num_samples, 1], dtype=np.float32, seed=44)
- # rej_weights has shape [num_samples, 1] (proposals were [num_samples, 1, 3, 3])
- chk1 = st.assert_true_mean_equal_by_dkwm(
- rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
- false_fail_rate=1e-6)
- chk2 = check_ops.assert_less(
- st.min_discrepancy_of_true_means_detectable_by_dkwm(
- num_samples, low=0., high=rej_proposal_volume,
- false_fail_rate=1e-6, false_pass_rate=1e-6),
- # Going for about a 3% relative error
- 0.15)
- with ops.control_dependencies([chk1, chk2]):
- rej_weights = array_ops.identity(rej_weights)
- self.evaluate(rej_weights)
-
- def testRejection4D(self):
- num_samples = int(1e5) # Chosen for a small min detectable discrepancy
- det_bounds = np.array([0.0], dtype=np.float32)
- exact_volumes = [four_by_four_volume()]
- (rej_weights,
- rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
- det_bounds, 4, [num_samples, 1], dtype=np.float32, seed=45)
- # rej_weights has shape [num_samples, 1] (proposals were [num_samples, 1, 4, 4])
- chk1 = st.assert_true_mean_equal_by_dkwm(
- rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
- false_fail_rate=1e-6)
- chk2 = check_ops.assert_less(
- st.min_discrepancy_of_true_means_detectable_by_dkwm(
- num_samples, low=0., high=rej_proposal_volume,
- false_fail_rate=1e-6, false_pass_rate=1e-6),
- # Going for about a 10% relative error
- 1.1)
- with ops.control_dependencies([chk1, chk2]):
- rej_weights = array_ops.identity(rej_weights)
- self.evaluate(rej_weights)
-
- def testVolumeEstimation2D(self):
- # Test that the confidence intervals produced by
- # corr.compute_true_volumes are sound, in the sense of containing
- # the exact volume.
- num_samples = int(1e5) # Chosen by symmetry with testRejection2D
- det_bounds = np.array(
- [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32)
- volume_bounds = corr.compute_true_volumes(
- det_bounds, 2, num_samples, error_rate=1e-6, seed=47)
- exact_volumes = two_by_two_volume(det_bounds)
- for det, volume in zip(det_bounds, exact_volumes):
- computed_low, computed_high = volume_bounds[det]
- self.assertLess(computed_low, volume)
- self.assertGreater(computed_high, volume)
-
-if __name__ == "__main__":
- test.main()