| author | Alexey Radul <axch@google.com> | 2018-09-25 15:04:25 -0700 |
|---|---|---|
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-25 15:08:40 -0700 |
| commit | ad27440a79c30a53f9fd2a3171a2c2da6ff37820 (patch) | |
| tree | 09a553424ae80697f6cbce3fb2996770288ed341 /tensorflow/contrib/distributions | |
| parent | 20c71535c5f1ed1d918d6cc6e327ffbba49ecbd6 (diff) | |
Move the correlation matrix volumes computation for testing the LKJ distribution from tf/contrib to tfp.
Relevant to tensorflow/tensorflow#21909
PiperOrigin-RevId: 214511101
Diffstat (limited to 'tensorflow/contrib/distributions')

 tensorflow/contrib/distributions/python/kernel_tests/util/BUILD                              |  51 -
 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py      |  98 -
 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py  | 323 -
 tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py | 150 -

4 files changed, 0 insertions, 622 deletions
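For orientation before the diff: the deleted utility estimates the volume of the set of correlation matrices with determinant at least some bound by importance sampling. It proposes uniformly from symmetric matrices with unit diagonal and entries in [-1, 1], and keeps only the proposals that are positive semi-definite (i.e., actual correlation matrices) with a large enough determinant. The following is a minimal NumPy sketch of that estimator, not the deleted TensorFlow code (which batches the same computation); the name `estimate_volume` and the unbatched loop are illustrative assumptions.

```python
# Hypothetical NumPy sketch of the estimator implemented by the deleted
# correlation_matrix_volumes_lib.py; for illustration only.
import numpy as np


def estimate_volume(dim, det_bound, num_samples, seed=42):
  """Monte Carlo estimate of Vol{correlation matrices with det >= det_bound}."""
  rng = np.random.RandomState(seed)
  num_free_entries = dim * (dim - 1) // 2  # strictly lower-triangular entries
  # Volume of the proposal set: each free entry ranges over [-1, 1].
  proposal_volume = 2.0 ** num_free_entries
  hits = 0
  for _ in range(num_samples):
    # Propose a "correlation-like" matrix: symmetric, unit diagonal,
    # off-diagonal entries uniform in [-1, 1].
    m = np.eye(dim)
    m[np.tril_indices(dim, -1)] = rng.uniform(-1., 1., size=num_free_entries)
    m = m + np.tril(m, -1).T
    # Accept iff positive semi-definite (i.e., a correlation matrix)
    # with determinant at least det_bound.
    if np.linalg.eigvalsh(m).min() >= 0. and np.linalg.det(m) >= det_bound:
      hits += 1
  return proposal_volume * hits / num_samples
```

With `dim=3`, `det_bound=0.`, and enough samples, the estimate approaches pi^2/2 (about 4.93), the exact 3x3 volume cited in the deleted test file below.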
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD b/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD
deleted file mode 100644
index 42ecea034d..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/BUILD
+++ /dev/null
@@ -1,51 +0,0 @@
-# Description:
-#   Internal testing utilities, e.g., computing the correct answer to
-#   put in a unit test.
-
-licenses(["notice"])  # Apache 2.0
-
-py_library(
-    name = "correlation_matrix_volumes_py",
-    srcs = [
-        "correlation_matrix_volumes_lib.py",
-    ],
-    deps = [
-        "//tensorflow/contrib/distributions:distributions_py",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:errors",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:math_ops",
-        "//third_party/py/numpy",
-    ],
-)
-
-py_binary(
-    name = "correlation_matrix_volumes",
-    srcs = [
-        "correlation_matrix_volumes.py",
-    ],
-    deps = [
-        ":correlation_matrix_volumes_py",
-    ],
-)
-
-py_test(
-    name = "correlation_matrix_volumes_test",
-    size = "medium",
-    srcs = ["correlation_matrix_volumes_test.py"],
-    tags = [
-        "no_pip",
-        "optonly",
-    ],
-    deps = [
-        ":correlation_matrix_volumes_py",
-        # For statistical testing
-        "//tensorflow/contrib/distributions:distributions_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:check_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-    ],
-)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py
deleted file mode 100644
index 2eab51cd30..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Executable to estimate the volume of various sets of correlation matrices.
-
-See correlation_matrix_volumes_lib.py for purpose and methodology.
-
-Invocation example:
-```
-python correlation_matrix_volumes.py --num_samples 1e7
-```
-
-This will compute 10,000,000-sample confidence intervals for the
-volumes of several sets of correlation matrices. Which sets, and the
-desired statistical significance, are hard-coded in this source file.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import pprint
-
-from absl import app
-from absl import flags
-
-from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr
-
-FLAGS = flags.FLAGS
-
-# Float to support giving the number of samples in scientific notation.
-# The production run used for the LKJ test used 1e7 samples.
-flags.DEFINE_float('num_samples', 1e4, 'Number of samples to use.')
-
-
-def ctv_debatched(det_bounds, dim, num_samples, error_rate=1e-6, seed=42):
-  # This wrapper undoes the batching in compute_true_volumes, because
-  # apparently several 5x5x9x1e7 Tensors of float32 can strain RAM.
-  bounds = {}
-  for db in det_bounds:
-    bounds[db] = corr.compute_true_volumes(
-        [db], dim, num_samples, error_rate=error_rate, seed=seed)[db]
-  return bounds
-
-
-# The particular bounds in all three of these functions were chosen by
-# a somewhat arbitrary walk through an empirical tradeoff, for the
-# purpose of testing the LKJ distribution. Setting the determinant
-# bound lower
-# - Covers more of the testee's sample space, and
-# - Increases the probability that the rejection sampler will hit, thus
-# - Decreases the relative error (at a fixed sample count) in the
-#   rejection-based volume estimate;
-# but also
-# - Increases the variance of the estimator used in the LKJ test.
-# This latter variance is also affected by the dimension and the
-# tested concentration parameter, and can be compensated for with more
-# compute (expensive) or a looser discrepancy limit (unsatisfying).
-# The values here are the projection of the points in that test design
-# space that ended up getting chosen.
-def compute_3x3_volumes(num_samples):
-  det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45]
-  return ctv_debatched(
-      det_bounds, 3, num_samples, error_rate=5e-7, seed=46)
-
-
-def compute_4x4_volumes(num_samples):
-  det_bounds = [0.01, 0.25, 0.3, 0.35, 0.4, 0.45]
-  return ctv_debatched(
-      det_bounds, 4, num_samples, error_rate=5e-7, seed=47)
-
-
-def compute_5x5_volumes(num_samples):
-  det_bounds = [0.01, 0.2, 0.25, 0.3, 0.35, 0.4]
-  return ctv_debatched(
-      det_bounds, 5, num_samples, error_rate=5e-7, seed=48)
-
-
-def main(_):
-  full_bounds = {}
-  full_bounds[3] = compute_3x3_volumes(int(FLAGS.num_samples))
-  full_bounds[4] = compute_4x4_volumes(int(FLAGS.num_samples))
-  full_bounds[5] = compute_5x5_volumes(int(FLAGS.num_samples))
-  pprint.pprint(full_bounds)
-
-if __name__ == '__main__':
-  app.run(main)
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py
deleted file mode 100644
index 455e71f00c..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_lib.py
+++ /dev/null
@@ -1,323 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Estimating the volume of the correlation matrices with bounded determinant.
-
-Why? Because lkj_test.py tests the sampler for the LKJ distribution
-by estimating the same volume another way.
-
-How? Rejection sampling. Or, more precisely, importance sampling,
-proposing from the uniform distribution on symmetric matrices with
-diagonal 1s and entries in [-1, 1]. Such a matrix is a correlation
-matrix if and only if it is also positive semi-definite.
-
-The samples can then be converted into a confidence interval on the
-volume in question by the [Clopper-Pearson
-method](https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval),
-also implemented here.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import importlib
-import sys
-
-import numpy as np
-
-from tensorflow.python.client import session
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import linalg_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops.distributions import uniform
-from tensorflow.python.ops.distributions import util
-from tensorflow.python.platform import tf_logging
-
-__all__ = [
-    "correlation_matrix_volume_rejection_samples",
-    "compute_true_volumes",
-]
-
-
-def try_import(name):  # pylint: disable=invalid-name
-  module = None
-  try:
-    module = importlib.import_module(name)
-  except ImportError as e:
-    tf_logging.warning("Could not import %s: %s" % (name, str(e)))
-  return module
-
-optimize = try_import("scipy.optimize")
-stats = try_import("scipy.stats")
-
-
-def _psd_mask(x):
-  """Computes whether each square matrix in the input is positive semi-definite.
-
-  Args:
-    x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`.
-
-  Returns:
-    mask: A floating-point `Tensor` of shape `[B1, ... Bn]`. Each
-      scalar is 1 if the corresponding matrix was PSD, otherwise 0.
-  """
-  # Allegedly
-  # https://scicomp.stackexchange.com/questions/12979/testing-if-a-matrix-is-positive-semi-definite
-  # it is more efficient to test for positive semi-definiteness by
-  # trying to compute the Cholesky decomposition -- the matrix is PSD
-  # if you succeed and not PSD if you fail. However, TensorFlow's
-  # Cholesky raises an exception if _any_ of the input matrices are
-  # not PSD, from which I don't know how to extract _which ones_, so I
-  # proceed by explicitly computing all the eigenvalues and checking
-  # whether they are all positive or not.
-  #
-  # Also, as was discussed in the answer, it is somewhat dangerous to
-  # treat SPD-ness as binary in floating-point arithmetic. Cholesky
-  # factorization can complete and 'look' like everything is fine
-  # (e.g., O(1) entries and a diagonal of all ones) but the matrix can
-  # have an exponential condition number.
-  eigenvalues, _ = linalg_ops.self_adjoint_eig(x)
-  return math_ops.cast(
-      math_ops.reduce_min(eigenvalues, axis=-1) >= 0, dtype=x.dtype)
-
-
-def _det_large_enough_mask(x, det_bounds):
-  """Returns whether the input matches the given determinant limit.
-
-  Args:
-    x: A floating-point `Tensor` of shape `[B1, ..., Bn, M, M]`.
-    det_bounds: A floating-point `Tensor` that must broadcast to shape
-      `[B1, ..., Bn]`, giving the desired lower bound on the
-      determinants in `x`.
-
-  Returns:
-    mask: A floating-point `Tensor` of shape [B1, ..., Bn]. Each
-      scalar is 1 if the corresponding matrix had determinant above
-      the corresponding bound, otherwise 0.
-  """
-  # For the curious: I wonder whether it is possible and desirable to
-  # use a Cholesky decomposition-based algorithm for this, since the
-  # only matrices whose determinant this code cares about will be PSD.
-  # Didn't figure out how to code that in TensorFlow.
-  #
-  # Expert opinion is that it would be about twice as fast since
-  # Cholesky is roughly half the cost of Gaussian Elimination with
-  # Partial Pivoting. But this is less of an impact than the switch in
-  # _psd_mask.
-  return math_ops.cast(
-      linalg_ops.matrix_determinant(x) > det_bounds, dtype=x.dtype)
-
-
-def _uniform_correlation_like_matrix(num_rows, batch_shape, dtype, seed):
-  """Returns a uniformly random `Tensor` of "correlation-like" matrices.
-
-  A "correlation-like" matrix is a symmetric square matrix with all entries
-  between -1 and 1 (inclusive) and 1s on the main diagonal. Of these,
-  the ones that are positive semi-definite are exactly the correlation
-  matrices.
-
-  Args:
-    num_rows: Python `int` dimension of the correlation-like matrices.
-    batch_shape: `Tensor` or Python `tuple` of `int` shape of the
-      batch to return.
-    dtype: `dtype` of the `Tensor` to return.
-    seed: Random seed.
-
-  Returns:
-    matrices: A `Tensor` of shape `batch_shape + [num_rows, num_rows]`
-      and dtype `dtype`. Each entry is in [-1, 1], and each matrix
-      along the bottom two dimensions is symmetric and has 1s on the
-      main diagonal.
-  """
-  num_entries = num_rows * (num_rows + 1) / 2
-  ones = array_ops.ones(shape=[num_entries], dtype=dtype)
-  # It seems wasteful to generate random values for the diagonal since
-  # I am going to throw them away, but `fill_triangular` fills the
-  # diagonal, so I probably need them.
-  # It's not impossible that it would be more efficient to just fill
-  # the whole matrix with random values instead of messing with
-  # `fill_triangular`. Then would need to filter almost half out with
-  # `matrix_band_part`.
-  unifs = uniform.Uniform(-ones, ones).sample(batch_shape, seed=seed)
-  tril = util.fill_triangular(unifs)
-  symmetric = tril + array_ops.matrix_transpose(tril)
-  diagonal_ones = array_ops.ones(
-      shape=util.pad(batch_shape, axis=0, back=True, value=num_rows),
-      dtype=dtype)
-  return array_ops.matrix_set_diag(symmetric, diagonal_ones)
-
-
-def correlation_matrix_volume_rejection_samples(
-    det_bounds, dim, sample_shape, dtype, seed):
-  """Returns rejection samples from trying to get good correlation matrices.
-
-  The proposal being rejected from is the uniform distribution on
-  "correlation-like" matrices. We say a matrix is "correlation-like"
-  if it is a symmetric square matrix with all entries between -1 and 1
-  (inclusive) and 1s on the main diagonal. Of these, the ones that
-  are positive semi-definite are exactly the correlation matrices.
-
-  The rejection algorithm, then, is to sample a `Tensor` of
-  `sample_shape` correlation-like matrices of dimensions `dim` by
-  `dim`, and check each one for (i) being a correlation matrix (i.e.,
-  PSD), and (ii) having determinant at least the corresponding entry
-  of `det_bounds`.
-
-  Args:
-    det_bounds: A `Tensor` of lower bounds on the determinants of
-      acceptable matrices. The shape must broadcast with `sample_shape`.
-    dim: A Python `int` dimension of correlation matrices to sample.
-    sample_shape: Python `tuple` of `int` shape of the samples to
-      compute, excluding the two matrix dimensions.
-    dtype: The `dtype` in which to do the computation.
-    seed: Random seed.
-
-  Returns:
-    weights: A `Tensor` of shape `sample_shape`. Each entry is 0 if the
-      corresponding matrix was not a correlation matrix, or had too
-      small of a determinant. Otherwise, the entry is the
-      multiplicative inverse of the density of proposing that matrix
-      uniformly, i.e., the volume of the set of `dim` by `dim`
-      correlation-like matrices.
-    volume: The volume of the set of `dim` by `dim` correlation-like
-      matrices.
-  """
-  with ops.name_scope("rejection_sampler"):
-    rej_proposals = _uniform_correlation_like_matrix(
-        dim, sample_shape, dtype, seed=seed)
-    rej_proposal_volume = 2. ** (dim * (dim - 1) / 2.)
-    # The density of proposing any given point is 1 / rej_proposal_volume;
-    # The weight of that point should be scaled by
-    # 1 / density = rej_proposal_volume.
-    rej_weights = rej_proposal_volume * _psd_mask(
-        rej_proposals) * _det_large_enough_mask(rej_proposals, det_bounds)
-    return rej_weights, rej_proposal_volume
-
-
-def _clopper_pearson_confidence_interval(samples, error_rate):
-  """Computes a confidence interval for the mean of the given 1-D distribution.
-
-  Assumes (and checks) that the given distribution is Bernoulli, i.e.,
-  takes only two values. This licenses using the CDF of the binomial
-  distribution for the confidence, which is tighter (for extreme
-  probabilities) than the DKWM inequality. The method is known as the
-  [Clopper-Pearson method]
-  (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval).
-
-  Assumes:
-
-  - The given samples were drawn iid from the distribution of interest.
-
-  - The given distribution is a Bernoulli, i.e., supported only on
-    low and high.
-
-  Guarantees:
-
-  - The probability (over the randomness of drawing the given sample)
-    that the true mean is outside the returned interval is no more
-    than the given error_rate.
-
-  Args:
-    samples: `np.ndarray` of samples drawn iid from the distribution
-      of interest.
-    error_rate: Python `float` admissible rate of mistakes.
-
-  Returns:
-    low: Lower bound of confidence interval.
-    high: Upper bound of confidence interval.
-
-  Raises:
-    ValueError: If `samples` has rank other than 1 (batch semantics
-      are not implemented), or if `samples` contains values other than
-      `low` or `high` (as that makes the distribution not Bernoulli).
-  """
-  # TODO(b/78025336) Migrate this confidence interval function
-  # to statistical_testing.py. In order to do that
-  # - Get the binomial CDF from the Binomial distribution
-  # - Implement scalar root finding in TF. Batch bisection search
-  #   shouldn't be too hard, and is definitely good enough for this
-  #   problem. Batching the Brent algorithm (from scipy) that is used
-  #   here may be more involved, but may also not be necessary---it's
-  #   only used here because scipy made it convenient. In particular,
-  #   robustness is more important than speed here, which may make
-  #   bisection search actively better.
-  # - The rest is just a matter of rewriting in the appropriate style.
-  if optimize is None or stats is None:
-    raise ValueError(
-        "Scipy is required for computing Clopper-Pearson confidence intervals")
-  if len(samples.shape) != 1:
-    raise ValueError("Batch semantics not implemented")
-  n = len(samples)
-  low = np.amin(samples)
-  high = np.amax(samples)
-  successes = np.count_nonzero(samples - low)
-  failures = np.count_nonzero(samples - high)
-  if successes + failures != n:
-    uniques = np.unique(samples)
-    msg = ("Purportedly Bernoulli distribution had distinct samples"
-           " {}, {}, and {}".format(uniques[0], uniques[1], uniques[2]))
-    raise ValueError(msg)
-  def p_small_enough(p):
-    prob = stats.binom.logcdf(successes, n, p)
-    return prob - np.log(error_rate / 2.)
-  def p_big_enough(p):
-    prob = stats.binom.logsf(successes, n, p)
-    return prob - np.log(error_rate / 2.)
-  high_p = optimize.brentq(
-      p_small_enough, float(successes) / n, 1., rtol=1e-9)
-  low_p = optimize.brentq(
-      p_big_enough, 0., float(successes) / n, rtol=1e-9)
-  low_interval = low + (high - low) * low_p
-  high_interval = low + (high - low) * high_p
-  return (low_interval, high_interval)
-
-
-def compute_true_volumes(
-    det_bounds, dim, num_samples, error_rate=1e-6, seed=42):
-  """Returns confidence intervals for the desired correlation matrix volumes.
-
-  The confidence intervals are computed by the [Clopper-Pearson method]
-  (https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval).
-
-  Args:
-    det_bounds: A rank-1 numpy array of lower bounds on the
-      determinants of acceptable matrices. Entries must be unique.
-    dim: A Python `int` dimension of correlation matrices to sample.
-    num_samples: The number of samples to draw.
-    error_rate: The statistical significance of the returned
-      confidence intervals. The significance is broadcast: Each
-      returned interval separately may be incorrect with probability
-      (under the sample of correlation-like matrices drawn internally)
-      at most `error_rate`.
-    seed: Random seed.
-
-  Returns:
-    bounds: A Python `dict` mapping each determinant bound to the low, high
-      tuple giving the confidence interval.
-  """
-  bounds = {}
-  with session.Session() as sess:
-    rej_weights, _ = correlation_matrix_volume_rejection_samples(
-        det_bounds, dim, [num_samples, len(det_bounds)], np.float32, seed=seed)
-    rej_weights = sess.run(rej_weights)
-    for rw, det in zip(np.rollaxis(rej_weights, 1), det_bounds):
-      template = ("Estimating volume of {}x{} correlation "
-                  "matrices with determinant >= {}.")
-      print(template.format(dim, dim, det))
-      sys.stdout.flush()
-      bounds[det] = _clopper_pearson_confidence_interval(
-          rw, error_rate=error_rate)
-  return bounds
diff --git a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py b/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py
deleted file mode 100644
index 8f99300e63..0000000000
--- a/tensorflow/contrib/distributions/python/kernel_tests/util/correlation_matrix_volumes_test.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for correlation_matrix_volumes_lib.py."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.distributions.python.kernel_tests.util import correlation_matrix_volumes_lib as corr
-from tensorflow.contrib.distributions.python.ops import statistical_testing as st
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import check_ops
-from tensorflow.python.platform import test
-
-
-# NxN correlation matrices are determined by the N*(N-1)/2
-# lower-triangular entries. In addition to being between -1 and 1,
-# they must also obey the constraint that the determinant of the
-# resulting symmetric matrix is non-negative. In 2x2, we can even
-# analytically compute the volume when the determinant is bounded
-# below by epsilon, as that boils down to the one lower-triangular
-# entry being less than sqrt(1 - epsilon) in absolute value.
-def two_by_two_volume(det_bound):
-  return 2 * np.sqrt(1.0 - det_bound)
-
-
-# The post
-# https://psychometroscar.com/the-volume-of-a-3-x-3-correlation-matrix/
-# derives (with elementary calculus) that the volume (with respect to
-# Lebesgue^3 measure) of the set of 3x3 correlation matrices is
-# pi^2/2. The same result is also obtained by [1].
-def three_by_three_volume():
-  return np.pi**2 / 2.
-
-
-# The volume of the unconstrained set of correlation matrices is also
-# the normalization constant of the LKJ distribution from [2]. As
-# part of defining the distribution, that reference derives a general
-# formula for this volume for all dimensions. A TensorFlow
-# computation thereof gave the below result for 4x4:
-def four_by_four_volume():
-  # This constant computed as math_ops.exp(lkj.log_norm_const(4, [1.0]))
-  return 11.6973076
-
-# [1] Rousseeuw, P. J., & Molenberghs, G. (1994). "The shape of
-# correlation matrices." The American Statistician, 48(4), 276-279.
-
-# [2] Daniel Lewandowski, Dorota Kurowicka, and Harry Joe, "Generating
-# random correlation matrices based on vines and extended onion
-# method," Journal of Multivariate Analysis 100 (2009), pp 1989-2001.
-
-
-class CorrelationMatrixVolumesTest(test.TestCase):
-
-  def testRejection2D(self):
-    num_samples = int(1e5)  # Chosen for a small min detectable discrepancy
-    det_bounds = np.array(
-        [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32)
-    exact_volumes = two_by_two_volume(det_bounds)
-    (rej_weights,
-     rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
-         det_bounds, 2, [num_samples, 9], dtype=np.float32, seed=43)
-    # shape of rej_weights: [num_samples, 9, 2, 2]
-    chk1 = st.assert_true_mean_equal_by_dkwm(
-        rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
-        false_fail_rate=1e-6)
-    chk2 = check_ops.assert_less(
-        st.min_discrepancy_of_true_means_detectable_by_dkwm(
-            num_samples, low=0., high=rej_proposal_volume,
-            # Correct the false fail rate due to different broadcasting
-            false_fail_rate=1.1e-7, false_pass_rate=1e-6),
-        0.036)
-    with ops.control_dependencies([chk1, chk2]):
-      rej_weights = array_ops.identity(rej_weights)
-    self.evaluate(rej_weights)
-
-  def testRejection3D(self):
-    num_samples = int(1e5)  # Chosen for a small min detectable discrepancy
-    det_bounds = np.array([0.0], dtype=np.float32)
-    exact_volumes = np.array([three_by_three_volume()], dtype=np.float32)
-    (rej_weights,
-     rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
-         det_bounds, 3, [num_samples, 1], dtype=np.float32, seed=44)
-    # shape of rej_weights: [num_samples, 1, 3, 3]
-    chk1 = st.assert_true_mean_equal_by_dkwm(
-        rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
-        false_fail_rate=1e-6)
-    chk2 = check_ops.assert_less(
-        st.min_discrepancy_of_true_means_detectable_by_dkwm(
-            num_samples, low=0., high=rej_proposal_volume,
-            false_fail_rate=1e-6, false_pass_rate=1e-6),
-        # Going for about a 3% relative error
-        0.15)
-    with ops.control_dependencies([chk1, chk2]):
-      rej_weights = array_ops.identity(rej_weights)
-    self.evaluate(rej_weights)
-
-  def testRejection4D(self):
-    num_samples = int(1e5)  # Chosen for a small min detectable discrepancy
-    det_bounds = np.array([0.0], dtype=np.float32)
-    exact_volumes = [four_by_four_volume()]
-    (rej_weights,
-     rej_proposal_volume) = corr.correlation_matrix_volume_rejection_samples(
-         det_bounds, 4, [num_samples, 1], dtype=np.float32, seed=45)
-    # shape of rej_weights: [num_samples, 1, 4, 4]
-    chk1 = st.assert_true_mean_equal_by_dkwm(
-        rej_weights, low=0., high=rej_proposal_volume, expected=exact_volumes,
-        false_fail_rate=1e-6)
-    chk2 = check_ops.assert_less(
-        st.min_discrepancy_of_true_means_detectable_by_dkwm(
-            num_samples, low=0., high=rej_proposal_volume,
-            false_fail_rate=1e-6, false_pass_rate=1e-6),
-        # Going for about a 10% relative error
-        1.1)
-    with ops.control_dependencies([chk1, chk2]):
-      rej_weights = array_ops.identity(rej_weights)
-    self.evaluate(rej_weights)
-
-  def testVolumeEstimation2D(self):
-    # Test that the confidence intervals produced by
-    # corr.compute_true_volumes are sound, in the sense of containing
-    # the exact volume.
-    num_samples = int(1e5)  # Chosen by symmetry with testRejection2D
-    det_bounds = np.array(
-        [0.01, 0.02, 0.03, 0.04, 0.05, 0.3, 0.35, 0.4, 0.5], dtype=np.float32)
-    volume_bounds = corr.compute_true_volumes(
-        det_bounds, 2, num_samples, error_rate=1e-6, seed=47)
-    exact_volumes = two_by_two_volume(det_bounds)
-    for det, volume in zip(det_bounds, exact_volumes):
-      computed_low, computed_high = volume_bounds[det]
-      self.assertLess(computed_low, volume)
-      self.assertGreater(computed_high, volume)
-
-if __name__ == "__main__":
-  test.main()
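The `_clopper_pearson_confidence_interval` helper in the deleted library inverts the binomial CDF numerically: each interval endpoint is the success probability at which the observed success count becomes just barely surprising at the `error_rate / 2` level on that side. A minimal standalone sketch of that inversion follows; the function name `clopper_pearson` and its signature are mine, and the deleted helper additionally rescales from two-valued samples to a Bernoulli before inverting.

```python
# Standalone sketch of the Clopper-Pearson inversion performed by the deleted
# _clopper_pearson_confidence_interval; illustrative, not the original API.
import numpy as np
from scipy import optimize, stats


def clopper_pearson(successes, n, error_rate=1e-6):
  """Confidence interval for a binomial proportion, by inverting the CDF.

  Assumes 0 < successes < n, so both root-finding brackets are non-degenerate.
  """
  def p_small_enough(p):
    # Positive while observing <= `successes` hits is unsurprising under p.
    return stats.binom.logcdf(successes, n, p) - np.log(error_rate / 2.)
  def p_big_enough(p):
    # Positive while observing > `successes` hits is unsurprising under p.
    return stats.binom.logsf(successes, n, p) - np.log(error_rate / 2.)
  high_p = optimize.brentq(p_small_enough, float(successes) / n, 1., rtol=1e-9)
  low_p = optimize.brentq(p_big_enough, 0., float(successes) / n, rtol=1e-9)
  return low_p, high_p
```

Scaling the resulting proportion interval by the proposal volume (2^(dim*(dim-1)/2), e.g. 8 in the 3x3 case) yields the volume interval that `compute_true_volumes` reports and that `testVolumeEstimation2D` checks against the exact 2x2 volumes.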