path: root/tensorflow/contrib/bayesflow
author    Joshua V. Dillon <jvdillon@google.com>           2018-03-26 18:50:27 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2018-03-26 18:52:45 -0700
commit 0be974c423f6e5c363db2d95ed335dde4cb4e69b (patch)
tree   21ebd16c26fa85b2dcaf8de3221404c1d37cfb68 /tensorflow/contrib/bayesflow
parent 931f6d553172ddfc9ec4a7a94ea2c6233bf33cb0 (diff)
Finish deprecation of tf.contrib.bayesflow.{HMC,MetropolisHastings}.
Diffstat (limited to 'tensorflow/contrib/bayesflow')
-rw-r--r--  tensorflow/contrib/bayesflow/BUILD                                            |  41
-rw-r--r--  tensorflow/contrib/bayesflow/README.md                                        |  17
-rw-r--r--  tensorflow/contrib/bayesflow/__init__.py                                      |   8
-rw-r--r--  tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py                  | 737
-rw-r--r--  tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py  | 340
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/hmc.py                                |  30
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/hmc_impl.py                           | 961
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py                |  34
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py           | 527
9 files changed, 17 insertions, 2678 deletions
diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD
index c6feec68e0..a55029b314 100644
--- a/tensorflow/contrib/bayesflow/BUILD
+++ b/tensorflow/contrib/bayesflow/BUILD
@@ -38,25 +38,6 @@ py_library(
)
cuda_py_test(
- name = "metropolis_hastings_test",
- size = "large",
- srcs = ["python/kernel_tests/metropolis_hastings_test.py"],
- additional_deps = [
- ":bayesflow_py",
- "//third_party/py/numpy",
- "//tensorflow/python:array_ops",
- "//tensorflow/python:math_ops",
- "//tensorflow/python:client_testlib",
- "//tensorflow/python:framework",
- "//tensorflow/python:framework_for_generated_wrappers",
- "//tensorflow/python:platform_test",
- "//tensorflow/python:random_ops",
- "//tensorflow/python:variable_scope",
- "//tensorflow/python:variables",
- ],
-)
-
-cuda_py_test(
name = "monte_carlo_test",
size = "small",
srcs = ["python/kernel_tests/monte_carlo_test.py"],
@@ -77,28 +58,6 @@ cuda_py_test(
],
)
-cuda_py_test(
- name = "hmc_test",
- size = "large",
- srcs = ["python/kernel_tests/hmc_test.py"],
- additional_deps = [
- ":bayesflow_py",
- "//third_party/py/numpy",
- "//tensorflow/contrib/distributions:distributions_py",
- "//tensorflow/contrib/layers:layers_py",
- "//tensorflow/python/ops/distributions",
- "//tensorflow/python:client_testlib",
- "//tensorflow/python:framework",
- "//tensorflow/python:framework_for_generated_wrappers",
- "//tensorflow/python:framework_test_lib",
- "//tensorflow/python:gradients",
- "//tensorflow/python:math_ops",
- "//tensorflow/python:platform_test",
- "//tensorflow/python:random_seed",
- ],
- tags = ["nomsan"],
-)
-
filegroup(
name = "all_files",
srcs = glob(
diff --git a/tensorflow/contrib/bayesflow/README.md b/tensorflow/contrib/bayesflow/README.md
new file mode 100644
index 0000000000..10323dc6d5
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/README.md
@@ -0,0 +1,17 @@
+# Notice
+
+`tf.contrib.bayesflow` has moved!
+
+See new code at [github.com/tensorflow/probability](
+https://github.com/tensorflow/probability).
+
+Switch imports with:
+
+```python
+# old
+import tensorflow as tf
+tfp = tf.contrib.bayesflow
+
+# new
+import tensorflow_probability as tfp
+```
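For anyone migrating code, the deprecated `hmc.sample_chain` call maps onto the relocated `tfp.mcmc` API roughly as follows. This is a minimal sketch using current TensorFlow Probability names (`tfp.mcmc.sample_chain`, `tfp.mcmc.HamiltonianMonteCarlo`); the exact module layout at the time of this commit may have differed.

```python
import tensorflow as tf
import tensorflow_probability as tfp

# Standard normal target; any callable returning a (possibly
# unnormalized) log-density works here.
target = tfp.distributions.Normal(loc=0., scale=1.)

# Rough tfp.mcmc equivalent of the deleted
# tf.contrib.bayesflow.hmc.sample_chain(...).
samples, kernel_results = tfp.mcmc.sample_chain(
    num_results=1000,
    current_state=tf.zeros([]),
    kernel=tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=target.log_prob,
        step_size=0.5,
        num_leapfrog_steps=2),
    num_burnin_steps=500)
```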
diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py
index f868203826..41a8c920fc 100644
--- a/tensorflow/contrib/bayesflow/__init__.py
+++ b/tensorflow/contrib/bayesflow/__init__.py
@@ -21,8 +21,6 @@ from __future__ import division
from __future__ import print_function
# pylint: disable=unused-import,line-too-long
-from tensorflow.contrib.bayesflow.python.ops import hmc
-from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings
from tensorflow.contrib.bayesflow.python.ops import monte_carlo
# pylint: enable=unused-import,line-too-long
@@ -30,13 +28,7 @@ from tensorflow.python.util.all_util import remove_undocumented
_allowed_symbols = [
- 'entropy',
- 'hmc',
- 'metropolis_hastings',
'monte_carlo',
- 'special_math',
- 'stochastic_variables',
- 'variational_inference',
]
remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
deleted file mode 100644
index dabadfc7b6..0000000000
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
+++ /dev/null
@@ -1,737 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Hamiltonian Monte Carlo."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-import numpy as np
-from scipy import stats
-
-from tensorflow.contrib.bayesflow.python.ops import hmc
-from tensorflow.contrib.bayesflow.python.ops.hmc_impl import _compute_energy_change
-from tensorflow.contrib.bayesflow.python.ops.hmc_impl import _leapfrog_integrator
-
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_linalg_ops
-from tensorflow.python.ops import gradients_impl as gradients_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import gamma as gamma_lib
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging as logging_ops
-
-
-def _reduce_variance(x, axis=None, keepdims=False):
- sample_mean = math_ops.reduce_mean(x, axis, keepdims=True)
- return math_ops.reduce_mean(
- math_ops.squared_difference(x, sample_mean), axis, keepdims)
-
-
-class HMCTest(test.TestCase):
-
- def setUp(self):
- self._shape_param = 5.
- self._rate_param = 10.
-
- random_seed.set_random_seed(10003)
- np.random.seed(10003)
-
- def assertAllFinite(self, x):
- self.assertAllEqual(np.ones_like(x).astype(bool), np.isfinite(x))
-
- def _log_gamma_log_prob(self, x, event_dims=()):
- """Computes log-pdf of a log-gamma random variable.
-
- Args:
- x: Value of the random variable.
- event_dims: Dimensions not to treat as independent.
-
- Returns:
- log_prob: The log-pdf up to a normalizing constant.
- """
- return math_ops.reduce_sum(self._shape_param * x -
- self._rate_param * math_ops.exp(x),
- event_dims)
-
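An aside, not part of the deleted file: the unnormalized log-density above is the exact log-pdf of `Y = log(X)` for `X ~ Gamma(shape, rate)`, up to the constant `shape * log(rate) - lgamma(shape)` (the change of variables contributes the log-Jacobian `y`). A quick scipy check of that claim, with values mirroring the test's parameters:

```python
import numpy as np
from scipy import special, stats

shape, rate = 5., 10.  # mirrors self._shape_param, self._rate_param
y = np.linspace(-3., 1., 5)

# The helper's unnormalized log-density: shape * y - rate * exp(y).
unnormalized = shape * y - rate * np.exp(y)

# Exact log-density of Y = log(X): log p_X(exp(y)) + y.
exact = stats.gamma.logpdf(np.exp(y), a=shape, scale=1. / rate) + y

# They differ only by the normalizing constant.
const = shape * np.log(rate) - special.gammaln(shape)
np.testing.assert_allclose(unnormalized + const, exact, rtol=1e-10)
```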
- def _integrator_conserves_energy(self, x, independent_chain_ndims, sess,
- feed_dict=None):
- step_size = array_ops.placeholder(np.float32, [], name="step_size")
- hmc_lf_steps = array_ops.placeholder(np.int32, [], name="hmc_lf_steps")
-
- if feed_dict is None:
- feed_dict = {}
- feed_dict[hmc_lf_steps] = 1000
-
- event_dims = math_ops.range(independent_chain_ndims,
- array_ops.rank(x))
-
- m = random_ops.random_normal(array_ops.shape(x))
- log_prob_0 = self._log_gamma_log_prob(x, event_dims)
- grad_0 = gradients_ops.gradients(log_prob_0, x)
- old_energy = -log_prob_0 + 0.5 * math_ops.reduce_sum(m**2., event_dims)
-
- new_m, _, log_prob_1, _ = _leapfrog_integrator(
- current_momentums=[m],
- target_log_prob_fn=lambda x: self._log_gamma_log_prob(x, event_dims),
- current_state_parts=[x],
- step_sizes=[step_size],
- num_leapfrog_steps=hmc_lf_steps,
- current_target_log_prob=log_prob_0,
- current_grads_target_log_prob=grad_0)
- new_m = new_m[0]
-
- new_energy = -log_prob_1 + 0.5 * math_ops.reduce_sum(new_m * new_m,
- event_dims)
-
- x_shape = sess.run(x, feed_dict).shape
- event_size = np.prod(x_shape[independent_chain_ndims:])
- feed_dict[step_size] = 0.1 / event_size
- old_energy_, new_energy_ = sess.run([old_energy, new_energy],
- feed_dict)
- logging_ops.vlog(1, "average energy relative change: {}".format(
- (1. - new_energy_ / old_energy_).mean()))
- self.assertAllClose(old_energy_, new_energy_, atol=0., rtol=0.02)
-
- def _integrator_conserves_energy_wrapper(self, independent_chain_ndims):
- """Tests the long-term energy conservation of the leapfrog integrator.
-
- The leapfrog integrator is symplectic, so for sufficiently small step
- sizes it should be possible to run it more or less indefinitely without
- the energy of the system blowing up or collapsing.
-
- Args:
- independent_chain_ndims: Python `int` scalar representing the number of
- dims associated with independent chains.
- """
- with self.test_session(graph=ops.Graph()) as sess:
- x_ph = array_ops.placeholder(np.float32, name="x_ph")
- feed_dict = {x_ph: np.random.rand(50, 10, 2)}
- self._integrator_conserves_energy(x_ph, independent_chain_ndims,
- sess, feed_dict)
-
- def testIntegratorEnergyConservationNullShape(self):
- self._integrator_conserves_energy_wrapper(0)
-
- def testIntegratorEnergyConservation1(self):
- self._integrator_conserves_energy_wrapper(1)
-
- def testIntegratorEnergyConservation2(self):
- self._integrator_conserves_energy_wrapper(2)
-
- def testIntegratorEnergyConservation3(self):
- self._integrator_conserves_energy_wrapper(3)
-
- def testSampleChainSeedReproducibleWorksCorrectly(self):
- with self.test_session(graph=ops.Graph()) as sess:
- num_results = 10
- independent_chain_ndims = 1
-
- def log_gamma_log_prob(x):
- event_dims = math_ops.range(independent_chain_ndims,
- array_ops.rank(x))
- return self._log_gamma_log_prob(x, event_dims)
-
- kwargs = dict(
- target_log_prob_fn=log_gamma_log_prob,
- current_state=np.random.rand(4, 3, 2),
- step_size=0.1,
- num_leapfrog_steps=2,
- num_burnin_steps=150,
- seed=52,
- )
-
- samples0, kernel_results0 = hmc.sample_chain(
- **dict(list(kwargs.items()) + list(dict(
- num_results=2 * num_results,
- num_steps_between_results=0).items())))
-
- samples1, kernel_results1 = hmc.sample_chain(
- **dict(list(kwargs.items()) + list(dict(
- num_results=num_results,
- num_steps_between_results=1).items())))
-
- [
- samples0_,
- samples1_,
- target_log_prob0_,
- target_log_prob1_,
- ] = sess.run([
- samples0,
- samples1,
- kernel_results0.current_target_log_prob,
- kernel_results1.current_target_log_prob,
- ])
- self.assertAllClose(samples0_[::2], samples1_,
- atol=1e-5, rtol=1e-5)
- self.assertAllClose(target_log_prob0_[::2], target_log_prob1_,
- atol=1e-5, rtol=1e-5)
-
- def _chain_gets_correct_expectations(self, x, independent_chain_ndims,
- sess, feed_dict=None):
- counter = collections.Counter()
- def log_gamma_log_prob(x):
- counter["target_calls"] += 1
- event_dims = math_ops.range(independent_chain_ndims,
- array_ops.rank(x))
- return self._log_gamma_log_prob(x, event_dims)
-
- num_results = array_ops.placeholder(
- np.int32, [], name="num_results")
- step_size = array_ops.placeholder(
- np.float32, [], name="step_size")
- num_leapfrog_steps = array_ops.placeholder(
- np.int32, [], name="num_leapfrog_steps")
-
- if feed_dict is None:
- feed_dict = {}
- feed_dict.update({num_results: 150,
- step_size: 0.05,
- num_leapfrog_steps: 2})
-
- samples, kernel_results = hmc.sample_chain(
- num_results=num_results,
- target_log_prob_fn=log_gamma_log_prob,
- current_state=x,
- step_size=step_size,
- num_leapfrog_steps=num_leapfrog_steps,
- num_burnin_steps=150,
- seed=42)
-
- self.assertAllEqual(dict(target_calls=2), counter)
-
- expected_x = (math_ops.digamma(self._shape_param)
- - np.log(self._rate_param))
-
- expected_exp_x = self._shape_param / self._rate_param
-
- log_accept_ratio_, samples_, expected_x_ = sess.run(
- [kernel_results.log_accept_ratio, samples, expected_x],
- feed_dict)
-
- actual_x = samples_.mean()
- actual_exp_x = np.exp(samples_).mean()
- acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
-
- logging_ops.vlog(1, "True E[x, exp(x)]: {}\t{}".format(
- expected_x_, expected_exp_x))
- logging_ops.vlog(1, "Estimated E[x, exp(x)]: {}\t{}".format(
- actual_x, actual_exp_x))
- self.assertNear(actual_x, expected_x_, 2e-2)
- self.assertNear(actual_exp_x, expected_exp_x, 2e-2)
- self.assertAllEqual(np.ones_like(acceptance_probs, np.bool),
- acceptance_probs > 0.5)
- self.assertAllEqual(np.ones_like(acceptance_probs, np.bool),
- acceptance_probs <= 1.)
-
- def _chain_gets_correct_expectations_wrapper(self, independent_chain_ndims):
- with self.test_session(graph=ops.Graph()) as sess:
- x_ph = array_ops.placeholder(np.float32, name="x_ph")
- feed_dict = {x_ph: np.random.rand(50, 10, 2)}
- self._chain_gets_correct_expectations(x_ph, independent_chain_ndims,
- sess, feed_dict)
-
- def testHMCChainExpectationsNullShape(self):
- self._chain_gets_correct_expectations_wrapper(0)
-
- def testHMCChainExpectations1(self):
- self._chain_gets_correct_expectations_wrapper(1)
-
- def testHMCChainExpectations2(self):
- self._chain_gets_correct_expectations_wrapper(2)
-
- def testKernelResultsUsingTruncatedDistribution(self):
- def log_prob(x):
- return array_ops.where(
- x >= 0.,
- -x - x**2, # Non-constant gradient.
- array_ops.fill(x.shape, math_ops.cast(-np.inf, x.dtype)))
- # This log_prob has the property that it is likely to attract the
- # flow toward, and below, zero...but for x <= 0, log_prob(x) = -inf,
- # which should result in rejection, as well as a non-finite
- # log_prob. Thus, this distribution gives us an opportunity to test
- # the kernel results' ability to correctly capture rejections due to
- # both finite and non-finite reasons.
- # Why use a non-constant gradient? This ensures the leapfrog
- # integrator will not be exact.
-
- num_results = 1000
- # Large step size, will give rejections due to integration error in addition
- # to rejection due to going into a region of log_prob = -inf.
- step_size = 0.1
- num_leapfrog_steps = 5
- num_chains = 2
-
- with self.test_session(graph=ops.Graph()) as sess:
-
- # Start multiple independent chains.
- initial_state = ops.convert_to_tensor([0.1] * num_chains)
-
- states, kernel_results = hmc.sample_chain(
- num_results=num_results,
- target_log_prob_fn=log_prob,
- current_state=initial_state,
- step_size=step_size,
- num_leapfrog_steps=num_leapfrog_steps,
- seed=42)
-
- states_, kernel_results_ = sess.run([states, kernel_results])
- pstates_ = kernel_results_.proposed_state
-
- neg_inf_mask = np.isneginf(kernel_results_.proposed_target_log_prob)
-
- # First: Test that the mathematical properties of the above log prob
- # function in conjunction with HMC show up as expected in kernel_results_.
-
- # We better have log_prob = -inf some of the time.
- self.assertLess(0, neg_inf_mask.sum())
- # We better have some rejections due to something other than -inf.
- self.assertLess(neg_inf_mask.sum(), (~kernel_results_.is_accepted).sum())
- # We better have accepted a decent amount, even near end of the chain.
- self.assertLess(
- 0.1, kernel_results_.is_accepted[int(0.9 * num_results):].mean())
- # We better not have any NaNs in states or log_prob.
- # We may have some NaN in grads, which involve multiplication/addition due
- # to gradient rules. This is the known "NaN grad issue with tf.where."
- self.assertAllEqual(np.zeros_like(states_),
- np.isnan(kernel_results_.proposed_target_log_prob))
- self.assertAllEqual(np.zeros_like(states_),
- np.isnan(states_))
- # We better not have any +inf in states, grads, or log_prob.
- self.assertAllEqual(np.zeros_like(states_),
- np.isposinf(kernel_results_.proposed_target_log_prob))
- self.assertAllEqual(
- np.zeros_like(states_),
- np.isposinf(kernel_results_.proposed_grads_target_log_prob[0]))
- self.assertAllEqual(np.zeros_like(states_),
- np.isposinf(states_))
-
- # Second: Test that kernel_results is congruent with itself and
- # acceptance/rejection of states.
-
- # Proposed state is negative iff proposed target log prob is -inf.
- np.testing.assert_array_less(pstates_[neg_inf_mask], 0.)
- np.testing.assert_array_less(0., pstates_[~neg_inf_mask])
-
- # Acceptance probs are zero whenever proposed state is negative.
- acceptance_probs = np.exp(np.minimum(
- kernel_results_.log_accept_ratio, 0.))
- self.assertAllEqual(
- np.zeros_like(pstates_[neg_inf_mask]),
- acceptance_probs[neg_inf_mask])
-
- # The move is accepted ==> state = proposed state.
- self.assertAllEqual(
- states_[kernel_results_.is_accepted],
- pstates_[kernel_results_.is_accepted],
- )
- # The move was rejected <==> state[t] == state[t - 1].
- for t in range(1, num_results):
- for i in range(num_chains):
- if kernel_results_.is_accepted[t, i]:
- self.assertNotEqual(states_[t, i], states_[t - 1, i])
- else:
- self.assertEqual(states_[t, i], states_[t - 1, i])
-
- def _kernel_leaves_target_invariant(self, initial_draws,
- independent_chain_ndims,
- sess, feed_dict=None):
- def log_gamma_log_prob(x):
- event_dims = math_ops.range(independent_chain_ndims, array_ops.rank(x))
- return self._log_gamma_log_prob(x, event_dims)
-
- def fake_log_prob(x):
- """Cooled version of the target distribution."""
- return 1.1 * log_gamma_log_prob(x)
-
- step_size = array_ops.placeholder(np.float32, [], name="step_size")
-
- if feed_dict is None:
- feed_dict = {}
-
- feed_dict[step_size] = 0.4
-
- sample, kernel_results = hmc.kernel(
- target_log_prob_fn=log_gamma_log_prob,
- current_state=initial_draws,
- step_size=step_size,
- num_leapfrog_steps=5,
- seed=43)
-
- bad_sample, bad_kernel_results = hmc.kernel(
- target_log_prob_fn=fake_log_prob,
- current_state=initial_draws,
- step_size=step_size,
- num_leapfrog_steps=5,
- seed=44)
-
- [
- log_accept_ratio_,
- bad_log_accept_ratio_,
- initial_draws_,
- updated_draws_,
- fake_draws_,
- ] = sess.run([
- kernel_results.log_accept_ratio,
- bad_kernel_results.log_accept_ratio,
- initial_draws,
- sample,
- bad_sample,
- ], feed_dict)
-
- # Confirm step size is small enough that we usually accept.
- acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
- bad_acceptance_probs = np.exp(np.minimum(bad_log_accept_ratio_, 0.))
- self.assertGreater(acceptance_probs.mean(), 0.5)
- self.assertGreater(bad_acceptance_probs.mean(), 0.5)
-
- # Confirm step size is large enough that we sometimes reject.
- self.assertLess(acceptance_probs.mean(), 0.99)
- self.assertLess(bad_acceptance_probs.mean(), 0.99)
-
- _, ks_p_value_true = stats.ks_2samp(initial_draws_.flatten(),
- updated_draws_.flatten())
- _, ks_p_value_fake = stats.ks_2samp(initial_draws_.flatten(),
- fake_draws_.flatten())
-
- logging_ops.vlog(1, "acceptance rate for true target: {}".format(
- acceptance_probs.mean()))
- logging_ops.vlog(1, "acceptance rate for fake target: {}".format(
- bad_acceptance_probs.mean()))
- logging_ops.vlog(1, "K-S p-value for true target: {}".format(
- ks_p_value_true))
- logging_ops.vlog(1, "K-S p-value for fake target: {}".format(
- ks_p_value_fake))
- # Make sure that the MCMC update hasn't changed the empirical CDF much.
- self.assertGreater(ks_p_value_true, 1e-3)
- # Confirm that targeting the wrong distribution does
- # significantly change the empirical CDF.
- self.assertLess(ks_p_value_fake, 1e-6)
-
- def _kernel_leaves_target_invariant_wrapper(self, independent_chain_ndims):
- """Tests that the kernel leaves the target distribution invariant.
-
- Draws some independent samples from the target distribution,
- applies an iteration of the MCMC kernel, then runs a
- Kolmogorov-Smirnov test to determine if the distribution of the
- MCMC-updated samples has changed.
-
- We also confirm that running the kernel with a different log-pdf
- does change the target distribution. (And that we can detect that.)
-
- Args:
- independent_chain_ndims: Python `int` scalar representing the number of
- dims associated with independent chains.
- """
- with self.test_session(graph=ops.Graph()) as sess:
- initial_draws = np.log(np.random.gamma(self._shape_param,
- size=[50000, 2, 2]))
- initial_draws -= np.log(self._rate_param)
- x_ph = array_ops.placeholder(np.float32, name="x_ph")
-
- feed_dict = {x_ph: initial_draws}
-
- self._kernel_leaves_target_invariant(x_ph, independent_chain_ndims,
- sess, feed_dict)
-
- def testKernelLeavesTargetInvariant1(self):
- self._kernel_leaves_target_invariant_wrapper(1)
-
- def testKernelLeavesTargetInvariant2(self):
- self._kernel_leaves_target_invariant_wrapper(2)
-
- def testKernelLeavesTargetInvariant3(self):
- self._kernel_leaves_target_invariant_wrapper(3)
-
- def testNanRejection(self):
- """Tests that an update that yields NaN potentials gets rejected.
-
- We run HMC with a target distribution that returns NaN
- log-likelihoods if any element of x < 0, and unit-scale
- exponential log-likelihoods otherwise. The exponential potential
- pushes x towards 0, ensuring that any reasonably large update will
- push us over the edge into NaN territory.
- """
- def _unbounded_exponential_log_prob(x):
- """An exponential distribution with log-likelihood NaN for x < 0."""
- per_element_potentials = array_ops.where(
- x < 0.,
- array_ops.fill(array_ops.shape(x), x.dtype.as_numpy_dtype(np.nan)),
- -x)
- return math_ops.reduce_sum(per_element_potentials)
-
- with self.test_session(graph=ops.Graph()) as sess:
- initial_x = math_ops.linspace(0.01, 5, 10)
- updated_x, kernel_results = hmc.kernel(
- target_log_prob_fn=_unbounded_exponential_log_prob,
- current_state=initial_x,
- step_size=2.,
- num_leapfrog_steps=5,
- seed=46)
- initial_x_, updated_x_, log_accept_ratio_ = sess.run(
- [initial_x, updated_x, kernel_results.log_accept_ratio])
- acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
-
- logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
- logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
- logging_ops.vlog(1, "log_accept_ratio = {}".format(log_accept_ratio_))
-
- self.assertAllEqual(initial_x_, updated_x_)
- self.assertEqual(acceptance_probs, 0.)
-
- def testNanFromGradsDontPropagate(self):
- """Test that update with NaN gradients does not cause NaN in results."""
- def _nan_log_prob_with_nan_gradient(x):
- return np.nan * math_ops.reduce_sum(x)
-
- with self.test_session(graph=ops.Graph()) as sess:
- initial_x = math_ops.linspace(0.01, 5, 10)
- updated_x, kernel_results = hmc.kernel(
- target_log_prob_fn=_nan_log_prob_with_nan_gradient,
- current_state=initial_x,
- step_size=2.,
- num_leapfrog_steps=5,
- seed=47)
- initial_x_, updated_x_, log_accept_ratio_ = sess.run(
- [initial_x, updated_x, kernel_results.log_accept_ratio])
- acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
-
- logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
- logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
- logging_ops.vlog(1, "log_accept_ratio = {}".format(log_accept_ratio_))
-
- self.assertAllEqual(initial_x_, updated_x_)
- self.assertEqual(acceptance_probs, 0.)
-
- self.assertAllFinite(
- gradients_ops.gradients(updated_x, initial_x)[0].eval())
- self.assertAllEqual([True], [g is None for g in gradients_ops.gradients(
- kernel_results.proposed_grads_target_log_prob, initial_x)])
- self.assertAllEqual([False], [g is None for g in gradients_ops.gradients(
- kernel_results.proposed_grads_target_log_prob,
- kernel_results.proposed_state)])
-
- # Gradients of the acceptance probs and new log prob are not finite.
- # self.assertAllFinite(
- # gradients_ops.gradients(acceptance_probs, initial_x)[0].eval())
- # self.assertAllFinite(
- # gradients_ops.gradients(new_log_prob, initial_x)[0].eval())
-
- def _testChainWorksDtype(self, dtype):
- with self.test_session(graph=ops.Graph()) as sess:
- states, kernel_results = hmc.sample_chain(
- num_results=10,
- target_log_prob_fn=lambda x: -math_ops.reduce_sum(x**2., axis=-1),
- current_state=np.zeros(5).astype(dtype),
- step_size=0.01,
- num_leapfrog_steps=10,
- seed=48)
- states_, log_accept_ratio_ = sess.run(
- [states, kernel_results.log_accept_ratio])
- self.assertEqual(dtype, states_.dtype)
- self.assertEqual(dtype, log_accept_ratio_.dtype)
-
- def testChainWorksIn64Bit(self):
- self._testChainWorksDtype(np.float64)
-
- def testChainWorksIn16Bit(self):
- self._testChainWorksDtype(np.float16)
-
- def testChainWorksCorrelatedMultivariate(self):
- dtype = np.float32
- true_mean = dtype([0, 0])
- true_cov = dtype([[1, 0.5],
- [0.5, 1]])
- num_results = 2000
- counter = collections.Counter()
- with self.test_session(graph=ops.Graph()) as sess:
- def target_log_prob(x, y):
- counter["target_calls"] += 1
- # Corresponds to unnormalized MVN.
- # z = matmul(inv(chol(true_cov)), [x, y] - true_mean)
- z = array_ops.stack([x, y], axis=-1) - true_mean
- z = array_ops.squeeze(
- gen_linalg_ops.matrix_triangular_solve(
- np.linalg.cholesky(true_cov),
- z[..., array_ops.newaxis]),
- axis=-1)
- return -0.5 * math_ops.reduce_sum(z**2., axis=-1)
- states, _ = hmc.sample_chain(
- num_results=num_results,
- target_log_prob_fn=target_log_prob,
- current_state=[dtype(-2), dtype(2)],
- step_size=[0.5, 0.5],
- num_leapfrog_steps=2,
- num_burnin_steps=200,
- num_steps_between_results=1,
- seed=54)
- self.assertAllEqual(dict(target_calls=2), counter)
- states = array_ops.stack(states, axis=-1)
- self.assertEqual(num_results, states.shape[0].value)
- sample_mean = math_ops.reduce_mean(states, axis=0)
- x = states - sample_mean
- sample_cov = math_ops.matmul(x, x, transpose_a=True) / dtype(num_results)
- [sample_mean_, sample_cov_] = sess.run([
- sample_mean, sample_cov])
- self.assertAllClose(true_mean, sample_mean_,
- atol=0.05, rtol=0.)
- self.assertAllClose(true_cov, sample_cov_,
- atol=0., rtol=0.1)
-
-
-class _EnergyComputationTest(object):
-
- def testHandlesNanFromPotential(self):
- with self.test_session(graph=ops.Graph()) as sess:
- x = [1, np.inf, -np.inf, np.nan]
- target_log_prob, proposed_target_log_prob = [
- self.dtype(x.flatten()) for x in np.meshgrid(x, x)]
- num_chains = len(target_log_prob)
- dummy_momentums = [-1, 1]
- momentums = [self.dtype([dummy_momentums] * num_chains)]
- proposed_momentums = [self.dtype([dummy_momentums] * num_chains)]
-
- target_log_prob = ops.convert_to_tensor(target_log_prob)
- momentums = [ops.convert_to_tensor(momentums[0])]
- proposed_target_log_prob = ops.convert_to_tensor(proposed_target_log_prob)
- proposed_momentums = [ops.convert_to_tensor(proposed_momentums[0])]
-
- energy = _compute_energy_change(
- target_log_prob,
- momentums,
- proposed_target_log_prob,
- proposed_momentums,
- independent_chain_ndims=1)
- grads = gradients_ops.gradients(energy, momentums)
-
- [actual_energy, grads_] = sess.run([energy, grads])
-
- # Ensure energy is `inf` (note: that's positive inf) in weird cases and
- # finite otherwise.
- expected_energy = self.dtype([0] + [np.inf]*(num_chains - 1))
- self.assertAllEqual(expected_energy, actual_energy)
-
- # Ensure gradient is finite.
- self.assertAllEqual(np.ones_like(grads_).astype(np.bool),
- np.isfinite(grads_))
-
- def testHandlesNanFromKinetic(self):
- with self.test_session(graph=ops.Graph()) as sess:
- x = [1, np.inf, -np.inf, np.nan]
- momentums, proposed_momentums = [
- [np.reshape(self.dtype(x), [-1, 1])]
- for x in np.meshgrid(x, x)]
- num_chains = len(momentums[0])
- target_log_prob = np.ones(num_chains, self.dtype)
- proposed_target_log_prob = np.ones(num_chains, self.dtype)
-
- target_log_prob = ops.convert_to_tensor(target_log_prob)
- momentums = [ops.convert_to_tensor(momentums[0])]
- proposed_target_log_prob = ops.convert_to_tensor(proposed_target_log_prob)
- proposed_momentums = [ops.convert_to_tensor(proposed_momentums[0])]
-
- energy = _compute_energy_change(
- target_log_prob,
- momentums,
- proposed_target_log_prob,
- proposed_momentums,
- independent_chain_ndims=1)
- grads = gradients_ops.gradients(energy, momentums)
-
- [actual_energy, grads_] = sess.run([energy, grads])
-
- # Ensure energy is `inf` (note: that's positive inf) in weird cases and
- # finite otherwise.
- expected_energy = self.dtype([0] + [np.inf]*(num_chains - 1))
- self.assertAllEqual(expected_energy, actual_energy)
-
- # Ensure gradient is finite.
- g = grads_[0].reshape([len(x), len(x)])[:, 0]
- self.assertAllEqual(np.ones_like(g).astype(np.bool), np.isfinite(g))
-
- # The remaining gradients are nan because the momentum was itself nan or
- # inf.
- g = grads_[0].reshape([len(x), len(x)])[:, 1:]
- self.assertAllEqual(np.ones_like(g).astype(np.bool), np.isnan(g))
-
-
-class EnergyComputationTest16(test.TestCase, _EnergyComputationTest):
- dtype = np.float16
-
-
-class EnergyComputationTest32(test.TestCase, _EnergyComputationTest):
- dtype = np.float32
-
-
-class EnergyComputationTest64(test.TestCase, _EnergyComputationTest):
- dtype = np.float64
-
-
-class _HMCHandlesLists(object):
-
- def testStateParts(self):
- with self.test_session(graph=ops.Graph()) as sess:
- dist_x = normal_lib.Normal(loc=self.dtype(0), scale=self.dtype(1))
- dist_y = independent_lib.Independent(
- gamma_lib.Gamma(concentration=self.dtype([1, 2]),
- rate=self.dtype([0.5, 0.75])),
- reinterpreted_batch_ndims=1)
- def target_log_prob(x, y):
- return dist_x.log_prob(x) + dist_y.log_prob(y)
- x0 = [dist_x.sample(seed=1), dist_y.sample(seed=2)]
- samples, _ = hmc.sample_chain(
- num_results=int(2e3),
- target_log_prob_fn=target_log_prob,
- current_state=x0,
- step_size=0.85,
- num_leapfrog_steps=3,
- num_burnin_steps=int(250),
- seed=49)
- actual_means = [math_ops.reduce_mean(s, axis=0) for s in samples]
- actual_vars = [_reduce_variance(s, axis=0) for s in samples]
- expected_means = [dist_x.mean(), dist_y.mean()]
- expected_vars = [dist_x.variance(), dist_y.variance()]
- [
- actual_means_,
- actual_vars_,
- expected_means_,
- expected_vars_,
- ] = sess.run([
- actual_means,
- actual_vars,
- expected_means,
- expected_vars,
- ])
- self.assertAllClose(expected_means_, actual_means_, atol=0.05, rtol=0.16)
- self.assertAllClose(expected_vars_, actual_vars_, atol=0., rtol=0.25)
-
-
-class HMCHandlesLists32(_HMCHandlesLists, test.TestCase):
- dtype = np.float32
-
-
-class HMCHandlesLists64(_HMCHandlesLists, test.TestCase):
- dtype = np.float64
-
-
-if __name__ == "__main__":
- test.main()
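The energy-conservation tests deleted above hinge on the leapfrog integrator being symplectic: for a small enough step size, the Hamiltonian `H(x, m) = -log_prob(x) + m**2 / 2` drifts only slightly even over many steps. A self-contained numpy sketch of that property for a hypothetical 1-D standard-normal target (independent of the deleted `_leapfrog_integrator`):

```python
import numpy as np

def grad_log_prob(x):
  # d/dx log N(x; 0, 1) = -x (the normalizing constant drops out).
  return -x

def leapfrog(x, m, step_size, num_steps):
  # Standard leapfrog: half momentum step, alternating full position
  # and momentum steps, closing half momentum step.
  m = m + 0.5 * step_size * grad_log_prob(x)
  for _ in range(num_steps - 1):
    x = x + step_size * m
    m = m + step_size * grad_log_prob(x)
  x = x + step_size * m
  m = m + 0.5 * step_size * grad_log_prob(x)
  return x, m

def energy(x, m):
  # -log_prob(x) plus kinetic energy, dropping constants.
  return 0.5 * x**2 + 0.5 * m**2

rng = np.random.RandomState(0)
x0, m0 = rng.randn(), rng.randn()
x1, m1 = leapfrog(x0, m0, step_size=0.01, num_steps=1000)

# 1000 integration steps, yet the energy error stays O(step_size**2).
assert abs(energy(x1, m1) - energy(x0, m0)) < 1e-3
```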
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py
deleted file mode 100644
index f508e5b114..0000000000
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py
+++ /dev/null
@@ -1,340 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Metropolis-Hastings."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings_impl as mh
-from tensorflow.contrib.distributions.python.ops import mvn_tril as mvn_tril_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.platform import test
-
-
-class MetropolisHastingsTest(test.TestCase):
-
- def testKernelStateTensor(self):
- """Test that transition kernel works with tensor input to `state`."""
- loc = variable_scope.get_variable("loc", initializer=0.)
-
- def target_log_prob_fn(loc):
- return normal_lib.Normal(loc=0.0, scale=0.1).log_prob(loc)
-
- new_state, _ = mh.kernel(
- target_log_prob_fn=target_log_prob_fn,
- proposal_fn=mh.proposal_normal(scale=0.05),
- current_state=loc,
- seed=231251)
- loc_update = loc.assign(new_state)
-
- init = variables.initialize_all_variables()
- with self.test_session() as sess:
- sess.run(init)
- loc_samples = []
- for _ in range(2500):
- loc_sample = sess.run(loc_update)
- loc_samples.append(loc_sample)
- loc_samples = loc_samples[500:] # drop samples for burn-in
-
- self.assertAllClose(np.mean(loc_samples), 0.0, rtol=1e-5, atol=1e-1)
- self.assertAllClose(np.std(loc_samples), 0.1, rtol=1e-5, atol=1e-1)
-
- def testKernelStateList(self):
- """Test that transition kernel works with list input to `state`."""
- num_chains = 2
- loc_one = variable_scope.get_variable(
- "loc_one", [num_chains],
- initializer=init_ops.zeros_initializer())
- loc_two = variable_scope.get_variable(
- "loc_two", [num_chains], initializer=init_ops.zeros_initializer())
-
- def target_log_prob_fn(loc_one, loc_two):
- loc = array_ops.stack([loc_one, loc_two])
- log_prob = mvn_tril_lib.MultivariateNormalTriL(
- loc=constant_op.constant([0., 0.]),
- scale_tril=constant_op.constant([[0.1, 0.1], [0.0, 0.1]])).log_prob(
- loc)
- return math_ops.reduce_sum(log_prob, 0)
-
- def proposal_fn(loc_one, loc_two):
- loc_one_proposal = mh.proposal_normal(scale=0.05)
- loc_two_proposal = mh.proposal_normal(scale=0.05)
- loc_one_sample, _ = loc_one_proposal(loc_one)
- loc_two_sample, _ = loc_two_proposal(loc_two)
- return [loc_one_sample, loc_two_sample], None
-
- new_state, _ = mh.kernel(
- target_log_prob_fn=target_log_prob_fn,
- proposal_fn=proposal_fn,
- current_state=[loc_one, loc_two],
- seed=12415)
- loc_one_update = loc_one.assign(new_state[0])
- loc_two_update = loc_two.assign(new_state[1])
-
- init = variables.initialize_all_variables()
- with self.test_session() as sess:
- sess.run(init)
- loc_one_samples = []
- loc_two_samples = []
- for _ in range(10000):
- loc_one_sample, loc_two_sample = sess.run(
- [loc_one_update, loc_two_update])
- loc_one_samples.append(loc_one_sample)
- loc_two_samples.append(loc_two_sample)
-
- loc_one_samples = np.array(loc_one_samples)
- loc_two_samples = np.array(loc_two_samples)
- loc_one_samples = loc_one_samples[1000:] # drop samples for burn-in
- loc_two_samples = loc_two_samples[1000:] # drop samples for burn-in
-
- self.assertAllClose(np.mean(loc_one_samples, 0),
- np.array([0.] * num_chains),
- rtol=1e-5, atol=1e-1)
- self.assertAllClose(np.mean(loc_two_samples, 0),
- np.array([0.] * num_chains),
- rtol=1e-5, atol=1e-1)
- self.assertAllClose(np.std(loc_one_samples, 0),
- np.array([0.1] * num_chains),
- rtol=1e-5, atol=1e-1)
- self.assertAllClose(np.std(loc_two_samples, 0),
- np.array([0.1] * num_chains),
- rtol=1e-5, atol=1e-1)
-
- def testKernelResultsUsingTruncatedDistribution(self):
- def log_prob(x):
- return array_ops.where(
- x >= 0.,
- -x - x**2,
- array_ops.fill(x.shape, math_ops.cast(-np.inf, x.dtype)))
- # The truncated distribution has the property that it is likely to
- # attract the flow toward, and below, zero...but for x <= 0,
- # log_prob(x) = -inf, which should result in rejection, as well as a
- # non-finite log_prob. Thus, this distribution gives us an
- # opportunity to test the kernel results' ability to correctly
- # capture rejections due to both finite and non-finite reasons.
-
- num_results = 1000
- # Large step size, will give rejections due to going into a region of
- # log_prob = -inf.
- step_size = 0.3
- num_chains = 2
-
- with self.test_session(graph=ops.Graph()) as sess:
-
- # Start multiple independent chains.
- initial_state = ops.convert_to_tensor([0.1] * num_chains)
-
- states = []
- is_accepted = []
- proposed_states = []
- current_state = initial_state
- for _ in range(num_results):
- current_state, kernel_results = mh.kernel(
- target_log_prob_fn=log_prob,
- proposal_fn=mh.proposal_uniform(step_size=step_size),
- current_state=current_state,
- seed=42)
- states.append(current_state)
- proposed_states.append(kernel_results.proposed_state)
- is_accepted.append(kernel_results.is_accepted)
-
- states = array_ops.stack(states)
- proposed_states = array_ops.stack(proposed_states)
- is_accepted = array_ops.stack(is_accepted)
- states_, pstates_, is_accepted_ = sess.run(
- [states, proposed_states, is_accepted])
-
- # We better have accepted a decent amount, even near end of the chain.
- self.assertLess(
- 0.1, is_accepted_[int(0.9 * num_results):].mean())
- # We better not have any NaNs in states.
- self.assertAllEqual(np.zeros_like(states_),
- np.isnan(states_))
- # We better not have any +inf in states.
- self.assertAllEqual(np.zeros_like(states_),
- np.isposinf(states_))
-
- # The move is accepted ==> state = proposed state.
- self.assertAllEqual(
- states_[is_accepted_],
- pstates_[is_accepted_],
- )
-
- # The move was rejected <==> state[t] == state[t - 1].
- for t in range(1, num_results):
- for i in range(num_chains):
- if is_accepted_[t, i]:
- self.assertNotEqual(states_[t, i], states_[t - 1, i])
- else:
- self.assertEqual(states_[t, i], states_[t - 1, i])
-
- def testDensityIncreasingStepAccepted(self):
- """Tests that if a transition increases density, it is always accepted."""
- target_log_density = lambda x: - x * x
- state = variable_scope.get_variable("state", initializer=10.)
- state_log_density = variable_scope.get_variable(
- "state_log_density",
- initializer=target_log_density(state.initialized_value()))
- log_accept_ratio = variable_scope.get_variable(
- "log_accept_ratio", initializer=0.)
-
- get_next_proposal = lambda x: (x - 1., None)
- step = mh.evolve(state, state_log_density, log_accept_ratio,
- target_log_density, get_next_proposal, seed=1234)
- init = variables.initialize_all_variables()
- with self.test_session() as sess:
- sess.run(init)
- for j in range(9):
- sess.run(step)
- sample = sess.run(state)
- sample_log_density = sess.run(state_log_density)
- self.assertAlmostEqual(sample, 9 - j)
- self.assertAlmostEqual(sample_log_density, - (9 - j) * (9 - j))
-
- def testSampleProperties(self):
- """Tests that the samples converge to the target distribution."""
-
- def target_log_density(x):
- """Log-density corresponding to a normal distribution with mean = 4."""
- return - (x - 2.0) * (x - 2.0) * 0.5
-
- # Use the uniform random walker to generate proposals.
- proposal_fn = mh.proposal_uniform(
- step_size=1.0, seed=1234)
-
- state = variable_scope.get_variable("state", initializer=0.0)
- state_log_density = variable_scope.get_variable(
- "state_log_density",
- initializer=target_log_density(state.initialized_value()))
- log_accept_ratio = variable_scope.get_variable(
- "log_accept_ratio", initializer=0.)
-
- # Random walk MCMC converges slowly, so we need to run enough iterations.
- num_iterations = 5000
- step = mh.evolve(state, state_log_density, log_accept_ratio,
- target_log_density, proposal_fn, seed=4321)
-
- init = variables.global_variables_initializer()
-
- sample_sum, sample_sq_sum = 0.0, 0.0
- with self.test_session() as sess:
- sess.run(init)
- for _ in np.arange(num_iterations):
- # Allow for the mixing of the chain and discard these samples.
- sess.run(step)
- for _ in np.arange(num_iterations):
- sess.run(step)
- sample = sess.run(state)
- sample_sum += sample
- sample_sq_sum += sample * sample
-
- sample_mean = sample_sum / num_iterations
- sample_variance = sample_sq_sum / num_iterations - sample_mean * sample_mean
- # The samples have large autocorrelation which reduces the effective sample
- # size.
- self.assertAlmostEqual(sample_mean, 2.0, delta=0.1)
- self.assertAlmostEqual(sample_variance, 1.0, delta=0.1)
-
- def testProposalNormal(self):
- """Tests that the normal proposals are correctly distributed."""
-
- initial_points = array_ops.ones([10000], dtype=dtypes.float32)
- proposal_fn = mh.proposal_normal(
- scale=2.0, seed=1234)
- proposal_points, _ = proposal_fn(initial_points)
-
- with self.test_session() as sess:
- sample = sess.run(proposal_points)
-
- # It is expected that the elements in proposal_points have the same mean as
- # initial_points and have the standard deviation that was supplied to the
- # proposal scheme.
- self.assertAlmostEqual(np.mean(sample), 1.0, delta=0.1)
- self.assertAlmostEqual(np.std(sample), 2.0, delta=0.1)
-
- def testDocstringExample(self):
- """Tests the simplified docstring example with multiple chains."""
-
- n = 2 # dimension of the problem
-
- # Generate 300 initial values randomly. Each of these would be an
- # independent starting point for a Markov chain.
- state = variable_scope.get_variable(
- "state", initializer=random_ops.random_normal(
- [300, n], mean=3.0, dtype=dtypes.float32, seed=42))
-
- # Computes the log(p(x)) for the unit normal density and ignores the
- # normalization constant.
- def log_density(x):
- return - math_ops.reduce_sum(x * x, reduction_indices=-1) / 2.0
-
- # Initial log-density value
- state_log_density = variable_scope.get_variable(
- "state_log_density",
- initializer=log_density(state.initialized_value()))
-
- # A variable to store the log_acceptance_ratio:
- log_acceptance_ratio = variable_scope.get_variable(
- "log_acceptance_ratio",
- initializer=array_ops.zeros([300], dtype=dtypes.float32))
-
- # Generates random proposals by moving each coordinate uniformly and
- # independently in a box of size 2 centered around the current value.
- # Returns the new point and also the log of the Hastings ratio (the
- # ratio of the probability of going from the proposal back to the
- # origin to the probability of the reverse transition). When this
- # ratio is 1, the value may be omitted and replaced by None.
- def random_proposal(x):
- return (x + random_ops.random_uniform(
- array_ops.shape(x), minval=-1, maxval=1,
- dtype=x.dtype, seed=12)), None
-
- # Create the op to propagate the chain for 100 steps.
- stepper = mh.evolve(
- state, state_log_density, log_acceptance_ratio,
- log_density, random_proposal, n_steps=100, seed=123)
- init = variables.initialize_all_variables()
- with self.test_session() as sess:
- sess.run(init)
- # Run the chains for a total of 1000 steps.
- for _ in range(10):
- sess.run(stepper)
- samples = sess.run(state)
- covariance = np.eye(n)
- # Verify that the estimated mean and covariance are close to the true
- # values.
- self.assertAlmostEqual(
- np.max(np.abs(np.mean(samples, 0)
- - np.zeros(n))), 0,
- delta=0.1)
- self.assertAlmostEqual(
- np.max(np.abs(np.reshape(np.cov(samples, rowvar=False), [n**2])
- - np.reshape(covariance, [n**2]))), 0,
- delta=0.2)
-
-if __name__ == "__main__":
- test.main()
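The random-walk Metropolis-Hastings patterns exercised by the deleted tests live on in TensorFlow Probability. A minimal sketch assuming the current `tfp.mcmc.RandomWalkMetropolis` API (which may differ from the API at the exact time of this move):

```python
import tensorflow as tf
import tensorflow_probability as tfp

def target_log_prob_fn(x):
  # Unnormalized log-density of Normal(loc=2, scale=1), as in the
  # deleted testSampleProperties.
  return -(x - 2.)**2 / 2.

samples, kernel_results = tfp.mcmc.sample_chain(
    num_results=5000,
    current_state=tf.zeros([]),
    kernel=tfp.mcmc.RandomWalkMetropolis(target_log_prob_fn),
    num_burnin_steps=1000)
```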
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc.py b/tensorflow/contrib/bayesflow/python/ops/hmc.py
deleted file mode 100644
index c8a5a195d3..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/hmc.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Hamiltonian Monte Carlo, a gradient-based MCMC algorithm."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# go/tf-wildcard-import
-from tensorflow.contrib.bayesflow.python.ops.hmc_impl import * # pylint: disable=wildcard-import,unused-wildcard-import,g-importing-member
-from tensorflow.python.util import all_util
-
-_allowed_symbols = [
- "sample_chain",
- "kernel",
-]
-
-all_util.remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
deleted file mode 100644
index 66afcc7497..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
+++ /dev/null
@@ -1,961 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Hamiltonian Monte Carlo, a gradient-based MCMC algorithm.
-
-@@sample_chain
-@@kernel
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import functional_ops
-from tensorflow.python.ops import gradients_impl as gradients_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import util as distributions_util
-
-__all__ = [
- "sample_chain",
- "kernel",
-]
-
-
-KernelResults = collections.namedtuple(
- "KernelResults",
- [
- "log_accept_ratio",
- "current_grads_target_log_prob", # "Current result" means "accepted".
- "current_target_log_prob", # "Current result" means "accepted".
- "is_accepted",
- "proposed_grads_target_log_prob",
- "proposed_state",
- "proposed_target_log_prob",
- ])
-
-
-def _make_dummy_kernel_results(
- dummy_state,
- dummy_target_log_prob,
- dummy_grads_target_log_prob):
- return KernelResults(
- log_accept_ratio=dummy_target_log_prob,
- current_grads_target_log_prob=dummy_grads_target_log_prob,
- current_target_log_prob=dummy_target_log_prob,
- is_accepted=array_ops.ones_like(dummy_target_log_prob, dtypes.bool),
- proposed_grads_target_log_prob=dummy_grads_target_log_prob,
- proposed_state=dummy_state,
- proposed_target_log_prob=dummy_target_log_prob,
- )
-
-
-def sample_chain(
- num_results,
- target_log_prob_fn,
- current_state,
- step_size,
- num_leapfrog_steps,
- num_burnin_steps=0,
- num_steps_between_results=0,
- seed=None,
- current_target_log_prob=None,
- current_grads_target_log_prob=None,
- name=None):
- """Runs multiple iterations of one or more Hamiltonian Monte Carlo chains.
-
- Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) algorithm
- that takes a series of gradient-informed steps to produce a Metropolis
- proposal. This function samples from an HMC Markov chain at `current_state`
- and whose stationary distribution has log-unnormalized-density
- `target_log_prob_fn()`.
-
- This function samples from multiple chains in parallel. It assumes that
- the leftmost dimensions of (each) `current_state` (part) index an independent
- chain. The function `target_log_prob_fn()` sums log-probabilities across
- event dimensions (i.e., current state (part) rightmost dimensions). Each
- element of the output of `target_log_prob_fn()` represents the (possibly
- unnormalized) log-probability of the joint distribution over (all) the current
- state (parts).
-
- The `current_state` can be represented as a single `Tensor` or a `list` of
- `Tensors` which collectively represent the current state. When specifying a
- `list`, one must also specify a list of `step_size`s.
-
- Note: `target_log_prob_fn` is called exactly twice.
-
- Since HMC states are correlated, it is sometimes desirable to produce
- additional intermediate states, and then discard them, ending up with a set of
- states with decreased autocorrelation. See [1]. Such "thinning" is made
- possible by setting `num_steps_between_results > 0`. The chain then takes
- `num_steps_between_results` extra steps between the steps that make it into
- the results. The extra steps are never materialized (in calls to `sess.run`),
- and thus do not increase memory requirements.
-
- [1]: "Statistically efficient thinning of a Markov chain sampler."
- Art B. Owen. April 2017.
- http://statweb.stanford.edu/~owen/reports/bestthinning.pdf
-
- #### Examples:
-
- ##### Sample from a diagonal-variance Gaussian.
-
- ```python
- tfd = tf.contrib.distributions
-
- def make_likelihood(true_variances):
- return tfd.MultivariateNormalDiag(
- scale_diag=tf.sqrt(true_variances))
-
- dims = 10
- dtype = np.float32
- true_variances = tf.linspace(dtype(1), dtype(3), dims)
- likelihood = make_likelihood(true_variances)
-
- states, kernel_results = hmc.sample_chain(
- num_results=1000,
- target_log_prob_fn=likelihood.log_prob,
- current_state=tf.zeros(dims),
- step_size=0.5,
- num_leapfrog_steps=2,
- num_burnin_steps=500)
-
- # Compute sample stats.
- sample_mean = tf.reduce_mean(states, axis=0)
- sample_var = tf.reduce_mean(
- tf.squared_difference(states, sample_mean),
- axis=0)
- ```
-
- ##### Sampling from factor-analysis posteriors with known factors.
-
- I.e.,
-
- ```none
- for i=1..n:
- w[i] ~ Normal(0, eye(d)) # prior
- x[i] ~ Normal(loc=matmul(w[i], F)) # likelihood
- ```
-
- where `F` denotes factors.
-
- ```python
- tfd = tf.contrib.distributions
-
- def make_prior(dims, dtype):
- return tfd.MultivariateNormalDiag(
- loc=tf.zeros(dims, dtype))
-
- def make_likelihood(weights, factors):
- return tfd.MultivariateNormalDiag(
- loc=tf.tensordot(weights, factors, axes=[[0], [-1]]))
-
- # Setup data.
- num_weights = 10
- num_factors = 4
- num_chains = 100
- dtype = np.float32
-
- prior = make_prior(num_weights, dtype)
- weights = prior.sample(num_chains)
- factors = np.random.randn(num_factors, num_weights).astype(dtype)
- x = make_likelihood(weights, factors).sample(num_chains)
-
- def target_log_prob(w):
- # Target joint is: `f(w) = p(w, x | factors)`.
- return prior.log_prob(w) + make_likelihood(w, factors).log_prob(x)
-
- # Get `num_results` samples from `num_chains` independent chains.
- chains_states, kernels_results = hmc.sample_chain(
- num_results=1000,
- target_log_prob_fn=target_log_prob,
- current_state=tf.zeros([num_chains, dims], dtype),
- step_size=0.1,
- num_leapfrog_steps=2,
- num_burnin_steps=500)
-
- # Compute sample stats.
- sample_mean = tf.reduce_mean(chains_states, axis=[0, 1])
- sample_var = tf.reduce_mean(
- tf.squared_difference(chains_states, sample_mean),
- axis=[0, 1])
- ```
-
- Args:
- num_results: Integer number of Markov chain draws.
- target_log_prob_fn: Python callable which takes an argument like
- `current_state` (or `*current_state` if it's a list) and returns its
- (possibly unnormalized) log-density under the target distribution.
- current_state: `Tensor` or Python `list` of `Tensor`s representing the
- current state(s) of the Markov chain(s). The first `r` dimensions index
- independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
- step_size: `Tensor` or Python `list` of `Tensor`s representing the step size
- for the leapfrog integrator. Must broadcast with the shape of
- `current_state`. Larger step sizes lead to faster progress, but too-large
- step sizes make rejection exponentially more likely. When possible, it's
- often helpful to match per-variable step sizes to the standard deviations
- of the target distribution in each variable.
- num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
- for. Total progress per HMC step is roughly proportional to `step_size *
- num_leapfrog_steps`.
- num_burnin_steps: Integer number of chain steps to take before starting to
- collect results.
- Default value: 0 (i.e., no burn-in).
- num_steps_between_results: Integer number of chain steps between collecting
- a result. Only one out of every `num_steps_between_results + 1` steps is
- included in the returned results. The number of returned chain states is
- still equal to `num_results`. Default value: 0 (i.e., no thinning).
- seed: Python integer to seed the random number generator.
- current_target_log_prob: (Optional) `Tensor` representing the value of
- `target_log_prob_fn` at the `current_state`. The only reason to specify
- this argument is to reduce TF graph size.
- Default value: `None` (i.e., compute as needed).
- current_grads_target_log_prob: (Optional) Python list of `Tensor`s
- representing gradient of `target_log_prob` at the `current_state` and wrt
- the `current_state`. Must have same shape as `current_state`. The only
- reason to specify this argument is to reduce TF graph size.
- Default value: `None` (i.e., compute as needed).
- name: Python `str` name prefixed to Ops created by this function.
- Default value: `None` (i.e., "hmc_sample_chain").
-
- Returns:
- next_states: Tensor or Python list of `Tensor`s representing the
- state(s) of the Markov chain(s) at each result step. Has same shape as
- input `current_state` but with a prepended `num_results`-size dimension.
- kernel_results: `collections.namedtuple` of internal calculations used to
- advance the chain.
- """
- with ops.name_scope(
- name, "hmc_sample_chain",
- [num_results, current_state, step_size, num_leapfrog_steps,
- num_burnin_steps, num_steps_between_results, seed,
- current_target_log_prob, current_grads_target_log_prob]):
- with ops.name_scope("initialize"):
- [
- current_state,
- step_size,
- current_target_log_prob,
- current_grads_target_log_prob,
- ] = _prepare_args(
- target_log_prob_fn,
- current_state,
- step_size,
- current_target_log_prob,
- current_grads_target_log_prob)
- num_results = ops.convert_to_tensor(
- num_results,
- dtype=dtypes.int32,
- name="num_results")
- num_leapfrog_steps = ops.convert_to_tensor(
- num_leapfrog_steps,
- dtype=dtypes.int32,
- name="num_leapfrog_steps")
- num_burnin_steps = ops.convert_to_tensor(
- num_burnin_steps,
- dtype=dtypes.int32,
- name="num_burnin_steps")
- num_steps_between_results = ops.convert_to_tensor(
- num_steps_between_results,
- dtype=dtypes.int32,
- name="num_steps_between_results")
-
- def _run_chain(num_steps, current_state, kernel_results):
- """Runs the chain(s) for `num_steps`."""
- def _loop_body(iter_, current_state, kernel_results):
- return [iter_ + 1] + list(kernel(
- target_log_prob_fn,
- current_state,
- step_size,
- num_leapfrog_steps,
- seed,
- kernel_results.current_target_log_prob,
- kernel_results.current_grads_target_log_prob))
- while_loop_kwargs = dict(
- cond=lambda iter_, *args: iter_ < num_steps,
- body=_loop_body,
- loop_vars=[
- np.int32(0),
- current_state,
- kernel_results,
- ],
- )
- if seed is not None:
- while_loop_kwargs["parallel_iterations"] = 1
- return control_flow_ops.while_loop(
- **while_loop_kwargs)[1:] # Lop-off "iter_".
-
- def _scan_body(args_list, iter_):
- """Closure which implements `tf.scan` body."""
- current_state, kernel_results = args_list
- return _run_chain(
- 1 + array_ops.where(math_ops.equal(iter_, 0),
- num_burnin_steps,
- num_steps_between_results),
- current_state,
- kernel_results)
-
- scan_kwargs = dict(
- fn=_scan_body,
- elems=math_ops.range(num_results), # iter_: used to choose burnin.
- initializer=[
- current_state,
- _make_dummy_kernel_results(
- current_state,
- current_target_log_prob,
- current_grads_target_log_prob),
- ])
- if seed is not None:
- scan_kwargs["parallel_iterations"] = 1
- return functional_ops.scan(**scan_kwargs)
-
-
-def kernel(target_log_prob_fn,
- current_state,
- step_size,
- num_leapfrog_steps,
- seed=None,
- current_target_log_prob=None,
- current_grads_target_log_prob=None,
- name=None):
- """Runs one iteration of Hamiltonian Monte Carlo.
-
- Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC)
- algorithm that takes a series of gradient-informed steps to produce
- a Metropolis proposal. This function applies one step of HMC to
- randomly update the state `current_state`.
-
- This function can update multiple chains in parallel. It assumes that all
- leftmost dimensions of `current_state` index independent chain states (and are
- therefore updated independently). The output of `target_log_prob_fn()` should
- sum log-probabilities across all event dimensions. Slices along the rightmost
- dimensions may have different target distributions; for example,
- `current_state[0, :]` could have a different target distribution from
- `current_state[1, :]`. This is up to `target_log_prob_fn()`. (The number of
- independent chains is `tf.size(target_log_prob_fn(*current_state))`.)
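-
- For example, if `current_state` has shape `[2, 5]`, with the leading
- dimension indexing 2 independent chains, then
- `target_log_prob_fn(current_state)` should return a `Tensor` of shape `[2]`.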
-
- #### Examples:
-
- ##### Simple chain with warm-up.
-
- ```python
- tfd = tf.contrib.distributions
-
- # Tuning acceptance rates:
- dtype = np.float32
- target_accept_rate = 0.631
- num_warmup_iter = 500
- num_chain_iter = 500
-
- x = tf.get_variable(name="x", initializer=dtype(1))
- step_size = tf.get_variable(name="step_size", initializer=dtype(1))
-
- target = tfd.Normal(loc=dtype(0), scale=dtype(1))
-
- next_x, other_results = hmc.kernel(
- target_log_prob_fn=target.log_prob,
- current_state=x,
- step_size=step_size,
- num_leapfrog_steps=3)
-
- x_update = x.assign(next_x)
-
- step_size_update = step_size.assign_add(
- step_size * tf.where(
- tf.exp(tf.minimum(other_results.log_accept_ratio, 0.)) >
- target_accept_rate,
- 0.01, -0.01))
-
- warmup = tf.group([x_update, step_size_update])
-
- tf.global_variables_initializer().run()
-
- sess.graph.finalize() # No more graph building.
-
- # Warm up the sampler and adapt the step size
- for _ in xrange(num_warmup_iter):
- sess.run(warmup)
-
- # Collect samples without adapting step size
- samples = np.zeros([num_chain_iter])
- for i in xrange(num_chain_iter):
- _, x_, target_log_prob_, grad_ = sess.run([
- x_update,
- x,
- other_results.current_target_log_prob,
- other_results.current_grads_target_log_prob])
- samples[i] = x_
-
- print(samples.mean(), samples.std())
- ```
-
- ##### Sample from more complicated posterior.
-
- I.e.,
-
- ```none
- W ~ MVN(loc=0, scale=sigma * eye(dims))
- for i=1...num_samples:
- X[i] ~ MVN(loc=0, scale=eye(dims))
- eps[i] ~ Normal(loc=0, scale=1)
- Y[i] = X[i].T * W + eps[i]
- ```
-
- ```python
- tfd = tf.contrib.distributions
-
- def make_training_data(num_samples, dims, sigma):
- dt = np.asarray(sigma).dtype
- zeros = tf.zeros(dims, dtype=dt)
- x = tfd.MultivariateNormalDiag(
- loc=zeros).sample(num_samples, seed=1)
- w = tfd.MultivariateNormalDiag(
- loc=zeros,
- scale_identity_multiplier=sigma).sample(seed=2)
- noise = tfd.Normal(
- loc=dt(0),
- scale=dt(1)).sample(num_samples, seed=3)
- y = tf.tensordot(x, w, axes=[[1], [0]]) + noise
- return y, x, w
-
- def make_prior(sigma, dims):
- # p(w | sigma)
- return tfd.MultivariateNormalDiag(
- loc=tf.zeros([dims], dtype=sigma.dtype),
- scale_identity_multiplier=sigma)
-
- def make_likelihood(x, w):
- # p(y | x, w)
- return tfd.MultivariateNormalDiag(
- loc=tf.tensordot(x, w, axes=[[1], [0]]))
-
- # Setup assumptions.
- dtype = np.float32
- num_samples = 150
- dims = 10
- num_iters = int(5e3)
-
- true_sigma = dtype(0.5)
- y, x, true_weights = make_training_data(num_samples, dims, true_sigma)
-
- # Estimate of `log(true_sigma)`.
- log_sigma = tf.get_variable(name="log_sigma", initializer=dtype(0))
- sigma = tf.exp(log_sigma)
-
- # State of the Markov chain.
- weights = tf.get_variable(
- name="weights",
- initializer=np.random.randn(dims).astype(dtype))
-
- prior = make_prior(sigma, dims)
-
- def joint_log_prob_fn(w):
- # f(w) = log p(w, y | x)
- return prior.log_prob(w) + make_likelihood(x, w).log_prob(y)
-
- weights_update = weights.assign(
- hmc.kernel(target_log_prob_fn=joint_log_prob_fn,
- current_state=weights,
- step_size=0.1,
- num_leapfrog_steps=5)[0])
-
- with tf.control_dependencies([weights_update]):
- loss = -prior.log_prob(weights)
-
- optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
- log_sigma_update = optimizer.minimize(loss, var_list=[log_sigma])
-
- tf.global_variables_initializer().run()
-
- sess.graph.finalize()  # No more graph building.
-
- sigma_history = np.zeros(num_iters, dtype)
- weights_history = np.zeros([num_iters, dims], dtype)
-
- for i in xrange(num_iters):
- _, sigma_, weights_ = sess.run([log_sigma_update, sigma, weights])
- weights_history[i, :] = weights_
- sigma_history[i] = sigma_
-
- true_weights_ = sess.run(true_weights)
-
- # Should converge to something close to true_sigma.
- plt.plot(sigma_history);
- plt.ylabel("sigma");
- plt.xlabel("iteration");
- ```
-
- Args:
- target_log_prob_fn: Python callable which takes an argument like
- `current_state` (or `*current_state` if it's a list) and returns its
- (possibly unnormalized) log-density under the target distribution.
- current_state: `Tensor` or Python `list` of `Tensor`s representing the
- current state(s) of the Markov chain(s). The first `r` dimensions index
- independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
- step_size: `Tensor` or Python `list` of `Tensor`s representing the step size
- for the leapfrog integrator. Must broadcast with the shape of
- `current_state`. Larger step sizes lead to faster progress, but too-large
- step sizes make rejection exponentially more likely. When possible, it's
- often helpful to match per-variable step sizes to the standard deviations
- of the target distribution in each variable.
- num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
- for. Total progress per HMC step is roughly proportional to `step_size *
- num_leapfrog_steps`.
- seed: Python integer to seed the random number generator.
- current_target_log_prob: (Optional) `Tensor` representing the value of
- `target_log_prob_fn` at the `current_state`. The only reason to
- specify this argument is to reduce TF graph size.
- Default value: `None` (i.e., compute as needed).
- current_grads_target_log_prob: (Optional) Python list of `Tensor`s
- representing the gradient of `current_target_log_prob` with respect to
- `current_state`, evaluated at `current_state`. Must have same shape as
- `current_state`. The
- only reason to specify this argument is to reduce TF graph size.
- Default value: `None` (i.e., compute as needed).
- name: Python `str` name prefixed to Ops created by this function.
- Default value: `None` (i.e., "hmc_kernel").
-
- Returns:
- next_state: Tensor or Python list of `Tensor`s representing the state(s)
- of the Markov chain(s) after one iteration. Has same shape as
- `current_state`.
- kernel_results: `collections.namedtuple` of internal calculations used to
- advance the chain.
-
- Raises:
- ValueError: if `step_size` is neither a scalar nor a list with the same
- length as `current_state`.
- """
- with ops.name_scope(
- name, "hmc_kernel",
- [current_state, step_size, num_leapfrog_steps, seed,
- current_target_log_prob, current_grads_target_log_prob]):
- with ops.name_scope("initialize"):
- [current_state_parts, step_sizes, current_target_log_prob,
- current_grads_target_log_prob] = _prepare_args(
- target_log_prob_fn, current_state, step_size,
- current_target_log_prob, current_grads_target_log_prob,
- maybe_expand=True)
- independent_chain_ndims = distributions_util.prefer_static_rank(
- current_target_log_prob)
- current_momentums = []
- for s in current_state_parts:
- current_momentums.append(random_ops.random_normal(
- shape=array_ops.shape(s),
- dtype=s.dtype.base_dtype,
- seed=seed))
- seed = distributions_util.gen_new_seed(
- seed, salt="hmc_kernel_momentums")
-
- num_leapfrog_steps = ops.convert_to_tensor(
- num_leapfrog_steps,
- dtype=dtypes.int32,
- name="num_leapfrog_steps")
- [
- proposed_momentums,
- proposed_state_parts,
- proposed_target_log_prob,
- proposed_grads_target_log_prob,
- ] = _leapfrog_integrator(current_momentums,
- target_log_prob_fn,
- current_state_parts,
- step_sizes,
- num_leapfrog_steps,
- current_target_log_prob,
- current_grads_target_log_prob)
-
- energy_change = _compute_energy_change(current_target_log_prob,
- current_momentums,
- proposed_target_log_prob,
- proposed_momentums,
- independent_chain_ndims)
- log_accept_ratio = -energy_change
-
- # u < exp(log_accept_ratio), where u~Uniform[0,1)
- # ==> log(u) < log_accept_ratio
- random_value = random_ops.random_uniform(
- shape=array_ops.shape(energy_change),
- dtype=energy_change.dtype,
- seed=seed)
- random_negative = math_ops.log(random_value)
- is_accepted = random_negative < log_accept_ratio
-
- accepted_target_log_prob = array_ops.where(is_accepted,
- proposed_target_log_prob,
- current_target_log_prob)
-
- next_state_parts = [_choose(is_accepted,
- proposed_state_part,
- current_state_part,
- independent_chain_ndims)
- for current_state_part, proposed_state_part
- in zip(current_state_parts, proposed_state_parts)]
-
- accepted_grads_target_log_prob = [
- _choose(is_accepted,
- proposed_grad,
- grad,
- independent_chain_ndims)
- for proposed_grad, grad
- in zip(proposed_grads_target_log_prob, current_grads_target_log_prob)]
-
- maybe_flatten = lambda x: x if _is_list_like(current_state) else x[0]
- return [
- maybe_flatten(next_state_parts),
- KernelResults(
- log_accept_ratio=log_accept_ratio,
- current_grads_target_log_prob=accepted_grads_target_log_prob,
- current_target_log_prob=accepted_target_log_prob,
- is_accepted=is_accepted,
- proposed_grads_target_log_prob=proposed_grads_target_log_prob,
- proposed_state=maybe_flatten(proposed_state_parts),
- proposed_target_log_prob=proposed_target_log_prob,
- ),
- ]
-
-
-def _leapfrog_integrator(current_momentums,
- target_log_prob_fn,
- current_state_parts,
- step_sizes,
- num_leapfrog_steps,
- current_target_log_prob=None,
- current_grads_target_log_prob=None,
- name=None):
- """Applies `num_leapfrog_steps` of the leapfrog integrator.
-
- Assumes a simple quadratic kinetic energy function: `0.5 ||momentum||**2`.
-
- #### Examples:
-
- ##### Simple quadratic potential.
-
- ```python
- tfd = tf.contrib.distributions
-
- dims = 10
- num_iter = int(1e3)
- dtype = np.float32
-
- position = tf.placeholder(np.float32)
- momentum = tf.placeholder(np.float32)
-
- [
- next_momentums,
- next_positions,
- ] = hmc._leapfrog_integrator(
- current_momentums=[momentum],
- target_log_prob_fn=tfd.MultivariateNormalDiag(
- loc=tf.zeros(dims, dtype)).log_prob,
- current_state_parts=[position],
- step_sizes=[0.1],
- num_leapfrog_steps=3)[:2]
-
- sess.graph.finalize() # No more graph building.
-
- momentum_ = np.random.randn(dims).astype(dtype)
- position_ = np.random.randn(dims).astype(dtype)
-
- positions = np.zeros([num_iter, dims], dtype)
- for i in xrange(num_iter):
- momentum_, position_ = sess.run(
- [next_momentums[0], next_positions[0]],
- feed_dict={position: position_, momentum: momentum_})
- positions[i] = position_
-
- plt.plot(positions[:, 0]); # Sinusoidal.
- ```
-
- Args:
- current_momentums: Python `list` of `Tensor`s containing the value(s) of
- the momentum variable(s) to update.
- target_log_prob_fn: Python callable which takes an argument like
- `*current_state_parts` and returns its (possibly unnormalized) log-density
- under the target distribution.
- current_state_parts: Python `list` of `Tensor`s representing the current
- state(s) of the Markov chain(s). The first `independent_chain_ndims` of
- the `Tensor`(s) index different chains.
- step_sizes: Python `list` of `Tensor`s representing the step size for the
- leapfrog integrator. Must broadcast with the shape of
- `current_state_parts`. Larger step sizes lead to faster progress, but
- too-large step sizes make rejection exponentially more likely. When
- possible, it's often helpful to match per-variable step sizes to the
- standard deviations of the target distribution in each variable.
- num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
- for. Total progress per HMC step is roughly proportional to `step_size *
- num_leapfrog_steps`.
- current_target_log_prob: (Optional) `Tensor` representing the value of
- `target_log_prob_fn(*current_state_parts)`. The only reason to specify
- this argument is to reduce TF graph size.
- Default value: `None` (i.e., compute as needed).
- current_grads_target_log_prob: (Optional) Python list of `Tensor`s
- representing gradient of `target_log_prob_fn(*current_state_parts)` wrt
- `current_state_parts`. Must have same shape as `current_state_parts`. The
- only reason to specify this argument is to reduce TF graph size.
- Default value: `None` (i.e., compute as needed).
- name: Python `str` name prefixed to Ops created by this function.
- Default value: `None` (i.e., "hmc_leapfrog_integrator").
-
- Returns:
- proposed_momentums: Updated value of the momentum.
- proposed_state_parts: Tensor or Python list of `Tensor`s representing the
- state(s) of the Markov chain(s) after `num_leapfrog_steps` steps. Has same
- shape as input `current_state_parts`.
- proposed_target_log_prob: `Tensor` representing the value of
- `target_log_prob_fn` at `next_state`.
- proposed_grads_target_log_prob: Gradient of `proposed_target_log_prob` wrt
- `next_state`.
-
- Raises:
- ValueError: if `len(momentums) != len(state_parts)`.
- ValueError: if `len(state_parts) != len(step_sizes)`.
- ValueError: if `len(state_parts) != len(grads_target_log_prob)`.
- TypeError: if `not target_log_prob.dtype.is_floating`.
- """
- def _loop_body(step,
- current_momentums,
- current_state_parts,
- ignore_current_target_log_prob, # pylint: disable=unused-argument
- current_grads_target_log_prob):
- return [step + 1] + list(_leapfrog_step(current_momentums,
- target_log_prob_fn,
- current_state_parts,
- step_sizes,
- current_grads_target_log_prob))
-
- with ops.name_scope(
- name, "hmc_leapfrog_integrator",
- [current_momentums, current_state_parts, step_sizes, num_leapfrog_steps,
- current_target_log_prob, current_grads_target_log_prob]):
- if len(current_momentums) != len(current_state_parts):
- raise ValueError("`momentums` must be in one-to-one correspondence "
- "with `state_parts`")
- num_leapfrog_steps = ops.convert_to_tensor(num_leapfrog_steps,
- name="num_leapfrog_steps")
- current_target_log_prob, current_grads_target_log_prob = (
- _maybe_call_fn_and_grads(
- target_log_prob_fn,
- current_state_parts,
- current_target_log_prob,
- current_grads_target_log_prob))
- return control_flow_ops.while_loop(
- cond=lambda iter_, *args: iter_ < num_leapfrog_steps,
- body=_loop_body,
- loop_vars=[
- np.int32(0), # iter_
- current_momentums,
- current_state_parts,
- current_target_log_prob,
- current_grads_target_log_prob,
- ],
- back_prop=False)[1:] # Lop-off "iter_".
-
-
-def _leapfrog_step(current_momentums,
- target_log_prob_fn,
- current_state_parts,
- step_sizes,
- current_grads_target_log_prob,
- name=None):
- """Applies one step of the leapfrog integrator."""
- with ops.name_scope(
- name, "_leapfrog_step",
- [current_momentums, current_state_parts, step_sizes,
- current_grads_target_log_prob]):
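- # One leapfrog step: a half-step momentum update using the current
- # gradient, a full position step, then another half-step momentum update
- # using the gradient at the proposed position.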
- proposed_momentums = [m + 0.5 * ss * g for m, ss, g
- in zip(current_momentums,
- step_sizes,
- current_grads_target_log_prob)]
- proposed_state_parts = [x + ss * m for x, ss, m
- in zip(current_state_parts,
- step_sizes,
- proposed_momentums)]
- proposed_target_log_prob = target_log_prob_fn(*proposed_state_parts)
- if not proposed_target_log_prob.dtype.is_floating:
- raise TypeError("`target_log_prob_fn` must produce a `Tensor` "
- "with `float` `dtype`.")
- proposed_grads_target_log_prob = gradients_ops.gradients(
- proposed_target_log_prob, proposed_state_parts)
- if any(g is None for g in proposed_grads_target_log_prob):
- raise ValueError(
- "Encountered `None` gradient. Does your target `target_log_prob_fn` "
- "access all `tf.Variable`s via `tf.get_variable`?\n"
- " current_state_parts: {}\n"
- " proposed_state_parts: {}\n"
- " proposed_grads_target_log_prob: {}".format(
- current_state_parts,
- proposed_state_parts,
- proposed_grads_target_log_prob))
- proposed_momentums = [m + 0.5 * ss * g for m, ss, g
- in zip(proposed_momentums,
- step_sizes,
- proposed_grads_target_log_prob)]
- return [
- proposed_momentums,
- proposed_state_parts,
- proposed_target_log_prob,
- proposed_grads_target_log_prob,
- ]
-
-
-def _compute_energy_change(current_target_log_prob,
- current_momentums,
- proposed_target_log_prob,
- proposed_momentums,
- independent_chain_ndims,
- name=None):
- """Helper to `kernel` which computes the energy change."""
- with ops.name_scope(
- name, "compute_energy_change",
- ([current_target_log_prob, proposed_target_log_prob,
- independent_chain_ndims] +
- current_momentums + proposed_momentums)):
- # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy
- # since the full names are a mouthful and short names let us inline more.
- lk0, lk1 = [], []
- for current_momentum, proposed_momentum in zip(current_momentums,
- proposed_momentums):
- axis = math_ops.range(independent_chain_ndims,
- array_ops.rank(current_momentum))
- lk0.append(_log_sum_sq(current_momentum, axis))
- lk1.append(_log_sum_sq(proposed_momentum, axis))
-
- lk0 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk0, axis=-1),
- axis=-1)
- lk1 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk1, axis=-1),
- axis=-1)
- lp0 = -current_target_log_prob # potential
- lp1 = -proposed_target_log_prob # proposed_potential
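- # The four stacked terms sum to the energy change,
- # (lp1 + exp(lk1)) - (lp0 + exp(lk0)), i.e., proposed minus current total
- # energy; stacking lets us test each term for NaN/Inf below.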
- x = array_ops.stack([lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)],
- axis=-1)
-
- # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
- # Thus we will replace such rows with infinite energy change which implies
- # rejection. Recall that float-comparisons with NaN are always False.
- is_sum_determinate = (
- math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1) &
- math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1))
- is_sum_determinate = array_ops.tile(
- is_sum_determinate[..., array_ops.newaxis],
- multiples=array_ops.concat([
- array_ops.ones(array_ops.rank(is_sum_determinate),
- dtype=dtypes.int32),
- [4],
- ], axis=0))
- x = array_ops.where(is_sum_determinate,
- x,
- array_ops.fill(array_ops.shape(x),
- value=x.dtype.as_numpy_dtype(np.inf)))
-
- return math_ops.reduce_sum(x, axis=-1)
-
-
-def _choose(is_accepted,
- accepted,
- rejected,
- independent_chain_ndims,
- name=None):
- """Helper to `kernel` which expand_dims `is_accepted` to apply tf.where."""
- def _expand_is_accepted_like(x):
- with ops.name_scope("_choose"):
- expand_shape = array_ops.concat([
- array_ops.shape(is_accepted),
- array_ops.ones([array_ops.rank(x) - array_ops.rank(is_accepted)],
- dtype=dtypes.int32),
- ], axis=0)
- multiples = array_ops.concat([
- array_ops.ones([array_ops.rank(is_accepted)], dtype=dtypes.int32),
- array_ops.shape(x)[independent_chain_ndims:],
- ], axis=0)
- m = array_ops.tile(array_ops.reshape(is_accepted, expand_shape),
- multiples)
- m.set_shape(x.shape)
- return m
- with ops.name_scope(name, "_choose", values=[
- is_accepted, accepted, rejected, independent_chain_ndims]):
- return array_ops.where(_expand_is_accepted_like(accepted),
- accepted,
- rejected)
-
-
-def _maybe_call_fn_and_grads(fn,
- fn_arg_list,
- fn_result=None,
- grads_fn_result=None,
- description="target_log_prob"):
- """Helper which computes `fn_result` and `grads` if needed."""
- fn_arg_list = (list(fn_arg_list) if _is_list_like(fn_arg_list)
- else [fn_arg_list])
- if fn_result is None:
- fn_result = fn(*fn_arg_list)
- if not fn_result.dtype.is_floating:
- raise TypeError("`{}` must be a `Tensor` with `float` `dtype`.".format(
- description))
- if grads_fn_result is None:
- grads_fn_result = gradients_ops.gradients(
- fn_result, fn_arg_list)
- if len(fn_arg_list) != len(grads_fn_result):
- raise ValueError("`{}` must be in one-to-one correspondence with "
- "`grads_{}`".format(*[description]*2))
- if any(g is None for g in grads_fn_result):
- raise ValueError("Encountered `None` gradient.")
- return fn_result, grads_fn_result
-
-
-def _prepare_args(target_log_prob_fn, state, step_size,
- target_log_prob=None, grads_target_log_prob=None,
- maybe_expand=False, description="target_log_prob"):
- """Helper which processes input args to meet list-like assumptions."""
- state_parts = list(state) if _is_list_like(state) else [state]
- state_parts = [ops.convert_to_tensor(s, name="state")
- for s in state_parts]
- target_log_prob, grads_target_log_prob = _maybe_call_fn_and_grads(
- target_log_prob_fn,
- state_parts,
- target_log_prob,
- grads_target_log_prob,
- description)
- step_sizes = list(step_size) if _is_list_like(step_size) else [step_size]
- step_sizes = [
- ops.convert_to_tensor(
- s, name="step_size", dtype=target_log_prob.dtype)
- for s in step_sizes]
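- # A single step size is shared across every state part.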
- if len(step_sizes) == 1:
- step_sizes *= len(state_parts)
- if len(state_parts) != len(step_sizes):
- raise ValueError("There should be exactly one `step_size` or it should "
- "have same length as `current_state`.")
- maybe_flatten = lambda x: x if maybe_expand or _is_list_like(state) else x[0]
- return [
- maybe_flatten(state_parts),
- maybe_flatten(step_sizes),
- target_log_prob,
- grads_target_log_prob,
- ]
-
-
-def _is_list_like(x):
- """Helper which returns `True` if input is `list`-like."""
- return isinstance(x, (tuple, list))
-
-
-def _log_sum_sq(x, axis=None):
- """Computes log(sum(x**2))."""
- return math_ops.reduce_logsumexp(2. * math_ops.log(math_ops.abs(x)), axis)
diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py
deleted file mode 100644
index e7fcbc65ef..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functions to create a Markov Chain Monte Carlo Metropolis step."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.bayesflow.python.ops.metropolis_hastings_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
-
-_allowed_symbols = [
- 'kernel',
- 'evolve',
- 'proposal_uniform',
- 'proposal_normal',
-]
-
-remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py
deleted file mode 100644
index 05aa134ed5..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py
+++ /dev/null
@@ -1,527 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Metropolis-Hastings and proposal distributions.
-
-@@kernel
-@@evolve
-@@proposal_uniform
-@@proposal_normal
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops import state_ops
-
-__all__ = [
- "kernel",
- "evolve",
- "proposal_uniform",
- "proposal_normal",
-]
-
-
-KernelResults = collections.namedtuple(
- "KernelResults",
- [
- "log_accept_ratio",
- "current_target_log_prob", # "Current result" means "accepted".
- "is_accepted",
- "proposed_state",
- ])
-
-
-def kernel(target_log_prob_fn,
- proposal_fn,
- current_state,
- seed=None,
- current_target_log_prob=None,
- name=None):
- """Runs the Metropolis-Hastings transition kernel.
-
- This function can update multiple chains in parallel. It assumes that all
- leftmost dimensions of `current_state` index independent chain states (and are
- therefore updated independently). The output of `target_log_prob_fn()` should
- sum log-probabilities across all event dimensions. Slices along the rightmost
- dimensions may have different target distributions; for example,
- `current_state[0, :]` could have a different target distribution from
- `current_state[1, :]`. This is up to `target_log_prob_fn()`. (The number of
- independent chains is `tf.size(target_log_prob_fn(*current_state))`.)
-
- Args:
- target_log_prob_fn: Python callable which takes an argument like
- `current_state` (or `*current_state` if it's a list) and returns its
- (possibly unnormalized) log-density under the target distribution.
- proposal_fn: Python callable which takes an argument like `current_state`
- (or `*current_state` if it's a list) and returns a tuple of proposed
- states of same shape as `current_state`, and a log ratio `Tensor` of same
- shape as `current_target_log_prob`. The log ratio is the log-probability of
- `current_state` given the proposed states minus the log-probability of the
- proposed states given `current_state`. If the proposal is symmetric, set
- the second value
- to `None`: this enables more efficient computation than explicitly
- supplying a tensor of zeros.
- current_state: `Tensor` or Python `list` of `Tensor`s representing the
- current state(s) of the Markov chain(s). The first `r` dimensions index
- independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
- seed: Python integer to seed the random number generator.
- current_target_log_prob: (Optional) `Tensor` representing the value of
- `target_log_prob_fn` at the `current_state`. The only reason to
- specify this argument is to reduce TF graph size.
- Default value: `None` (i.e., compute as needed).
- name: A name of the operation (optional).
-
- Returns:
- next_state: Tensor or Python list of `Tensor`s representing the state(s)
- of the Markov chain(s) at each result step. Has same shape as
- `current_state`.
- kernel_results: `collections.namedtuple` of internal calculations used to
- advance the chain.
-
- #### Examples
-
- We illustrate Metropolis-Hastings on a Normal likelihood with
- unknown mean.
-
- ```python
- tfd = tf.contrib.distributions
- tfp = tf.contrib.bayesflow
-
- loc = tf.get_variable("loc", initializer=1.)
- x = tf.constant([0.0] * 50)
-
- def make_target_log_prob_fn(x):
- def target_log_prob_fn(loc):
- prior = tfd.Normal(loc=0., scale=1.)
- likelihood = tfd.Independent(
- tfd.Normal(loc=loc, scale=0.1),
- reinterpreted_batch_ndims=1)
- return prior.log_prob(loc) + likelihood.log_prob(x)
- return target_log_prob_fn
-
- next_state, kernel_results = tfp.metropolis_hastings.kernel(
- target_log_prob_fn=make_target_log_prob_fn(x),
- proposal_fn=tfp.metropolis_hastings.proposal_normal(),
- current_state=loc)
- loc_update = loc.assign(next_state)
- ```
-
- We illustrate Metropolis-Hastings on a Normal likelihood with
- unknown mean and variance, running 4 chains in parallel.
-
- ```python
- tfd = tf.contrib.distributions
- tfp = tf.contrib.bayesflow
-
- num_chains = 4
- loc = tf.get_variable("loc", shape=[num_chains],
- initializer=tf.random_normal_initializer())
- scale = tf.get_variable("scale", shape=[num_chains],
- initializer=tf.ones_initializer())
- x = tf.constant([0.0] * 50)
-
- def make_target_log_prob_fn(x):
- data = tf.reshape(x, shape=[-1, 1])
- def target_log_prob_fn(loc, scale):
- prior_loc = tfd.Normal(loc=0., scale=1.)
- prior_scale = tfd.InverseGamma(concentration=1., rate=1.)
- likelihood = tfd.Independent(
- tfd.Normal(loc=loc, scale=scale),
- reinterpreted_batch_ndims=1)
- return (prior_loc.log_prob(loc) +
- prior_scale.log_prob(scale) +
- likelihood.log_prob(data))
- return target_log_prob_fn
-
- def proposal_fn(loc, scale):
- loc_proposal = tfp.metropolis_hastings.proposal_normal()
- scale_proposal = tfp.metropolis_hastings.proposal_uniform(minval=-1.)
- proposed_loc, _ = loc_proposal(loc)
- proposed_scale, _ = scale_proposal(scale)
- proposed_scale = tf.maximum(proposed_scale, 0.01)
- return [proposed_loc, proposed_scale], None
-
- next_state, kernel_results = tfp.metropolis_hastings.kernel(
- target_log_prob_fn=make_target_log_prob_fn(x),
- proposal_fn=proposal_fn,
- current_state=[loc, scale])
- train_op = tf.group(loc.assign(next_state[0]),
- scale.assign(next_state[1]))
- ```
-
- """
- with ops.name_scope(
- name, "metropolis_hastings_kernel",
- [current_state, seed, current_target_log_prob]):
- with ops.name_scope("initialize"):
- maybe_expand = lambda x: list(x) if _is_list_like(x) else [x]
- current_state_parts = maybe_expand(current_state)
- if current_target_log_prob is None:
- current_target_log_prob = target_log_prob_fn(*current_state_parts)
-
- proposed_state, log_transit_ratio = proposal_fn(*current_state_parts)
- proposed_state_parts = maybe_expand(proposed_state)
-
- proposed_target_log_prob = target_log_prob_fn(*proposed_state_parts)
-
- with ops.name_scope(
- "accept_reject",
- [current_state_parts, proposed_state_parts,
- current_target_log_prob, proposed_target_log_prob]):
- log_accept_ratio = proposed_target_log_prob - current_target_log_prob
- # A `log_transit_ratio` of None means the proposal is assumed symmetric,
- # i.e., log p(old | new) - log p(new | old) = 0, so no correction is
- # needed.
- if log_transit_ratio is not None:
- log_accept_ratio += log_transit_ratio
-
- # u < exp(log_accept_ratio), where u~Uniform[0,1)
- # ==> log(u) < log_accept_ratio
- random_value = random_ops.random_uniform(
- array_ops.shape(log_accept_ratio),
- dtype=log_accept_ratio.dtype,
- seed=seed)
- random_negative = math_ops.log(random_value)
- is_accepted = random_negative < log_accept_ratio
- next_state_parts = [array_ops.where(is_accepted,
- proposed_state_part,
- current_state_part)
- for proposed_state_part, current_state_part in
- zip(proposed_state_parts, current_state_parts)]
- accepted_log_prob = array_ops.where(is_accepted,
- proposed_target_log_prob,
- current_target_log_prob)
- maybe_flatten = lambda x: x if _is_list_like(current_state) else x[0]
- return [
- maybe_flatten(next_state_parts),
- KernelResults(
- log_accept_ratio=log_accept_ratio,
- current_target_log_prob=accepted_log_prob,
- is_accepted=is_accepted,
- proposed_state=maybe_flatten(proposed_state_parts),
- ),
- ]
-
-
-def evolve(initial_sample,
- initial_log_density,
- initial_log_accept_ratio,
- target_log_prob_fn,
- proposal_fn,
- n_steps=1,
- seed=None,
- name=None):
- """Performs `n_steps` of the Metropolis-Hastings update.
-
- Given a probability density function, `f(x)` and a proposal scheme which
- generates new points from old, this `Op` returns a tensor
- which may be used to generate approximate samples from the target distribution
- using the Metropolis-Hastings algorithm. These samples are from a Markov chain
- whose equilibrium distribution matches the target distribution.
-
- The probability distribution may have an unknown normalization constant.
- We parameterize the probability density as follows:
-
- ```none
- f(x) = exp(L(x) + constant)
- ```
-
- Here `L(x)` is any continuous function with a (possibly unknown but finite)
- upper bound, i.e., there exists a number `beta` such that
- `L(x) < beta < infinity` for all `x`. The constant is the normalization needed
- to make `f(x)` a probability density (as opposed to just a finite measure).
-
- Although `initial_sample` can be arbitrary, a poor choice may result in a
- slow-to-mix chain. In many cases the best choice is the one that maximizes
- the target density, i.e., choose `initial_sample` such that
- `f(initial_sample) >= f(x)` for all `x`.
-
- If the support of the distribution is a strict subset of R^n (but of non-zero
- measure), then the unnormalized log-density `L(x)` should return `-infinity`
- outside the support domain. This effectively forces the sampler to only
- explore points in the regions of finite support.
-
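- For example, a log-density supported only on the positive reals might be
- written as follows (a hypothetical sketch; `tf` and `np` are assumed to be
- imported as usual):
-
- ```python
- def log_density(x):
-   # L(x) = -x for x > 0, i.e., an Exponential(1) density up to a constant;
-   # -inf outside the support means such proposals are always rejected.
-   return tf.where(x > 0., -x, -np.inf * tf.ones_like(x))
- ```
-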
- Usage:
- This function is meant to be wrapped up with some of the common proposal
- schemes (e.g. random walk, Langevin diffusion etc) to produce a more user
- friendly interface. However, it may also be used to create bespoke samplers.
-
- The following example demonstrates running 1000 uniform random-walk
- Metropolis samplers in parallel for a unit normal target distribution.
-
- ```python
- n = 3 # dimension of the problem
-
- # Generate 1000 initial values randomly. Each of these would be an
- # independent starting point for a Markov chain.
- state = tf.get_variable(
- "state",
- initializer=tf.random_normal([1000, n],
- mean=3.0,
- dtype=tf.float64,
- seed=42))
-
- # Computes the log(p(x)) for the unit normal density and ignores the
- # normalization constant.
- def log_density(x):
- return -tf.reduce_sum(x * x, axis=-1) / 2.0
-
- # Initial log-density value
- state_log_density = tf.get_variable(
- "state_log_density",
- initializer=log_density(state.initialized_value()))
-
- # A variable to store the log_acceptance_ratio:
- log_acceptance_ratio = tf.get_variable(
- "log_acceptance_ratio",
- initializer=tf.zeros([1000], dtype=tf.float64))
-
- # Generates random proposals by moving each coordinate uniformly and
- # independently in a box of size 2 centered around the current value.
- # Returns the new point and also the log of the Hastings ratio (the
- # ratio of the probability of going from the proposal to the origin to
- # the probability of the reverse transition). When this ratio is 1, the value
- # may be omitted and replaced by None.
- def random_proposal(x):
- return (x + tf.random_uniform(tf.shape(x), minval=-1, maxval=1,
- dtype=x.dtype, seed=12)), None
-
- # Create the op to propagate the chain for 100 steps.
- stepper = mh.evolve(
- state, state_log_density, log_acceptance_ratio,
- log_density, random_proposal, n_steps=100, seed=123)
- init = tf.global_variables_initializer()
- with tf.Session() as sess:
- sess.run(init)
- # Run the chains for a total of 1000 steps and print out the mean across
- # the chains every 100 iterations.
- for n_iter in range(10):
- # Executing the stepper advances the chain to the next state.
- sess.run(stepper)
- # Print out the current value of the mean(sample) for every dimension.
- print(np.mean(sess.run(state), 0))
- # Estimated covariance matrix
- samples = sess.run(state)
- print(np.cov(samples, rowvar=False))
- ```
-
- Args:
- initial_sample: A float-like `tf.Variable` of any shape that can
- be consumed by the `target_log_prob_fn` and `proposal_fn`
- callables.
- initial_log_density: Float-like `tf.Variable` with `dtype` and shape
- equivalent to `target_log_prob_fn(initial_sample)`, i.e., matching
- the result of `target_log_prob_fn` invoked at `initial_sample`.
- initial_log_accept_ratio: A `tf.Variable` with `dtype` and shape matching
- `initial_log_density`. Holds the log of the Metropolis-Hastings
- acceptance ratio after propagating the chain for `n_steps`.
- target_log_prob_fn: A Python callable evaluated at
- `current_state` and returning a float-like `Tensor` of log target-density
- up to a normalizing constant. In other words,
- `target_log_prob_fn(x) = log(g(x))`, where
- `target_density = g(x)/Z` for some constant `Z`. The shape of the input
- tensor is the same as the shape of the `current_state`. The shape of the
- output tensor is either
- (a). Same as the input shape if the density being sampled is one
- dimensional, or
- (b). If the density is defined for `events` of shape
- `event_shape = [E1, E2, ... Ee]`, then the input tensor should be of
- shape `batch_shape + event_shape`, where `batch_shape = [B1, ..., Bb]`
- and the result must be of shape [B1, ..., Bb]. For example, if the
- distribution that is being sampled is a 10 dimensional normal,
- then the input tensor may be of shape [100, 10] or [30, 20, 10]. The
- last dimension will then be 'consumed' by `target_log_prob_fn`
- and it should return tensors of shape [100] and [30, 20] respectively.
- proposal_fn: A callable accepting a real valued `Tensor` of current sample
- points and returning a tuple of two `Tensors`. The first element of the
- pair should be a `Tensor` containing the proposal state and should have
- the same shape as the input `Tensor`. The second element of the pair gives
- the log of the ratio of the probability of transitioning from the
- proposal points to the input points and the probability of transitioning
- from the input points to the proposal points. If the proposal is
- symmetric, i.e.
- Probability(Proposal -> Current) = Probability(Current -> Proposal)
- the second value should be set to None instead of explicitly supplying a
- tensor of zeros. In addition to being convenient, this also leads to a
- more efficient graph.
- n_steps: A positive `int` or a scalar `int32` tensor. Sets the number of
- iterations of the chain.
- seed: `int` or None. The random seed for this `Op`. If `None`, no seed is
- applied.
- name: A string that sets the name for this `Op`.
-
- Returns:
- forward_step: an `Op` to step the Markov chain forward for `n_steps`.
- """
-
- with ops.name_scope(name, "metropolis_hastings", [initial_sample]):
- current_state = initial_sample
- current_target_log_prob = initial_log_density
- log_accept_ratio = initial_log_accept_ratio
-
- def step(i, current_state, current_target_log_prob, log_accept_ratio):
- """Wrap single Markov chain iteration in `while_loop`."""
- next_state, kernel_results = kernel(
- target_log_prob_fn=target_log_prob_fn,
- proposal_fn=proposal_fn,
- current_state=current_state,
- current_target_log_prob=current_target_log_prob,
- seed=seed)
- accepted_log_prob = kernel_results.current_target_log_prob
- log_accept_ratio = kernel_results.log_accept_ratio
- return i + 1, next_state, accepted_log_prob, log_accept_ratio
-
- (_, accepted_state, accepted_target_log_prob, accepted_log_accept_ratio) = (
- control_flow_ops.while_loop(
- cond=lambda i, *ignored_args: i < n_steps,
- body=step,
- loop_vars=[
- 0, # i
- current_state,
- current_target_log_prob,
- log_accept_ratio,
- ],
- parallel_iterations=1 if seed is not None else 10,
- # TODO(b/73775595): Confirm optimal setting of swap_memory.
- swap_memory=True))
-
- forward_step = control_flow_ops.group(
- state_ops.assign(current_target_log_prob, accepted_target_log_prob),
- state_ops.assign(current_state, accepted_state),
- state_ops.assign(log_accept_ratio, accepted_log_accept_ratio))
-
- return forward_step
-
-
-def proposal_uniform(step_size=1.,
- seed=None,
- name=None):
- """Returns a callable that adds a random uniform tensor to the input.
-
- This function returns a callable that accepts one `Tensor` argument of any
- shape and a real data type (i.e. `tf.float32` or `tf.float64`). It adds a
- sample drawn uniformly from `[-step_size, step_size]` to its input. It would
- also return the log-ratio of the forward and reverse transition
- probabilities, but since this log-ratio is identically 0 (the probability of
- drawing a value `x` from the symmetric uniform distribution is the same as
- the probability of drawing `-x`), it simply returns `None` for the second
- element of the returned tuple.
-
- Args:
- step_size: A positive `float` or a scalar tensor of real dtype
- controlling the scale of the uniform distribution.
- If step_size = a, then draws are made uniformly from [-a, a].
- seed: `int` or None. The random seed for this `Op`. If `None`, no seed is
- applied.
- name: A string that sets the name for this `Op`.
-
- Returns:
- proposal_fn: A callable accepting one float-like `Tensor` and returning a
- 2-tuple. The first value in the tuple is a `Tensor` of the same shape and
- dtype as the input argument and the second element of the tuple is None.
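-
- #### Examples
-
- A sketch of typical usage (`current_state` here is illustrative):
-
- ```python
- rw_proposal = proposal_uniform(step_size=0.5)
- proposed_state, log_transit_ratio = rw_proposal(current_state)
- # log_transit_ratio is None because the proposal is symmetric.
- ```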
- """
-
- with ops.name_scope(name, "proposal_uniform", [step_size]):
- step_size = ops.convert_to_tensor(step_size, name="step_size")
-
- def proposal_fn(input_state, name=None):
- """Adds a uniform perturbation to the input state.
-
- Args:
- input_state: A `Tensor` of any shape and real dtype.
- name: A string that sets the name for this `Op`.
-
- Returns:
- proposal_state: A float-like `Tensor` with `dtype` and shape matching
- `input_state`.
- log_transit_ratio: `None`. Proposal is symmetric.
- """
- with ops.name_scope(name, "proposer", [input_state]):
- input_state = ops.convert_to_tensor(input_state, name="input_state")
- return input_state + random_ops.random_uniform(
- array_ops.shape(input_state),
- minval=-step_size,
- maxval=step_size,
- seed=seed), None
- return proposal_fn
-
-
-def proposal_normal(scale=1.,
- seed=None,
- name=None):
- """Returns a callable that adds a random normal tensor to the input.
-
- This function returns a callable that accepts one `Tensor` argument of any
- shape and a real data type (i.e. `tf.float32` or `tf.float64`). The callable
- adds a sample from a zero-mean normal distribution with the supplied
- standard deviation to its input argument; the result is the proposal point.
- The callable returns a tuple with the proposal point as the first element.
- The second element is identically `None`. It is included so the callable is
- compatible with the expected signature of the `proposal_fn` argument in the
- `kernel` and `evolve` functions. A value of `None` indicates that the
- probability of going from the input point to the proposal point is equal to
- the probability of going from the proposal point to the input point.
-
- Args:
- scale: A positive `float` or a scalar tensor of any real dtype controlling
- the scale of the normal distribution.
- seed: `int` or None. The random seed for this `Op`. If `None`, no seed is
- applied.
- name: A string that sets the name for this `Op`.
-
- Returns:
- proposal_fn: A callable accepting one float-like `Tensor` and returning a
- 2-tuple. The first value in the tuple is a `Tensor` of the same shape and
- dtype as the input argument and the second element of the tuple is None.
- """
-
- with ops.name_scope(name, "proposal_normal", [scale]):
- scale = ops.convert_to_tensor(scale, name="scale")
-
- def proposal_fn(input_state, name=None):
- """Adds a normal perturbation to the input state.
-
- Args:
- input_state: A `Tensor` of any shape and real dtype.
- name: A string that sets the name for this `Op`.
-
- Returns:
- proposal_state: A float-like `Tensor` with `dtype` and shape matching
- `input_state`.
- log_transit_ratio: `None`. Proposal is symmetric.
- """
-
- with ops.name_scope(name, "proposer", [input_state]):
- input_state = ops.convert_to_tensor(input_state, name="input_state")
- return input_state + random_ops.random_normal(
- array_ops.shape(input_state),
- mean=0.,
- stddev=scale,
- dtype=scale.dtype,
- seed=seed), None
- return proposal_fn
-
-
-def _is_list_like(x):
- """Helper which returns `True` if input is `list`-like."""
- return isinstance(x, (tuple, list))