Finish deprecation of tf.contrib.bayesflow.{HMC,MetropolisHastings}.

New home: https://github.com/tensorflow/probability/tree/master/tensorflow_probability/python/mcmc PiperOrigin-RevId: 190560180
author: Joshua V. Dillon <jvdillon@google.com> 2018-03-26 18:50:27 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-03-26 18:52:45 -0700
commit: 0be974c423f6e5c363db2d95ed335dde4cb4e69b (patch)
tree: 21ebd16c26fa85b2dcaf8de3221404c1d37cfb68 /tensorflow/contrib/bayesflow
parent: 931f6d553172ddfc9ec4a7a94ea2c6233bf33cb0 (diff)
9 files changed, 17 insertions, 2678 deletions
diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD
index c6feec68e0..a55029b314 100644
--- a/tensorflow/contrib/bayesflow/BUILD
+++ b/tensorflow/contrib/bayesflow/BUILD
@@ -38,25 +38,6 @@ py_library(
 )
 
 cuda_py_test(
-    name = "metropolis_hastings_test",
-    size = "large",
-    srcs = ["python/kernel_tests/metropolis_hastings_test.py"],
-    additional_deps = [
-        ":bayesflow_py",
-        "//third_party/py/numpy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:random_ops",
-        "//tensorflow/python:variable_scope",
-        "//tensorflow/python:variables",
-    ],
-)
-
-cuda_py_test(
     name = "monte_carlo_test",
     size = "small",
     srcs = ["python/kernel_tests/monte_carlo_test.py"],
@@ -77,28 +58,6 @@ cuda_py_test(
     ],
 )
 
-cuda_py_test(
-    name = "hmc_test",
-    size = "large",
-    srcs = ["python/kernel_tests/hmc_test.py"],
-    additional_deps = [
-        ":bayesflow_py",
-        "//third_party/py/numpy",
-        "//tensorflow/contrib/distributions:distributions_py",
-        "//tensorflow/contrib/layers:layers_py",
-        "//tensorflow/python/ops/distributions",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:gradients",
-        "//tensorflow/python:math_ops",
-        "//tensorflow/python:platform_test",
-        "//tensorflow/python:random_seed",
-    ],
-    tags = ["nomsan"],
-)
-
 filegroup(
     name = "all_files",
     srcs = glob(
diff --git a/tensorflow/contrib/bayesflow/README.md b/tensorflow/contrib/bayesflow/README.md
new file mode 100644
index 0000000000..10323dc6d5
--- /dev/null
+++ b/tensorflow/contrib/bayesflow/README.md
@@ -0,0 +1,17 @@
+# Notice
+
+`tf.contrib.bayesflow` has moved!
+
+See new code at [github.com/tensorflow/probability](
+https://github.com/tensorflow/probability).
+
+Switch imports with:
+
+```python
+# old
+import tensorflow as tf
+tfp = tf.contrib.bayesflow
+
+# new
+import tensorflow_probability as tfp
+```
diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py
index f868203826..41a8c920fc 100644
--- a/tensorflow/contrib/bayesflow/__init__.py
+++ b/tensorflow/contrib/bayesflow/__init__.py
@@ -21,8 +21,6 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=unused-import,line-too-long
-from tensorflow.contrib.bayesflow.python.ops import hmc
-from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings
 from tensorflow.contrib.bayesflow.python.ops import monte_carlo
 # pylint: enable=unused-import,line-too-long
 
@@ -30,13 +28,7 @@ from tensorflow.python.util.all_util import remove_undocumented
 
 
 _allowed_symbols = [
-    'entropy',
-    'hmc',
-    'metropolis_hastings',
     'monte_carlo',
-    'special_math',
-    'stochastic_variables',
-    'variational_inference',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
deleted file mode 100644
index dabadfc7b6..0000000000
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py
+++ /dev/null
@@ -1,737 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Hamiltonian Monte Carlo."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-import numpy as np
-from scipy import stats
-
-from tensorflow.contrib.bayesflow.python.ops import hmc
-from tensorflow.contrib.bayesflow.python.ops.hmc_impl import _compute_energy_change
-from tensorflow.contrib.bayesflow.python.ops.hmc_impl import _leapfrog_integrator
-
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import random_seed
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gen_linalg_ops
-from tensorflow.python.ops import gradients_impl as gradients_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import gamma as gamma_lib
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.platform import test
-from tensorflow.python.platform import tf_logging as logging_ops
-
-
-def _reduce_variance(x, axis=None, keepdims=False):
-  sample_mean = math_ops.reduce_mean(x, axis, keepdims=True)
-  return math_ops.reduce_mean(
-      math_ops.squared_difference(x, sample_mean), axis, keepdims)
-
-
-class HMCTest(test.TestCase):
-
-  def setUp(self):
-    self._shape_param = 5.
-    self._rate_param = 10.
-
-    random_seed.set_random_seed(10003)
-    np.random.seed(10003)
-
-  def assertAllFinite(self, x):
-    self.assertAllEqual(np.ones_like(x).astype(bool), np.isfinite(x))
-
-  def _log_gamma_log_prob(self, x, event_dims=()):
-    """Computes log-pdf of a log-gamma random variable.
-
-    Args:
-      x: Value of the random variable.
-      event_dims: Dimensions not to treat as independent.
-
-    Returns:
-      log_prob: The log-pdf up to a normalizing constant.
-    """
-    return math_ops.reduce_sum(self._shape_param * x -
-                               self._rate_param * math_ops.exp(x),
-                               event_dims)
-
-  def _integrator_conserves_energy(self, x, independent_chain_ndims, sess,
-                                   feed_dict=None):
-    step_size = array_ops.placeholder(np.float32, [], name="step_size")
-    hmc_lf_steps = array_ops.placeholder(np.int32, [], name="hmc_lf_steps")
-
-    if feed_dict is None:
-      feed_dict = {}
-    feed_dict[hmc_lf_steps] = 1000
-
-    event_dims = math_ops.range(independent_chain_ndims,
-                                array_ops.rank(x))
-
-    m = random_ops.random_normal(array_ops.shape(x))
-    log_prob_0 = self._log_gamma_log_prob(x, event_dims)
-    grad_0 = gradients_ops.gradients(log_prob_0, x)
-    old_energy = -log_prob_0 + 0.5 * math_ops.reduce_sum(m**2., event_dims)
-
-    new_m, _, log_prob_1, _ = _leapfrog_integrator(
-        current_momentums=[m],
-        target_log_prob_fn=lambda x: self._log_gamma_log_prob(x, event_dims),
-        current_state_parts=[x],
-        step_sizes=[step_size],
-        num_leapfrog_steps=hmc_lf_steps,
-        current_target_log_prob=log_prob_0,
-        current_grads_target_log_prob=grad_0)
-    new_m = new_m[0]
-
-    new_energy = -log_prob_1 + 0.5 * math_ops.reduce_sum(new_m * new_m,
-                                                         event_dims)
-
-    x_shape = sess.run(x, feed_dict).shape
-    event_size = np.prod(x_shape[independent_chain_ndims:])
-    feed_dict[step_size] = 0.1 / event_size
-    old_energy_, new_energy_ = sess.run([old_energy, new_energy],
-                                        feed_dict)
-    logging_ops.vlog(1, "average energy relative change: {}".format(
-        (1. - new_energy_ / old_energy_).mean()))
-    self.assertAllClose(old_energy_, new_energy_, atol=0., rtol=0.02)
-
-  def _integrator_conserves_energy_wrapper(self, independent_chain_ndims):
-    """Tests the long-term energy conservation of the leapfrog integrator.
-
-    The leapfrog integrator is symplectic, so for sufficiently small step
-    sizes it should be possible to run it more or less indefinitely without
-    the energy of the system blowing up or collapsing.
-
-    Args:
-      independent_chain_ndims: Python `int` scalar representing the number of
-        dims associated with independent chains.
-    """
-    with self.test_session(graph=ops.Graph()) as sess:
-      x_ph = array_ops.placeholder(np.float32, name="x_ph")
-      feed_dict = {x_ph: np.random.rand(50, 10, 2)}
-      self._integrator_conserves_energy(x_ph, independent_chain_ndims,
-                                        sess, feed_dict)
-
-  def testIntegratorEnergyConservationNullShape(self):
-    self._integrator_conserves_energy_wrapper(0)
-
-  def testIntegratorEnergyConservation1(self):
-    self._integrator_conserves_energy_wrapper(1)
-
-  def testIntegratorEnergyConservation2(self):
-    self._integrator_conserves_energy_wrapper(2)
-
-  def testIntegratorEnergyConservation3(self):
-    self._integrator_conserves_energy_wrapper(3)
-
-  def testSampleChainSeedReproducibleWorksCorrectly(self):
-    with self.test_session(graph=ops.Graph()) as sess:
-      num_results = 10
-      independent_chain_ndims = 1
-
-      def log_gamma_log_prob(x):
-        event_dims = math_ops.range(independent_chain_ndims,
-                                    array_ops.rank(x))
-        return self._log_gamma_log_prob(x, event_dims)
-
-      kwargs = dict(
-          target_log_prob_fn=log_gamma_log_prob,
-          current_state=np.random.rand(4, 3, 2),
-          step_size=0.1,
-          num_leapfrog_steps=2,
-          num_burnin_steps=150,
-          seed=52,
-      )
-
-      samples0, kernel_results0 = hmc.sample_chain(
-          **dict(list(kwargs.items()) + list(dict(
-              num_results=2 * num_results,
-              num_steps_between_results=0).items())))
-
-      samples1, kernel_results1 = hmc.sample_chain(
-          **dict(list(kwargs.items()) + list(dict(
-              num_results=num_results,
-              num_steps_between_results=1).items())))
-
-      [
-          samples0_,
-          samples1_,
-          target_log_prob0_,
-          target_log_prob1_,
-      ] = sess.run([
-          samples0,
-          samples1,
-          kernel_results0.current_target_log_prob,
-          kernel_results1.current_target_log_prob,
-      ])
-      self.assertAllClose(samples0_[::2], samples1_,
-                          atol=1e-5, rtol=1e-5)
-      self.assertAllClose(target_log_prob0_[::2], target_log_prob1_,
-                          atol=1e-5, rtol=1e-5)
-
-  def _chain_gets_correct_expectations(self, x, independent_chain_ndims,
-                                       sess, feed_dict=None):
-    counter = collections.Counter()
-    def log_gamma_log_prob(x):
-      counter["target_calls"] += 1
-      event_dims = math_ops.range(independent_chain_ndims,
-                                  array_ops.rank(x))
-      return self._log_gamma_log_prob(x, event_dims)
-
-    num_results = array_ops.placeholder(
-        np.int32, [], name="num_results")
-    step_size = array_ops.placeholder(
-        np.float32, [], name="step_size")
-    num_leapfrog_steps = array_ops.placeholder(
-        np.int32, [], name="num_leapfrog_steps")
-
-    if feed_dict is None:
-      feed_dict = {}
-    feed_dict.update({num_results: 150,
-                      step_size: 0.05,
-                      num_leapfrog_steps: 2})
-
-    samples, kernel_results = hmc.sample_chain(
-        num_results=num_results,
-        target_log_prob_fn=log_gamma_log_prob,
-        current_state=x,
-        step_size=step_size,
-        num_leapfrog_steps=num_leapfrog_steps,
-        num_burnin_steps=150,
-        seed=42)
-
-    self.assertAllEqual(dict(target_calls=2), counter)
-
-    expected_x = (math_ops.digamma(self._shape_param)
-                  - np.log(self._rate_param))
-
-    expected_exp_x = self._shape_param / self._rate_param
-
-    log_accept_ratio_, samples_, expected_x_ = sess.run(
-        [kernel_results.log_accept_ratio, samples, expected_x],
-        feed_dict)
-
-    actual_x = samples_.mean()
-    actual_exp_x = np.exp(samples_).mean()
-    acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
-
-    logging_ops.vlog(1, "True      E[x, exp(x)]: {}\t{}".format(
-        expected_x_, expected_exp_x))
-    logging_ops.vlog(1, "Estimated E[x, exp(x)]: {}\t{}".format(
-        actual_x, actual_exp_x))
-    self.assertNear(actual_x, expected_x_, 2e-2)
-    self.assertNear(actual_exp_x, expected_exp_x, 2e-2)
-    self.assertAllEqual(np.ones_like(acceptance_probs, np.bool),
-                        acceptance_probs > 0.5)
-    self.assertAllEqual(np.ones_like(acceptance_probs, np.bool),
-                        acceptance_probs <= 1.)
-
-  def _chain_gets_correct_expectations_wrapper(self, independent_chain_ndims):
-    with self.test_session(graph=ops.Graph()) as sess:
-      x_ph = array_ops.placeholder(np.float32, name="x_ph")
-      feed_dict = {x_ph: np.random.rand(50, 10, 2)}
-      self._chain_gets_correct_expectations(x_ph, independent_chain_ndims,
-                                            sess, feed_dict)
-
-  def testHMCChainExpectationsNullShape(self):
-    self._chain_gets_correct_expectations_wrapper(0)
-
-  def testHMCChainExpectations1(self):
-    self._chain_gets_correct_expectations_wrapper(1)
-
-  def testHMCChainExpectations2(self):
-    self._chain_gets_correct_expectations_wrapper(2)
-
-  def testKernelResultsUsingTruncatedDistribution(self):
-    def log_prob(x):
-      return array_ops.where(
-          x >= 0.,
-          -x - x**2,  # Non-constant gradient.
-          array_ops.fill(x.shape, math_ops.cast(-np.inf, x.dtype)))
-    # This log_prob has the property that it is likely to attract
-    # the flow toward, and below, zero...but for x <=0,
-    # log_prob(x) = -inf, which should result in rejection, as well
-    # as a non-finite log_prob.  Thus, this distribution gives us an opportunity
-    # to test out the kernel results ability to correctly capture rejections due
-    # to finite AND non-finite reasons.
-    # Why use a non-constant gradient?  This ensures the leapfrog integrator
-    # will not be exact.
-
-    num_results = 1000
-    # Large step size, will give rejections due to integration error in addition
-    # to rejection due to going into a region of log_prob = -inf.
-    step_size = 0.1
-    num_leapfrog_steps = 5
-    num_chains = 2
-
-    with self.test_session(graph=ops.Graph()) as sess:
-
-      # Start multiple independent chains.
-      initial_state = ops.convert_to_tensor([0.1] * num_chains)
-
-      states, kernel_results = hmc.sample_chain(
-          num_results=num_results,
-          target_log_prob_fn=log_prob,
-          current_state=initial_state,
-          step_size=step_size,
-          num_leapfrog_steps=num_leapfrog_steps,
-          seed=42)
-
-      states_, kernel_results_ = sess.run([states, kernel_results])
-      pstates_ = kernel_results_.proposed_state
-
-      neg_inf_mask = np.isneginf(kernel_results_.proposed_target_log_prob)
-
-      # First:  Test that the mathematical properties of the above log prob
-      # function in conjunction with HMC show up as expected in kernel_results_.
-
-      # We better have log_prob = -inf some of the time.
-      self.assertLess(0, neg_inf_mask.sum())
-      # We better have some rejections due to something other than -inf.
-      self.assertLess(neg_inf_mask.sum(), (~kernel_results_.is_accepted).sum())
-      # We better have accepted a decent amount, even near end of the chain.
-      self.assertLess(
-          0.1, kernel_results_.is_accepted[int(0.9 * num_results):].mean())
-      # We better not have any NaNs in states or log_prob.
-      # We may have some NaN in grads, which involve multiplication/addition due
-      # to gradient rules.  This is the known "NaN grad issue with tf.where."
-      self.assertAllEqual(np.zeros_like(states_),
-                          np.isnan(kernel_results_.proposed_target_log_prob))
-      self.assertAllEqual(np.zeros_like(states_),
-                          np.isnan(states_))
-      # We better not have any +inf in states, grads, or log_prob.
-      self.assertAllEqual(np.zeros_like(states_),
-                          np.isposinf(kernel_results_.proposed_target_log_prob))
-      self.assertAllEqual(
-          np.zeros_like(states_),
-          np.isposinf(kernel_results_.proposed_grads_target_log_prob[0]))
-      self.assertAllEqual(np.zeros_like(states_),
-                          np.isposinf(states_))
-
-      # Second:  Test that kernel_results is congruent with itself and
-      # acceptance/rejection of states.
-
-      # Proposed state is negative iff proposed target log prob is -inf.
-      np.testing.assert_array_less(pstates_[neg_inf_mask], 0.)
-      np.testing.assert_array_less(0., pstates_[~neg_inf_mask])
-
-      # Acceptance probs are zero whenever proposed state is negative.
-      acceptance_probs = np.exp(np.minimum(
-          kernel_results_.log_accept_ratio, 0.))
-      self.assertAllEqual(
-          np.zeros_like(pstates_[neg_inf_mask]),
-          acceptance_probs[neg_inf_mask])
-
-      # The move is accepted ==> state = proposed state.
-      self.assertAllEqual(
-          states_[kernel_results_.is_accepted],
-          pstates_[kernel_results_.is_accepted],
-      )
-      # The move was rejected <==> state[t] == state[t - 1].
-      for t in range(1, num_results):
-        for i in range(num_chains):
-          if kernel_results_.is_accepted[t, i]:
-            self.assertNotEqual(states_[t, i], states_[t - 1, i])
-          else:
-            self.assertEqual(states_[t, i], states_[t - 1, i])
-
-  def _kernel_leaves_target_invariant(self, initial_draws,
-                                      independent_chain_ndims,
-                                      sess, feed_dict=None):
-    def log_gamma_log_prob(x):
-      event_dims = math_ops.range(independent_chain_ndims, array_ops.rank(x))
-      return self._log_gamma_log_prob(x, event_dims)
-
-    def fake_log_prob(x):
-      """Cooled version of the target distribution."""
-      return 1.1 * log_gamma_log_prob(x)
-
-    step_size = array_ops.placeholder(np.float32, [], name="step_size")
-
-    if feed_dict is None:
-      feed_dict = {}
-
-    feed_dict[step_size] = 0.4
-
-    sample, kernel_results = hmc.kernel(
-        target_log_prob_fn=log_gamma_log_prob,
-        current_state=initial_draws,
-        step_size=step_size,
-        num_leapfrog_steps=5,
-        seed=43)
-
-    bad_sample, bad_kernel_results = hmc.kernel(
-        target_log_prob_fn=fake_log_prob,
-        current_state=initial_draws,
-        step_size=step_size,
-        num_leapfrog_steps=5,
-        seed=44)
-
-    [
-        log_accept_ratio_,
-        bad_log_accept_ratio_,
-        initial_draws_,
-        updated_draws_,
-        fake_draws_,
-    ] = sess.run([
-        kernel_results.log_accept_ratio,
-        bad_kernel_results.log_accept_ratio,
-        initial_draws,
-        sample,
-        bad_sample,
-    ], feed_dict)
-
-    # Confirm step size is small enough that we usually accept.
-    acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
-    bad_acceptance_probs = np.exp(np.minimum(bad_log_accept_ratio_, 0.))
-    self.assertGreater(acceptance_probs.mean(), 0.5)
-    self.assertGreater(bad_acceptance_probs.mean(), 0.5)
-
-    # Confirm step size is large enough that we sometimes reject.
-    self.assertLess(acceptance_probs.mean(), 0.99)
-    self.assertLess(bad_acceptance_probs.mean(), 0.99)
-
-    _, ks_p_value_true = stats.ks_2samp(initial_draws_.flatten(),
-                                        updated_draws_.flatten())
-    _, ks_p_value_fake = stats.ks_2samp(initial_draws_.flatten(),
-                                        fake_draws_.flatten())
-
-    logging_ops.vlog(1, "acceptance rate for true target: {}".format(
-        acceptance_probs.mean()))
-    logging_ops.vlog(1, "acceptance rate for fake target: {}".format(
-        bad_acceptance_probs.mean()))
-    logging_ops.vlog(1, "K-S p-value for true target: {}".format(
-        ks_p_value_true))
-    logging_ops.vlog(1, "K-S p-value for fake target: {}".format(
-        ks_p_value_fake))
-    # Make sure that the MCMC update hasn't changed the empirical CDF much.
-    self.assertGreater(ks_p_value_true, 1e-3)
-    # Confirm that targeting the wrong distribution does
-    # significantly change the empirical CDF.
-    self.assertLess(ks_p_value_fake, 1e-6)
-
-  def _kernel_leaves_target_invariant_wrapper(self, independent_chain_ndims):
-    """Tests that the kernel leaves the target distribution invariant.
-
-    Draws some independent samples from the target distribution,
-    applies an iteration of the MCMC kernel, then runs a
-    Kolmogorov-Smirnov test to determine if the distribution of the
-    MCMC-updated samples has changed.
-
-    We also confirm that running the kernel with a different log-pdf
-    does change the target distribution. (And that we can detect that.)
-
-    Args:
-      independent_chain_ndims: Python `int` scalar representing the number of
-        dims associated with independent chains.
-    """
-    with self.test_session(graph=ops.Graph()) as sess:
-      initial_draws = np.log(np.random.gamma(self._shape_param,
-                                             size=[50000, 2, 2]))
-      initial_draws -= np.log(self._rate_param)
-      x_ph = array_ops.placeholder(np.float32, name="x_ph")
-
-      feed_dict = {x_ph: initial_draws}
-
-      self._kernel_leaves_target_invariant(x_ph, independent_chain_ndims,
-                                           sess, feed_dict)
-
-  def testKernelLeavesTargetInvariant1(self):
-    self._kernel_leaves_target_invariant_wrapper(1)
-
-  def testKernelLeavesTargetInvariant2(self):
-    self._kernel_leaves_target_invariant_wrapper(2)
-
-  def testKernelLeavesTargetInvariant3(self):
-    self._kernel_leaves_target_invariant_wrapper(3)
-
-  def testNanRejection(self):
-    """Tests that an update that yields NaN potentials gets rejected.
-
-    We run HMC with a target distribution that returns NaN
-    log-likelihoods if any element of x < 0, and unit-scale
-    exponential log-likelihoods otherwise. The exponential potential
-    pushes x towards 0, ensuring that any reasonably large update will
-    push us over the edge into NaN territory.
-    """
-    def _unbounded_exponential_log_prob(x):
-      """An exponential distribution with log-likelihood NaN for x < 0."""
-      per_element_potentials = array_ops.where(
-          x < 0.,
-          array_ops.fill(array_ops.shape(x), x.dtype.as_numpy_dtype(np.nan)),
-          -x)
-      return math_ops.reduce_sum(per_element_potentials)
-
-    with self.test_session(graph=ops.Graph()) as sess:
-      initial_x = math_ops.linspace(0.01, 5, 10)
-      updated_x, kernel_results = hmc.kernel(
-          target_log_prob_fn=_unbounded_exponential_log_prob,
-          current_state=initial_x,
-          step_size=2.,
-          num_leapfrog_steps=5,
-          seed=46)
-      initial_x_, updated_x_, log_accept_ratio_ = sess.run(
-          [initial_x, updated_x, kernel_results.log_accept_ratio])
-      acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
-
-      logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
-      logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
-      logging_ops.vlog(1, "log_accept_ratio = {}".format(log_accept_ratio_))
-
-      self.assertAllEqual(initial_x_, updated_x_)
-      self.assertEqual(acceptance_probs, 0.)
-
-  def testNanFromGradsDontPropagate(self):
-    """Test that update with NaN gradients does not cause NaN in results."""
-    def _nan_log_prob_with_nan_gradient(x):
-      return np.nan * math_ops.reduce_sum(x)
-
-    with self.test_session(graph=ops.Graph()) as sess:
-      initial_x = math_ops.linspace(0.01, 5, 10)
-      updated_x, kernel_results = hmc.kernel(
-          target_log_prob_fn=_nan_log_prob_with_nan_gradient,
-          current_state=initial_x,
-          step_size=2.,
-          num_leapfrog_steps=5,
-          seed=47)
-      initial_x_, updated_x_, log_accept_ratio_ = sess.run(
-          [initial_x, updated_x, kernel_results.log_accept_ratio])
-      acceptance_probs = np.exp(np.minimum(log_accept_ratio_, 0.))
-
-      logging_ops.vlog(1, "initial_x = {}".format(initial_x_))
-      logging_ops.vlog(1, "updated_x = {}".format(updated_x_))
-      logging_ops.vlog(1, "log_accept_ratio = {}".format(log_accept_ratio_))
-
-      self.assertAllEqual(initial_x_, updated_x_)
-      self.assertEqual(acceptance_probs, 0.)
-
-      self.assertAllFinite(
-          gradients_ops.gradients(updated_x, initial_x)[0].eval())
-      self.assertAllEqual([True], [g is None for g in gradients_ops.gradients(
-          kernel_results.proposed_grads_target_log_prob, initial_x)])
-      self.assertAllEqual([False], [g is None for g in gradients_ops.gradients(
-          kernel_results.proposed_grads_target_log_prob,
-          kernel_results.proposed_state)])
-
-      # Gradients of the acceptance probs and new log prob are not finite.
-      # self.assertAllFinite(
-      #     gradients_ops.gradients(acceptance_probs, initial_x)[0].eval())
-      # self.assertAllFinite(
-      #     gradients_ops.gradients(new_log_prob, initial_x)[0].eval())
-
-  def _testChainWorksDtype(self, dtype):
-    with self.test_session(graph=ops.Graph()) as sess:
-      states, kernel_results = hmc.sample_chain(
-          num_results=10,
-          target_log_prob_fn=lambda x: -math_ops.reduce_sum(x**2., axis=-1),
-          current_state=np.zeros(5).astype(dtype),
-          step_size=0.01,
-          num_leapfrog_steps=10,
-          seed=48)
-      states_, log_accept_ratio_ = sess.run(
-          [states, kernel_results.log_accept_ratio])
-      self.assertEqual(dtype, states_.dtype)
-      self.assertEqual(dtype, log_accept_ratio_.dtype)
-
-  def testChainWorksIn64Bit(self):
-    self._testChainWorksDtype(np.float64)
-
-  def testChainWorksIn16Bit(self):
-    self._testChainWorksDtype(np.float16)
-
-  def testChainWorksCorrelatedMultivariate(self):
-    dtype = np.float32
-    true_mean = dtype([0, 0])
-    true_cov = dtype([[1, 0.5],
-                      [0.5, 1]])
-    num_results = 2000
-    counter = collections.Counter()
-    with self.test_session(graph=ops.Graph()) as sess:
-      def target_log_prob(x, y):
-        counter["target_calls"] += 1
-        # Corresponds to unnormalized MVN.
-        # z = matmul(inv(chol(true_cov)), [x, y] - true_mean)
-        z = array_ops.stack([x, y], axis=-1) - true_mean
-        z = array_ops.squeeze(
-            gen_linalg_ops.matrix_triangular_solve(
-                np.linalg.cholesky(true_cov),
-                z[..., array_ops.newaxis]),
-            axis=-1)
-        return -0.5 * math_ops.reduce_sum(z**2., axis=-1)
-      states, _ = hmc.sample_chain(
-          num_results=num_results,
-          target_log_prob_fn=target_log_prob,
-          current_state=[dtype(-2), dtype(2)],
-          step_size=[0.5, 0.5],
-          num_leapfrog_steps=2,
-          num_burnin_steps=200,
-          num_steps_between_results=1,
-          seed=54)
-      self.assertAllEqual(dict(target_calls=2), counter)
-      states = array_ops.stack(states, axis=-1)
-      self.assertEqual(num_results, states.shape[0].value)
-      sample_mean = math_ops.reduce_mean(states, axis=0)
-      x = states - sample_mean
-      sample_cov = math_ops.matmul(x, x, transpose_a=True) / dtype(num_results)
-      [sample_mean_, sample_cov_] = sess.run([
-          sample_mean, sample_cov])
-      self.assertAllClose(true_mean, sample_mean_,
-                          atol=0.05, rtol=0.)
-      self.assertAllClose(true_cov, sample_cov_,
-                          atol=0., rtol=0.1)
-
-
-class _EnergyComputationTest(object):
-
-  def testHandlesNanFromPotential(self):
-    with self.test_session(graph=ops.Graph()) as sess:
-      x = [1, np.inf, -np.inf, np.nan]
-      target_log_prob, proposed_target_log_prob = [
-          self.dtype(x.flatten()) for x in np.meshgrid(x, x)]
-      num_chains = len(target_log_prob)
-      dummy_momentums = [-1, 1]
-      momentums = [self.dtype([dummy_momentums] * num_chains)]
-      proposed_momentums = [self.dtype([dummy_momentums] * num_chains)]
-
-      target_log_prob = ops.convert_to_tensor(target_log_prob)
-      momentums = [ops.convert_to_tensor(momentums[0])]
-      proposed_target_log_prob = ops.convert_to_tensor(proposed_target_log_prob)
-      proposed_momentums = [ops.convert_to_tensor(proposed_momentums[0])]
-
-      energy = _compute_energy_change(
-          target_log_prob,
-          momentums,
-          proposed_target_log_prob,
-          proposed_momentums,
-          independent_chain_ndims=1)
-      grads = gradients_ops.gradients(energy, momentums)
-
-      [actual_energy, grads_] = sess.run([energy, grads])
-
-      # Ensure energy is `inf` (note: that's positive inf) in weird cases and
-      # finite otherwise.
-      expected_energy = self.dtype([0] + [np.inf]*(num_chains - 1))
-      self.assertAllEqual(expected_energy, actual_energy)
-
-      # Ensure gradient is finite.
-      self.assertAllEqual(np.ones_like(grads_).astype(np.bool),
-                          np.isfinite(grads_))
-
-  def testHandlesNanFromKinetic(self):
-    with self.test_session(graph=ops.Graph()) as sess:
-      x = [1, np.inf, -np.inf, np.nan]
-      momentums, proposed_momentums = [
-          [np.reshape(self.dtype(x), [-1, 1])]
-          for x in np.meshgrid(x, x)]
-      num_chains = len(momentums[0])
-      target_log_prob = np.ones(num_chains, self.dtype)
-      proposed_target_log_prob = np.ones(num_chains, self.dtype)
-
-      target_log_prob = ops.convert_to_tensor(target_log_prob)
-      momentums = [ops.convert_to_tensor(momentums[0])]
-      proposed_target_log_prob = ops.convert_to_tensor(proposed_target_log_prob)
-      proposed_momentums = [ops.convert_to_tensor(proposed_momentums[0])]
-
-      energy = _compute_energy_change(
-          target_log_prob,
-          momentums,
-          proposed_target_log_prob,
-          proposed_momentums,
-          independent_chain_ndims=1)
-      grads = gradients_ops.gradients(energy, momentums)
-
-      [actual_energy, grads_] = sess.run([energy, grads])
-
-      # Ensure energy is `inf` (note: that's positive inf) in weird cases and
-      # finite otherwise.
-      expected_energy = self.dtype([0] + [np.inf]*(num_chains - 1))
-      self.assertAllEqual(expected_energy, actual_energy)
-
-      # Ensure gradient is finite.
-      g = grads_[0].reshape([len(x), len(x)])[:, 0]
-      self.assertAllEqual(np.ones_like(g).astype(np.bool), np.isfinite(g))
-
-      # The remaining gradients are nan because the momentum was itself nan or
-      # inf.
-      g = grads_[0].reshape([len(x), len(x)])[:, 1:]
-      self.assertAllEqual(np.ones_like(g).astype(np.bool), np.isnan(g))
-
-
-class EnergyComputationTest16(test.TestCase, _EnergyComputationTest):
-  dtype = np.float16
-
-
-class EnergyComputationTest32(test.TestCase, _EnergyComputationTest):
-  dtype = np.float32
-
-
-class EnergyComputationTest64(test.TestCase, _EnergyComputationTest):
-  dtype = np.float64
-
-
-class _HMCHandlesLists(object):
-
-  def testStateParts(self):
-    with self.test_session(graph=ops.Graph()) as sess:
-      dist_x = normal_lib.Normal(loc=self.dtype(0), scale=self.dtype(1))
-      dist_y = independent_lib.Independent(
-          gamma_lib.Gamma(concentration=self.dtype([1, 2]),
-                          rate=self.dtype([0.5, 0.75])),
-          reinterpreted_batch_ndims=1)
-      def target_log_prob(x, y):
-        return dist_x.log_prob(x) + dist_y.log_prob(y)
-      x0 = [dist_x.sample(seed=1), dist_y.sample(seed=2)]
-      samples, _ = hmc.sample_chain(
-          num_results=int(2e3),
-          target_log_prob_fn=target_log_prob,
-          current_state=x0,
-          step_size=0.85,
-          num_leapfrog_steps=3,
-          num_burnin_steps=int(250),
-          seed=49)
-      actual_means = [math_ops.reduce_mean(s, axis=0) for s in samples]
-      actual_vars = [_reduce_variance(s, axis=0) for s in samples]
-      expected_means = [dist_x.mean(), dist_y.mean()]
-      expected_vars = [dist_x.variance(), dist_y.variance()]
-      [
-          actual_means_,
-          actual_vars_,
-          expected_means_,
-          expected_vars_,
-      ] = sess.run([
-          actual_means,
-          actual_vars,
-          expected_means,
-          expected_vars,
-      ])
-      self.assertAllClose(expected_means_, actual_means_, atol=0.05, rtol=0.16)
-      self.assertAllClose(expected_vars_, actual_vars_, atol=0., rtol=0.25)
-
-
-class HMCHandlesLists32(_HMCHandlesLists, test.TestCase):
-  dtype = np.float32
-
-
-class HMCHandlesLists64(_HMCHandlesLists, test.TestCase):
-  dtype = np.float64
-
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py
deleted file mode 100644
index f508e5b114..0000000000
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/metropolis_hastings_test.py
+++ /dev/null
@@ -1,340 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for Metropolis-Hastings."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings_impl as mh
-from tensorflow.contrib.distributions.python.ops import mvn_tril as mvn_tril_lib
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.ops import variables
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.platform import test
-
-
-class MetropolisHastingsTest(test.TestCase):
-
-  def testKernelStateTensor(self):
-    """Test that transition kernel works with tensor input to `state`."""
-    loc = variable_scope.get_variable("loc", initializer=0.)
-
-    def target_log_prob_fn(loc):
-      return normal_lib.Normal(loc=0.0, scale=0.1).log_prob(loc)
-
-    new_state, _ = mh.kernel(
-        target_log_prob_fn=target_log_prob_fn,
-        proposal_fn=mh.proposal_normal(scale=0.05),
-        current_state=loc,
-        seed=231251)
-    loc_update = loc.assign(new_state)
-
-    init = variables.initialize_all_variables()
-    with self.test_session() as sess:
-      sess.run(init)
-      loc_samples = []
-      for _ in range(2500):
-        loc_sample = sess.run(loc_update)
-        loc_samples.append(loc_sample)
-    loc_samples = loc_samples[500:]  # drop samples for burn-in
-
-    self.assertAllClose(np.mean(loc_samples), 0.0, rtol=1e-5, atol=1e-1)
-    self.assertAllClose(np.std(loc_samples), 0.1, rtol=1e-5, atol=1e-1)
-
-  def testKernelStateList(self):
-    """Test that transition kernel works with list input to `state`."""
-    num_chains = 2
-    loc_one = variable_scope.get_variable(
-        "loc_one", [num_chains],
-        initializer=init_ops.zeros_initializer())
-    loc_two = variable_scope.get_variable(
-        "loc_two", [num_chains], initializer=init_ops.zeros_initializer())
-
-    def target_log_prob_fn(loc_one, loc_two):
-      loc = array_ops.stack([loc_one, loc_two])
-      log_prob = mvn_tril_lib.MultivariateNormalTriL(
-          loc=constant_op.constant([0., 0.]),
-          scale_tril=constant_op.constant([[0.1, 0.1], [0.0, 0.1]])).log_prob(
-              loc)
-      return math_ops.reduce_sum(log_prob, 0)
-
-    def proposal_fn(loc_one, loc_two):
-      loc_one_proposal = mh.proposal_normal(scale=0.05)
-      loc_two_proposal = mh.proposal_normal(scale=0.05)
-      loc_one_sample, _ = loc_one_proposal(loc_one)
-      loc_two_sample, _ = loc_two_proposal(loc_two)
-      return [loc_one_sample, loc_two_sample], None
-
-    new_state, _ = mh.kernel(
-        target_log_prob_fn=target_log_prob_fn,
-        proposal_fn=proposal_fn,
-        current_state=[loc_one, loc_two],
-        seed=12415)
-    loc_one_update = loc_one.assign(new_state[0])
-    loc_two_update = loc_two.assign(new_state[1])
-
-    init = variables.initialize_all_variables()
-    with self.test_session() as sess:
-      sess.run(init)
-      loc_one_samples = []
-      loc_two_samples = []
-      for _ in range(10000):
-        loc_one_sample, loc_two_sample = sess.run(
-            [loc_one_update, loc_two_update])
-        loc_one_samples.append(loc_one_sample)
-        loc_two_samples.append(loc_two_sample)
-
-    loc_one_samples = np.array(loc_one_samples)
-    loc_two_samples = np.array(loc_two_samples)
-    loc_one_samples = loc_one_samples[1000:]  # drop samples for burn-in
-    loc_two_samples = loc_two_samples[1000:]  # drop samples for burn-in
-
-    self.assertAllClose(np.mean(loc_one_samples, 0),
-                        np.array([0.] * num_chains),
-                        rtol=1e-5, atol=1e-1)
-    self.assertAllClose(np.mean(loc_two_samples, 0),
-                        np.array([0.] * num_chains),
-                        rtol=1e-5, atol=1e-1)
-    self.assertAllClose(np.std(loc_one_samples, 0),
-                        np.array([0.1] * num_chains),
-                        rtol=1e-5, atol=1e-1)
-    self.assertAllClose(np.std(loc_two_samples, 0),
-                        np.array([0.1] * num_chains),
-                        rtol=1e-5, atol=1e-1)
-
-  def testKernelResultsUsingTruncatedDistribution(self):
-    def log_prob(x):
-      return array_ops.where(
-          x >= 0.,
-          -x - x**2,
-          array_ops.fill(x.shape, math_ops.cast(-np.inf, x.dtype)))
-    # The truncated distribution has the property that it is likely to attract
-    # the flow toward, and below, zero...but for x <=0,
-    # log_prob(x) = -inf, which should result in rejection, as well
-    # as a non-finite log_prob.  Thus, this distribution gives us an opportunity
-    # to test out the kernel results ability to correctly capture rejections due
-    # to finite AND non-finite reasons.
-
-    num_results = 1000
-    # Large step size, will give rejections due to going into a region of
-    # log_prob = -inf.
-    step_size = 0.3
-    num_chains = 2
-
-    with self.test_session(graph=ops.Graph()) as sess:
-
-      # Start multiple independent chains.
-      initial_state = ops.convert_to_tensor([0.1] * num_chains)
-
-      states = []
-      is_accepted = []
-      proposed_states = []
-      current_state = initial_state
-      for _ in range(num_results):
-        current_state, kernel_results = mh.kernel(
-            target_log_prob_fn=log_prob,
-            proposal_fn=mh.proposal_uniform(step_size=step_size),
-            current_state=current_state,
-            seed=42)
-        states.append(current_state)
-        proposed_states.append(kernel_results.proposed_state)
-        is_accepted.append(kernel_results.is_accepted)
-
-      states = array_ops.stack(states)
-      proposed_states = array_ops.stack(proposed_states)
-      is_accepted = array_ops.stack(is_accepted)
-      states_, pstates_, is_accepted_ = sess.run(
-          [states, proposed_states, is_accepted])
-
-      # We better have accepted a decent amount, even near end of the chain.
-      self.assertLess(
-          0.1, is_accepted_[int(0.9 * num_results):].mean())
-      # We better not have any NaNs in states.
-      self.assertAllEqual(np.zeros_like(states_),
-                          np.isnan(states_))
-      # We better not have any +inf in states.
-      self.assertAllEqual(np.zeros_like(states_),
-                          np.isposinf(states_))
-
-      # The move is accepted ==> state = proposed state.
-      self.assertAllEqual(
-          states_[is_accepted_],
-          pstates_[is_accepted_],
-      )
-
-      # The move was rejected <==> state[t] == state[t - 1].
-      for t in range(1, num_results):
-        for i in range(num_chains):
-          if is_accepted_[t, i]:
-            self.assertNotEqual(states_[t, i], states_[t - 1, i])
-          else:
-            self.assertEqual(states_[t, i], states_[t - 1, i])
-
-  def testDensityIncreasingStepAccepted(self):
-    """Tests that if a transition increases density, it is always accepted."""
-    target_log_density = lambda x: - x * x
-    state = variable_scope.get_variable("state", initializer=10.)
-    state_log_density = variable_scope.get_variable(
-        "state_log_density",
-        initializer=target_log_density(state.initialized_value()))
-    log_accept_ratio = variable_scope.get_variable(
-        "log_accept_ratio", initializer=0.)
-
-    get_next_proposal = lambda x: (x - 1., None)
-    step = mh.evolve(state, state_log_density, log_accept_ratio,
-                     target_log_density, get_next_proposal, seed=1234)
-    init = variables.initialize_all_variables()
-    with self.test_session() as sess:
-      sess.run(init)
-      for j in range(9):
-        sess.run(step)
-        sample = sess.run(state)
-        sample_log_density = sess.run(state_log_density)
-        self.assertAlmostEqual(sample, 9 - j)
-        self.assertAlmostEqual(sample_log_density, - (9 - j) * (9 - j))
-
-  def testSampleProperties(self):
-    """Tests that the samples converge to the target distribution."""
-
-    def target_log_density(x):
-      """Log-density corresponding to a normal distribution with mean = 4."""
-      return - (x - 2.0) * (x - 2.0) * 0.5
-
-    # Use the uniform random walker to generate proposals.
-    proposal_fn = mh.proposal_uniform(
-        step_size=1.0, seed=1234)
-
-    state = variable_scope.get_variable("state", initializer=0.0)
-    state_log_density = variable_scope.get_variable(
-        "state_log_density",
-        initializer=target_log_density(state.initialized_value()))
-    log_accept_ratio = variable_scope.get_variable(
-        "log_accept_ratio", initializer=0.)
-
-    # Random walk MCMC converges slowly so need to put in enough iterations.
-    num_iterations = 5000
-    step = mh.evolve(state, state_log_density, log_accept_ratio,
-                     target_log_density, proposal_fn, seed=4321)
-
-    init = variables.global_variables_initializer()
-
-    sample_sum, sample_sq_sum = 0.0, 0.0
-    with self.test_session() as sess:
-      sess.run(init)
-      for _ in np.arange(num_iterations):
-        # Allow for the mixing of the chain and discard these samples.
-        sess.run(step)
-      for _ in np.arange(num_iterations):
-        sess.run(step)
-        sample = sess.run(state)
-        sample_sum += sample
-        sample_sq_sum += sample * sample
-
-    sample_mean = sample_sum / num_iterations
-    sample_variance = sample_sq_sum / num_iterations - sample_mean * sample_mean
-    # The samples have large autocorrelation which reduces the effective sample
-    # size.
-    self.assertAlmostEqual(sample_mean, 2.0, delta=0.1)
-    self.assertAlmostEqual(sample_variance, 1.0, delta=0.1)
-
-  def testProposalNormal(self):
-    """Tests that the normal proposals are correctly distributed."""
-
-    initial_points = array_ops.ones([10000], dtype=dtypes.float32)
-    proposal_fn = mh.proposal_normal(
-        scale=2.0, seed=1234)
-    proposal_points, _ = proposal_fn(initial_points)
-
-    with self.test_session() as sess:
-      sample = sess.run(proposal_points)
-
-    # It is expected that the elements in proposal_points have the same mean as
-    # initial_points and have the standard deviation that was supplied to the
-    # proposal scheme.
-    self.assertAlmostEqual(np.mean(sample), 1.0, delta=0.1)
-    self.assertAlmostEqual(np.std(sample), 2.0, delta=0.1)
-
-  def testDocstringExample(self):
-    """Tests the simplified docstring example with multiple chains."""
-
-    n = 2  # dimension of the problem
-
-    # Generate 300 initial values randomly. Each of these would be an
-    # independent starting point for a Markov chain.
-    state = variable_scope.get_variable(
-        "state", initializer=random_ops.random_normal(
-            [300, n], mean=3.0, dtype=dtypes.float32, seed=42))
-
-    # Computes the log(p(x)) for the unit normal density and ignores the
-    # normalization constant.
-    def log_density(x):
-      return  - math_ops.reduce_sum(x * x, reduction_indices=-1) / 2.0
-
-    # Initial log-density value
-    state_log_density = variable_scope.get_variable(
-        "state_log_density",
-        initializer=log_density(state.initialized_value()))
-
-    # A variable to store the log_acceptance_ratio:
-    log_acceptance_ratio = variable_scope.get_variable(
-        "log_acceptance_ratio",
-        initializer=array_ops.zeros([300], dtype=dtypes.float32))
-
-    # Generates random proposals by moving each coordinate uniformly and
-    # independently in a box of size 2 centered around the current value.
-    # Returns the new point and also the log of the Hastings ratio (the
-    # ratio of the probability of going from the proposal to origin and the
-    # probability of the reverse transition). When this ratio is 1, the value
-    # may be omitted and replaced by None.
-    def random_proposal(x):
-      return (x + random_ops.random_uniform(
-          array_ops.shape(x), minval=-1, maxval=1,
-          dtype=x.dtype, seed=12)), None
-
-    #  Create the op to propagate the chain for 100 steps.
-    stepper = mh.evolve(
-        state, state_log_density, log_acceptance_ratio,
-        log_density, random_proposal, n_steps=100, seed=123)
-    init = variables.initialize_all_variables()
-    with self.test_session() as sess:
-      sess.run(init)
-      # Run the chains for a total of 1000 steps.
-      for _ in range(10):
-        sess.run(stepper)
-      samples = sess.run(state)
-      covariance = np.eye(n)
-      # Verify that the estimated mean and covariance are close to the true
-      # values.
-      self.assertAlmostEqual(
-          np.max(np.abs(np.mean(samples, 0)
-                        - np.zeros(n))), 0,
-          delta=0.1)
-      self.assertAlmostEqual(
-          np.max(np.abs(np.reshape(np.cov(samples, rowvar=False), [n**2])
-                        - np.reshape(covariance, [n**2]))), 0,
-          delta=0.2)
-
-if __name__ == "__main__":
-  test.main()
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc.py b/tensorflow/contrib/bayesflow/python/ops/hmc.py
deleted file mode 100644
index c8a5a195d3..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/hmc.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Hamiltonian Monte Carlo, a gradient-based MCMC algorithm."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# go/tf-wildcard-import
-from tensorflow.contrib.bayesflow.python.ops.hmc_impl import *  # pylint: disable=wildcard-import,unused-wildcard-import,g-importing-member
-from tensorflow.python.util import all_util
-
-_allowed_symbols = [
-    "sample_chain",
-    "kernel",
-]
-
-all_util.remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
deleted file mode 100644
index 66afcc7497..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py
+++ /dev/null
@@ -1,961 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Hamiltonian Monte Carlo, a gradient-based MCMC algorithm.
-
-@@sample_chain
-@@kernel
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-import numpy as np
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import functional_ops
-from tensorflow.python.ops import gradients_impl as gradients_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import util as distributions_util
-
-__all__ = [
-    "sample_chain",
-    "kernel",
-]
-
-
-KernelResults = collections.namedtuple(
-    "KernelResults",
-    [
-        "log_accept_ratio",
-        "current_grads_target_log_prob",  # "Current result" means "accepted".
-        "current_target_log_prob",  # "Current result" means "accepted".
-        "is_accepted",
-        "proposed_grads_target_log_prob",
-        "proposed_state",
-        "proposed_target_log_prob",
-    ])
-
-
-def _make_dummy_kernel_results(
-    dummy_state,
-    dummy_target_log_prob,
-    dummy_grads_target_log_prob):
-  return KernelResults(
-      log_accept_ratio=dummy_target_log_prob,
-      current_grads_target_log_prob=dummy_grads_target_log_prob,
-      current_target_log_prob=dummy_target_log_prob,
-      is_accepted=array_ops.ones_like(dummy_target_log_prob, dtypes.bool),
-      proposed_grads_target_log_prob=dummy_grads_target_log_prob,
-      proposed_state=dummy_state,
-      proposed_target_log_prob=dummy_target_log_prob,
-  )
-
-
-def sample_chain(
-    num_results,
-    target_log_prob_fn,
-    current_state,
-    step_size,
-    num_leapfrog_steps,
-    num_burnin_steps=0,
-    num_steps_between_results=0,
-    seed=None,
-    current_target_log_prob=None,
-    current_grads_target_log_prob=None,
-    name=None):
-  """Runs multiple iterations of one or more Hamiltonian Monte Carlo chains.
-
-  Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) algorithm
-  that takes a series of gradient-informed steps to produce a Metropolis
-  proposal. This function samples from an HMC Markov chain at `current_state`
-  and whose stationary distribution has log-unnormalized-density
-  `target_log_prob_fn()`.
-
-  This function samples from multiple chains in parallel. It assumes that the
-  the leftmost dimensions of (each) `current_state` (part) index an independent
-  chain.  The function `target_log_prob_fn()` sums log-probabilities across
-  event dimensions (i.e., current state (part) rightmost dimensions). Each
-  element of the output of `target_log_prob_fn()` represents the (possibly
-  unnormalized) log-probability of the joint distribution over (all) the current
-  state (parts).
-
-  The `current_state` can be represented as a single `Tensor` or a `list` of
-  `Tensors` which collectively represent the current state. When specifying a
-  `list`, one must also specify a list of `step_size`s.
-
-  Note: `target_log_prob_fn` is called exactly twice.
-
-  Since HMC states are correlated, it is sometimes desirable to produce
-  additional intermediate states, and then discard them, ending up with a set of
-  states with decreased autocorrelation.  See [1].  Such "thinning" is made
-  possible by setting `num_steps_between_results > 0`.  The chain then takes
-  `num_steps_between_results` extra steps between the steps that make it into
-  the results.  The extra steps are never materialized (in calls to `sess.run`),
-  and thus do not increase memory requirements.
-
-  [1]: "Statistically efficient thinning of a Markov chain sampler."
-       Art B. Owen. April 2017.
-       http://statweb.stanford.edu/~owen/reports/bestthinning.pdf
-
-  #### Examples:
-
-  ##### Sample from a diagonal-variance Gaussian.
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  def make_likelihood(true_variances):
-    return tfd.MultivariateNormalDiag(
-        scale_diag=tf.sqrt(true_variances))
-
-  dims = 10
-  dtype = np.float32
-  true_variances = tf.linspace(dtype(1), dtype(3), dims)
-  likelihood = make_likelihood(true_variances)
-
-  states, kernel_results = hmc.sample_chain(
-      num_results=1000,
-      target_log_prob_fn=likelihood.log_prob,
-      current_state=tf.zeros(dims),
-      step_size=0.5,
-      num_leapfrog_steps=2,
-      num_burnin_steps=500)
-
-  # Compute sample stats.
-  sample_mean = tf.reduce_mean(states, axis=0)
-  sample_var = tf.reduce_mean(
-      tf.squared_difference(states, sample_mean),
-      axis=0)
-  ```
-
-  ##### Sampling from factor-analysis posteriors with known factors.
-
-  I.e.,
-
-  ```none
-  for i=1..n:
-    w[i] ~ Normal(0, eye(d))            # prior
-    x[i] ~ Normal(loc=matmul(w[i], F))  # likelihood
-  ```
-
-  where `F` denotes factors.
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  def make_prior(dims, dtype):
-    return tfd.MultivariateNormalDiag(
-        loc=tf.zeros(dims, dtype))
-
-  def make_likelihood(weights, factors):
-    return tfd.MultivariateNormalDiag(
-        loc=tf.tensordot(weights, factors, axes=[[0], [-1]]))
-
-  # Setup data.
-  num_weights = 10
-  num_factors = 4
-  num_chains = 100
-  dtype = np.float32
-
-  prior = make_prior(num_weights, dtype)
-  weights = prior.sample(num_chains)
-  factors = np.random.randn(num_factors, num_weights).astype(dtype)
-  x = make_likelihood(weights, factors).sample(num_chains)
-
-  def target_log_prob(w):
-    # Target joint is: `f(w) = p(w, x | factors)`.
-    return prior.log_prob(w) + make_likelihood(w, factors).log_prob(x)
-
-  # Get `num_results` samples from `num_chains` independent chains.
-  chains_states, kernels_results = hmc.sample_chain(
-      num_results=1000,
-      target_log_prob_fn=target_log_prob,
-      current_state=tf.zeros([num_chains, dims], dtype),
-      step_size=0.1,
-      num_leapfrog_steps=2,
-      num_burnin_steps=500)
-
-  # Compute sample stats.
-  sample_mean = tf.reduce_mean(chains_states, axis=[0, 1])
-  sample_var = tf.reduce_mean(
-      tf.squared_difference(chains_states, sample_mean),
-      axis=[0, 1])
-  ```
-
-  Args:
-    num_results: Integer number of Markov chain draws.
-    target_log_prob_fn: Python callable which takes an argument like
-      `current_state` (or `*current_state` if it's a list) and returns its
-      (possibly unnormalized) log-density under the target distribution.
-    current_state: `Tensor` or Python `list` of `Tensor`s representing the
-      current state(s) of the Markov chain(s). The first `r` dimensions index
-      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
-    step_size: `Tensor` or Python `list` of `Tensor`s representing the step size
-      for the leapfrog integrator. Must broadcast with the shape of
-      `current_state`. Larger step sizes lead to faster progress, but too-large
-      step sizes make rejection exponentially more likely. When possible, it's
-      often helpful to match per-variable step sizes to the standard deviations
-      of the target distribution in each variable.
-    num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
-      for. Total progress per HMC step is roughly proportional to `step_size *
-      num_leapfrog_steps`.
-    num_burnin_steps: Integer number of chain steps to take before starting to
-      collect results.
-      Default value: 0 (i.e., no burn-in).
-    num_steps_between_results: Integer number of chain steps between collecting
-      a result. Only one out of every `num_steps_between_samples + 1` steps is
-      included in the returned results.  The number of returned chain states is
-      still equal to `num_results`.  Default value: 0 (i.e., no thinning).
-    seed: Python integer to seed the random number generator.
-    current_target_log_prob: (Optional) `Tensor` representing the value of
-      `target_log_prob_fn` at the `current_state`. The only reason to specify
-      this argument is to reduce TF graph size.
-      Default value: `None` (i.e., compute as needed).
-    current_grads_target_log_prob: (Optional) Python list of `Tensor`s
-      representing gradient of `target_log_prob` at the `current_state` and wrt
-      the `current_state`. Must have same shape as `current_state`. The only
-      reason to specify this argument is to reduce TF graph size.
-      Default value: `None` (i.e., compute as needed).
-    name: Python `str` name prefixed to Ops created by this function.
-      Default value: `None` (i.e., "hmc_sample_chain").
-
-  Returns:
-    next_states: Tensor or Python list of `Tensor`s representing the
-      state(s) of the Markov chain(s) at each result step. Has same shape as
-      input `current_state` but with a prepended `num_results`-size dimension.
-    kernel_results: `collections.namedtuple` of internal calculations used to
-      advance the chain.
-  """
-  with ops.name_scope(
-      name, "hmc_sample_chain",
-      [num_results, current_state, step_size, num_leapfrog_steps,
-       num_burnin_steps, num_steps_between_results, seed,
-       current_target_log_prob, current_grads_target_log_prob]):
-    with ops.name_scope("initialize"):
-      [
-          current_state,
-          step_size,
-          current_target_log_prob,
-          current_grads_target_log_prob,
-      ] = _prepare_args(
-          target_log_prob_fn,
-          current_state,
-          step_size,
-          current_target_log_prob,
-          current_grads_target_log_prob)
-      num_results = ops.convert_to_tensor(
-          num_results,
-          dtype=dtypes.int32,
-          name="num_results")
-      num_leapfrog_steps = ops.convert_to_tensor(
-          num_leapfrog_steps,
-          dtype=dtypes.int32,
-          name="num_leapfrog_steps")
-      num_burnin_steps = ops.convert_to_tensor(
-          num_burnin_steps,
-          dtype=dtypes.int32,
-          name="num_burnin_steps")
-      num_steps_between_results = ops.convert_to_tensor(
-          num_steps_between_results,
-          dtype=dtypes.int32,
-          name="num_steps_between_results")
-
-    def _run_chain(num_steps, current_state, kernel_results):
-      """Runs the chain(s) for `num_steps`."""
-      def _loop_body(iter_, current_state, kernel_results):
-        return [iter_ + 1] + list(kernel(
-            target_log_prob_fn,
-            current_state,
-            step_size,
-            num_leapfrog_steps,
-            seed,
-            kernel_results.current_target_log_prob,
-            kernel_results.current_grads_target_log_prob))
-      while_loop_kwargs = dict(
-          cond=lambda iter_, *args: iter_ < num_steps,
-          body=_loop_body,
-          loop_vars=[
-              np.int32(0),
-              current_state,
-              kernel_results,
-          ],
-      )
-      if seed is not None:
-        while_loop_kwargs["parallel_iterations"] = 1
-      return control_flow_ops.while_loop(
-          **while_loop_kwargs)[1:]  # Lop-off "iter_".
-
-    def _scan_body(args_list, iter_):
-      """Closure which implements `tf.scan` body."""
-      current_state, kernel_results = args_list
-      return _run_chain(
-          1 + array_ops.where(math_ops.equal(iter_, 0),
-                              num_burnin_steps,
-                              num_steps_between_results),
-          current_state,
-          kernel_results)
-
-    scan_kwargs = dict(
-        fn=_scan_body,
-        elems=math_ops.range(num_results),  # iter_: used to choose burnin.
-        initializer=[
-            current_state,
-            _make_dummy_kernel_results(
-                current_state,
-                current_target_log_prob,
-                current_grads_target_log_prob),
-        ])
-    if seed is not None:
-      scan_kwargs["parallel_iterations"] = 1
-    return functional_ops.scan(**scan_kwargs)
-
-
-def kernel(target_log_prob_fn,
-           current_state,
-           step_size,
-           num_leapfrog_steps,
-           seed=None,
-           current_target_log_prob=None,
-           current_grads_target_log_prob=None,
-           name=None):
-  """Runs one iteration of Hamiltonian Monte Carlo.
-
-  Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC)
-  algorithm that takes a series of gradient-informed steps to produce
-  a Metropolis proposal. This function applies one step of HMC to
-  randomly update the variable `x`.
-
-  This function can update multiple chains in parallel. It assumes that all
-  leftmost dimensions of `current_state` index independent chain states (and are
-  therefore updated independently). The output of `target_log_prob_fn()` should
-  sum log-probabilities across all event dimensions. Slices along the rightmost
-  dimensions may have different target distributions; for example,
-  `current_state[0, :]` could have a different target distribution from
-  `current_state[1, :]`. This is up to `target_log_prob_fn()`. (The number of
-  independent chains is `tf.size(target_log_prob_fn(*current_state))`.)
-
-  #### Examples:
-
-  ##### Simple chain with warm-up.
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  # Tuning acceptance rates:
-  dtype = np.float32
-  target_accept_rate = 0.631
-  num_warmup_iter = 500
-  num_chain_iter = 500
-
-  x = tf.get_variable(name="x", initializer=dtype(1))
-  step_size = tf.get_variable(name="step_size", initializer=dtype(1))
-
-  target = tfd.Normal(loc=dtype(0), scale=dtype(1))
-
-  next_x, other_results = hmc.kernel(
-      target_log_prob_fn=target.log_prob,
-      current_state=x,
-      step_size=step_size,
-      num_leapfrog_steps=3)[:4]
-
-  x_update = x.assign(next_x)
-
-  step_size_update = step_size.assign_add(
-      step_size * tf.where(
-          tf.exp(tf.minimum(other_results.log_accept_ratio), 0.) >
-              target_accept_rate,
-          0.01, -0.01))
-
-  warmup = tf.group([x_update, step_size_update])
-
-  tf.global_variables_initializer().run()
-
-  sess.graph.finalize()  # No more graph building.
-
-  # Warm up the sampler and adapt the step size
-  for _ in xrange(num_warmup_iter):
-    sess.run(warmup)
-
-  # Collect samples without adapting step size
-  samples = np.zeros([num_chain_iter])
-  for i in xrange(num_chain_iter):
-    _, x_, target_log_prob_, grad_ = sess.run([
-        x_update,
-        x,
-        other_results.target_log_prob,
-        other_results.grads_target_log_prob])
-    samples[i] = x_
-
-  print(samples.mean(), samples.std())
-  ```
-
-  ##### Sample from more complicated posterior.
-
-  I.e.,
-
-  ```none
-    W ~ MVN(loc=0, scale=sigma * eye(dims))
-    for i=1...num_samples:
-        X[i] ~ MVN(loc=0, scale=eye(dims))
-      eps[i] ~ Normal(loc=0, scale=1)
-        Y[i] = X[i].T * W + eps[i]
-  ```
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  def make_training_data(num_samples, dims, sigma):
-    dt = np.asarray(sigma).dtype
-    zeros = tf.zeros(dims, dtype=dt)
-    x = tfd.MultivariateNormalDiag(
-        loc=zeros).sample(num_samples, seed=1)
-    w = tfd.MultivariateNormalDiag(
-        loc=zeros,
-        scale_identity_multiplier=sigma).sample(seed=2)
-    noise = tfd.Normal(
-        loc=dt(0),
-        scale=dt(1)).sample(num_samples, seed=3)
-    y = tf.tensordot(x, w, axes=[[1], [0]]) + noise
-    return y, x, w
-
-  def make_prior(sigma, dims):
-    # p(w | sigma)
-    return tfd.MultivariateNormalDiag(
-        loc=tf.zeros([dims], dtype=sigma.dtype),
-        scale_identity_multiplier=sigma)
-
-  def make_likelihood(x, w):
-    # p(y | x, w)
-    return tfd.MultivariateNormalDiag(
-        loc=tf.tensordot(x, w, axes=[[1], [0]]))
-
-  # Setup assumptions.
-  dtype = np.float32
-  num_samples = 150
-  dims = 10
-  num_iters = int(5e3)
-
-  true_sigma = dtype(0.5)
-  y, x, true_weights = make_training_data(num_samples, dims, true_sigma)
-
-  # Estimate of `log(true_sigma)`.
-  log_sigma = tf.get_variable(name="log_sigma", initializer=dtype(0))
-  sigma = tf.exp(log_sigma)
-
-  # State of the Markov chain.
-  weights = tf.get_variable(
-      name="weights",
-      initializer=np.random.randn(dims).astype(dtype))
-
-  prior = make_prior(sigma, dims)
-
-  def joint_log_prob_fn(w):
-    # f(w) = log p(w, y | x)
-    return prior.log_prob(w) + make_likelihood(x, w).log_prob(y)
-
-  weights_update = weights.assign(
-      hmc.kernel(target_log_prob_fn=joint_log_prob,
-                 current_state=weights,
-                 step_size=0.1,
-                 num_leapfrog_steps=5)[0])
-
-  with tf.control_dependencies([weights_update]):
-    loss = -prior.log_prob(weights)
-
-  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
-  log_sigma_update = optimizer.minimize(loss, var_list=[log_sigma])
-
-  sess.graph.finalize()  # No more graph building.
-
-  tf.global_variables_initializer().run()
-
-  sigma_history = np.zeros(num_iters, dtype)
-  weights_history = np.zeros([num_iters, dims], dtype)
-
-  for i in xrange(num_iters):
-    _, sigma_, weights_, _ = sess.run([log_sigma_update, sigma, weights])
-    weights_history[i, :] = weights_
-    sigma_history[i] = sigma_
-
-  true_weights_ = sess.run(true_weights)
-
-  # Should converge to something close to true_sigma.
-  plt.plot(sigma_history);
-  plt.ylabel("sigma");
-  plt.xlabel("iteration");
-  ```
-
-  Args:
-    target_log_prob_fn: Python callable which takes an argument like
-      `current_state` (or `*current_state` if it's a list) and returns its
-      (possibly unnormalized) log-density under the target distribution.
-    current_state: `Tensor` or Python `list` of `Tensor`s representing the
-      current state(s) of the Markov chain(s). The first `r` dimensions index
-      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
-    step_size: `Tensor` or Python `list` of `Tensor`s representing the step size
-      for the leapfrog integrator. Must broadcast with the shape of
-      `current_state`. Larger step sizes lead to faster progress, but too-large
-      step sizes make rejection exponentially more likely. When possible, it's
-      often helpful to match per-variable step sizes to the standard deviations
-      of the target distribution in each variable.
-    num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
-      for. Total progress per HMC step is roughly proportional to `step_size *
-      num_leapfrog_steps`.
-    seed: Python integer to seed the random number generator.
-    current_target_log_prob: (Optional) `Tensor` representing the value of
-      `target_log_prob_fn` at the `current_state`. The only reason to
-      specify this argument is to reduce TF graph size.
-      Default value: `None` (i.e., compute as needed).
-    current_grads_target_log_prob: (Optional) Python list of `Tensor`s
-      representing gradient of `current_target_log_prob` at the `current_state`
-      and wrt the `current_state`. Must have same shape as `current_state`. The
-      only reason to specify this argument is to reduce TF graph size.
-      Default value: `None` (i.e., compute as needed).
-    name: Python `str` name prefixed to Ops created by this function.
-      Default value: `None` (i.e., "hmc_kernel").
-
-  Returns:
-    next_state: Tensor or Python list of `Tensor`s representing the state(s)
-      of the Markov chain(s) at each result step. Has same shape as
-      `current_state`.
-    kernel_results: `collections.namedtuple` of internal calculations used to
-      advance the chain.
-
-  Raises:
-    ValueError: if there isn't one `step_size` or a list with same length as
-      `current_state`.
-  """
-  with ops.name_scope(
-      name, "hmc_kernel",
-      [current_state, step_size, num_leapfrog_steps, seed,
-       current_target_log_prob, current_grads_target_log_prob]):
-    with ops.name_scope("initialize"):
-      [current_state_parts, step_sizes, current_target_log_prob,
-       current_grads_target_log_prob] = _prepare_args(
-           target_log_prob_fn, current_state, step_size,
-           current_target_log_prob, current_grads_target_log_prob,
-           maybe_expand=True)
-      independent_chain_ndims = distributions_util.prefer_static_rank(
-          current_target_log_prob)
-      current_momentums = []
-      for s in current_state_parts:
-        current_momentums.append(random_ops.random_normal(
-            shape=array_ops.shape(s),
-            dtype=s.dtype.base_dtype,
-            seed=seed))
-        seed = distributions_util.gen_new_seed(
-            seed, salt="hmc_kernel_momentums")
-
-      num_leapfrog_steps = ops.convert_to_tensor(
-          num_leapfrog_steps,
-          dtype=dtypes.int32,
-          name="num_leapfrog_steps")
-    [
-        proposed_momentums,
-        proposed_state_parts,
-        proposed_target_log_prob,
-        proposed_grads_target_log_prob,
-    ] = _leapfrog_integrator(current_momentums,
-                             target_log_prob_fn,
-                             current_state_parts,
-                             step_sizes,
-                             num_leapfrog_steps,
-                             current_target_log_prob,
-                             current_grads_target_log_prob)
-
-    energy_change = _compute_energy_change(current_target_log_prob,
-                                           current_momentums,
-                                           proposed_target_log_prob,
-                                           proposed_momentums,
-                                           independent_chain_ndims)
-    log_accept_ratio = -energy_change
-
-    # u < exp(log_accept_ratio),  where u~Uniform[0,1)
-    # ==> log(u) < log_accept_ratio
-    random_value = random_ops.random_uniform(
-        shape=array_ops.shape(energy_change),
-        dtype=energy_change.dtype,
-        seed=seed)
-    random_negative = math_ops.log(random_value)
-    is_accepted = random_negative < log_accept_ratio
-
-    accepted_target_log_prob = array_ops.where(is_accepted,
-                                               proposed_target_log_prob,
-                                               current_target_log_prob)
-
-    next_state_parts = [_choose(is_accepted,
-                                proposed_state_part,
-                                current_state_part,
-                                independent_chain_ndims)
-                        for current_state_part, proposed_state_part
-                        in zip(current_state_parts, proposed_state_parts)]
-
-    accepted_grads_target_log_prob = [
-        _choose(is_accepted,
-                proposed_grad,
-                grad,
-                independent_chain_ndims)
-        for proposed_grad, grad
-        in zip(proposed_grads_target_log_prob, current_grads_target_log_prob)]
-
-    maybe_flatten = lambda x: x if _is_list_like(current_state) else x[0]
-    return [
-        maybe_flatten(next_state_parts),
-        KernelResults(
-            log_accept_ratio=log_accept_ratio,
-            current_grads_target_log_prob=accepted_grads_target_log_prob,
-            current_target_log_prob=accepted_target_log_prob,
-            is_accepted=is_accepted,
-            proposed_grads_target_log_prob=proposed_grads_target_log_prob,
-            proposed_state=maybe_flatten(proposed_state_parts),
-            proposed_target_log_prob=proposed_target_log_prob,
-        ),
-    ]
-
-
-def _leapfrog_integrator(current_momentums,
-                         target_log_prob_fn,
-                         current_state_parts,
-                         step_sizes,
-                         num_leapfrog_steps,
-                         current_target_log_prob=None,
-                         current_grads_target_log_prob=None,
-                         name=None):
-  """Applies `num_leapfrog_steps` of the leapfrog integrator.
-
-  Assumes a simple quadratic kinetic energy function: `0.5 ||momentum||**2`.
-
-  #### Examples:
-
-  ##### Simple quadratic potential.
-
-  ```python
-  tfd = tf.contrib.distributions
-
-  dims = 10
-  num_iter = int(1e3)
-  dtype = np.float32
-
-  position = tf.placeholder(np.float32)
-  momentum = tf.placeholder(np.float32)
-
-  [
-      next_momentums,
-      next_positions,
-  ] = hmc._leapfrog_integrator(
-      current_momentums=[momentum],
-      target_log_prob_fn=tfd.MultivariateNormalDiag(
-          loc=tf.zeros(dims, dtype)).log_prob,
-      current_state_parts=[position],
-      step_sizes=0.1,
-      num_leapfrog_steps=3)[:2]
-
-  sess.graph.finalize()  # No more graph building.
-
-  momentum_ = np.random.randn(dims).astype(dtype)
-  position_ = np.random.randn(dims).astype(dtype)
-
-  positions = np.zeros([num_iter, dims], dtype)
-  for i in xrange(num_iter):
-    position_, momentum_ = sess.run(
-        [next_momentums[0], next_position[0]],
-        feed_dict={position: position_, momentum: momentum_})
-    positions[i] = position_
-
-  plt.plot(positions[:, 0]);  # Sinusoidal.
-  ```
-
-  Args:
-    current_momentums: Tensor containing the value(s) of the momentum
-      variable(s) to update.
-    target_log_prob_fn: Python callable which takes an argument like
-      `*current_state_parts` and returns its (possibly unnormalized) log-density
-      under the target distribution.
-    current_state_parts: Python `list` of `Tensor`s representing the current
-      state(s) of the Markov chain(s). The first `independent_chain_ndims` of
-      the `Tensor`(s) index different chains.
-    step_sizes: Python `list` of `Tensor`s representing the step size for the
-      leapfrog integrator. Must broadcast with the shape of
-      `current_state_parts`.  Larger step sizes lead to faster progress, but
-      too-large step sizes make rejection exponentially more likely. When
-      possible, it's often helpful to match per-variable step sizes to the
-      standard deviations of the target distribution in each variable.
-    num_leapfrog_steps: Integer number of steps to run the leapfrog integrator
-      for. Total progress per HMC step is roughly proportional to `step_size *
-      num_leapfrog_steps`.
-    current_target_log_prob: (Optional) `Tensor` representing the value of
-      `target_log_prob_fn(*current_state_parts)`. The only reason to specify
-      this argument is to reduce TF graph size.
-      Default value: `None` (i.e., compute as needed).
-    current_grads_target_log_prob: (Optional) Python list of `Tensor`s
-      representing gradient of `target_log_prob_fn(*current_state_parts`) wrt
-      `current_state_parts`. Must have same shape as `current_state_parts`. The
-      only reason to specify this argument is to reduce TF graph size.
-      Default value: `None` (i.e., compute as needed).
-    name: Python `str` name prefixed to Ops created by this function.
-      Default value: `None` (i.e., "hmc_leapfrog_integrator").
-
-  Returns:
-    proposed_momentums: Updated value of the momentum.
-    proposed_state_parts: Tensor or Python list of `Tensor`s representing the
-      state(s) of the Markov chain(s) at each result step. Has same shape as
-      input `current_state_parts`.
-    proposed_target_log_prob: `Tensor` representing the value of
-      `target_log_prob_fn` at `next_state`.
-    proposed_grads_target_log_prob: Gradient of `proposed_target_log_prob` wrt
-      `next_state`.
-
-  Raises:
-    ValueError: if `len(momentums) != len(state_parts)`.
-    ValueError: if `len(state_parts) != len(step_sizes)`.
-    ValueError: if `len(state_parts) != len(grads_target_log_prob)`.
-    TypeError: if `not target_log_prob.dtype.is_floating`.
-  """
-  def _loop_body(step,
-                 current_momentums,
-                 current_state_parts,
-                 ignore_current_target_log_prob,  # pylint: disable=unused-argument
-                 current_grads_target_log_prob):
-    return [step + 1] + list(_leapfrog_step(current_momentums,
-                                            target_log_prob_fn,
-                                            current_state_parts,
-                                            step_sizes,
-                                            current_grads_target_log_prob))
-
-  with ops.name_scope(
-      name, "hmc_leapfrog_integrator",
-      [current_momentums, current_state_parts, step_sizes, num_leapfrog_steps,
-       current_target_log_prob, current_grads_target_log_prob]):
-    if len(current_momentums) != len(current_state_parts):
-      raise ValueError("`momentums` must be in one-to-one correspondence "
-                       "with `state_parts`")
-    num_leapfrog_steps = ops.convert_to_tensor(num_leapfrog_steps,
-                                               name="num_leapfrog_steps")
-    current_target_log_prob, current_grads_target_log_prob = (
-        _maybe_call_fn_and_grads(
-            target_log_prob_fn,
-            current_state_parts,
-            current_target_log_prob,
-            current_grads_target_log_prob))
-    return control_flow_ops.while_loop(
-        cond=lambda iter_, *args: iter_ < num_leapfrog_steps,
-        body=_loop_body,
-        loop_vars=[
-            np.int32(0),  # iter_
-            current_momentums,
-            current_state_parts,
-            current_target_log_prob,
-            current_grads_target_log_prob,
-        ],
-        back_prop=False)[1:]  # Lop-off "iter_".
-
-
-def _leapfrog_step(current_momentums,
-                   target_log_prob_fn,
-                   current_state_parts,
-                   step_sizes,
-                   current_grads_target_log_prob,
-                   name=None):
-  """Applies one step of the leapfrog integrator."""
-  with ops.name_scope(
-      name, "_leapfrog_step",
-      [current_momentums, current_state_parts, step_sizes,
-       current_grads_target_log_prob]):
-    proposed_momentums = [m + 0.5 * ss * g for m, ss, g
-                          in zip(current_momentums,
-                                 step_sizes,
-                                 current_grads_target_log_prob)]
-    proposed_state_parts = [x + ss * m for x, ss, m
-                            in zip(current_state_parts,
-                                   step_sizes,
-                                   proposed_momentums)]
-    proposed_target_log_prob = target_log_prob_fn(*proposed_state_parts)
-    if not proposed_target_log_prob.dtype.is_floating:
-      raise TypeError("`target_log_prob_fn` must produce a `Tensor` "
-                      "with `float` `dtype`.")
-    proposed_grads_target_log_prob = gradients_ops.gradients(
-        proposed_target_log_prob, proposed_state_parts)
-    if any(g is None for g in proposed_grads_target_log_prob):
-      raise ValueError(
-          "Encountered `None` gradient. Does your target `target_log_prob_fn` "
-          "access all `tf.Variable`s via `tf.get_variable`?\n"
-          "  current_state_parts: {}\n"
-          "  proposed_state_parts: {}\n"
-          "  proposed_grads_target_log_prob: {}".format(
-              current_state_parts,
-              proposed_state_parts,
-              proposed_grads_target_log_prob))
-    proposed_momentums = [m + 0.5 * ss * g for m, ss, g
-                          in zip(proposed_momentums,
-                                 step_sizes,
-                                 proposed_grads_target_log_prob)]
-    return [
-        proposed_momentums,
-        proposed_state_parts,
-        proposed_target_log_prob,
-        proposed_grads_target_log_prob,
-    ]
-
-
-def _compute_energy_change(current_target_log_prob,
-                           current_momentums,
-                           proposed_target_log_prob,
-                           proposed_momentums,
-                           independent_chain_ndims,
-                           name=None):
-  """Helper to `kernel` which computes the energy change."""
-  with ops.name_scope(
-      name, "compute_energy_change",
-      ([current_target_log_prob, proposed_target_log_prob,
-        independent_chain_ndims] +
-       current_momentums + proposed_momentums)):
-    # Abbreviate lk0=log_kinetic_energy and lk1=proposed_log_kinetic_energy
-    # since they're a mouthful and lets us inline more.
-    lk0, lk1 = [], []
-    for current_momentum, proposed_momentum in zip(current_momentums,
-                                                   proposed_momentums):
-      axis = math_ops.range(independent_chain_ndims,
-                            array_ops.rank(current_momentum))
-      lk0.append(_log_sum_sq(current_momentum, axis))
-      lk1.append(_log_sum_sq(proposed_momentum, axis))
-
-    lk0 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk0, axis=-1),
-                                                  axis=-1)
-    lk1 = -np.log(2.) + math_ops.reduce_logsumexp(array_ops.stack(lk1, axis=-1),
-                                                  axis=-1)
-    lp0 = -current_target_log_prob   # potential
-    lp1 = -proposed_target_log_prob  # proposed_potential
-    x = array_ops.stack([lp1, math_ops.exp(lk1), -lp0, -math_ops.exp(lk0)],
-                        axis=-1)
-
-    # The sum is NaN if any element is NaN or we see both +Inf and -Inf.
-    # Thus we will replace such rows with infinite energy change which implies
-    # rejection. Recall that float-comparisons with NaN are always False.
-    is_sum_determinate = (
-        math_ops.reduce_all(math_ops.is_finite(x) | (x >= 0.), axis=-1) &
-        math_ops.reduce_all(math_ops.is_finite(x) | (x <= 0.), axis=-1))
-    is_sum_determinate = array_ops.tile(
-        is_sum_determinate[..., array_ops.newaxis],
-        multiples=array_ops.concat([
-            array_ops.ones(array_ops.rank(is_sum_determinate),
-                           dtype=dtypes.int32),
-            [4],
-        ], axis=0))
-    x = array_ops.where(is_sum_determinate,
-                        x,
-                        array_ops.fill(array_ops.shape(x),
-                                       value=x.dtype.as_numpy_dtype(np.inf)))
-
-    return math_ops.reduce_sum(x, axis=-1)
-
-
-def _choose(is_accepted,
-            accepted,
-            rejected,
-            independent_chain_ndims,
-            name=None):
-  """Helper to `kernel` which expand_dims `is_accepted` to apply tf.where."""
-  def _expand_is_accepted_like(x):
-    with ops.name_scope("_choose"):
-      expand_shape = array_ops.concat([
-          array_ops.shape(is_accepted),
-          array_ops.ones([array_ops.rank(x) - array_ops.rank(is_accepted)],
-                         dtype=dtypes.int32),
-      ], axis=0)
-      multiples = array_ops.concat([
-          array_ops.ones([array_ops.rank(is_accepted)], dtype=dtypes.int32),
-          array_ops.shape(x)[independent_chain_ndims:],
-      ], axis=0)
-      m = array_ops.tile(array_ops.reshape(is_accepted, expand_shape),
-                         multiples)
-      m.set_shape(x.shape)
-      return m
-  with ops.name_scope(name, "_choose", values=[
-      is_accepted, accepted, rejected, independent_chain_ndims]):
-    return array_ops.where(_expand_is_accepted_like(accepted),
-                           accepted,
-                           rejected)
-
-
-def _maybe_call_fn_and_grads(fn,
-                             fn_arg_list,
-                             fn_result=None,
-                             grads_fn_result=None,
-                             description="target_log_prob"):
-  """Helper which computes `fn_result` and `grads` if needed."""
-  fn_arg_list = (list(fn_arg_list) if _is_list_like(fn_arg_list)
-                 else [fn_arg_list])
-  if fn_result is None:
-    fn_result = fn(*fn_arg_list)
-  if not fn_result.dtype.is_floating:
-    raise TypeError("`{}` must be a `Tensor` with `float` `dtype`.".format(
-        description))
-  if grads_fn_result is None:
-    grads_fn_result = gradients_ops.gradients(
-        fn_result, fn_arg_list)
-  if len(fn_arg_list) != len(grads_fn_result):
-    raise ValueError("`{}` must be in one-to-one correspondence with "
-                     "`grads_{}`".format(*[description]*2))
-  if any(g is None for g in grads_fn_result):
-    raise ValueError("Encountered `None` gradient.")
-  return fn_result, grads_fn_result
-
-
-def _prepare_args(target_log_prob_fn, state, step_size,
-                  target_log_prob=None, grads_target_log_prob=None,
-                  maybe_expand=False, description="target_log_prob"):
-  """Helper which processes input args to meet list-like assumptions."""
-  state_parts = list(state) if _is_list_like(state) else [state]
-  state_parts = [ops.convert_to_tensor(s, name="state")
-                 for s in state_parts]
-  target_log_prob, grads_target_log_prob = _maybe_call_fn_and_grads(
-      target_log_prob_fn,
-      state_parts,
-      target_log_prob,
-      grads_target_log_prob,
-      description)
-  step_sizes = list(step_size) if _is_list_like(step_size) else [step_size]
-  step_sizes = [
-      ops.convert_to_tensor(
-          s, name="step_size", dtype=target_log_prob.dtype)
-      for s in step_sizes]
-  if len(step_sizes) == 1:
-    step_sizes *= len(state_parts)
-  if len(state_parts) != len(step_sizes):
-    raise ValueError("There should be exactly one `step_size` or it should "
-                     "have same length as `current_state`.")
-  maybe_flatten = lambda x: x if maybe_expand or _is_list_like(state) else x[0]
-  return [
-      maybe_flatten(state_parts),
-      maybe_flatten(step_sizes),
-      target_log_prob,
-      grads_target_log_prob,
-  ]
-
-
-def _is_list_like(x):
-  """Helper which returns `True` if input is `list`-like."""
-  return isinstance(x, (tuple, list))
-
-
-def _log_sum_sq(x, axis=None):
-  """Computes log(sum(x**2))."""
-  return math_ops.reduce_logsumexp(2. * math_ops.log(math_ops.abs(x)), axis)
diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py
deleted file mode 100644
index e7fcbc65ef..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Functions to create a Markov Chain Monte Carlo Metropolis step."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.bayesflow.python.ops.metropolis_hastings_impl import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
-
-_allowed_symbols = [
-    'kernel',
-    'evolve',
-    'proposal_uniform',
-    'proposal_normal',
-]
-
-remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py b/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py
deleted file mode 100644
index 05aa134ed5..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/metropolis_hastings_impl.py
+++ /dev/null
@@ -1,527 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Metropolis-Hastings and proposal distributions.
-
-@@kernel
-@@evolve
-@@proposal_uniform
-@@proposal_normal
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import control_flow_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops import state_ops
-
-__all__ = [
-    "kernel",
-    "evolve",
-    "proposal_uniform",
-    "proposal_normal",
-]
-
-
-KernelResults = collections.namedtuple(
-    "KernelResults",
-    [
-        "log_accept_ratio",
-        "current_target_log_prob",  # "Current result" means "accepted".
-        "is_accepted",
-        "proposed_state",
-    ])
-
-
-def kernel(target_log_prob_fn,
-           proposal_fn,
-           current_state,
-           seed=None,
-           current_target_log_prob=None,
-           name=None):
-  """Runs the Metropolis-Hastings transition kernel.
-
-  This function can update multiple chains in parallel. It assumes that all
-  leftmost dimensions of `current_state` index independent chain states (and are
-  therefore updated independently). The output of `target_log_prob_fn()` should
-  sum log-probabilities across all event dimensions. Slices along the rightmost
-  dimensions may have different target distributions; for example,
-  `current_state[0, :]` could have a different target distribution from
-  `current_state[1, :]`. This is up to `target_log_prob_fn()`. (The number of
-  independent chains is `tf.size(target_log_prob_fn(*current_state))`.)
-
-  Args:
-    target_log_prob_fn: Python callable which takes an argument like
-      `current_state` (or `*current_state` if it's a list) and returns its
-      (possibly unnormalized) log-density under the target distribution.
-    proposal_fn: Python callable which takes an argument like `current_state`
-      (or `*current_state` if it's a list) and returns a tuple of proposed
-      states of same shape as `state`, and a log ratio `Tensor` of same shape
-      as `current_target_log_prob`. The log ratio is the log-probability of
-      `state` given proposed states minus the log-probability of proposed
-      states given `state`. If the proposal is symmetric, set the second value
-      to `None`: this enables more efficient computation than explicitly
-      supplying a tensor of zeros.
-    current_state: `Tensor` or Python `list` of `Tensor`s representing the
-      current state(s) of the Markov chain(s). The first `r` dimensions index
-      independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`.
-    seed: Python integer to seed the random number generator.
-    current_target_log_prob: (Optional) `Tensor` representing the value of
-      `target_log_prob_fn` at the `current_state`. The only reason to
-      specify this argument is to reduce TF graph size.
-      Default value: `None` (i.e., compute as needed).
-    name: A name of the operation (optional).
-
-  Returns:
-    next_state: Tensor or Python list of `Tensor`s representing the state(s)
-      of the Markov chain(s) at each result step. Has same shape as
-      `current_state`.
-    kernel_results: `collections.namedtuple` of internal calculations used to
-      advance the chain.
-
-  #### Examples
-
-  We illustrate Metropolis-Hastings on a Normal likelihood with
-  unknown mean.
-
-  ```python
-  tfd = tf.contrib.distributions
-  tfp = tf.contrib.bayesflow
-
-  loc = tf.get_variable("loc", initializer=1.)
-  x = tf.constant([0.0] * 50)
-
-  def make_target_log_prob_fn(x):
-    def target_log_prob_fn(loc):
-      prior = tfd.Normal(loc=0., scale=1.)
-      likelihood = tfd.Independent(
-        tfd.Normal(loc=loc, scale=0.1),
-        reinterpreted_batch_ndims=1)
-      return prior.log_prob(loc) + likelihood.log_prob(x)
-    return target_log_prob_fn
-
-  next_state, kernel_results = tfp.metropolis_hastings.kernel(
-      target_log_prob_fn=make_target_log_prob_fn(x),
-      proposal_fn=tfp.metropolis_hastings.proposal_normal(),
-      current_state=loc)
-  loc_update = loc.assign(next_state)
-  ```
-
-  We illustrate Metropolis-Hastings on a Normal likelihood with
-  unknown mean and variance. We apply 4 chains.
-
-  ```python
-  tfd = tf.contrib.distributions
-  tfp = tf.contrib.bayesflow
-
-  num_chains = 4
-  loc = tf.get_variable("loc", shape=[num_chains],
-                        initializer=tf.random_normal_initializer())
-  scale = tf.get_variable("scale", shape=[num_chains],
-                          initializer=tf.ones_initializer())
-  x = tf.constant([0.0] * 50)
-
-  def make_target_log_prob_fn(x):
-    data = tf.reshape(x, shape=[-1, 1])
-    def target_log_prob_fn(loc, scale):
-      prior_loc = tfd.Normal(loc=0., scale=1.)
-      prior_scale = tfd.InverseGamma(concentration=1., rate=1.)
-      likelihood = tfd.Independent(
-        tfd.Normal(loc=loc, scale=scale),
-        reinterpreted_batch_ndims=1)
-      return (prior_loc.log_prob(loc) +
-              prior_scale.log_prob(scale) +
-              likelihood.log_prob(data))
-    return target_log_prob_fn
-
-  def proposal_fn(loc, scale):
-    loc_proposal = tfp.metropolis_hastings.proposal_normal()
-    scale_proposal = tfp.metropolis_hastings.proposal_uniform(minval=-1.)
-    proposed_loc, _ = loc_proposal(loc)
-    proposed_scale, _ = scale_proposal(scale)
-    proposed_scale = tf.maximum(proposed_scale, 0.01)
-    return [proposed_loc, proposed_scale], None
-
-  next_state, kernel_results = tfp.metropolis_hastings.kernel(
-      target_log_prob_fn=make_target_log_prob_fn(x),
-      proposal_fn=proposal_fn,
-      current_state=[loc, scale])
-  train_op = tf.group(loc.assign(next_state[0]),
-                      scale.assign(next_state[1]))
-  ```
-
-  """
-  with ops.name_scope(
-      name, "metropolis_hastings_kernel",
-      [current_state, seed, current_target_log_prob]):
-    with ops.name_scope("initialize"):
-      maybe_expand = lambda x: list(x) if _is_list_like(x) else [x]
-      current_state_parts = maybe_expand(current_state)
-      if current_target_log_prob is None:
-        current_target_log_prob = target_log_prob_fn(*current_state_parts)
-
-    proposed_state, log_transit_ratio = proposal_fn(*current_state_parts)
-    proposed_state_parts = maybe_expand(proposed_state)
-
-    proposed_target_log_prob = target_log_prob_fn(*proposed_state_parts)
-
-    with ops.name_scope(
-        "accept_reject",
-        [current_state_parts, proposed_state_parts,
-         current_target_log_prob, proposed_target_log_prob]):
-      log_accept_ratio = proposed_target_log_prob - current_target_log_prob
-      if log_transit_ratio is not None:
-        # If the log_transit_ratio is None, then assume the proposal is
-        # symmetric, i.e.,
-        #   log p(old | new) - log p(new | old) = 0.
-        log_accept_ratio += log_transit_ratio
-
-      # u < exp(log_accept_ratio),  where u~Uniform[0,1)
-      # ==> log(u) < log_accept_ratio
-      random_value = random_ops.random_uniform(
-          array_ops.shape(log_accept_ratio),
-          dtype=log_accept_ratio.dtype,
-          seed=seed)
-      random_negative = math_ops.log(random_value)
-      is_accepted = random_negative < log_accept_ratio
-      next_state_parts = [array_ops.where(is_accepted,
-                                          proposed_state_part,
-                                          current_state_part)
-                          for proposed_state_part, current_state_part in
-                          zip(proposed_state_parts, current_state_parts)]
-      accepted_log_prob = array_ops.where(is_accepted,
-                                          proposed_target_log_prob,
-                                          current_target_log_prob)
-    maybe_flatten = lambda x: x if _is_list_like(current_state) else x[0]
-    return [
-        maybe_flatten(next_state_parts),
-        KernelResults(
-            log_accept_ratio=log_accept_ratio,
-            current_target_log_prob=accepted_log_prob,
-            is_accepted=is_accepted,
-            proposed_state=maybe_flatten(proposed_state_parts),
-        ),
-    ]
-
-
-def evolve(initial_sample,
-           initial_log_density,
-           initial_log_accept_ratio,
-           target_log_prob_fn,
-           proposal_fn,
-           n_steps=1,
-           seed=None,
-           name=None):
-  """Performs `n_steps` of the Metropolis-Hastings update.
-
-  Given a probability density function, `f(x)` and a proposal scheme which
-  generates new points from old, this `Op` returns a tensor
-  which may be used to generate approximate samples from the target distribution
-  using the Metropolis-Hastings algorithm. These samples are from a Markov chain
-  whose equilibrium distribution matches the target distribution.
-
-  The probability distribution may have an unknown normalization constan.
-  We parameterize the probability density as follows:
-
-  ```none
-  f(x) = exp(L(x) + constant)
-  ```
-
-  Here `L(x)` is any continuous function with an (possibly unknown but finite)
-  upper bound, i.e. there exists a number beta such that
-  `L(x)< beta < infinity` for all x. The constant is the normalization needed
-  to make `f(x)` a probability density (as opposed to just a finite measure).
-
-  Although `initial_sample` can be arbitrary, a poor choice may result in a
-  slow-to-mix chain. In many cases the best choice is the one that maximizes
-  the target density, i.e., choose `initial_sample` such that
-  `f(initial_sample) >= f(x)` for all `x`.
-
-
-  If the support of the distribution is a strict subset of R^n (but of non zero
-  measure), then the unnormalized log-density `L(x)` should return `-infinity`
-  outside the support domain. This effectively forces the sampler to only
-  explore points in the regions of finite support.
-
-  Usage:
-  This function is meant to be wrapped up with some of the common proposal
-  schemes (e.g. random walk, Langevin diffusion etc) to produce a more user
-  friendly interface. However, it may also be used to create bespoke samplers.
-
-  The following example, demonstrates the use to generate a 1000 uniform random
-  walk Metropolis samplers run in parallel for the normal target distribution.
-
-  ```python
-  n = 3  # dimension of the problem
-
-  # Generate 1000 initial values randomly. Each of these would be an
-  # independent starting point for a Markov chain.
-  state = tf.get_variable(
-      "state",
-      initializer=tf.random_normal([1000, n],
-                                   mean=3.0,
-                                   dtype=tf.float64,
-                                   seed=42))
-
-  # Computes the log(p(x)) for the unit normal density and ignores the
-  # normalization constant.
-  def log_density(x):
-    return -tf.reduce_sum(x * x, reduction_indices=-1) / 2.0
-
-  # Initial log-density value
-  state_log_density = tf.get_variable(
-      "state_log_density",
-      initializer=log_density(state.initialized_value()))
-
-  # A variable to store the log_acceptance_ratio:
-  log_acceptance_ratio = tf.get_variable(
-      "log_acceptance_ratio",
-      initializer=tf.zeros([1000], dtype=tf.float64))
-
-  # Generates random proposals by moving each coordinate uniformly and
-  # independently in a box of size 2 centered around the current value.
-  # Returns the new point and also the log of the Hastings ratio (the
-  # ratio of the probability of going from the proposal to origin and the
-  # probability of the reverse transition). When this ratio is 1, the value
-  # may be omitted and replaced by None.
-  def random_proposal(x):
-    return (x + tf.random_uniform(tf.shape(x), minval=-1, maxval=1,
-                                  dtype=x.dtype, seed=12)), None
-
-  #  Create the op to propagate the chain for 100 steps.
-  stepper = mh.evolve(
-      state, state_log_density, log_acceptance_ratio,
-      log_density, random_proposal, n_steps=100, seed=123)
-  init = tf.initialize_all_variables()
-  with tf.Session() as sess:
-    sess.run(init)
-    # Run the chains for a total of 1000 steps and print out the mean across
-    # the chains every 100 iterations.
-    for n_iter in range(10):
-      # Executing the stepper advances the chain to the next state.
-      sess.run(stepper)
-      # Print out the current value of the mean(sample) for every dimension.
-      print(np.mean(sess.run(state), 0))
-    # Estimated covariance matrix
-    samples = sess.run(state)
-    print(np.cov(samples, rowvar=False))
-  ```
-
-  Args:
-    initial_sample: A float-like `tf.Variable` of any shape that can
-      be consumed by the `target_log_prob_fn` and `proposal_fn`
-      callables.
-    initial_log_density: Float-like `tf.Variable` with `dtype` and shape
-      equivalent  to `target_log_prob_fn(initial_sample)`, i.e., matching
-        the result of `target_log_prob_fn` invoked at `current_state`.
-    initial_log_accept_ratio: A `tf.Variable` with `dtype` and shape matching
-      `initial_log_density`. Stands for the log of Metropolis-Hastings
-      acceptance ratio after propagating the chain for `n_steps`.
-    target_log_prob_fn: A Python callable evaluated at
-      `current_state` and returning a float-like `Tensor` of log target-density
-      up to a normalizing constant. In other words,
-      `target_log_prob_fn(x) = log(g(x))`, where
-      `target_density = g(x)/Z` for some constant `A`. The shape of the input
-      tensor is the same as the shape of the `current_state`. The shape of the
-      output tensor is either
-        (a). Same as the input shape if the density being sampled is one
-          dimensional, or
-        (b). If the density is defined for `events` of shape
-          `event_shape = [E1, E2, ... Ee]`, then the input tensor should be of
-          shape `batch_shape + event_shape`, here `batch_shape = [B1, ..., Bb]`
-          and the result must be of shape [B1, ..., Bb]. For example, if the
-          distribution that is being sampled is a 10 dimensional normal,
-          then the input tensor may be of shape [100, 10] or [30, 20, 10]. The
-          last dimension will then be 'consumed' by `target_log_prob_fn`
-          and it should return tensors of shape [100] and [30, 20] respectively.
-    proposal_fn: A callable accepting a real valued `Tensor` of current sample
-      points and returning a tuple of two `Tensors`. The first element of the
-      pair should be a `Tensor` containing the proposal state and should have
-      the same shape as the input `Tensor`. The second element of the pair gives
-      the log of the ratio of the probability of transitioning from the
-      proposal points to the input points and the probability of transitioning
-      from the input points to the proposal points. If the proposal is
-      symmetric, i.e.
-      Probability(Proposal -> Current) = Probability(Current -> Proposal)
-      the second value should be set to None instead of explicitly supplying a
-      tensor of zeros. In addition to being convenient, this also leads to a
-      more efficient graph.
-    n_steps: A positive `int` or a scalar `int32` tensor. Sets the number of
-      iterations of the chain.
-    seed: `int` or None. The random seed for this `Op`. If `None`, no seed is
-      applied.
-    name: A string that sets the name for this `Op`.
-
-  Returns:
-    forward_step: an `Op` to step the Markov chain forward for `n_steps`.
-  """
-
-  with ops.name_scope(name, "metropolis_hastings", [initial_sample]):
-    current_state = initial_sample
-    current_target_log_prob = initial_log_density
-    log_accept_ratio = initial_log_accept_ratio
-
-    def step(i, current_state, current_target_log_prob, log_accept_ratio):
-      """Wrap single Markov chain iteration in `while_loop`."""
-      next_state, kernel_results = kernel(
-          target_log_prob_fn=target_log_prob_fn,
-          proposal_fn=proposal_fn,
-          current_state=current_state,
-          current_target_log_prob=current_target_log_prob,
-          seed=seed)
-      accepted_log_prob = kernel_results.current_target_log_prob
-      log_accept_ratio = kernel_results.log_accept_ratio
-      return i + 1, next_state, accepted_log_prob, log_accept_ratio
-
-    (_, accepted_state, accepted_target_log_prob, accepted_log_accept_ratio) = (
-        control_flow_ops.while_loop(
-            cond=lambda i, *ignored_args: i < n_steps,
-            body=step,
-            loop_vars=[
-                0,  # i
-                current_state,
-                current_target_log_prob,
-                log_accept_ratio,
-            ],
-            parallel_iterations=1 if seed is not None else 10,
-            # TODO(b/73775595): Confirm optimal setting of swap_memory.
-            swap_memory=1))
-
-    forward_step = control_flow_ops.group(
-        state_ops.assign(current_target_log_prob, accepted_target_log_prob),
-        state_ops.assign(current_state, accepted_state),
-        state_ops.assign(log_accept_ratio, accepted_log_accept_ratio))
-
-    return forward_step
-
-
-def proposal_uniform(step_size=1.,
-                     seed=None,
-                     name=None):
-  """Returns a callable that adds a random uniform tensor to the input.
-
-  This function returns a callable that accepts one `Tensor` argument of any
-  shape and a real data type (i.e. `tf.float32` or `tf.float64`). It adds a
-  sample from a random uniform distribution drawn from [-stepsize, stepsize]
-  to its input. It also returns the log of the ratio of the probability of
-  moving from the input point to the proposed point, but since this log ratio is
-  identically equal to 0 (because the probability of drawing a value `x` from
-  the symmetric uniform distribution is the same as the probability of drawing
-  `-x`), it simply returns None for the second element of the returned tuple.
-
-  Args:
-    step_size: A positive `float` or a scalar tensor of real dtype
-      controlling the scale of the uniform distribution.
-      If step_size = a, then draws are made uniformly from [-a, a].
-    seed: `int` or None. The random seed for this `Op`. If `None`, no seed is
-      applied.
-    name: A string that sets the name for this `Op`.
-
-  Returns:
-    proposal_fn:  A callable accepting one float-like `Tensor` and returning a
-      2-tuple. The first value in the tuple is a `Tensor` of the same shape and
-      dtype as the input argument and the second element of the tuple is None.
-  """
-
-  with ops.name_scope(name, "proposal_uniform", [step_size]):
-    step_size = ops.convert_to_tensor(step_size, name="step_size")
-
-    def proposal_fn(input_state, name=None):
-      """Adds a uniform perturbation to the input state.
-
-      Args:
-        input_state: A `Tensor` of any shape and real dtype.
-        name: A string that sets the name for this `Op`.
-
-      Returns:
-        proposal_state:  A float-like `Tensor` with `dtype` and shape matching
-          `input_state`.
-        log_transit_ratio: `None`. Proposal is symmetric.
-      """
-      with ops.name_scope(name, "proposer", [input_state]):
-        input_state = ops.convert_to_tensor(input_state, name="input_state")
-        return input_state + random_ops.random_uniform(
-            array_ops.shape(input_state),
-            minval=-step_size,
-            maxval=step_size,
-            seed=seed), None
-    return proposal_fn
-
-
-def proposal_normal(scale=1.,
-                    seed=None,
-                    name=None):
-  """Returns a callable that adds a random normal tensor to the input.
-
-  This function returns a callable that accepts one `Tensor` argument of any
-  shape and a real data type (i.e. `tf.float32` or `tf.float64`). The callable
-  adds a sample from a normal distribution with the supplied standard deviation
-  and zero mean to its input argument (called the proposal point).
-  The callable returns a tuple with the proposal point as the first element.
-  The second element is identically `None`. It is included so the callable is
-  compatible with the expected signature of the proposal scheme argument in the
-  `metropolis_hastings` function. A value of `None` indicates that the
-  probability of going from the input point to the proposal point is equal to
-  the probability of going from the proposal point to the input point.
-
-  Args:
-    scale: A positive `float` or a scalar tensor of any real dtype controlling
-      the scale of the normal distribution.
-    seed: `int` or None. The random seed for this `Op`. If `None`, no seed is
-      applied.
-    name: A string that sets the name for this `Op`.
-
-  Returns:
-    proposal_fn: A callable accepting one float-like `Tensor` and returning a
-      2-tuple. The first value in the tuple is a `Tensor` of the same shape and
-      dtype as the input argument and the second element of the tuple is None.
-  """
-
-  with ops.name_scope(name, "proposal_normal", [scale]):
-    scale = ops.convert_to_tensor(scale, name="scale")
-
-    def proposal_fn(input_state, name=None):
-      """Adds a normal perturbation to the input state.
-
-      Args:
-        input_state: A `Tensor` of any shape and real dtype.
-        name: A string that sets the name for this `Op`.
-
-      Returns:
-        proposal_state:  A float-like `Tensor` with `dtype` and shape matching
-          `input_state`.
-        log_transit_ratio: `None`. Proposal is symmetric.
-      """
-
-      with ops.name_scope(name, "proposer", [input_state]):
-        input_state = ops.convert_to_tensor(input_state, name="input_state")
-        return input_state + random_ops.random_normal(
-            array_ops.shape(input_state),
-            mean=0.,
-            stddev=scale,
-            dtype=scale.dtype,
-            seed=seed), None
-    return proposal_fn
-
-
-def _is_list_like(x):
-  """Helper which returns `True` if input is `list`-like."""
-  return isinstance(x, (tuple, list))
author	Joshua V. Dillon <jvdillon@google.com>	2018-03-26 18:50:27 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-03-26 18:52:45 -0700
commit	0be974c423f6e5c363db2d95ed335dde4cb4e69b (patch)
tree	21ebd16c26fa85b2dcaf8de3221404c1d37cfb68 /tensorflow/contrib/bayesflow
parent	931f6d553172ddfc9ec4a7a94ea2c6233bf33cb0 (diff)