author    Joshua V. Dillon <jvdillon@google.com>  2018-03-07 11:12:11 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2018-03-07 11:16:10 -0800
commit    36c91bba08963ed4f7363b5e3d6f5ac9f6e9004d (patch)
tree      3f0f79144dede5c6fbbad0723e5812ff1588c317 /tensorflow/contrib/bayesflow
parent    c905620906f306bfe222118276ffff199deb0367 (diff)
Move `tf.contrib.bayesflow.layers` to `tfp.layers`.
PiperOrigin-RevId: 188203941
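
For users of the removed module, the equivalent layers now live in the tensorflow_probability package, per the commit message above. A minimal migration sketch, assuming tensorflow_probability is installed; the `tfp.layers` names below mirror the symbols removed from layers.py in this diff and are illustrative, not part of the change itself:

# Before this change:
#   from tensorflow.contrib.bayesflow import layers
#   dense = layers.DenseFlipout(units=10)
#
# After this change, the same variational layers are expected under tfp.layers:
import tensorflow as tf
import tensorflow_probability as tfp

dense = tfp.layers.DenseFlipout(units=10, activation=tf.nn.relu)
conv = tfp.layers.Convolution2DFlipout(filters=2, kernel_size=3, padding="SAME")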
Diffstat (limited to 'tensorflow/contrib/bayesflow')
-rw-r--r--  tensorflow/contrib/bayesflow/BUILD | 48
-rw-r--r--  tensorflow/contrib/bayesflow/__init__.py | 2
-rw-r--r--  tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py | 87
-rw-r--r--  tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py | 521
-rw-r--r--  tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py | 443
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/docstring_util.py | 88
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/layers.py | 67
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py | 2486
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py | 955
-rw-r--r--  tensorflow/contrib/bayesflow/python/ops/layers_util.py | 191
10 files changed, 0 insertions, 4888 deletions
diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD
index 8b5c6cec61..e1b34d6deb 100644
--- a/tensorflow/contrib/bayesflow/BUILD
+++ b/tensorflow/contrib/bayesflow/BUILD
@@ -77,54 +77,6 @@ cuda_py_test(
)
cuda_py_test(
- name = "docstring_util_test",
- size = "small",
- srcs = ["python/kernel_tests/docstring_util_test.py"],
- additional_deps = [
- ":bayesflow_py",
- "//tensorflow/python:client_testlib",
- ],
-)
-
-cuda_py_test(
- name = "layers_conv_variational_test",
- size = "small",
- srcs = ["python/kernel_tests/layers_conv_variational_test.py"],
- additional_deps = [
- ":bayesflow_py",
- "//third_party/py/numpy",
- "//tensorflow/contrib/distributions:distributions_py",
- "//tensorflow/python/ops/distributions",
- "//tensorflow/python:array_ops",
- "//tensorflow/python:client_testlib",
- "//tensorflow/python:framework_for_generated_wrappers",
- "//tensorflow/python:gradients",
- "//tensorflow/python:linalg_ops",
- "//tensorflow/python:math_ops",
- "//tensorflow/python:nn_ops",
- ],
-)
-
-cuda_py_test(
- name = "layers_dense_variational_test",
- size = "small",
- srcs = ["python/kernel_tests/layers_dense_variational_test.py"],
- additional_deps = [
- ":bayesflow_py",
- "//third_party/py/numpy",
- "//tensorflow/contrib/distributions:distributions_py",
- "//tensorflow/python/ops/distributions",
- "//tensorflow/python:array_ops",
- "//tensorflow/python:client_testlib",
- "//tensorflow/python:framework_for_generated_wrappers",
- "//tensorflow/python:gradients",
- "//tensorflow/python:linalg_ops",
- "//tensorflow/python:math_ops",
- "//tensorflow/python:nn_ops",
- ],
-)
-
-cuda_py_test(
name = "monte_carlo_test",
size = "small",
srcs = ["python/kernel_tests/monte_carlo_test.py"],
diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py
index 32f2df4b88..bff8ac2cf6 100644
--- a/tensorflow/contrib/bayesflow/__init__.py
+++ b/tensorflow/contrib/bayesflow/__init__.py
@@ -23,7 +23,6 @@ from __future__ import print_function
# pylint: disable=unused-import,line-too-long
from tensorflow.contrib.bayesflow.python.ops import custom_grad
from tensorflow.contrib.bayesflow.python.ops import hmc
-from tensorflow.contrib.bayesflow.python.ops import layers
from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings
from tensorflow.contrib.bayesflow.python.ops import monte_carlo
from tensorflow.contrib.bayesflow.python.ops import optimizers
@@ -36,7 +35,6 @@ _allowed_symbols = [
'custom_grad',
'entropy',
'hmc',
- 'layers',
'metropolis_hastings',
'monte_carlo',
'optimizers',
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py
deleted file mode 100644
index 8ed500b19d..0000000000
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for docstring utilities."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.bayesflow.python.ops import docstring_util
-from tensorflow.python.platform import test
-
-
-class DocstringUtil(test.TestCase):
-
- def _testFunction(self):
- doc_args = """x: Input to return as output.
- y: Baz."""
- @docstring_util.expand_docstring(args=doc_args)
- def foo(x):
- # pylint: disable=g-doc-args
- """Hello world.
-
- Args:
- @{args}
-
- Returns:
- x.
- """
- # pylint: enable=g-doc-args
- return x
-
- true_docstring = """Hello world.
-
- Args:
- x: Input to return as output.
- y: Baz.
-
- Returns:
- x.
- """
- self.assertEqual(foo.__doc__, true_docstring)
-
- def _testClassInit(self):
- doc_args = """x: Input to return as output.
- y: Baz."""
-
- class Foo(object):
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(self, x, y):
- # pylint: disable=g-doc-args
- """Hello world.
-
- Args:
- @{args}
-
- Bar.
- """
- # pylint: enable=g-doc-args
- pass
-
- true_docstring = """Hello world.
-
- Args:
- x: Input to return as output.
- y: Baz.
-
- Bar.
- """
- self.assertEqual(Foo.__doc__, true_docstring)
-
-
-if __name__ == "__main__":
- test.main()
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py
deleted file mode 100644
index 750afb6654..0000000000
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py
+++ /dev/null
@@ -1,521 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for convolutional Bayesian layers."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.bayesflow.python.ops import layers_conv_variational as prob_layers_lib
-from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.ops.distributions import util as distribution_util
-from tensorflow.python.platform import test
-
-
-class Counter(object):
- """Helper class to manage incrementing a counting `int`."""
-
- def __init__(self):
- self._value = -1
-
- @property
- def value(self):
- return self._value
-
- def __call__(self):
- self._value += 1
- return self._value
-
-
-class MockDistribution(independent_lib.Independent):
- """Monitors layer calls to the underlying distribution."""
-
- def __init__(self, result_sample, result_log_prob, loc=None, scale=None):
- self.result_sample = result_sample
- self.result_log_prob = result_log_prob
- self.result_loc = loc
- self.result_scale = scale
- self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0)
- if loc is not None and scale is not None:
- self.result_distribution = normal_lib.Normal(loc=self.result_loc,
- scale=self.result_scale)
- self.called_log_prob = Counter()
- self.called_sample = Counter()
- self.called_loc = Counter()
- self.called_scale = Counter()
-
- def log_prob(self, *args, **kwargs):
- self.called_log_prob()
- return self.result_log_prob
-
- def sample(self, *args, **kwargs):
- self.called_sample()
- return self.result_sample
-
- @property
- def distribution(self): # for dummy check on Independent(Normal)
- return self.result_distribution
-
- @property
- def loc(self):
- self.called_loc()
- return self.result_loc
-
- @property
- def scale(self):
- self.called_scale()
- return self.result_scale
-
-
-class MockKLDivergence(object):
- """Monitors layer calls to the divergence implementation."""
-
- def __init__(self, result):
- self.result = result
- self.args = []
- self.called = Counter()
-
- def __call__(self, *args, **kwargs):
- self.called()
- self.args.append(args)
- return self.result
-
-
-class ConvVariational(test.TestCase):
-
- def _testKLPenaltyKernel(self, layer_class):
- with self.test_session():
- layer = layer_class(filters=2, kernel_size=3)
- if layer_class in (prob_layers_lib.Conv1DReparameterization,
- prob_layers_lib.Conv1DFlipout):
- inputs = random_ops.random_uniform([2, 3, 1], seed=1)
- elif layer_class in (prob_layers_lib.Conv2DReparameterization,
- prob_layers_lib.Conv2DFlipout):
- inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1)
- elif layer_class in (prob_layers_lib.Conv3DReparameterization,
- prob_layers_lib.Conv3DFlipout):
- inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1)
-
- # No keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 0)
- self.assertListEqual(layer.losses, losses)
-
- _ = layer(inputs)
-
- # Yes keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 1)
- self.assertListEqual(layer.losses, losses)
-
- def _testKLPenaltyBoth(self, layer_class):
- def _make_normal(dtype, *args): # pylint: disable=unused-argument
- return normal_lib.Normal(
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.))
- with self.test_session():
- layer = layer_class(
- filters=2,
- kernel_size=3,
- bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(),
- bias_prior_fn=_make_normal)
- if layer_class in (prob_layers_lib.Conv1DReparameterization,
- prob_layers_lib.Conv1DFlipout):
- inputs = random_ops.random_uniform([2, 3, 1], seed=1)
- elif layer_class in (prob_layers_lib.Conv2DReparameterization,
- prob_layers_lib.Conv2DFlipout):
- inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1)
- elif layer_class in (prob_layers_lib.Conv3DReparameterization,
- prob_layers_lib.Conv3DFlipout):
- inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1)
-
- # No keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 0)
- self.assertListEqual(layer.losses, losses)
-
- _ = layer(inputs)
-
- # Yes keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 2)
- self.assertListEqual(layer.losses, losses)
-
- def _testConvSetUp(self, layer_class, batch_size, depth=None,
- height=None, width=None, channels=None, filters=None,
- **kwargs):
- seed = Counter()
- if layer_class in (prob_layers_lib.Conv1DReparameterization,
- prob_layers_lib.Conv1DFlipout):
- inputs = random_ops.random_uniform(
- [batch_size, width, channels], seed=seed())
- kernel_size = (2,)
- elif layer_class in (prob_layers_lib.Conv2DReparameterization,
- prob_layers_lib.Conv2DFlipout):
- inputs = random_ops.random_uniform(
- [batch_size, height, width, channels], seed=seed())
- kernel_size = (2, 2)
- elif layer_class in (prob_layers_lib.Conv3DReparameterization,
- prob_layers_lib.Conv3DFlipout):
- inputs = random_ops.random_uniform(
- [batch_size, depth, height, width, channels], seed=seed())
- kernel_size = (2, 2, 2)
-
- kernel_shape = kernel_size + (channels, filters)
- kernel_posterior = MockDistribution(
- loc=random_ops.random_uniform(kernel_shape, seed=seed()),
- scale=random_ops.random_uniform(kernel_shape, seed=seed()),
- result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()),
- result_sample=random_ops.random_uniform(kernel_shape, seed=seed()))
- kernel_prior = MockDistribution(
- result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()),
- result_sample=random_ops.random_uniform(kernel_shape, seed=seed()))
- kernel_divergence = MockKLDivergence(
- result=random_ops.random_uniform(kernel_shape, seed=seed()))
-
- bias_size = (filters,)
- bias_posterior = MockDistribution(
- result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
- result_sample=random_ops.random_uniform(bias_size, seed=seed()))
- bias_prior = MockDistribution(
- result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
- result_sample=random_ops.random_uniform(bias_size, seed=seed()))
- bias_divergence = MockKLDivergence(
- result=random_ops.random_uniform(bias_size, seed=seed()))
-
- layer = layer_class(
- filters=filters,
- kernel_size=kernel_size,
- padding="SAME",
- kernel_posterior_fn=lambda *args: kernel_posterior,
- kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
- kernel_prior_fn=lambda *args: kernel_prior,
- kernel_divergence_fn=kernel_divergence,
- bias_posterior_fn=lambda *args: bias_posterior,
- bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
- bias_prior_fn=lambda *args: bias_prior,
- bias_divergence_fn=bias_divergence,
- **kwargs)
-
- outputs = layer(inputs)
-
- kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- return (kernel_posterior, kernel_prior, kernel_divergence,
- bias_posterior, bias_prior, bias_divergence,
- layer, inputs, outputs, kl_penalty, kernel_shape)
-
- def _testConvReparameterization(self, layer_class):
- batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
- with self.test_session() as sess:
- (kernel_posterior, kernel_prior, kernel_divergence,
- bias_posterior, bias_prior, bias_divergence, layer, inputs,
- outputs, kl_penalty, kernel_shape) = self._testConvSetUp(
- layer_class, batch_size,
- depth=depth, height=height, width=width, channels=channels,
- filters=filters)
-
- convolution_op = nn_ops.Convolution(
- tensor_shape.TensorShape(inputs.shape),
- filter_shape=tensor_shape.TensorShape(kernel_shape),
- padding="SAME")
- expected_outputs = convolution_op(inputs, kernel_posterior.result_sample)
- expected_outputs = nn.bias_add(expected_outputs,
- bias_posterior.result_sample,
- data_format="NHWC")
-
- [
- expected_outputs_, actual_outputs_,
- expected_kernel_, actual_kernel_,
- expected_kernel_divergence_, actual_kernel_divergence_,
- expected_bias_, actual_bias_,
- expected_bias_divergence_, actual_bias_divergence_,
- ] = sess.run([
- expected_outputs, outputs,
- kernel_posterior.result_sample, layer.kernel_posterior_tensor,
- kernel_divergence.result, kl_penalty[0],
- bias_posterior.result_sample, layer.bias_posterior_tensor,
- bias_divergence.result, kl_penalty[1],
- ])
-
- self.assertAllClose(
- expected_kernel_, actual_kernel_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_bias_, actual_bias_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_outputs_, actual_outputs_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_kernel_divergence_, actual_kernel_divergence_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_bias_divergence_, actual_bias_divergence_,
- rtol=1e-6, atol=0.)
-
- self.assertAllEqual(
- [[kernel_posterior.distribution,
- kernel_prior.distribution,
- kernel_posterior.result_sample]],
- kernel_divergence.args)
-
- self.assertAllEqual(
- [[bias_posterior.distribution,
- bias_prior.distribution,
- bias_posterior.result_sample]],
- bias_divergence.args)
-
- def _testConvFlipout(self, layer_class):
- batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
- with self.test_session() as sess:
- (kernel_posterior, kernel_prior, kernel_divergence,
- bias_posterior, bias_prior, bias_divergence, layer, inputs,
- outputs, kl_penalty, kernel_shape) = self._testConvSetUp(
- layer_class, batch_size,
- depth=depth, height=height, width=width, channels=channels,
- filters=filters, seed=44)
-
- convolution_op = nn_ops.Convolution(
- tensor_shape.TensorShape(inputs.shape),
- filter_shape=tensor_shape.TensorShape(kernel_shape),
- padding="SAME")
-
- expected_kernel_posterior_affine = normal_lib.Normal(
- loc=array_ops.zeros_like(kernel_posterior.result_loc),
- scale=kernel_posterior.result_scale)
- expected_kernel_posterior_affine_tensor = (
- expected_kernel_posterior_affine.sample(seed=42))
-
- expected_outputs = convolution_op(
- inputs, kernel_posterior.distribution.loc)
-
- input_shape = array_ops.shape(inputs)
- output_shape = array_ops.shape(expected_outputs)
- batch_shape = array_ops.expand_dims(input_shape[0], 0)
- channels = input_shape[-1]
- rank = len(inputs.get_shape()) - 2
-
- sign_input = random_ops.random_uniform(
- array_ops.concat([batch_shape,
- array_ops.expand_dims(channels, 0)], 0),
- minval=0,
- maxval=2,
- dtype=dtypes.int32,
- seed=layer.seed)
- sign_input = math_ops.cast(2 * sign_input - 1, inputs.dtype)
- sign_output = random_ops.random_uniform(
- array_ops.concat([batch_shape,
- array_ops.expand_dims(filters, 0)], 0),
- minval=0,
- maxval=2,
- dtype=dtypes.int32,
- seed=distribution_util.gen_new_seed(
- layer.seed, salt="conv_flipout"))
- sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype)
- for _ in range(rank):
- sign_input = array_ops.expand_dims(sign_input, 1) # 2D ex: (B, 1, 1, C)
- sign_output = array_ops.expand_dims(sign_output, 1)
-
- sign_input = array_ops.tile( # tile for element-wise op broadcasting
- sign_input,
- [1] + [input_shape[i + 1] for i in range(rank)] + [1])
- sign_output = array_ops.tile(
- sign_output,
- [1] + [output_shape[i + 1] for i in range(rank)] + [1])
-
- perturbed_inputs = convolution_op(
- inputs * sign_input, expected_kernel_posterior_affine_tensor)
- perturbed_inputs *= sign_output
-
- expected_outputs += perturbed_inputs
- expected_outputs = nn.bias_add(expected_outputs,
- bias_posterior.result_sample,
- data_format="NHWC")
-
- [
- expected_outputs_, actual_outputs_,
- expected_kernel_divergence_, actual_kernel_divergence_,
- expected_bias_, actual_bias_,
- expected_bias_divergence_, actual_bias_divergence_,
- ] = sess.run([
- expected_outputs, outputs,
- kernel_divergence.result, kl_penalty[0],
- bias_posterior.result_sample, layer.bias_posterior_tensor,
- bias_divergence.result, kl_penalty[1],
- ])
-
- self.assertAllClose(
- expected_bias_, actual_bias_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_outputs_, actual_outputs_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_kernel_divergence_, actual_kernel_divergence_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_bias_divergence_, actual_bias_divergence_,
- rtol=1e-6, atol=0.)
-
- self.assertAllEqual(
- [[kernel_posterior.distribution, kernel_prior.distribution, None]],
- kernel_divergence.args)
-
- self.assertAllEqual(
- [[bias_posterior.distribution,
- bias_prior.distribution,
- bias_posterior.result_sample]],
- bias_divergence.args)
-
- def _testRandomConvFlipout(self, layer_class):
- batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5
- with self.test_session() as sess:
- seed = Counter()
- if layer_class in (prob_layers_lib.Conv1DReparameterization,
- prob_layers_lib.Conv1DFlipout):
- inputs = random_ops.random_uniform(
- [batch_size, width, channels], seed=seed())
- kernel_size = (2,)
- elif layer_class in (prob_layers_lib.Conv2DReparameterization,
- prob_layers_lib.Conv2DFlipout):
- inputs = random_ops.random_uniform(
- [batch_size, height, width, channels], seed=seed())
- kernel_size = (2, 2)
- elif layer_class in (prob_layers_lib.Conv3DReparameterization,
- prob_layers_lib.Conv3DFlipout):
- inputs = random_ops.random_uniform(
- [batch_size, depth, height, width, channels], seed=seed())
- kernel_size = (2, 2, 2)
-
- kernel_shape = kernel_size + (channels, filters)
- bias_size = (filters,)
-
- kernel_posterior = MockDistribution(
- loc=random_ops.random_uniform(
- kernel_shape, seed=seed()),
- scale=random_ops.random_uniform(
- kernel_shape, seed=seed()),
- result_log_prob=random_ops.random_uniform(
- kernel_shape, seed=seed()),
- result_sample=random_ops.random_uniform(
- kernel_shape, seed=seed()))
- bias_posterior = MockDistribution(
- loc=random_ops.random_uniform(
- bias_size, seed=seed()),
- scale=random_ops.random_uniform(
- bias_size, seed=seed()),
- result_log_prob=random_ops.random_uniform(
- bias_size, seed=seed()),
- result_sample=random_ops.random_uniform(
- bias_size, seed=seed()))
- layer_one = layer_class(
- filters=filters,
- kernel_size=kernel_size,
- padding="SAME",
- kernel_posterior_fn=lambda *args: kernel_posterior,
- kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
- bias_posterior_fn=lambda *args: bias_posterior,
- bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
- seed=44)
- layer_two = layer_class(
- filters=filters,
- kernel_size=kernel_size,
- padding="SAME",
- kernel_posterior_fn=lambda *args: kernel_posterior,
- kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
- bias_posterior_fn=lambda *args: bias_posterior,
- bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
- seed=45)
-
- outputs_one = layer_one(inputs)
- outputs_two = layer_two(inputs)
-
- outputs_one_, outputs_two_ = sess.run([
- outputs_one, outputs_two])
-
- self.assertLess(np.sum(np.isclose(outputs_one_, outputs_two_)),
- np.prod(outputs_one_.shape))
-
- def testKLPenaltyKernelConv1DReparameterization(self):
- self._testKLPenaltyKernel(prob_layers_lib.Conv1DReparameterization)
-
- def testKLPenaltyKernelConv2DReparameterization(self):
- self._testKLPenaltyKernel(prob_layers_lib.Conv2DReparameterization)
-
- def testKLPenaltyKernelConv3DReparameterization(self):
- self._testKLPenaltyKernel(prob_layers_lib.Conv3DReparameterization)
-
- def testKLPenaltyKernelConv1DFlipout(self):
- self._testKLPenaltyKernel(prob_layers_lib.Conv1DFlipout)
-
- def testKLPenaltyKernelConv2DFlipout(self):
- self._testKLPenaltyKernel(prob_layers_lib.Conv2DFlipout)
-
- def testKLPenaltyKernelConv3DFlipout(self):
- self._testKLPenaltyKernel(prob_layers_lib.Conv3DFlipout)
-
- def testKLPenaltyBothConv1DReparameterization(self):
- self._testKLPenaltyBoth(prob_layers_lib.Conv1DReparameterization)
-
- def testKLPenaltyBothConv2DReparameterization(self):
- self._testKLPenaltyBoth(prob_layers_lib.Conv2DReparameterization)
-
- def testKLPenaltyBothConv3DReparameterization(self):
- self._testKLPenaltyBoth(prob_layers_lib.Conv3DReparameterization)
-
- def testKLPenaltyBothConv1DFlipout(self):
- self._testKLPenaltyBoth(prob_layers_lib.Conv1DFlipout)
-
- def testKLPenaltyBothConv2DFlipout(self):
- self._testKLPenaltyBoth(prob_layers_lib.Conv2DFlipout)
-
- def testKLPenaltyBothConv3DFlipout(self):
- self._testKLPenaltyBoth(prob_layers_lib.Conv3DFlipout)
-
- def testConv1DReparameterization(self):
- self._testConvReparameterization(prob_layers_lib.Conv1DReparameterization)
-
- def testConv2DReparameterization(self):
- self._testConvReparameterization(prob_layers_lib.Conv2DReparameterization)
-
- def testConv3DReparameterization(self):
- self._testConvReparameterization(prob_layers_lib.Conv3DReparameterization)
-
- def testConv1DFlipout(self):
- self._testConvFlipout(prob_layers_lib.Conv1DFlipout)
-
- def testConv2DFlipout(self):
- self._testConvFlipout(prob_layers_lib.Conv2DFlipout)
-
- def testConv3DFlipout(self):
- self._testConvFlipout(prob_layers_lib.Conv3DFlipout)
-
- def testRandomConv1DFlipout(self):
- self._testRandomConvFlipout(prob_layers_lib.Conv1DFlipout)
-
-
-if __name__ == "__main__":
- test.main()
diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
deleted file mode 100644
index 342f38ccec..0000000000
--- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py
+++ /dev/null
@@ -1,443 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for dense Bayesian layers."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational as prob_layers_lib
-from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.ops.distributions import util as distribution_util
-from tensorflow.python.platform import test
-
-
-class Counter(object):
- """Helper class to manage incrementing a counting `int`."""
-
- def __init__(self):
- self._value = -1
-
- @property
- def value(self):
- return self._value
-
- def __call__(self):
- self._value += 1
- return self._value
-
-
-class MockDistribution(independent_lib.Independent):
- """Monitors layer calls to the underlying distribution."""
-
- def __init__(self, result_sample, result_log_prob, loc=None, scale=None):
- self.result_sample = result_sample
- self.result_log_prob = result_log_prob
- self.result_loc = loc
- self.result_scale = scale
- self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0)
- if loc is not None and scale is not None:
- self.result_distribution = normal_lib.Normal(loc=self.result_loc,
- scale=self.result_scale)
- self.called_log_prob = Counter()
- self.called_sample = Counter()
- self.called_loc = Counter()
- self.called_scale = Counter()
-
- def log_prob(self, *args, **kwargs):
- self.called_log_prob()
- return self.result_log_prob
-
- def sample(self, *args, **kwargs):
- self.called_sample()
- return self.result_sample
-
- @property
- def distribution(self): # for dummy check on Independent(Normal)
- return self.result_distribution
-
- @property
- def loc(self):
- self.called_loc()
- return self.result_loc
-
- @property
- def scale(self):
- self.called_scale()
- return self.result_scale
-
-
-class MockKLDivergence(object):
- """Monitors layer calls to the divergence implementation."""
-
- def __init__(self, result):
- self.result = result
- self.args = []
- self.called = Counter()
-
- def __call__(self, *args, **kwargs):
- self.called()
- self.args.append(args)
- return self.result
-
-
-class DenseVariational(test.TestCase):
-
- def _testKLPenaltyKernel(self, layer_class):
- with self.test_session():
- layer = layer_class(units=2)
- inputs = random_ops.random_uniform([2, 3], seed=1)
-
- # No keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 0)
- self.assertListEqual(layer.losses, losses)
-
- _ = layer(inputs)
-
- # Yes keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 1)
- self.assertListEqual(layer.losses, losses)
-
- def _testKLPenaltyBoth(self, layer_class):
- def _make_normal(dtype, *args): # pylint: disable=unused-argument
- return normal_lib.Normal(
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.))
- with self.test_session():
- layer = layer_class(
- units=2,
- bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(),
- bias_prior_fn=_make_normal)
- inputs = random_ops.random_uniform([2, 3], seed=1)
-
- # No keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 0)
- self.assertListEqual(layer.losses, losses)
-
- _ = layer(inputs)
-
- # Yes keys.
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- self.assertEqual(len(losses), 2)
- self.assertListEqual(layer.losses, losses)
-
- def _testDenseSetUp(self, layer_class, batch_size, in_size, out_size,
- **kwargs):
- seed = Counter()
- inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
-
- kernel_size = [in_size, out_size]
- kernel_posterior = MockDistribution(
- loc=random_ops.random_uniform(kernel_size, seed=seed()),
- scale=random_ops.random_uniform(kernel_size, seed=seed()),
- result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
- result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
- kernel_prior = MockDistribution(
- result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()),
- result_sample=random_ops.random_uniform(kernel_size, seed=seed()))
- kernel_divergence = MockKLDivergence(
- result=random_ops.random_uniform(kernel_size, seed=seed()))
-
- bias_size = [out_size]
- bias_posterior = MockDistribution(
- result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
- result_sample=random_ops.random_uniform(bias_size, seed=seed()))
- bias_prior = MockDistribution(
- result_log_prob=random_ops.random_uniform(bias_size, seed=seed()),
- result_sample=random_ops.random_uniform(bias_size, seed=seed()))
- bias_divergence = MockKLDivergence(
- result=random_ops.random_uniform(bias_size, seed=seed()))
-
- layer = layer_class(
- units=out_size,
- kernel_posterior_fn=lambda *args: kernel_posterior,
- kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
- kernel_prior_fn=lambda *args: kernel_prior,
- kernel_divergence_fn=kernel_divergence,
- bias_posterior_fn=lambda *args: bias_posterior,
- bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
- bias_prior_fn=lambda *args: bias_prior,
- bias_divergence_fn=bias_divergence,
- **kwargs)
-
- outputs = layer(inputs)
-
- kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES)
- return (kernel_posterior, kernel_prior, kernel_divergence,
- bias_posterior, bias_prior, bias_divergence,
- layer, inputs, outputs, kl_penalty)
-
- def testKLPenaltyKernelReparameterization(self):
- self._testKLPenaltyKernel(prob_layers_lib.DenseReparameterization)
-
- def testKLPenaltyKernelLocalReparameterization(self):
- self._testKLPenaltyKernel(prob_layers_lib.DenseLocalReparameterization)
-
- def testKLPenaltyKernelFlipout(self):
- self._testKLPenaltyKernel(prob_layers_lib.DenseFlipout)
-
- def testKLPenaltyBothReparameterization(self):
- self._testKLPenaltyBoth(prob_layers_lib.DenseReparameterization)
-
- def testKLPenaltyBothLocalReparameterization(self):
- self._testKLPenaltyBoth(prob_layers_lib.DenseLocalReparameterization)
-
- def testKLPenaltyBothFlipout(self):
- self._testKLPenaltyBoth(prob_layers_lib.DenseFlipout)
-
- def testDenseReparameterization(self):
- batch_size, in_size, out_size = 2, 3, 4
- with self.test_session() as sess:
- (kernel_posterior, kernel_prior, kernel_divergence,
- bias_posterior, bias_prior, bias_divergence, layer, inputs,
- outputs, kl_penalty) = self._testDenseSetUp(
- prob_layers_lib.DenseReparameterization,
- batch_size, in_size, out_size)
-
- expected_outputs = (
- math_ops.matmul(inputs, kernel_posterior.result_sample) +
- bias_posterior.result_sample)
-
- [
- expected_outputs_, actual_outputs_,
- expected_kernel_, actual_kernel_,
- expected_kernel_divergence_, actual_kernel_divergence_,
- expected_bias_, actual_bias_,
- expected_bias_divergence_, actual_bias_divergence_,
- ] = sess.run([
- expected_outputs, outputs,
- kernel_posterior.result_sample, layer.kernel_posterior_tensor,
- kernel_divergence.result, kl_penalty[0],
- bias_posterior.result_sample, layer.bias_posterior_tensor,
- bias_divergence.result, kl_penalty[1],
- ])
-
- self.assertAllClose(
- expected_kernel_, actual_kernel_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_bias_, actual_bias_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_outputs_, actual_outputs_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_kernel_divergence_, actual_kernel_divergence_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_bias_divergence_, actual_bias_divergence_,
- rtol=1e-6, atol=0.)
-
- self.assertAllEqual(
- [[kernel_posterior.distribution,
- kernel_prior.distribution,
- kernel_posterior.result_sample]],
- kernel_divergence.args)
-
- self.assertAllEqual(
- [[bias_posterior.distribution,
- bias_prior.distribution,
- bias_posterior.result_sample]],
- bias_divergence.args)
-
- def testDenseLocalReparameterization(self):
- batch_size, in_size, out_size = 2, 3, 4
- with self.test_session() as sess:
- (kernel_posterior, kernel_prior, kernel_divergence,
- bias_posterior, bias_prior, bias_divergence, layer, inputs,
- outputs, kl_penalty) = self._testDenseSetUp(
- prob_layers_lib.DenseLocalReparameterization,
- batch_size, in_size, out_size)
-
- expected_kernel_posterior_affine = normal_lib.Normal(
- loc=math_ops.matmul(inputs, kernel_posterior.result_loc),
- scale=math_ops.matmul(
- inputs**2., kernel_posterior.result_scale**2)**0.5)
- expected_kernel_posterior_affine_tensor = (
- expected_kernel_posterior_affine.sample(seed=42))
- expected_outputs = (expected_kernel_posterior_affine_tensor +
- bias_posterior.result_sample)
-
- [
- expected_outputs_, actual_outputs_,
- expected_kernel_divergence_, actual_kernel_divergence_,
- expected_bias_, actual_bias_,
- expected_bias_divergence_, actual_bias_divergence_,
- ] = sess.run([
- expected_outputs, outputs,
- kernel_divergence.result, kl_penalty[0],
- bias_posterior.result_sample, layer.bias_posterior_tensor,
- bias_divergence.result, kl_penalty[1],
- ])
-
- self.assertAllClose(
- expected_bias_, actual_bias_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_outputs_, actual_outputs_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_kernel_divergence_, actual_kernel_divergence_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_bias_divergence_, actual_bias_divergence_,
- rtol=1e-6, atol=0.)
-
- self.assertAllEqual(
- [[kernel_posterior.distribution,
- kernel_prior.distribution,
- None]],
- kernel_divergence.args)
-
- self.assertAllEqual(
- [[bias_posterior.distribution,
- bias_prior.distribution,
- bias_posterior.result_sample]],
- bias_divergence.args)
-
- def testDenseFlipout(self):
- batch_size, in_size, out_size = 2, 3, 4
- with self.test_session() as sess:
- (kernel_posterior, kernel_prior, kernel_divergence,
- bias_posterior, bias_prior, bias_divergence, layer, inputs,
- outputs, kl_penalty) = self._testDenseSetUp(
- prob_layers_lib.DenseFlipout,
- batch_size, in_size, out_size, seed=44)
-
- expected_kernel_posterior_affine = normal_lib.Normal(
- loc=array_ops.zeros_like(kernel_posterior.result_loc),
- scale=kernel_posterior.result_scale)
- expected_kernel_posterior_affine_tensor = (
- expected_kernel_posterior_affine.sample(seed=42))
-
- sign_input = random_ops.random_uniform(
- [batch_size, in_size],
- minval=0,
- maxval=2,
- dtype=dtypes.int32,
- seed=layer.seed)
- sign_input = math_ops.cast(2 * sign_input - 1, inputs.dtype)
- sign_output = random_ops.random_uniform(
- [batch_size, out_size],
- minval=0,
- maxval=2,
- dtype=dtypes.int32,
- seed=distribution_util.gen_new_seed(
- layer.seed, salt="dense_flipout"))
- sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype)
- perturbed_inputs = math_ops.matmul(
- inputs * sign_input, expected_kernel_posterior_affine_tensor)
- perturbed_inputs *= sign_output
-
- expected_outputs = math_ops.matmul(inputs, kernel_posterior.result_loc)
- expected_outputs += perturbed_inputs
- expected_outputs += bias_posterior.result_sample
-
- [
- expected_outputs_, actual_outputs_,
- expected_kernel_divergence_, actual_kernel_divergence_,
- expected_bias_, actual_bias_,
- expected_bias_divergence_, actual_bias_divergence_,
- ] = sess.run([
- expected_outputs, outputs,
- kernel_divergence.result, kl_penalty[0],
- bias_posterior.result_sample, layer.bias_posterior_tensor,
- bias_divergence.result, kl_penalty[1],
- ])
-
- self.assertAllClose(
- expected_bias_, actual_bias_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_outputs_, actual_outputs_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_kernel_divergence_, actual_kernel_divergence_,
- rtol=1e-6, atol=0.)
- self.assertAllClose(
- expected_bias_divergence_, actual_bias_divergence_,
- rtol=1e-6, atol=0.)
-
- self.assertAllEqual(
- [[kernel_posterior.distribution, kernel_prior.distribution, None]],
- kernel_divergence.args)
-
- self.assertAllEqual(
- [[bias_posterior.distribution,
- bias_prior.distribution,
- bias_posterior.result_sample]],
- bias_divergence.args)
-
- def testRandomDenseFlipout(self):
- batch_size, in_size, out_size = 2, 3, 4
- with self.test_session() as sess:
- seed = Counter()
- inputs = random_ops.random_uniform([batch_size, in_size], seed=seed())
-
- kernel_posterior = MockDistribution(
- loc=random_ops.random_uniform(
- [in_size, out_size], seed=seed()),
- scale=random_ops.random_uniform(
- [in_size, out_size], seed=seed()),
- result_log_prob=random_ops.random_uniform(
- [in_size, out_size], seed=seed()),
- result_sample=random_ops.random_uniform(
- [in_size, out_size], seed=seed()))
- bias_posterior = MockDistribution(
- loc=random_ops.random_uniform(
- [out_size], seed=seed()),
- scale=random_ops.random_uniform(
- [out_size], seed=seed()),
- result_log_prob=random_ops.random_uniform(
- [out_size], seed=seed()),
- result_sample=random_ops.random_uniform(
- [out_size], seed=seed()))
- layer_one = prob_layers_lib.DenseFlipout(
- units=out_size,
- kernel_posterior_fn=lambda *args: kernel_posterior,
- kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
- bias_posterior_fn=lambda *args: bias_posterior,
- bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
- seed=44)
- layer_two = prob_layers_lib.DenseFlipout(
- units=out_size,
- kernel_posterior_fn=lambda *args: kernel_posterior,
- kernel_posterior_tensor_fn=lambda d: d.sample(seed=42),
- bias_posterior_fn=lambda *args: bias_posterior,
- bias_posterior_tensor_fn=lambda d: d.sample(seed=43),
- seed=45)
-
- outputs_one = layer_one(inputs)
- outputs_two = layer_two(inputs)
-
- outputs_one_, outputs_two_ = sess.run([
- outputs_one, outputs_two])
-
- self.assertLess(np.sum(np.isclose(outputs_one_, outputs_two_)), out_size)
-
-
-if __name__ == "__main__":
- test.main()
diff --git a/tensorflow/contrib/bayesflow/python/ops/docstring_util.py b/tensorflow/contrib/bayesflow/python/ops/docstring_util.py
deleted file mode 100644
index 081f2d5a8b..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/docstring_util.py
+++ /dev/null
@@ -1,88 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utilities for programmable docstrings.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import re
-import six
-
-
-def expand_docstring(**kwargs):
- """Decorator to programmatically expand the docstring.
-
- Args:
- **kwargs: Keyword arguments to set. For each key-value pair `k` and `v`,
- the key is found as `@{k}` in the docstring and replaced with `v`.
-
- Returns:
- Decorated function.
- """
- def _fn_wrapped(fn):
- """Original function with modified `__doc__` attribute."""
- doc = _trim(fn.__doc__)
- for k, v in six.iteritems(kwargs):
- # Capture each @{k} reference to replace with v.
- # We wrap the replacement in a function so no backslash escapes
- # are processed.
- pattern = r'@\{' + str(k) + r'\}'
- doc = re.sub(pattern, lambda match: v, doc) # pylint: disable=cell-var-from-loop
- fn.__doc__ = doc
- return fn
- return _fn_wrapped
-
-
-def _trim(docstring):
- """Trims docstring indentation.
-
- In general, multi-line docstrings carry their level of indentation when
- defined under a function or class method. This function standardizes
- indentation levels by removing them. Taken from PEP 257 docs.
-
- Args:
- docstring: Python string to trim indentation.
-
- Returns:
- Trimmed docstring.
- """
- if not docstring:
- return ''
- # Convert tabs to spaces (following the normal Python rules)
- # and split into a list of lines:
- lines = docstring.expandtabs().splitlines()
- # Determine minimum indentation (first line doesn't count):
- indent = None
- for line in lines[1:]:
- stripped = line.lstrip()
- if stripped:
- if indent is None:
- indent = len(line) - len(stripped)
- else:
- indent = min(indent, len(line) - len(stripped))
- # Remove indentation (first line is special):
- trimmed = [lines[0].strip()]
- if indent is not None:
- for line in lines[1:]:
- trimmed.append(line[indent:].rstrip())
- # Strip off trailing and leading blank lines:
- while trimmed and not trimmed[-1]:
- trimmed.pop()
- while trimmed and not trimmed[0]:
- trimmed.pop(0)
- # Return a single string:
- return '\n'.join(trimmed)
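
The utility deleted above performs simple `@{key}` substitution in docstrings. A condensed, self-contained sketch of that pattern follows; `expand_docstring_sketch` is illustrative only, not the removed function, and it omits the indentation trimming the original performs:

import re

def expand_docstring_sketch(**kwargs):
  """Replace each @{k} marker in a function's docstring with kwargs[k]."""
  def decorator(fn):
    doc = fn.__doc__ or ""
    for k, v in kwargs.items():
      # Use a replacement callable so backslashes in v stay literal.
      doc = re.sub(r"@\{" + re.escape(k) + r"\}", lambda _, v=v: v, doc)
    fn.__doc__ = doc
    return fn
  return decorator

@expand_docstring_sketch(args="x: Input to return as output.")
def foo(x):
  """Hello world.

  Args:
    @{args}
  """
  return x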
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers.py b/tensorflow/contrib/bayesflow/python/ops/layers.py
deleted file mode 100644
index 610613dca5..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/layers.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Probabilistic neural layers.
-
-See @{tf.contrib.bayesflow.layers}.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# go/tf-wildcard-import
-# pylint: disable=wildcard-import
-from tensorflow.contrib.bayesflow.python.ops.layers_conv_variational import *
-from tensorflow.contrib.bayesflow.python.ops.layers_dense_variational import *
-from tensorflow.contrib.bayesflow.python.ops.layers_util import *
-# pylint: enable=wildcard-import
-from tensorflow.python.util.all_util import remove_undocumented
-
-_allowed_symbols = [
- 'Convolution1DReparameterization',
- 'Convolution2DReparameterization',
- 'Convolution3DReparameterization',
- 'Convolution1DFlipout',
- 'Convolution2DFlipout',
- 'Convolution3DFlipout',
- 'Conv1DReparameterization',
- 'Conv2DReparameterization',
- 'Conv3DReparameterization',
- 'Conv1DFlipout',
- 'Conv2DFlipout',
- 'Conv3DFlipout',
- 'convolution1d_reparameterization',
- 'convolution2d_reparameterization',
- 'convolution3d_reparameterization',
- 'convolution1d_flipout',
- 'convolution2d_flipout',
- 'convolution3d_flipout',
- 'conv1d_reparameterization',
- 'conv2d_reparameterization',
- 'conv3d_reparameterization',
- 'conv1d_flipout',
- 'conv2d_flipout',
- 'conv3d_flipout',
- 'DenseReparameterization',
- 'DenseLocalReparameterization',
- 'DenseFlipout',
- 'dense_reparameterization',
- 'dense_local_reparameterization',
- 'dense_flipout',
- 'default_loc_scale_fn',
- 'default_mean_field_normal_fn',
-]
-
-remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py b/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py
deleted file mode 100644
index cb80718f71..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py
+++ /dev/null
@@ -1,2486 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Convolutional variational layer classes and their functional aliases.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.bayesflow.python.ops import docstring_util
-from tensorflow.contrib.bayesflow.python.ops import layers_util
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.layers import base as layers_lib
-from tensorflow.python.layers import utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import standard_ops
-from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.ops.distributions import util as distribution_util
-
-doc_args = """activation: Activation function. Set it to None to maintain a
- linear activation.
- activity_regularizer: Optional regularizer function for the output.
- trainable: Boolean, if `True` also add variables to the graph collection
- `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
- kernel_posterior_fn: Python `callable` which creates
- `tf.distributions.Distribution` instance representing the surrogate
- posterior of the `kernel` parameter. Default value:
- `default_mean_field_normal_fn()`.
- kernel_posterior_tensor_fn: Python `callable` which takes a
- `tf.distributions.Distribution` instance and returns a representative
- value. Default value: `lambda d: d.sample()`.
- kernel_prior_fn: Python `callable` which creates `tf.distributions`
- instance. See `default_mean_field_normal_fn` docstring for required
- parameter signature.
- Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
- kernel_divergence_fn: Python `callable` which takes the surrogate posterior
- distribution, prior distribution and random variate sample(s) from the
- surrogate posterior and computes or approximates the KL divergence. The
- distributions are `tf.distributions.Distribution`-like instances and the
- sample is a `Tensor`.
- bias_posterior_fn: Python `callable` which creates
- `tf.distributions.Distribution` instance representing the surrogate
- posterior of the `bias` parameter. Default value:
- `default_mean_field_normal_fn(is_singular=True)` (which creates an
- instance of `tf.distributions.Deterministic`).
- bias_posterior_tensor_fn: Python `callable` which takes a
- `tf.distributions.Distribution` instance and returns a representative
- value. Default value: `lambda d: d.sample()`.
- bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
- See `default_mean_field_normal_fn` docstring for required parameter
- signature. Default value: `None` (no prior, no variational inference)
- bias_divergence_fn: Python `callable` which takes the surrogate posterior
- distribution, prior distribution and random variate sample(s) from the
- surrogate posterior and computes or approximates the KL divergence. The
- distributions are `tf.distributions.Distribution`-like instances and the
- sample is a `Tensor`.
- name: A string, the name of the layer."""
-
-
-class _ConvVariational(layers_lib.Layer):
- """Abstract nD convolution layer (private, used as implementation base).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- rank: Python integer, dimensionality of convolution.
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- rank,
- filters,
- kernel_size,
- strides=1,
- padding="valid",
- data_format="channels_last",
- dilation_rate=1,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- rank: An integer, the rank of the convolution, e.g. "2" for 2D
- convolution.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of n integers, specifying the
- length of the convolution window.
- strides: An integer or tuple/list of n integers,
- specifying the stride length of the convolution.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, ...,
- channels)` while `channels_first` corresponds to inputs with shape
- `(batch, channels, ...)`.
- dilation_rate: An integer or tuple/list of n integers, specifying
- the dilation rate to use for dilated convolution.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any `strides` value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(_ConvVariational, self).__init__(
- trainable=trainable,
- name=name,
- activity_regularizer=activity_regularizer,
- **kwargs)
- self.rank = rank
- self.filters = filters
- self.kernel_size = utils.normalize_tuple(kernel_size, rank, "kernel_size")
- self.strides = utils.normalize_tuple(strides, rank, "strides")
- self.padding = utils.normalize_padding(padding)
- self.data_format = utils.normalize_data_format(data_format)
- self.dilation_rate = utils.normalize_tuple(
- dilation_rate, rank, "dilation_rate")
- self.activation = activation
- self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2)
- self.kernel_posterior_fn = kernel_posterior_fn
- self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
- self.kernel_prior_fn = kernel_prior_fn
- self.kernel_divergence_fn = kernel_divergence_fn
- self.bias_posterior_fn = bias_posterior_fn
- self.bias_posterior_tensor_fn = bias_posterior_tensor_fn
- self.bias_prior_fn = bias_prior_fn
- self.bias_divergence_fn = bias_divergence_fn
-
- def build(self, input_shape):
- input_shape = tensor_shape.TensorShape(input_shape)
- if self.data_format == "channels_first":
- channel_axis = 1
- else:
- channel_axis = -1
- if input_shape[channel_axis].value is None:
- raise ValueError("The channel dimension of the inputs "
- "should be defined. Found `None`.")
- input_dim = input_shape[channel_axis].value
- kernel_shape = self.kernel_size + (input_dim, self.filters)
- dtype = dtypes.as_dtype(self.dtype)
-
- # Must have a posterior kernel.
- self.kernel_posterior = self.kernel_posterior_fn(
- dtype, kernel_shape, "kernel_posterior",
- self.trainable, self.add_variable)
-
- if self.kernel_prior_fn is None:
- self.kernel_prior = None
- else:
- self.kernel_prior = self.kernel_prior_fn(
- dtype, kernel_shape, "kernel_prior",
- self.trainable, self.add_variable)
- self._built_kernel_divergence = False
-
- if self.bias_posterior_fn is None:
- self.bias_posterior = None
- else:
- self.bias_posterior = self.bias_posterior_fn(
- dtype, (self.filters,), "bias_posterior",
- self.trainable, self.add_variable)
-
- if self.bias_prior_fn is None:
- self.bias_prior = None
- else:
- self.bias_prior = self.bias_prior_fn(
- dtype, (self.filters,), "bias_prior",
- self.trainable, self.add_variable)
- self._built_bias_divergence = False
-
- self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2,
- axes={channel_axis: input_dim})
- self._convolution_op = nn_ops.Convolution(
- input_shape,
- filter_shape=tensor_shape.TensorShape(kernel_shape),
- dilation_rate=self.dilation_rate,
- strides=self.strides,
- padding=self.padding.upper(),
- data_format=utils.convert_data_format(self.data_format,
- self.rank + 2))
-
- self.built = True
-
- def call(self, inputs):
- inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
-
- outputs = self._apply_variational_kernel(inputs)
- outputs = self._apply_variational_bias(outputs)
- if self.activation is not None:
- outputs = self.activation(outputs)
- if not self._built_kernel_divergence:
- kernel_posterior = self.kernel_posterior
- kernel_prior = self.kernel_prior
- if isinstance(self.kernel_posterior, independent_lib.Independent):
- kernel_posterior = kernel_posterior.distribution
- if isinstance(self.kernel_prior, independent_lib.Independent):
- kernel_prior = kernel_prior.distribution
- self._apply_divergence(self.kernel_divergence_fn,
- kernel_posterior,
- kernel_prior,
- self.kernel_posterior_tensor,
- name="divergence_kernel")
- self._built_kernel_divergence = True
- if not self._built_bias_divergence:
- bias_posterior = self.bias_posterior
- bias_prior = self.bias_prior
- if isinstance(self.bias_posterior, independent_lib.Independent):
- bias_posterior = bias_posterior.distribution
- if isinstance(self.bias_prior, independent_lib.Independent):
- bias_prior = bias_prior.distribution
- self._apply_divergence(self.bias_divergence_fn,
- bias_posterior,
- bias_prior,
- self.bias_posterior_tensor,
- name="divergence_bias")
- self._built_bias_divergence = True
- return outputs
-
- def _apply_variational_bias(self, inputs):
- if self.bias_posterior is None:
- self.bias_posterior_tensor = None
- return inputs
- self.bias_posterior_tensor = self.bias_posterior_tensor_fn(
- self.bias_posterior)
- outputs = inputs
- if self.data_format == "channels_first":
- if self.rank == 1:
- # nn.bias_add does not accept a 1D input tensor.
- bias = array_ops.reshape(self.bias_posterior_tensor,
- (1, self.filters, 1))
- outputs += bias
- if self.rank == 2:
- outputs = nn.bias_add(outputs,
- self.bias_posterior_tensor,
- data_format="NCHW")
- if self.rank == 3:
- # As of Mar 2017, direct addition is significantly slower than
- # bias_add when computing gradients. To use bias_add, we collapse Z
- # and Y into a single dimension to obtain a 4D input tensor.
- outputs_shape = outputs.shape.as_list()
- outputs_4d = array_ops.reshape(outputs,
- [outputs_shape[0], outputs_shape[1],
- outputs_shape[2] * outputs_shape[3],
- outputs_shape[4]])
- outputs_4d = nn.bias_add(outputs_4d,
- self.bias_posterior_tensor,
- data_format="NCHW")
- outputs = array_ops.reshape(outputs_4d, outputs_shape)
- else:
- outputs = nn.bias_add(outputs,
- self.bias_posterior_tensor,
- data_format="NHWC")
- return outputs
-
- def _apply_divergence(self, divergence_fn, posterior, prior,
- posterior_tensor, name):
- if (divergence_fn is None or
- posterior is None or
- prior is None):
-      return
- divergence = standard_ops.identity(
- divergence_fn(
- posterior, prior, posterior_tensor),
- name=name)
- self.add_loss(divergence)
-
- def _compute_output_shape(self, input_shape):
- input_shape = tensor_shape.TensorShape(input_shape).as_list()
- if self.data_format == "channels_last":
- space = input_shape[1:-1]
- new_space = []
- for i in range(len(space)):
- new_dim = utils.conv_output_length(
- space[i],
- self.kernel_size[i],
- padding=self.padding,
- stride=self.strides[i],
- dilation=self.dilation_rate[i])
- new_space.append(new_dim)
- return tensor_shape.TensorShape([input_shape[0]] + new_space +
- [self.filters])
- else:
- space = input_shape[2:]
- new_space = []
- for i in range(len(space)):
- new_dim = utils.conv_output_length(
- space[i],
- self.kernel_size[i],
- padding=self.padding,
- stride=self.strides[i],
- dilation=self.dilation_rate[i])
- new_space.append(new_dim)
- return tensor_shape.TensorShape([input_shape[0], self.filters] +
- new_space)
-
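The output-shape arithmetic above uses the standard convolution output-length rule. As a point of reference, the following is a simplified stand-in (a sketch, not the actual `utils.conv_output_length` helper, and covering only the `"same"` and `"valid"` cases used here) showing what is computed for one spatial dimension.

```python
def conv_output_length(input_length, kernel_size, padding, stride, dilation=1):
  # Simplified sketch of the output-length rule for one spatial dimension.
  dilated_kernel_size = (kernel_size - 1) * dilation + 1
  if padding == "same":
    length = input_length
  elif padding == "valid":
    length = input_length - dilated_kernel_size + 1
  else:
    raise ValueError("padding must be 'same' or 'valid' in this sketch")
  return (length + stride - 1) // stride  # ceiling division by the stride

# Example: a 32-wide input, 5-wide kernel, stride 2, no dilation.
assert conv_output_length(32, 5, "same", 2) == 16
assert conv_output_length(32, 5, "valid", 2) == 14
```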
-
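The divergences registered through `add_loss` above are what the docstring examples below sum as the KL term. A minimal usage sketch (hypothetical, not part of this module; graph-mode TF 1.x, with `features` standing in for real data):

```python
import tensorflow as tf

tfp = tf.contrib.bayesflow

features = tf.placeholder(tf.float32, shape=[None, 128, 1])
layer = tfp.layers.Conv1DReparameterization(64, kernel_size=5, padding="SAME")
net = layer(features)

# With the default arguments only the kernel has a prior, so a single
# "divergence_kernel" loss is registered on the layer ...
kl = sum(layer.losses)
# ... and mirrored into the collection queried by the examples below.
kl_from_collection = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
```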
-class _ConvReparameterization(_ConvVariational):
- """Abstract nD convolution layer (private, used as implementation base).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the reparameterization
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- rank: Python integer, dimensionality of convolution.
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- rank,
- filters,
- kernel_size,
- strides=1,
- padding="valid",
- data_format="channels_last",
- dilation_rate=1,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- rank: An integer, the rank of the convolution, e.g. "2" for 2D
- convolution.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of n integers, specifying the
- length of the convolution window.
- strides: An integer or tuple/list of n integers,
- specifying the stride length of the convolution.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, ...,
- channels)` while `channels_first` corresponds to inputs with shape
- `(batch, channels, ...)`.
- dilation_rate: An integer or tuple/list of n integers, specifying
- the dilation rate to use for dilated convolution.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any `strides` value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(_ConvReparameterization, self).__init__(
- rank=rank,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name, **kwargs)
-
- def _apply_variational_kernel(self, inputs):
- self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
- self.kernel_posterior)
- self.kernel_posterior_affine = None
- self.kernel_posterior_affine_tensor = None
- outputs = self._convolution_op(inputs, self.kernel_posterior_tensor)
- return outputs
-
-
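The `lambda d: d.sample()` defaults are what make this estimator differentiable: `Normal.sample` is reparameterized, so gradients of a sampled kernel flow back into the variational parameters. A standalone sketch of that property (hypothetical, not part of this module):

```python
import tensorflow as tf

loc = tf.get_variable("loc", initializer=0.)
scale = tf.nn.softplus(tf.get_variable("raw_scale", initializer=1.))
q = tf.distributions.Normal(loc=loc, scale=scale)

w = q.sample()       # internally: w = loc + scale * eps, with eps ~ N(0, 1)
loss = tf.square(w)  # stand-in for a negative log-likelihood term
grads = tf.gradients(loss, [loc, scale])  # both gradients are well defined
```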
-class Conv1DReparameterization(_ConvReparameterization):
- """1D convolution layer (e.g. temporal convolution).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the reparameterization
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 128, 1])
- net = tfp.layers.Conv1DReparameterization(64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)(net)
- net = tf.reshape(net, [-1, 128 * 64])
- logits = tfp.layers.DenseReparameterization(10)(net)
-  neg_log_likelihood = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- filters,
- kernel_size,
- strides=1,
- padding="valid",
- data_format="channels_last",
- dilation_rate=1,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of a single integer, specifying the
- length of the 1D convolution window.
- strides: An integer or tuple/list of a single integer,
- specifying the stride length of the convolution.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, length,
- channels)` while `channels_first` corresponds to inputs with shape
- `(batch, channels, length)`.
- dilation_rate: An integer or tuple/list of a single integer, specifying
- the dilation rate to use for dilated convolution.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any `strides` value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(Conv1DReparameterization, self).__init__(
- rank=1,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name, **kwargs)
-
-
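Written out, the objective assembled in the example above is, up to an additive constant, the negative evidence lower bound described there: the first term is approximated with a single Monte Carlo sample and the second enters through the layer's regularizer terms.

```latex
-\mathrm{ELBO}
  = \mathbb{E}_{q(\mathrm{kernel},\,\mathrm{bias})}\!\bigl[-\log p(\mathrm{labels}\mid\mathrm{features},\mathrm{kernel},\mathrm{bias})\bigr]
  + \mathrm{KL}\bigl(q(\mathrm{kernel},\mathrm{bias})\,\|\,p(\mathrm{kernel},\mathrm{bias})\bigr)
```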
-@docstring_util.expand_docstring(args=doc_args)
-def conv1d_reparameterization(
- inputs,
- filters,
- kernel_size,
- strides=1,
- padding="valid",
- data_format="channels_last",
- dilation_rate=1,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Functional interface for 1D convolution layer (e.g. temporal convolution).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the reparameterization
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of a single integer, specifying the
- length of the 1D convolution window.
- strides: An integer or tuple/list of a single integer,
- specifying the stride length of the convolution.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or `channels_first`.
- The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape
- `(batch, length, channels)` while `channels_first` corresponds to
- inputs with shape `(batch, channels, length)`.
- dilation_rate: An integer or tuple/list of a single integer, specifying
- the dilation rate to use for dilated convolution.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any `strides` value != 1.
- @{args}
- reuse: Boolean, whether to reuse the weights of a previous layer
- by the same name.
-
- Returns:
- Output tensor.
-
- Raises:
- ValueError: if eager execution is enabled.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 128, 1])
- net = tfp.layers.conv1d_reparameterization(net,
- filters=64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)
- net = tf.reshape(net, [-1, 128 * 64])
- logits = tfp.layers.dense_reparameterization(net, 10)
-  neg_log_likelihood = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
- # pylint: enable=g-doc-args
- layer = Conv1DReparameterization(
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
-
-
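Like other functional wrappers in `tf.layers`, `conv1d_reparameterization` above builds a `Conv1DReparameterization` under `name` and applies it, so `reuse` follows the usual variable-scope semantics. A hedged sketch of sharing one set of variational parameters across two inputs (hypothetical names, not part of this module):

```python
import tensorflow as tf

tfp = tf.contrib.bayesflow

x_train = tf.placeholder(tf.float32, [None, 128, 1])
x_eval = tf.placeholder(tf.float32, [None, 128, 1])

y_train = tfp.layers.conv1d_reparameterization(
    x_train, filters=64, kernel_size=5, name="bayes_conv1d")
# Second application reuses the posterior/prior variables created above.
y_eval = tfp.layers.conv1d_reparameterization(
    x_eval, filters=64, kernel_size=5, name="bayes_conv1d", reuse=True)
```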
-class Conv2DReparameterization(_ConvReparameterization):
- """2D convolution layer (e.g. spatial convolution over images).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the reparameterization
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 32, 32, 3])
- net = tfp.layers.Conv2DReparameterization(64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)(net)
- net = tf.layers.MaxPooling2D(pool_size=2,
- strides=2,
- padding="SAME")(net)
-  net = tf.reshape(net, [-1, 16 * 16 * 64])
- logits = tfp.layers.DenseReparameterization(10)(net)
-  neg_log_likelihood = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- filters,
- kernel_size,
- strides=(1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 2 integers, specifying the
- height and width of the 2D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 2 integers,
- specifying the strides of the convolution along the height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, height,
- width, channels)` while `channels_first` corresponds to inputs with
- shape `(batch, channels, height, width)`.
- dilation_rate: An integer or tuple/list of 2 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(Conv2DReparameterization, self).__init__(
- rank=2,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name, **kwargs)
-
-
-@docstring_util.expand_docstring(args=doc_args)
-def conv2d_reparameterization(
- inputs,
- filters,
- kernel_size,
- strides=(1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Functional interface for the 2D convolution layer.
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the reparameterization
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 2 integers, specifying the
- height and width of the 2D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 2 integers,
- specifying the strides of the convolution along the height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or `channels_first`.
- The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape
- `(batch, height, width, channels)` while `channels_first` corresponds to
- inputs with shape `(batch, channels, height, width)`.
- dilation_rate: An integer or tuple/list of 2 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- reuse: Boolean, whether to reuse the weights of a previous layer
- by the same name.
-
- Returns:
- Output tensor.
-
- Raises:
- ValueError: if eager execution is enabled.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 32, 32, 3])
- net = tfp.layers.conv2d_reparameterization(net,
- filters=64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)
- net = tf.layers.max_pooling2d(net,
- pool_size=2,
- strides=2,
- padding="SAME")
-  net = tf.reshape(net, [-1, 16 * 16 * 64])
- logits = tfp.layers.dense_reparameterization(net, 10)
-  neg_log_likelihood = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
- # pylint: enable=g-doc-args
- layer = Conv2DReparameterization(
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
-
-
-class Conv3DReparameterization(_ConvReparameterization):
- """3D convolution layer (e.g. spatial convolution over volumes).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the reparameterization
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 256, 32, 32, 3])
- net = tfp.layers.Conv3DReparameterization(64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)(net)
-  net = tf.layers.MaxPooling3D(pool_size=2,
-                               strides=2,
-                               padding="SAME")(net)
-  net = tf.reshape(net, [-1, 128 * 16 * 16 * 64])
- logits = tfp.layers.DenseReparameterization(10)(net)
-  neg_log_likelihood = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- filters,
- kernel_size,
- strides=(1, 1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 3 integers, specifying the
- depth, height and width of the 3D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 3 integers,
- specifying the strides of the convolution along the depth,
- height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, depth,
- height, width, channels)` while `channels_first` corresponds to inputs
- with shape `(batch, channels, depth, height, width)`.
- dilation_rate: An integer or tuple/list of 3 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(Conv3DReparameterization, self).__init__(
- rank=3,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name, **kwargs)
-
-
-@docstring_util.expand_docstring(args=doc_args)
-def conv3d_reparameterization(
- inputs,
- filters,
- kernel_size,
- strides=(1, 1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Functional interface for the 3D convolution layer.
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the reparameterization
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 3 integers, specifying the
- depth, height and width of the 3D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 3 integers,
- specifying the strides of the convolution along the depth,
- height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or `channels_first`.
- The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape
- `(batch, depth, height, width, channels)` while `channels_first`
- corresponds to inputs with shape
- `(batch, channels, depth, height, width)`.
- dilation_rate: An integer or tuple/list of 3 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- reuse: Boolean, whether to reuse the weights of a previous layer
- by the same name.
-
- Returns:
- Output tensor.
-
- Raises:
- ValueError: if eager execution is enabled.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 256, 32, 32, 3])
- net = tfp.layers.conv3d_reparameterization(net,
- filters=64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)
-  net = tf.layers.max_pooling3d(net,
-                                pool_size=2,
-                                strides=2,
-                                padding="SAME")
-  net = tf.reshape(net, [-1, 128 * 16 * 16 * 64])
- logits = tfp.layers.dense_reparameterization(net, 10)
-  neg_log_likelihood = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
- # pylint: enable=g-doc-args
- layer = Conv3DReparameterization(
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
-
-
-class _ConvFlipout(_ConvVariational):
- """Abstract nD convolution layer (private, used as implementation base).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the Flipout
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`. Flipout uses
- roughly twice as many floating point operations as the
- reparameterization estimator but has the advantage of significantly
- lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- rank: Python integer, dimensionality of convolution.
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
- seed: Python integer, used to create random seeds.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse.
- International Conference on Learning Representations, 2018.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- rank,
- filters,
- kernel_size,
- strides=1,
- padding="valid",
- data_format="channels_last",
- dilation_rate=1,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- rank: An integer, the rank of the convolution, e.g. "2" for 2D
- convolution.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of n integers, specifying the
- length of the convolution window.
- strides: An integer or tuple/list of n integers,
- specifying the stride length of the convolution.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, ...,
- channels)` while `channels_first` corresponds to inputs with shape
- `(batch, channels, ...)`.
- dilation_rate: An integer or tuple/list of n integers, specifying
- the dilation rate to use for dilated convolution.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any `strides` value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(_ConvFlipout, self).__init__(
- rank=rank,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name, **kwargs)
- self.seed = seed
-
- def _apply_variational_kernel(self, inputs):
- if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
- not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
- raise TypeError(
- "`{}` requires "
-          "`kernel_posterior_fn` to produce an instance of "
- "`tf.distributions.Independent(tf.distributions.Normal)` "
- "(saw: \"{}\").".format(
- type(self).__name__, self.kernel_posterior.name))
- self.kernel_posterior_affine = normal_lib.Normal(
- loc=array_ops.zeros_like(self.kernel_posterior.distribution.loc),
- scale=self.kernel_posterior.distribution.scale)
- self.kernel_posterior_affine_tensor = (
- self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
- self.kernel_posterior_tensor = None
-
- outputs = self._convolution_op(
- inputs, self.kernel_posterior.distribution.loc)
-
- input_shape = array_ops.shape(inputs)
- output_shape = array_ops.shape(outputs)
- batch_shape = array_ops.expand_dims(input_shape[0], 0)
- channels = input_shape[-1]
-
- sign_input = layers_util.random_sign(
- array_ops.concat([batch_shape,
- array_ops.expand_dims(channels, 0)], 0),
- dtype=inputs.dtype,
- seed=self.seed)
- sign_output = layers_util.random_sign(
- array_ops.concat([batch_shape,
- array_ops.expand_dims(self.filters, 0)], 0),
- dtype=inputs.dtype,
- seed=distribution_util.gen_new_seed(
- self.seed, salt="conv_flipout"))
- for _ in range(self.rank):
- sign_input = array_ops.expand_dims(sign_input, 1) # 2D ex: (B, 1, 1, C)
- sign_output = array_ops.expand_dims(sign_output, 1)
-
- sign_input = array_ops.tile( # tile for element-wise op broadcasting
- sign_input,
- [1] + [input_shape[i + 1] for i in range(self.rank)] + [1])
- sign_output = array_ops.tile(
- sign_output,
- [1] + [output_shape[i + 1] for i in range(self.rank)] + [1])
-
- perturbed_inputs = self._convolution_op(
- inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output
-
- outputs += perturbed_inputs
- return outputs
-
-
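The sign trick in `_apply_variational_kernel` above gives each example the sampled perturbation multiplied element-wise by a rank-one sign matrix; because the perturbation is symmetric about zero, each example still sees a correctly distributed perturbation, while the perturbations are decorrelated across the mini-batch. A dense-layer analogue of the identity being used (a NumPy sketch, not part of this module):

```python
import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4)                        # one example with 4 input features
loc = rng.randn(4, 3)                   # posterior means of a 4x3 kernel
delta = rng.randn(4, 3)                 # one sampled zero-mean perturbation
s_in = rng.choice([-1.0, 1.0], size=4)  # per-example input signs
s_out = rng.choice([-1.0, 1.0], size=3) # per-example output signs

# Flipout-style output for this example ...
flipout = x.dot(loc) + (x * s_in).dot(delta) * s_out
# ... equals perturbing the kernel by delta * outer(s_in, s_out).
direct = x.dot(loc + delta * np.outer(s_in, s_out))
assert np.allclose(flipout, direct)
```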
-class Conv1DFlipout(_ConvFlipout):
- """1D convolution layer (e.g. temporal convolution) with Flipout.
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the Flipout
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`. Flipout uses
- roughly twice as many floating point operations as the
- reparameterization estimator but has the advantage of significantly
- lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
- seed: Python integer, used to create random seeds.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 128, 1])
- net = tfp.layers.Conv1DFlipout(64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)(net)
- net = tf.reshape(net, [-1, 128 * 64])
- logits = tfp.layers.DenseFlipout(10)(net)
-  neg_log_likelihood = tf.reduce_mean(
-      tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse.
- International Conference on Learning Representations, 2018.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- filters,
- kernel_size,
- strides=1,
- padding="valid",
- data_format="channels_last",
- dilation_rate=1,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of a single integer, specifying the
- length of the 1D convolution window.
- strides: An integer or tuple/list of a single integer,
- specifying the stride length of the convolution.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, length,
- channels)` while `channels_first` corresponds to inputs with shape
- `(batch, channels, length)`.
- dilation_rate: An integer or tuple/list of a single integer, specifying
- the dilation rate to use for dilated convolution.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any `strides` value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(Conv1DFlipout, self).__init__(
- rank=1,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- seed=seed,
- name=name, **kwargs)
-
-
-@docstring_util.expand_docstring(args=doc_args)
-def conv1d_flipout(
- inputs,
- filters,
- kernel_size,
- strides=1,
- padding="valid",
- data_format="channels_last",
- dilation_rate=1,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Functional interface for 1D convolution layer (e.g. temporal convolution).
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the Flipout
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`. Flipout uses
- roughly twice as many floating point operations as the
- reparameterization estimator but has the advantage of significantly
- lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of a single integer, specifying the
- length of the 1D convolution window.
- strides: An integer or tuple/list of a single integer,
- specifying the stride length of the convolution.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or `channels_first`.
- The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape
- `(batch, length, channels)` while `channels_first` corresponds to
- inputs with shape `(batch, channels, length)`.
- dilation_rate: An integer or tuple/list of a single integer, specifying
- the dilation rate to use for dilated convolution.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any `strides` value != 1.
- @{args}
- reuse: Boolean, whether to reuse the weights of a previous layer
- by the same name.
-
- Returns:
- Output tensor.
-
- Raises:
- ValueError: if eager execution is enabled.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 128, 1])
- net = tfp.layers.conv1d_flipout(net,
- filters=64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)
- net = tf.reshape(net, [-1, 128 * 64])
- logits = tfp.layers.dense_flipout(net, 10)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse.
- International Conference on Learning Representations, 2018.
- """
- # pylint: enable=g-doc-args
- layer = Conv1DFlipout(
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- seed=seed,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
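
One practical consequence of the stochastic forward pass described above: every execution of the graph draws fresh `kernel` and `bias` samples, so prediction-time uncertainty can be estimated by averaging repeated runs. A minimal sketch, assuming the `logits` tensor built in the docstring example above and TF 1.x graph mode; the 50 samples are illustrative, not a recommendation.

```python
import numpy as np
import tensorflow as tf

probs = tf.nn.softmax(logits)  # `logits` from the conv1d_flipout example above
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # Each run re-samples kernel/bias from the surrogate posterior, so the
  # average over runs is a Monte Carlo estimate of the posterior predictive.
  mc_probs = np.mean([sess.run(probs) for _ in range(50)], axis=0)
```
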
-
-
-class Conv2DFlipout(_ConvFlipout):
- """2D convolution layer (e.g. spatial convolution over images) with Flipout.
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the Flipout
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`. Flipout uses
- roughly twice as many floating point operations as the
- reparameterization estimator but has the advantage of significantly
- lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
- seed: Python integer, used to create random seeds.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 32, 32, 3])
- net = tfp.layers.Conv2DFlipout(64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)(net)
- net = tf.layers.MaxPooling2D(pool_size=2,
- strides=2,
- padding="SAME")(net)
-  net = tf.reshape(net, [-1, 16 * 16 * 64])
- logits = tfp.layers.DenseFlipout(10)(net)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse.
- International Conference on Learning Representations, 2018.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- filters,
- kernel_size,
- strides=(1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 2 integers, specifying the
- height and width of the 2D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 2 integers,
- specifying the strides of the convolution along the height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, height,
- width, channels)` while `channels_first` corresponds to inputs with
- shape `(batch, channels, height, width)`.
- dilation_rate: An integer or tuple/list of 2 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(Conv2DFlipout, self).__init__(
- rank=2,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- seed=seed,
- name=name, **kwargs)
-
-
-@docstring_util.expand_docstring(args=doc_args)
-def conv2d_flipout(
- inputs,
- filters,
- kernel_size,
- strides=(1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Functional interface for the 2D convolution layer.
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the Flipout
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`. Flipout uses
- roughly twice as many floating point operations as the
- reparameterization estimator but has the advantage of significantly
- lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 2 integers, specifying the
- height and width of the 2D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 2 integers,
- specifying the strides of the convolution along the height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or `channels_first`.
- The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape
- `(batch, height, width, channels)` while `channels_first` corresponds to
- inputs with shape `(batch, channels, height, width)`.
- dilation_rate: An integer or tuple/list of 2 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- reuse: Boolean, whether to reuse the weights of a previous layer
- by the same name.
-
- Returns:
- Output tensor.
-
- Raises:
- ValueError: if eager execution is enabled.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 32, 32, 3])
- net = tfp.layers.conv2d_flipout(net,
- filters=64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)
- net = tf.layers.max_pooling2d(net,
- pool_size=2,
- strides=2,
- padding="SAME")
-  net = tf.reshape(net, [-1, 16 * 16 * 64])
- logits = tfp.layers.dense_flipout(net, 10)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse.
- International Conference on Learning Representations, 2018.
- """
- # pylint: enable=g-doc-args
- layer = Conv2DFlipout(
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- seed=seed,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
-
-
-class Conv3DFlipout(_ConvFlipout):
- """3D convolution layer (e.g. spatial convolution over volumes) with Flipout.
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the Flipout
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`. Flipout uses
- roughly twice as many floating point operations as the
- reparameterization estimator but has the advantage of significantly
- lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- filters: Python integer, dimensionality of the output space.
- kernel_size: Size of the convolution window.
- strides: Stride length of convolution.
- padding: Python string describing padding approach.
- data_format: Python string describing input data's dimensions.
- dilation_rate: Dilation rate for an atrous convolution.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
- seed: Python integer, used to create random seeds.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 256, 32, 32, 3])
- net = tfp.layers.Conv3DFlipout(64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)(net)
-  net = tf.layers.MaxPooling3D(pool_size=2,
-                               strides=2,
-                               padding="SAME")(net)
-  net = tf.reshape(net, [-1, 128 * 16 * 16 * 64])
- logits = tfp.layers.DenseFlipout(10)(net)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse.
- International Conference on Learning Representations, 2018.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- filters,
- kernel_size,
- strides=(1, 1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 3 integers, specifying the
- depth, height and width of the 3D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 3 integers,
- specifying the strides of the convolution along the depth,
- height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or
- `channels_first`. The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape `(batch, depth,
- height, width, channels)` while `channels_first` corresponds to inputs
- with shape `(batch, channels, depth, height, width)`.
- dilation_rate: An integer or tuple/list of 3 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- """
- # pylint: enable=g-doc-args
- super(Conv3DFlipout, self).__init__(
- rank=3,
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- seed=seed,
- name=name, **kwargs)
-
-
-@docstring_util.expand_docstring(args=doc_args)
-def conv3d_flipout(
- inputs,
- filters,
- kernel_size,
- strides=(1, 1, 1),
- padding="valid",
- data_format="channels_last",
- dilation_rate=(1, 1, 1),
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Functional interface for the 3D convolution layer.
-
- This layer creates a convolution kernel that is convolved
- (actually cross-correlated) with the layer input to produce a tensor of
- outputs. It may also include a bias addition and activation function
- on the outputs. It assumes the `kernel` and/or `bias` are drawn from
- distributions.
-
- By default, the layer implements a stochastic forward pass via
- sampling from the kernel and bias posteriors,
- ```none
- outputs = f(inputs; kernel, bias), kernel, bias ~ posterior
- ```
- where f denotes the layer's calculation. It uses the Flipout
- estimator [1], which performs a Monte Carlo approximation of the
- distribution integrating over the `kernel` and `bias`. Flipout uses
- roughly twice as many floating point operations as the
- reparameterization estimator but has the advantage of significantly
- lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- filters: Integer, the dimensionality of the output space (i.e. the number
- of filters in the convolution).
- kernel_size: An integer or tuple/list of 3 integers, specifying the
- depth, height and width of the 3D convolution window.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- strides: An integer or tuple/list of 3 integers,
- specifying the strides of the convolution along the depth,
- height and width.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Specifying any stride value != 1 is incompatible with specifying
- any `dilation_rate` value != 1.
- padding: One of `"valid"` or `"same"` (case-insensitive).
- data_format: A string, one of `channels_last` (default) or `channels_first`.
- The ordering of the dimensions in the inputs.
- `channels_last` corresponds to inputs with shape
- `(batch, depth, height, width, channels)` while `channels_first`
- corresponds to inputs with shape
- `(batch, channels, depth, height, width)`.
- dilation_rate: An integer or tuple/list of 3 integers, specifying
- the dilation rate to use for dilated convolution.
- Can be a single integer to specify the same value for
- all spatial dimensions.
- Currently, specifying any `dilation_rate` value != 1 is
- incompatible with specifying any stride value != 1.
- @{args}
- reuse: Boolean, whether to reuse the weights of a previous layer
- by the same name.
-
- Returns:
- Output tensor.
-
- Raises:
- ValueError: if eager execution is enabled.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tf.reshape(features, [-1, 256, 32, 32, 3])
- net = tfp.layers.conv3d_flipout(net,
- filters=64,
- kernel_size=5,
- padding="SAME",
- activation=tf.nn.relu)
-  net = tf.layers.max_pooling3d(net,
-                                pool_size=2,
-                                strides=2,
-                                padding="SAME")
-  net = tf.reshape(net, [-1, 128 * 16 * 16 * 64])
- logits = tfp.layers.dense_flipout(net, 10)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse.
- International Conference on Learning Representations, 2018.
- """
- # pylint: enable=g-doc-args
- layer = Conv3DFlipout(
- filters=filters,
- kernel_size=kernel_size,
- strides=strides,
- padding=padding,
- data_format=data_format,
- dilation_rate=dilation_rate,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- seed=seed,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
-
-
-# Aliases
-
-Convolution1DReparameterization = Conv1DReparameterization
-Convolution2DReparameterization = Conv2DReparameterization
-Convolution3DReparameterization = Conv3DReparameterization
-convolution1d_reparameterization = conv1d_reparameterization
-convolution2d_reparameterization = conv2d_reparameterization
-convolution3d_reparameterization = conv3d_reparameterization
-Convolution1DFlipout = Conv1DFlipout
-Convolution2DFlipout = Conv2DFlipout
-Convolution3DFlipout = Conv3DFlipout
-convolution1d_flipout = conv1d_flipout
-convolution2d_flipout = conv2d_flipout
-convolution3d_flipout = conv3d_flipout
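
For intuition on the Flipout estimator cited throughout this file: a single sampled perturbation is shared across the mini-batch and decorrelated per example with random sign vectors, which is why the estimator costs roughly twice the FLOPs of plain reparameterization while cutting gradient variance. Below is a rough numpy sketch of the dense-layer case following that description and the Flipout paper; the function and variable names are illustrative and do not correspond to the deleted implementation.

```python
import numpy as np

def flipout_dense(x, w_mean, delta_w, rng):
  """Pseudo-independent per-example perturbations via random sign flips."""
  batch, d_in = x.shape
  d_out = delta_w.shape[1]
  sign_in = rng.choice([-1., 1.], size=(batch, d_in))    # s_n
  sign_out = rng.choice([-1., 1.], size=(batch, d_out))  # r_n
  # y_n = x_n W_mean + ((x_n * s_n) delta_w) * r_n: the shared perturbation
  # delta_w is effectively sign-flipped independently for every example.
  return x @ w_mean + ((x * sign_in) @ delta_w) * sign_out

rng = np.random.default_rng(0)
x = rng.normal(size=(4, 3))
w_mean = rng.normal(size=(3, 2))
delta_w = 0.1 * rng.normal(size=(3, 2))  # e.g. posterior_scale * epsilon
print(flipout_dense(x, w_mean, delta_w, rng).shape)  # (4, 2)
```
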
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py
deleted file mode 100644
index 1f1d8fda2a..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py
+++ /dev/null
@@ -1,955 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Dense Bayesian layer using KL-divergence based variational inference.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.bayesflow.python.ops import docstring_util
-from tensorflow.contrib.bayesflow.python.ops import layers_util
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.layers import base as layers_lib
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import standard_ops
-from tensorflow.python.ops.distributions import kullback_leibler as kl_lib
-from tensorflow.python.ops.distributions import normal as normal_lib
-from tensorflow.python.ops.distributions import util as distribution_util
-
-
-doc_args = """units: Integer or Long, dimensionality of the output space.
- activation: Activation function (`callable`). Set it to None to maintain a
- linear activation.
- activity_regularizer: Regularizer function for the output.
- trainable: Boolean, if `True` also add variables to the graph collection
- `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
- kernel_posterior_fn: Python `callable` which creates
- `tf.distributions.Distribution` instance representing the surrogate
- posterior of the `kernel` parameter. Default value:
- `default_mean_field_normal_fn()`.
- kernel_posterior_tensor_fn: Python `callable` which takes a
- `tf.distributions.Distribution` instance and returns a representative
- value. Default value: `lambda d: d.sample()`.
- kernel_prior_fn: Python `callable` which creates `tf.distributions`
- instance. See `default_mean_field_normal_fn` docstring for required
- parameter signature.
- Default value: `tf.distributions.Normal(loc=0., scale=1.)`.
- kernel_divergence_fn: Python `callable` which takes the surrogate posterior
- distribution, prior distribution and random variate sample(s) from the
- surrogate posterior and computes or approximates the KL divergence. The
- distributions are `tf.distributions.Distribution`-like instances and the
- sample is a `Tensor`.
- bias_posterior_fn: Python `callable` which creates
- `tf.distributions.Distribution` instance representing the surrogate
- posterior of the `bias` parameter. Default value:
- `default_mean_field_normal_fn(is_singular=True)` (which creates an
- instance of `tf.distributions.Deterministic`).
- bias_posterior_tensor_fn: Python `callable` which takes a
- `tf.distributions.Distribution` instance and returns a representative
- value. Default value: `lambda d: d.sample()`.
- bias_prior_fn: Python `callable` which creates `tf.distributions` instance.
- See `default_mean_field_normal_fn` docstring for required parameter
- signature. Default value: `None` (no prior, no variational inference)
- bias_divergence_fn: Python `callable` which takes the surrogate posterior
- distribution, prior distribution and random variate sample(s) from the
- surrogate posterior and computes or approximates the KL divergence. The
- distributions are `tf.distributions.Distribution`-like instances and the
- sample is a `Tensor`.
- seed: Python scalar `int` which initializes the random number
- generator. Default value: `None` (i.e., use global seed).
- name: Python `str`, the name of the layer. Layers with the same name will
- share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in
- such cases.
- reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous
- layer by the same name."""
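
As a sketch of how these callables compose: the prior/posterior factories are called with `(dtype, shape, name, trainable, add_variable_fn)` (the signature `default_mean_field_normal_fn` documents, and visible in `build` below), and the divergence callable receives `(posterior, prior, posterior_sample)`. The narrower prior and dataset-size scaling shown here are illustrative choices, not defaults, and `DenseFlipout` stands in for any of the layers defined in this module.

```python
import tensorflow as tf
tfd = tf.distributions

def narrow_prior_fn(dtype, shape, name, trainable, add_variable_fn):
  # Ignores the variable-creation hooks and returns a fixed N(0, 0.5) prior.
  del shape, name, trainable, add_variable_fn  # unused in this sketch
  return tfd.Normal(loc=dtype.as_numpy_dtype(0.),
                    scale=dtype.as_numpy_dtype(0.5))

num_examples = 50000  # illustrative training-set size
layer = DenseFlipout(  # e.g. tfp.layers.DenseFlipout, as in the examples below
    10,
    kernel_prior_fn=narrow_prior_fn,
    kernel_divergence_fn=lambda q, p, _: (
        tfd.kl_divergence(q, p) / num_examples))
```
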
-
-
-class _DenseVariational(layers_lib.Layer):
- """Abstract densely-connected class (private, used as implementation base).
-
- This layer implements the Bayesian variational inference analogue to
- a dense layer by assuming the `kernel` and/or the `bias` are drawn
- from distributions. By default, the layer implements a stochastic
- forward pass via sampling from the kernel and bias posteriors,
-
- ```none
- kernel, bias ~ posterior
- outputs = activation(matmul(inputs, kernel) + bias)
- ```
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- units: Python integer, dimensionality of the output space.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- units,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- @{args}
- """
- # pylint: enable=g-doc-args
- super(_DenseVariational, self).__init__(
- trainable=trainable,
- name=name,
- activity_regularizer=activity_regularizer,
- **kwargs)
- self.units = units
- self.activation = activation
- self.input_spec = layers_lib.InputSpec(min_ndim=2)
- self.kernel_posterior_fn = kernel_posterior_fn
- self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn
- self.kernel_prior_fn = kernel_prior_fn
- self.kernel_divergence_fn = kernel_divergence_fn
- self.bias_posterior_fn = bias_posterior_fn
- self.bias_posterior_tensor_fn = bias_posterior_tensor_fn
- self.bias_prior_fn = bias_prior_fn
- self.bias_divergence_fn = bias_divergence_fn
-
- def build(self, input_shape):
- input_shape = tensor_shape.TensorShape(input_shape)
- in_size = input_shape.with_rank_at_least(2)[-1].value
- if in_size is None:
- raise ValueError("The last dimension of the inputs to `Dense` "
- "should be defined. Found `None`.")
- self._input_spec = layers_lib.InputSpec(min_ndim=2, axes={-1: in_size})
- dtype = dtypes.as_dtype(self.dtype)
-
- # Must have a posterior kernel.
- self.kernel_posterior = self.kernel_posterior_fn(
- dtype, [in_size, self.units], "kernel_posterior",
- self.trainable, self.add_variable)
-
- if self.kernel_prior_fn is None:
- self.kernel_prior = None
- else:
- self.kernel_prior = self.kernel_prior_fn(
- dtype, [in_size, self.units], "kernel_prior",
- self.trainable, self.add_variable)
- self._built_kernel_divergence = False
-
- if self.bias_posterior_fn is None:
- self.bias_posterior = None
- else:
- self.bias_posterior = self.bias_posterior_fn(
- dtype, [self.units], "bias_posterior",
- self.trainable, self.add_variable)
-
- if self.bias_prior_fn is None:
- self.bias_prior = None
- else:
- self.bias_prior = self.bias_prior_fn(
- dtype, [self.units], "bias_prior",
- self.trainable, self.add_variable)
- self._built_bias_divergence = False
-
- self.built = True
-
- def call(self, inputs):
- inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
-
- outputs = self._apply_variational_kernel(inputs)
- outputs = self._apply_variational_bias(outputs)
- if self.activation is not None:
- outputs = self.activation(outputs) # pylint: disable=not-callable
- if not self._built_kernel_divergence:
- kernel_posterior = self.kernel_posterior
- kernel_prior = self.kernel_prior
- if isinstance(self.kernel_posterior, independent_lib.Independent):
- kernel_posterior = kernel_posterior.distribution
- if isinstance(self.kernel_prior, independent_lib.Independent):
- kernel_prior = kernel_prior.distribution
- self._apply_divergence(self.kernel_divergence_fn,
- kernel_posterior,
- kernel_prior,
- self.kernel_posterior_tensor,
- name="divergence_kernel")
- self._built_kernel_divergence = True
- if not self._built_bias_divergence:
- bias_posterior = self.bias_posterior
- bias_prior = self.bias_prior
- if isinstance(self.bias_posterior, independent_lib.Independent):
- bias_posterior = bias_posterior.distribution
- if isinstance(self.bias_prior, independent_lib.Independent):
- bias_prior = bias_prior.distribution
- self._apply_divergence(self.bias_divergence_fn,
- bias_posterior,
- bias_prior,
- self.bias_posterior_tensor,
- name="divergence_bias")
- self._built_bias_divergence = True
- return outputs
-
- def _apply_variational_bias(self, inputs):
- if self.bias_posterior is None:
- self.bias_posterior_tensor = None
- return inputs
- self.bias_posterior_tensor = self.bias_posterior_tensor_fn(
- self.bias_posterior)
- return nn.bias_add(inputs, self.bias_posterior_tensor)
-
- def _apply_divergence(self, divergence_fn, posterior, prior,
- posterior_tensor, name):
- if (divergence_fn is None or
- posterior is None or
- prior is None):
- divergence = None
- return
- divergence = standard_ops.identity(
- divergence_fn(
- posterior, prior, posterior_tensor),
- name=name)
- self.add_loss(divergence)
-
- def _matmul(self, inputs, kernel):
- if inputs.shape.ndims <= 2:
- return standard_ops.matmul(inputs, kernel)
- # To handle broadcasting, we must use `tensordot`.
- return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]])
-
- def _compute_output_shape(self, input_shape):
- input_shape = tensor_shape.TensorShape(input_shape).with_rank_at_least(2)
- if input_shape[-1].value is None:
- raise ValueError(
- "The innermost dimension of input_shape must be defined, "
- "but saw: {}".format(input_shape))
- return input_shape[:-1].concatenate(self.units)
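
The `_matmul` helper above falls back to `tensordot` so that inputs with extra leading dimensions (e.g. `[batch, time, features]`) are handled by contracting only the innermost axis against the kernel. A small numpy illustration of the resulting shape behavior (sizes are arbitrary):

```python
import numpy as np

x = np.ones((4, 7, 3))    # e.g. [batch, time, in_features]
kernel = np.ones((3, 5))  # [in_features, units]

# Contract the last axis of `x` with the first axis of `kernel`;
# all leading dimensions of `x` are preserved.
y = np.tensordot(x, kernel, axes=[[-1], [0]])
print(y.shape)  # (4, 7, 5)
```
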
-
-
-class DenseReparameterization(_DenseVariational):
- """Densely-connected layer class with reparameterization estimator.
-
- This layer implements the Bayesian variational inference analogue to
- a dense layer by assuming the `kernel` and/or the `bias` are drawn
- from distributions. By default, the layer implements a stochastic
- forward pass via sampling from the kernel and bias posteriors,
-
- ```none
- kernel, bias ~ posterior
- outputs = activation(matmul(inputs, kernel) + bias)
- ```
-
- It uses the reparameterization estimator [1], which performs a Monte Carlo
- approximation of the distribution integrating over the `kernel` and
- `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- units: Python integer, dimensionality of the output space.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tfp.layers.DenseReparameterization(
- 512, activation=tf.nn.relu)(features)
- logits = tfp.layers.DenseReparameterization(10)(net)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- units,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(
- is_singular=True),
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- @{args}
- """
- # pylint: enable=g-doc-args
- super(DenseReparameterization, self).__init__(
- units=units,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- **kwargs)
-
- def _apply_variational_kernel(self, inputs):
- self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn(
- self.kernel_posterior)
- self.kernel_posterior_affine = None
- self.kernel_posterior_affine_tensor = None
- return self._matmul(inputs, self.kernel_posterior_tensor)
-
-
-@docstring_util.expand_docstring(args=doc_args)
-def dense_reparameterization(
- inputs,
- units,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Densely-connected layer with reparameterization estimator.
-
- This layer implements the Bayesian variational inference analogue to
- a dense layer by assuming the `kernel` and/or the `bias` are drawn
- from distributions. By default, the layer implements a stochastic
- forward pass via sampling from the kernel and bias posteriors,
-
- ```none
- kernel, bias ~ posterior
- outputs = activation(matmul(inputs, kernel) + bias)
- ```
-
- It uses the reparameterization estimator [1], which performs a Monte Carlo
- approximation of the distribution integrating over the `kernel` and
- `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- @{args}
-
- Returns:
-    output: `Tensor` representing the affine-transformed input under a random
-      draw from the surrogate posterior distribution.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tfp.layers.dense_reparameterization(
- features, 512, activation=tf.nn.relu)
- logits = tfp.layers.dense_reparameterization(net, 10)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Auto-Encoding Variational Bayes."
- Diederik P. Kingma, Max Welling.
- International Conference on Learning Representations, 2014.
- """
- # pylint: enable=g-doc-args
- layer = DenseReparameterization(
- units,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
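
The divergences these layers register via `add_loss` are what the examples read back from `tf.GraphKeys.REGULARIZATION_LOSSES`; when the class interface is used directly, the same terms are also exposed per layer through the standard `Layer.losses` property. A small sketch, assuming a `features` tensor as in the docstring examples:

```python
layer = DenseReparameterization(512, activation=tf.nn.relu)
net = layer(features)  # `features` as in the docstring examples
# The layer's KL term(s), produced by `kernel_divergence_fn` on first call,
# are available on the layer object as well as in the graph collection.
kl = sum(layer.losses)
```
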
-
-
-class DenseLocalReparameterization(_DenseVariational):
- """Densely-connected layer class with local reparameterization estimator.
-
- This layer implements the Bayesian variational inference analogue to
- a dense layer by assuming the `kernel` and/or the `bias` are drawn
- from distributions. By default, the layer implements a stochastic
- forward pass via sampling from the kernel and bias posteriors,
-
- ```none
- kernel, bias ~ posterior
- outputs = activation(matmul(inputs, kernel) + bias)
- ```
-
- It uses the local reparameterization estimator [1], which performs a
- Monte Carlo approximation of the distribution on the hidden units
- induced by the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- units: Python integer, dimensionality of the output space.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tfp.layers.DenseLocalReparameterization(
- 512, activation=tf.nn.relu)(features)
- logits = tfp.layers.DenseLocalReparameterization(10)(net)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses local reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Variational Dropout and the Local Reparameterization Trick."
- Diederik P. Kingma, Tim Salimans, Max Welling.
- Neural Information Processing Systems, 2015.
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- units,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(
- is_singular=True),
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- @{args}
- """
- # pylint: enable=g-doc-args
- super(DenseLocalReparameterization, self).__init__(
- units=units,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- **kwargs)
-
- def _apply_variational_kernel(self, inputs):
- if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
- not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
- raise TypeError(
- "`DenseLocalReparameterization` requires "
-          "`kernel_posterior_fn` to produce an instance of "
- "`tf.distributions.Independent(tf.distributions.Normal)` "
- "(saw: \"{}\").".format(self.kernel_posterior.name))
- self.kernel_posterior_affine = normal_lib.Normal(
- loc=self._matmul(inputs, self.kernel_posterior.distribution.loc),
- scale=standard_ops.sqrt(self._matmul(
- standard_ops.square(inputs),
- standard_ops.square(self.kernel_posterior.distribution.scale))))
- self.kernel_posterior_affine_tensor = (
- self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
- self.kernel_posterior_tensor = None
- return self.kernel_posterior_affine_tensor
-
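
For intuition on `_apply_variational_kernel` above: when the kernel entries are independent normals, `matmul(inputs, kernel)` is itself normal with mean `matmul(inputs, loc)` and standard deviation `sqrt(matmul(inputs**2, scale**2))`, which is exactly the distribution the layer samples hidden units from. A quick numpy check of that moment matching (sizes and tolerances are illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(1, 3))              # one example, 3 input features
loc = rng.normal(size=(3, 2))            # posterior means
scale = np.abs(rng.normal(size=(3, 2)))  # posterior stddevs

# Sample many kernels and push the same input through each of them.
kernels = loc + scale * rng.normal(size=(100000, 3, 2))
outputs = np.einsum("bi,kio->ko", x, kernels)

print(np.allclose(outputs.mean(0), x @ loc, atol=0.05))                   # True
print(np.allclose(outputs.std(0), np.sqrt((x ** 2) @ (scale ** 2)),
                  atol=0.05))                                             # True
```
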
-
-@docstring_util.expand_docstring(args=doc_args)
-def dense_local_reparameterization(
- inputs,
- units,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(
- is_singular=True),
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Densely-connected layer with local reparameterization estimator.
-
- This layer implements the Bayesian variational inference analogue to
- a dense layer by assuming the `kernel` and/or the `bias` are drawn
- from distributions. By default, the layer implements a stochastic
- forward pass via sampling from the kernel and bias posteriors,
-
- ```none
- kernel, bias ~ posterior
- outputs = activation(matmul(inputs, kernel) + bias)
- ```
-
- It uses the local reparameterization estimator [1], which performs a
- Monte Carlo approximation of the distribution on the hidden units
- induced by the `kernel` and `bias`.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- @{args}
-
- Returns:
-    output: `Tensor` representing the affine-transformed input under a random
-      draw from the surrogate posterior distribution.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tfp.layers.dense_local_reparameterization(
- features, 512, activation=tf.nn.relu)
- logits = tfp.layers.dense_local_reparameterization(net, 10)
- neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits)
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses local reparameterization gradients to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Variational Dropout and the Local Reparameterization Trick."
- Diederik P. Kingma, Tim Salimans, Max Welling.
- Neural Information Processing Systems, 2015.
- """
- # pylint: enable=g-doc-args
- layer = DenseLocalReparameterization(
- units,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
-
-
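For readers skimming this removal, here is a minimal NumPy sketch of the local reparameterization estimator described in the docstring above; the names `x`, `loc`, and `scale` are illustrative and not part of the deleted API. Rather than sampling a kernel `W ~ Normal(loc, scale)` and computing `matmul(x, W)`, the estimator samples the pre-activations directly from the Normal distribution they induce:

```python
import numpy as np

def dense_local_reparam_sample(x, loc, scale, rng=np.random):
  """Sample pre-activations for a mean-field Normal kernel posterior.

  Equivalent in distribution to sampling W ~ Normal(loc, scale) elementwise
  and returning x.dot(W), but with one noise draw per hidden unit rather
  than one per weight, which lowers gradient variance.
  """
  mean = x.dot(loc)                          # [batch, units]
  var = np.square(x).dot(np.square(scale))   # [batch, units]
  return mean + np.sqrt(var) * rng.standard_normal(mean.shape)

# Toy shapes: 4 examples, 3 features, 2 units.
x = np.random.randn(4, 3)
loc = 0.1 * np.random.randn(3, 2)
scale = np.full((3, 2), 0.05)
print(dense_local_reparam_sample(x, loc, scale).shape)  # (4, 2)
```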
-class DenseFlipout(_DenseVariational):
- """Densely-connected layer class with Flipout estimator.
-
- This layer implements the Bayesian variational inference analogue to
- a dense layer by assuming the `kernel` and/or the `bias` are drawn
- from distributions. By default, the layer implements a stochastic
- forward pass via sampling from the kernel and bias posteriors,
-
- ```none
- kernel, bias ~ posterior
- outputs = activation(matmul(inputs, kernel) + bias)
- ```
-
- It uses the Flipout estimator [1], which performs a Monte Carlo
- approximation of the distribution integrating over the `kernel` and
- `bias`. Flipout uses roughly twice as many floating point operations
- as the reparameterization estimator but has the advantage of
- significantly lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Properties:
- units: Python integer, dimensionality of the output space.
- activation: Activation function (`callable`).
- activity_regularizer: Regularizer function for the output.
- kernel_posterior_fn: `callable` returning posterior.
- kernel_posterior_tensor_fn: `callable` operating on posterior.
- kernel_prior_fn: `callable` returning prior.
- kernel_divergence_fn: `callable` returning divergence.
- bias_posterior_fn: `callable` returning posterior.
- bias_posterior_tensor_fn: `callable` operating on posterior.
- bias_prior_fn: `callable` returning prior.
- bias_divergence_fn: `callable` returning divergence.
- seed: Python integer, used to create random seeds.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tfp.layers.DenseFlipout(
- 512, activation=tf.nn.relu)(features)
- logits = tfp.layers.DenseFlipout(10)(net)
- neg_log_likelihood = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Anonymous. OpenReview, 2017.
- https://openreview.net/forum?id=rJnpifWAb
- """
-
- @docstring_util.expand_docstring(args=doc_args)
- def __init__(
- self,
- units,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(
- is_singular=True),
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- **kwargs):
- # pylint: disable=g-doc-args
- """Construct layer.
-
- Args:
- @{args}
- """
- # pylint: enable=g-doc-args
- super(DenseFlipout, self).__init__(
- units=units,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- name=name,
- **kwargs)
- self.seed = seed
-
- def _apply_variational_kernel(self, inputs):
- if (not isinstance(self.kernel_posterior, independent_lib.Independent) or
- not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)):
- raise TypeError(
- "`DenseFlipout` requires "
- "`kernel_posterior_fn` produce an instance of "
- "`tf.distributions.Independent(tf.distributions.Normal)` "
- "(saw: \"{}\").".format(self.kernel_posterior.name))
- self.kernel_posterior_affine = normal_lib.Normal(
- loc=array_ops.zeros_like(self.kernel_posterior.distribution.loc),
- scale=self.kernel_posterior.distribution.scale)
- self.kernel_posterior_affine_tensor = (
- self.kernel_posterior_tensor_fn(self.kernel_posterior_affine))
- self.kernel_posterior_tensor = None
-
- input_shape = array_ops.shape(inputs)
- batch_shape = input_shape[:-1]
-
- sign_input = layers_util.random_sign(
- input_shape,
- dtype=inputs.dtype,
- seed=self.seed)
- sign_output = layers_util.random_sign(
- array_ops.concat([batch_shape,
- array_ops.expand_dims(self.units, 0)], 0),
- dtype=inputs.dtype,
- seed=distribution_util.gen_new_seed(
- self.seed, salt="dense_flipout"))
- perturbed_inputs = self._matmul(
- inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output
-
- outputs = self._matmul(inputs, self.kernel_posterior.distribution.loc)
- outputs += perturbed_inputs
- return outputs
-
-
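And a corresponding NumPy sketch of the Flipout perturbation implemented in `_apply_variational_kernel` above, assuming a mean-field Normal kernel posterior; `rademacher` mirrors `layers_util.random_sign`, and the remaining names are illustrative:

```python
import numpy as np

def rademacher(shape, rng):
  """Uniform draws from {-1, +1}, as in layers_util.random_sign."""
  return 2 * rng.randint(0, 2, size=shape) - 1

def dense_flipout_sample(x, loc, scale, rng=np.random):
  """Flipout estimator for a dense layer with a Normal(loc, scale) kernel.

  A single zero-mean perturbation `dw` is shared across the batch and then
  decorrelated per example by input/output sign flips, matching
  matmul(x, loc) + matmul(x * sign_in, dw) * sign_out in the layer above.
  """
  dw = scale * rng.standard_normal(loc.shape)            # zero-mean sample
  sign_in = rademacher(x.shape, rng)
  sign_out = rademacher((x.shape[0], loc.shape[1]), rng)
  return x.dot(loc) + (x * sign_in).dot(dw) * sign_out

x = np.random.randn(4, 3)
loc = 0.1 * np.random.randn(3, 2)
scale = np.full((3, 2), 0.05)
print(dense_flipout_sample(x, loc, scale).shape)  # (4, 2)
```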
-@docstring_util.expand_docstring(args=doc_args)
-def dense_flipout(
- inputs,
- units,
- activation=None,
- activity_regularizer=None,
- trainable=True,
- kernel_posterior_fn=layers_util.default_mean_field_normal_fn(),
- kernel_posterior_tensor_fn=lambda d: d.sample(),
- kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda
- loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)),
- kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- bias_posterior_fn=layers_util.default_mean_field_normal_fn(
- is_singular=True),
- bias_posterior_tensor_fn=lambda d: d.sample(),
- bias_prior_fn=None,
- bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p),
- seed=None,
- name=None,
- reuse=None):
- # pylint: disable=g-doc-args
- """Densely-connected layer with Flipout estimator.
-
- This layer implements the Bayesian variational inference analogue to
- a dense layer by assuming the `kernel` and/or the `bias` are drawn
- from distributions. By default, the layer implements a stochastic
- forward pass via sampling from the kernel and bias posteriors,
-
- ```none
- kernel, bias ~ posterior
- outputs = activation(matmul(inputs, kernel) + bias)
- ```
-
- It uses the Flipout estimator [1], which performs a Monte Carlo
- approximation of the distribution integrating over the `kernel` and
- `bias`. Flipout uses roughly twice as many floating point operations
- as the reparameterization estimator but has the advantage of
- significantly lower variance.
-
- The arguments permit separate specification of the surrogate posterior
- (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias`
- distributions.
-
- Args:
- inputs: Tensor input.
- @{args}
-
- Returns:
- output: `Tensor` representing the affine transformed input under a random
- draw from the surrogate posterior distribution.
-
- #### Examples
-
- We illustrate a Bayesian neural network with [variational inference](
- https://en.wikipedia.org/wiki/Variational_Bayesian_methods),
- assuming a dataset of `features` and `labels`.
-
- ```python
- tfp = tf.contrib.bayesflow
-
- net = tfp.layers.dense_flipout(
- features, 512, activation=tf.nn.relu)
- logits = tfp.layers.dense_flipout(net, 10)
- neg_log_likelihood = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
- labels=labels, logits=logits))
- kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
- loss = neg_log_likelihood + kl
- train_op = tf.train.AdamOptimizer().minimize(loss)
- ```
-
- It uses the Flipout gradient estimator to minimize the
- Kullback-Leibler divergence up to a constant, also known as the
- negative Evidence Lower Bound. It consists of the sum of two terms:
- the expected negative log-likelihood, which we approximate via
- Monte Carlo; and the KL divergence, which is added via regularizer
- terms which are arguments to the layer.
-
- [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on
- Mini-Batches."
- Anonymous. OpenReview, 2017.
- https://openreview.net/forum?id=rJnpifWAb
- """
- # pylint: enable=g-doc-args
- layer = DenseFlipout(
- units,
- activation=activation,
- activity_regularizer=activity_regularizer,
- trainable=trainable,
- kernel_posterior_fn=kernel_posterior_fn,
- kernel_posterior_tensor_fn=kernel_posterior_tensor_fn,
- kernel_prior_fn=kernel_prior_fn,
- kernel_divergence_fn=kernel_divergence_fn,
- bias_posterior_fn=bias_posterior_fn,
- bias_posterior_tensor_fn=bias_posterior_tensor_fn,
- bias_prior_fn=bias_prior_fn,
- bias_divergence_fn=bias_divergence_fn,
- seed=seed,
- name=name,
- dtype=inputs.dtype.base_dtype,
- _scope=name,
- _reuse=reuse)
- return layer.apply(inputs)
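One hedged caveat on the `#### Examples` blocks in this file: the KL term accumulated in `tf.GraphKeys.REGULARIZATION_LOSSES` is a whole-dataset divergence, so when the negative log-likelihood is averaged over a minibatch the KL is commonly rescaled by the training-set size before the two are summed. A sketch, where `num_examples` is assumed to be the number of training points:

```python
neg_log_likelihood = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
# Scale the dataset-level KL so the objective stays a per-example ELBO.
kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) / num_examples
elbo_loss = neg_log_likelihood + kl
train_op = tf.train.AdamOptimizer().minimize(elbo_loss)
```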
diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_util.py b/tensorflow/contrib/bayesflow/python/ops/layers_util.py
deleted file mode 100644
index 8c1fb203f7..0000000000
--- a/tensorflow/contrib/bayesflow/python/ops/layers_util.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Utilities for probabilistic layers.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-
-from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib
-from tensorflow.contrib.distributions.python.ops import independent as independent_lib
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import random_ops
-from tensorflow.python.ops.distributions import normal as normal_lib
-
-
-def default_loc_scale_fn(
- is_singular=False,
- loc_initializer=init_ops.random_normal_initializer(stddev=0.1),
- untransformed_scale_initializer=init_ops.random_normal_initializer(
- mean=-3., stddev=0.1),
- loc_regularizer=None,
- untransformed_scale_regularizer=None,
- loc_constraint=None,
- untransformed_scale_constraint=None):
- """Makes closure which creates `loc`, `scale` params from `tf.get_variable`.
-
- This function returns a closure which creates `loc`, `scale` parameters using
- `tf.get_variable`. The closure accepts the following arguments:
-
- dtype: Type of parameter's event.
- shape: Python `list`-like representing the parameter's event shape.
- name: Python `str` name prepended to any created (or existing)
- `tf.Variable`s.
- trainable: Python `bool` indicating all created `tf.Variable`s should be
- added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
- add_variable_fn: `tf.get_variable`-like `callable` used to create (or
- access existing) `tf.Variable`s.
-
- Args:
- is_singular: Python `bool` indicating if `scale is None`. Default: `False`.
- loc_initializer: Initializer function for the `loc` parameters.
- The default is `tf.random_normal_initializer(mean=0., stddev=0.1)`.
- untransformed_scale_initializer: Initializer function for the `scale`
- parameters. Default value: `tf.random_normal_initializer(mean=-3.,
- stddev=0.1)`. This implies the softplus transformed result has mean
- approximately `0.05` and std. deviation approximately `0.005`.
- loc_regularizer: Regularizer function for the `loc` parameters.
- The default (`None`) is to use the `tf.get_variable` default.
- untransformed_scale_regularizer: Regularizer function for the `scale`
- parameters. The default (`None`) is to use the `tf.get_variable` default.
- loc_constraint: An optional projection function to be applied to the
- loc after being updated by an `Optimizer`. The function must take as input
- the unprojected variable and must return the projected variable (which
- must have the same shape). Constraints are not safe to use when doing
- asynchronous distributed training.
- The default (`None`) is to use the `tf.get_variable` default.
- untransformed_scale_constraint: An optional projection function to be
- applied to the `scale` parameters after being updated by an `Optimizer`
- (e.g. used to implement norm constraints or value constraints). The
- function must take as input the unprojected variable and must return the
- projected variable (which must have the same shape). Constraints are not
- safe to use when doing asynchronous distributed training. The default
- (`None`) is to use the `tf.get_variable` default.
-
- Returns:
- default_loc_scale_fn: Python `callable` which instantiates `loc`, `scale`
- parameters from args: `dtype, shape, name, trainable, add_variable_fn`.
- """
- def _fn(dtype, shape, name, trainable, add_variable_fn):
- """Creates `loc`, `scale` parameters."""
- loc = add_variable_fn(
- name=name + "_loc",
- shape=shape,
- initializer=loc_initializer,
- regularizer=loc_regularizer,
- constraint=loc_constraint,
- dtype=dtype,
- trainable=trainable)
- if is_singular:
- return loc, None
- untransformed_scale = add_variable_fn(
- name=name + "_untransformed_scale",
- shape=shape,
- initializer=untransformed_scale_initializer,
- regularizer=untransformed_scale_regularizer,
- constraint=untransformed_scale_constraint,
- dtype=dtype,
- trainable=trainable)
- scale = (np.finfo(dtype.as_numpy_dtype).eps +
- nn_ops.softplus(untransformed_scale))
- return loc, scale
- return _fn
-
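A hedged usage sketch of the closure returned by `default_loc_scale_fn` above (as the module defined it prior to this removal), with `tf.get_variable` standing in for `add_variable_fn`; the variable scope and shapes are illustrative:

```python
import tensorflow as tf

loc_scale_fn = default_loc_scale_fn()
with tf.variable_scope("demo_dense"):
  loc, scale = loc_scale_fn(
      dtype=tf.float32,
      shape=[3, 2],
      name="kernel",
      trainable=True,
      add_variable_fn=tf.get_variable)
# loc is the variable "demo_dense/kernel_loc"; scale is
# eps + softplus("demo_dense/kernel_untransformed_scale").
```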
-
-def default_mean_field_normal_fn(
- is_singular=False,
- loc_initializer=None,
- untransformed_scale_initializer=None,
- loc_regularizer=None,
- untransformed_scale_regularizer=None,
- loc_constraint=None,
- untransformed_scale_constraint=None):
- """Creates a function to build Normal distributions with trainable params.
-
- This function returns a closure which produces a `tf.distributions.Normal`
- parameterized by a `loc` and `scale`, each created using `tf.get_variable`. The
- produced closure accepts the following arguments:
-
- name: Python `str` name prepended to any created (or existing)
- `tf.Variable`s.
- shape: Python `list`-like representing the parameter's event shape.
- dtype: Type of parameter's event.
- trainable: Python `bool` indicating all created `tf.Variable`s should be
- added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`.
- add_variable_fn: `tf.get_variable`-like `callable` used to create (or
- access existing) `tf.Variable`s.
-
- Args:
- is_singular: Python `bool`; if `True`, forces the special case limit of
- `scale->0`, i.e., a `Deterministic` distribution.
- loc_initializer: Initializer function for the `loc` parameters.
- If `None` (default), values are initialized using the default
- initializer used by `tf.get_variable`.
- untransformed_scale_initializer: Initializer function for the `scale`
- parameters. If `None` (default), values are initialized using the default
- initializer used by `tf.get_variable`.
- loc_regularizer: Regularizer function for the `loc` parameters.
- untransformed_scale_regularizer: Regularizer function for the `scale`
- parameters.
- loc_constraint: An optional projection function to be applied to the
- loc after being updated by an `Optimizer`. The function must take as input
- the unprojected variable and must return the projected variable (which
- must have the same shape). Constraints are not safe to use when doing
- asynchronous distributed training.
- untransformed_scale_constraint: An optional projection function to be
- applied to the `scale` parameters after being updated by an `Optimizer`
- (e.g. used to implement norm constraints or value constraints). The
- function must take as input the unprojected variable and must return the
- projected variable (which must have the same shape). Constraints are not
- safe to use when doing asynchronous distributed training.
-
- Returns:
- make_normal_fn: Python `callable` which creates a `tf.distributions.Normal`
- from args: `dtype, shape, name, trainable, add_variable_fn`.
- """
- loc_scale_fn_ = default_loc_scale_fn(
- is_singular,
- loc_initializer,
- untransformed_scale_initializer,
- loc_regularizer,
- untransformed_scale_regularizer,
- loc_constraint,
- untransformed_scale_constraint)
- def _fn(dtype, shape, name, trainable, add_variable_fn):
- """Creates multivariate `Deterministic` or `Normal` distribution."""
- loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn)
- if scale is None:
- dist = deterministic_lib.Deterministic(loc=loc)
- else:
- dist = normal_lib.Normal(loc=loc, scale=scale)
- reinterpreted_batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0]
- return independent_lib.Independent(
- dist, reinterpreted_batch_ndims=reinterpreted_batch_ndims)
- return _fn
-
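Similarly, a hedged sketch of the closure returned by `default_mean_field_normal_fn`: for a parameter shape `[3, 2]` it yields an `Independent(Normal)` over the full parameter block, which is exactly the structure `DenseFlipout._apply_variational_kernel` checks for. Names below are illustrative:

```python
import tensorflow as tf

make_posterior = default_mean_field_normal_fn()
with tf.variable_scope("demo_posterior"):
  kernel_posterior = make_posterior(
      dtype=tf.float32,
      shape=[3, 2],
      name="kernel",
      trainable=True,
      add_variable_fn=tf.get_variable)
# kernel_posterior is Independent(Normal) over the whole [3, 2] kernel,
# so kernel_posterior.sample() is a single [3, 2] kernel draw.
```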
-
-def random_sign(shape, dtype=dtypes.float32, seed=None):
- """Draw values from {-1, 1} uniformly, i.e., Rademacher distribution."""
- random_bernoulli = random_ops.random_uniform(shape, minval=0, maxval=2,
- dtype=dtypes.int32,
- seed=seed)
- return math_ops.cast(2 * random_bernoulli - 1, dtype)
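Finally, a tiny hedged example of `random_sign`: the `2 * bernoulli - 1` mapping sends draws in `{0, 1}` to `{-1, +1}` (a Rademacher sample), evaluated here in TF1 graph/session style:

```python
import tensorflow as tf

signs = random_sign([2, 3], seed=42)
with tf.Session() as sess:
  print(sess.run(signs))  # every entry is -1.0 or 1.0
```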