aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/ops/init_ops.py
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/python/ops/init_ops.py')
-rw-r--r-- tensorflow/python/ops/init_ops.py | 181
1 files changed, 181 insertions, 0 deletions
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
new file mode 100644
index 0000000000..09c8801e0e
--- /dev/null
+++ b/tensorflow/python/ops/init_ops.py
@@ -0,0 +1,181 @@
+"""Operations often used for initializing tensors."""
+
+import math
+from tensorflow.python.framework import types
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import constant_op
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+
+
+# TODO(mrry): PEP8 these.
+def constant_initializer(value=0.0):
+ """Returns an initializer that generates Tensors with a single value.
+
+ Args:
+ value: A Python scalar. All elements of the initialized variable
+ will be set to this value.
+
+ Returns:
+ An initializer that generates Tensors with a single value.
+ """
+ def _initializer(shape, dtype=types.float32):
+ return constant_op.constant(value, dtype=dtype, shape=shape)
+ return _initializer
+
+def random_uniform_initializer(minval=0.0, maxval=1.0, seed=None):
+ """Returns an initializer that generates Tensors with a uniform distribution.
+
+ Args:
+ minval: a python scalar or a scalar tensor. lower bound of the range
+ of random values to generate.
+ maxval: a python scalar or a scalar tensor. upper bound of the range
+ of random values to generate.
+ seed: A Python integer. Used to create random seeds.
+ See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+ Returns:
+ An initializer that generates Tensors with a uniform distribution.
+ """
+ def _initializer(shape, dtype=types.float32):
+ return random_ops.random_uniform(shape, minval, maxval, dtype, seed=seed)
+ return _initializer
+
+def random_normal_initializer(mean=0.0, stddev=1.0, seed=None):
+ """Returns an initializer that generates Tensors with a normal distribution.
+
+ Args:
+ mean: a python scalar or a scalar tensor. Mean of the random values
+ to generate.
+ stddev: a python scalar or a scalar tensor. Standard deviation of the
+ random values to generate.
+ seed: A Python integer. Used to create random seeds.
+ See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+ Returns:
+ An initializer that generates Tensors with a normal distribution.
+ """
+ def _initializer(shape, dtype=types.float32):
+ return random_ops.random_normal(shape, mean, stddev, dtype, seed=seed)
+ return _initializer
+
+def truncated_normal_initializer(mean=0.0, stddev=1.0, seed=None):
+ """Returns an initializer that generates a truncated normal distribution.
+
+ These values are similar to values from a random_normal_initializer
+ except that values more than two standard deviations from the mean
+ are discarded and re-drawn. This is the recommended initializer for
+ neural network weights and filters.
+
+ Args:
+ mean: a python scalar or a scalar tensor. Mean of the random values
+ to generate.
+ stddev: a python scalar or a scalar tensor. Standard deviation of the
+ random values to generate.
+ seed: A Python integer. Used to create random seeds.
+ See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+ Returns:
+ An initializer that generates Tensors with a truncated normal
+ distribution.
+ """
+ def _initializer(shape, dtype=types.float32):
+ return random_ops.truncated_normal(shape, mean, stddev, dtype, seed=seed)
+ return _initializer
+
+def uniform_unit_scaling_initializer(factor=1.0, seed=None):
+ """Returns an initializer that generates tensors without scaling variance.
+
+ When initializing a deep network, it is in principle advantageous to keep
+ the scale of the input variance constant, so it does not explode or diminish
+ by reaching the final layer. If the input is `x` and the operation `x * W`,
+ and we want to initialize `W` uniformly at random, we need to pick `W` from
+
+ [-sqrt(3) / sqrt(dim), sqrt(3) / sqrt(dim)]
+
+ to keep the scale intact, where `dim = W.shape[0]` (the size of the input).
+ A similar calculation for convolutional networks gives an analogous result
+ with `dim` equal to the product of the first 3 dimensions. When
+ nonlinearities are present, we need to multiply this by a constant `factor`.
+ See <https://arxiv.org/pdf/1412.6558v3.pdf> for deeper motivation, experiments
+ and the calculation of constants. In section 2.3 there, the constants were
+ numerically computed: for a linear layer it's 1.0, relu: ~1.43, tanh: ~1.15.
+
+ Args:
+ factor: Float. A multiplicative factor by which the values will be scaled.
+ seed: A Python integer. Used to create random seeds.
+ See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+ Returns:
+ An initializer that generates tensors with unit variance.
+ """
+ def _initializer(shape, dtype=types.float32):
+ input_size = 1.0
+ # Estimating input size is not possible to do perfectly, but we try.
+ # The estimate, obtained by multiplying all dimensions but the last one,
+ # is the right thing for matrix multiply and convolutions (see above).
+ for dim in shape[:-1]:
+ input_size *= float(dim)
+ max_val = math.sqrt(float(3) / float(input_size)) * factor
+ return random_ops.random_uniform(shape, -max_val, max_val,
+ dtype, seed=seed)
+ return _initializer
+
+# TODO(vrv): Unhide when we are ready to expose this publicly.
+def _random_walk(shape, nonlinearity, dtype=types.float32, seed=None,
+ name="random_walk"):
+ """Create a random tensor such that backprop neither vanishes nor explodes.
+
+ Args:
+ shape: a python array of int or a 1-d tensor. Sizes of the Tensor.
+ nonlinearity: the brain python function for implementing the
+ nonlinearity in tensor flow.
+ dtype: The type of the output.
+ seed: A Python integer. Used to create random seeds.
+ See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+ name: string. Optional name for the op.
+
+ Returns:
+ A Tensor of the specified sizes filled with random values.
+ """
+ assert len(shape) == 2, "Random Walk initialization only supports 2D tensors."
+ num_inputs = shape[0]
+ if nonlinearity == math_ops.tanh:
+ # No real formula for this case yet, but this works well for many
+ # layer widths.
+ rwg = 1.13
+ elif nonlinearity == array_ops.identity:
+ rwg = math.exp(1.0 / float(2.0 * num_inputs))
+ elif nonlinearity == nn_ops.relu:
+ rwg = math.sqrt(2.0) * math.exp(1.2 / float(max(num_inputs, 6) - 2.4))
+ else:
+ assert False, "Unsupported nonlinearity for Random Walk initialization."
+
+ mean = 0.0
+ stddev = rwg / math.sqrt(float(num_inputs))
+
+ return random_ops.random_normal(shape, mean=mean, stddev=stddev, dtype=dtype,
+ seed=seed, name=name)
+
+
+# TODO(vrv): Unhide when we are ready to expose this publicly.
+class _RandomWalkInitializer(object):
+ """An Initializer that generates a tensor for Random Walk Initialization."""
+
+ def __init__(self, nonlinearity, seed=None):
+ """Construct a RandomWalkInitializer.
+
+ Args:
+ nonlinearity: the python tensorflow function that computes a nonlinearity
+ in the graph, typically after a Wx+b type operation.
+ seed: A Python integer. Used to create random seeds.
+ See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+ """
+ self._nonlinearity = nonlinearity
+ self._seed = seed
+
+ def __call__(self, shape, dtype=types.float32):
+ """Generate a tensor used to initialize a variable."""
+ return random_ops._random_walk(shape, self._nonlinearity, dtype,
+ seed=self._seed)