1 files changed, 181 insertions, 0 deletions
diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py
new file mode 100644
index 0000000000..09c8801e0e
--- /dev/null
+++ b/tensorflow/python/ops/init_ops.py
@@ -0,0 +1,181 @@
+"""Operations often used for initializing tensors."""
+
+import math
+from tensorflow.python.framework import types
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import constant_op
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
+
+
+# TODO(mrry): PEP8 these.
+def constant_initializer(value=0.0):
+  """Returns an initializer that generates Tensors with a single value.
+
+  Args:
+    value: A Python scalar. All elements of the initialized variable
+      will be set to this value.
+
+  Returns:
+    An initializer that generates Tensors with a single value.
+  """
+  def _initializer(shape, dtype=types.float32):
+    return constant_op.constant(value, dtype=dtype, shape=shape)
+  return _initializer
+
+def random_uniform_initializer(minval=0.0, maxval=1.0, seed=None):
+  """Returns an initializer that generates Tensors with a uniform distribution.
+
+  Args:
+    minval: a python scalar or a scalar tensor. lower bound of the range
+      of random values to generate.
+    maxval: a python scalar or a scalar tensor. upper bound of the range
+      of random values to generate.
+    seed: A Python integer. Used to create random seeds.
+      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+  Returns:
+    An initializer that generates Tensors with a uniform distribution.
+  """
+  def _initializer(shape, dtype=types.float32):
+    return random_ops.random_uniform(shape, minval, maxval, dtype, seed=seed)
+  return _initializer
+
+def random_normal_initializer(mean=0.0, stddev=1.0, seed=None):
+  """Returns an initializer that generates Tensors with a normal distribution.
+
+  Args:
+    mean: a python scalar or a scalar tensor. Mean of the random values
+      to generate.
+    stddev: a python scalar or a scalar tensor. Standard deviation of the
+      random values to generate.
+    seed: A Python integer. Used to create random seeds.
+      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+  Returns:
+    An initializer that generates Tensors with a normal distribution.
+  """
+  def _initializer(shape, dtype=types.float32):
+    return random_ops.random_normal(shape, mean, stddev, dtype, seed=seed)
+  return _initializer
+
+def truncated_normal_initializer(mean=0.0, stddev=1.0, seed=None):
+  """Returns an initializer that generates a truncated normal distribution.
+
+  These values are similar to values from a random_normal_initializer
+  except that values more than two standard deviations from the mean
+  are discarded and re-drawn. This is the recommended initializer for
+  neural network weights and filters.
+
+  Args:
+    mean: a python scalar or a scalar tensor. Mean of the random values
+      to generate.
+    stddev: a python scalar or a scalar tensor. Standard deviation of the
+      random values to generate.
+    seed: A Python integer. Used to create random seeds.
+      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+  Returns:
+    An initializer that generates Tensors with a truncated normal
+    distribution.
+  """
+  def _initializer(shape, dtype=types.float32):
+    return random_ops.truncated_normal(shape, mean, stddev, dtype, seed=seed)
+  return _initializer
+
+def uniform_unit_scaling_initializer(factor=1.0, seed=None):
+  """Returns an initializer that generates tensors without scaling variance.
+
+  When initializing a deep network, it is in principle advantageous to keep
+  the scale of the input variance constant, so it does not explode or diminish
+  by reaching the final layer. If the input is `x` and the operation `x * W`,
+  and we want to initialize `W` uniformly at random, we need to pick `W` from
+
+      [-sqrt(3) / sqrt(dim), sqrt(3) / sqrt(dim)]
+
+  to keep the scale intact, where `dim = W.shape[0]` (the size of the input).
+  A similar calculation for convolutional networks gives an analogous result
+  with `dim` equal to the product of the first 3 dimensions.  When
+  nonlinearities are present, we need to multiply this by a constant `factor`.
+  See <https://arxiv.org/pdf/1412.6558v3.pdf> for deeper motivation, experiments
+  and the calculation of constants. In section 2.3 there, the constants were
+  numerically computed: for a linear layer it's 1.0, relu: ~1.43, tanh: ~1.15.
+
+  Args:
+    factor: Float.  A multiplicative factor by which the values will be scaled.
+    seed: A Python integer. Used to create random seeds.
+      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+
+  Returns:
+    An initializer that generates tensors with unit variance.
+  """
+  def _initializer(shape, dtype=types.float32):
+    input_size = 1.0
+    # Estimating input size is not possible to do perfectly, but we try.
+    # The estimate, obtained by multiplying all dimensions but the last one,
+    # is the right thing for matrix multiply and convolutions (see above).
+    for dim in shape[:-1]:
+      input_size *= float(dim)
+    max_val = math.sqrt(float(3) / float(input_size)) * factor
+    return random_ops.random_uniform(shape, -max_val, max_val,
+                                     dtype, seed=seed)
+  return _initializer
+
+# TODO(vrv): Unhide when we are ready to expose this publicly.
+def _random_walk(shape, nonlinearity, dtype=types.float32, seed=None,
+                 name="random_walk"):
+  """Create a random tensor such that backprop neither vanishes nor explodes.
+
+  Args:
+    shape: a python array of int or a 1-d tensor. Sizes of the Tensor.
+    nonlinearity: the brain python function for implementing the
+      nonlinearity in tensor flow.
+    dtype: The type of the output.
+    seed: A Python integer. Used to create random seeds.
+      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+    name: string.  Optional name for the op.
+
+  Returns:
+    A Tensor of the specified sizes filled with random values.
+  """
+  assert len(shape) == 2, "Random Walk initialization only supports 2D tensors."
+  num_inputs = shape[0]
+  if nonlinearity == math_ops.tanh:
+    # No real formula for this case yet, but this works well for many
+    # layer widths.
+    rwg = 1.13
+  elif nonlinearity == array_ops.identity:
+    rwg = math.exp(1.0 / float(2.0 * num_inputs))
+  elif nonlinearity == nn_ops.relu:
+    rwg = math.sqrt(2.0) * math.exp(1.2 / float(max(num_inputs, 6) - 2.4))
+  else:
+    assert False, "Unsupported nonlinearity for Random Walk initialization."
+
+  mean = 0.0
+  stddev = rwg / math.sqrt(float(num_inputs))
+
+  return random_ops.random_normal(shape, mean=mean, stddev=stddev, dtype=dtype,
+                                  seed=seed, name=name)
+
+
+# TODO(vrv): Unhide when we are ready to expose this publicly.
+class _RandomWalkInitializer(object):
+  """An Initializer that generates a tensor for Random Walk Initialization."""
+
+  def __init__(self, nonlinearity, seed=None):
+    """Construct a RandomWalkInitializer.
+
+    Args:
+      nonlinearity: the python tensorflow function that computes a nonlinearity
+        in the graph, typically after a Wx+b type operation.
+      seed: A Python integer. Used to create random seeds.
+        See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
+    """
+    self._nonlinearity = nonlinearity
+    self._seed = seed
+
+  def __call__(self, shape, dtype=types.float32):
+    """Generate a tensor used to initialize a variable."""
+    return random_ops._random_walk(shape, self._nonlinearity, dtype,
+                                   seed=self._seed)