tensorflow/python/ops/init_ops.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181

"""Operations often used for initializing tensors."""

import math
from tensorflow.python.framework import types
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import constant_op
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import random_ops


# TODO(mrry): PEP8 these.
def constant_initializer(value=0.0):
  """Builds an initializer that fills a tensor with one repeated value.

  Args:
    value: A Python scalar. Every element of the initialized variable
      is set to this value.

  Returns:
    A callable `f(shape, dtype=types.float32)` that passes `value`, `shape`
    and `dtype` to `constant_op.constant` and returns the result.
  """
  def _initializer(shape, dtype=types.float32):
    # `value` is captured from the enclosing call; only the shape and dtype
    # vary between invocations of the returned initializer.
    filled = constant_op.constant(value, shape=shape, dtype=dtype)
    return filled

  return _initializer

def random_uniform_initializer(minval=0.0, maxval=1.0, seed=None):
  """Builds an initializer that draws values from a uniform distribution.

  Args:
    minval: A Python scalar or a scalar tensor. Lower bound of the range
      of random values to generate.
    maxval: A Python scalar or a scalar tensor. Upper bound of the range
      of random values to generate.
    seed: A Python integer. Used to create random seeds.
      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.

  Returns:
    A callable `f(shape, dtype=types.float32)` that forwards to
    `random_ops.random_uniform` with the bounds and seed given here.
  """
  def _initializer(shape, dtype=types.float32):
    # The bounds and the seed are fixed by the enclosing call.
    sample = random_ops.random_uniform(
        shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed)
    return sample

  return _initializer

def random_normal_initializer(mean=0.0, stddev=1.0, seed=None):
  """Builds an initializer that draws values from a normal distribution.

  Args:
    mean: A Python scalar or a scalar tensor. Mean of the random values
      to generate.
    stddev: A Python scalar or a scalar tensor. Standard deviation of the
      random values to generate.
    seed: A Python integer. Used to create random seeds.
      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.

  Returns:
    A callable `f(shape, dtype=types.float32)` that forwards to
    `random_ops.random_normal` with the mean, stddev and seed given here.
  """
  def _initializer(shape, dtype=types.float32):
    # The distribution parameters and the seed are fixed by the enclosing
    # call.
    sample = random_ops.random_normal(
        shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
    return sample

  return _initializer

def truncated_normal_initializer(mean=0.0, stddev=1.0, seed=None):
  """Builds an initializer that draws from a truncated normal distribution.

  The values resemble those of a random_normal_initializer, except that
  any value more than two standard deviations from the mean is discarded
  and re-drawn. This is the recommended initializer for neural network
  weights and filters.

  Args:
    mean: A Python scalar or a scalar tensor. Mean of the random values
      to generate.
    stddev: A Python scalar or a scalar tensor. Standard deviation of the
      random values to generate.
    seed: A Python integer. Used to create random seeds.
      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.

  Returns:
    A callable `f(shape, dtype=types.float32)` that forwards to
    `random_ops.truncated_normal` with the mean, stddev and seed given here.
  """
  def _initializer(shape, dtype=types.float32):
    # The distribution parameters and the seed are fixed by the enclosing
    # call.
    sample = random_ops.truncated_normal(
        shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed)
    return sample

  return _initializer

def uniform_unit_scaling_initializer(factor=1.0, seed=None):
  """Returns an initializer that keeps the scale of the input variance.

  When initializing a deep network, it is in principle advantageous to keep
  the scale of the input variance constant, so it does not explode or diminish
  by reaching the final layer. If the input is `x` and the operation `x * W`,
  and we want to initialize `W` uniformly at random, we need to pick `W` from

      [-sqrt(3) / sqrt(dim), sqrt(3) / sqrt(dim)]

  to keep the scale intact, where `dim = W.shape[0]` (the size of the input).
  A similar calculation for convolutional networks gives an analogous result
  with `dim` equal to the product of the first 3 dimensions.  When
  nonlinearities are present, we need to multiply this by a constant `factor`.
  See <https://arxiv.org/pdf/1412.6558v3.pdf> for deeper motivation, experiments
  and the calculation of constants. In section 2.3 there, the constants were
  numerically computed: for a linear layer it's 1.0, relu: ~1.43, tanh: ~1.15.

  Args:
    factor: Float.  A multiplicative factor by which the values will be scaled.
    seed: A Python integer. Used to create random seeds.
      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.

  Returns:
    An initializer `f(shape, dtype=types.float32)` that draws uniformly from
    `[-max_val, max_val]`, where `max_val = sqrt(3 / input_size) * factor`
    and `input_size` is the product of all dimensions of `shape` but the
    last one.
  """
  def _initializer(shape, dtype=types.float32):
    # Estimating input size is not possible to do perfectly, but we try.
    # The estimate, obtained by multiplying all dimensions but the last one,
    # is the right thing for matrix multiply and convolutions (see above).
    input_size = 1.0
    for dim in shape[:-1]:
      input_size *= float(dim)
    # A zero-sized leading dimension makes the product 0, which would raise
    # ZeroDivisionError below. Such a tensor has no elements to scale, so any
    # finite bound works; fall back to 1.0.
    if input_size == 0.0:
      input_size = 1.0
    max_val = math.sqrt(3.0 / input_size) * factor
    return random_ops.random_uniform(shape, -max_val, max_val,
                                     dtype, seed=seed)
  return _initializer

# TODO(vrv): Unhide when we are ready to expose this publicly.
def _random_walk(shape, nonlinearity, dtype=types.float32, seed=None,
                 name="random_walk"):
  """Create a random tensor such that backprop neither vanishes nor explodes.

  Values are drawn via `random_ops.random_normal` with mean 0 and standard
  deviation `rwg / sqrt(shape[0])`, where the gain `rwg` depends on the
  nonlinearity.

  Args:
    shape: a python array of int or a 1-d tensor. Sizes of the Tensor.
      Must have exactly two entries.
    nonlinearity: the TensorFlow Python function implementing the
      nonlinearity. Must be one of `math_ops.tanh`, `array_ops.identity`
      or `nn_ops.relu`.
    dtype: The type of the output.
    seed: A Python integer. Used to create random seeds.
      See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
    name: string.  Optional name for the op.

  Returns:
    A Tensor of the specified sizes filled with random values.

  Raises:
    AssertionError: if `shape` does not have exactly two entries or if
      `nonlinearity` is not one of the supported functions.
  """
  # Raised explicitly rather than through `assert` so the checks survive
  # `python -O`; AssertionError is kept so existing callers see the same
  # exception type.
  if len(shape) != 2:
    raise AssertionError(
        "Random Walk initialization only supports 2D tensors.")
  num_inputs = shape[0]
  if nonlinearity == math_ops.tanh:
    # No real formula for this case yet, but this works well for many
    # layer widths.
    rwg = 1.13
  elif nonlinearity == array_ops.identity:
    rwg = math.exp(1.0 / float(2.0 * num_inputs))
  elif nonlinearity == nn_ops.relu:
    rwg = math.sqrt(2.0) * math.exp(1.2 / float(max(num_inputs, 6) - 2.4))
  else:
    # With a stripped `assert False`, execution used to continue with `rwg`
    # unbound and fail later with an unrelated UnboundLocalError.
    raise AssertionError(
        "Unsupported nonlinearity for Random Walk initialization.")

  mean = 0.0
  stddev = rwg / math.sqrt(float(num_inputs))

  return random_ops.random_normal(shape, mean=mean, stddev=stddev, dtype=dtype,
                                  seed=seed, name=name)


# TODO(vrv): Unhide when we are ready to expose this publicly.
class _RandomWalkInitializer(object):
  """An Initializer that generates a tensor for Random Walk Initialization.

  Instances are callables with the same `(shape, dtype)` signature as the
  function-based initializers in this module.
  """

  def __init__(self, nonlinearity, seed=None):
    """Construct a RandomWalkInitializer.

    Args:
      nonlinearity: the python tensorflow function that computes a nonlinearity
        in the graph, typically after a Wx+b type operation.
      seed: A Python integer. Used to create random seeds.
        See [`set_random_seed`](constant_op.md#set_random_seed) for behavior.
    """
    self._nonlinearity = nonlinearity
    self._seed = seed

  def __call__(self, shape, dtype=types.float32):
    """Generate a tensor used to initialize a variable.

    Args:
      shape: Sizes of the tensor to generate; forwarded to `_random_walk`.
      dtype: The type of the output.

    Returns:
      The result of `_random_walk` for the stored nonlinearity and seed.
    """
    # `_random_walk` is defined in this module, not in `random_ops`, so it
    # must be called directly rather than as a `random_ops` attribute.
    return _random_walk(shape, self._nonlinearity, dtype, seed=self._seed)