aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/ops/template.py
blob: 09955e690c3a72301f89a0e562a1e664c988d3b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Provides templates which allow variable sharing."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import traceback

from tensorflow.python.framework import ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.platform import tf_logging as logging


# Only `make_template` is exported by `from <module> import *`; the `Template`
# class defined below remains reachable through explicit attribute access.
__all__ = ["make_template"]


def make_template(name_, func_, create_scope_now_=False, unique_name_=None,
                  custom_getter_=None, **kwargs):
  """Wraps an arbitrary function so that repeated calls share variables.

  `func_` is partially applied with `**kwargs` (when any are given) and then
  wrapped in a `Template`. A template creates its variables on the first call
  and reuses them on every later call. To work correctly inside a template,
  `func_` should satisfy the following:

  * All trainable variables, and any other variables that should be shared,
      are created with `tf.get_variable`. Creating a trainable variable with
      `tf.Variable` causes a `ValueError` on calls after the first one.
      Variables meant to be local to a call can be created with
      `tf.Variable(..., trainable=False)`.
  * The function may use variable scopes and other templates internally to
      create and reuse variables, but it shouldn't use `tf.all_variables` to
      capture variables defined outside of the function's own scope.
  * Internal scope names and variable names should not depend on arguments
      that are not supplied to `make_template`. If they do, you will generally
      get a `ValueError` saying that you are trying to reuse a variable that
      doesn't exist.

  In the example below both `z` and `w` are scaled by the same `y`. Note that
  `scalar_name` is bound when the template is made; if it were left unbound
  and a different name were passed for `z` and for `w`, a `ValueError` would
  be raised because the variable could not be reused.

  ```python
  def my_op(x, scalar_name):
    var1 = tf.get_variable(scalar_name,
                           shape=[],
                           initializer=tf.constant_initializer(1))
    return x * var1

  scale_by_y = tf.make_template('scale_by_y', my_op, scalar_name='y')

  z = scale_by_y(input1)
  w = scale_by_y(input2)
  ```

  As a safeguard, the returned function raises a `ValueError` on any call
  after the first if that call creates trainable variables via `tf.Variable`.

  When the conditions above hold, the template enforces two properties:

  1. Calling the same template multiple times shares all non-local variables.
  2. Two different templates are guaranteed to be unique, unless you re-enter
      the variable scope in which a template was first defined and define it
      again. An example of this exception:

  ```python
  def my_op(x, scalar_name):
    var1 = tf.get_variable(scalar_name,
                           shape=[],
                           initializer=tf.constant_initializer(1))
    return x * var1

  with tf.variable_scope('scope') as vs:
    scale_by_y = tf.make_template('scale_by_y', my_op, scalar_name='y')
    z = scale_by_y(input1)
    w = scale_by_y(input2)

  # Creates a template that reuses the variables above.
  with tf.variable_scope(vs, reuse=True):
    scale_by_y2 = tf.make_template('scale_by_y', my_op, scalar_name='y')
    z2 = scale_by_y2(input1)
    w2 = scale_by_y2(input2)
  ```

  Depending on `create_scope_now_`, the full variable scope is captured either
  when the template is constructed or when it is first called. If the option
  is True, every Tensor created by calls to the template gets an extra
  trailing `_N+1` in its name, because the scope is first entered in the
  `Template` constructor, where no Tensors are created.

  Note: the named parameters carry a trailing underscore to reduce the
  likelihood of collisions with the keys of `**kwargs`.

  Args:
    name_: A name for the scope created by this template. If necessary, the
      name is made unique by appending `_N` to it.
    func_: The function to wrap.
    create_scope_now_: Boolean controlling whether the scope is created when
      the template is constructed (True) or when the template is first called
      (False, the default).
    unique_name_: When used, it overrides `name_` and is not made unique. If a
      template of the same scope/unique_name already exists and reuse is
      false, an error is raised. Defaults to None.
    custom_getter_: Optional custom getter for variables used in `func_`. See
      the [`get_variable`](#get_variable) `custom_getter` documentation for
      more information.
    **kwargs: Keyword arguments to apply to `func_`.

  Returns:
    A function that encapsulates a set of variables which are created once and
    then reused. An enclosing scope will be created either where
    `make_template` is called or where the result is first called, depending
    on `create_scope_now_`. In both cases the first call enters the scope
    without reuse and runs `func_` to create variables, which are guaranteed
    to be unique; all subsequent calls re-enter the scope and reuse them.

  Raises:
    ValueError: if the name is None.
  """
  # Bind the extra keyword arguments up front; with none given the function is
  # handed to the template untouched.
  wrapped_func = functools.partial(func_, **kwargs) if kwargs else func_
  template = Template(
      name_,
      wrapped_func,
      create_scope_now=create_scope_now_,
      unique_name=unique_name_,
      custom_getter=custom_getter_)
  return template


def _skip_common_stack_elements(stacktrace, base_case):
  """Skips items that the target stacktrace shares with the base stacktrace."""
  for i, (trace, base) in enumerate(zip(stacktrace, base_case)):
    if trace != base:
      return stacktrace[i:]
  return stacktrace[-1:]


class Template(object):
  """Wrap a function to aid in variable sharing.

  Templates are functions that create variables the first time they are called
  and reuse them thereafter. See `make_template` for full documentation.

  Note: By default, the full variable scope is captured at the time of first
  call. If `create_scope_now` is passed as True to the constructor, the full
  scope will be captured there, but no variables will be created until the
  first call.
  """

  def __init__(self, name, func, create_scope_now=False, unique_name=None,
               custom_getter=None):
    """Creates a template for the given function.

    Args:
      name: A name for the scope created by this template. The
        name will be made unique by appending `_N` to it (see how
        `tf.variable_scope` treats the `default_name` for details).
      func: The function to apply each time.
      create_scope_now: Whether to create the scope at Template construction
        time, rather than first call. Defaults to false. Creating the scope at
        construction time may be more convenient if the template is to be
        passed through much lower level code, and you want to be sure of the
        scope name without knowing exactly where it will be first called. If
        set to True, the scope will be created in the constructor, and all
        subsequent times in __call__, leading to a trailing numeral being
        added to the names of all created Tensors. If set to False, the scope
        will be created at the first call location.
      unique_name: When used, it overrides `name` and is not made unique. If a
        template of the same scope/unique_name already exists and reuse is
        false, an error is raised. Defaults to None.
      custom_getter: optional custom getter to pass to variable_scope()

    Raises:
      ValueError: if the name is None.
    """
    # Validate first so an invalid template never captures any state.
    if name is None:
      raise ValueError("name cannot be None.")
    self._func = func
    # Remember where the template was defined so failures inside `func` can
    # point back to it. The two innermost frames are dropped (this
    # constructor and its direct caller, e.g. `make_template`).
    self._stacktrace = traceback.format_stack()[:-2]
    self._name = name
    self._unique_name = unique_name
    self._custom_getter = custom_getter
    if create_scope_now:
      # Enter and immediately leave the scope, only to capture the scope
      # object; `func` is not called here.
      with variable_scope.variable_scope(
          self._unique_name, self._name,
          custom_getter=self._custom_getter) as vs:
        self._var_scope = vs
    else:
      self._var_scope = None
    # This variable keeps track of whether the template has been successfully
    # called yet, which is not the same as whether the scope has been created.
    self._variables_created = False

  def _call_func(self, args, kwargs, check_for_new_variables):
    """Calls the wrapped function, optionally checking for new variables.

    Args:
      args: Positional arguments forwarded to the wrapped function.
      kwargs: Keyword arguments forwarded to the wrapped function.
      check_for_new_variables: If True, compare the sizes of the
        TRAINABLE_VARIABLES and GLOBAL_VARIABLES collections before and after
        the call. A new trainable variable raises; any other new variable is
        logged.

    Returns:
      Whatever the wrapped function returns.

    Raises:
      ValueError: if `check_for_new_variables` is True and the call added
        entries to the TRAINABLE_VARIABLES collection.
      Exception: any exception raised inside the `try` block is re-raised with
        the template's definition stack trace appended to its first argument.
    """
    try:
      vars_at_start = len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))
      trainable_at_start = len(
          ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES))

      result = self._func(*args, **kwargs)
      if check_for_new_variables:
        trainable_variables = ops.get_collection(
            ops.GraphKeys.TRAINABLE_VARIABLES)
        # If a variable that we intend to train is created as a side effect
        # of creating a template, then that is almost certainly an error.
        if trainable_at_start != len(trainable_variables):
          raise ValueError("Trainable variable created when calling a template "
                           "after the first time, perhaps you used tf.Variable "
                           "when you meant tf.get_variable: %s" %
                           (trainable_variables[trainable_at_start:],))

        # Non-trainable tracking variables are a legitimate reason why a new
        # variable would be created, but it is a relatively advanced use-case,
        # so log it.
        variables = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)
        if vars_at_start != len(variables):
          logging.info("New variables created when calling a template after "
                       "the first time, perhaps you used tf.Variable when you "
                       "meant tf.get_variable: %s",
                       variables[vars_at_start:])
      return result
    except Exception as exc:
      # Reraise the exception, but append the original definition to the
      # trace. A separate local is used so the `args` parameter is not
      # clobbered by the exception's own arguments.
      exc_args = exc.args
      first_arg = exc_args[0] if exc_args else ""
      trace = "".join(_skip_common_stack_elements(self._stacktrace,
                                                  traceback.format_stack()))
      annotated = "%s\n\noriginally defined at:\n%s" % (first_arg, trace)
      exc.args = (annotated,) + tuple(exc_args[1:])
      raise

  def __call__(self, *args, **kwargs):
    """Runs the wrapped function inside this template's variable scope.

    The first successful call runs without reuse; later calls re-enter the
    stored scope with `reuse=True` and check that no new variables appear.

    Args:
      *args: Positional arguments forwarded to the wrapped function.
      **kwargs: Keyword arguments forwarded to the wrapped function.

    Returns:
      Whatever the wrapped function returns.
    """
    if self._var_scope is not None and self._variables_created:
      # This is not the first successful visit to __call__, so variables have
      # already been created, and we want to reuse them.
      with variable_scope.variable_scope(self._var_scope, reuse=True):
        return self._call_func(args, kwargs, check_for_new_variables=True)

    if self._var_scope is not None:
      # The scope already exists (created in the constructor, or by an earlier
      # call that failed part-way) but no call has completed yet.
      with variable_scope.variable_scope(self._var_scope):
        result = self._call_func(args, kwargs, check_for_new_variables=False)
    else:
      # The scope was not created at construction time, so create it here.
      with variable_scope.variable_scope(
          self._unique_name, self._name,
          custom_getter=self._custom_getter) as vs:
        self._var_scope = vs
        result = self._call_func(args, kwargs, check_for_new_variables=False)

    # Only mark the variables as created once the wrapped function has
    # returned. Setting the flag before the call would make every retry after
    # a failed first call run with reuse=True and the new-variable check on.
    self._variables_created = True
    return result

  @property
  def var_scope(self):
    """Returns the variable scope object created by this Template."""
    return self._var_scope