"""An object-local variable management scheme."""
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import functools
import json
import weakref

from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_io_ops as io_ops
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import saveable_object
from tensorflow.python.util import nest
from tensorflow.python.util import serialization
from tensorflow.python.util import tf_decorator


# Key where the object graph proto is saved in a TensorBundle
OBJECT_GRAPH_PROTO_KEY = "_CHECKPOINTABLE_OBJECT_GRAPH"


# A key indicating a variable's value in an object's checkpointed Tensors
# (Checkpointable._gather_saveables_for_checkpoint). If this is the only key and
# the object has no dependencies, then its value may be restored on object
# creation (avoiding double assignment when executing eagerly).
VARIABLE_VALUE_KEY = "VARIABLE_VALUE"
OBJECT_CONFIG_JSON_KEY = "OBJECT_CONFIG_JSON"
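# Checkpoint keys for object-based saves are formed from the path through the
# object graph plus the attribute name; a variable's value typically lives
# under a key like "model/dense/kernel/.ATTRIBUTES/VARIABLE_VALUE" (the exact
# path depends on the object graph being saved).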

CheckpointableReference = collections.namedtuple(
    "CheckpointableReference",
    [
        # The local name for this dependency.
        "name",
        # The Checkpointable object being referenced.
        "ref"
    ])


class CheckpointInitialValue(ops.Tensor):
  """Tensor wrapper for managing update UIDs in `Variables`.

  When supplied as an initial value, objects of this type let a `Variable`
  (`Variable`, `ResourceVariable`, etc.) know the UID of the restore the initial
  value came from. This allows deferred restorations to be sequenced in the
  order the user specified them, and lets us fall back on assignment if an
  initial value is not set (e.g. due to a custom getter interfering).

  See comments in _add_variable_with_custom_getter for more information about
  how `CheckpointInitialValue` is used.
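
  Example (a sketch; `checkpoint_position` stands for a `_CheckpointPosition`
  pointing at a simple variable value):

    initial_value = CheckpointInitialValue(
        checkpoint_position=checkpoint_position, shape=[10])
    # `initial_value` behaves like the wrapped value `Tensor`, and carries the
    # checkpoint position so deferred restores can be sequenced.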
  """

  def __init__(self, checkpoint_position, shape=None):
    self.wrapped_value = checkpoint_position.value_tensors()[
        VARIABLE_VALUE_KEY]
    if shape:
      # We need to set the static shape information on the initializer if
      # possible so we don't get a variable with an unknown shape.
      self.wrapped_value.set_shape(shape)
    self._checkpoint_position = checkpoint_position

  @property
  def __class__(self):
    return (self.wrapped_value.__class__, CheckpointInitialValue)

  def __getattr__(self, attr):
    try:
      return getattr(self.wrapped_value, attr)
    except AttributeError:
      return self.__getattribute__(attr)

  @property
  def checkpoint_position(self):
    return self._checkpoint_position


class PythonStringStateSaveable(saveable_object.SaveableObject):
  """Saves Python state in a checkpoint."""

  def __init__(self, name, state_callback):
    """Configure saving.

    Args:
      name: The checkpoint key to write to.
      state_callback: A function taking no arguments which returns a
        string. This function is run every time a checkpoint is written.
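
    Example (a minimal sketch; the checkpoint key and state are illustrative):

      saveable = PythonStringStateSaveable(
          name="obj/.ATTRIBUTES/OBJECT_CONFIG_JSON",
          state_callback=lambda: json.dumps({"units": 16}))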
    """
    if context.executing_eagerly():
      self._save_string = (
          lambda: constant_op.constant(state_callback(), dtype=dtypes.string))
    else:
      self._save_string = constant_op.constant("", dtype=dtypes.string)
      self.feed_dict_additions = (
          lambda: {self._save_string: state_callback()})
    spec = saveable_object.SaveSpec(
        self._save_string, "", name, dtype=dtypes.string)
    super(PythonStringStateSaveable, self).__init__(
        self._save_string, [spec], name)

  def restore(self, restored_tensors, restored_shapes):
    # TODO(allenl): Add a Python hook for state coming out of a checkpoint
    # (currently PythonStringStateSaveable is write-only).
    return control_flow_ops.no_op()


class _CheckpointPosition(object):
  """Indicates a position within a `_Checkpoint`."""

  def __init__(self, checkpoint, proto_id):
    """Specify an object within a checkpoint.

    Args:
      checkpoint: A _Checkpoint object.
      proto_id: The index of this object in CheckpointableObjectGraph.nodes.
    """
    self._checkpoint = checkpoint
    self._proto_id = proto_id

  def restore(self, checkpointable):
    """Restore this value into `checkpointable`."""
    with ops.init_scope():
      if self.bind_object(checkpointable):
        # This object's correspondence with a checkpointed object is new, so
        # process deferred restorations for it and its dependencies.
        restore_ops = checkpointable._restore_from_checkpoint_position(self)  # pylint: disable=protected-access
        if restore_ops:
          self._checkpoint.restore_ops.extend(restore_ops)

  def bind_object(self, checkpointable):
    """Set a checkpoint<->object correspondence and process slot variables.

    Args:
      checkpointable: The object to record a correspondence for.
    Returns:
      True if this is a new assignment, False if this object has already been
      mapped to a checkpointed `Object` proto.
    Raises:
      AssertionError: If another object is already bound to the `Object` proto.
    """
    checkpoint = self.checkpoint
    checkpoint.all_python_objects.add(checkpointable)
    current_assignment = checkpoint.object_by_proto_id.get(self._proto_id, None)
    if current_assignment is None:
      checkpoint.object_by_proto_id[self._proto_id] = checkpointable
      for deferred_slot_restoration in (
          checkpoint.deferred_slot_restorations.pop(self._proto_id, ())):
        checkpointable._create_or_restore_slot_variable(  # pylint: disable=protected-access
            slot_variable_position=_CheckpointPosition(
                checkpoint=checkpoint,
                proto_id=deferred_slot_restoration.slot_variable_id),
            variable=deferred_slot_restoration.original_variable,
            slot_name=deferred_slot_restoration.slot_name)
      for slot_restoration in checkpoint.slot_restorations.pop(
          self._proto_id, ()):
        optimizer_object = checkpoint.object_by_proto_id.get(
            slot_restoration.optimizer_id, None)
        if optimizer_object is None:
          # The optimizer has not yet been created or tracked. Record in the
          # checkpoint that the slot variables need to be restored when it is.
          checkpoint.deferred_slot_restorations.setdefault(
              slot_restoration.optimizer_id, []).append(
                  _DeferredSlotVariableRestoration(
                      original_variable=checkpointable,
                      slot_variable_id=slot_restoration.slot_variable_id,
                      slot_name=slot_restoration.slot_name))
        else:
          optimizer_object._create_or_restore_slot_variable(  # pylint: disable=protected-access
              slot_variable_position=_CheckpointPosition(
                  checkpoint=checkpoint,
                  proto_id=slot_restoration.slot_variable_id),
              variable=checkpointable,
              slot_name=slot_restoration.slot_name)
      return True  # New assignment
    else:
      # The object was already mapped for this checkpoint load, which means
      # we don't need to do anything besides check that the mapping is
      # consistent (if the dependency DAG is not a tree then there are
      # multiple paths to the same object).
      if current_assignment is not checkpointable:
        logging.warning(
            ("Inconsistent references when loading the checkpoint into this "
             "object graph. Either the Checkpointable object references in the "
             "Python program have changed in an incompatible way, or the "
             "checkpoint was generated in an incompatible program.\n\nTwo "
             "checkpoint references resolved to different objects (%s and %s).")
            % (current_assignment, checkpointable))
      return False  # Not a new assignment

  def is_simple_variable(self):
    """Determine whether this value is restorable with a Tensor initializer."""
    attributes = self.object_proto.attributes
    return (len(attributes) == 1
            and attributes[0].name == VARIABLE_VALUE_KEY
            and not self.object_proto.children)

  def value_tensors(self):
    """Create value `Tensor`s for this object's attributes.

    Does not require that the Python object has been created. Used for
    restore-on-create when executing eagerly.

    Returns:
      A dictionary mapping from object attribute names to `Tensor`s.
    """
    value_tensors = {}
    for serialized_tensor in self.object_proto.attributes:
      checkpoint_key = serialized_tensor.checkpoint_key
      dtype = self._checkpoint.dtype_map[checkpoint_key]
      base_type = dtype.base_dtype
      with ops.init_scope():
        with ops.device("/cpu:0"):
          # Run the restore itself on the CPU.
          value, = io_ops.restore_v2(
              prefix=self._checkpoint.save_path,
              tensor_names=[checkpoint_key],
              shape_and_slices=[""],
              dtypes=[base_type],
              name="%s_checkpoint_read" % (serialized_tensor.name,))
        # Copy the value to the current device if necessary.
        value_tensors[serialized_tensor.name] = array_ops.identity(value)
    return value_tensors

  def restore_ops(self):
    """Create or fetch restore ops for this object's attributes.

    Requires that the `Checkpointable` Python object has been bound to an object
    ID in the checkpoint.

    Returns:
      A list of operations when graph building, or an empty list when executing
      eagerly.
    """
    saveables = self.checkpointable._gather_saveables_for_checkpoint()  # pylint: disable=protected-access
    # Name saveables based on the name this object had when it was
    # checkpointed.
    named_saveables = {}
    restore_ops = []
    building_graph = not context.executing_eagerly()
    for serialized_tensor in self.object_proto.attributes:
      saveable_factory = saveables.get(serialized_tensor.name, None)
      if saveable_factory is None:
        # Purposefully does not throw an exception if attributes have been added
        # or deleted. Stores unused attributes so an exception can be raised if
        # the user decides to check that everything in the checkpoint was
        # loaded.
        self._checkpoint.unused_attributes.setdefault(
            self.checkpointable, []).append(serialized_tensor.name)
        continue
      if building_graph:
        existing_ops = self._checkpoint.restore_ops_by_name.get(
            serialized_tensor.name, None)
      else:
        existing_ops = None
      if existing_ops is None:
        if callable(saveable_factory):
          saveable = saveable_factory(name=serialized_tensor.checkpoint_key)
        else:
          saveable = saveable_factory
        named_saveables[serialized_tensor.checkpoint_key] = saveable
    if named_saveables:
      validated_saveables = (
          self._checkpoint.builder._ValidateAndSliceInputs(named_saveables))  # pylint: disable=protected-access
      validated_names = set(saveable.name for saveable in validated_saveables)
      if set(named_saveables.keys()) != validated_names:
        raise AssertionError(
            ("Saveable keys changed when validating. Got back %s, was "
             "expecting %s") % (named_saveables.keys(), validated_names))
      all_tensors = self._checkpoint.builder.bulk_restore(
          filename_tensor=self._checkpoint.save_path,
          saveables=validated_saveables, preferred_shard=-1,
          restore_sequentially=False)
      saveable_index = 0
      for saveable in validated_saveables:
        num_specs = len(saveable.specs)
        saveable_tensors = all_tensors[
            saveable_index:saveable_index + num_specs]
        saveable_index += num_specs
        restore_op = saveable.restore(saveable_tensors, restored_shapes=None)
        if building_graph:
          assert saveable.name not in self._checkpoint.restore_ops_by_name
          self._checkpoint.restore_ops_by_name[saveable.name] = restore_op
          restore_ops.append(restore_op)
    return restore_ops

  @property
  def checkpoint(self):
    return self._checkpoint

  @property
  def checkpointable(self):
    return self._checkpoint.object_by_proto_id[self._proto_id]

  @property
  def object_proto(self):
    return self._checkpoint.object_graph_proto.nodes[self._proto_id]

  @property
  def restore_uid(self):
    return self._checkpoint.restore_uid

  def __repr__(self):
    return repr(self.object_proto)


_DeferredSlotVariableRestoration = collections.namedtuple(
    "_DeferredSlotVariableRestoration",
    [
        "original_variable",
        "slot_variable_id",
        "slot_name",
    ]
)

_SlotVariableRestoration = collections.namedtuple(
    "_SlotVariableRestoration",
    [
        # The checkpoint proto id of the optimizer object.
        "optimizer_id",
        # The checkpoint proto id of the slot variable.
        "slot_variable_id",
        "slot_name",
    ])


def no_automatic_dependency_tracking(method):
  """Disables automatic dependency tracking on attribute assignment.

  Use to decorate any method of a Checkpointable object. Attribute assignment
  in that method will not add dependencies (Model's attribute tracking
  respects the decorator as well). It is harmless in a class which does not do
  automatic dependency tracking, which means it's safe to use in base classes
  whose subclasses may also inherit from Checkpointable.

  Args:
    method: The method to decorate.
  Returns:
    A decorated method which sets and un-sets automatic dependency tracking for
    the object the method is called on (not thread safe).
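
  Example (a sketch; `Holder` is illustrative, and assumes a subclass which
  does automatic tracking, such as `Checkpointable`):

    class Holder(Checkpointable):

      @no_automatic_dependency_tracking
      def _set_cache(self, value):
        # This assignment will not create a checkpoint dependency.
        self._cache = value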
  """

  def _method_wrapper(self, *args, **kwargs):
    previous_value = getattr(self, "_setattr_tracking", True)
    self._setattr_tracking = False  # pylint: disable=protected-access
    try:
      method(self, *args, **kwargs)
    finally:
      self._setattr_tracking = previous_value  # pylint: disable=protected-access

  return tf_decorator.make_decorator(
      target=method, decorator_func=_method_wrapper)


class CheckpointableBase(object):
  """Base class for `Checkpointable` objects without automatic dependencies.

  This class has no __setattr__ override for performance reasons. Dependencies
  must be added explicitly. Unless attribute assignment is performance-critical,
  use `Checkpointable` instead. Use `CheckpointableBase` for `isinstance`
  checks.
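
  Example (a minimal sketch of explicit dependency tracking):

    class Root(CheckpointableBase):

      def __init__(self):
        self._maybe_initialize_checkpointable()
        # Assignment alone does not track anything here; declare the
        # dependency explicitly.
        self.leaf = self._track_checkpointable(
            CheckpointableBase(), name="leaf")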
  """

  # CheckpointableBase does not do automatic dependency tracking, but uses the
  # no_automatic_dependency_tracking decorator so it can avoid adding
  # dependencies if a subclass is Checkpointable / inherits from Model (both of
  # which have __setattr__ overrides).
  @no_automatic_dependency_tracking
  def _maybe_initialize_checkpointable(self):
    """Initialize dependency management.

    Not __init__, since most objects will forget to call it.
    """
    if hasattr(self, "_unconditional_checkpoint_dependencies"):
      # __init__ already called. This check means that we don't need
      # Checkpointable.__init__() in the constructor of every TensorFlow object.
      return
    # A list of CheckpointableReference objects. Some classes implementing
    # `Checkpointable`, notably `Optimizer`s, may override the
    # _checkpoint_dependencies property with conditional dependencies
    # (e.g. based on the current graph when saving).
    self._unconditional_checkpoint_dependencies = []
    # Maps names -> Checkpointable objects
    self._unconditional_dependency_names = {}
    # Restorations for other Checkpointable objects on which this object may
    # eventually depend. Maps local name -> _CheckpointPosition list. Optimizers
    # tack on conditional dependencies, and so need separate management of
    # deferred dependencies too.
    self._unconditional_deferred_dependencies = {}
    # The UID of the highest assignment to this object. Used to ensure that the
    # last requested assignment determines the final value of an object.
    if hasattr(self, "_update_uid"):
      raise AssertionError(
          "Internal error: the object had an update UID set before its "
          "initialization code was run.")
    self._update_uid = -1
    # When executing eagerly, holds a collection of _NameBasedRestoreCoordinator
    # instances, which should be checked when creating variables or other
    # saveables. These are passed on recursively to all dependencies, since
    # unlike object-based checkpoint restores we don't know which subgraph is
    # being restored in advance. This mechanism is only necessary for
    # restore-on-create when executing eagerly, and so is unused when graph
    # building.
    self._name_based_restores = set()

  def _no_dependency(self, value):
    """If automatic dependency tracking is enabled, ignores `value`."""
    return value

  def _name_based_attribute_restore(self, checkpoint):
    """Restore the object's attributes from a name-based checkpoint."""
    self._name_based_restores.add(checkpoint)
    if self._update_uid < checkpoint.restore_uid:
      checkpoint.eager_restore(self)
      self._update_uid = checkpoint.restore_uid

  @property
  def _checkpoint_dependencies(self):
    """All dependencies of this object.

    May be overridden to include conditional dependencies.

    Returns:
      A list of `CheckpointableReference` objects indicating named
      `Checkpointable` dependencies which should be saved along with this
      object.
    """
    return self._unconditional_checkpoint_dependencies

  @property
  def _deferred_dependencies(self):
    """A dictionary with deferred dependencies.

    Stores restorations for other Checkpointable objects on which this object
    may eventually depend. May be overridden by sub-classes (e.g. Optimizers use
    conditional dependencies based on the current graph, and so need separate
    management of deferred dependencies too).

    Returns:
      A dictionary mapping from local name to a list of _CheckpointPosition
      objects.
    """
    return self._unconditional_deferred_dependencies

  def _lookup_dependency(self, name):
    """Look up a dependency by name.

    May be overridden to include conditional dependencies.

    Args:
      name: The local name of the dependency.
    Returns:
      A `Checkpointable` object, or `None` if no dependency by this name was
      found.
    """
    return self._unconditional_dependency_names.get(name, None)

  def _add_variable_with_custom_getter(
      self, name, shape=None, dtype=dtypes.float32,
      initializer=None, getter=None, overwrite=False,
      **kwargs_for_getter):
    """Restore-on-create for a variable be saved with this `Checkpointable`.

    If the user has requested that this object or another `Checkpointable` which
    depends on this object be restored from a checkpoint (deferred loading
    before variable object creation), `initializer` may be ignored and the value
    from the checkpoint used instead.

    Args:
      name: A name for the variable. Must be unique within this object.
      shape: The shape of the variable.
      dtype: The data type of the variable.
      initializer: The initializer to use. Ignored if there is a deferred
        restoration left over from a call to
        `_restore_from_checkpoint_position`.
      getter: The getter to wrap which actually fetches the variable.
      overwrite: If True, disables unique name and type checks.
      **kwargs_for_getter: Passed to the getter.

    Returns:
      The new variable object.

    Raises:
      ValueError: If the variable name is not unique.
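
    Example (a sketch; assumes `variable_scope.get_variable` as the wrapped
    getter, which is not imported in this module):

      kernel = self._add_variable_with_custom_getter(
          name="kernel", shape=[3, 3], dtype=dtypes.float32,
          getter=variable_scope.get_variable)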
    """
    self._maybe_initialize_checkpointable()
    with ops.init_scope():
      if context.executing_eagerly():
        # If this is a variable with a single Tensor stored in the checkpoint,
        # we can set that value as an initializer rather than initializing and
        # then assigning (when executing eagerly). This call returns None if
        # there is nothing to restore.
        checkpoint_initializer = self._preload_simple_restoration(
            name=name, shape=shape)
      else:
        checkpoint_initializer = None
      if (checkpoint_initializer is not None
          and not (
              isinstance(initializer, CheckpointInitialValue)
              and (initializer.restore_uid
                   > checkpoint_initializer.restore_uid))):
        # If multiple Checkpointable objects are "creating" the same variable
        # via the magic of custom getters, the one with the highest restore UID
        # (the one called last) has to make the final initializer. If another
        # custom getter interrupts this process by overwriting the initializer,
        # then we'll catch that when we call _track_checkpointable. So this is
        # "best effort" to set the initializer with the highest restore UID.
        initializer = checkpoint_initializer
        shape = None
    new_variable = getter(
        name=name, shape=shape, dtype=dtype, initializer=initializer,
        **kwargs_for_getter)

    # If we set an initializer and the variable processed it, tracking will not
    # assign again. It will add this variable to our dependencies, and if there
    # is a non-trivial restoration queued, it will handle that. This also
    # handles slot variables.
    if not overwrite or isinstance(new_variable, CheckpointableBase):
      return self._track_checkpointable(new_variable, name=name,
                                        overwrite=overwrite)
    else:
      # TODO(allenl): Some variable types are not yet supported. Remove this
      # fallback once all get_variable() return types are Checkpointable.
      return new_variable

  def _preload_simple_restoration(self, name, shape):
    """Return a dependency's value for restore-on-create.

    Note that the restoration is not deleted; if for some reason the preloaded
    value is never assigned to the variable (for example because a custom
    getter overrides the initializer), the assignment will still happen once
    the variable is tracked (determined based on checkpoint.restore_uid).

    Args:
      name: The object-local name of the dependency holding the variable's
        value.
      shape: The shape of the variable being loaded into.
    Returns:
      A callable for use as a variable's initializer/initial_value, or None if
      one should not be set (either because there was no variable with this name
      in the checkpoint or because it needs more complex deserialization). Any
      non-trivial deserialization will happen when the variable object is
      tracked.
    """
    deferred_dependencies_list = self._deferred_dependencies.get(name, ())
    if not deferred_dependencies_list:
      # Nothing to do; we don't have a restore for this dependency queued up.
      return
    for checkpoint_position in deferred_dependencies_list:
      if not checkpoint_position.is_simple_variable():
        # If _any_ pending restoration is too complicated to fit in an
        # initializer (because it has dependencies, or because there are
        # multiple Tensors to restore), bail and let the general tracking code
        # handle it.
        return None
    checkpoint_position = max(
        deferred_dependencies_list,
        key=lambda restore: restore.checkpoint.restore_uid)
    return CheckpointInitialValue(
        checkpoint_position=checkpoint_position, shape=shape)

  def _track_checkpointable(self, checkpointable, name, overwrite=False):
    """Declare a dependency on another `Checkpointable` object.

    Indicates that checkpoints for this object should include variables from
    `checkpointable`.

    Variables in a checkpoint are mapped to `Checkpointable`s based on the names
    provided when the checkpoint was written. To avoid breaking existing
    checkpoints when modifying a class, neither variable names nor dependency
    names (the names passed to `_track_checkpointable`) may change.

    Args:
      checkpointable: A `Checkpointable` which this object depends on.
      name: A local name for `checkpointable`, used for loading checkpoints into
        the correct objects.
      overwrite: Boolean, whether silently replacing dependencies is OK. Used
        for __setattr__, where throwing an error on attribute reassignment would
        be inappropriate.

    Returns:
      `checkpointable`, for convenience when declaring a dependency and
      assigning to a member variable in one statement.

    Raises:
      TypeError: If `checkpointable` does not inherit from `Checkpointable`.
      ValueError: If another object is already tracked by this name.
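
    Example (illustrative; `embedding` stands in for any `Checkpointable`):

      # Declare the dependency and assign to a member in one statement. A
      # later call with the same name but a different object raises
      # ValueError unless overwrite=True.
      self.embedding = self._track_checkpointable(embedding, name="embedding")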
    """
    self._maybe_initialize_checkpointable()
    if not isinstance(checkpointable, CheckpointableBase):
      raise TypeError(
          ("Checkpointable._track_checkpointable() passed type %s, not a "
           "Checkpointable.") % (type(checkpointable),))
    new_reference = CheckpointableReference(name=name, ref=checkpointable)
    current_object = self._lookup_dependency(name)
    if (current_object is not None
        and current_object is not checkpointable):
      if not overwrite:
        raise ValueError(
            ("Called Checkpointable._track_checkpointable() with name='%s', "
             "but a Checkpointable with this name is already declared as a "
             "dependency. Names must be unique (or overwrite=True).") % (name,))
      # This is a weird thing to do, but we're not going to stop people from
      # using __setattr__.
      for index, (old_name, _) in enumerate(
          self._unconditional_checkpoint_dependencies):
        if name == old_name:
          self._unconditional_checkpoint_dependencies[index] = new_reference
    elif current_object is None:
      self._unconditional_checkpoint_dependencies.append(new_reference)
      self._handle_deferred_dependencies(
          name=name, checkpointable=checkpointable)
    self._unconditional_dependency_names[name] = checkpointable
    return checkpointable

  def _handle_deferred_dependencies(self, name, checkpointable):
    """Pop and load any deferred checkpoint restores into `checkpointable`.

    This method does not add a new dependency on `checkpointable`, but it does
    check if any outstanding/deferred dependencies have been queued waiting for
    this dependency to be added (matched based on `name`). If so,
    `checkpointable` and its dependencies are restored. The restorations are
    considered fulfilled and so are deleted.

    `_track_checkpointable` is more appropriate for adding a
    normal/unconditional dependency, and includes handling for deferred
    restorations. This method allows objects such as `Optimizer` to use the same
    restoration logic while managing conditional dependencies themselves, by
    overriding `_checkpoint_dependencies` and `_lookup_dependency` to change the
    object's dependencies based on the context it is saved/restored in (a single
    optimizer instance can have state associated with multiple graphs).

    Args:
      name: The name of the dependency within this object (`self`), used to
        match `checkpointable` with values saved in a checkpoint.
      checkpointable: The Checkpointable object to restore (inheriting from
        `CheckpointableBase`).
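
    Example (illustrative; `slot_variable` and `slot_name` stand in for a
    conditional dependency managed outside `_track_checkpointable`):

      optimizer._handle_deferred_dependencies(
          name=slot_name, checkpointable=slot_variable)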
    """
    self._maybe_initialize_checkpointable()
    checkpointable._maybe_initialize_checkpointable()  # pylint: disable=protected-access
    deferred_dependencies_list = self._deferred_dependencies.pop(name, ())
    for checkpoint_position in sorted(
        deferred_dependencies_list,
        key=lambda restore: restore.checkpoint.restore_uid,
        reverse=True):
      checkpoint_position.restore(checkpointable)

    # Pass on any name-based restores queued in this object.
    for name_based_restore in sorted(
        self._name_based_restores,
        key=lambda checkpoint: checkpoint.restore_uid,
        reverse=True):
      checkpointable._name_based_attribute_restore(name_based_restore)  # pylint: disable=protected-access

  def _restore_from_checkpoint_position(self, checkpoint_position):
    """Restore this object and its dependencies (may be deferred)."""
    # Attempt a breadth-first traversal, since presumably the user has more
    # control over shorter paths. If we don't have all of the dependencies at
    # this point, the end result is not breadth-first (since other deferred
    # traversals will happen later).
    visit_queue = collections.deque([checkpoint_position])
    restore_ops = []
    while visit_queue:
      current_position = visit_queue.popleft()
      restore_ops.extend(nest.flatten(
          current_position.checkpointable  # pylint: disable=protected-access
          ._single_restoration_from_checkpoint_position(
              checkpoint_position=current_position,
              visit_queue=visit_queue)))
    return restore_ops

  def _single_restoration_from_checkpoint_position(
      self, checkpoint_position, visit_queue):
    """Restore this object, and either queue its dependencies or defer them."""
    self._maybe_initialize_checkpointable()
    checkpoint = checkpoint_position.checkpoint
    # If the UID of this restore is lower than our current update UID, we don't
    # need to actually restore the object. However, we should pass the
    # restoration on to our dependencies.
    if checkpoint.restore_uid > self._update_uid:
      restore_ops = checkpoint_position.restore_ops()
      self._update_uid = checkpoint.restore_uid
    else:
      restore_ops = ()
    for child in checkpoint_position.object_proto.children:
      child_position = _CheckpointPosition(
          checkpoint=checkpoint,
          proto_id=child.node_id)
      local_object = self._lookup_dependency(child.local_name)
      if local_object is None:
        # We don't yet have a dependency registered with this name. Save it
        # in case we do.
        self._deferred_dependencies.setdefault(child.local_name, []).append(
            child_position)
      else:
        if child_position.bind_object(checkpointable=local_object):
          # This object's correspondence is new, so dependencies need to be
          # visited. Delay doing it so that we get a breadth-first dependency
          # resolution order (shallowest paths first). The caller is responsible
          # for emptying visit_queue.
          visit_queue.append(child_position)
    return restore_ops

  def _gather_saveables_for_checkpoint(self):
    """Returns a dictionary of values to checkpoint with this object.

    Keys in the returned dictionary are local to this object and in a separate
    namespace from dependencies. Values may either be `SaveableObject` factories
    or variables easily converted to `SaveableObject`s (as in `tf.train.Saver`'s
    `var_list` constructor argument).

    A `SaveableObject`'s name is set when it is constructed, and
    `Checkpointable` needs to generate these names itself. So rather than
    returning `SaveableObject`s directly, this method should return a
    dictionary of callables which take `name` arguments and return
    `SaveableObject`s with that name.

    If this object may also be passed to the global-name-based `tf.train.Saver`,
    the returned callables should have a default value for their name argument
    (i.e. be callable with no arguments).

    Returned values must be saved only by this object; if any value may be
    shared, it should instead be a dependency. For example, variable objects
    save their own values with the key `VARIABLE_VALUE_KEY`, but objects which
    reference variables simply add a dependency.

    Returns:
      The dictionary mapping attribute names to `SaveableObject` factories
      described above. For example:
      {VARIABLE_VALUE_KEY:
       lambda name="global_name_for_this_object":
       SaveableObject(name=name, ...)}
    """
    if not hasattr(self, "get_config"):
      return {}
    try:
      self.get_config()
    except NotImplementedError:
      return {}
    weak_self = weakref.ref(self)
    def _state_callback():
      dereferenced_self = weak_self()
      if dereferenced_self:
        # Use the dereferenced weakref so this closure doesn't hold a strong
        # reference to the object (holding `self` would defeat `weakref.ref`).
        return json.dumps(dereferenced_self,
                          default=serialization.get_json_type,
                          sort_keys=True).encode("utf8")
      else:
        return ""
    return {OBJECT_CONFIG_JSON_KEY: functools.partial(
        PythonStringStateSaveable,
        state_callback=_state_callback)}