From 049d98c84ca7474459175914ca49c1fa3c11581d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 8 Oct 2018 10:28:59 -0700 Subject: Wait for shared resources to initialize before initializing local resources. Shared resources are functionally very similar to global variables and are initialized at the same time, but because workers only wait for global variables to be initialized, there is a race condition in which a shared resource is sometimes not ready. PiperOrigin-RevId: 216208679 --- tensorflow/python/training/monitored_session.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tensorflow/python') diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py index 82f0e3be52..a479f38165 100644 --- a/tensorflow/python/training/monitored_session.py +++ b/tensorflow/python/training/monitored_session.py @@ -195,8 +195,12 @@ class Scaffold(object): default_ready_op) if self._ready_for_local_init_op is None: def default_ready_for_local_init_op(): - return variables.report_uninitialized_variables( - variables.global_variables()) + return array_ops.concat([ + variables.report_uninitialized_variables( + variables.global_variables()), + resources.report_uninitialized_resources( + resources.shared_resources()) + ], 0) self._ready_for_local_init_op = Scaffold.get_or_default( 'ready_for_local_init_op', ops.GraphKeys.READY_FOR_LOCAL_INIT_OP, default_ready_for_local_init_op) -- cgit v1.2.3