1. Support profiling nodes that appear in RunMetadata but not in GraphDef.

2. Add an API to allow easier profile retrieval. It currently lives in contrib.

PiperOrigin-RevId: 165399640
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-08-15 21:39:28 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-08-15 21:42:58 -0700
commit: 2b51e0ba27af69c914a7523d9aae232de09e3206 (patch)
tree: 892089b4eca69784760e397f9bf1a5dd64dc9f18 /tensorflow/python/profiler
parent: 8041185b16c0ce9348bd79900f6682fc3976bfd7 (diff)
5 files changed, 343 insertions, 38 deletions
diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD
index c32cddbd6d..f9e8578866 100644
--- a/tensorflow/python/profiler/BUILD
+++ b/tensorflow/python/profiler/BUILD
@@ -43,6 +43,7 @@ cuda_py_test(
     name = "model_analyzer_test",
     srcs = ["model_analyzer_test.py"],
     additional_deps = [
+        ":profile_context",
         ":model_analyzer",
         "//tensorflow/python/profiler/internal:model_analyzer_testlib",
         "//tensorflow/python:client",
@@ -98,6 +99,16 @@ tf_py_test(
 )
 
 py_library(
+    name = "profile_context",
+    srcs = ["profile_context.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":model_analyzer",
+        ":tfprof_logger",
+    ],
+)
+
+py_library(
     name = "pprof_profiler",
     srcs = ["pprof_profiler.py"],
     srcs_version = "PY2AND3",
diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py
index eb95af6a28..5345949664 100644
--- a/tensorflow/python/profiler/model_analyzer.py
+++ b/tensorflow/python/profiler/model_analyzer.py
@@ -22,6 +22,7 @@ from __future__ import print_function
 
 import six
 
+from google.protobuf import message
 from tensorflow.core.profiler import tfprof_options_pb2
 from tensorflow.core.profiler import tfprof_output_pb2
 from tensorflow.python import pywrap_tensorflow as print_mdl
@@ -303,22 +304,31 @@ def profile(graph,
 
   if cmd == 'code' or cmd == 'op':
     tfprof_node = tfprof_output_pb2.MultiGraphNodeProto()
-    tfprof_node.ParseFromString(
-        print_mdl.PrintModelAnalysis(
-            graph.as_graph_def(add_shapes=True).SerializeToString(),
-            run_meta_str,
-            op_log.SerializeToString(),
-            cmd.encode('utf-8'),
-            opts.SerializeToString()))
+    ret = print_mdl.PrintModelAnalysis(
+        graph.as_graph_def(add_shapes=True).SerializeToString(),
+        run_meta_str,
+        op_log.SerializeToString(),
+        cmd.encode('utf-8'),
+        opts.SerializeToString())
+    try:
+      tfprof_node.ParseFromString(ret)
+    except message.DecodeError as _:
+      pass
+      # sys.stderr.write('Cannot parse returned proto: %s.\n' % e)
+
   elif cmd == 'graph' or cmd == 'scope':
     tfprof_node = tfprof_output_pb2.GraphNodeProto()
-    tfprof_node.ParseFromString(
-        print_mdl.PrintModelAnalysis(
-            graph.as_graph_def(add_shapes=True).SerializeToString(),
-            run_meta_str,
-            op_log.SerializeToString(),
-            cmd.encode('utf-8'),
-            opts.SerializeToString()))
+    ret = print_mdl.PrintModelAnalysis(
+        graph.as_graph_def(add_shapes=True).SerializeToString(),
+        run_meta_str,
+        op_log.SerializeToString(),
+        cmd.encode('utf-8'),
+        opts.SerializeToString())
+    try:
+      tfprof_node.ParseFromString(ret)
+    except message.DecodeError as _:
+      pass
+      # sys.stderr.write('Cannot parse returned proto: %s.\n' % e)
   else:
     raise errors.InvalidArgumentError(
         None, None, 'unknown cmd: %s\n' % cmd)
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 841fe46393..7c0080c206 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.profiler import model_analyzer
 from tensorflow.python.profiler import option_builder
+from tensorflow.python.profiler import profile_context
 from tensorflow.python.profiler.internal import model_analyzer_testlib as lib
 
 builder = option_builder.ProfileOptionBuilder
@@ -149,7 +150,7 @@ class PrintModelAnalysisTest(test.TestCase):
       with gfile.Open(outfile, 'r') as f:
         # pylint: disable=line-too-long
         self.assertEqual(
-            'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/7|--/35, )\n  Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n    DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n    DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n        
DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n  ScalarW (1, 1/1 params, 0/0 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n    ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n    ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n      ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n        ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n        ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n        ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n  init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n  zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n',
+            'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/8|--/36, )\n  Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n  Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n  DW (3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n    DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n      DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n        DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n  DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n    DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n    DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n      DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n        
DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n        DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n        DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n  ScalarW (1, 1/1 params, 0/0 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n    ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n    ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n      ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n        ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n        ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n        ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n        ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n    ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n  _retval_Conv2D_1_0_0 (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|RunTimeOp, 1/1|1/1, )\n  init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n  zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n',
             f.read())
         # pylint: enable=line-too-long
 
@@ -562,6 +563,71 @@ class PrintModelAnalysisTest(test.TestCase):
       check_selection(['peak bytes', 'residual bytes', 'output bytes'],
                       ['requested_bytes'])
 
+  def _trainLoop(self, train_op, train_steps, time_dir, time_step,
+                 memory_dir, memory_step, profile_dir, dump_step):
+    with session.Session() as sess:
+      sess.run(variables.global_variables_initializer())
+      # Start from 1 because the variables initializer run took one step.
+      for i in range(1, train_steps + 1):
+        _ = sess.run(train_op)
+        if i in time_step:
+          ret = gfile.ListDirectory(time_dir)
+          self.assertEqual(len(ret), 1)
+          self.assertTrue(
+              gfile.Open(os.path.join(time_dir, ret[0]), 'r').read()
+              .find('execution time') > 0)
+          _ = [gfile.Remove(os.path.join(time_dir, x)) for x in ret]
+        else:
+          self.assertEqual(len(gfile.ListDirectory(time_dir)), 0)
+        if i in memory_step:
+          ret = gfile.ListDirectory(memory_dir)
+          self.assertEqual(len(ret), 1)
+          self.assertTrue(
+              gfile.Open(os.path.join(memory_dir, ret[0]), 'r').read()
+              .find('requested bytes') > 0)
+          _ = [gfile.Remove(os.path.join(memory_dir, x)) for x in ret]
+        else:
+          self.assertEqual(len(gfile.ListDirectory(memory_dir)), 0)
+        if i in dump_step:
+          ret = gfile.ListDirectory(profile_dir)
+          self.assertAllEqual(sorted(ret),
+                              ['graph.pbtxt', 'run_metadata', 'tfprof_log'])
+          _ = [gfile.Remove(os.path.join(profile_dir, x)) for x in ret]
+        else:
+          if i < dump_step[0]:
+            self.assertFalse(gfile.Exists(profile_dir))
+          else:
+            self.assertEqual(len(gfile.ListDirectory(profile_dir)), 0)
+
+  def testAutoProfiling(self):
+    ops.reset_default_graph()
+    time_dir = os.path.join(test.get_temp_dir(), 'time')
+    memory_dir = os.path.join(test.get_temp_dir(), 'memory')
+    profile_dir = os.path.join(test.get_temp_dir(), 'dir/dir2/profile')
+    # TODO(xpan): Should we create parent directory for them?
+    gfile.MkDir(time_dir)
+    gfile.MkDir(memory_dir)
+
+    time_opts = (builder(builder.time_and_memory())
+                 .with_file_output(os.path.join(time_dir, 'profile'))
+                 .select(['micros']).build())
+    memory_opts = (builder(builder.time_and_memory())
+                   .with_file_output(os.path.join(memory_dir, 'profile'))
+                   .select(['bytes']).build())
+
+    time_steps = [2, 3]
+    memory_steps = [1, 3]
+    dump_steps = [3, 4]
+
+    x = lib.BuildSmallModel()
+    with profile_context.ProfileContext() as pctx:
+      pctx.add_auto_profiling('scope', time_opts, time_steps)
+      pctx.add_auto_profiling('scope', memory_opts, memory_steps)
+      pctx.add_auto_profile_dump(profile_dir, dump_steps)
+
+      self._trainLoop(x, 10, time_dir, time_steps,
+                      memory_dir, memory_steps, profile_dir, dump_steps)
+
 
 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/python/profiler/profile_context.py b/tensorflow/python/profiler/profile_context.py
new file mode 100644
index 0000000000..6438fede2f
--- /dev/null
+++ b/tensorflow/python/profiler/profile_context.py
@@ -0,0 +1,230 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""A Context that captures profile and performs profiling/dumping.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.client import session
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import gfile
+from tensorflow.python.profiler import model_analyzer
+from tensorflow.python.profiler import tfprof_logger
+
+
+def _profiled_init(self, target='', graph=None, config=None):
+  """Overwrites the session.__init__."""
+  self._profiler_init_internal(target, graph, config)  # pylint: disable=protected-access
+
+
+def _profiled_run(self,
+                  fetches,
+                  feed_dict=None,
+                  options=None,
+                  run_metadata=None):
+  """Overwrites the session.run()."""
+  # pylint: disable=protected-access
+  # Count the session steps.
+  self.profile_context._new_step()
+  # Fast path if no need for profiling.
+  to_profiles = self.profile_context._profile_candidates()
+  to_dumps = self.profile_context._dump_candidates()
+  if (not to_profiles and not to_dumps and
+      not self.profile_context._is_capture_enforced()):
+    return self._profiler_run_internal(
+        fetches, feed_dict, options, run_metadata)
+
+  # Enable tracing, perform auto profiling or auto dump.
+  if not run_metadata:
+    run_metadata = config_pb2.RunMetadata()
+
+  if not options:
+    options = config_pb2.RunOptions(
+        trace_level=config_pb2.RunOptions.FULL_TRACE)
+    old_trace_level = options.trace_level
+  else:
+    old_trace_level = options.trace_level
+    options.trace_level = config_pb2.RunOptions.FULL_TRACE
+
+  ret = self._profiler_run_internal(fetches, feed_dict, options, run_metadata)
+
+  if self.profile_context._capture_next_step:
+    self.profile_context._add_run_meta(run_metadata)
+
+  for to_dump in to_dumps:
+    outdir, _ = to_dump
+    if not gfile.Exists(outdir):
+      gfile.MakeDirs(outdir)
+    with gfile.Open(os.path.join(outdir, 'graph.pbtxt'), 'w') as f:
+      f.write('%s' % self.graph.as_graph_def(add_shapes=True))
+    with gfile.Open(os.path.join(outdir, 'run_metadata'), 'w') as f:
+      f.write(run_metadata.SerializeToString())
+    tfprof_logger.write_op_log(
+        self.graph, outdir, run_meta=run_metadata, add_trace=True)
+
+  for to_prof in to_profiles:
+    cmd, opts, _ = to_prof
+    model_analyzer.profile(
+        self.graph, run_meta=run_metadata, cmd=cmd, options=opts)
+
+  # Restore the trace level that was in effect before profiling.
+  options.trace_level = old_trace_level
+  return ret
+  # pylint: enable=protected-access
+
+
+class ProfileContext(object):
+  """A Context that captures RunMetadata and performs profiling.
+
+  ```python
+    # Auto profiling at steps 1, 100 and 1000:
+    with tf.contrib.tfprof.ProfileContext() as pctx:
+      # Create the profiling options.
+      opts = tf.profiler.ProfileOptionBuilder.time_and_memory()
+      # Run profiling at certain steps. Multiple ones can be added.
+      pctx.add_auto_profiling('op', opts, [1, 100, 1000])
+      # Or dump the profile files at certain steps.
+      pctx.add_auto_profile_dump('/tmp/profiles', [1000])
+      # Run train/eval loop.
+      train_loop()
+
+    # Alternatively, enable and capture RunMetadata of next step.
+    with tf.contrib.tfprof.ProfileContext() as pctx:
+      pctx.capture_next_run_meta()
+      opts = tf.profiler.ProfileOptionBuilder.time_and_memory()
+      _ = session.run(train_op)
+      tf.profiler.profile(session.graph,
+                          run_meta=pctx.run_meta(),
+                          cmd='op',
+                          options=opts)
+  ```
+  """
+
+  def __init__(self):
+    self._lock = threading.Lock()
+    self._capture_next_step = False
+    self._step = 0
+    self._auto_profiles = []
+    self._auto_dumps = []
+    self._run_meta = None
+
+  def add_auto_profiling(self, cmd, profile_options, profile_steps):
+    """Runs profiling at some steps with provided command and options.
+
+    Args:
+      cmd: The profiling command.
+      profile_options: The profiling options.
+      profile_steps: A list/set of integers. The profiling command and options
+          will be run automatically at these integer steps. Each step is
+          a session.run.
+    """
+    with self._lock:
+      self._auto_profiles.append((cmd, profile_options, profile_steps))
+
+  def add_auto_profile_dump(self, outdir, dump_steps):
+    """Dumps profiles at some steps to the directory.
+
+    Args:
+      outdir: The directory to dump the profile files.
+      dump_steps: A list/set of integers. The profile files will be dumped at
+          these integer steps. Each step is a session.run.
+    """
+    with self._lock:
+      self._auto_dumps.append((outdir, dump_steps))
+
+  def capture_next_run_meta(self):
+    """Enables tracing and captures RunMetadata at the next session.run.
+
+      The captured RunMetadata can be retrieved via run_meta(). It
+      is cleared when the session.run after that one starts.
+    """
+    with self._lock:
+      self._capture_next_step = True
+
+  def run_meta(self):
+    """Returns the RunMetadata captured at the previous session.run.
+
+      capture_next_run_meta() must be called before that session.run to
+      enable capturing.
+    """
+    with self._lock:
+      assert self._run_meta, 'Need to call capture_next_run_meta()'
+      return self._run_meta
+
+  def _is_capture_enforced(self):
+    with self._lock:
+      return self._capture_next_step
+
+  def _add_run_meta(self, run_meta):
+    with self._lock:
+      self._run_meta = run_meta
+      self._capture_next_step = False
+
+  def _new_step(self):
+    with self._lock:
+      self._run_meta = None
+      self._step += 1
+
+  def _profile_candidates(self):
+    to_profile = []
+    with self._lock:
+      for auto_prof in self._auto_profiles:
+        _, _, prof_steps = auto_prof
+        if self._step - 1 in prof_steps:
+          to_profile.append(auto_prof)
+    return to_profile
+
+  def _dump_candidates(self):
+    to_dump = []
+    with self._lock:
+      for auto_dump in self._auto_dumps:
+        _, dump_steps = auto_dump
+        if self._step - 1 in dump_steps:
+          to_dump.append(auto_dump)
+    return to_dump
+
+  def __enter__(self):
+    self.old_run = getattr(session.BaseSession, 'run', None)
+    self.old_init = getattr(session.BaseSession, '__init__', None)
+    if not self.old_run:
+      raise errors.InternalError(None, None, 'BaseSession misses run method.')
+    elif not self.old_init:
+      raise errors.InternalError(None, None,
+                                 'BaseSession misses __init__ method.')
+    elif getattr(session.BaseSession, '_profiler_run_internal', None):
+      raise errors.InternalError(None, None,
+                                 'Already in context or context not cleaned.')
+    elif getattr(session.BaseSession, '_profiler_init_internal', None):
+      raise errors.InternalError(None, None,
+                                 'Already in context or context not cleaned.')
+    else:
+      setattr(session.BaseSession, 'run', _profiled_run)
+      setattr(session.BaseSession, '__init__', _profiled_init)
+      setattr(session.BaseSession, '_profiler_run_internal', self.old_run)
+      setattr(session.BaseSession, '_profiler_init_internal', self.old_init)
+      setattr(session.BaseSession, 'profile_context', self)
+      return self
+
+  def __exit__(self, exec_type, exec_value, exec_tb):
+    setattr(session.BaseSession, 'run', self.old_run)
+    setattr(session.BaseSession, '__init__', self.old_init)
+    setattr(session.BaseSession, '_profiler_run_internal', None)
+    setattr(session.BaseSession, '_profiler_init_internal', None)
+    setattr(session.BaseSession, 'profile_context', None)
diff --git a/tensorflow/python/profiler/profiler_test.py b/tensorflow/python/profiler/profiler_test.py
index 46afe1fe55..eacb7d21e6 100644
--- a/tensorflow/python/profiler/profiler_test.py
+++ b/tensorflow/python/profiler/profiler_test.py
@@ -49,17 +49,6 @@ class ProfilerTest(test.TestCase):
     r = lib.BuildFullModel()
     sess.run(variables.global_variables_initializer())
 
-    profiler = model_analyzer.Profiler(sess.graph)
-    profiler.profile_name_scope(opts)
-    with gfile.Open(outfile, 'r') as f:
-      profiler_str = f.read()
-
-    model_analyzer.profile(
-        sess.graph, cmd='scope', options=opts)
-    with gfile.Open(outfile, 'r') as f:
-      pma_str = f.read()
-    self.assertEqual(pma_str, profiler_str)
-
     # Test the output with run_meta.
     run_meta = config_pb2.RunMetadata()
     _ = sess.run(r,
@@ -67,6 +56,7 @@ class ProfilerTest(test.TestCase):
                      trace_level=config_pb2.RunOptions.FULL_TRACE),
                  run_metadata=run_meta)
 
+    profiler = model_analyzer.Profiler(sess.graph)
     profiler.add_step(1, run_meta)
     profiler.profile_graph(opts)
     with gfile.Open(outfile, 'r') as f:
@@ -78,6 +68,16 @@ class ProfilerTest(test.TestCase):
       pma_str = f.read()
     self.assertEqual(pma_str, profiler_str)
 
+    profiler.profile_name_scope(opts)
+    with gfile.Open(outfile, 'r') as f:
+      profiler_str = f.read()
+
+    model_analyzer.profile(
+        sess.graph, cmd='scope', run_meta=run_meta, options=opts)
+    with gfile.Open(outfile, 'r') as f:
+      pma_str = f.read()
+    self.assertEqual(pma_str, profiler_str)
+
     profiler.profile_python(opts)
     with gfile.Open(outfile, 'r') as f:
       profiler_str = f.read()
@@ -104,18 +104,6 @@ class ProfilerTest(test.TestCase):
       pma_str = f.read()
     self.assertNotEqual(pma_str, profiler_str)
 
-    opts2 = opts.copy()
-    opts2['select'] = ['params', 'float_ops']
-    profiler.profile_name_scope(opts2)
-    with gfile.Open(outfile, 'r') as f:
-      profiler_str = f.read()
-
-    model_analyzer.profile(
-        sess.graph, cmd='scope', run_meta=run_meta, options=opts2)
-    with gfile.Open(outfile, 'r') as f:
-      pma_str = f.read()
-    self.assertEqual(pma_str, profiler_str)
-
   def testMultiStepProfile(self):
     ops.reset_default_graph()
     opts = builder.time_and_memory(min_bytes=0)
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-08-15 21:39:28 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-08-15 21:42:58 -0700
commit	2b51e0ba27af69c914a7523d9aae232de09e3206 (patch)
tree	892089b4eca69784760e397f9bf1a5dd64dc9f18 /tensorflow/python/profiler
parent	8041185b16c0ce9348bd79900f6682fc3976bfd7 (diff)