aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/python/profiler
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-08-15 21:39:28 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-08-15 21:42:58 -0700
commit2b51e0ba27af69c914a7523d9aae232de09e3206 (patch)
tree892089b4eca69784760e397f9bf1a5dd64dc9f18 /tensorflow/python/profiler
parent8041185b16c0ce9348bd79900f6682fc3976bfd7 (diff)
1. Support profiling nodes in RunMetadata but not in GraphDef.
2. Add an API to allow easier profile retrieval. Currently in contrib. PiperOrigin-RevId: 165399640
Diffstat (limited to 'tensorflow/python/profiler')
-rw-r--r--tensorflow/python/profiler/BUILD11
-rw-r--r--tensorflow/python/profiler/model_analyzer.py38
-rw-r--r--tensorflow/python/profiler/model_analyzer_test.py68
-rw-r--r--tensorflow/python/profiler/profile_context.py230
-rw-r--r--tensorflow/python/profiler/profiler_test.py34
5 files changed, 343 insertions, 38 deletions
diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD
index c32cddbd6d..f9e8578866 100644
--- a/tensorflow/python/profiler/BUILD
+++ b/tensorflow/python/profiler/BUILD
@@ -43,6 +43,7 @@ cuda_py_test(
name = "model_analyzer_test",
srcs = ["model_analyzer_test.py"],
additional_deps = [
+ ":profile_context",
":model_analyzer",
"//tensorflow/python/profiler/internal:model_analyzer_testlib",
"//tensorflow/python:client",
@@ -98,6 +99,16 @@ tf_py_test(
)
py_library(
+ name = "profile_context",
+ srcs = ["profile_context.py"],
+ srcs_version = "PY2AND3",
+ deps = [
+ ":model_analyzer",
+ ":tfprof_logger",
+ ],
+)
+
+py_library(
name = "pprof_profiler",
srcs = ["pprof_profiler.py"],
srcs_version = "PY2AND3",
diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py
index eb95af6a28..5345949664 100644
--- a/tensorflow/python/profiler/model_analyzer.py
+++ b/tensorflow/python/profiler/model_analyzer.py
@@ -22,6 +22,7 @@ from __future__ import print_function
import six
+from google.protobuf import message
from tensorflow.core.profiler import tfprof_options_pb2
from tensorflow.core.profiler import tfprof_output_pb2
from tensorflow.python import pywrap_tensorflow as print_mdl
@@ -303,22 +304,31 @@ def profile(graph,
if cmd == 'code' or cmd == 'op':
tfprof_node = tfprof_output_pb2.MultiGraphNodeProto()
- tfprof_node.ParseFromString(
- print_mdl.PrintModelAnalysis(
- graph.as_graph_def(add_shapes=True).SerializeToString(),
- run_meta_str,
- op_log.SerializeToString(),
- cmd.encode('utf-8'),
- opts.SerializeToString()))
+ ret = print_mdl.PrintModelAnalysis(
+ graph.as_graph_def(add_shapes=True).SerializeToString(),
+ run_meta_str,
+ op_log.SerializeToString(),
+ cmd.encode('utf-8'),
+ opts.SerializeToString())
+ try:
+ tfprof_node.ParseFromString(ret)
+ except message.DecodeError as _:
+ pass
+ # sys.stderr.write('Cannot parse returned proto: %s.\n' % e)
+
elif cmd == 'graph' or cmd == 'scope':
tfprof_node = tfprof_output_pb2.GraphNodeProto()
- tfprof_node.ParseFromString(
- print_mdl.PrintModelAnalysis(
- graph.as_graph_def(add_shapes=True).SerializeToString(),
- run_meta_str,
- op_log.SerializeToString(),
- cmd.encode('utf-8'),
- opts.SerializeToString()))
+ ret = print_mdl.PrintModelAnalysis(
+ graph.as_graph_def(add_shapes=True).SerializeToString(),
+ run_meta_str,
+ op_log.SerializeToString(),
+ cmd.encode('utf-8'),
+ opts.SerializeToString())
+ try:
+ tfprof_node.ParseFromString(ret)
+ except message.DecodeError as _:
+ pass
+ # sys.stderr.write('Cannot parse returned proto: %s.\n' % e)
else:
raise errors.InvalidArgumentError(
None, None, 'unknown cmd: %s\n' % cmd)
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 841fe46393..7c0080c206 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -33,6 +33,7 @@ from tensorflow.python.platform import gfile
from tensorflow.python.platform import test
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler import option_builder
+from tensorflow.python.profiler import profile_context
from tensorflow.python.profiler.internal import model_analyzer_testlib as lib
builder = option_builder.ProfileOptionBuilder
@@ -149,7 +150,7 @@ class PrintModelAnalysisTest(test.TestCase):
with gfile.Open(outfile, 'r') as f:
# pylint: disable=line-too-long
self.assertEqual(
- 'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/7|--/35, )\n Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n DW (3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n 
DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n ScalarW (1, 1/1 params, 0/0 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n',
+ 'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/8|--/36, )\n Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n DW (3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n 
DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n ScalarW (1, 1/1 params, 0/0 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n _retval_Conv2D_1_0_0 (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|RunTimeOp, 1/1|1/1, )\n init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n',
f.read())
# pylint: enable=line-too-long
@@ -562,6 +563,71 @@ class PrintModelAnalysisTest(test.TestCase):
check_selection(['peak bytes', 'residual bytes', 'output bytes'],
['requested_bytes'])
+ def _trainLoop(self, train_op, train_steps, time_dir, time_step,
+ memory_dir, memory_step, profile_dir, dump_step):
+ with session.Session() as sess:
+ sess.run(variables.global_variables_initializer())
+ # start from 1 because variable_initializer took one step.
+ for i in range(1, train_steps + 1):
+ _ = sess.run(train_op)
+ if i in time_step:
+ ret = gfile.ListDirectory(time_dir)
+ self.assertEqual(len(ret), 1)
+ self.assertTrue(
+ gfile.Open(os.path.join(time_dir, ret[0]), 'r').read()
+ .find('execution time') > 0)
+ _ = [gfile.Remove(os.path.join(time_dir, x)) for x in ret]
+ else:
+ self.assertEqual(len(gfile.ListDirectory(time_dir)), 0)
+ if i in memory_step:
+ ret = gfile.ListDirectory(memory_dir)
+ self.assertEqual(len(ret), 1)
+ self.assertTrue(
+ gfile.Open(os.path.join(memory_dir, ret[0]), 'r').read()
+ .find('requested bytes') > 0)
+ _ = [gfile.Remove(os.path.join(memory_dir, x)) for x in ret]
+ else:
+ self.assertEqual(len(gfile.ListDirectory(memory_dir)), 0)
+ if i in dump_step:
+ ret = gfile.ListDirectory(profile_dir)
+ self.assertAllEqual(sorted(ret),
+ ['graph.pbtxt', 'run_metadata', 'tfprof_log'])
+ _ = [gfile.Remove(os.path.join(profile_dir, x)) for x in ret]
+ else:
+ if i < dump_step[0]:
+ self.assertFalse(gfile.Exists(profile_dir))
+ else:
+ self.assertEqual(len(gfile.ListDirectory(profile_dir)), 0)
+
+ def testAutoProfiling(self):
+ ops.reset_default_graph()
+ time_dir = os.path.join(test.get_temp_dir(), 'time')
+ memory_dir = os.path.join(test.get_temp_dir(), 'memory')
+ profile_dir = os.path.join(test.get_temp_dir(), 'dir/dir2/profile')
+ # TODO(xpan): Should we create parent directory for them?
+ gfile.MkDir(time_dir)
+ gfile.MkDir(memory_dir)
+
+ time_opts = (builder(builder.time_and_memory())
+ .with_file_output(os.path.join(time_dir, 'profile'))
+ .select(['micros']).build())
+ memory_opts = (builder(builder.time_and_memory())
+ .with_file_output(os.path.join(memory_dir, 'profile'))
+ .select(['bytes']).build())
+
+ time_steps = [2, 3]
+ memory_steps = [1, 3]
+ dump_steps = [3, 4]
+
+ x = lib.BuildSmallModel()
+ with profile_context.ProfileContext() as pctx:
+ pctx.add_auto_profiling('scope', time_opts, time_steps)
+ pctx.add_auto_profiling('scope', memory_opts, memory_steps)
+ pctx.add_auto_profile_dump(profile_dir, dump_steps)
+
+ self._trainLoop(x, 10, time_dir, time_steps,
+ memory_dir, memory_steps, profile_dir, dump_steps)
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/python/profiler/profile_context.py b/tensorflow/python/profiler/profile_context.py
new file mode 100644
index 0000000000..6438fede2f
--- /dev/null
+++ b/tensorflow/python/profiler/profile_context.py
@@ -0,0 +1,230 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""A Context that captures profile and performs profiling/dumping.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import threading
+
+from tensorflow.core.protobuf import config_pb2
+from tensorflow.python.client import session
+from tensorflow.python.framework import errors
+from tensorflow.python.platform import gfile
+from tensorflow.python.profiler import model_analyzer
+from tensorflow.python.profiler import tfprof_logger
+
+
+def _profiled_init(self, target='', graph=None, config=None):
+ """Overwrites the session.__init__."""
+ self._profiler_init_internal(target, graph, config) # pylint: disable=protected-access
+
+
+def _profiled_run(self,
+ fetches,
+ feed_dict=None,
+ options=None,
+ run_metadata=None):
+ """Overwrites the session.run()."""
+ # pylint: disable=protected-access
+ # Count the session steps.
+ self.profile_context._new_step()
+ # Fast path if no need for profiling.
+ to_profiles = self.profile_context._profile_candidates()
+ to_dumps = self.profile_context._dump_candidates()
+ if (not to_profiles and not to_dumps and
+ not self.profile_context._is_capture_enforced()):
+ return self._profiler_run_internal(
+ fetches, feed_dict, options, run_metadata)
+
+ # Enable tracing, perform auto profiling or auto dump.
+ if not run_metadata:
+ run_metadata = config_pb2.RunMetadata()
+
+ if not options:
+ options = config_pb2.RunOptions(
+ trace_level=config_pb2.RunOptions.FULL_TRACE)
+ old_trace_level = options.trace_level
+ else:
+ old_trace_level = options.trace_level
+ options.trace_level = config_pb2.RunOptions.FULL_TRACE
+
+ ret = self._profiler_run_internal(fetches, feed_dict, options, run_metadata)
+
+ if self.profile_context._capture_next_step:
+ self.profile_context._add_run_meta(run_metadata)
+
+ for to_dump in to_dumps:
+ outdir, _ = to_dump
+ if not gfile.Exists(outdir):
+ gfile.MakeDirs(outdir)
+ with gfile.Open(os.path.join(outdir, 'graph.pbtxt'), 'w') as f:
+ f.write('%s' % self.graph.as_graph_def(add_shapes=True))
+ with gfile.Open(os.path.join(outdir, 'run_metadata'), 'w') as f:
+ f.write(run_metadata.SerializeToString())
+ tfprof_logger.write_op_log(
+ self.graph, outdir, run_meta=run_metadata, add_trace=True)
+
+ for to_prof in to_profiles:
+ cmd, opts, _ = to_prof
+ model_analyzer.profile(
+ self.graph, run_meta=run_metadata, cmd=cmd, options=opts)
+
+ # Restore to default.
+ options.trace_level = old_trace_level
+ return ret
+ # pylint: enable=protected-access
+
+
+class ProfileContext(object):
+ """A Context that captures RunMetadata and performs profiling.
+
+ ```python
+ # Auto profiling at steps 1, 100 and 1000:
+ with tf.contrib.tfprof.ProfileContext() as pctx:
+ # Create the profiling options.
+ opts = tf.profiler.ProfileOptionBuilder.time_and_memory()
+ # Run profiling at certain steps. Multiple ones can be added.
+ pctx.add_auto_profiling('op', opts, [1, 100, 1000])
+ # Or dump the profile files at certain steps.
+ pctx.add_auto_profile_dump('/tmp/profiles', [1000])
+ # Run train/eval loop.
+ train_loop()
+
+ # Alternatively, enable and capture RunMetadata of next step.
+ with tf.contrib.tfprof.ProfileContext() as pctx:
+ pctx.capture_next_run_meta()
+ opts = tf.profiler.ProfileOptionBuilder.time_and_memory()
+ _ = session.run(train_op)
+ tf.profiler.profile(session.graph,
+ run_meta=pctx.run_meta(),
+ cmd='op',
+ options=opts)
+ ```
+ """
+
+ def __init__(self):
+ self._lock = threading.Lock()
+ self._capture_next_step = False
+ self._step = 0
+ self._auto_profiles = []
+ self._auto_dumps = []
+ self._run_meta = None
+
+ def add_auto_profiling(self, cmd, profile_options, profile_steps):
+ """Runs profiling at some steps with provided command and options.
+
+ Args:
+ cmd: The profiling commands.
+ profile_options: The profiling options.
+ profile_steps: A list/set of integers. The profiling command and options
+ will be run automatically at these integer steps. Each step is
+ a session.run.
+ """
+ with self._lock:
+ self._auto_profiles.append((cmd, profile_options, profile_steps))
+
+ def add_auto_profile_dump(self, outdir, dump_steps):
+ """Dumps profiles at some steps to the directory.
+
+ Args:
+ outdir: The directory to dump the profile files.
+ dump_steps: A list/set of integers. The profile files will be dumped at
+ these integer steps. Each step is a session.run.
+ """
+ with self._lock:
+ self._auto_dumps.append((outdir, dump_steps))
+
+ def capture_next_run_meta(self):
+ """Enables tracing and captures RunMetadata at next session.run.
+
+ The captured RunMetadata can be retrieved via run_meta(). It
+ will be cleared one step later.
+ """
+ with self._lock:
+ self._capture_next_step = True
+
+ def run_meta(self):
+ """Returns the RunMetadata captured at previous session.run.
+
+ capture_next_run_meta() must be called before session.run to enable
+ capturing.
+ """
+ with self._lock:
+ assert self._run_meta, 'Need to call capture_next_run_meta()'
+ return self._run_meta
+
+ def _is_capture_enforced(self):
+ with self._lock:
+ return self._capture_next_step
+
+ def _add_run_meta(self, run_meta):
+ with self._lock:
+ self._run_meta = run_meta
+ self._capture_next_step = False
+
+ def _new_step(self):
+ with self._lock:
+ self._run_meta = None
+ self._step += 1
+
+ def _profile_candidates(self):
+ to_profile = []
+ with self._lock:
+ for auto_prof in self._auto_profiles:
+ _, _, prof_steps = auto_prof
+ if self._step - 1 in prof_steps:
+ to_profile.append(auto_prof)
+ return to_profile
+
+ def _dump_candidates(self):
+ to_dump = []
+ with self._lock:
+ for auto_dump in self._auto_dumps:
+ _, dump_steps = auto_dump
+ if self._step - 1 in dump_steps:
+ to_dump.append(auto_dump)
+ return to_dump
+
+ def __enter__(self):
+ self.old_run = getattr(session.BaseSession, 'run', None)
+ self.old_init = getattr(session.BaseSession, '__init__', None)
+ if not self.old_run:
+ raise errors.InternalError(None, None, 'BaseSession misses run method.')
+ elif not self.old_init:
+ raise errors.InternalError(None, None,
+ 'BaseSession misses __init__ method.')
+ elif getattr(session.BaseSession, '_profiler_run_internal', None):
+ raise errors.InternalError(None, None,
+ 'Already in context or context not cleaned.')
+ elif getattr(session.BaseSession, '_profiler_init_internal', None):
+ raise errors.InternalError(None, None,
+ 'Already in context or context not cleaned.')
+ else:
+ setattr(session.BaseSession, 'run', _profiled_run)
+ setattr(session.BaseSession, '__init__', _profiled_init)
+ setattr(session.BaseSession, '_profiler_run_internal', self.old_run)
+ setattr(session.BaseSession, '_profiler_init_internal', self.old_init)
+ setattr(session.BaseSession, 'profile_context', self)
+ return self
+
+ def __exit__(self, exec_type, exec_value, exec_tb):
+ setattr(session.BaseSession, 'run', self.old_run)
+ setattr(session.BaseSession, '__init__', self.old_init)
+ setattr(session.BaseSession, '_profiler_run_internal', None)
+ setattr(session.BaseSession, '_profiler_init_internal', None)
+ setattr(session.BaseSession, 'profile_context', None)
diff --git a/tensorflow/python/profiler/profiler_test.py b/tensorflow/python/profiler/profiler_test.py
index 46afe1fe55..eacb7d21e6 100644
--- a/tensorflow/python/profiler/profiler_test.py
+++ b/tensorflow/python/profiler/profiler_test.py
@@ -49,17 +49,6 @@ class ProfilerTest(test.TestCase):
r = lib.BuildFullModel()
sess.run(variables.global_variables_initializer())
- profiler = model_analyzer.Profiler(sess.graph)
- profiler.profile_name_scope(opts)
- with gfile.Open(outfile, 'r') as f:
- profiler_str = f.read()
-
- model_analyzer.profile(
- sess.graph, cmd='scope', options=opts)
- with gfile.Open(outfile, 'r') as f:
- pma_str = f.read()
- self.assertEqual(pma_str, profiler_str)
-
# Test the output with run_meta.
run_meta = config_pb2.RunMetadata()
_ = sess.run(r,
@@ -67,6 +56,7 @@ class ProfilerTest(test.TestCase):
trace_level=config_pb2.RunOptions.FULL_TRACE),
run_metadata=run_meta)
+ profiler = model_analyzer.Profiler(sess.graph)
profiler.add_step(1, run_meta)
profiler.profile_graph(opts)
with gfile.Open(outfile, 'r') as f:
@@ -78,6 +68,16 @@ class ProfilerTest(test.TestCase):
pma_str = f.read()
self.assertEqual(pma_str, profiler_str)
+ profiler.profile_name_scope(opts)
+ with gfile.Open(outfile, 'r') as f:
+ profiler_str = f.read()
+
+ model_analyzer.profile(
+ sess.graph, cmd='scope', run_meta=run_meta, options=opts)
+ with gfile.Open(outfile, 'r') as f:
+ pma_str = f.read()
+ self.assertEqual(pma_str, profiler_str)
+
profiler.profile_python(opts)
with gfile.Open(outfile, 'r') as f:
profiler_str = f.read()
@@ -104,18 +104,6 @@ class ProfilerTest(test.TestCase):
pma_str = f.read()
self.assertNotEqual(pma_str, profiler_str)
- opts2 = opts.copy()
- opts2['select'] = ['params', 'float_ops']
- profiler.profile_name_scope(opts2)
- with gfile.Open(outfile, 'r') as f:
- profiler_str = f.read()
-
- model_analyzer.profile(
- sess.graph, cmd='scope', run_meta=run_meta, options=opts2)
- with gfile.Open(outfile, 'r') as f:
- pma_str = f.read()
- self.assertEqual(pma_str, profiler_str)
-
def testMultiStepProfile(self):
ops.reset_default_graph()
opts = builder.time_and_memory(min_bytes=0)