diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-08-15 21:39:28 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-15 21:42:58 -0700 |
commit | 2b51e0ba27af69c914a7523d9aae232de09e3206 (patch) | |
tree | 892089b4eca69784760e397f9bf1a5dd64dc9f18 /tensorflow/python/profiler | |
parent | 8041185b16c0ce9348bd79900f6682fc3976bfd7 (diff) |
1. Support profiling nodes in RunMetadata but not in GraphDef.
2. Add an API to allow easier profile retrieval. Currently in contrib.
PiperOrigin-RevId: 165399640
Diffstat (limited to 'tensorflow/python/profiler')
-rw-r--r-- | tensorflow/python/profiler/BUILD | 11 | ||||
-rw-r--r-- | tensorflow/python/profiler/model_analyzer.py | 38 | ||||
-rw-r--r-- | tensorflow/python/profiler/model_analyzer_test.py | 68 | ||||
-rw-r--r-- | tensorflow/python/profiler/profile_context.py | 230 | ||||
-rw-r--r-- | tensorflow/python/profiler/profiler_test.py | 34 |
5 files changed, 343 insertions, 38 deletions
diff --git a/tensorflow/python/profiler/BUILD b/tensorflow/python/profiler/BUILD index c32cddbd6d..f9e8578866 100644 --- a/tensorflow/python/profiler/BUILD +++ b/tensorflow/python/profiler/BUILD @@ -43,6 +43,7 @@ cuda_py_test( name = "model_analyzer_test", srcs = ["model_analyzer_test.py"], additional_deps = [ + ":profile_context", ":model_analyzer", "//tensorflow/python/profiler/internal:model_analyzer_testlib", "//tensorflow/python:client", @@ -98,6 +99,16 @@ tf_py_test( ) py_library( + name = "profile_context", + srcs = ["profile_context.py"], + srcs_version = "PY2AND3", + deps = [ + ":model_analyzer", + ":tfprof_logger", + ], +) + +py_library( name = "pprof_profiler", srcs = ["pprof_profiler.py"], srcs_version = "PY2AND3", diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py index eb95af6a28..5345949664 100644 --- a/tensorflow/python/profiler/model_analyzer.py +++ b/tensorflow/python/profiler/model_analyzer.py @@ -22,6 +22,7 @@ from __future__ import print_function import six +from google.protobuf import message from tensorflow.core.profiler import tfprof_options_pb2 from tensorflow.core.profiler import tfprof_output_pb2 from tensorflow.python import pywrap_tensorflow as print_mdl @@ -303,22 +304,31 @@ def profile(graph, if cmd == 'code' or cmd == 'op': tfprof_node = tfprof_output_pb2.MultiGraphNodeProto() - tfprof_node.ParseFromString( - print_mdl.PrintModelAnalysis( - graph.as_graph_def(add_shapes=True).SerializeToString(), - run_meta_str, - op_log.SerializeToString(), - cmd.encode('utf-8'), - opts.SerializeToString())) + ret = print_mdl.PrintModelAnalysis( + graph.as_graph_def(add_shapes=True).SerializeToString(), + run_meta_str, + op_log.SerializeToString(), + cmd.encode('utf-8'), + opts.SerializeToString()) + try: + tfprof_node.ParseFromString(ret) + except message.DecodeError as _: + pass + # sys.stderr.write('Cannot parse returned proto: %s.\n' % e) + elif cmd == 'graph' or cmd == 'scope': tfprof_node = 
tfprof_output_pb2.GraphNodeProto() - tfprof_node.ParseFromString( - print_mdl.PrintModelAnalysis( - graph.as_graph_def(add_shapes=True).SerializeToString(), - run_meta_str, - op_log.SerializeToString(), - cmd.encode('utf-8'), - opts.SerializeToString())) + ret = print_mdl.PrintModelAnalysis( + graph.as_graph_def(add_shapes=True).SerializeToString(), + run_meta_str, + op_log.SerializeToString(), + cmd.encode('utf-8'), + opts.SerializeToString()) + try: + tfprof_node.ParseFromString(ret) + except message.DecodeError as _: + pass + # sys.stderr.write('Cannot parse returned proto: %s.\n' % e) else: raise errors.InvalidArgumentError( None, None, 'unknown cmd: %s\n' % cmd) diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index 841fe46393..7c0080c206 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -33,6 +33,7 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.profiler import model_analyzer from tensorflow.python.profiler import option_builder +from tensorflow.python.profiler import profile_context from tensorflow.python.profiler.internal import model_analyzer_testlib as lib builder = option_builder.ProfileOptionBuilder @@ -149,7 +150,7 @@ class PrintModelAnalysisTest(test.TestCase): with gfile.Open(outfile, 'r') as f: # pylint: disable=line-too-long self.assertEqual( - 'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/7|--/35, )\n Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n DW 
(3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n ScalarW (1, 1/1 params, 0/0 flops, 
VariableV2|_trainable_variables, 0/0|1/10, )\n ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n', + 'node name | # parameters | # float_ops | assigned devices | op types | op count (run|defined) | input shapes\n_TFProfRoot (--/451 params, --/10.44k flops, _kTFScopeParent, --/8|--/36, )\n Conv2D (0/0 params, 5.83k/5.83k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x6x6x3|1:3x3x3x6)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D, 1/1|1/1, 0:2x3x3x6|1:2x2x6x12)\n DW (3x3x3x6, 162/162 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:3x3x3x6|1:3x3x3x6)\n DW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n 
DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:3x3x3x6|1:1)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:3x3x3x6)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables, 1/2|1/10, )\n DW2/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:2x2x6x12|1:2x2x6x12)\n DW2/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:4)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, Mul, 0/0|1/1, 0:2x2x6x12|1:1)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n DW2/read (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity, 1/1|1/1, 0:2x2x6x12)\n ScalarW (1, 1/1 params, 0/0 flops, VariableV2|_trainable_variables, 0/0|1/10, )\n ScalarW/Assign (0/0 params, 0/0 flops, Assign, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer (0/0 params, 0/0 flops, _kTFScopeParent, 0/0|1/7, )\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, Add, 0/0|1/6, 0:1|1:1)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, RandomStandardNormal, 0/0|1/1, 0:0)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/mul (0/0 
params, 0/0 flops, Mul, 0/0|1/1, 0:1|1:1)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, Const, 0/0|1/1, )\n ScalarW/read (0/0 params, 0/0 flops, Identity, 0/0|1/1, 0:1)\n _retval_Conv2D_1_0_0 (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|RunTimeOp, 1/1|1/1, )\n init (0/0 params, 0/0 flops, NoOp, 0/0|1/1, 0:1|1:3x3x3x6|2:2x2x6x12)\n zeros (0/0 params, 0/0 flops, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const, 1/1|1/1, )\n', f.read()) # pylint: enable=line-too-long @@ -562,6 +563,71 @@ class PrintModelAnalysisTest(test.TestCase): check_selection(['peak bytes', 'residual bytes', 'output bytes'], ['requested_bytes']) + def _trainLoop(self, train_op, train_steps, time_dir, time_step, + memory_dir, memory_step, profile_dir, dump_step): + with session.Session() as sess: + sess.run(variables.global_variables_initializer()) + # start from 1 because variable_initializer took one step. 
+ for i in range(1, train_steps + 1): + _ = sess.run(train_op) + if i in time_step: + ret = gfile.ListDirectory(time_dir) + self.assertEqual(len(ret), 1) + self.assertTrue( + gfile.Open(os.path.join(time_dir, ret[0]), 'r').read() + .find('execution time') > 0) + _ = [gfile.Remove(os.path.join(time_dir, x)) for x in ret] + else: + self.assertEqual(len(gfile.ListDirectory(time_dir)), 0) + if i in memory_step: + ret = gfile.ListDirectory(memory_dir) + self.assertEqual(len(ret), 1) + self.assertTrue( + gfile.Open(os.path.join(memory_dir, ret[0]), 'r').read() + .find('requested bytes') > 0) + _ = [gfile.Remove(os.path.join(memory_dir, x)) for x in ret] + else: + self.assertEqual(len(gfile.ListDirectory(memory_dir)), 0) + if i in dump_step: + ret = gfile.ListDirectory(profile_dir) + self.assertAllEqual(sorted(ret), + ['graph.pbtxt', 'run_metadata', 'tfprof_log']) + _ = [gfile.Remove(os.path.join(profile_dir, x)) for x in ret] + else: + if i < dump_step[0]: + self.assertFalse(gfile.Exists(profile_dir)) + else: + self.assertEqual(len(gfile.ListDirectory(profile_dir)), 0) + + def testAutoProfiling(self): + ops.reset_default_graph() + time_dir = os.path.join(test.get_temp_dir(), 'time') + memory_dir = os.path.join(test.get_temp_dir(), 'memory') + profile_dir = os.path.join(test.get_temp_dir(), 'dir/dir2/profile') + # TODO(xpan): Should we create parent directory for them? 
+ gfile.MkDir(time_dir) + gfile.MkDir(memory_dir) + + time_opts = (builder(builder.time_and_memory()) + .with_file_output(os.path.join(time_dir, 'profile')) + .select(['micros']).build()) + memory_opts = (builder(builder.time_and_memory()) + .with_file_output(os.path.join(memory_dir, 'profile')) + .select(['bytes']).build()) + + time_steps = [2, 3] + memory_steps = [1, 3] + dump_steps = [3, 4] + + x = lib.BuildSmallModel() + with profile_context.ProfileContext() as pctx: + pctx.add_auto_profiling('scope', time_opts, time_steps) + pctx.add_auto_profiling('scope', memory_opts, memory_steps) + pctx.add_auto_profile_dump(profile_dir, dump_steps) + + self._trainLoop(x, 10, time_dir, time_steps, + memory_dir, memory_steps, profile_dir, dump_steps) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/profiler/profile_context.py b/tensorflow/python/profiler/profile_context.py new file mode 100644 index 0000000000..6438fede2f --- /dev/null +++ b/tensorflow/python/profiler/profile_context.py @@ -0,0 +1,230 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""A Context that captures profile and performs profiling/dumping. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import threading + +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session +from tensorflow.python.framework import errors +from tensorflow.python.platform import gfile +from tensorflow.python.profiler import model_analyzer +from tensorflow.python.profiler import tfprof_logger + + +def _profiled_init(self, target='', graph=None, config=None): + """Overwrites the session.__init__.""" + self._profiler_init_internal(target, graph, config) # pylint: disable=protected-access + + +def _profiled_run(self, + fetches, + feed_dict=None, + options=None, + run_metadata=None): + """Overwrites the session.run().""" + # pylint: disable=protected-access + # Count the session steps. + self.profile_context._new_step() + # Fast path if no need for profiling. + to_profiles = self.profile_context._profile_candidates() + to_dumps = self.profile_context._dump_candidates() + if (not to_profiles and not to_dumps and + not self.profile_context._is_capture_enforced()): + return self._profiler_run_internal( + fetches, feed_dict, options, run_metadata) + + # Enable tracing, perform auto profiling or auto dump. 
+ if not run_metadata: + run_metadata = config_pb2.RunMetadata() + + if not options: + options = config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE) + old_trace_level = options.trace_level + else: + old_trace_level = options.trace_level + options.trace_level = config_pb2.RunOptions.FULL_TRACE + + ret = self._profiler_run_internal(fetches, feed_dict, options, run_metadata) + + if self.profile_context._capture_next_step: + self.profile_context._add_run_meta(run_metadata) + + for to_dump in to_dumps: + outdir, _ = to_dump + if not gfile.Exists(outdir): + gfile.MakeDirs(outdir) + with gfile.Open(os.path.join(outdir, 'graph.pbtxt'), 'w') as f: + f.write('%s' % self.graph.as_graph_def(add_shapes=True)) + with gfile.Open(os.path.join(outdir, 'run_metadata'), 'w') as f: + f.write(run_metadata.SerializeToString()) + tfprof_logger.write_op_log( + self.graph, outdir, run_meta=run_metadata, add_trace=True) + + for to_prof in to_profiles: + cmd, opts, _ = to_prof + model_analyzer.profile( + self.graph, run_meta=run_metadata, cmd=cmd, options=opts) + + # Restore to default. + options.trace_level = old_trace_level + return ret + # pylint: enable=protected-access + + +class ProfileContext(object): + """A Context that captures RunMetadata and performs profiling. + + ```python + # Auto profiling at step 1, 100 and 1000.: + with tf.contrib.tfprof.ProfileContext() as pctx: + # Create the profiling options. + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + # Run profiling at certain steps. Multiple ones can be added. + pctx.add_auto_profiling('op', opts, [1, 100, 1000]) + # Or dump the profile files at certain steps. + pctx.add_auto_profile_dump('/tmp/profiles', [1000]) + # Run train/eval loop. + train_loop(). + + # Alternatively, enable and capture RunMetadata of next step. 
+ with tf.contrib.tfprof.ProfileContext() as pctx: + pctx.capture_next_run_meta() + opts = tf.profiler.ProfileOptionBuilder.time_and_memory() + _ = session.run(train_op) + tf.profiler.profile(session.graph, + run_meta=pctx.run_meta(), + cmd='op', + options=opts) + ``` + """ + + def __init__(self): + self._lock = threading.Lock() + self._capture_next_step = False + self._step = 0 + self._auto_profiles = [] + self._auto_dumps = [] + self._run_meta = None + + def add_auto_profiling(self, cmd, profile_options, profile_steps): + """Runs profiling at some steps with provided command and options. + + Args: + cmd: The profiling commands. + profile_options: The profiling options. + profile_steps: A list/set of integers. The profiling command and options + will be run automatically at these integer steps. Each step is + a session.run. + """ + with self._lock: + self._auto_profiles.append((cmd, profile_options, profile_steps)) + + def add_auto_profile_dump(self, outdir, dump_steps): + """Dumps profiles at some steps to the directory. + + Args: + outdir: The directory to dump the profile files. + dump_steps: A list/set of integers. The profile files will be dumped at + these integer steps. Each step is a session.run. + """ + with self._lock: + self._auto_dumps.append((outdir, dump_steps)) + + def capture_next_run_meta(self): + """Enables tracing and captures RunMetadata at next session.run. + + The captured RunMetadata can be retrieved via run_meta(). It + will be cleared one step later. + """ + with self._lock: + self._capture_next_step = True + + def run_meta(self): + """Returns the RunMetadata captured at previous session.run. + + Needs to call capture_next_run_meta() before session.run to enable + capturing. 
+ """ + with self._lock: + assert self._run_meta, 'Need to call capture_next_run_meta()' + return self._run_meta + + def _is_capture_enforced(self): + with self._lock: + return self._capture_next_step + + def _add_run_meta(self, run_meta): + with self._lock: + self._run_meta = run_meta + self._capture_next_step = False + + def _new_step(self): + with self._lock: + self._run_meta = None + self._step += 1 + + def _profile_candidates(self): + to_profile = [] + with self._lock: + for auto_prof in self._auto_profiles: + _, _, prof_steps = auto_prof + if self._step - 1 in prof_steps: + to_profile.append(auto_prof) + return to_profile + + def _dump_candidates(self): + to_dump = [] + with self._lock: + for auto_dump in self._auto_dumps: + _, dump_steps = auto_dump + if self._step - 1 in dump_steps: + to_dump.append(auto_dump) + return to_dump + + def __enter__(self): + self.old_run = getattr(session.BaseSession, 'run', None) + self.old_init = getattr(session.BaseSession, '__init__', None) + if not self.old_run: + raise errors.InternalError(None, None, 'BaseSession misses run method.') + elif not self.old_init: + raise errors.InternalError(None, None, + 'BaseSession misses __init__ method.') + elif getattr(session.BaseSession, '_profiler_run_internal', None): + raise errors.InternalError(None, None, + 'Already in context or context not cleaned.') + elif getattr(session.BaseSession, '_profiler_init_internal', None): + raise errors.InternalError(None, None, + 'Already in context or context not cleaned.') + else: + setattr(session.BaseSession, 'run', _profiled_run) + setattr(session.BaseSession, '__init__', _profiled_init) + setattr(session.BaseSession, '_profiler_run_internal', self.old_run) + setattr(session.BaseSession, '_profiler_init_internal', self.old_init) + setattr(session.BaseSession, 'profile_context', self) + return self + + def __exit__(self, exec_type, exec_value, exec_tb): + setattr(session.BaseSession, 'run', self.old_run) + setattr(session.BaseSession, 
'__init__', self.old_init) + setattr(session.BaseSession, '_profiler_run_internal', None) + setattr(session.BaseSession, '_profiler_init_internal', None) + setattr(session.BaseSession, 'profile_context', None) diff --git a/tensorflow/python/profiler/profiler_test.py b/tensorflow/python/profiler/profiler_test.py index 46afe1fe55..eacb7d21e6 100644 --- a/tensorflow/python/profiler/profiler_test.py +++ b/tensorflow/python/profiler/profiler_test.py @@ -49,17 +49,6 @@ class ProfilerTest(test.TestCase): r = lib.BuildFullModel() sess.run(variables.global_variables_initializer()) - profiler = model_analyzer.Profiler(sess.graph) - profiler.profile_name_scope(opts) - with gfile.Open(outfile, 'r') as f: - profiler_str = f.read() - - model_analyzer.profile( - sess.graph, cmd='scope', options=opts) - with gfile.Open(outfile, 'r') as f: - pma_str = f.read() - self.assertEqual(pma_str, profiler_str) - # Test the output with run_meta. run_meta = config_pb2.RunMetadata() _ = sess.run(r, @@ -67,6 +56,7 @@ class ProfilerTest(test.TestCase): trace_level=config_pb2.RunOptions.FULL_TRACE), run_metadata=run_meta) + profiler = model_analyzer.Profiler(sess.graph) profiler.add_step(1, run_meta) profiler.profile_graph(opts) with gfile.Open(outfile, 'r') as f: @@ -78,6 +68,16 @@ class ProfilerTest(test.TestCase): pma_str = f.read() self.assertEqual(pma_str, profiler_str) + profiler.profile_name_scope(opts) + with gfile.Open(outfile, 'r') as f: + profiler_str = f.read() + + model_analyzer.profile( + sess.graph, cmd='scope', run_meta=run_meta, options=opts) + with gfile.Open(outfile, 'r') as f: + pma_str = f.read() + self.assertEqual(pma_str, profiler_str) + profiler.profile_python(opts) with gfile.Open(outfile, 'r') as f: profiler_str = f.read() @@ -104,18 +104,6 @@ class ProfilerTest(test.TestCase): pma_str = f.read() self.assertNotEqual(pma_str, profiler_str) - opts2 = opts.copy() - opts2['select'] = ['params', 'float_ops'] - profiler.profile_name_scope(opts2) - with gfile.Open(outfile, 'r') 
as f: - profiler_str = f.read() - - model_analyzer.profile( - sess.graph, cmd='scope', run_meta=run_meta, options=opts2) - with gfile.Open(outfile, 'r') as f: - pma_str = f.read() - self.assertEqual(pma_str, profiler_str) - def testMultiStepProfile(self): ops.reset_default_graph() opts = builder.time_and_memory(min_bytes=0) |