diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-05-24 15:06:46 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-05-24 15:10:29 -0700 |
commit | 61a36c4383ffa6f0479875fe32cad5a2b9c74ddd (patch) | |
tree | e54b375f2d24c63def658b7dcc267d3a3040aff2 /tensorflow/contrib/tfprof | |
parent | df7c4d73c4e079899fb692589c5513bdf9096bfe (diff) |
Add a view that organizes results by operation type:
tfprof> op -select micros,bytes,occurrence -order_by micros
SoftmaxCrossEntropyWithLogits 36.58MB (100.00%, 0.05%), 1.37sec
(100.00%, 23.56%), 30
MatMul 2720.57MB (99.95%, 3.66%), 988.90ms (76.44%,
17.05%), 3450
ConcatV2 741.37MB (96.29%, 1.00%), 421.44ms
(59.38%, 7.27%), 6098
Mul 3957.24MB (95.29%, 5.33%), 418.90ms
(52.12%, 7.22%), 9427
Add 740.05MB (89.96%, 1.00%), 335.26ms
(44.89%, 5.78%), 2180
Sub 32.46MB (88.97%, 0.04%), 216.44ms
The CL also:
1. Unify code view and op view implementation and rename.
2. Clean up a few unused features.
3. Unify the option semantics of all views.
PiperOrigin-RevId: 157043043
Diffstat (limited to 'tensorflow/contrib/tfprof')
3 files changed, 80 insertions, 36 deletions
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py index 17dff69edd..fcce3cd45b 100644 --- a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py @@ -22,6 +22,7 @@ from __future__ import print_function from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger +from tensorflow.python.framework import errors from tensorflow.tools.tfprof import tfprof_options_pb2 from tensorflow.tools.tfprof import tfprof_output_pb2 @@ -36,7 +37,6 @@ TRAINABLE_VARS_PARAMS_STAT_OPTIONS = { 'min_micros': 0, 'min_params': 0, 'min_float_ops': 0, - 'device_regexes': ['.*'], 'order_by': 'name', 'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES], 'start_name_regexes': ['.*'], @@ -56,7 +56,6 @@ FLOAT_OPS_OPTIONS = { 'min_micros': 0, 'min_params': 0, 'min_float_ops': 1, - 'device_regexes': ['.*'], 'order_by': 'float_ops', 'account_type_regexes': ['.*'], 'start_name_regexes': ['.*'], @@ -78,7 +77,6 @@ PRINT_PARAMS_ON_DEVICE = { 'min_micros': 0, 'min_params': 0, 'min_float_ops': 0, - 'device_regexes': ['.*'], 'order_by': 'name', 'account_type_regexes': ['.*ps.*task:0.*'], 'start_name_regexes': ['.*'], @@ -98,7 +96,6 @@ PRINT_ALL_TIMING_MEMORY = { 'min_micros': 1, # Only >=1 'min_params': 0, 'min_float_ops': 0, - 'device_regexes': ['.*'], 'order_by': 'name', 'account_type_regexes': ['.*'], 'start_name_regexes': ['.*'], @@ -122,19 +119,10 @@ def print_model_analysis(graph, tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS): """Print model statistics. - Prints the model statistics to stdout. Also returns the results - in a TFGraphNodeProto proto. See go/tfprof or run tfprof tool: + See go/tfprof or README for examples and tutorials. 
+ Run tfprof tool for help: 'bazel run third_party/tensorflow/tools/tfprof help' - Examples: - Show the parameter/shape statistics of tf.trainable_variables(). - print_model_analysis(sess.graph). - - Show number of float ops. Only ops with RegisterStatistics defined - are counted. - show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS - print_model_analysis(sess.graph, tfprof_options=show_float_op_opts) - Args: graph: tf.Graph. run_meta: tensorflow::RunMetadata proto. When provided, also shows valid @@ -142,15 +130,16 @@ def print_model_analysis(graph, 'micros' and 'bytes'. op_log: tensorflow::tfprof::OpLog proto. users can use this proto to group together ops and use a op_type to select the group. - tfprof_cmd: string. Either 'scope', 'graph', 'code'. - 'scope' view organize outputs using ops' name scope. - 'graph' view organize outputs using op's inputs/outputs. + tfprof_cmd: string. Either 'op', 'scope', 'graph', 'code'. + 'op' view organize outputs using operation type. (e.g. MatMul) + 'scope' view organize outputs using graph node name scope. + 'graph' view organize outputs using graph node inputs/outputs. 'code' view organize outputs using Python call stack. tfprof_options: See 'tfprof help' for details. Returns: If tfprof_cmd is 'scope' or 'graph', returns TFGraphNodeProto proto. - If tfprof_cmd is 'code', returns TFCodeNodeProto proto. - Side effect: a formatted output to stdout. + If tfprof_cmd is 'op' or 'code', returns TFMultiGraphNodeProto proto. 
+ Side effect: stdout/file/timeline.json depending on tfprof_options['output'] """ # pylint: disable=protected-access op_log = tfprof_logger._merge_default_with_oplog( @@ -162,8 +151,11 @@ def print_model_analysis(graph, opts.min_micros = tfprof_options['min_micros'] opts.min_params = tfprof_options['min_params'] opts.min_float_ops = tfprof_options['min_float_ops'] - for p in tfprof_options['device_regexes']: - opts.device_regexes.append(p) + if 'min_occurrence' in tfprof_options: + opts.min_occurrence = tfprof_options['min_occurrence'] + else: + opts.min_occurrence = 0 + opts.order_by = tfprof_options['order_by'] for p in tfprof_options['account_type_regexes']: opts.account_type_regexes.append(p) @@ -183,8 +175,8 @@ def print_model_analysis(graph, run_meta_str = run_meta.SerializeToString() if run_meta else b'' - if tfprof_cmd == 'code': - tfprof_node = tfprof_output_pb2.TFCodeNodeProto() + if tfprof_cmd == 'code' or tfprof_cmd == 'op': + tfprof_node = tfprof_output_pb2.TFMultiGraphNodeProto() tfprof_node.ParseFromString( print_mdl.PrintModelAnalysis( graph.as_graph_def().SerializeToString(), @@ -192,7 +184,7 @@ def print_model_analysis(graph, op_log.SerializeToString(), tfprof_cmd.encode('utf-8'), opts.SerializeToString())) - else: + elif tfprof_cmd == 'graph' or tfprof_cmd == 'scope': tfprof_node = tfprof_output_pb2.TFGraphNodeProto() tfprof_node.ParseFromString( print_mdl.PrintModelAnalysis( @@ -201,5 +193,8 @@ def print_model_analysis(graph, op_log.SerializeToString(), tfprof_cmd.encode('utf-8'), opts.SerializeToString())) + else: + raise errors.InvalidArgumentError( + None, None, 'unknown tfprof_cmd: %s\n' % tfprof_cmd) return tfprof_node diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py index 561e0a8577..14f8fcff9d 100644 --- a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py +++ 
b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py @@ -43,7 +43,8 @@ class PrintModelAnalysisTest(test.TestCase): model_analyzer.print_model_analysis(sess.graph, tfprof_options=opts) with gfile.Open(outfile, 'r') as f: - self.assertEqual(u'_TFProfRoot (--/451 params)\n' + self.assertEqual(u'node name | # parameters\n' + '_TFProfRoot (--/451 params)\n' ' DW (3x3x3x6, 162/162 params)\n' ' DW2 (2x2x6x12, 288/288 params)\n' ' ScalarW (1, 1/1 params)\n', @@ -56,7 +57,8 @@ class PrintModelAnalysisTest(test.TestCase): opts['output'] = 'file:outfile=' + outfile opts['account_type_regexes'] = ['.*'] opts['select'] = [ - 'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types' + 'bytes', 'params', 'float_ops', 'occurrence', + 'device', 'op_types' ] with session.Session() as sess, ops.device('/cpu:0'): @@ -75,7 +77,7 @@ class PrintModelAnalysisTest(test.TestCase): with gfile.Open(outfile, 'r') as f: # pylint: disable=line-too-long self.assertEqual( - '_TFProfRoot (0/451 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables)\n DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 
0B/0B, Const)\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables)\n DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, VariableV2|_trainable_variables)\n ScalarW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n ScalarW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n ScalarW/read (0/0 params, 0/0 flops, 0B/0B, Identity)\n init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n zeros (0/0 params, 0/0 flops, 864B/864B, 
/job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n', + 'node name | # parameters | # float_ops | output bytes | assigned devices | op types\n_TFProfRoot (--/451 params, --/10.44k flops, --/5.28KB, _kTFScopeParent)\n Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables)\n DW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|VariableV2|_trainable_variables)\n DW2/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 
flops, 0B/0B, Mul)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, VariableV2|_trainable_variables)\n ScalarW/Assign (0/0 params, 0/0 flops, 0B/0B, Assign)\n ScalarW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n ScalarW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, Add)\n ScalarW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, RandomStandardNormal)\n ScalarW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, Const)\n ScalarW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, Mul)\n ScalarW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, Const)\n ScalarW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, Const)\n ScalarW/read (0/0 params, 0/0 flops, 0B/0B, Identity)\n init (0/0 params, 0/0 flops, 0B/0B, NoOp)\n zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n', f.read()) # pylint: enable=line-too-long @@ -107,9 +109,8 @@ class PrintModelAnalysisTest(test.TestCase): sess.graph, run_meta, tfprof_cmd='code', tfprof_options=opts) with gfile.Open(outfile, 'r') as f: - # pylint: disable=line-too-long - self.assertEqual('_TFProfRoot (', f.read()[0:13]) - # pylint: enable=line-too-long + self.assertEqual('node name | output bytes | # parameters | # float_', + f.read()[0:50]) def testComplexCodeView(self): ops.reset_default_graph() @@ -136,7 +137,10 @@ class PrintModelAnalysisTest(test.TestCase): # pylint: disable=line-too-long with gfile.Open(outfile, 'r') as f: - self.assertEqual('_TFProfRoot (0', f.read()[:14]) + lines = f.read().split('\n') + result = '\n'.join([l[:min(len(l), 80)] for l in lines]) + 
self.assertEqual('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/54.08k flops)\n model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (0/1.80k para\n model_analyzer_testlib.py:35:BuildSmallModel:image = array_ops... (0/0 param\n model_analyzer_testlib.py:39:BuildSmallModel:initializer=init_... (0/4 param\n model_analyzer_testlib.py:43:BuildSmallModel:initializer=init_... (0/648 par\n model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:48:BuildSmallModel:initializer=init_... (0/1.15k p\n model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (0/1.04k para\n model_analyzer_testlib.py:64:BuildFullModel:target = array_op... (0/0 params, \n model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (0/0 params, \n model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min... (0/0 params, \n', + result) self.assertLess(0, tfprof_node.total_exec_micros) self.assertEqual(2844, tfprof_node.total_parameters) @@ -166,7 +170,7 @@ class PrintModelAnalysisTest(test.TestCase): opts['account_type_regexes'] = ['.*'] opts['account_displayed_op_only'] = False opts['select'] = [ - 'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device' + 'bytes', 'params', 'float_ops', 'device' ] with session.Session() as sess, ops.device('/cpu:0'): @@ -214,6 +218,54 @@ class PrintModelAnalysisTest(test.TestCase): # Test that a json file is created. 
self.assertLess(1000, len(f.read())) + def testOpView(self): + ops.reset_default_graph() + opts = model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS + outfile = os.path.join(test.get_temp_dir(), 'dump') + opts['output'] = 'file:outfile=' + outfile + opts['account_type_regexes'] = ['.*'] + opts['min_occurrence'] = 10 + opts['select'] = [ + 'params', 'micros', 'occurrence', + ] + opts['order_by'] = 'occurrence' + + with session.Session() as sess, ops.device('/cpu:0'): + x = lib.BuildFullModel() + + sess.run(variables.global_variables_initializer()) + run_meta = config_pb2.RunMetadata() + _ = sess.run(x, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE), + run_metadata=run_meta) + + tfprof_node = model_analyzer.print_model_analysis( + sess.graph, run_meta, tfprof_cmd='op', tfprof_options=opts) + + with gfile.Open(outfile, 'r') as f: + self.assertEqual( + 'nodename|executiontime|#parameters|opocc', + f.read().replace('\t', '').replace(' ', '')[0:40]) + + total_children = 0 + last_occurrence = 1e32 + last_total_micros = tfprof_node.total_exec_micros + last_micros = tfprof_node.exec_micros + while tfprof_node.children: + self.assertEqual(len(tfprof_node.children), 1) + tfprof_node = tfprof_node.children[0] + + self.assertEqual( + last_total_micros, tfprof_node.total_exec_micros + last_micros) + last_total_micros = tfprof_node.total_exec_micros + last_micros = tfprof_node.exec_micros + + total_children += 1 + self.assertLessEqual(len(tfprof_node.graph_nodes), last_occurrence) + last_occurrence = len(tfprof_node.graph_nodes) + self.assertEqual(total_children, 15) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py index c3e9fc9cc0..41154d552b 100644 --- a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py +++ 
b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py @@ -42,7 +42,6 @@ TEST_OPTIONS = { 'min_micros': 0, 'min_params': 0, 'min_float_ops': 0, - 'device_regexes': ['.*'], 'order_by': 'name', 'account_type_regexes': ['.*'], 'start_name_regexes': ['.*'], @@ -76,8 +75,6 @@ class PrintModelAnalysisTest(test.TestCase): opts.min_micros = TEST_OPTIONS['min_micros'] opts.min_params = TEST_OPTIONS['min_params'] opts.min_float_ops = TEST_OPTIONS['min_float_ops'] - for p in TEST_OPTIONS['device_regexes']: - opts.device_regexes.append(p) opts.order_by = TEST_OPTIONS['order_by'] for p in TEST_OPTIONS['account_type_regexes']: opts.account_type_regexes.append(p) |