about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/python/profiler
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org>, 2017-08-15 17:48:55 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2017-08-15 17:52:23 -0700
commit: 93b21f7b1fa725299f86058436f034b15350de52 (patch)
tree: 6b0dcf69ed06601680b0b14eb0a9c25edfc292e9 /tensorflow/python/profiler
parent: 5db8be66563227f5bba37aeddce3951239dcd947 (diff)
1. Adjust the code-view pprof image to better visualize backprop.
2. Allow adding multiple RunMetadata protos for one step (e.g. one for variable initialization and one for training), so that the step has a complete profile.
3. Improve the tests a bit.

PiperOrigin-RevId: 165385567
Diffstat (limited to 'tensorflow/python/profiler')
-rw-r--r-- tensorflow/python/profiler/internal/run_metadata_test.py | 92
-rw-r--r-- tensorflow/python/profiler/model_analyzer_test.py | 84
-rw-r--r-- tensorflow/python/profiler/option_builder.py | 2
-rw-r--r-- tensorflow/python/profiler/profiler_test.py | 45
4 files changed, 175 insertions, 48 deletions
diff --git a/tensorflow/python/profiler/internal/run_metadata_test.py b/tensorflow/python/profiler/internal/run_metadata_test.py
index b758edf87e..1e26a9897e 100644
--- a/tensorflow/python/profiler/internal/run_metadata_test.py
+++ b/tensorflow/python/profiler/internal/run_metadata_test.py
@@ -40,14 +40,22 @@ SIZE = 1300
builder = option_builder.ProfileOptionBuilder
-def _extract_node(run_meta, node_names):
- if not isinstance(node_names, list):
- node_names = [node_names]
+def _extract_node(run_meta, node_name):
ret = defaultdict(list)
for dev_stat in run_meta.step_stats.dev_stats:
- dev = dev_stat.device
+ dev = dev_stat.device.lower()
+ if dev.find('cpu:') > 0:
+ dev = dev[dev.find('cpu:'):]
+ elif dev.find('gpu:') > 0:
+ dev = dev[dev.find('gpu:'):]
+ else:
+ assert False, 'Unrecognized device name: %s' % dev
+
for node_stat in dev_stat.node_stats:
- if node_stat.node_name in node_names:
+ nname = node_stat.node_name
+ if nname.find(':') > 0:
+ nname = nname[:nname.find(':')]
+ if nname == node_name:
ret[dev].append(node_stat)
return ret
@@ -62,6 +70,7 @@ def _run_model():
opts = builder.time_and_memory()
opts['min_micros'] = 0
opts['min_bytes'] = 0
+ opts['output'] = 'none'
_ = sess.run(y,
options=config_pb2.RunOptions(
trace_level=config_pb2.RunOptions.FULL_TRACE),
@@ -85,9 +94,11 @@ def _run_loop_model():
trace_level=config_pb2.RunOptions.FULL_TRACE),
run_metadata=run_meta)
+ opts = builder.time_and_memory()
+ opts['output'] = 'none'
+
tfprof_node = model_analyzer.profile(
- sess.graph, run_meta,
- options=builder.time_and_memory())
+ sess.graph, run_meta, options=opts)
return tfprof_node, run_meta
@@ -104,17 +115,9 @@ class RunMetadataTest(test.TestCase):
self.assertEqual(tfprof_node.children[0].name, 'MatMul')
self.assertGreater(tfprof_node.children[0].exec_micros, 10)
- ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul'])
- self.assertEqual(len(ret), 3)
- self.assertTrue('/job:localhost/replica:0/task:0' + gpu_dev in ret)
- del ret['/job:localhost/replica:0/task:0' + gpu_dev]
-
- has_all_stream = False
- for k, _ in six.iteritems(ret):
- self.assertTrue(gpu_dev + '/stream' in k)
- if gpu_dev + '/stream:all' in k:
- has_all_stream = True
- self.assertTrue(has_all_stream)
+ ret = _extract_node(run_meta, 'MatMul')
+ self.assertEqual(len(ret['gpu:0']), 1)
+ self.assertEqual(len(ret['gpu:0/stream:all']), 1, '%s' % run_meta)
def testCPU(self):
ops.reset_default_graph()
@@ -124,8 +127,7 @@ class RunMetadataTest(test.TestCase):
self.assertGreater(tfprof_node.children[0].exec_micros, 0)
ret = _extract_node(run_meta, 'MatMul')
- self.assertEqual(len(ret), 1)
- self.assertTrue('/job:localhost/replica:0/task:0/cpu:0' in ret)
+ self.assertEqual(len(ret['cpu:0']), 1)
ret = _extract_node(run_meta, 'MatMul:MatMul')
self.assertEqual(len(ret), 0)
@@ -137,10 +139,10 @@ class RunMetadataTest(test.TestCase):
# The while-loop caused a node to appear 4 times in scheduling.
ret = _extract_node(run_meta,
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
- self.assertEqual(len(ret['/job:localhost/replica:0/task:0/cpu:0']), 4)
+ self.assertEqual(len(ret['cpu:0']), 4)
total_cpu_execs = 0
- for node in ret['/job:localhost/replica:0/task:0/cpu:0']:
+ for node in ret['cpu:0']:
total_cpu_execs += node.op_end_rel_micros
mm_node = lib.SearchTFProfNode(
@@ -151,10 +153,31 @@ class RunMetadataTest(test.TestCase):
self.assertEqual(mm_node.cpu_exec_micros, total_cpu_execs)
self.assertEqual(mm_node.exec_micros, total_cpu_execs)
+ def testGradientGraph(self):
+ # Note: Please don't just adjust the test to make it pass.
+ # The code view logic depends on it.
+ ops.reset_default_graph()
+ _, _ = _run_loop_model()
+ graph = ops.get_default_graph()
+ forward_op = set()
+ backward_op = set()
+ back_to_forward = dict()
+ for op in graph.get_operations():
+ if op.name.find('gradients/') > 0 and op.name.find('_grad/') > 0:
+ backward_op.add(op.name)
+ idx1 = op.name.find('gradients/') + 10
+ idx2 = op.name.find('_grad/')
+ back_to_forward[op.name] = op.name[idx1:idx2]
+ else:
+ forward_op.add(op.name)
+
+ for _, f in six.iteritems(back_to_forward):
+ self.assertTrue(f in forward_op)
+
# pylint: disable=pointless-string-statement
"""
- TODO(xpan): This test is flaky because RunMetadata returned from TensorFlow
- is random. Still being investigated.
+ # TODO(xpan): This test is flaky because RunMetadata returned from TensorFlow
+ # is random. Still being investigated.
def testLoopGPU(self):
if not test.is_gpu_available():
return
@@ -165,30 +188,17 @@ class RunMetadataTest(test.TestCase):
# The while-loop caused a node to appear 4 times in scheduling.
ret = _extract_node(run_meta,
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
- self.assertEqual(len(ret['/job:localhost/replica:0/task:0/device:GPU:0']), 4)
+ self.assertEqual(len(ret['gpu:0']), 4, '%s' % run_meta)
total_cpu_execs = 0
- for node in ret['/job:localhost/replica:0/task:0/device:GPU:0']:
+ for node in ret['gpu:0']:
total_cpu_execs += node.op_end_rel_micros
- ret = _extract_node(
- run_meta,
- 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul')
- self.assertGreaterEqual(len(ret['/device:GPU:0/stream:all']), 4)
+ self.assertGreaterEqual(len(ret['gpu:0/stream:all']), 4, '%s' % run_meta)
total_accelerator_execs = 0
- for node in ret['/device:GPU:0/stream:all']:
+ for node in ret['gpu:0/stream:all']:
total_accelerator_execs += node.op_end_rel_micros
-
- mm_node = lib.SearchTFProfNode(
- tfprof_node,
- 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
-
- self.assertEqual(mm_node.run_count, 4)
- self.assertEqual(mm_node.accelerator_exec_micros, total_accelerator_execs)
- self.assertEqual(mm_node.cpu_exec_micros, total_cpu_execs)
- self.assertEqual(mm_node.exec_micros,
- total_cpu_execs + total_accelerator_execs)
"""
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 21d26b8782..841fe46393 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -21,6 +21,7 @@ import gzip
import io
import os
import random
+import re
from tensorflow.core.profiler import profile_pb2
from tensorflow.core.protobuf import config_pb2
@@ -57,6 +58,68 @@ class PrintModelAnalysisTest(test.TestCase):
' ScalarW (1, 1/1 params)\n',
f.read())
+ def testSelectEverthingDetail(self):
+ ops.reset_default_graph()
+ dev = '/gpu:0' if test.is_gpu_available() else '/cpu:0'
+ outfile = os.path.join(test.get_temp_dir(), 'dump')
+ opts = (builder(builder.trainable_variables_parameter())
+ .with_file_output(outfile)
+ .with_accounted_types(['.*'])
+ .select(['micros', 'bytes', 'params', 'float_ops', 'occurrence',
+ 'device', 'op_types', 'input_shapes']).build())
+
+ config = config_pb2.ConfigProto()
+ with session.Session(config=config) as sess, ops.device(dev):
+ x = lib.BuildSmallModel()
+
+ sess.run(variables.global_variables_initializer())
+ run_meta = config_pb2.RunMetadata()
+ _ = sess.run(x,
+ options=config_pb2.RunOptions(
+ trace_level=config_pb2.RunOptions.FULL_TRACE),
+ run_metadata=run_meta)
+
+ model_analyzer.profile(
+ sess.graph, run_meta, options=opts)
+
+ with gfile.Open(outfile, 'r') as f:
+ # pylint: disable=line-too-long
+ outputs = f.read().split('\n')
+
+ self.assertEqual(outputs[0],
+ 'node name | # parameters | # float_ops | requested bytes | total execution time | accelerator execution time | cpu execution time | assigned devices | op types | op count (run|defined) | input shapes')
+ for o in outputs[1:]:
+ if o.find('Conv2D ') > 0:
+ metrics = o[o.find('(') +1: o.find(')')].split(',')
+ # Make sure time is profiled.
+ gap = 1 if test.is_gpu_available() else 2
+ for i in range(3, 6, gap):
+ mat = re.search('(.*)us/(.*)us', metrics[i])
+ self.assertGreater(float(mat.group(1)), 0.0)
+ self.assertGreater(float(mat.group(2)), 0.0)
+ # Make sure device is profiled.
+ if test.is_gpu_available():
+ self.assertTrue(metrics[6].find('gpu') > 0)
+ self.assertFalse(metrics[6].find('cpu') > 0)
+ else:
+ self.assertFalse(metrics[6].find('gpu') > 0)
+ self.assertTrue(metrics[6].find('cpu') > 0)
+ # Make sure float_ops is profiled.
+ mat = re.search('(.*)k/(.*)k flops', metrics[1].strip())
+ self.assertGreater(float(mat.group(1)), 0.0)
+ self.assertGreater(float(mat.group(2)), 0.0)
+ # Make sure op_count is profiled.
+ self.assertEqual(metrics[8].strip(), '1/1|1/1')
+ # Make sure input_shapes is profiled.
+ self.assertEqual(metrics[9].strip(), '0:2x6x6x3|1:3x3x3x6')
+
+ if o.find('DW (3x3x3x6') > 0:
+ metrics = o[o.find('(') +1: o.find(')')].split(',')
+ mat = re.search('(.*)/(.*) params', metrics[1].strip())
+ self.assertGreater(float(mat.group(1)), 0.0)
+ self.assertGreater(float(mat.group(2)), 0.0)
+ # pylint: enable=line-too-long
+
def testSelectEverything(self):
ops.reset_default_graph()
outfile = os.path.join(test.get_temp_dir(), 'dump')
@@ -151,29 +214,38 @@ class PrintModelAnalysisTest(test.TestCase):
with gfile.Open(outfile, 'r') as f:
lines = f.read().split('\n')
result = '\n'.join([l[:min(len(l), 80)] for l in lines])
- self.assertEqual('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/91.04k flops)\n model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (0/1.80k para\n model_analyzer_testlib.py:35:BuildSmallModel:image = array_ops... (0/0 param\n model_analyzer_testlib.py:39:BuildSmallModel:initializer=init_... (0/4 param\n model_analyzer_testlib.py:43:BuildSmallModel:initializer=init_... (0/648 par\n model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:48:BuildSmallModel:initializer=init_... (0/1.15k p\n model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (0/1.04k para\n model_analyzer_testlib.py:64:BuildFullModel:target = array_op... (0/0 params, \n model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (0/0 params, \n model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min... (0/0 params, \n',
+ self.assertEqual('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/91.04k flops)\n model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (0/1.80k para\n model_analyzer_testlib.py:35:BuildSmallModel:image = array_ops... (0/0 param\n model_analyzer_testlib.py:39:BuildSmallModel:initializer=init_... (0/4 param\n model_analyzer_testlib.py:43:BuildSmallModel:initializer=init_... (0/648 par\n model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:48:BuildSmallModel:initializer=init_... (0/1.15k p\n model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (0/0 param\n model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient) (0\n model_analyzer_testlib.py:44:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n model_analyzer_testlib.py:49:BuildSmallModel:x = nn_ops.conv2d... (gradient)\n model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (0/1.04k para\n model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient) (0\n model_analyzer_testlib.py:64:BuildFullModel:target = array_op... (0/0 params, \n model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (0/0 params, \n model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient) (0\n model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min... (0/0 params, \n',
result)
self.assertLess(0, tfprof_node.total_exec_micros)
self.assertEqual(2844, tfprof_node.total_parameters)
self.assertEqual(91040, tfprof_node.total_float_ops)
- self.assertEqual(5, len(tfprof_node.children))
+ self.assertEqual(8, len(tfprof_node.children))
self.assertEqual('_TFProfRoot', tfprof_node.name)
self.assertEqual(
'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_...',
tfprof_node.children[0].name)
self.assertEqual(
- 'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c...',
+ 'model_analyzer_testlib.py:58:BuildFullModel:seq.append(array_... (gradient)',
tfprof_node.children[1].name)
self.assertEqual(
- 'model_analyzer_testlib.py:64:BuildFullModel:target = array_op...',
+ 'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c...',
tfprof_node.children[2].name)
self.assertEqual(
- 'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_...',
+ 'model_analyzer_testlib.py:62:BuildFullModel:cell, array_ops.c... (gradient)',
tfprof_node.children[3].name)
self.assertEqual(
- 'model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min...',
+ 'model_analyzer_testlib.py:64:BuildFullModel:target = array_op...',
tfprof_node.children[4].name)
+ self.assertEqual(
+ 'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_...',
+ tfprof_node.children[5].name)
+ self.assertEqual(
+ 'model_analyzer_testlib.py:65:BuildFullModel:loss = nn_ops.l2_... (gradient)',
+ tfprof_node.children[6].name)
+ self.assertEqual(
+ 'model_analyzer_testlib.py:67:BuildFullModel:return sgd_op.min...',
+ tfprof_node.children[7].name)
# pylint: enable=line-too-long
def testCodeViewLeafGraphNode(self):
diff --git a/tensorflow/python/profiler/option_builder.py b/tensorflow/python/profiler/option_builder.py
index 502fc49bb6..641895ffe5 100644
--- a/tensorflow/python/profiler/option_builder.py
+++ b/tensorflow/python/profiler/option_builder.py
@@ -406,7 +406,7 @@ class ProfileOptionBuilder(object):
"""Generate a pprof profile gzip file.
To use the pprof file:
- pprof -png --nodecount=20 --sample_index=1 <pprof_file>
+ pprof -png --nodecount=100 --sample_index=1 <pprof_file>
Args:
pprof_file: filename for output, usually suffixed with .pb.gz.
diff --git a/tensorflow/python/profiler/profiler_test.py b/tensorflow/python/profiler/profiler_test.py
index 2170e1bdea..46afe1fe55 100644
--- a/tensorflow/python/profiler/profiler_test.py
+++ b/tensorflow/python/profiler/profiler_test.py
@@ -183,6 +183,51 @@ class ProfilerTest(test.TestCase):
checker = advice_pb.checkers['ExpensiveOperationChecker']
self.assertGreater(len(checker.reports), 0)
+ def testMultipleProfilePerStep(self):
+ ops.reset_default_graph()
+ opts = (builder(builder.trainable_variables_parameter())
+ .with_empty_output()
+ .with_accounted_types(['.*'])
+ .select(['micros', 'bytes', 'peak_bytes',
+ 'residual_bytes', 'output_bytes']).build())
+
+ r = lib.BuildSmallModel()
+ sess = session.Session()
+ profiler = model_analyzer.Profiler(sess.graph)
+
+ init_var_run_meta = config_pb2.RunMetadata()
+ sess.run(variables.global_variables_initializer(),
+ options=config_pb2.RunOptions(
+ trace_level=config_pb2.RunOptions.FULL_TRACE),
+ run_metadata=init_var_run_meta)
+
+ train_run_meta = config_pb2.RunMetadata()
+ sess.run(r,
+ options=config_pb2.RunOptions(
+ trace_level=config_pb2.RunOptions.FULL_TRACE),
+ run_metadata=train_run_meta)
+
+ profiler.add_step(0, train_run_meta)
+ ret1 = profiler.profile_name_scope(opts)
+ n1 = lib.SearchTFProfNode(
+ ret1, 'DW/Initializer/random_normal/RandomStandardNormal')
+ # Without the var initialization run_meta, it doesn't have the
+ # information of var_initialization.
+ self.assertEqual(n1.exec_micros, 0)
+ self.assertEqual(n1.requested_bytes, 0)
+ self.assertEqual(n1.peak_bytes, 0)
+ self.assertEqual(n1.residual_bytes, 0)
+
+ profiler.add_step(0, init_var_run_meta)
+ ret2 = profiler.profile_name_scope(opts)
+ n2 = lib.SearchTFProfNode(
+ ret2, 'DW/Initializer/random_normal/RandomStandardNormal')
+ # After adding the var initialization run_meta.
+ self.assertGreater(n2.exec_micros, 0)
+ self.assertGreater(n2.requested_bytes, 0)
+ self.assertGreater(n2.peak_bytes, 0)
+ self.assertGreater(n2.residual_bytes, 0)
+
if __name__ == '__main__':
test.main()