diff options
author | 2017-05-18 17:44:51 -0700 | |
---|---|---|
committer | 2017-05-18 17:48:39 -0700 | |
commit | 8eb7ba18b11145f164cb0fb9b0a91a95e165687c (patch) | |
tree | 758ccf11d38f0ef27b2bbadd05ff046b31a29525 /tensorflow/python/debug/lib | |
parent | 89bf99f1b94d515c9b13a6f2eac3a1bf65d9c50f (diff) |
tfdbg: add "print_source" ("ps") command to profiler mode
Related changes:
* Refactored part of cli/profile_analyzer_cli.py to new file lib/profiling.py.
* Improve the way in which filtering is done for file path and line number.
* Add unit test for the "lp" and "ps" commands under tf.while_loops.
* Add op type to the "lp" table. Also add the ability to sort the rows by op type.
* Let the length of the all-"-" divider scale automatically with screen width in the curses UI.
PiperOrigin-RevId: 156502357
Diffstat (limited to 'tensorflow/python/debug/lib')
-rw-r--r-- | tensorflow/python/debug/lib/profiling.py | 108 | ||||
-rw-r--r-- | tensorflow/python/debug/lib/profiling_test.py | 100 | ||||
-rw-r--r-- | tensorflow/python/debug/lib/source_utils.py | 83 |
3 files changed, 287 insertions, 4 deletions
diff --git a/tensorflow/python/debug/lib/profiling.py b/tensorflow/python/debug/lib/profiling.py new file mode 100644 index 0000000000..dd580251a6 --- /dev/null +++ b/tensorflow/python/debug/lib/profiling.py @@ -0,0 +1,108 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Data structures and algorithms for profiling information.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + + +class ProfileDatum(object): + """Profile data point.""" + + def __init__(self, + device_name, + node_exec_stats, + file_path, + line_number, + func_name, + op_type): + """Constructor. + + Args: + device_name: (string) name of the device. + node_exec_stats: `NodeExecStats` proto. + file_path: path to the source file involved in creating the op. + line_number: line number in the file involved in creating the op. + func_name: name of the function that the line belongs to. + op_type: (string) Operation type. 
+ """ + self.device_name = device_name + self.node_exec_stats = node_exec_stats + self.file_path = file_path + self.line_number = line_number + self.func_name = func_name + if self.file_path: + self.file_line_func = "%s:%d(%s)" % ( + os.path.basename(self.file_path), self.line_number, self.func_name) + else: + self.file_line_func = "" + self.op_type = op_type + self.start_time = self.node_exec_stats.all_start_micros + self.op_time = (self.node_exec_stats.op_end_rel_micros - + self.node_exec_stats.op_start_rel_micros) + + @property + def exec_time(self): + """Op execution time plus pre- and post-processing.""" + return self.node_exec_stats.all_end_rel_micros + + +class AggregateProfile(object): + """Profile summary data for aggregating a number of ProfileDatum.""" + + def __init__(self, profile_datum): + """Constructor. + + Args: + profile_datum: (`ProfileDatum`) an instance of `ProfileDatum` to + initialize this object with. + """ + + self.total_op_time = profile_datum.op_time + self.total_exec_time = profile_datum.exec_time + device_and_node = "%s:%s" % (profile_datum.device_name, + profile_datum.node_exec_stats.node_name) + self._node_to_exec_count = {device_and_node: 1} + + def add(self, profile_datum): + """Accumulate a new instance of ProfileDatum. + + Args: + profile_datum: (`ProfileDatum`) an instance of `ProfileDatum` to + accumulate to this object. 
+ """ + + self.total_op_time += profile_datum.op_time + self.total_exec_time += profile_datum.exec_time + device_and_node = "%s:%s" % (profile_datum.device_name, + profile_datum.node_exec_stats.node_name) + + device_and_node = "%s:%s" % (profile_datum.device_name, + profile_datum.node_exec_stats.node_name) + if device_and_node in self._node_to_exec_count: + self._node_to_exec_count[device_and_node] += 1 + else: + self._node_to_exec_count[device_and_node] = 1 + + @property + def node_count(self): + return len(self._node_to_exec_count) + + @property + def node_exec_count(self): + return sum(self._node_to_exec_count.values()) diff --git a/tensorflow/python/debug/lib/profiling_test.py b/tensorflow/python/debug/lib/profiling_test.py new file mode 100644 index 0000000000..f7b120ba5d --- /dev/null +++ b/tensorflow/python/debug/lib/profiling_test.py @@ -0,0 +1,100 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ==============================================================================
"""Unit tests for the basic data structures and algorithms for profiling."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.core.framework import step_stats_pb2
from tensorflow.python.debug.lib import profiling
from tensorflow.python.framework import test_util
from tensorflow.python.platform import googletest


class AggregateProfile(test_util.TensorFlowTestCase):
  """Tests for `profiling.AggregateProfile` accumulation semantics."""

  def _profile_datum(self, device_name, node_name, op_start, op_end, all_end,
                     line_number, op_type):
    """Build a ProfileDatum around a synthetic NodeExecStats proto."""
    node_exec_stats = step_stats_pb2.NodeExecStats(
        node_name=node_name,
        op_start_rel_micros=op_start,
        op_end_rel_micros=op_end,
        all_end_rel_micros=all_end)
    return profiling.ProfileDatum(
        device_name, node_exec_stats, "/foo/bar.py", line_number, "func1",
        op_type)

  def setUp(self):
    # Same fixture values as before, constructed through the shared helper:
    # datum 1 and 3 share a node on cpu:0; datum 4 is the same node name on a
    # different device; datum 2 is a distinct node.
    self.profile_datum_1 = self._profile_datum(
        "cpu:0", "Add/123", 3, 5, 4, 10, "Add")
    self.profile_datum_2 = self._profile_datum(
        "cpu:0", "Mul/456", 13, 16, 17, 11, "Mul")
    self.profile_datum_3 = self._profile_datum(
        "cpu:0", "Add/123", 103, 105, 4, 12, "Add")
    self.profile_datum_4 = self._profile_datum(
        "gpu:0", "Add/123", 203, 205, 4, 13, "Add")

  def testAggregateProfileConstructorWorks(self):
    aggregate_data = profiling.AggregateProfile(self.profile_datum_1)

    self.assertEqual(2, aggregate_data.total_op_time)
    self.assertEqual(4, aggregate_data.total_exec_time)
    self.assertEqual(1, aggregate_data.node_count)
    self.assertEqual(1, aggregate_data.node_exec_count)

  def testAddToAggregateProfileWithDifferentNodeWorks(self):
    aggregate_data = profiling.AggregateProfile(self.profile_datum_1)
    aggregate_data.add(self.profile_datum_2)

    self.assertEqual(5, aggregate_data.total_op_time)
    self.assertEqual(21, aggregate_data.total_exec_time)
    self.assertEqual(2, aggregate_data.node_count)
    self.assertEqual(2, aggregate_data.node_exec_count)

  def testAddToAggregateProfileWithSameNodeWorks(self):
    aggregate_data = profiling.AggregateProfile(self.profile_datum_1)
    aggregate_data.add(self.profile_datum_2)
    aggregate_data.add(self.profile_datum_3)

    self.assertEqual(7, aggregate_data.total_op_time)
    self.assertEqual(25, aggregate_data.total_exec_time)
    self.assertEqual(2, aggregate_data.node_count)
    self.assertEqual(3, aggregate_data.node_exec_count)

  def testAddToAggregateProfileWithDifferentDeviceSameNodeWorks(self):
    aggregate_data = profiling.AggregateProfile(self.profile_datum_1)
    aggregate_data.add(self.profile_datum_4)

    self.assertEqual(4, aggregate_data.total_op_time)
    self.assertEqual(8, aggregate_data.total_exec_time)
    self.assertEqual(2, aggregate_data.node_count)
    self.assertEqual(2, aggregate_data.node_exec_count)


if __name__ == "__main__":
  googletest.main()
guess_is_tensorflow_py_library(py_file_path): not is_extension_compiled_python_source(py_file_path)): raise ValueError( "Input file path (%s) is not a Python source file." % py_file_path) - py_file_path = os.path.normpath(os.path.abspath(py_file_path)) + py_file_path = _norm_abs_path(py_file_path) return (py_file_path.startswith(_TENSORFLOW_BASEDIR) and not py_file_path.endswith("_test.py") and @@ -73,6 +82,14 @@ def guess_is_tensorflow_py_library(py_file_path): os.path.normpath("python/debug/examples"))) +def load_source(source_file_path): + with open(source_file_path, "rU") as f: + source_text = f.read() + source_lines = source_text.split("\n") + line_num_width = int(np.ceil(np.log10(len(source_lines)))) + 3 + return source_lines, line_num_width + + def annotate_source(dump, source_file_path, do_dumped_tensors=False, @@ -109,7 +126,7 @@ def annotate_source(dump, raise ValueError("Cannot perform source annotation due to a lack of set " "Python graph in the dump object") - source_file_path = os.path.normpath(os.path.abspath(source_file_path)) + source_file_path = _norm_abs_path(source_file_path) line_to_op_names = {} for op in py_graph.get_operations(): @@ -118,7 +135,7 @@ def annotate_source(dump, max_line is not None and line_number >= max_line): continue - if os.path.normpath(os.path.abspath(file_path)) != source_file_path: + if _norm_abs_path(file_path) != source_file_path: continue if do_dumped_tensors: @@ -195,7 +212,7 @@ def list_source_files_against_dump(dump, continue for file_path, line_number, _, _ in dump.node_traceback(op.name): - file_path = os.path.normpath(os.path.abspath(file_path)) + file_path = _norm_abs_path(file_path) if (file_path in to_skip_file_paths or path_regex and not path_regex.match(file_path) or not os.path.isfile(file_path)): @@ -238,3 +255,61 @@ def list_source_files_against_dump(dump, path_to_first_line[file_path])) return sorted(output, key=lambda x: x[0]) + + +def annotate_source_against_profile(profile_data, + source_file_path, 
+ node_name_filter=None, + op_type_filter=None, + min_line=None, + max_line=None): + """Annotate a Python source file with profiling information at each line. + + (The annotation doesn't change the source file itself.) + + Args: + profile_data: (`list` of `ProfileDatum`) A list of `ProfileDatum`. + source_file_path: (`str`) Path to the source file being annotated. + node_name_filter: Regular expression to filter by node name. + op_type_filter: Regular expression to filter by op type. + min_line: (`None` or `int`) The 1-based line to start annotate the source + file from (inclusive). + max_line: (`None` or `int`) The 1-based line number to end the annotation + at (exclusive). + + Returns: + A `dict` mapping 1-based line number to a the namedtuple + `profiling.LineOrFuncProfileSummary`. + """ + + source_file_path = _norm_abs_path(source_file_path) + + node_name_regex = re.compile(node_name_filter) if node_name_filter else None + op_type_regex = re.compile(op_type_filter) if op_type_filter else None + + line_to_profile_summary = {} + for profile_datum in profile_data: + if not profile_datum.file_path: + continue + + if _norm_abs_path(profile_datum.file_path) != source_file_path: + continue + + if (min_line is not None and profile_datum.line_number < min_line or + max_line is not None and profile_datum.line_number >= max_line): + continue + + if (node_name_regex and + not node_name_regex.match(profile_datum.node_exec_stats.node_name)): + continue + + if op_type_regex and not op_type_regex.match(profile_datum.op_type): + continue + + if profile_datum.line_number not in line_to_profile_summary: + line_to_profile_summary[profile_datum.line_number] = ( + profiling.AggregateProfile(profile_datum)) + else: + line_to_profile_summary[profile_datum.line_number].add(profile_datum) + + return line_to_profile_summary |