author    Shanqing Cai <cais@google.com>                    2017-01-03 19:15:24 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-01-03 19:23:31 -0800
commit 271b7f34f4fc65bc92cef1bf17548aa46ab63daf (patch)
tree   15e60130e5ef50750121be8e946dea97e47a97bd /tensorflow
parent 6703501903e1920b55fb76a2ee85398a6e296bf9 (diff)
tfdbg CLI: let list_tensors (lt) output display dump file size
Also allow sorting of the dumped tensors by dump file size.

Change: 143511153
Diffstat (limited to 'tensorflow')
-rw-r--r--  tensorflow/python/debug/BUILD                      |  1
-rw-r--r--  tensorflow/python/debug/cli/analyzer_cli.py        | 62
-rw-r--r--  tensorflow/python/debug/cli/analyzer_cli_test.py   | 77
-rw-r--r--  tensorflow/python/debug/cli/cli_shared.py          | 30
-rw-r--r--  tensorflow/python/debug/cli/cli_shared_test.py     | 40
-rw-r--r--  tensorflow/python/debug/cli/command_parser.py      | 34
-rw-r--r--  tensorflow/python/debug/cli/command_parser_test.py | 46
-rw-r--r--  tensorflow/python/debug/debug_data.py              | 49
-rw-r--r--  tensorflow/python/debug/debug_data_test.py         |  7
-rw-r--r--  tensorflow/python/debug/session_debug_testlib.py   | 22
10 files changed, 332 insertions(+), 36 deletions(-)
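
For orientation, here is a sketch of what the extended list_tensors (lt) output looks like with the new sort option; the timestamps and sizes below are illustrative, not taken from a real run:

    tfdbg> lt -s dump_size -r
    t (ms)     Size     Op type      Tensor name
    [2.311]    1.00k    MatMul       simple_mul_add/matmul:0
    [0.842]    320      Identity     simple_mul_add/u/read:0
    [0.125]    96       VariableV2   simple_mul_add/u:0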
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index c672404328..5d4f3be459 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -441,6 +441,7 @@ cuda_py_test(
],
additional_deps = [
":analyzer_cli",
+ ":command_parser",
":debug_data",
":debug_utils",
":debugger_cli_common",
diff --git a/tensorflow/python/debug/cli/analyzer_cli.py b/tensorflow/python/debug/cli/analyzer_cli.py
index ce771e4297..eab55d627e 100644
--- a/tensorflow/python/debug/cli/analyzer_cli.py
+++ b/tensorflow/python/debug/cli/analyzer_cli.py
@@ -51,6 +51,7 @@ CTRL_LABEL = "(Ctrl) "
ELLIPSIS = "..."
SORT_TENSORS_BY_TIMESTAMP = "timestamp"
+SORT_TENSORS_BY_DUMP_SIZE = "dump_size"
SORT_TENSORS_BY_OP_TYPE = "op_type"
SORT_TENSORS_BY_TENSOR_NAME = "tensor_name"
@@ -173,11 +174,9 @@ class DebugAnalyzer(object):
dest="sort_by",
type=str,
default=SORT_TENSORS_BY_TIMESTAMP,
- help=("the field to sort the data by: " +
- "(%s | %s | %s)" %
- (SORT_TENSORS_BY_TIMESTAMP,
- SORT_TENSORS_BY_OP_TYPE,
- SORT_TENSORS_BY_TENSOR_NAME)))
+ help=("the field to sort the data by: (%s | %s | %s | %s)" %
+ (SORT_TENSORS_BY_TIMESTAMP, SORT_TENSORS_BY_DUMP_SIZE,
+ SORT_TENSORS_BY_OP_TYPE, SORT_TENSORS_BY_TENSOR_NAME)))
ap.add_argument(
"-r",
"--reverse",
@@ -431,7 +430,7 @@ class DebugAnalyzer(object):
# TODO(cais): Implement filter by lambda on tensor value.
- max_timestamp_width, max_op_type_width = (
+ max_timestamp_width, max_dump_size_width, max_op_type_width = (
self._measure_tensor_list_column_widths(data_to_show))
# Sort the data.
@@ -440,7 +439,7 @@ class DebugAnalyzer(object):
output.extend(
self._tensor_list_column_heads(parsed, max_timestamp_width,
- max_op_type_width))
+ max_dump_size_width, max_op_type_width))
dump_count = 0
for dump in data_to_show:
@@ -453,13 +452,17 @@ class DebugAnalyzer(object):
continue
rel_time = (dump.timestamp - self._debug_dump.t0) / 1000.0
+ dump_size_str = cli_shared.bytes_to_readable_str(dump.dump_size_bytes)
dumped_tensor_name = "%s:%d" % (dump.node_name, dump.output_slot)
op_type = self._debug_dump.node_op_type(dump.node_name)
line = "[%.3f]" % rel_time
line += " " * (max_timestamp_width - len(line))
+ line += dump_size_str
+ line += " " * (max_timestamp_width + max_dump_size_width - len(line))
line += op_type
- line += " " * (max_timestamp_width + max_op_type_width - len(line))
+ line += " " * (max_timestamp_width + max_dump_size_width +
+ max_op_type_width - len(line))
line += " %s" % dumped_tensor_name
output.append(
@@ -492,6 +495,7 @@ class DebugAnalyzer(object):
Returns:
(int) maximum width of the timestamp column. 0 if data is empty.
+ (int) maximum width of the dump size column. 0 if data is empty.
(int) maximum width of the op type column. 0 if data is empty.
"""
@@ -500,13 +504,19 @@ class DebugAnalyzer(object):
max_rel_time_ms = (data[-1].timestamp - self._debug_dump.t0) / 1000.0
max_timestamp_width = len("[%.3f] " % max_rel_time_ms)
+ max_dump_size_width = 0
+ for dump in data:
+ dump_size_str = cli_shared.bytes_to_readable_str(dump.dump_size_bytes)
+ if len(dump_size_str) + 1 > max_dump_size_width:
+ max_dump_size_width = len(dump_size_str) + 1
+
max_op_type_width = 0
for dump in data:
op_type = self._debug_dump.node_op_type(dump.node_name)
if len(op_type) > max_op_type_width:
max_op_type_width = len(op_type)
- return max_timestamp_width, max_op_type_width
+ return max_timestamp_width, max_dump_size_width, max_op_type_width
def _sort_dump_data_by(self, data, sort_by, reverse):
"""Sort a list of DebugTensorDatum in specified order.
@@ -528,6 +538,8 @@ class DebugAnalyzer(object):
data,
reverse=reverse,
key=lambda x: x.timestamp)
+ elif sort_by == SORT_TENSORS_BY_DUMP_SIZE:
+ return sorted(data, reverse=reverse, key=lambda x: x.dump_size_bytes)
elif sort_by == SORT_TENSORS_BY_OP_TYPE:
return sorted(
data,
@@ -542,12 +554,13 @@ class DebugAnalyzer(object):
raise ValueError("Unsupported key to sort tensors by: %s" % sort_by)
def _tensor_list_column_heads(self, parsed, max_timestamp_width,
- max_op_type_width):
+ max_dump_size_width, max_op_type_width):
"""Generate a line containing the column heads of the tensor list.
Args:
parsed: Parsed arguments (by argparse) of the list_tensors command.
max_timestamp_width: (int) maximum width of the timestamp column.
+ max_dump_size_width: (int) maximum width of the dump size column.
max_op_type_width: (int) maximum width of the op type column.
Returns:
@@ -564,30 +577,43 @@ class DebugAnalyzer(object):
attr_segs = {0: []}
row = "t (ms)"
- command = "%s -s timestamp" % base_command
- if parsed.sort_by == "timestamp" and not parsed.reverse:
+ command = "%s -s %s" % (base_command, SORT_TENSORS_BY_TIMESTAMP)
+ if parsed.sort_by == SORT_TENSORS_BY_TIMESTAMP and not parsed.reverse:
command += " -r"
attr_segs[0].append(
(0, len(row), [debugger_cli_common.MenuItem(None, command), "bold"]))
row += " " * (max_timestamp_width - len(row))
prev_len = len(row)
+ row += "Size"
+ command = "%s -s %s" % (base_command, SORT_TENSORS_BY_DUMP_SIZE)
+ if parsed.sort_by == SORT_TENSORS_BY_DUMP_SIZE and not parsed.reverse:
+ command += " -r"
+ attr_segs[0].append((prev_len, len(row),
+ [debugger_cli_common.MenuItem(None, command), "bold"]))
+ row += " " * (max_dump_size_width + max_timestamp_width - len(row))
+
+ prev_len = len(row)
row += "Op type"
- command = "%s -s op_type" % base_command
- if parsed.sort_by == "op_type" and not parsed.reverse:
+ command = "%s -s %s" % (base_command, SORT_TENSORS_BY_OP_TYPE)
+ if parsed.sort_by == SORT_TENSORS_BY_OP_TYPE and not parsed.reverse:
command += " -r"
attr_segs[0].append((prev_len, len(row),
[debugger_cli_common.MenuItem(None, command), "bold"]))
- row += " " * (max_op_type_width + max_timestamp_width - len(row))
+ row += " " * (
+ max_op_type_width + max_dump_size_width + max_timestamp_width - len(row)
+ )
prev_len = len(row)
row += " Tensor name"
- command = "%s -s tensor_name" % base_command
- if parsed.sort_by == "tensor_name" and not parsed.reverse:
+ command = "%s -s %s" % (base_command, SORT_TENSORS_BY_TENSOR_NAME)
+ if parsed.sort_by == SORT_TENSORS_BY_TENSOR_NAME and not parsed.reverse:
command += " -r"
attr_segs[0].append((prev_len + 1, len(row),
[debugger_cli_common.MenuItem("", command), "bold"]))
- row += " " * (max_op_type_width + max_timestamp_width - len(row))
+ row += " " * (
+ max_op_type_width + max_dump_size_width + max_timestamp_width - len(row)
+ )
return debugger_cli_common.RichTextLines([row], font_attr_segs=attr_segs)
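
The hunks above keep the new Size column aligned by padding each line out to the running sum of the column widths. A minimal standalone sketch of that cumulative-padding technique (the function and parameter names here are illustrative, not the tfdbg API):

    def format_row(time_str, size_str, op_type, tensor_name,
                   ts_width, size_width, op_width):
        # Pad to the cumulative width after each column, as list_tensors does.
        line = time_str
        line += " " * (ts_width - len(line))
        line += size_str
        line += " " * (ts_width + size_width - len(line))
        line += op_type
        line += " " * (ts_width + size_width + op_width - len(line))
        return line + " " + tensor_name

    print(format_row("[1.234]", "1.00k", "MatMul", "m/matmul:0", 10, 8, 12))
    # -> "[1.234]   1.00k   MatMul       m/matmul:0"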
diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py
index c63e8d6afc..d6b5c3a097 100644
--- a/tensorflow/python/debug/cli/analyzer_cli_test.py
+++ b/tensorflow/python/debug/cli/analyzer_cli_test.py
@@ -28,6 +28,7 @@ from tensorflow.python.client import session
from tensorflow.python.debug import debug_data
from tensorflow.python.debug import debug_utils
from tensorflow.python.debug.cli import analyzer_cli
+from tensorflow.python.debug.cli import command_parser
from tensorflow.python.debug.cli import debugger_cli_common
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util
@@ -135,37 +136,51 @@ def assert_listed_tensors(tst,
# Verify the command shortcuts in the top row.
attr_segs = out.font_attr_segs[line_counter]
- tst.assertEqual(0, attr_segs[0][0])
- tst.assertEqual(len("t (ms)"), attr_segs[0][1])
- command = attr_segs[0][2][0].content
+ attr_seg = attr_segs[0]
+ tst.assertEqual(0, attr_seg[0])
+ tst.assertEqual(len("t (ms)"), attr_seg[1])
+ command = attr_seg[2][0].content
tst.assertIn("-s timestamp", command)
assert_column_header_command_shortcut(
tst, command, reverse, node_name_regex, op_type_regex,
tensor_filter_name)
- tst.assertEqual("bold", attr_segs[0][2][1])
+ tst.assertEqual("bold", attr_seg[2][1])
+
+ idx0 = line.index("Size")
+ attr_seg = attr_segs[1]
+ tst.assertEqual(idx0, attr_seg[0])
+ tst.assertEqual(idx0 + len("Size"), attr_seg[1])
+ command = attr_seg[2][0].content
+ tst.assertIn("-s dump_size", command)
+ assert_column_header_command_shortcut(tst, command, reverse, node_name_regex,
+ op_type_regex, tensor_filter_name)
+ tst.assertEqual("bold", attr_seg[2][1])
idx0 = line.index("Op type")
- tst.assertEqual(idx0, attr_segs[1][0])
- tst.assertEqual(idx0 + len("Op type"), attr_segs[1][1])
- command = attr_segs[1][2][0].content
+ attr_seg = attr_segs[2]
+ tst.assertEqual(idx0, attr_seg[0])
+ tst.assertEqual(idx0 + len("Op type"), attr_seg[1])
+ command = attr_seg[2][0].content
tst.assertIn("-s op_type", command)
assert_column_header_command_shortcut(
tst, command, reverse, node_name_regex, op_type_regex,
tensor_filter_name)
- tst.assertEqual("bold", attr_segs[1][2][1])
+ tst.assertEqual("bold", attr_seg[2][1])
idx0 = line.index("Tensor name")
- tst.assertEqual(idx0, attr_segs[2][0])
- tst.assertEqual(idx0 + len("Tensor name"), attr_segs[2][1])
- command = attr_segs[2][2][0].content
+ attr_seg = attr_segs[3]
+ tst.assertEqual(idx0, attr_seg[0])
+ tst.assertEqual(idx0 + len("Tensor name"), attr_seg[1])
+ command = attr_seg[2][0].content
tst.assertIn("-s tensor_name", command)
assert_column_header_command_shortcut(
tst, command, reverse, node_name_regex, op_type_regex,
tensor_filter_name)
- tst.assertEqual("bold", attr_segs[2][2][1])
+ tst.assertEqual("bold", attr_seg[2][1])
# Verify the listed tensors and their timestamps.
tensor_timestamps = []
+ dump_sizes_bytes = []
op_types = []
tensor_names = []
for line in line_iter:
@@ -176,8 +191,9 @@ def assert_listed_tensors(tst,
tst.assertGreaterEqual(rel_time, 0.0)
tensor_timestamps.append(rel_time)
- op_types.append(items[1])
- tensor_names.append(items[2])
+ dump_sizes_bytes.append(command_parser.parse_readable_size_str(items[1]))
+ op_types.append(items[2])
+ tensor_names.append(items[3])
# Verify that the tensors should be listed in ascending order of their
# timestamps.
@@ -186,6 +202,11 @@ def assert_listed_tensors(tst,
if reverse:
sorted_timestamps.reverse()
tst.assertEqual(sorted_timestamps, tensor_timestamps)
+ elif sort_by == "dump_size":
+ sorted_dump_sizes_bytes = sorted(dump_sizes_bytes)
+ if reverse:
+ sorted_dump_sizes_bytes.reverse()
+ tst.assertEqual(sorted_dump_sizes_bytes, dump_sizes_bytes)
elif sort_by == "op_type":
sorted_op_types = sorted(op_types)
if reverse:
@@ -353,7 +374,6 @@ def assert_node_attribute_lines(tst,
while True:
for i in range(5):
line = next(line_iter)
- print(line)
if i == 0:
tst.assertEqual(depth_counter, int(line.split(":")[0]))
elif i == 1:
@@ -564,6 +584,33 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase):
reverse=True)
check_main_menu(self, out, list_tensors_enabled=False)
+ def testListTensorsInDumpSizeOrderWorks(self):
+ out = self._registry.dispatch_command("lt", ["-s", "dump_size"])
+ assert_listed_tensors(
+ self,
+ out, [
+ "simple_mul_add/u:0", "simple_mul_add/v:0",
+ "simple_mul_add/u/read:0", "simple_mul_add/v/read:0",
+ "simple_mul_add/matmul:0", "simple_mul_add/add:0"
+ ],
+ ["VariableV2", "VariableV2", "Identity", "Identity", "MatMul", "Add"],
+ sort_by="dump_size")
+ check_main_menu(self, out, list_tensors_enabled=False)
+
+ def testListTensorsInReverseDumpSizeOrderWorks(self):
+ out = self._registry.dispatch_command("lt", ["-s", "dump_size", "-r"])
+ assert_listed_tensors(
+ self,
+ out, [
+ "simple_mul_add/u:0", "simple_mul_add/v:0",
+ "simple_mul_add/u/read:0", "simple_mul_add/v/read:0",
+ "simple_mul_add/matmul:0", "simple_mul_add/add:0"
+ ],
+ ["VariableV2", "VariableV2", "Identity", "Identity", "MatMul", "Add"],
+ sort_by="dump_size",
+ reverse=True)
+ check_main_menu(self, out, list_tensors_enabled=False)
+
def testListTensorsWithInvalidSortByFieldGivesError(self):
out = self._registry.dispatch_command("lt", ["-s", "foobar"])
self.assertIn("ValueError: Unsupported key to sort tensors by: foobar",
diff --git a/tensorflow/python/debug/cli/cli_shared.py b/tensorflow/python/debug/cli/cli_shared.py
index 023aa2c8c6..5841537d9e 100644
--- a/tensorflow/python/debug/cli/cli_shared.py
+++ b/tensorflow/python/debug/cli/cli_shared.py
@@ -32,6 +32,36 @@ from tensorflow.python.ops import variables
DEFAULT_NDARRAY_DISPLAY_THRESHOLD = 2000
+def bytes_to_readable_str(num_bytes, include_b=False):
+ """Generate a human-readable string representing number of bytes.
+
+ The units B, kB, MB and GB are used.
+
+ Args:
+ num_bytes: (`int` or None) Number of bytes.
+ include_b: (`bool`) Include the letter B at the end of the unit.
+
+ Returns:
+ (`str`) A string representing the number of bytes in a human-readable way,
+ including a unit at the end.
+ """
+
+ if num_bytes is None:
+ return str(num_bytes)
+ if num_bytes < 1024:
+ result = "%d" % num_bytes
+ elif num_bytes < 1048576:
+ result = "%.2fk" % (num_bytes / 1024.0)
+ elif num_bytes < 1073741824:
+ result = "%.2fM" % (num_bytes / 1048576.0)
+ else:
+ result = "%.2fG" % (num_bytes / 1073741824.0)
+
+ if include_b:
+ result += "B"
+ return result
+
+
def parse_ranges_highlight(ranges_string):
"""Process ranges highlight string.
diff --git a/tensorflow/python/debug/cli/cli_shared_test.py b/tensorflow/python/debug/cli/cli_shared_test.py
index 07126e9ba1..1ef3c34254 100644
--- a/tensorflow/python/debug/cli/cli_shared_test.py
+++ b/tensorflow/python/debug/cli/cli_shared_test.py
@@ -30,6 +30,46 @@ from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest
+class BytesToReadableStrTest(test_util.TensorFlowTestCase):
+
+ def testNoneSizeWorks(self):
+ self.assertEqual(str(None), cli_shared.bytes_to_readable_str(None))
+
+ def testSizesBelowOneKiloByteWorks(self):
+ self.assertEqual("0", cli_shared.bytes_to_readable_str(0))
+ self.assertEqual("500", cli_shared.bytes_to_readable_str(500))
+ self.assertEqual("1023", cli_shared.bytes_to_readable_str(1023))
+
+ def testSizesBetweenOneKiloByteAndOneMegaByteWorks(self):
+ self.assertEqual("1.00k", cli_shared.bytes_to_readable_str(1024))
+ self.assertEqual("2.40k", cli_shared.bytes_to_readable_str(int(1024 * 2.4)))
+ self.assertEqual("1023.00k", cli_shared.bytes_to_readable_str(1024 * 1023))
+
+ def testSizesBetweenOneMegaByteAndOneGigaByteWorks(self):
+ self.assertEqual("1.00M", cli_shared.bytes_to_readable_str(1024**2))
+ self.assertEqual("2.40M",
+ cli_shared.bytes_to_readable_str(int(1024**2 * 2.4)))
+ self.assertEqual("1023.00M",
+ cli_shared.bytes_to_readable_str(1024**2 * 1023))
+
+ def testSizeAboveOneGigaByteWorks(self):
+ self.assertEqual("1.00G", cli_shared.bytes_to_readable_str(1024**3))
+ self.assertEqual("2000.00G",
+ cli_shared.bytes_to_readable_str(1024**3 * 2000))
+
+ def testReadableStrIncludesBAtTheEndOnRequest(self):
+ self.assertEqual("0B", cli_shared.bytes_to_readable_str(0, include_b=True))
+ self.assertEqual(
+ "1.00kB", cli_shared.bytes_to_readable_str(
+ 1024, include_b=True))
+ self.assertEqual(
+ "1.00MB", cli_shared.bytes_to_readable_str(
+ 1024**2, include_b=True))
+ self.assertEqual(
+ "1.00GB", cli_shared.bytes_to_readable_str(
+ 1024**3, include_b=True))
+
+
class GetRunStartIntroAndDescriptionTest(test_util.TensorFlowTestCase):
def setUp(self):
diff --git a/tensorflow/python/debug/cli/command_parser.py b/tensorflow/python/debug/cli/command_parser.py
index bd4962c7ab..8311c36ea2 100644
--- a/tensorflow/python/debug/cli/command_parser.py
+++ b/tensorflow/python/debug/cli/command_parser.py
@@ -214,6 +214,40 @@ def parse_ranges(range_string):
return ranges
+def parse_readable_size_str(size_str):
+ """Convert a human-readable str representation to number of bytes.
+
+ Only the units "kB", "MB", "GB" are supported. The "B character at the end
+ of the input `str` may be omitted.
+
+ Args:
+ size_str: (`str`) A human-readable str representing a number of bytes
+ (e.g., "0", "1023", "1.1kB", "24 MB", "23GB", "100 G".
+
+ Returns:
+ (`int`) The parsed number of bytes.
+
+ Raises:
+ ValueError: on failure to parse the input `size_str`.
+ """
+
+ size_str = size_str.strip()
+ if size_str.endswith("B"):
+ size_str = size_str[:-1]
+
+ if size_str.isdigit():
+ return int(size_str)
+ elif size_str.endswith("k"):
+ return int(float(size_str[:-1]) * 1024)
+ elif size_str.endswith("M"):
+ return int(float(size_str[:-1]) * 1048576)
+ elif size_str.endswith("G"):
+ return int(float(size_str[:-1]) * 1073741824)
+ else:
+ raise ValueError("Failed to parsed human-readable byte size str: \"%s\"" %
+ size_str)
+
+
def evaluate_tensor_slice(tensor, tensor_slicing):
"""Call eval on the slicing of a tensor, with validation.
diff --git a/tensorflow/python/debug/cli/command_parser_test.py b/tensorflow/python/debug/cli/command_parser_test.py
index 0b0684c7d2..3f8b8744c3 100644
--- a/tensorflow/python/debug/cli/command_parser_test.py
+++ b/tensorflow/python/debug/cli/command_parser_test.py
@@ -251,5 +251,51 @@ class ParseRangesTest(test_util.TensorFlowTestCase):
command_parser.parse_ranges("[1, 1j]")
+class ParseReadableSizeStrTest(test_util.TensorFlowTestCase):
+
+ def testParseNoUnitWorks(self):
+ self.assertEqual(0, command_parser.parse_readable_size_str("0"))
+ self.assertEqual(1024, command_parser.parse_readable_size_str("1024 "))
+ self.assertEqual(2000, command_parser.parse_readable_size_str(" 2000 "))
+
+ def testParseKiloBytesWorks(self):
+ self.assertEqual(0, command_parser.parse_readable_size_str("0kB"))
+ self.assertEqual(1024**2, command_parser.parse_readable_size_str("1024 kB"))
+ self.assertEqual(1024**2 * 2,
+ command_parser.parse_readable_size_str("2048k"))
+ self.assertEqual(1024**2 * 2,
+ command_parser.parse_readable_size_str("2048kB"))
+ self.assertEqual(1024 / 4, command_parser.parse_readable_size_str("0.25k"))
+
+ def testParseMegaBytesWorks(self):
+ self.assertEqual(0, command_parser.parse_readable_size_str("0MB"))
+ self.assertEqual(1024**3, command_parser.parse_readable_size_str("1024 MB"))
+ self.assertEqual(1024**3 * 2,
+ command_parser.parse_readable_size_str("2048M"))
+ self.assertEqual(1024**3 * 2,
+ command_parser.parse_readable_size_str("2048MB"))
+ self.assertEqual(1024**2 / 4,
+ command_parser.parse_readable_size_str("0.25M"))
+
+ def testParseGigaBytesWorks(self):
+ self.assertEqual(0, command_parser.parse_readable_size_str("0GB"))
+ self.assertEqual(1024**4, command_parser.parse_readable_size_str("1024 GB"))
+ self.assertEqual(1024**4 * 2,
+ command_parser.parse_readable_size_str("2048G"))
+ self.assertEqual(1024**4 * 2,
+ command_parser.parse_readable_size_str("2048GB"))
+ self.assertEqual(1024**3 / 4,
+ command_parser.parse_readable_size_str("0.25G"))
+
+ def testParseUnsupportedUnitRaisesException(self):
+ with self.assertRaisesRegexp(
+ ValueError, "Failed to parsed human-readable byte size str: \"0foo\""):
+ command_parser.parse_readable_size_str("0foo")
+
+ with self.assertRaisesRegexp(
+ ValueError, "Failed to parsed human-readable byte size str: \"2E\""):
+ command_parser.parse_readable_size_str("2EB")
+
+
if __name__ == "__main__":
googletest.main()
diff --git a/tensorflow/python/debug/debug_data.py b/tensorflow/python/debug/debug_data.py
index 81f8781a3f..4c252ade86 100644
--- a/tensorflow/python/debug/debug_data.py
+++ b/tensorflow/python/debug/debug_data.py
@@ -298,6 +298,8 @@ class DebugTensorDatum(object):
self._node_name = namespace + "/" + node_base_name
self._file_path = os.path.join(dump_root, debug_dump_rel_path)
+ self._dump_size_bytes = (gfile.Stat(self._file_path).length if
+ gfile.Exists(self._file_path) else None)
def __str__(self):
return "{DebugTensorDatum: %s:%d @ %s @ %d}" % (self.node_name,
@@ -385,6 +387,19 @@ class DebugTensorDatum(object):
return self._file_path
+ @property
+ def dump_size_bytes(self):
+ """Size of the dump file.
+
+ Unit: byte.
+
+ Returns:
+ If the dump file exists, size of the dump file, in bytes.
+ If the dump file does not exist, None.
+ """
+
+ return self._dump_size_bytes
+
class DebugDumpDir(object):
"""Data set from a debug-dump directory on filesystem.
@@ -498,16 +513,22 @@ class DebugDumpDir(object):
self._watch_key_to_datum = {}
self._watch_key_to_rel_time = {}
+ self._watch_key_to_dump_size_bytes = {}
for datum in self._dump_tensor_data:
if datum.watch_key not in self._watch_key_to_datum:
self._watch_key_to_datum[datum.watch_key] = [datum]
self._watch_key_to_rel_time[datum.watch_key] = [
datum.timestamp - self._t0
]
+ self._watch_key_to_dump_size_bytes[datum.watch_key] = [
+ datum.dump_size_bytes
+ ]
else:
self._watch_key_to_datum[datum.watch_key].append(datum)
self._watch_key_to_rel_time[datum.watch_key].append(datum.timestamp -
self._t0)
+ self._watch_key_to_dump_size_bytes[datum.watch_key].append(
+ datum.dump_size_bytes)
def set_python_graph(self, python_graph):
"""Provide Python `Graph` object to the wrapper.
@@ -1193,10 +1214,10 @@ class DebugDumpDir(object):
debug_op: (`str`) name of the debug op.
Returns:
- (list of int) list of relative timestamps.
+ (`list` of `int`) list of relative timestamps.
Raises:
- ValueError: If the tensor does not exist in the debub dump data.
+ ValueError: If the tensor watch key does not exist in the debug dump data.
"""
watch_key = _get_tensor_watch_key(node_name, output_slot, debug_op)
@@ -1206,6 +1227,30 @@ class DebugDumpDir(object):
return self._watch_key_to_rel_time[watch_key]
+ def get_dump_sizes_bytes(self, node_name, output_slot, debug_op):
+ """Get the sizes of the dump files for a debug-dumped tensor.
+
+ Unit of the file size: byte.
+
+ Args:
+ node_name: (`str`) name of the node that the tensor is produced by.
+ output_slot: (`int`) output slot index of tensor.
+ debug_op: (`str`) name of the debug op.
+
+ Returns:
+ (`list` of `int`): list of dump file sizes in bytes.
+
+ Raises:
+ ValueError: If the tensor watch key does not exist in the debug dump data.
+ """
+
+ watch_key = _get_tensor_watch_key(node_name, output_slot, debug_op)
+ if watch_key not in self._watch_key_to_datum:
+ raise ValueError("Watch key \"%s\" does not exist in the debug dump" %
+ watch_key)
+
+ return self._watch_key_to_dump_size_bytes[watch_key]
+
def node_traceback(self, element_name):
"""Try to retrieve the Python traceback of node's construction.
diff --git a/tensorflow/python/debug/debug_data_test.py b/tensorflow/python/debug/debug_data_test.py
index fc8db37090..9910244ad3 100644
--- a/tensorflow/python/debug/debug_data_test.py
+++ b/tensorflow/python/debug/debug_data_test.py
@@ -179,6 +179,13 @@ class DebugTensorDatumTest(test_util.TensorFlowTestCase):
datum.timestamp),
repr(datum))
+ def testDumpSizeBytesIsNoneForNonexistentFilePath(self):
+ dump_root = "/tmp/tfdbg_1"
+ debug_dump_rel_path = "ns1/ns2/node_foo_1_2_DebugIdentity_1472563253536385"
+ datum = debug_data.DebugTensorDatum(dump_root, debug_dump_rel_path)
+
+ self.assertIsNone(datum.dump_size_bytes)
+
class DebugDumpDirTest(test_util.TensorFlowTestCase):
diff --git a/tensorflow/python/debug/session_debug_testlib.py b/tensorflow/python/debug/session_debug_testlib.py
index 9642ed1fe3..2b700facd7 100644
--- a/tensorflow/python/debug/session_debug_testlib.py
+++ b/tensorflow/python/debug/session_debug_testlib.py
@@ -150,6 +150,13 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
results.dump.get_rel_timestamps("%s/read" % results.v_name, 0,
"DebugIdentity")[0], 0)
+ self.assertGreater(
+ results.dump.get_dump_sizes_bytes("%s/read" % results.u_name, 0,
+ "DebugIdentity")[0], 0)
+ self.assertGreater(
+ results.dump.get_dump_sizes_bytes("%s/read" % results.v_name, 0,
+ "DebugIdentity")[0], 0)
+
def testGetOpTypeWorks(self):
results = self._generate_dump_from_simple_addition_graph()
@@ -218,6 +225,13 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
dump.get_rel_timestamps("%s/read" % str2_name, 0, "DebugIdentity")[0],
0)
+ self.assertGreater(
+ dump.get_dump_sizes_bytes("%s/read" % str1_name, 0,
+ "DebugIdentity")[0], 0)
+ self.assertGreater(
+ dump.get_dump_sizes_bytes("%s/read" % str2_name, 0,
+ "DebugIdentity")[0], 0)
+
def testDumpUninitializedVariable(self):
op_namespace = "testDumpUninitializedVariable"
with session.Session() as sess:
@@ -362,11 +376,17 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
# Verify ascending timestamps from the while loops.
while_id_rel_timestamps = dump.get_rel_timestamps("while/Identity", 0,
"DebugIdentity")
+ while_id_dump_sizes_bytes = dump.get_dump_sizes_bytes("while/Identity", 0,
+ "DebugIdentity")
self.assertEqual(10, len(while_id_rel_timestamps))
prev_rel_time = 0
- for rel_time in while_id_rel_timestamps:
+ prev_dump_size_bytes = while_id_dump_sizes_bytes[0]
+ for rel_time, dump_size_bytes in zip(while_id_rel_timestamps,
+ while_id_dump_sizes_bytes):
self.assertGreaterEqual(rel_time, prev_rel_time)
+ self.assertEqual(dump_size_bytes, prev_dump_size_bytes)
prev_rel_time = rel_time
+ prev_dump_size_bytes = dump_size_bytes
# Test querying debug watch keys from node name.
watch_keys = dump.debug_watch_keys("while/Identity")