A few profiler improvements.

1. Use an id_to_string map to reduce the profile size (2/3 in xception). 2. Dedup code view's function names with an extra file base name. 3. Remove the code view display heuristic that doesn't work in some cases. 4. Make the profile_context thread-safe. PiperOrigin-RevId: 172031528
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-10-12 16:39:22 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-10-12 16:43:56 -0700
commit: 19708cc7d8e34e830a716d3f9896294489d3b535 (patch)
tree: 2fe7b78976ee65a5175ccf4121c168a7c8d9c2d3 /tensorflow/python/profiler
parent: 1002f974f58b23c528436e34c06384b8bffb2485 (diff)
2 files changed, 73 insertions, 61 deletions
diff --git a/tensorflow/python/profiler/profile_context.py b/tensorflow/python/profiler/profile_context.py
index 1710209ed9..0c31cf8f13 100644
--- a/tensorflow/python/profiler/profile_context.py
+++ b/tensorflow/python/profiler/profile_context.py
@@ -47,56 +47,53 @@ def _profiled_run(self,
   """Overwrites the session.run()."""
   # pylint: disable=protected-access
   # Count the session steps.
-  with self.profile_context._new_step():
+  with self.profile_context._new_step() as step:
     # Fast path if no need for profiling.
-    if self.profile_context._is_fast_path():
-      return self._profiler_run_internal(
-          fetches, feed_dict, options, run_metadata)
-
-    step = self.profile_context._step
-
-    # Maybe trace this step.
-    if self.profile_context._should_trace():
-      # Enable tracing, perform auto profiling or auto dump.
-      if not run_metadata:
-        run_metadata = config_pb2.RunMetadata()
-
-      if not options:
-        options = config_pb2.RunOptions(
-            trace_level=config_pb2.RunOptions.FULL_TRACE)
-        old_trace_level = options.trace_level
+    if not self.profile_context._is_fast_path():
+      # Maybe trace this step.
+      if self.profile_context._should_trace():
+        # Enable tracing, perform auto profiling or auto dump.
+        if not run_metadata:
+          run_metadata = config_pb2.RunMetadata()
+
+        if not options:
+          options = config_pb2.RunOptions(
+              trace_level=config_pb2.RunOptions.FULL_TRACE)
+          old_trace_level = options.trace_level
+        else:
+          old_trace_level = options.trace_level
+          options.trace_level = config_pb2.RunOptions.FULL_TRACE
+
+        ret = self._profiler_run_internal(
+            fetches, feed_dict, options, run_metadata)
+
+        self.profile_context.profiler._graph = self.graph
+        self.profile_context.profiler.add_step(step, run_metadata)
+        options.trace_level = old_trace_level
       else:
-        old_trace_level = options.trace_level
-        options.trace_level = config_pb2.RunOptions.FULL_TRACE
-
-      ret = self._profiler_run_internal(
-          fetches, feed_dict, options, run_metadata)
-
-      self.profile_context.profiler._graph = self.graph
-      self.profile_context.profiler.add_step(step, run_metadata)
-      options.trace_level = old_trace_level
-    else:
-      ret = self._profiler_run_internal(fetches, feed_dict, options)
-
-    # Maybe dump profile.
-    self.profile_context._maybe_dump()
-
-    # Maybe profile:
-    to_profiles = self.profile_context._profile_candidates()
-    for to_prof in to_profiles:
-      cmd, opts, _ = to_prof
-      if cmd == 'graph':
-        self.profile_context.profiler.profile_graph(opts)
-      elif cmd == 'scope':
-        self.profile_context.profiler.profile_name_scope(opts)
-      elif cmd == 'op':
-        self.profile_context.profiler.profile_operations(opts)
-      elif cmd == 'code':
-        self.profile_context.profiler.profile_python(opts)
-      else:
-        raise ValueError('Unknown cmd: %s\n' % cmd)
-
-    return ret
+        ret = self._profiler_run_internal(fetches, feed_dict, options)
+
+      # Maybe dump profile.
+      self.profile_context._maybe_dump()
+
+      # Maybe profile:
+      to_profiles = self.profile_context._profile_candidates()
+      for to_prof in to_profiles:
+        cmd, opts, _ = to_prof
+        if cmd == 'graph':
+          self.profile_context.profiler.profile_graph(opts)
+        elif cmd == 'scope':
+          self.profile_context.profiler.profile_name_scope(opts)
+        elif cmd == 'op':
+          self.profile_context.profiler.profile_operations(opts)
+        elif cmd == 'code':
+          self.profile_context.profiler.profile_python(opts)
+        else:
+          raise ValueError('Unknown cmd: %s\n' % cmd)
+      return ret
+  # Fast no lock path.
+  return self._profiler_run_internal(
+      fetches, feed_dict, options, run_metadata)
   # pylint: enable=protected-access
 
 
@@ -183,10 +180,9 @@ class ProfileContext(object):
   @property
   def profiler(self):
     """Returns the current profiler object."""
-    with self._lock:
-      if not self._profiler:
-        self._profiler = model_analyzer.Profiler(ops.get_default_graph())
-      return self._profiler
+    if not self._profiler:
+      self._profiler = model_analyzer.Profiler(ops.get_default_graph())
+    return self._profiler
 
   def trace_next_step(self):
     """Enables tracing and add traces to profiler at next step."""
@@ -222,10 +218,11 @@ class ProfileContext(object):
 
   @contextlib.contextmanager
   def _new_step(self):
-    yield
-    self._step += 1
-    self._trace_next_step = False
-    self._dump_next_step = False
+    with self._lock:
+      yield self._step
+      self._step += 1
+      self._trace_next_step = False
+      self._dump_next_step = False
 
   def _profile_candidates(self):
     to_profile = []
diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py
index 9020f60421..838064a1f0 100644
--- a/tensorflow/python/profiler/tfprof_logger.py
+++ b/tensorflow/python/profiler/tfprof_logger.py
@@ -63,6 +63,15 @@ def _fill_missing_graph_shape(graph, run_meta):
   return graph
 
 
+def _str_id(s, str_to_id):
+  """Maps string to id."""
+  num = str_to_id.get(s, None)
+  if num is None:
+    num = len(str_to_id)
+    str_to_id[s] = num
+  return num
+
+
 def _get_logged_ops(graph, run_meta=None, add_trace=True,
                     add_trainable_var=True):
   """Extract trainable model parameters and FLOPs for ops from a Graph.
@@ -75,12 +84,15 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
       '_trainable_variables'.
   Returns:
     logged_ops: dict mapping from op_name to OpLogEntry.
+    string_to_id: dict mapping from string to id.
   """
   if run_meta:
     graph = _fill_missing_graph_shape(graph, run_meta)
 
   op_missing_shape = 0
   logged_ops = {}
+  string_to_id = dict()
+  string_to_id['none'] = len(string_to_id)
   # TODO(xpan): Work with Profiler more efficiently.
   for op in graph.get_operations():
     try:
@@ -101,10 +113,10 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
     if add_trace:
       for tb in op.traceback_with_start_lines:
         trace = entry.code_def.traces.add()
-        trace.file = tb[0] if tb[0] else 'none'
+        trace.file_id = _str_id(tb[0], string_to_id) if tb[0] else 0
         trace.lineno = tb[1] if tb[1] else -1
-        trace.function = tb[2] if tb[2] else 'none'
-        trace.line = tb[3] if tb[3] else 'none'
+        trace.function_id = _str_id(tb[2], string_to_id) if tb[2] else 0
+        trace.line_id = _str_id(tb[3], string_to_id) if tb[3] else 0
         trace.func_start_line = tb[4] if tb[4] else -1
       add_entry = True
 
@@ -124,7 +136,7 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
   if op_missing_shape > 0 and not run_meta:
     sys.stderr.write('%d ops no flops stats due to incomplete shapes.\n' %
                      op_missing_shape)
-  return logged_ops
+  return logged_ops, string_to_id
 
 
 def _merge_default_with_oplog(graph, op_log=None, run_meta=None,
@@ -142,7 +154,7 @@ def _merge_default_with_oplog(graph, op_log=None, run_meta=None,
     tmp_op_log: Merged OpLogProto proto.
   """
   tmp_op_log = tfprof_log_pb2.OpLogProto()
-  logged_ops = _get_logged_ops(
+  logged_ops, string_to_id = _get_logged_ops(
       graph, run_meta, add_trace=add_trace, add_trainable_var=add_trainable_var)
 
   if not op_log:
@@ -161,6 +173,9 @@ def _merge_default_with_oplog(graph, op_log=None, run_meta=None,
       else:
         all_ops[op_name] = entry
     tmp_op_log.log_entries.extend(all_ops.values())
+
+  for s, i in six.iteritems(string_to_id):
+    tmp_op_log.id_to_string[i] = s
   return tmp_op_log
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-10-12 16:39:22 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-10-12 16:43:56 -0700
commit	19708cc7d8e34e830a716d3f9896294489d3b535 (patch)
tree	2fe7b78976ee65a5175ccf4121c168a7c8d9c2d3 /tensorflow/python/profiler
parent	1002f974f58b23c528436e34c06384b8bffb2485 (diff)