# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Logging tensorflow::tfprof::OpLogProto.

OpLogProto is used to add extra model information for offline analysis.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys

import six

from tensorflow.core.profiler import tfprof_log_pb2
from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.platform import gfile
from tensorflow.python.profiler.internal import flops_registry  # pylint: disable=unused-import
from tensorflow.python.util.tf_export import tf_export

TRAINABLE_VARIABLES = '_trainable_variables'
REGISTERED_FLOP_STATS = 'flops'


def _fill_missing_graph_shape(graph, run_meta):
  """Fills Tensor shapes in 'graph' with run-time shapes from 'run_meta'."""
  for dev_stat in run_meta.step_stats.dev_stats:
    for node_stat in dev_stat.node_stats:
      if not node_stat.output:
        continue
      try:
        op = graph.get_operation_by_name(node_stat.node_name)
      except KeyError:
        # The graph doesn't contain the node_stat, usually a RecvTensor.
        continue
      if len(node_stat.output) != len(op.outputs):
        # For example, a conditional op has only 1 output at run time.
        continue
      for (i, node_stat_out) in enumerate(node_stat.output):
        if op.outputs[i].get_shape().is_fully_defined():
          continue
        node_stat_dims = node_stat_out.tensor_description.shape.dim
        node_stat_shape = tensor_shape.TensorShape(
            [d.size for d in node_stat_dims])
        try:
          op.outputs[i].set_shape(op.outputs[i].get_shape().merge_with(
              node_stat_shape))
        except ValueError as e:
          sys.stderr.write('Node %s incompatible shapes: %s.\n' %
                           (node_stat.node_name, e))
  return graph


def _str_id(s, str_to_id):
  """Maps a string to an integer id, interning it on first use."""
  num = str_to_id.get(s, None)
  if num is None:
    num = len(str_to_id)
    str_to_id[s] = num
  return num
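# Illustrative example (not part of the original module): _str_id assigns
# consecutive ids and returns the existing id for strings seen before.
#
#   str_to_id = {'none': 0}
#   _str_id('model.py', str_to_id)  # -> 1
#   _str_id('model.py', str_to_id)  # -> 1 (already interned)
#   _str_id('train.py', str_to_id)  # -> 2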
def _get_logged_ops(graph, run_meta=None, add_trace=True,
                    add_trainable_var=True):
  """Extract trainable model parameters and FLOPs for ops from a Graph.

  Args:
    graph: tf.Graph.
    run_meta: RunMetadata proto used to complete shape information.
    add_trace: Whether to add op trace information.
    add_trainable_var: Whether to assign tf.trainable_variables() op type
      '_trainable_variables'.
  Returns:
    logged_ops: dict mapping from op_name to OpLogEntry.
    string_to_id: dict mapping from string to id.
  """
  if run_meta:
    graph = _fill_missing_graph_shape(graph, run_meta)

  op_missing_shape = 0
  logged_ops = {}
  string_to_id = dict()
  string_to_id['none'] = len(string_to_id)
  # TODO(xpan): Work with Profiler more efficiently.
  for op in graph.get_operations():
    try:
      stats = ops.get_stats_for_node_def(
          graph, op.node_def, REGISTERED_FLOP_STATS)
    except ValueError:
      # The shape is incomplete, so flops cannot be computed. Skip this op.
      op_missing_shape += 1
      stats = None

    entry = tfprof_log_pb2.OpLogEntry()
    entry.name = op.name
    add_entry = False
    if stats and stats.value:
      entry.float_ops = int(stats.value)
      add_entry = True

    if add_trace:
      for tb in op.traceback_with_start_lines:
        trace = entry.code_def.traces.add()
        trace.file_id = _str_id(tb[0], string_to_id) if tb[0] else 0
        trace.lineno = tb[1] if tb[1] else -1
        trace.function_id = _str_id(tb[2], string_to_id) if tb[2] else 0
        trace.line_id = _str_id(tb[3], string_to_id) if tb[3] else 0
        trace.func_start_line = tb[4] if tb[4] else -1
      add_entry = True

    if add_entry:
      logged_ops[entry.name] = entry

  if add_trainable_var:
    for v in graph.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES):
      if v.op.name not in logged_ops:
        entry = tfprof_log_pb2.OpLogEntry()
        entry.name = v.op.name
        entry.types.append(TRAINABLE_VARIABLES)
        logged_ops[entry.name] = entry
      else:
        logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)

  if op_missing_shape > 0 and not run_meta:
    sys.stderr.write('%d ops have no flops stats due to incomplete shapes.\n' %
                     op_missing_shape)
  return logged_ops, string_to_id


def merge_default_with_oplog(graph, op_log=None, run_meta=None,
                             add_trace=True, add_trainable_var=True):
  """Merge the tfprof default extra info with the caller's op_log.

  Args:
    graph: tf.Graph. If None and eager execution is not enabled, use the
        default graph.
    op_log: OpLogProto proto.
    run_meta: RunMetadata proto used to complete shape information.
    add_trace: Whether to add op trace information.
    add_trainable_var: Whether to assign tf.trainable_variables() op type
      '_trainable_variables'.
  Returns:
    tmp_op_log: Merged OpLogProto proto.
  """
  if not graph and not context.executing_eagerly():
    graph = ops.get_default_graph()

  tmp_op_log = tfprof_log_pb2.OpLogProto()
  if not graph:
    return tmp_op_log

  logged_ops, string_to_id = _get_logged_ops(
      graph, run_meta, add_trace=add_trace,
      add_trainable_var=add_trainable_var)

  if not op_log:
    tmp_op_log.log_entries.extend(logged_ops.values())
  else:
    all_ops = dict()
    for entry in op_log.log_entries:
      all_ops[entry.name] = entry
    for op_name, entry in six.iteritems(logged_ops):
      if op_name in all_ops:
        all_ops[op_name].types.extend(entry.types)
        if entry.float_ops > 0 and all_ops[op_name].float_ops == 0:
          all_ops[op_name].float_ops = entry.float_ops
        if entry.code_def.traces and not all_ops[op_name].code_def.traces:
          all_ops[op_name].code_def.MergeFrom(entry.code_def)
      else:
        all_ops[op_name] = entry
    tmp_op_log.log_entries.extend(all_ops.values())

  for s, i in six.iteritems(string_to_id):
    tmp_op_log.id_to_string[i] = s
  return tmp_op_log


@tf_export('profiler.write_op_log')
def write_op_log(graph, log_dir, op_log=None, run_meta=None, add_trace=True):
  """Log the provided 'op_log' and add additional model information.

  The API also assigns ops in tf.trainable_variables() an op type called
  '_trainable_variables'.
  The API also logs 'flops' statistics for ops with op.RegisterStatistics()
  defined. The flops calculation depends on Tensor shapes defined in 'graph',
  which might not be complete. 'run_meta', if provided, completes the shape
  information on a best-effort basis.

  Args:
    graph: tf.Graph. If None and eager execution is not enabled, use the
        default graph.
    log_dir: directory to write the log file.
    op_log: (Optional) OpLogProto proto to be written. If not provided, a new
        one is created.
    run_meta: (Optional) RunMetadata proto that helps flops computation using
        run time shape information.
    add_trace: Whether to add python code trace information.
        Used to support the "code" view.
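
  Example:
    A minimal usage sketch; the session, the fetched `loss` tensor and the
    '/tmp/logdir' path are illustrative placeholders, not part of this module:

      run_meta = tf.RunMetadata()
      with tf.Session() as sess:
        sess.run(loss,
                 options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
                 run_metadata=run_meta)
        tf.profiler.write_op_log(sess.graph, '/tmp/logdir', run_meta=run_meta)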
""" if not graph and not context.executing_eagerly(): graph = ops.get_default_graph() op_log = merge_default_with_oplog(graph, op_log, run_meta, add_trace) with gfile.Open(os.path.join(log_dir, 'tfprof_log'), 'w') as log: log.write(op_log.SerializeToString())