aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/tfprof
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2016-09-22 09:19:06 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-09-22 10:31:37 -0700
commite4a63b578f97c9dca26fd4d3a364f90a94cb45b5 (patch)
tree18f2612bbff2ad2d68ebae3e9ece48367623f71b /tensorflow/contrib/tfprof
parent64a170499dc7bb8ccd7a844ea54b2805e441c8e5 (diff)
tfprof: "Swiss Army Knife Tool" To Explore Your Model.
Change: 133968335
Diffstat (limited to 'tensorflow/contrib/tfprof')
-rw-r--r--tensorflow/contrib/tfprof/README.md453
-rw-r--r--tensorflow/contrib/tfprof/python/tools/tfprof/BUILD31
-rw-r--r--tensorflow/contrib/tfprof/python/tools/tfprof/__init__.py0
-rw-r--r--tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py114
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/BUILD52
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD227
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc65
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h45
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckptbin0 -> 2059 bytes
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt636
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta22
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log9
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h37
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc222
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h116
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc47
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h106
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc57
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h119
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc191
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h88
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc266
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h127
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc92
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc130
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h82
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc194
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc78
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h120
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc306
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc350
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h50
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto19
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc236
-rw-r--r--tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto49
35 files changed, 4736 insertions, 0 deletions
diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md
new file mode 100644
index 0000000000..0e6420134a
--- /dev/null
+++ b/tensorflow/contrib/tfprof/README.md
@@ -0,0 +1,453 @@
+# tfprof: A Profiling Tool for TensorFlow Models
+
+go/tfprof
+
+Author: Xin Pan (xpan@google.com, github: panyx0718)
+
+Consultants: Jon Shlens (shlens@google.com), Pete Warden (petewarden@google.com)
+
+[TOC]
+
+## Introduction
+
+tfprof is a profiling tool for TensorFlow that analyzes model architectures
+and measures system performance.
+
+### Major Features
+
+1. Measure model parameters, float operations, tensor shapes.
+2. Measure op execution times, requested memory size and device placement.
+3. Inspect checkpoint tensors' shapes and their values.
+4. Explore model based on name scope or graph structure.
+5. Selectively grouping/filtering/accounting/ordering ops.
+
+### Interfaces
+
+[CLI Tutorials](#cli-tutorials):
+It supports interactive mode for exploration and single-shot mode for
+scripts. Outputs can be dumped to files or printed in terminal.
+
+Python API Tutorials: Python API is not released yet.
+
+## CLI Tutorials
+
+Tutorials are based on a 32-layer ResNet.
+TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
+
+### Examples
+
+1) Start `tfprof` command line tool
+
+```shell
+# Build the tool.
+bazel build -c opt tensorflow/contrib/tfprof/...
+
+# Help information, including detail 'option' instructions.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help
+#
+# The following commands will start tfprof interactive mode.
+#
+# Profile model shapes and parameters only.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt
+#
+# Additionally profile checkpoint statistics and values.
+# Use '-account_type_regexes _checkpoint_variables' to select
+# checkpoint tensors.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt \
+ --checkpoint_path=model.ckpt
+#
+# Additionally profile ops requested memory and timing.
+# See CLI Input Files section on generating run_meta file.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt \
+ --run_meta_path=run_meta \
+ --checkpoint_path=model.ckpt
+#
+# tfprof_log is used to define customized op types and float ops.
+# Use tfprof_logger.write_op_log() to create tfprof_log.
+# See 11) in Examples section on generating tfprof_log file.
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+ --graph_path=graph.pbtxt \
+ --run_meta_path=run_meta \
+ --op_log_path=tfprof_log \
+ --checkpoint_path=model.ckpt
+```
+Note that `graph.pbtxt` is an ASCII text format.
+
+2) Press enter to show the default options
+
+```shell
+tfprof>
+tfprof>
+-max_depth 4
+-min_bytes 0
+-min_micros 0
+-min_params 0
+-min_float_ops 0
+-device_regexes .*
+-order_by name
+-account_type_regexes Variable
+-start_name_regexes .*
+-trim_name_regexes
+-show_name_regexes .*
+-hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]*
+-account_displayed_op_only false
+# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path.
+# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]
+-select params
+-viz false
+-dump_to_file
+```
+
+3) I want to see the `BatchNorm`'s gamma value in checkpoint.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5
+_TFProfRoot ()
+ unit_1_0/shared_activation/init_bn/gamma ()
+[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ],
+ unit_1_0/sub2/bn2/gamma ()
+[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ],
+```
+
+4) I want to see my checkpoint tensors shape and number of parameters.
+
+```shell
+# Requires --graph_path, --checkpoint_path.
+# Increase -max_depth to see all tensors.
+tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4
+_TFProfRoot (--/930.58k params)
+ global_step (0/0 params)
+ init/init_conv/DW (3x3x3x16, 432/864 params)
+ pool_logit/DW (64x10, 640/1.28k params)
+ pool_logit/DW/Momentum (64x10, 640/640 params)
+ pool_logit/biases (10, 10/20 params)
+ pool_logit/biases/Momentum (10, 10/10 params)
+ unit_last/final_bn/beta (64, 64/128 params)
+ unit_last/final_bn/gamma (64, 64/128 params)
+ unit_last/final_bn/moving_mean (64, 64/64 params)
+ unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+5) I defined an op named ‘cost’ to calculate the loss. I want to know which of
+the ops it depends on take a long time to run. Hint: Use the ‘graph’ command to
+explore graph dependencies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/3.61sec)
+ init/init_conv/Conv2D (11.75ms/3.10sec)
+ random_shuffle_queue_DequeueMany (3.09sec/3.09sec)
+ unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec)
+ unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec)
+ unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec)
+ unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec)
+```
+
+6) I want to know the expensive operations during the back propagation.
+Hint: TensorFlow prepends ‘gradients’ to your defined name scopes. Use the
+‘scope’ command to explore based on name scope hierarchies.
+
+```shell
+# Requires --graph_path, --run_meta_path.
+tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .*
+_TFProfRoot (0us/2.29sec)
+ gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms)
+ gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms)
+ gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms)
+ gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms)
+ gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms)
+ gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms)
+ ...
+```
+
+7) Show the number of float operations in the model.
+Note: float operations calculation depends on
+1) op.RegisterStatistics. If an op doesn’t
+have RegisterStatistics defined, its float operations cannot be counted.
+2) fully defined shape is also necessary in order to calculate flops.
+float operations number is provided by tensorflow::tfprof::OpLog logged from
+Python API.
+
+```shell
+# Requires --graph_path, --op_log_path.
+tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .*
+_TFProfRoot (0/17.63b flops)
+ gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops)
+ gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops)
+ init/init_conv/Conv2D (113.25m/113.25m flops)
+ pool_logit/xw_plus_b (1.28k/165.12k flops)
+ pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops)
+ unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops)
+ unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops)
+ unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops)
+ unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops)
+ ...
+```
+
+8) Show the number of parameters of all `tf.trainable_variables()` in the model.
+
+```shell
+# Requires --graph_path --op_log_path.
+# store option for future commands.
+tfprof> set -account_type_regexes _trainable_variables
+tfprof> scope -max_depth 4 -select params
+_TFProfRoot (--/464.15k params)
+ init/init_conv/DW (3x3x3x16, 432/432 params)
+ pool_logit/DW (64x10, 640/640 params)
+ pool_logit/biases (10, 10/10 params)
+ unit_last/final_bn/beta (64, 64/64 params)
+ unit_last/final_bn/gamma (64, 64/64 params)
+```
+
+Where does “_trainable_variables” come from? It is from the OpLog file
+generated by the write_op_log() Python API. write_op_log() helps users create
+some common op types implicitly. Users can also define their own op types and
+log them through the write_op_log() API.
+
+9) What if I’m lazy and don’t want to define op types? I have given my ops
+well-defined names in my model’s code, and I want to use names to select a group
+of ops. Let’s try it!
+
+```shell
+tfprof> set -account_type_regexes .*
+tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only
+_TFProfRoot (0/18.43k params)
+ unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params)
+ unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params)
+```
+
+The above command allows you to filter ops that match specific names.
+`-account_displayed_op_only` asks tfprof to only account ops displayed
+in terminal. Otherwise, tfprof accounts all ops matched by
+`-account_type_regexes` recursively even if they are hidden due to some
+options such as -max_depth.
+
+10) TensorFlow has built-in op types. For example, built-in op type `Variable`
+seems to include `Variable's` created by your model. However, be careful when
+depending on it because TensorFlow creates extra `Variable` ops implicitly and
+the implicitly created ops can have the same prefix as the `Variable's` you
+defined.
+
+In the following example, extra `Variables` are created and “/Momentum” is
+appended to their names. This might cause your “model capacity” calculation
+to be wrong.
+
+```shell
+tfprof> scope -account_type_regexes Variable -max_depth 4 -select params
+_TFProfRoot (--/930.58k params)
+ global_step (1/1 params)
+ init/init_conv/DW (3x3x3x16, 432/864 params)
+ pool_logit/DW (64x10, 640/1.28k params)
+ pool_logit/DW/Momentum (64x10, 640/640 params)
+ pool_logit/biases (10, 10/20 params)
+ pool_logit/biases/Momentum (10, 10/10 params)
+ unit_last/final_bn/beta (64, 64/128 params)
+ unit_last/final_bn/gamma (64, 64/128 params)
+ unit_last/final_bn/moving_mean (64, 64/64 params)
+ unit_last/final_bn/moving_variance (64, 64/64 params)
+```
+
+
+11) An example of defining extra op types for ops using `OpLog`
+
+First, in Python code, create an `OpLog` proto and add op type
+information to it:
+
+```python
+op_log = tfprof_log_pb2.OpLog()
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/DW'
+entry.types.append('pool_logit')
+entry = op_log.log_entries.add()
+entry.name = 'pool_logit/biases'
+# Alternatively:
+# var = tf.get_variable(xxx)
+# entry.name = var.op.name
+entry.types.append('pool_logit')
+```
+
+Second, call write_op_log to write the OpLog proto.
+
+```python
+tfprof_logger.write_op_log(sess.graph, '/tmp/my_op_log_dir', op_log)
+```
+
+Third, when starting the tfprof tool, specify
+"--op_log_path /tmp/my_op_log_dir/tfprof_log"
+
+```shell
+tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params
+_TFProfRoot (--/650 params)
+ pool_logit/DW (64x10, 640/640 params)
+ pool_logit/biases (10, 10/10 params)
+```
+
+Note that when you call
+`tfprof_logger.write_op_log(...)`, the tool adds all `Variables` inside
+`tf.trainable_variables()` to `_trainable_variables`.
+
+12) Run tfprof in one-shot mode and dump result to file.
+
+```shell
+# Printed to stdout if --dump_to_file is not set.
+tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt \
+ --max_depth 3 \
+ --dump_to_file "/tmp/dump"
+Reading Files...
+Parsing GraphDef...
+Preparing Views...
+
+cat /tmp/dump
+_TFProfRoot (--/930.58k params)
+ global_step (0/0 params)
+ pool_logit/DW (64x10, 640/1.28k params)
+ pool_logit/biases (10, 10/20 params)
+```
+
+13) Analyze how balanced Variables are on parameter servers.
+
+In this tutorial, I'm going to use a seq2seq model, which is split
+across several GPUs on the workers and several parameter servers.
+
+In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on
+gpu0, they share an op_type called 'gpu0'.
+
+```shell
+bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \
+ --graph_path ~/tfprof/textsum/graph.pbtxt \
+ --run_meta_path ~/tfprof/textsum/run_meta
+
+# Looks like ps task 1 is holding more than twice as many parameters as task 0.
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1
+_TFProfRoot (--/25.81m params)
+tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1
+_TFProfRoot (--/58.84m params)
+```
+
+### CLI Input Files
+
+The tfprof command line interface (CLI) loads dumped files from a TensorFlow
+model and converts them into in-memory data structures. To use it, users need to
+specify the locations of the dumped files. The following are the dumped files
+loaded by tfprof:
+
+<b>--graph_path:</b> GraphDef text file (required). Used to build in-memory
+representation of the model. For example, graph.pbtxt written by tf.Supervisor
+is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef
+using tf.Graph.as_graph_def() or other API.
+
+<b>--run_meta_path:</b> tensorflow::RunMetadata.
+Used to get the memory and time consumption of
+each op of the model. Users need to enable it. For example, the following code
+snippet writes a RunMetadata file:
+
+```python
+run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE)
+run_metadata = config_pb2.RunMetadata()
+# Once in a while, call it to get the RunMeta.
+_ = self._sess.run(..., options=run_options, run_metadata=run_metadata)
+with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f:
+ f.write(run_metadata.SerializeToString())
+```
+
+<b>--op_log_path:</b>
+tensorflow::tfprof::OpLog. A proto used to provide extra op information
+for ops. By giving a group of ops a type name, users can easily aggregate the
+statistics for those ops without accidentally missing or including extra ops.
+tfprof exposes the following Python API to add op information and logging.
+
+```python
+ def write_op_log(graph, log_dir, op_log=None)
+```
+
+<b>--checkpoint_path:</b>
+TensorFlow checkpoint. It defines the _checkpoint_variables op type. It also
+provides checkpointed tensors' values.
+
+
+## Design
+
+
+### In-memory representation
+
+<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
+similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
+For example op1 with name “name1/name2” is a child of op2 with name “name1”.
+
+<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
+a graph structure. The graph is a “directed acyclic graph” (hopefully), with
+direction from “output to input”. The direction is designed this way so that users
+can trace from “result” to its “sources”.
+
+### Command line options
+
+tfprof’s major goals are to measure system performance and quickly analyze
+model architectures. Hence, its commands and options should allow users to achieve
+these 2 goals easily.
+
+<b>graph:</b> It is expected that users will mostly use graph representation to
+debug system performance. Hence, tfprof supports graph command, which pulls the
+graph in-memory representation described above.
+
+<b>scope:</b> It is expected that some users might want to explore their model
+statistics using the name scope information they defined in the Python codes.
+Hence, tfprof supports “scope” command, which pulls the tree in-memory
+representation.
+
+<b>set:</b> It is used to store the options so that user doesn’t need to
+re-type the same option again and again in the follow up command line. Note that
+tfprof has traditional terminal’s history and auto-complete support.
+
+<b>help:</b> print help information.
+
+<b>Options:</b> Run “tfprof help” to get detailed explanations.
+
+```python
+"-max_depth",
+"-min_bytes",
+"-min_micros",
+"-min_params",
+"-min_float_ops",
+"-order_by",
+"-account_type_regexes",
+"-start_name_regexes",
+"-trim_name_regexes",
+"-show_name_regexes",
+"-hide_name_regexes",
+"-account_displayed_op_only",
+"-select",
+"-viz", # Only supported for graph command.
+"-dump_to_file",
+```
+
+A key design is that stats are aggregated from descendants up to ancestors.
+`-account_type_regexes` is used to decide which ops' stats are accounted. It makes
+the decision based on op type. Usually set it to `.*` if no extra type information
+is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
+`-min/max` and `-show/hide/trim/start` options are only used to optionally
+display or hide ops based on ops’ names and stats. However, they don’t prevent
+tfprof from accounting stats of hidden ops. Hence, the stats of an op can be
+aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
+an option to break this rule. When it is set, only displayed ops are accounted.
+
+Regexes are all comma-separated, for example `-show_name_regexes`
+`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
+is not expected to show up in op names.
+
+`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
+(notice the indent printed) are sorted according to order_by.
+
+## Future Work
+
+* Load SummaryWriter event logs so that it can show the latest summary value.
+
+* Better sorting and aggregation of outputs. Easier comprehension.
+
+* Currently, shape information is based on `graph.pbtxt`. When the shape
+information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
+and `Checkpoint` to complete shape information.
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
new file mode 100644
index 0000000000..d78020bbd8
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD
@@ -0,0 +1,31 @@
+package(
+ default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"]) # Apache 2.0
+
+py_library(
+ name = "tfprof_logger",
+ srcs = ["tfprof_logger.py"],
+ srcs_version = "PY2AND3",
+ deps = [
+ "//tensorflow:tensorflow_py",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py",
+ "//tensorflow/python:framework_for_generated_wrappers",
+ ],
+)
+
+# -----------------------------------------------------------------------------
+# Google-internal targets. These must be at the end for syncrepo.
+
+filegroup(
+ name = "all_files",
+ srcs = glob(
+ ["**/*"],
+ exclude = [
+ "**/METADATA",
+ "**/OWNERS",
+ ],
+ ),
+ visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/__init__.py b/tensorflow/contrib/tfprof/python/tools/tfprof/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/__init__.py
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
new file mode 100644
index 0000000000..4a487461a3
--- /dev/null
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
@@ -0,0 +1,114 @@
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Logging tensorflow::tfprof::OpLog.
+
+OpLog is used to add extra model information for offline analysis by tfprof.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import tensorflow as tf
+from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_log_pb2
+from tensorflow.python.framework import ops
+
+TRAINABLE_VARIABLES = '_trainable_variables'
+REGISTERED_FLOP_STATS = 'flops'
+
+
+def _get_logged_ops(graph):
+ """Extract trainable model parameters and FLOPs for ops from a Graph.
+
+ Args:
+ graph: tf.Graph.
+ Returns:
+ logged_ops: dict mapping from op_name to OpLogEntry.
+ """
+ logged_ops = {}
+
+ graph_def = graph.as_graph_def()
+ for node in graph_def.node:
+ try:
+ stats = ops.get_stats_for_node_def(graph, node, REGISTERED_FLOP_STATS)
+ except ValueError:
+ # Catch Exception When shape is incomplete. Skip it.
+ stats = None
+
+ if not stats or not stats.value:
+ continue
+ if node.name not in logged_ops:
+ entry = tfprof_log_pb2.OpLogEntry()
+ entry.name = node.name
+ entry.float_ops = stats.value
+ logged_ops[entry.name] = entry
+
+ for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
+ if v.op.name not in logged_ops:
+ entry = tfprof_log_pb2.OpLogEntry()
+ entry.name = v.op.name
+ entry.types.append(TRAINABLE_VARIABLES)
+ logged_ops[entry.name] = entry
+ else:
+ logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)
+ return logged_ops
+
+
+def _merge_default_with_oplog(graph, op_log=None):
+ """Merge the tfprof default extra info with caller's op_log.
+
+ Args:
+ graph: tf.Graph.
+ op_log: OpLog proto.
+ Returns:
+ tmp_op_log: Merged OpLog proto.
+ """
+ tmp_op_log = tfprof_log_pb2.OpLog()
+ logged_ops = _get_logged_ops(graph)
+ if not op_log:
+ tmp_op_log.log_entries.extend(logged_ops.values())
+ else:
+ all_ops = dict()
+ for entry in op_log.log_entries:
+ all_ops[entry.name] = entry
+ for op_name, entry in logged_ops.iteritems():
+ if op_name in all_ops:
+ all_ops[op_name].types.extend(entry.types)
+ if entry.float_ops > 0 and all_ops[op_name].float_ops == 0:
+ all_ops[op_name].float_ops = entry.float_ops
+ else:
+ all_ops[op_name] = entry
+ tmp_op_log.log_entries.extend(all_ops.values())
+ return tmp_op_log
+
+
+def write_op_log(graph, log_dir, op_log=None):
+ """Log provided 'op_log', and add additional model information below.
+
+ The API also assigns ops in tf.trainable_variables() an op type called
+ '_trainable_variables'.
+ The API also logs 'flops' statistics for ops with op.RegisterStatistics()
+ defined.
+
+ Args:
+ graph: tf.Graph.
+ log_dir: directory to write the log file.
+ op_log: OpLog proto.
+ """
+ op_log = _merge_default_with_oplog(graph, op_log)
+
+ with tf.gfile.Open(os.path.join(log_dir, 'tfprof_log'), 'w') as log:
+ log.write(op_log.SerializeToString())
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/tools/tfprof/BUILD
new file mode 100644
index 0000000000..da161b1ffa
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/BUILD
@@ -0,0 +1,52 @@
+package(
+ default_visibility = ["//visibility:public"],
+)
+
+licenses(["notice"]) # Apache 2.0
+
+# -----------------------------------------------------------------------------
+# Google-internal targets. These must be at the end for syncrepo.
+
+filegroup(
+ name = "all_files",
+ srcs = glob(
+ ["**/*"],
+ exclude = [
+ "**/METADATA",
+ "**/OWNERS",
+ ],
+ ),
+ visibility = ["//tensorflow:__subpackages__"],
+)
+
+cc_binary(
+ name = "tfprof",
+ srcs = ["tfprof_main.cc"],
+ deps = [
+ ":protos_all_cc",
+ "//tensorflow/c:c_api",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_options",
+ "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_stats",
+ "//tensorflow/contrib/tfprof/tools/tfprof/internal:tfprof_utils",
+ "//tensorflow/core:framework_headers_lib",
+ "//tensorflow/core:framework_internal",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "@linenoise//:linenoise",
+ ],
+)
+
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library")
+
+tf_proto_library(
+ name = "protos_all",
+ srcs = glob(
+ ["**/*.proto"],
+ ),
+ cc_api_version = 2,
+ cc_libs = ["//tensorflow/core:protos_all_cc"],
+ go_api_version = 2,
+ java_api_version = 2,
+ visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD b/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
new file mode 100644
index 0000000000..42812b345d
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/BUILD
@@ -0,0 +1,227 @@
+package(
+ default_visibility = ["//tensorflow:__subpackages__"],
+)
+
+licenses(["notice"]) # Apache 2.0
+
+load("//tensorflow:tensorflow.bzl", "tf_cc_test")
+
+cc_library(
+ name = "tfprof_stats",
+ srcs = ["tfprof_stats.cc"],
+ hdrs = ["tfprof_stats.h"],
+ deps = [
+ ":tfprof_graph",
+ ":tfprof_node",
+ ":tfprof_options",
+ ":tfprof_scope",
+ ":tfprof_show",
+ ":tfprof_utils",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+)
+
+cc_library(
+ name = "tfprof_node",
+ srcs = ["tfprof_node.cc"],
+ hdrs = ["tfprof_node.h"],
+ deps = [
+ ":tfprof_options",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+)
+
+cc_library(
+ name = "tfprof_scope",
+ srcs = ["tfprof_scope.cc"],
+ hdrs = ["tfprof_scope.h"],
+ deps = [
+ ":tfprof_constants",
+ ":tfprof_node",
+ ":tfprof_options",
+ ":tfprof_show",
+ ":tfprof_tensor",
+ ":tfprof_utils",
+ "//tensorflow/c:c_api",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+)
+
+cc_library(
+ name = "tfprof_graph",
+ srcs = ["tfprof_graph.cc"],
+ hdrs = ["tfprof_graph.h"],
+ deps = [
+ ":tfprof_constants",
+ ":tfprof_node",
+ ":tfprof_options",
+ ":tfprof_show",
+ ":tfprof_tensor",
+ ":tfprof_utils",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+)
+
+cc_library(
+ name = "tfprof_show",
+ srcs = ["tfprof_show.cc"],
+ hdrs = ["tfprof_show.h"],
+ deps = [
+ ":tfprof_constants",
+ ":tfprof_node",
+ ":tfprof_options",
+ ":tfprof_tensor",
+ ":tfprof_utils",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+)
+
+tf_cc_test(
+ name = "tfprof_show_test",
+ srcs = ["tfprof_show_test.cc"],
+ data = [
+ "testdata/ckpt",
+ "testdata/graph.pbtxt",
+ "testdata/run_meta",
+ "testdata/tfprof_log",
+ ],
+ deps = [
+ ":tfprof_constants",
+ ":tfprof_options",
+ ":tfprof_stats",
+ ":tfprof_utils",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ ],
+)
+
+cc_library(
+ name = "tfprof_utils",
+ srcs = ["tfprof_utils.cc"],
+ hdrs = ["tfprof_utils.h"],
+ deps = [
+ ":tfprof_options",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+)
+
+cc_library(
+ name = "tfprof_options",
+ srcs = ["tfprof_options.cc"],
+ hdrs = ["tfprof_options.h"],
+ deps = [
+ "//tensorflow/core:framework_headers_lib",
+ "//tensorflow/core:lib",
+ ],
+)
+
+cc_library(
+ name = "print_model_analysis",
+ srcs = ["print_model_analysis.cc"],
+ hdrs = ["print_model_analysis.h"],
+ deps = [
+ ":tfprof_options",
+ ":tfprof_stats",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+)
+
+tf_cc_test(
+ name = "tfprof_stats_test",
+ srcs = ["tfprof_stats_test.cc"],
+ data = [
+ "testdata/ckpt",
+ "testdata/graph.pbtxt",
+ "testdata/run_meta",
+ "testdata/tfprof_log",
+ ],
+ deps = [
+ ":tfprof_constants",
+ ":tfprof_options",
+ ":tfprof_stats",
+ ":tfprof_utils",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ ],
+)
+
+cc_library(
+ name = "tfprof_tensor",
+ srcs = ["tfprof_tensor.cc"],
+ hdrs = ["tfprof_tensor.h"],
+ deps = [
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ ],
+)
+
+tf_cc_test(
+ name = "tfprof_tensor_test",
+ srcs = ["tfprof_tensor_test.cc"],
+ data = [
+ "testdata/ckpt",
+ "testdata/graph.pbtxt",
+ ],
+ deps = [
+ ":tfprof_options",
+ ":tfprof_stats",
+ ":tfprof_utils",
+ "//tensorflow/c:checkpoint_reader",
+ "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_cc",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:test",
+ "//tensorflow/core:test_main",
+ "//tensorflow/core:testlib",
+ ],
+)
+
+cc_library(
+ name = "tfprof_constants",
+ hdrs = ["tfprof_constants.h"],
+ deps = [
+ ],
+)
+# -----------------------------------------------------------------------------
+# Google-internal targets. These must be at the end for syncrepo.
+
+filegroup(
+ name = "all_files",
+ srcs = glob(
+ ["**/*"],
+ exclude = [
+ "**/METADATA",
+ "**/OWNERS",
+ ],
+ ),
+ visibility = ["//tensorflow:__subpackages__"],
+)
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
new file mode 100644
index 0000000000..ab1e47b32d
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.cc
@@ -0,0 +1,65 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h"
+
+#include <stdio.h>
+#include <memory>
+#include <utility>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Runs the tfprof analysis named by 'command' and returns the resulting
+// TFProfNode in serialized form.
+//
+// 'graph', 'run_meta' and 'op_log' are serialized GraphDef, RunMetadata and
+// OpLog protos. 'graph', 'command' and 'options' must not be null (enforced
+// with CHECK); 'run_meta' and 'op_log' may be null. No checkpoint reader is
+// supplied to the analysis.
+//
+// A proto that fails to parse is reported on stderr; the analysis then
+// continues with whatever the parser left in the message.
+//
+// When options->dump_to_file is empty, the options and report banners are
+// additionally printed to stdout around the analysis.
+string PrintModelAnalysis(const string* graph, const string* run_meta,
+                          const string* op_log, const string* command,
+                          const Options* options) {
+  CHECK(graph) << "graph mustn't be null";
+  CHECK(command) << "command mustn't be null";
+  CHECK(options) << "options mustn't be null";
+
+  std::unique_ptr<GraphDef> graph_ptr(new GraphDef());
+  if (!graph_ptr->ParseFromString(*graph)) {
+    fprintf(stderr, "Failed to parse 'graph' as a serialized GraphDef.\n");
+  }
+
+  std::unique_ptr<RunMetadata> run_meta_ptr;
+  if (run_meta) {
+    run_meta_ptr.reset(new RunMetadata());
+    if (!run_meta_ptr->ParseFromString(*run_meta)) {
+      fprintf(stderr,
+              "Failed to parse 'run_meta' as a serialized RunMetadata.\n");
+    }
+  }
+
+  std::unique_ptr<OpLog> op_log_ptr;
+  if (op_log) {
+    op_log_ptr.reset(new OpLog());
+    if (!op_log_ptr->ParseFromString(*op_log)) {
+      fprintf(stderr, "Failed to parse 'op_log' as a serialized OpLog.\n");
+    }
+  }
+
+  // Intentionally left null: this entry point has no checkpoint to read.
+  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader;
+
+  TFStats tf_stats(std::move(graph_ptr), std::move(run_meta_ptr),
+                   std::move(op_log_ptr), std::move(ckpt_reader));
+
+  // The banners only go to stdout when no dump file was requested.
+  const bool print_banners = options->dump_to_file.empty();
+  if (print_banners) {
+    printf("\n=========================Options=============================\n");
+    printf("%s", options->ToString().c_str());
+    printf("\n==================Model Analysis Report======================\n");
+  }
+
+  TFProfNode root(tf_stats.PrintGraph(*command, *options));
+
+  if (print_banners) {
+    printf("\n======================End of Report==========================\n");
+    fflush(stdout);
+  }
+  return root.SerializeAsString();
+}
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
new file mode 100644
index 0000000000..579147f164
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/print_model_analysis.h
@@ -0,0 +1,45 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
+
+#include <string>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// ***This API is only for swig.***
+//
+// Entry point used by the Python API through swig; calls the tfprof core API.
+// 'graph', 'run_meta' and 'op_log' are serialized GraphDef, RunMetadata and
+// OpLog strings, respectively.
+// 'graph', 'command' and 'options' are required. 'run_meta' and 'op_log' can
+// be nullptr if not available. Returns the result as a serialized TFProfNode.
+string PrintModelAnalysis(const string* graph, const string* run_meta,
+ const string* op_log, const string* command,
+ const Options* options);
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_PRINT_MODEL_ANALYSIS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
new file mode 100644
index 0000000000..2f59f071c5
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/ckpt
Binary files differ
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
new file mode 100644
index 0000000000..fd54551776
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt
@@ -0,0 +1,636 @@
+node {
+ name: "zeros"
+ op: "Const"
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 6
+ }
+ dim {
+ size: 6
+ }
+ dim {
+ size: 3
+ }
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "DW"
+ op: "Variable"
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 3
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 3
+ }
+ dim {
+ size: 6
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "DW/Initializer/random_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\003\000\000\000\003\000\000\000\003\000\000\000\006\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "DW/Initializer/random_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "DW/Initializer/random_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000475
+ }
+ }
+ }
+}
+node {
+ name: "DW/Initializer/random_normal/RandomStandardNormal"
+ op: "RandomStandardNormal"
+ input: "DW/Initializer/random_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 87654321
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 5
+ }
+ }
+}
+node {
+ name: "DW/Initializer/random_normal/mul"
+ op: "Mul"
+ input: "DW/Initializer/random_normal/RandomStandardNormal"
+ input: "DW/Initializer/random_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+}
+node {
+ name: "DW/Initializer/random_normal"
+ op: "Add"
+ input: "DW/Initializer/random_normal/mul"
+ input: "DW/Initializer/random_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+}
+node {
+ name: "DW/Assign"
+ op: "Assign"
+ input: "DW"
+ input: "DW/Initializer/random_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "DW/read"
+ op: "Identity"
+ input: "DW"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW"
+ }
+ }
+ }
+}
+node {
+ name: "Conv2D"
+ op: "Conv2D"
+ input: "zeros"
+ input: "DW/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "SAME"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1
+ i: 2
+ i: 2
+ i: 1
+ }
+ }
+ }
+ attr {
+ key: "use_cudnn_on_gpu"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "DW2"
+ op: "Variable"
+ attr {
+ key: "container"
+ value {
+ s: ""
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "shape"
+ value {
+ shape {
+ dim {
+ size: 2
+ }
+ dim {
+ size: 2
+ }
+ dim {
+ size: 6
+ }
+ dim {
+ size: 12
+ }
+ }
+ }
+ }
+ attr {
+ key: "shared_name"
+ value {
+ s: ""
+ }
+ }
+}
+node {
+ name: "DW2/Initializer/random_normal/shape"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_INT32
+ tensor_shape {
+ dim {
+ size: 4
+ }
+ }
+ tensor_content: "\002\000\000\000\002\000\000\000\006\000\000\000\014\000\000\000"
+ }
+ }
+ }
+}
+node {
+ name: "DW2/Initializer/random_normal/mean"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0
+ }
+ }
+ }
+}
+node {
+ name: "DW2/Initializer/random_normal/stddev"
+ op: "Const"
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "value"
+ value {
+ tensor {
+ dtype: DT_FLOAT
+ tensor_shape {
+ }
+ float_val: 0.0010000000475
+ }
+ }
+ }
+}
+node {
+ name: "DW2/Initializer/random_normal/RandomStandardNormal"
+ op: "RandomStandardNormal"
+ input: "DW2/Initializer/random_normal/shape"
+ attr {
+ key: "T"
+ value {
+ type: DT_INT32
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+ attr {
+ key: "dtype"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "seed"
+ value {
+ i: 87654321
+ }
+ }
+ attr {
+ key: "seed2"
+ value {
+ i: 15
+ }
+ }
+}
+node {
+ name: "DW2/Initializer/random_normal/mul"
+ op: "Mul"
+ input: "DW2/Initializer/random_normal/RandomStandardNormal"
+ input: "DW2/Initializer/random_normal/stddev"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+}
+node {
+ name: "DW2/Initializer/random_normal"
+ op: "Add"
+ input: "DW2/Initializer/random_normal/mul"
+ input: "DW2/Initializer/random_normal/mean"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+}
+node {
+ name: "DW2/Assign"
+ op: "Assign"
+ input: "DW2"
+ input: "DW2/Initializer/random_normal"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+ attr {
+ key: "use_locking"
+ value {
+ b: true
+ }
+ }
+ attr {
+ key: "validate_shape"
+ value {
+ b: true
+ }
+ }
+}
+node {
+ name: "DW2/read"
+ op: "Identity"
+ input: "DW2"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "_class"
+ value {
+ list {
+ s: "loc:@DW2"
+ }
+ }
+ }
+}
+node {
+ name: "Conv2D_1"
+ op: "Conv2D"
+ input: "Conv2D"
+ input: "DW2/read"
+ attr {
+ key: "T"
+ value {
+ type: DT_FLOAT
+ }
+ }
+ attr {
+ key: "data_format"
+ value {
+ s: "NHWC"
+ }
+ }
+ attr {
+ key: "padding"
+ value {
+ s: "SAME"
+ }
+ }
+ attr {
+ key: "strides"
+ value {
+ list {
+ i: 1
+ i: 2
+ i: 2
+ i: 1
+ }
+ }
+ }
+ attr {
+ key: "use_cudnn_on_gpu"
+ value {
+ b: true
+ }
+ }
+}
+versions {
+ producer: 13
+}
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
new file mode 100644
index 0000000000..2d5bb7ddaf
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/run_meta
@@ -0,0 +1,22 @@
+
+
+
+%/job:localhost/replica:0/task:0/cpu:0:
+_SOURCE (2
+cpuB_SOURCE = NoOp()H塈a
+zeros (2
+cpu:(&"cpu0Bzeros = Const()H^
+DW (2
+cpu:(&"cpu0ੀBDW = Variable()H`
+DW2 (2
+cpu:(& " cpu0BDW2 = Variable()Hj
+DW/read (2
+cpu:(&"cpu0ੀBDW/read = Identity(DW)Hm
+DW2/read (2
+cpu:(& " cpu0BDW2/read = Identity(DW2)Hs
+Conv2D P(U2
+cpu:(&"cpu0ીBConv2D = Conv2D(zeros, DW/read)H{
+Conv2D_1 (2
+cpu:(& "cpu0฀B#Conv2D_1 = Conv2D(Conv2D, DW2/read)H6
+_SINK (2
+cpuB_SINK = NoOp()H \ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
new file mode 100644
index 0000000000..c35d4338e9
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log
@@ -0,0 +1,9 @@
+
+
+Conv2D_1$
+
+DW2_trainable_variables
+
+DW_trainable_variables
+
+Conv2D- \ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
new file mode 100644
index 0000000000..169ebae4a7
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h
@@ -0,0 +1,37 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
+
+namespace tensorflow {
+namespace tfprof {
+
+// Op name of the root of everything. Aggregates all stats.
+static const char* const kTFProfRoot = "_TFProfRoot";
+// Op types for nodes that don't represent a physical node in the
+// TensorFlow model. They only exist as placeholders to aggregate children.
+// For example, kTFProfRoot belongs to this type.
+static const char* const kTFGraphParent = "_TFGraphParent";
+static const char* const kTFScopeParent = "_kTFScopeParent";  // NOTE(review): value keeps a leading "k", unlike kTFGraphParent - confirm intended.
+// Op type for tf.trainable_variables().
+static const char* const kTrainableVarType = "_trainable_variables";
+// Op type for tensors in the checkpoint file.
+static const char* const kCkptVarType = "_checkpoint_variables";
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_CONSTANTS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
new file mode 100644
index 0000000000..287fd78d46
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.cc
@@ -0,0 +1,222 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+
+#include <stdio.h>
+#include <utility>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Creates a synthetic node called 'name' whose op type is kTFGraphParent and
+// stores it in parent_nodes_ and nodes_map_, replacing whatever was
+// registered under that name before. The backing NodeDef is appended to
+// node_defs_. The returned pointer stays owned by nodes_map_.
+GraphNode* TFGraph::CreateParentNode(const string& name) {
+  std::unique_ptr<NodeDef> def(new NodeDef());
+  def->set_name(name);
+  def->set_op(kTFGraphParent);
+  NodeDef* raw_def = def.get();
+  node_defs_.push_back(std::move(def));
+
+  std::unique_ptr<TFNode>& parent_slot = parent_nodes_[name];
+  parent_slot = std::unique_ptr<TFNode>(new TFNode(raw_def));
+
+  std::unique_ptr<GraphNode>& graph_slot = nodes_map_[name];
+  graph_slot = std::unique_ptr<GraphNode>(new GraphNode(parent_slot.get()));
+  return graph_slot.get();
+}
+
+// Wraps 'node' in a GraphNode and stores it in nodes_map_ under the name of
+// its NodeDef. An entry already present under that name is replaced.
+void TFGraph::AddNode(TFNode* node) {
+  const string node_name = node->node_def()->name();
+  nodes_map_[node_name] = std::unique_ptr<GraphNode>(new GraphNode(node));
+}
+
+// Connects the registered nodes and determines the roots. Every node gets
+// the nodes it takes as inputs as its children (inputs that are not
+// registered are ignored), and a node is a root when no other node lists it
+// as an input. Does nothing when roots_ is already populated.
+void TFGraph::Build() {
+  if (!roots_.empty()) return;
+
+  // Names that appear as an input of at least one node.
+  std::set<string> used_as_input;
+  for (const auto& entry : nodes_map_) {
+    GraphNode* graph_node = entry.second.get();
+    const std::map<string, TFNode*>& inputs = graph_node->node->inputs();
+    for (const auto& input : inputs) {
+      used_as_input.insert(input.first);
+      auto found = nodes_map_.find(input.first);
+      if (found == nodes_map_.end()) continue;
+      graph_node->children.push_back(found->second.get());
+    }
+  }
+
+  for (const auto& entry : nodes_map_) {
+    if (used_as_input.count(entry.first) == 0) {
+      roots_.push_back(entry.second.get());
+    }
+  }
+}
+
+// Produces the graph view for 'opts': picks the start nodes, hangs them under
+// a fresh kTFProfRoot parent, accounts the statistics and formats the nodes.
+// Returns the first node reported by PrintGraph for that parent.
+const ShowNode* TFGraph::ShowInternal(const Options& opts) {
+  // A single ".*" start regex means the graph roots are used as they are;
+  // anything else triggers a search for matching start nodes.
+  const bool use_all_roots = opts.start_name_regexes.size() == 1 &&
+                             opts.start_name_regexes[0] == ".*";
+  std::vector<GraphNode*> start_nodes = roots_;
+  if (!use_all_roots) {
+    std::set<string> searched;
+    start_nodes = SearchRoot(start_nodes, opts.start_name_regexes, &searched);
+  }
+
+  GraphNode* top = CreateParentNode(kTFProfRoot);
+  top->children = start_nodes;
+
+  std::map<string, int64> accounted;
+  Account({top}, opts, &accounted);
+
+  if (opts.viz) {
+    printf("Visualizing feature disabled...\n");
+  }
+
+  std::set<string> printed;
+  const std::vector<GraphNode*> shown =
+      PrintGraph({top}, opts, 1, 0, 0, &printed);
+  // NOTE(review): relies on PrintGraph returning at least one node here; an
+  // empty result would be indexed out of range - confirm ShouldShow always
+  // accepts the root.
+  return shown[0];
+}
+
+// Walks down from 'roots' and collects the nodes whose name fully matches
+// one of 'regexes'. 'visited' holds the names already examined, so each node
+// is looked at only once across the whole search.
+std::vector<GraphNode*> TFGraph::SearchRoot(
+    const std::vector<GraphNode*>& roots, const std::vector<string>& regexes,
+    std::set<string>* visited) {
+  std::vector<GraphNode*> matches;
+  for (GraphNode* candidate : roots) {
+    // insert() reports whether the name was new; skip names seen before.
+    if (!visited->insert(candidate->name()).second) continue;
+
+    bool is_start_node = false;
+    for (const string& regex : regexes) {
+      if (RE2::FullMatch(candidate->name(), regex)) {
+        is_start_node = true;
+        break;
+      }
+    }
+
+    if (is_start_node) {
+      // A start node's children are not searched from here. They can still
+      // become start nodes when reached through another route.
+      matches.push_back(candidate);
+      continue;
+    }
+
+    const std::vector<GraphNode*> below =
+        SearchRoot(candidate->children, regexes, visited);
+    matches.insert(matches.end(), below.begin(), below.end());
+  }
+  return matches;
+}
+
+// Recursively formats the nodes reachable from 'roots' and returns the
+// top-most nodes that are shown: a shown node is returned itself (with its
+// shown descendants folded into its formatted_str and proto), while a node
+// that is not shown is replaced by its shown descendants.
+//
+// 'depth' is the depth of 'roots', 'hidden' the number of consecutive
+// not-shown ancestors directly above them, and 'last_ident' the indentation
+// (in spaces) used for nodes shown at this level. 'visits' holds the names
+// already processed; each node is handled only once.
+std::vector<GraphNode*> TFGraph::PrintGraph(const std::vector<GraphNode*> roots,
+                                            const Options& opts, int depth,
+                                            int hidden, int last_ident,
+                                            std::set<string>* visits) {
+  std::vector<GraphNode*> result;
+  // A "...hidden N..." line precedes a shown node when ancestors were
+  // skipped and kShown[4] is among the selected outputs.
+  const bool mark_hidden =
+      hidden && opts.select.find(kShown[4]) != opts.select.end();
+
+  for (GraphNode* node : roots) {
+    if (!visits->insert(node->name()).second) continue;
+
+    const bool show = ShouldShow(node, opts, depth);
+    // Defaults describe the not-shown case: one more hidden level, same
+    // indentation for the descendants.
+    int child_hidden = hidden + 1;
+    int child_ident = last_ident;
+    if (show) {
+      node->formatted_str.clear();
+      if (opts.account_displayed_op_only) {
+        node->ResetTotalStats();
+        node->AddSelfToTotalStats();
+      }
+      child_hidden = 0;
+      child_ident = last_ident + (mark_hidden ? 4 : 2);
+    }
+
+    std::vector<GraphNode*> shown_children;
+    if (!ShouldTrim(node, opts.trim_name_regexes)) {
+      shown_children = PrintGraph(node->children, opts, depth + 1,
+                                  child_hidden, child_ident, visits);
+    }
+
+    if (!show) {
+      // Pass the shown descendants up in place of this node.
+      result.insert(result.end(), shown_children.begin(),
+                    shown_children.end());
+      continue;
+    }
+
+    shown_children = SortNodes(shown_children, opts);
+    string children_text;
+    for (GraphNode* child : shown_children) {
+      children_text += child->formatted_str;
+      node->mutable_proto()->add_children()->MergeFrom(child->proto());
+      if (opts.account_displayed_op_only) {
+        node->AggregateTotalStats(child);
+      }
+    }
+
+    // The node's own line is formatted after the children were aggregated.
+    const string pad(last_ident, ' ');
+    if (mark_hidden) {
+      node->formatted_str =
+          strings::Printf("%s...hidden %d...\n", pad.c_str(), hidden);
+      node->formatted_str += strings::Printf(" %s%s\n", pad.c_str(),
+                                             node->Format(opts).c_str());
+    } else {
+      node->formatted_str =
+          strings::Printf("%s%s\n", pad.c_str(), node->Format(opts).c_str());
+    }
+
+    // When kShown[5] is selected, append the value found in the checkpoint
+    // for this node, if any.
+    if (opts.select.find(kShown[5]) != opts.select.end()) {
+      std::unique_ptr<TFProfTensor> tensor;
+      if (LookUpCheckPoint(node->name(), &tensor)) {
+        string value_text;
+        tensor->Display(&value_text,
+                        node->mutable_proto()->mutable_tensor_value());
+        node->formatted_str += value_text;
+      }
+    }
+
+    node->formatted_str += children_text;
+    result.push_back(node);
+  }
+  return result;
+}
+
+// Computes the total statistics of every node reachable from 'roots',
+// children before parents. 'visits' counts, per node name, one for the
+// depth-first visit plus one for every parent that tried to aggregate it.
+void TFGraph::Account(const std::vector<GraphNode*>& roots, const Options& opts,
+                      std::map<string, int64>* visits) {
+  for (GraphNode* node : roots) {
+    if (visits->count(node->name()) != 0) continue;
+    (*visits)[node->name()] = 1;
+    node->ResetTotalStats();
+
+    // Depth-first: the children's totals must be final before they are
+    // folded into this node.
+    Account(node->children, opts, visits);
+
+    node->account = ShouldAccount(node, opts);
+    if (node->account) {
+      node->AddSelfToTotalStats();
+    }
+
+    for (GraphNode* child : node->children) {
+      // A child can hang under several parents but is only accounted once:
+      // its counter is 1 after the depth-first visit, 2 for the first
+      // parent aggregating it, and larger for any later parent.
+      int64& times_seen = (*visits)[child->name()];
+      times_seen += 1;
+      if (times_seen <= 2) {
+        node->AggregateTotalStats(child);
+      }
+    }
+  }
+}
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
new file mode 100644
index 0000000000..ee54534f56
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h
@@ -0,0 +1,116 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Build a graph structure based on op inputs/outputs. The graph is a directed
+// acyclic graph pointing *from outputs to inputs*.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Node of the graph view. On top of what ShowNode tracks, it records the
+// node's number of inputs in its proto, maintains the proto's 'total_inputs'
+// counter together with the aggregated statistics, and holds the child list.
+class GraphNode : public ShowNode {
+ public:
+  // Wraps 'node': 'inputs' is set to the node's input count and
+  // 'total_inputs' starts at zero.
+  explicit GraphNode(TFNode* node) : ShowNode(node) {
+    auto* node_proto = mutable_proto();
+    node_proto->set_inputs(node->inputs().size());
+    node_proto->set_total_inputs(0);
+  }
+
+  // Folds the totals of 'node' into this node. 'total_inputs' grows by the
+  // total of 'node' plus one.
+  void AggregateTotalStats(GraphNode* node) {
+    ShowNode::AggregateTotalStats(node);
+    const auto combined =
+        proto().total_inputs() + node->proto().total_inputs() + 1;
+    mutable_proto()->set_total_inputs(combined);
+  }
+
+  // Adds this node's own statistics, including its input count, to its
+  // totals.
+  void AddSelfToTotalStats() {
+    ShowNode::AddSelfToTotalStats();
+    const auto with_self = proto().total_inputs() + proto().inputs();
+    mutable_proto()->set_total_inputs(with_self);
+  }
+
+  // Clears the totals, including 'total_inputs'.
+  void ResetTotalStats() {
+    ShowNode::ResetTotalStats();
+    mutable_proto()->set_total_inputs(0);
+  }
+
+  // Nodes linked below this one; not owned.
+  std::vector<GraphNode*> children;
+};
+
+// Organizes TensorFlow ops in a graph structure, pointing from output ops
+// to input ops: a node's children are the nodes it takes as inputs.
+class TFGraph : public TFShow {
+ public:
+ explicit TFGraph(checkpoint::CheckpointReader* ckpt_reader)
+ : TFShow(ckpt_reader) {}
+ ~TFGraph() override {}
+ // Registers 'node' in nodes_map_ under the name of its NodeDef.
+ void AddNode(TFNode* node) override;
+ // Links nodes to their inputs and collects roots_; no-op once roots_ is set.
+ void Build() override;
+
+ private:
+ const ShowNode* ShowInternal(const Options& opts) override;  // Selects start nodes, accounts stats and formats the view.
+ // Always true for the graph view.
+ bool ShouldShowIfExtra(ShowNode* node, const Options& opts,
+ int depth) override {
+ return true;
+ }
+ // Creates (or replaces) a synthetic kTFGraphParent node called 'name'.
+ GraphNode* CreateParentNode(const string& name);
+ // Collects the nodes below 'roots' whose name fully matches a regex.
+ std::vector<GraphNode*> SearchRoot(const std::vector<GraphNode*>& roots,
+ const std::vector<string>& regexes,
+ std::set<string>* visited);
+ // Formats the nodes below 'roots' and returns the top-most shown ones.
+ std::vector<GraphNode*> PrintGraph(const std::vector<GraphNode*> roots,
+ const Options& opts, int depth, int hidden,
+ int last_ident, std::set<string>* visits);
+ // NOTE(review): no definition in tfprof_graph.cc in this change - confirm.
+ void VisualizeGraph(GraphNode* root, const Options& opts);
+ // NOTE(review): no definition in tfprof_graph.cc in this change - confirm.
+ std::vector<GraphNode*> GenerateGraphDot(
+ GraphNode* root, GraphNode* last_shown, const Options& opts, int depth,
+ int hidden, std::set<string>* declared_nodes,
+ std::set<string>* declared_edges, TFProfNode* parent);
+ // Computes total stats depth-first; 'visits' counts visits per node name.
+ void Account(const std::vector<GraphNode*>& roots, const Options& opts,
+ std::map<string, int64>* visits);
+
+ std::vector<GraphNode*> roots_;  // Nodes that are not an input of any other node.
+ std::vector<std::unique_ptr<NodeDef>> node_defs_;  // NodeDefs backing the synthetic parent nodes.
+ std::map<string, std::unique_ptr<TFNode>> parent_nodes_;  // Synthetic parent TFNodes, by name.
+ std::map<string, std::unique_ptr<GraphNode>> nodes_map_;  // All graph nodes, by name.
+};
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_GRAPH_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
new file mode 100644
index 0000000000..0e8ab366cb
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.cc
@@ -0,0 +1,47 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+
+#include "tensorflow/core/framework/allocation_description.pb.h"
+#include "tensorflow/core/framework/tensor_description.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Attaches the execution statistics of one run to this node and refreshes
+// the timing fields from them. The requested bytes of all outputs that carry
+// an allocation description are added to requested_bytes_.
+// 'step_stat' is stored as-is and must not be null.
+void TFNode::AddStepStat(const string& device, const NodeExecStats* step_stat) {
+  // A non-empty device here replaces the one taken from the GraphDef.
+  if (!device.empty()) device_ = device;
+
+  step_stat_ = step_stat;
+  op_start_micros_ = step_stat->all_start_micros();
+
+  // The execution time is only derived when both relative timestamps are
+  // non-zero; otherwise op_exec_micros_ keeps its previous value.
+  const auto rel_start = step_stat->op_start_rel_micros();
+  const auto rel_end = step_stat->op_end_rel_micros();
+  if (rel_end != 0 && rel_start != 0) {
+    op_exec_micros_ = rel_end - rel_start;
+  }
+  all_spent_micros_ = step_stat->all_end_rel_micros();
+
+  for (const auto& output : step_stat->output()) {
+    if (!output.has_tensor_description()) continue;
+    const auto& description = output.tensor_description();
+    if (!description.has_allocation_description()) continue;
+    requested_bytes_ += description.allocation_description().requested_bytes();
+  }
+}
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
new file mode 100644
index 0000000000..c8a8f5e7ec
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h
@@ -0,0 +1,106 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/core/framework/allocation_description.pb.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/step_stats.pb.h"
+#include "tensorflow/core/framework/tensor_description.pb.h"
+#include "tensorflow/core/framework/tensor_shape.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// Per-node record used by tfprof: wraps the NodeDef of a graph node together
+// with its inputs, op types, device, shape and the statistics attached later
+// (execution timings, requested bytes, float ops).
+// The NodeDef, NodeExecStats and input TFNode pointers are stored as raw
+// pointers and are never deleted by this class.
+class TFNode {
+ public:
+  // Builds the record for 'node'. With a null 'node' only the zero/empty
+  // defaults are set. Otherwise the op and device are copied from the
+  // NodeDef, and the dimensions of its "shape" attribute (if it has one)
+  // become the node's shape.
+  // NOTE(review): not marked explicit; callers outside this file may rely on
+  // the implicit conversion - confirm before tightening.
+  TFNode(const NodeDef* node)
+      : node_(node),
+        step_stat_(nullptr),
+        op_start_micros_(0),
+        op_exec_micros_(0),
+        all_spent_micros_(0),
+        requested_bytes_(0),
+        float_ops_(0) {
+    if (!node) return;
+
+    for (const auto& attr : node->attr()) {
+      // TODO(xpan): Also consider _output_shapes.
+      if (attr.first != "shape" || !attr.second.has_shape()) continue;
+      if (!shape_.empty()) {
+        fprintf(stderr, "Found duplicated shapes!\n");
+        continue;
+      }
+      std::vector<int64> shape_vec;
+      for (const auto& d : attr.second.shape().dim()) {
+        shape_vec.push_back(d.size());
+      }
+      update_shape(shape_vec);
+    }
+    op_types_.insert(node->op());
+    device_ = node->device();
+  }
+
+  TFNode() : TFNode(nullptr) {}
+
+  // Registers 'input' under the name of its NodeDef; an input already stored
+  // under that name is replaced. 'input' must have a non-null NodeDef.
+  void AddInput(TFNode* input) { inputs_[input->node_def()->name()] = input; }
+
+  // Adds 'op_type' to the set of op types of this node.
+  void AddOpType(const string& op_type) { op_types_.insert(op_type); }
+
+  // Attaches run-time statistics; see the definition for details.
+  void AddStepStat(const string& device, const NodeExecStats* step_stat);
+
+  // Sets the float op count. Despite the name, the value replaces the
+  // previous one rather than being added to it.
+  void AddFloatOps(int64 float_ops) { float_ops_ = float_ops; }
+
+  // Read-only accessors. They are const-qualified so that they can also be
+  // used through const pointers and references.
+  const NodeDef* node_def() const { return node_; }
+  const std::map<string, TFNode*>& inputs() const { return inputs_; }
+  int64 op_start_micros() const { return op_start_micros_; }
+  int64 op_exec_micros() const { return op_exec_micros_; }
+  int64 all_spent_micros() const { return all_spent_micros_; }
+  // Correctly spelled accessor for the requested bytes.
+  int64 requested_bytes() const { return requested_bytes_; }
+  // Misspelled original name, kept because existing callers use it.
+  int64 requested_byptes() const { return requested_bytes_; }
+  int64 float_ops() const { return float_ops_; }
+  string device() const { return device_; }
+  const std::set<string>& op_types() const { return op_types_; }
+
+  const std::vector<int64>& shape() const { return shape_; }
+  // Replaces the stored shape with 'shape'.
+  void update_shape(const std::vector<int64>& shape) { shape_ = shape; }
+
+ private:
+  // Inputs of this node, keyed by NodeDef name; not owned.
+  std::map<string, TFNode*> inputs_;
+  // Graph definition of this node; not owned, may be null.
+  const NodeDef* node_;
+  // Statistics set by AddStepStat(); not owned, null until then.
+  const NodeExecStats* step_stat_;
+
+  std::vector<int64> shape_;
+  std::set<string> op_types_;
+  string device_;
+  int64 op_start_micros_;
+  int64 op_exec_micros_;
+  int64 all_spent_micros_;
+  int64 requested_bytes_;
+  int64 float_ops_;
+};
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_NODE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
new file mode 100644
index 0000000000..2574415fdd
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.cc
@@ -0,0 +1,57 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// Renders all options as text, one "<flag name><value>" line per option.
+// Flag names come from kOptions (in declaration order) and are left-justified
+// in a 28-character column; list-valued options are joined with ",".
+string Options::ToString() const {
+  // Flatten the list-valued options first so the Printf call below is a plain
+  // sequence of name/value pairs.
+  const string devices = str_util::Join(device_regexes, ",");
+  const string account_types = str_util::Join(account_type_regexes, ",");
+  const string start_names = str_util::Join(start_name_regexes, ",");
+  const string trim_names = str_util::Join(trim_name_regexes, ",");
+  const string show_names = str_util::Join(show_name_regexes, ",");
+  const string hide_names = str_util::Join(hide_name_regexes, ",");
+  const string selected = str_util::Join(select, ",");
+  const char* const displayed_only =
+      account_displayed_op_only ? "true" : "false";
+  const char* const viz_flag = viz ? "true" : "false";
+
+  return strings::Printf(
+      "%-28s%d\n"
+      "%-28s%lld\n"
+      "%-28s%lld\n"
+      "%-28s%lld\n"
+      "%-28s%lld\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n"
+      "%-28s%s\n",
+      kOptions[0], max_depth,
+      kOptions[1], min_bytes,
+      kOptions[2], min_micros,
+      kOptions[3], min_params,
+      kOptions[4], min_float_ops,
+      kOptions[5], devices.c_str(),
+      kOptions[6], order_by.c_str(),
+      kOptions[7], account_types.c_str(),
+      kOptions[8], start_names.c_str(),
+      kOptions[9], trim_names.c_str(),
+      kOptions[10], show_names.c_str(),
+      kOptions[11], hide_names.c_str(),
+      kOptions[12], displayed_only,
+      kOptions[13], selected.c_str(),
+      kOptions[14], viz_flag,
+      kOptions[15], dump_to_file.c_str());
+}
+
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
new file mode 100644
index 0000000000..a0c52e6d1a
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h
@@ -0,0 +1,119 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
+
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Command-line flag names, one per Options field. Options::ToString() indexes
+// this table by position (kOptions[0] .. kOptions[15]), so the order here must
+// stay in sync with that function.
+static const char* const kOptions[] = {
+ "-max_depth",
+ "-min_bytes",
+ "-min_micros",
+ "-min_params",
+ "-min_float_ops",
+ "-device_regexes",
+ "-order_by",
+ "-account_type_regexes",
+ "-start_name_regexes",
+ "-trim_name_regexes",
+ "-show_name_regexes",
+ "-hide_name_regexes",
+ "-account_displayed_op_only",
+ "-select",
+ "-viz",
+ "-dump_to_file",
+};
+
+// Accepted values of Options::order_by. TFShow::SortNodes() compares against
+// these entries by index (kOrderBy[0] .. kOrderBy[4]), so keep the order.
+static const char* const kOrderBy[] = {
+ "name", "bytes", "micros", "params", "float_ops",
+};
+
+// Names of the columns that can be requested through Options::select.
+// Append Only: callers refer to entries by index (for example kShown[5] is
+// "tensor_value"), so existing entries must not be reordered or removed.
+static const char* const kShown[] = {
+ "bytes", "micros", "params", "float_ops",
+ "num_hidden_ops", "tensor_value", "device", "op_types",
+};
+
+// Command names. NOTE(review): presumably the commands accepted by the tfprof
+// command line -- the code that consumes this table is not in this header.
+static const char* const kCmds[] = {
+ "scope", "graph", "set", "help",
+};
+
+// Bundle of display/filter options passed to the tfprof views. Fields
+// correspond one-to-one, in order, to the flag names in kOptions.
+struct Options {
+ public:
+ virtual ~Options() {}
+ // Stores a copy of every argument. `select` is given as a vector but kept as
+ // a std::set, so duplicates collapse and lookups use set::find.
+ // `dump_to_file` defaults to the empty string.
+ Options(int max_depth, tensorflow::int64 min_bytes,
+ tensorflow::int64 min_micros, tensorflow::int64 min_params,
+ tensorflow::int64 min_float_ops,
+ const std::vector<string>& device_regexes, const string& order_by,
+ const std::vector<string>& account_type_regexes,
+ const std::vector<string>& start_name_regexes,
+ const std::vector<string>& trim_name_regexes,
+ const std::vector<string>& show_name_regexes,
+ const std::vector<string>& hide_name_regexes,
+ bool account_displayed_op_only, const std::vector<string>& select,
+ bool viz, const string& dump_to_file = "")
+ : max_depth(max_depth),
+ min_bytes(min_bytes),
+ min_micros(min_micros),
+ min_params(min_params),
+ min_float_ops(min_float_ops),
+ device_regexes(device_regexes),
+ order_by(order_by),
+ account_type_regexes(account_type_regexes),
+ start_name_regexes(start_name_regexes),
+ trim_name_regexes(trim_name_regexes),
+ show_name_regexes(show_name_regexes),
+ hide_name_regexes(hide_name_regexes),
+ account_displayed_op_only(account_displayed_op_only),
+ select(select.begin(), select.end()),
+ viz(viz),
+ dump_to_file(dump_to_file) {}
+
+ // Returns a multi-line, human-readable listing of all option values.
+ string ToString() const;
+
+ // Thresholds: TFShow::ShouldShow() hides nodes deeper than max_depth or
+ // whose own stats fall below the min_* values.
+ int max_depth;
+ tensorflow::int64 min_bytes;
+ tensorflow::int64 min_micros;
+ tensorflow::int64 min_params;
+ tensorflow::int64 min_float_ops;
+ std::vector<string> device_regexes;
+ // One of the kOrderBy values; anything else falls back to ordering by name
+ // in TFShow::SortNodes().
+ string order_by;
+
+ // Regex filters; the views match them with RE2::FullMatch.
+ std::vector<string> account_type_regexes;
+ std::vector<string> start_name_regexes;
+ std::vector<string> trim_name_regexes;
+ std::vector<string> show_name_regexes;
+ std::vector<string> hide_name_regexes;
+ bool account_displayed_op_only;
+
+ // Requested output columns (see kShown).
+ std::set<string> select;
+ bool viz;
+ // When non-empty, TFShow::Show() writes the output to this file instead of
+ // stdout.
+ string dump_to_file;
+};
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_OPTIONS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
new file mode 100644
index 0000000000..6b2bc298cc
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.cc
@@ -0,0 +1,191 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
+
+#include <stdio.h>
+#include <utility>
+
+#include "tensorflow/c/c_api.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Returns the ScopeNode registered under `name`. If there is none yet, a
+// synthetic node with op type kTFScopeParent is created: its NodeDef is stored
+// in node_defs_, its TFNode in parent_nodes_ and its ScopeNode in nodes_map_.
+ScopeNode* TFScope::CreateParentNode(const string& name) {
+  const auto existing = nodes_map_.find(name);
+  if (existing != nodes_map_.end()) {
+    return existing->second.get();
+  }
+
+  // Build the backing NodeDef first; the TFNode keeps a pointer to it.
+  std::unique_ptr<NodeDef> def(new NodeDef());
+  def->set_name(name);
+  def->set_op(kTFScopeParent);
+  node_defs_.push_back(std::move(def));
+
+  std::unique_ptr<TFNode>& parent = parent_nodes_[name];
+  parent = std::unique_ptr<TFNode>(new TFNode(node_defs_.back().get()));
+
+  std::unique_ptr<ScopeNode>& scope_node = nodes_map_[name];
+  scope_node = std::unique_ptr<ScopeNode>(new ScopeNode(parent.get()));
+  return scope_node.get();
+}
+
+// Registers `node` under its NodeDef name (unless that name is already
+// present) and makes sure a node exists for every enclosing scope, i.e. for
+// every prefix of the name that ends just before a "/".
+void TFScope::AddNode(TFNode* node) {
+  const string& full_name = node->node_def()->name();
+  if (nodes_map_.count(full_name) == 0) {
+    nodes_map_[full_name] = std::unique_ptr<ScopeNode>(new ScopeNode(node));
+  }
+
+  // Strip one trailing path component per iteration: "a/b/c" -> "a/b" -> "a".
+  string scope = full_name;
+  for (auto slash = scope.find_last_of("/"); slash != string::npos;
+       slash = scope.find_last_of("/")) {
+    scope = scope.substr(0, slash);
+    if (nodes_map_.count(scope) == 0) {
+      CHECK(CreateParentNode(scope));
+    }
+  }
+}
+
+// Links the registered nodes into a tree. Does nothing if roots_ is already
+// populated. A node whose name has no "/" becomes a root; every other node is
+// appended to the children of the node named by the text before its last "/".
+void TFScope::Build() {
+  if (!roots_.empty()) return;
+
+  for (const auto& entry : nodes_map_) {
+    ScopeNode* current = entry.second.get();
+    const string& full_name = current->name();
+    const auto slash = full_name.find_last_of("/");
+    if (slash == string::npos) {
+      roots_.push_back(current);
+      continue;
+    }
+    // AddNode() creates every enclosing scope, so the parent entry is expected
+    // to exist already.
+    nodes_map_[full_name.substr(0, slash)]->children.push_back(current);
+  }
+}
+
+// Produces the scope view for `opts` and returns its synthetic root node
+// (named kTFProfRoot), whose formatted_str and proto hold the output.
+const ShowNode* TFScope::ShowInternal(const Options& opts) {
+  // With the single catch-all pattern ".*" the view starts at the real roots;
+  // otherwise the tree is searched for the nodes matching start_name_regexes.
+  const bool start_at_roots = opts.start_name_regexes.size() == 1 &&
+                              opts.start_name_regexes[0] == ".*";
+  const std::vector<ScopeNode*> start_nodes =
+      start_at_roots ? roots_ : SearchRoot(roots_, opts.start_name_regexes);
+
+  ScopeNode* top = CreateParentNode(kTFProfRoot);
+  top->children.assign(start_nodes.begin(), start_nodes.end());
+  Account({top}, opts);
+
+  return PrintScope({top}, opts, 1, 0)[0];
+}
+
+// Depth-first search for start nodes: returns every node under `roots` whose
+// full name matches one of `regexes` (RE2::FullMatch). The search does not
+// descend below a matching node.
+std::vector<ScopeNode*> TFScope::SearchRoot(
+    std::vector<ScopeNode*> roots, const std::vector<string>& regexes) {
+  std::vector<ScopeNode*> matches;
+  for (ScopeNode* candidate : roots) {
+    bool is_start_node = false;
+    for (const string& pattern : regexes) {
+      if (RE2::FullMatch(candidate->name(), pattern)) {
+        is_start_node = true;
+        break;
+      }
+    }
+
+    if (is_start_node) {
+      // This branch is covered by the match; its descendants are not searched.
+      matches.push_back(candidate);
+      continue;
+    }
+
+    const std::vector<ScopeNode*> below =
+        SearchRoot(candidate->children, regexes);
+    matches.insert(matches.end(), below.begin(), below.end());
+  }
+  return matches;
+}
+
+// Recursively renders `roots` and their descendants.
+//
+// For each node that passes ShouldShow(), formatted_str is rebuilt as: the
+// node's own line indented by `last_ident` spaces, then (if "tensor_value" is
+// selected and the checkpoint has the tensor) its value, then the text of its
+// displayed children. The children's protos are attached to the node's proto.
+// A node that is not shown contributes nothing itself; its displayed
+// descendants are handed up to the caller instead.
+//
+// Args:
+//   roots: nodes to render at this level.
+//   opts: filter/selection options.
+//   depth: depth of `roots`, as passed to ShouldShow(); ShowInternal() starts
+//     at 1 for the synthetic root.
+//   last_ident: indentation, in spaces, for nodes displayed at this level.
+//
+// Returns the nodes that should appear directly under the caller's node.
+std::vector<ScopeNode*> TFScope::PrintScope(const std::vector<ScopeNode*> roots,
+                                            const Options& opts, int depth,
+                                            int last_ident) {
+  std::vector<ScopeNode*> show_nodes;
+
+  for (ScopeNode* node : roots) {
+    int nlast_ident = last_ident;
+    bool show = ShouldShow(node, opts, depth);
+    if (show) {
+      node->formatted_str.clear();
+      // The same ScopeNode objects are reused across Show() calls. Drop the
+      // children attached by a previous call, otherwise add_children() below
+      // would append duplicates to the proto on every call.
+      node->mutable_proto()->clear_children();
+      if (opts.account_displayed_op_only) {
+        // Totals are recomputed from the displayed nodes only.
+        node->ResetTotalStats();
+        node->AddSelfToTotalStats();
+      }
+      nlast_ident += 2;
+    }
+
+    // A trimmed node may still be shown itself, but its subtree is not visited.
+    std::vector<ScopeNode*> show_cnodes;
+    if (!ShouldTrim(node, opts.trim_name_regexes)) {
+      show_cnodes = PrintScope(node->children, opts, depth + 1, nlast_ident);
+    }
+    if (show) {
+      show_cnodes = SortNodes(show_cnodes, opts);
+      string children_str;
+      for (ScopeNode* sc : show_cnodes) {
+        children_str += sc->formatted_str;
+        node->mutable_proto()->add_children()->MergeFrom(sc->proto());
+        if (opts.account_displayed_op_only) {
+          node->AggregateTotalStats(sc);
+        }
+      }
+
+      // Format after aggregating so the line reflects the final totals.
+      node->formatted_str =
+          strings::Printf("%s%s\n", string(last_ident, ' ').c_str(),
+                          node->Format(opts).c_str());
+
+      // kShown[5] is "tensor_value".
+      if (opts.select.find(kShown[5]) != opts.select.end()) {
+        std::unique_ptr<TFProfTensor> tfprof_tensor;
+        if (LookUpCheckPoint(node->name(), &tfprof_tensor)) {
+          string value_str;
+          tfprof_tensor->Display(&value_str,
+                                 node->mutable_proto()->mutable_tensor_value());
+          node->formatted_str += value_str;
+        }
+      }
+
+      node->formatted_str += children_str;
+      show_nodes.push_back(node);
+    } else {
+      // Hidden node: pass its displayed descendants up to the caller.
+      show_nodes.insert(show_nodes.end(), show_cnodes.begin(),
+                        show_cnodes.end());
+    }
+  }
+  return show_nodes;
+}
+
+// Recomputes the aggregated ("total") statistics of every node under `roots`,
+// bottom-up. A node's own stats are included only when ShouldAccount()
+// accepts it; the totals of all its children are always added.
+void TFScope::Account(const std::vector<ScopeNode*>& roots,
+                      const Options& opts) {
+  for (ScopeNode* current : roots) {
+    current->ResetTotalStats();
+    // Children first, so their totals are final before being summed below.
+    Account(current->children, opts);
+
+    const bool counted = ShouldAccount(current, opts);
+    current->account = counted;
+    if (counted) {
+      current->AddSelfToTotalStats();
+    }
+    for (ScopeNode* child : current->children) {
+      current->AggregateTotalStats(child);
+    }
+  }
+}
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
new file mode 100644
index 0000000000..3a8ca52b43
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h
@@ -0,0 +1,88 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Build a tree structure based on the TensorFlow op names.
+// For example, 'name1/name2' is a child of 'name1'.
+// Stats are aggregated from descendants to ancestors.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// A ShowNode that also knows its children in the scope (name-prefix) tree.
+// The three stat helpers below simply forward to the protected ShowNode
+// implementations, making them callable by TFScope.
+class ScopeNode : public ShowNode {
+ public:
+ explicit ScopeNode(TFNode* node) : ShowNode(node) {}
+ ~ScopeNode() override {}
+
+ // Adds `node`'s total stats to this node's totals.
+ void AggregateTotalStats(ScopeNode* node) {
+ ShowNode::AggregateTotalStats(node);
+ }
+
+ // Adds this node's own stats to its totals.
+ void AddSelfToTotalStats() { ShowNode::AddSelfToTotalStats(); }
+
+ // Sets this node's totals back to zero.
+ void ResetTotalStats() { ShowNode::ResetTotalStats(); }
+
+ // Child nodes; raw pointers to nodes stored elsewhere (TFScope keeps the
+ // owning unique_ptrs in its nodes_map_).
+ std::vector<ScopeNode*> children;
+};
+
+// Scope view: arranges nodes in a tree by the "/"-separated components of
+// their names and displays stats aggregated along that tree.
+class TFScope : public TFShow {
+ public:
+ explicit TFScope(checkpoint::CheckpointReader* ckpt_reader)
+ : TFShow(ckpt_reader) {}
+ ~TFScope() override {}
+
+ // Registers `node` and creates synthetic nodes for its enclosing scopes.
+ void AddNode(TFNode* node) override;
+
+ // Links the registered nodes into parent/child relations and collects the
+ // roots. A no-op once roots have been collected.
+ void Build() override;
+
+ private:
+ // Accounts and renders the tree for `opts`; returns the synthetic root.
+ const ShowNode* ShowInternal(const Options& opts) override;
+
+ // Returns the node registered under `name`, creating a synthetic one if
+ // none exists.
+ ScopeNode* CreateParentNode(const string& name);
+
+ // Finds the topmost nodes under `roots` whose names match `regexes`.
+ std::vector<ScopeNode*> SearchRoot(std::vector<ScopeNode*> roots,
+ const std::vector<string>& regexes);
+
+ // Renders `roots` recursively; returns the nodes displayed at this level.
+ std::vector<ScopeNode*> PrintScope(const std::vector<ScopeNode*> roots,
+ const Options& opts, int depth,
+ int last_ident);
+
+ // Recomputes total stats for `roots` and their descendants.
+ void Account(const std::vector<ScopeNode*>& roots, const Options& opts);
+
+ // Nodes whose names contain no "/"; filled by Build().
+ std::vector<ScopeNode*> roots_;
+ // Storage for the NodeDefs/TFNodes backing synthetic parent nodes.
+ std::vector<std::unique_ptr<NodeDef>> node_defs_;
+ std::map<string, std::unique_ptr<TFNode>> parent_nodes_;
+ // All scope nodes (real and synthetic), keyed by full name.
+ std::map<string, std::unique_ptr<ScopeNode>> nodes_map_;
+};
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SCOPE_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
new file mode 100644
index 0000000000..f7275d8ae4
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.cc
@@ -0,0 +1,266 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+
+#include <memory>
+#include <set>
+
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Wraps `node` for display and seeds the proto with the node's own
+// (non-aggregated) values: name, device (when non-empty), execution micros,
+// requested bytes and float ops. If the node has a shape and every dimension
+// is non-negative, the product of the dimensions is stored as the parameter
+// count; otherwise the parameter count is left untouched.
+ShowNode::ShowNode(TFNode* node) : node(node), account(true) {
+  mutable_proto()->set_name(name());
+  if (!node->device().empty()) {
+    mutable_proto()->set_device(node->device());
+  }
+  mutable_proto()->set_exec_micros(node->op_exec_micros());
+  mutable_proto()->set_requested_bytes(node->requested_byptes());
+  mutable_proto()->set_float_ops(node->float_ops());
+
+  if (!node->shape().empty()) {
+    int64 params = 1;
+    bool complete_shape = true;
+    for (int64 d : node->shape()) {
+      // A dimension can be < 0 when its size is unknown; such a shape cannot
+      // be turned into a parameter count.
+      if (d < 0) {
+        complete_shape = false;
+        break;
+      }
+      params *= d;
+    }
+    if (complete_shape) {
+      mutable_proto()->set_parameters(proto_.parameters() + params);
+    } else {
+      // Newline-terminated like the other stderr diagnostics, so consecutive
+      // messages do not run together on one line.
+      fprintf(stderr, "Incomplete shape.\n");
+    }
+  }
+}
+
+// Returns the display line for this node: just the name when no columns are
+// selected, otherwise "name (meta)" with meta produced by FormatMeta().
+string ShowNode::Format(const Options& opts) {
+  const bool has_columns = !opts.select.empty();
+  if (has_columns) {
+    const string meta = FormatMeta(opts);
+    return strings::Printf("%s (%s)", name().c_str(), meta.c_str());
+  }
+  return name();
+}
+
+// Builds the comma-separated metadata shown in parentheses after the node
+// name. Columns appear in a fixed order -- params, float ops, bytes, micros,
+// device, op types -- and only if selected in opts.select. Numeric columns are
+// rendered as "self/total"; "self" is replaced by "--" when this node is not
+// accounted.
+string ShowNode::FormatMeta(const Options& opts) {
+  const auto selected = [&opts](const char* column) {
+    return opts.select.count(column) > 0;
+  };
+
+  std::vector<string> columns;
+
+  if (selected(kShown[2])) {  // "params"
+    const string shape_str = FormatShapes(node->shape());
+    if (!shape_str.empty()) {
+      columns.push_back(shape_str);
+    }
+    const string total = FormatNumber(proto().total_parameters()) + " params";
+    columns.push_back(account
+                          ? FormatNumber(proto().parameters()) + "/" + total
+                          : "--/" + total);
+  }
+
+  if (selected(kShown[3])) {  // "float_ops"
+    const string total = FormatNumber(proto().total_float_ops()) + " flops";
+    columns.push_back(account
+                          ? FormatNumber(proto().float_ops()) + "/" + total
+                          : "--/" + total);
+  }
+
+  if (selected(kShown[0])) {  // "bytes"
+    const string total = FormatMemory(proto().total_requested_bytes());
+    columns.push_back(
+        account ? FormatMemory(proto().requested_bytes()) + "/" + total
+                : "--/" + total);
+  }
+
+  if (selected(kShown[1])) {  // "micros"
+    const string total = FormatTime(proto().total_exec_micros());
+    columns.push_back(account
+                          ? FormatTime(proto().exec_micros()) + "/" + total
+                          : "--/" + total);
+  }
+
+  if (selected(kShown[6]) && !proto().device().empty()) {  // "device"
+    columns.push_back(proto().device());
+  }
+
+  if (selected(kShown[7])) {  // "op_types"
+    // Work on a copy: the device is listed as an additional type.
+    std::set<string> types = node->op_types();
+    if (!proto().device().empty()) {
+      types.insert(proto().device());
+    }
+    columns.push_back(str_util::Join(types, "|"));
+  }
+
+  return str_util::Join(columns, ", ");
+}
+
+// Returns a mutable pointer to this node's output proto.
+TFProfNode* ShowNode::mutable_proto() { return &proto_; }
+
+// Returns a read-only reference to this node's output proto.
+const TFProfNode& ShowNode::proto() const { return proto_; }
+
+// Adds the total_* statistics of `node` (execution micros, requested bytes,
+// parameters, float ops) to this node's total_* statistics.
+void ShowNode::AggregateTotalStats(ShowNode* node) {
+  const TFProfNode& other = node->proto();
+  TFProfNode* self = mutable_proto();
+  self->set_total_exec_micros(self->total_exec_micros() +
+                              other.total_exec_micros());
+  self->set_total_requested_bytes(self->total_requested_bytes() +
+                                  other.total_requested_bytes());
+  self->set_total_parameters(self->total_parameters() +
+                             other.total_parameters());
+  self->set_total_float_ops(self->total_float_ops() + other.total_float_ops());
+}
+
+// Adds this node's own statistics (execution micros, requested bytes,
+// parameters, float ops) to its total_* statistics.
+void ShowNode::AddSelfToTotalStats() {
+  TFProfNode* self = mutable_proto();
+  self->set_total_exec_micros(self->total_exec_micros() + self->exec_micros());
+  self->set_total_requested_bytes(self->total_requested_bytes() +
+                                  self->requested_bytes());
+  self->set_total_parameters(self->total_parameters() + self->parameters());
+  self->set_total_float_ops(self->total_float_ops() + self->float_ops());
+}
+
+// Sets all four total_* statistics of this node back to zero.
+void ShowNode::ResetTotalStats() {
+  TFProfNode* self = mutable_proto();
+  self->set_total_exec_micros(0);
+  self->set_total_requested_bytes(0);
+  self->set_total_parameters(0);
+  self->set_total_float_ops(0);
+}
+
+// Renders the view for `opts` and emits the text: to stdout when
+// opts.dump_to_file is empty, otherwise to that file (a write failure is
+// reported on stderr). Returns the proto of the rendered root node.
+const TFProfNode& TFShow::Show(const Options& opts) {
+  const ShowNode* root = ShowInternal(opts);
+  const string& text = root->formatted_str;
+
+  if (!opts.dump_to_file.empty()) {
+    const Status status =
+        WriteStringToFile(Env::Default(), opts.dump_to_file, text);
+    if (!status.ok()) {
+      fprintf(stderr, "%s\n", status.ToString().c_str());
+    }
+    return root->proto();
+  }
+
+  printf("%s", text.c_str());
+  fflush(stdout);
+  return root->proto();
+}
+
+// Reads the tensor called `name` from the checkpoint reader and stores it in
+// `*tensor`, wrapped in a TFProfTensor. Returns false -- leaving `*tensor`
+// untouched -- for the synthetic root name, when there is no checkpoint
+// reader, when `tensor` is null, or when the read fails (the status message is
+// printed to stderr in that case).
+bool TFShow::LookUpCheckPoint(const string& name,
+                              std::unique_ptr<TFProfTensor>* tensor) {
+  if (name == kTFProfRoot || !ckpt_reader_ || !tensor) {
+    return false;
+  }
+
+  std::unique_ptr<Tensor> loaded;
+  TF_Status* status = TF_NewStatus();
+  ckpt_reader_->GetTensor(name, &loaded, status);
+
+  const bool ok = TF_GetCode(status) == TF_OK;
+  if (ok) {
+    tensor->reset(new TFProfTensor(std::move(loaded)));
+  } else {
+    fprintf(stderr, "%s\n", TF_Message(status));
+  }
+  // Single release point for the C status object on both paths.
+  TF_DeleteStatus(status);
+  return ok;
+}
+
+// Decides whether `node` is displayed. The synthetic root is always shown.
+// Any other node must be accounted, meet all min_* thresholds with its own
+// stats, be within max_depth, pass ShouldShowIfExtra(), have a device matching
+// device_regexes, a name matching show_name_regexes, and a name matching none
+// of hide_name_regexes.
+bool TFShow::ShouldShow(ShowNode* node, const Options& opts, int depth) {
+  if (node->name() == kTFProfRoot) return true;
+  if (!node->account) return false;
+
+  const TFProfNode& stats = node->proto();
+  // Evaluated left to right; the subclass hook runs only if all the cheaper
+  // checks pass.
+  const bool passes_thresholds =
+      stats.requested_bytes() >= opts.min_bytes &&
+      stats.exec_micros() >= opts.min_micros &&
+      stats.parameters() >= opts.min_params &&
+      stats.float_ops() >= opts.min_float_ops && depth <= opts.max_depth &&
+      ShouldShowIfExtra(node, opts, depth);
+  if (!passes_thresholds) return false;
+
+  // True if `regexes` is exactly the catch-all {".*"} or one of its patterns
+  // fully matches `text`.
+  const auto covered_by = [](const std::vector<string>& regexes,
+                             const string& text) {
+    if (regexes.size() == 1 && regexes[0] == ".*") return true;
+    for (const string& regex : regexes) {
+      if (RE2::FullMatch(text, regex)) return true;
+    }
+    return false;
+  };
+
+  if (!covered_by(opts.device_regexes, stats.device())) return false;
+  if (!covered_by(opts.show_name_regexes, node->name())) return false;
+
+  // A hide pattern overrides the show patterns.
+  for (const string& regex : opts.hide_name_regexes) {
+    if (RE2::FullMatch(node->name(), regex)) return false;
+  }
+  return true;
+}
+
+// Returns true if the node's name fully matches any pattern in `regexes`.
+bool TFShow::ShouldTrim(ShowNode* node, const std::vector<string>& regexes) {
+  const string& node_name = node->name();
+  return std::any_of(regexes.begin(), regexes.end(),
+                     [&node_name](const string& regex) {
+                       return RE2::FullMatch(node_name, regex);
+                     });
+}
+
+// Returns true if `node` counts towards the aggregated stats: either
+// account_type_regexes is exactly {".*"}, or one of its patterns fully matches
+// one of the node's op types or the node's device.
+bool TFShow::ShouldAccount(ShowNode* node, const Options& opts) {
+  const std::vector<string>& regexes = opts.account_type_regexes;
+  if (regexes.size() == 1 && regexes[0] == ".*") {
+    return true;
+  }
+
+  for (const string& regex : regexes) {
+    for (const string& type : node->node->op_types()) {
+      if (RE2::FullMatch(type, regex)) return true;
+    }
+    // The device is treated like one more type.
+    if (RE2::FullMatch(node->proto().device(), regex)) return true;
+  }
+  return false;
+}
+
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
new file mode 100644
index 0000000000..4b5d6592e5
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h
@@ -0,0 +1,127 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Parent class and utilities for tfprof_graph and tfprof_scope.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Display-side wrapper around a TFNode: holds the output proto, the formatted
+// text and the accounting flag used by the views.
+class ShowNode {
+ public:
+ // Wraps `node` and seeds the proto from it (defined in tfprof_show.cc).
+ explicit ShowNode(TFNode* node);
+ virtual ~ShowNode() {}
+
+ // Name taken from the wrapped node's NodeDef.
+ const string& name() const { return node->node_def()->name(); }
+ TFProfNode* mutable_proto();
+ const TFProfNode& proto() const;
+
+ // Returns the node's display line: the name, plus "(meta)" when any column
+ // is selected.
+ string Format(const Options& opts);
+
+ // Returns the comma-separated metadata for the selected columns.
+ string FormatMeta(const Options& opts);
+
+ // The wrapped node (raw pointer; not deleted by this class).
+ TFNode* node;
+ // Whether this node's own stats are included in the totals.
+ bool account;
+ // Rendered text for this node and its displayed children.
+ string formatted_str;
+
+ protected:
+ // Adds `node`'s total stats to this node's totals.
+ void AggregateTotalStats(ShowNode* node);
+
+ // Adds this node's own stats to its totals.
+ void AddSelfToTotalStats();
+
+ // Zeroes this node's totals.
+ void ResetTotalStats();
+
+ TFProfNode proto_;
+};
+
+// Base class of the tfprof views. Subclasses build their own node structure
+// (AddNode/Build) and render it (ShowInternal); this class provides the shared
+// filtering, sorting, checkpoint lookup and output logic.
+class TFShow {
+ public:
+  // `ckpt_reader` may be null; it is stored as a raw pointer and is not
+  // deleted by this class.
+  explicit TFShow(checkpoint::CheckpointReader* ckpt_reader)
+      : ckpt_reader_(ckpt_reader) {}
+  virtual ~TFShow() {}
+  virtual void AddNode(TFNode* node) = 0;
+  virtual void Build() = 0;
+  // Renders the view, prints it (or dumps it to opts.dump_to_file) and returns
+  // the proto of the rendered root.
+  const TFProfNode& Show(const Options& opts);
+
+ protected:
+  virtual const ShowNode* ShowInternal(const Options& opts) = 0;
+
+  // Loads tensor `name` from the checkpoint into `*tensor`; returns false if
+  // it is unavailable.
+  bool LookUpCheckPoint(const string& name,
+                        std::unique_ptr<TFProfTensor>* tensor);
+
+  // Overridden by subclass if extra requirements need to be met.
+  virtual bool ShouldShowIfExtra(ShowNode* node, const Options& opts,
+                                 int depth) {
+    return true;
+  }
+
+  bool ShouldShow(ShowNode* node, const Options& opts, int depth);
+
+  bool ShouldTrim(ShowNode* node, const std::vector<string>& regexes);
+
+  bool ShouldAccount(ShowNode* node, const Options& opts);
+
+  // Returns a copy of `nodes` ordered by opts.order_by: ascending by name for
+  // "name" (and for unrecognized values), descending by the matching total_*
+  // stat for "bytes", "micros", "params" and "float_ops". A node named
+  // kTFProfRoot always sorts first. With an empty order_by or no nodes the
+  // input is returned unchanged.
+  template <typename T>
+  std::vector<T*> SortNodes(const std::vector<T*>& nodes, const Options& opts) {
+    if (opts.order_by.empty() || nodes.empty()) {
+      return nodes;
+    }
+    std::vector<T*> sorted_nodes = nodes;
+    std::sort(sorted_nodes.begin(), sorted_nodes.end(), [&opts](const T* n1,
+                                                                const T* n2) {
+      // The root precedes everything except another root. Returning true for
+      // root-vs-root would make the comparator non-irreflexive, which
+      // std::sort does not allow.
+      if (n1->name() == kTFProfRoot) return n2->name() != kTFProfRoot;
+      if (n2->name() == kTFProfRoot) return false;
+      bool name_cmp = n1->name() < n2->name();
+      if (opts.order_by == kOrderBy[0]) {
+        return name_cmp;
+      } else if (opts.order_by == kOrderBy[1]) {
+        return n1->proto().total_requested_bytes() >
+               n2->proto().total_requested_bytes();
+      } else if (opts.order_by == kOrderBy[2]) {
+        return n1->proto().total_exec_micros() >
+               n2->proto().total_exec_micros();
+      } else if (opts.order_by == kOrderBy[3]) {
+        return n1->proto().total_parameters() > n2->proto().total_parameters();
+      } else if (opts.order_by == kOrderBy[4]) {
+        return n1->proto().total_float_ops() > n2->proto().total_float_ops();
+      }
+      return name_cmp;
+    });
+    return sorted_nodes;
+  }
+
+  checkpoint::CheckpointReader* ckpt_reader_;
+};
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_SHOW_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
new file mode 100644
index 0000000000..81396e31cc
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show_test.cc
@@ -0,0 +1,92 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+#include <utility>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Test fixture: loads the graph, run metadata, op log and checkpoint from the
+// tfprof testdata directory and builds a TFStats from them. Any load failure
+// aborts via TF_CHECK_OK/CHECK.
+class TFProfShowTest : public ::testing::Test {
+ protected:
+ TFProfShowTest() {
+ // GraphDef in text format.
+ string graph_path = io::JoinPath(
+ testing::TensorFlowSrcRoot(),
+ "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+ std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
+ TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
+
+ // RunMetadata in binary proto format.
+ std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
+ new tensorflow::RunMetadata());
+ string run_meta_path =
+ io::JoinPath(testing::TensorFlowSrcRoot(),
+ "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+ TF_CHECK_OK(
+ ReadBinaryProto(Env::Default(), run_meta_path, run_meta_pb.get()));
+
+ // OpLog in binary proto format.
+ std::unique_ptr<OpLog> op_log_pb(new OpLog());
+ string op_log_path = io::JoinPath(
+ testing::TensorFlowSrcRoot(),
+ "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+ TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_path, op_log_pb.get()));
+
+ // Checkpoint; the C status object is released once it has been checked.
+ string ckpt_path =
+ io::JoinPath(testing::TensorFlowSrcRoot(),
+ "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+ TF_Status* status = TF_NewStatus();
+ std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
+ new checkpoint::CheckpointReader(ckpt_path, status));
+ CHECK(TF_GetCode(status) == TF_OK);
+ TF_DeleteStatus(status);
+
+ // TFStats takes ownership of all four inputs.
+ tf_stats_.reset(new TFStats(std::move(graph_pb), std::move(run_meta_pb),
+ std::move(op_log_pb), std::move(ckpt_reader)));
+ }
+
+ std::unique_ptr<TFStats> tf_stats_;
+};
+
+// Runs the "scope" view with output dumped to a temp file and checks the
+// exact text written, accounting only nodes that match "Variable".
+TEST_F(TFProfShowTest, DumpScopeMode) {
+ string dump_file = io::JoinPath(testing::TmpDir(), "dump");
+ Options opts(5, 0, 0, 0, 0, {".*"}, "name",
+ {"Variable"}, // account_type_regexes
+ {".*"}, {""}, {".*"}, {""}, false,
+ {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
+ false, dump_file);
+ tf_stats_->PrintGraph("scope", opts);
+
+ string dump_str;
+ TF_CHECK_OK(ReadFileToString(Env::Default(), dump_file, &dump_str));
+ EXPECT_EQ(
+ "_TFProfRoot (--/450 params, --/0 flops, --/1.80KB, --/0us)\n DW "
+ "(3x3x3x6, 162/162 params, 0/0 flops, 648B/648B, 0us/0us)\n DW2 "
+ "(2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/1.15KB, 0us/0us)\n",
+ dump_str);
+}
+
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
new file mode 100644
index 0000000000..54fce4772b
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.cc
@@ -0,0 +1,130 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+#include <stdio.h>
+#include <utility>
+
+#include "tensorflow/core/framework/step_stats.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Takes ownership of the inputs, parses whichever of them are present into
+// nodes_map_, and then builds the scope and graph views over those nodes.
+// `graph` is mandatory (CHECK-enforced); the other three may be null, in
+// which case the corresponding parsing step is skipped.
+TFStats::TFStats(std::unique_ptr<GraphDef> graph,
+                 std::unique_ptr<RunMetadata> run_meta,
+                 std::unique_ptr<OpLog> op_log,
+                 std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
+    : graph_(std::move(graph)),
+      run_meta_(std::move(run_meta)),
+      op_log_(std::move(op_log)),
+      ckpt_reader_(std::move(ckpt_reader)) {
+  CHECK(graph_) << "Must at least have GraphDef";
+
+  printf("Parsing GraphDef...\n");
+  ParseGraph();
+
+  if (run_meta_) {
+    printf("Parsing RunMetadata...\n");
+    ParseRunMeta();
+  }
+
+  if (op_log_) {
+    printf("Parsing OpLog...\n");
+    ParseOpLog();
+  }
+
+  if (ckpt_reader_) {
+    printf("Parsing Checkpoint...\n");
+    // Tag every graph node whose name also appears in the checkpoint.
+    for (const auto& var_and_shape : ckpt_reader_->GetVariableToShapeMap()) {
+      auto found = nodes_map_.find(var_and_shape.first);
+      if (found == nodes_map_.end()) continue;
+      found->second.AddOpType("_checkpoint_variables");
+    }
+  }
+
+  printf("Preparing Views...\n");
+  scope_view_.reset(new TFScope(ckpt_reader_.get()));
+  graph_view_.reset(new TFGraph(ckpt_reader_.get()));
+  // Both views receive pointers to the same nodes stored in nodes_map_.
+  for (auto& name_and_node : nodes_map_) {
+    TFNode* node = &name_and_node.second;
+    scope_view_->AddNode(node);
+    graph_view_->AddNode(node);
+  }
+  scope_view_->Build();
+  graph_view_->Build();
+}
+
+// Dispatches `cmd` to the matching view and returns what that view shows.
+// kCmds[0] selects the scope view and kCmds[1] the graph view. Any other
+// command is reported on stderr and an empty node is returned.
+const TFProfNode& TFStats::PrintGraph(const string& cmd, const Options& opts) {
+  if (cmd == kCmds[0]) return scope_view_->Show(opts);
+  if (cmd == kCmds[1]) return graph_view_->Show(opts);
+
+  fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
+  return empty_node_;
+}
+
+// Populates nodes_map_ from the GraphDef and links every node to the nodes
+// named by its inputs.
+void TFStats::ParseGraph() {
+  // First pass: one TFNode per NodeDef. Node names must be unique.
+  for (const NodeDef& node : graph_->node()) {
+    CHECK(nodes_map_.find(node.name()) == nodes_map_.end());
+    nodes_map_[node.name()] = TFNode(&node);
+  }
+  // Second pass: resolve each input string to the node it refers to.
+  for (auto& name_and_node : nodes_map_) {
+    const NodeDef* node_def = name_and_node.second.node_def();
+    // Copy on purpose: the input name is trimmed in place below.
+    for (string node_input : node_def->input()) {
+      // input name format can be: "^node:src_output"
+      // Drop the ":src_output" suffix. substr() returns a new string rather
+      // than modifying its receiver, so the result must be assigned back;
+      // otherwise "node:1" never matches the "node" key in nodes_map_.
+      const auto suffix_pos = node_input.find(':');
+      if (suffix_pos != node_input.npos) {
+        node_input = node_input.substr(0, suffix_pos);
+      }
+      // Drop the leading "^" marker, if any.
+      if (!node_input.empty() && node_input[0] == '^') {
+        node_input = node_input.substr(1);
+      }
+      auto input_node = nodes_map_.find(node_input);
+      if (input_node == nodes_map_.end()) {
+        // Inputs that do not name a known node are ignored.
+        continue;
+      }
+      name_and_node.second.AddInput(&input_node->second);
+    }
+  }
+}
+
+// Applies each OpLog entry to the graph node with the same name: adds the
+// entry's op types and, when non-zero, its float-op count. Entries that do
+// not match any node are skipped.
+void TFStats::ParseOpLog() {
+  for (const OpLogEntry& entry : op_log_->log_entries()) {
+    auto found = nodes_map_.find(entry.name());
+    if (found != nodes_map_.end()) {
+      TFNode& target = found->second;
+      for (const string& op_type : entry.types()) {
+        target.AddOpType(op_type);
+      }
+      if (entry.float_ops() != 0) {
+        target.AddFloatOps(entry.float_ops());
+      }
+    }
+  }
+}
+
+// Attaches per-device step stats from RunMetadata to the graph nodes with
+// matching names. Does nothing when the RunMetadata carries no step stats;
+// stats for names not present in nodes_map_ are skipped.
+void TFStats::ParseRunMeta() {
+  if (run_meta_->has_step_stats()) {
+    for (const auto& device_stats : run_meta_->step_stats().dev_stats()) {
+      for (const auto& stat : device_stats.node_stats()) {
+        auto found = nodes_map_.find(stat.node_name());
+        if (found != nodes_map_.end()) {
+          found->second.AddStepStat(device_stats.device(), &stat);
+        }
+      }
+    }
+  }
+}
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
new file mode 100644
index 0000000000..1246a2fae2
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h
@@ -0,0 +1,82 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Core API of tfprof.
+// 1. Load protos generated from a tensorflow model.
+// 2. Build in-memory representations of the tensorflow model, annotate the
+//    representation with various stats, such as params, times, memory, etc.
+// 3. Accept command and options to selectively aggregate stats for analysis
+//    and print out the results.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
+
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_graph.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_node.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_scope.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_show.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/step_stats.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// Owns the model inputs (graph, run metadata, op log, checkpoint reader), the
+// per-node stats parsed from them, and the scope/graph views used to show
+// those stats.
+class TFStats {
+ public:
+  // Takes ownership of all four arguments.
+  TFStats(std::unique_ptr<GraphDef> graph,
+          std::unique_ptr<RunMetadata> run_meta,
+          std::unique_ptr<OpLog> op_log,
+          std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader);
+  ~TFStats() = default;
+
+  // Prints the results to stdout and also returns the printed output as a
+  // proto. `cmd` selects which view is shown; `opts` is passed on to it.
+  const TFProfNode& PrintGraph(const string& cmd, const Options& opts);
+
+ private:
+  // Helpers that fill nodes_map_ from the corresponding input.
+  void ParseGraph();
+  void ParseOpLog();
+  void ParseRunMeta();
+
+  // Views over the nodes in nodes_map_.
+  std::unique_ptr<TFScope> scope_view_;
+  std::unique_ptr<TFGraph> graph_view_;
+
+  // Inputs handed to the constructor.
+  std::unique_ptr<GraphDef> graph_;
+  std::unique_ptr<RunMetadata> run_meta_;
+  std::unique_ptr<OpLog> op_log_;
+  std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader_;
+
+  // Keyed by node name. Holds TFNode by value rather than TFNode* to avoid a
+  // large number of dynamic allocations.
+  std::map<string, TFNode> nodes_map_;
+
+  // Returned by PrintGraph() for a command it does not recognize.
+  TFProfNode empty_node_;
+};
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_STATS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
new file mode 100644
index 0000000000..06b288fdce
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats_test.cc
@@ -0,0 +1,194 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+
+#include <utility>
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_constants.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Fixture that builds a TFStats from the checked-in testdata files: the
+// graph (text proto), run metadata and op log (binary protos) and a
+// checkpoint.
+class TFProfStatsTest : public ::testing::Test {
+ protected:
+  TFProfStatsTest() {
+    // GraphDef, stored in text format.
+    const string graph_file = io::JoinPath(
+        testing::TensorFlowSrcRoot(),
+        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    std::unique_ptr<tensorflow::GraphDef> graph_def(
+        new tensorflow::GraphDef());
+    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_file, graph_def.get()));
+
+    // RunMetadata, stored in binary format.
+    const string run_meta_file =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "contrib/tfprof/tools/tfprof/internal/testdata/run_meta");
+    std::unique_ptr<tensorflow::RunMetadata> run_meta(
+        new tensorflow::RunMetadata());
+    TF_CHECK_OK(
+        ReadBinaryProto(Env::Default(), run_meta_file, run_meta.get()));
+
+    // OpLog, stored in binary format.
+    const string op_log_file = io::JoinPath(
+        testing::TensorFlowSrcRoot(),
+        "contrib/tfprof/tools/tfprof/internal/testdata/tfprof_log");
+    std::unique_ptr<OpLog> op_log(new OpLog());
+    TF_CHECK_OK(ReadBinaryProto(Env::Default(), op_log_file, op_log.get()));
+
+    // Checkpoint. The reader reports failures through a TF_Status.
+    const string ckpt_file =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    TF_Status* ckpt_status = TF_NewStatus();
+    std::unique_ptr<checkpoint::CheckpointReader> reader(
+        new checkpoint::CheckpointReader(ckpt_file, ckpt_status));
+    CHECK(TF_GetCode(ckpt_status) == TF_OK);
+    TF_DeleteStatus(ckpt_status);
+
+    tf_stats_.reset(new TFStats(std::move(graph_def), std::move(run_meta),
+                                std::move(op_log), std::move(reader)));
+  }
+
+  // Object under test, shared by every test case in this fixture.
+  std::unique_ptr<TFStats> tf_stats_;
+};
+
+// Scope view restricted to nodes accounted under kTrainableVarType.
+TEST_F(TFProfStatsTest, CustomOpType) {
+  Options opts(3, 0, 0, 0, 0, {".*"}, "name",
+               {kTrainableVarType},  // account_type_regexes
+               {".*"}, {""}, {".*"}, {""}, false,
+               {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
+               false);
+  const TFProfNode& shown = tf_stats_->PrintGraph("scope", opts);
+
+  const string expected_text =
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 1800\ntotal_parameters: "
+      "450\nchildren {\n name: \"DW\"\n exec_micros: 0\n requested_bytes: "
+      "648\n parameters: 162\n total_exec_micros: 0\n "
+      "total_requested_bytes: 648\n total_parameters: 162\n device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
+      "total_float_ops: 0\n}\nchildren {\n name: \"DW2\"\n exec_micros: 0\n "
+      "requested_bytes: 1152\n parameters: 288\n total_exec_micros: 0\n "
+      "total_requested_bytes: 1152\n total_parameters: 288\n device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
+      "total_float_ops: 0\n}\nfloat_ops: 0\ntotal_float_ops: 0\n";
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(expected_text, &expected));
+  EXPECT_EQ(expected.DebugString(), shown.DebugString());
+}
+
+// Scope view restricted to nodes accounted under kCkptVarType.
+TEST_F(TFProfStatsTest, CheckPointOpType) {
+  Options opts(3, 0, 0, 0, 0, {".*"}, "name",
+               {kCkptVarType},  // account_type_regexes
+               {".*"}, {""}, {".*"}, {""}, false,
+               {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
+               false);
+  const TFProfNode& shown = tf_stats_->PrintGraph("scope", opts);
+
+  const string expected_text =
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 1800\ntotal_parameters: "
+      "450\nchildren {\n name: \"DW\"\n exec_micros: 0\n requested_bytes: "
+      "648\n parameters: 162\n total_exec_micros: 0\n "
+      "total_requested_bytes: 648\n total_parameters: 162\n device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
+      "total_float_ops: 0\n}\nchildren {\n name: \"DW2\"\n exec_micros: 0\n "
+      "requested_bytes: 1152\n parameters: 288\n total_exec_micros: 0\n "
+      "total_requested_bytes: 1152\n total_parameters: 288\n device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 0\n "
+      "total_float_ops: 0\n}\nfloat_ops: 0\ntotal_float_ops: 0\n";
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(expected_text, &expected));
+  EXPECT_EQ(expected.DebugString(), shown.DebugString());
+}
+
+// Graph view with "cost.*" as the start-name filter.
+TEST_F(TFProfStatsTest, TestGraph) {
+  Options opts(100, 0, 10000, 0, 0, {".*"}, "name", {".*"},
+               {"cost.*"},  // start_name_regexes
+               {""}, {".*"}, {""}, false,
+               {"params", "bytes", "micros", "float_ops", "num_hidden_ops"},
+               false);
+  const TFProfNode& shown = tf_stats_->PrintGraph("graph", opts);
+
+  const string expected_text =
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: 0\ninputs: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
+      "0\ntotal_inputs: 0\nfloat_ops: 0\ntotal_float_ops: 0\n";
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(expected_text, &expected));
+  EXPECT_EQ(expected.DebugString(), shown.DebugString());
+}
+
+// Scope view selecting only "float_ops"; the expected output contains the two
+// Conv2D nodes with their float-op counts.
+TEST_F(TFProfStatsTest, TestFloatOps) {
+  Options opts(10, 0, 0, 0, 1, {".*"}, "name", {".*"}, {".*"}, {""}, {".*"},
+               {""}, false, {"float_ops"}, false);
+  const TFProfNode& shown = tf_stats_->PrintGraph("scope", opts);
+
+  const string expected_text =
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 11\ntotal_requested_bytes: "
+      "5280\ntotal_parameters: 450\nchildren {\n name: \"Conv2D\"\n "
+      "exec_micros: 0\n requested_bytes: 432\n total_exec_micros: 0\n "
+      "total_requested_bytes: 432\n total_parameters: 0\n device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 5832\n "
+      "total_float_ops: 5832\n}\nchildren {\n name: \"Conv2D_1\"\n "
+      "exec_micros: 10\n requested_bytes: 384\n total_exec_micros: 10\n "
+      "total_requested_bytes: 384\n total_parameters: 0\n device: "
+      "\"/job:localhost/replica:0/task:0/cpu:0\"\n float_ops: 4608\n "
+      "total_float_ops: 4608\n}\nfloat_ops: 0\ntotal_float_ops: 10440\n";
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(expected_text, &expected));
+  EXPECT_EQ(expected.DebugString(), shown.DebugString());
+}
+
+// Scope view with account_displayed_op_only enabled and a show-name filter of
+// "unit_2_1.*DW"; the expected root has all-zero totals.
+TEST_F(TFProfStatsTest, TestAccountShownNameOnly) {
+  Options opts(100, 0, 0, 0, 0, {".*"}, "name", {".*"}, {".*"}, {""},
+               {"unit_2_1.*DW"},  // show_name_regexes.
+               {""}, true,        // account_displayed_op_only.
+               {"params"}, false);
+  const TFProfNode& shown = tf_stats_->PrintGraph("scope", opts);
+
+  const string expected_text =
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
+      "0\nfloat_ops: 0\ntotal_float_ops: 0\n";
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(expected_text, &expected));
+  EXPECT_EQ(expected.DebugString(), shown.DebugString());
+}
+
+// Scope view selecting "tensor_value" with a show-name filter of
+// "unit_1_0.*gamma"; only the root node is expected in the returned proto.
+TEST_F(TFProfStatsTest, TestShowTensorValue) {
+  Options opts(10, 0, 0, 0, 0, {".*"}, "name", {".*"}, {".*"}, {""},
+               {"unit_1_0.*gamma"}, {""}, false,
+               {"tensor_value"},  // Show tensor value from checkpoint.
+               false);
+  const TFProfNode& shown = tf_stats_->PrintGraph("scope", opts);
+
+  const string expected_text =
+      "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+      "0\ntotal_exec_micros: 11\ntotal_requested_bytes: "
+      "5280\ntotal_parameters: 450\nfloat_ops: 0\ntotal_float_ops: 10440\n";
+  TFProfNode expected;
+  CHECK(protobuf::TextFormat::ParseFromString(expected_text, &expected));
+  EXPECT_EQ(expected.DebugString(), shown.DebugString());
+}
+
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
new file mode 100644
index 0000000000..c21626919f
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.cc
@@ -0,0 +1,78 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Copies the prepared representations into whichever outputs are non-null.
+// The text form gets an "...omitted from display" trailer once it has
+// reached the display length limit; the proto form is merged into
+// `tfprof_tensor_pb`.
+void TFProfTensor::Display(string* formatted_str,
+                           TFProfTensorProto* tfprof_tensor_pb) {
+  if (formatted_str) {
+    const bool at_limit =
+        formatted_str_.length() >= kTFProfTenosrMaxDisplayLen;
+    *formatted_str =
+        at_limit
+            ? strings::StrCat(formatted_str_, "...omitted from display\n\n")
+            : formatted_str_;
+  }
+  if (tfprof_tensor_pb) {
+    tfprof_tensor_pb->MergeFrom(tfprof_tensor_pb_);
+  }
+}
+
+// Records the tensor's dtype in the proto and converts its values into both
+// the proto and the formatted string. Float types are widened to double and
+// integer types to int64; any other dtype is reported on stderr and no values
+// are stored.
+void TFProfTensor::Build() {
+  const DataType dtype = tensor_->dtype();
+  tfprof_tensor_pb_.set_dtype(dtype);
+
+  switch (dtype) {
+    // Double for all floats.
+    case DataType::DT_FLOAT: {
+      std::vector<double> as_double;
+      GetValueVec<float, double>(&as_double);
+      BuildOutput<double>(0, 0, as_double, &tfprof_tensor_pb_);
+      break;
+    }
+    case DataType::DT_DOUBLE: {
+      std::vector<double> as_double;
+      GetValueVec<double, double>(&as_double);
+      BuildOutput<double>(0, 0, as_double, &tfprof_tensor_pb_);
+      break;
+    }
+    // Int64 for all integers.
+    case DataType::DT_INT32: {
+      std::vector<int64> as_int64;
+      GetValueVec<int32, int64>(&as_int64);
+      BuildOutput<int64>(0, 0, as_int64, &tfprof_tensor_pb_);
+      break;
+    }
+    case DataType::DT_INT64: {
+      std::vector<int64> as_int64;
+      GetValueVec<int64, int64>(&as_int64);
+      BuildOutput<int64>(0, 0, as_int64, &tfprof_tensor_pb_);
+      break;
+    }
+    case DataType::DT_STRING: {
+      // Not supported by TensorFlow.
+      std::vector<string> as_string;
+      GetValueVec<string, string>(&as_string);
+      BuildOutput<string>(0, 0, as_string, &tfprof_tensor_pb_);
+      break;
+    }
+    default: {
+      fprintf(stderr, "Not Supported type %d\n", dtype);
+      break;
+    }
+  }
+}
+
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
new file mode 100644
index 0000000000..471a1db417
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor.h
@@ -0,0 +1,120 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// TFProf representation of a Tensor's value.
+// 1. Multi-dimension tensor is flattened in row major, and stored in proto.
+// 2. Integers are up-casted to int64. Floats are up-casted to double. String
+//    is not supported by the TensorFlow CheckpointReader library, though it
+//    is supported in the current code.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
+
+#include <typeinfo>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+namespace tfprof {
+
+// Holds a Tensor's values in two forms: flattened into a TFProfTensorProto
+// and rendered as a bracketed, human-readable string.
+class TFProfTensor {
+ public:
+  // Takes ownership of `tensor` and converts it immediately via Build().
+  explicit TFProfTensor(std::unique_ptr<Tensor> tensor)
+      : tensor_(std::move(tensor)) {
+    Build();
+  }
+
+  // If pointers are provided, they are filled by the method.
+  void Display(string* formatted_str, TFProfTensorProto* tfprof_tensor_pb);
+
+ private:
+  // Max length of tensor value displayed to CLI.
+  const int64 kTFProfTenosrMaxDisplayLen = 10000;
+  // Max length after which a latency warning will be printed.
+  const int64 kTFProfTensorMaxWarnLen = 100000;
+
+  // Fills tfprof_tensor_pb_ and formatted_str_ from tensor_.
+  void Build();
+
+  // Appends a single element to both the proto and the formatted string.
+  // The value goes through a text stream first; presumably this is so that
+  // every branch below compiles regardless of T.
+  template <typename T>
+  void AppendValue(const T& value, TFProfTensorProto* dim) {
+    std::ostringstream sstream;
+    sstream << value;
+
+    if (typeid(T) == typeid(double)) {
+      double double_val;
+      CHECK(strings::safe_strtod(sstream.str().c_str(), &double_val));
+      dim->add_value_double(double_val);
+      formatted_str_ += strings::Printf("%.2f ", double_val);
+    } else if (typeid(T) == typeid(int64)) {
+      int64 int64_val;
+      CHECK(strings::safe_strto64(sstream.str().c_str(), &int64_val));
+      dim->add_value_int64(int64_val);
+      formatted_str_ +=
+          strings::Printf("%lld ", static_cast<long long>(int64_val));
+    } else if (typeid(T) == typeid(string)) {
+      dim->add_value_str(sstream.str());
+      formatted_str_ += strings::StrCat("'", sstream.str(), "' ");
+    } else {
+      CHECK(false) << "Unsupported type: " << typeid(T).name();
+    }
+  }
+
+  // Recursively walks the dimensions starting at `depth`, consuming elements
+  // of `values` from index `start`, and returns the index of the next
+  // unconsumed element. Each dimension is wrapped in "[" ... "],\n".
+  //
+  // It assumes the flattened values are stored in row-major order.
+  // TODO(xpan): Further verifying it.
+  template <typename T>
+  int64 BuildOutput(int64 start, int depth, const std::vector<T>& values,
+                    TFProfTensorProto* dim) {
+    formatted_str_ += "[";
+    int64 nstart = start;
+    if (tensor_->dims() == 0) {
+      // Rank-0 (scalar) tensor: there is no dimension 0 to query, so
+      // dim_size(depth) must not be called. Emit the single value directly.
+      if (nstart < static_cast<int64>(values.size())) {
+        AppendValue<T>(values[nstart], dim);
+        ++nstart;
+      }
+    } else {
+      const bool is_last_dim = (depth == tensor_->dims() - 1);
+      const int64 extent = tensor_->dim_size(depth);
+      for (int64 i = 0; i < extent; ++i) {
+        if (is_last_dim) {
+          // Last dimension, pull the values.
+          AppendValue<T>(values[nstart], dim);
+          ++nstart;
+        } else {
+          // Not-last dimension. Drill deeper.
+          nstart = BuildOutput<T>(nstart, depth + 1, values, dim);
+        }
+      }
+    }
+    // Keep the display string bounded; the proto still holds every value.
+    if (formatted_str_.length() >
+        static_cast<size_t>(kTFProfTenosrMaxDisplayLen)) {
+      formatted_str_ = formatted_str_.substr(0, kTFProfTenosrMaxDisplayLen);
+    }
+    formatted_str_ += "],\n";
+    return nstart;
+  }
+
+  // Appends every element of tensor_, read as T and cast to U, to
+  // `value_vec`. Prints a warning to stderr for very large tensors.
+  template <typename T, typename U>
+  void GetValueVec(std::vector<U>* value_vec) {
+    const int64 num_elements = tensor_->NumElements();
+    // TODO(xpan): Address the huge tensor problem.
+    if (num_elements > kTFProfTensorMaxWarnLen) {
+      fprintf(stderr, "Showing huge tensor, the tool might halt...\n");
+    }
+    auto values = tensor_->flat<T>();
+    value_vec->reserve(value_vec->size() + num_elements);
+    for (int64 i = 0; i < num_elements; i++) {
+      value_vec->push_back(static_cast<U>(values(i)));
+    }
+  }
+
+  TFProfTensorProto tfprof_tensor_pb_;
+  std::unique_ptr<Tensor> tensor_;
+  string formatted_str_;
+};
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_TENSOR_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
new file mode 100644
index 0000000000..d3f1e3c7b7
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_tensor_test.cc
@@ -0,0 +1,306 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Test fixture that builds a TFStats from the checked-in testdata: the graph
+// is parsed from testdata/graph.pbtxt and a CheckpointReader is opened on
+// testdata/ckpt. RunMetadata and OpLog are passed to TFStats as null pointers.
+class TFProfTensorTest : public ::testing::Test {
+ protected:
+  TFProfTensorTest() {
+    string graph_path = io::JoinPath(
+        testing::TensorFlowSrcRoot(),
+        "contrib/tfprof/tools/tfprof/internal/testdata/graph.pbtxt");
+    std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
+    TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
+
+    // Left empty on purpose of this fixture: only graph + checkpoint are used.
+    std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
+    std::unique_ptr<OpLog> op_log_pb;
+
+    string ckpt_path =
+        io::JoinPath(testing::TensorFlowSrcRoot(),
+                     "contrib/tfprof/tools/tfprof/internal/testdata/ckpt");
+    TF_Status* status = TF_NewStatus();
+    std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader(
+        new checkpoint::CheckpointReader(ckpt_path, status));
+    // Include the reader's own error text so a missing or corrupt checkpoint
+    // is diagnosable from the failure message alone.
+    CHECK(TF_GetCode(status) == TF_OK) << TF_Message(status);
+    TF_DeleteStatus(status);
+
+    tf_stats_.reset(new TFStats(std::move(graph_pb), std::move(run_meta_pb),
+                                std::move(op_log_pb), std::move(ckpt_reader)));
+  }
+
+  // Stats object under test; constructed once per test case.
+  std::unique_ptr<TFStats> tf_stats_;
+};
+
+TEST_F(TFProfTensorTest, Basics) {  // "scope" output with tensor_value selected must equal the golden proto below.
+ Options opts(3, 0, 0, 0, 0, {".*"}, "name", {"Variable"}, {".*"}, {""},
+ {".*"}, {""}, false, {"tensor_value"}, // show the tensor value.
+ false);
+ const TFProfNode& root = tf_stats_->PrintGraph("scope", opts);  // Result node returned for the "scope" command.
+
+ TFProfNode expected;  // Golden result, parsed from the text-format literal that follows.
+ CHECK(protobuf::TextFormat::ParseFromString(
+ "name: \"_TFProfRoot\"\nexec_micros: 0\nrequested_bytes: "
+ "0\ntotal_exec_micros: 0\ntotal_requested_bytes: 0\ntotal_parameters: "
+ "450\nchildren {\n name: \"DW\"\n exec_micros: 0\n requested_bytes: "
+ "0\n parameters: 162\n total_exec_micros: 0\n total_requested_bytes: "
+ "0\n total_parameters: 162\n float_ops: 0\n total_float_ops: 0\n "
+ "tensor_value {\n dtype: DT_FLOAT\n value_double: -0.00117808\n "
+ "value_double: -0.000709941\n value_double: -0.00174816\n "
+ "value_double: -0.000495372\n value_double: 0.000243039\n "
+ "value_double: -0.000126313\n value_double: -0.000663929\n "
+ "value_double: -0.000495198\n value_double: -0.000893934\n "
+ "value_double: -0.00179659\n value_double: 0.000408874\n "
+ "value_double: -0.00120166\n value_double: -0.00109484\n "
+ "value_double: -0.000200362\n value_double: 0.000726721\n "
+ "value_double: -0.000277568\n value_double: 0.00180584\n "
+ "value_double: 0.000997271\n value_double: -0.00185987\n "
+ "value_double: -0.00113401\n value_double: -0.000528852\n "
+ "value_double: -0.000197412\n value_double: 1.32871e-05\n "
+ "value_double: -0.000285896\n value_double: -0.000428898\n "
+ "value_double: -0.000424633\n value_double: 2.15488e-05\n "
+ "value_double: 0.00149753\n value_double: -0.000884576\n "
+ "value_double: -0.0013795\n value_double: -0.000650125\n "
+ "value_double: 0.00191612\n value_double: 4.71838e-05\n "
+ "value_double: 0.000400201\n value_double: 0.00239555\n "
+ "value_double: -0.00177706\n value_double: -0.000781899\n "
+ "value_double: -0.00145247\n value_double: 0.0020025\n "
+ "value_double: 0.000597419\n value_double: 0.00135456\n "
+ "value_double: 0.0015876\n value_double: -0.000993568\n "
+ "value_double: 0.0006509\n value_double: -0.000894533\n "
+ "value_double: -0.00129322\n value_double: 0.0003859\n "
+ "value_double: 0.000415186\n value_double: -0.000439212\n "
+ "value_double: 0.000442138\n value_double: 0.00212353\n "
+ "value_double: 0.000702953\n value_double: 0.000713424\n "
+ "value_double: -0.000304877\n value_double: -9.17046e-05\n "
+ "value_double: -0.000801103\n value_double: 0.000304854\n "
+ "value_double: -0.00070527\n value_double: -0.00106408\n "
+ "value_double: -0.000909906\n value_double: -4.49183e-05\n "
+ "value_double: 0.000104172\n value_double: -0.000438067\n "
+ "value_double: -0.000317689\n value_double: -0.000769914\n "
+ "value_double: -0.00157729\n value_double: 0.000220733\n "
+ "value_double: 0.00107268\n value_double: -0.000186449\n "
+ "value_double: -0.000807328\n value_double: 0.000456308\n "
+ "value_double: -0.000593729\n value_double: -0.000954873\n "
+ "value_double: -0.000268676\n value_double: 9.06328e-05\n "
+ "value_double: -0.000323473\n value_double: -0.000628768\n "
+ "value_double: 0.000664985\n value_double: 0.0020999\n "
+ "value_double: -0.000932228\n value_double: -0.00203203\n "
+ "value_double: 0.000565405\n value_double: 0.000167899\n "
+ "value_double: 0.00054897\n value_double: 0.000612407\n "
+ "value_double: -0.000619301\n value_double: 0.00169361\n "
+ "value_double: -0.000188057\n value_double: 0.000267652\n "
+ "value_double: -0.00127341\n value_double: -0.000218836\n "
+ "value_double: -0.000431722\n value_double: 5.41867e-05\n "
+ "value_double: 0.000296628\n value_double: 0.000819415\n "
+ "value_double: -0.000758993\n value_double: -0.000114477\n "
+ "value_double: 6.29219e-05\n value_double: 0.000726988\n "
+ "value_double: -0.00135974\n value_double: 2.28447e-05\n "
+ "value_double: 0.00120547\n value_double: -0.00136907\n "
+ "value_double: -0.00140188\n value_double: 0.000201145\n "
+ "value_double: -0.000774109\n value_double: 0.000798465\n "
+ "value_double: -0.00131861\n value_double: 3.08996e-05\n "
+ "value_double: -0.000637026\n value_double: 0.00228975\n "
+ "value_double: -0.000633757\n value_double: -0.00116047\n "
+ "value_double: 7.66039e-05\n value_double: 2.09167e-06\n "
+ "value_double: -0.000296448\n value_double: 0.000206795\n "
+ "value_double: 0.000674405\n value_double: -0.000722742\n "
+ "value_double: -9.32443e-05\n value_double: -0.00170917\n "
+ "value_double: -0.000505279\n value_double: 0.000628132\n "
+ "value_double: -0.00145929\n value_double: 0.00106077\n "
+ "value_double: -0.000796743\n value_double: 0.000498275\n "
+ "value_double: -0.0002914\n value_double: -0.00230622\n "
+ "value_double: -9.42872e-05\n value_double: 0.000200359\n "
+ "value_double: -0.00305027\n value_double: -0.0016218\n "
+ "value_double: 0.00137126\n value_double: -0.00215436\n "
+ "value_double: -0.000743827\n value_double: -0.00090007\n "
+ "value_double: -0.000762207\n value_double: -0.000149951\n "
+ "value_double: -0.0013102\n value_double: 0.00165781\n "
+ "value_double: 0.000343809\n value_double: -0.000826069\n "
+ "value_double: -4.67404e-05\n value_double: 0.0023931\n "
+ "value_double: 0.00165338\n value_double: -0.00050529\n "
+ "value_double: 0.000178771\n value_double: -0.000858287\n "
+ "value_double: -0.00157031\n value_double: -0.00165846\n "
+ "value_double: -0.000713672\n value_double: 0.00014357\n "
+ "value_double: 0.00203632\n value_double: -0.0010973\n "
+ "value_double: -9.89852e-05\n value_double: 0.000558808\n "
+ "value_double: 0.00087211\n value_double: 0.000661239\n "
+ "value_double: 0.000389605\n value_double: 0.00060653\n "
+ "value_double: -0.000330104\n }\n}\nchildren {\n name: \"DW2\"\n "
+ "exec_micros: 0\n requested_bytes: 0\n parameters: 288\n "
+ "total_exec_micros: 0\n total_requested_bytes: 0\n total_parameters: "
+ "288\n float_ops: 0\n total_float_ops: 0\n tensor_value {\n dtype: "
+ "DT_FLOAT\n value_double: 0.000704577\n value_double: "
+ "0.000127421\n value_double: 0.00105952\n value_double: "
+ "0.000423765\n value_double: -0.00025461\n value_double: "
+ "-0.000857203\n value_double: 0.000693494\n value_double: "
+ "0.000282214\n value_double: 0.00106185\n value_double: "
+ "-0.000836552\n value_double: -0.00116766\n value_double: "
+ "0.000733674\n value_double: -0.000669601\n value_double: "
+ "-0.000275175\n value_double: -0.000428215\n value_double: "
+ "-0.000495715\n value_double: -0.000125887\n value_double: "
+ "-0.000715204\n value_double: -0.00108936\n value_double: "
+ "0.000738267\n value_double: 0.000376081\n value_double: "
+ "0.00191442\n value_double: 0.001423\n value_double: -0.00093811\n "
+ " value_double: -5.91421e-05\n value_double: -0.000221507\n "
+ "value_double: -0.000104555\n value_double: -0.00069682\n "
+ "value_double: -0.000278325\n value_double: -0.00122748\n "
+ "value_double: -0.00112411\n value_double: -0.000440511\n "
+ "value_double: -0.000392247\n value_double: -0.000419606\n "
+ "value_double: -0.00167063\n value_double: -0.000988578\n "
+ "value_double: -0.00040159\n value_double: 0.00238918\n "
+ "value_double: -0.000892898\n value_double: -0.000875976\n "
+ "value_double: 0.00154401\n value_double: -0.000719911\n "
+ "value_double: 0.000753941\n value_double: -0.000119961\n "
+ "value_double: -0.000305115\n value_double: 9.97947e-05\n "
+ "value_double: -0.00128908\n value_double: -0.000584184\n "
+ "value_double: -0.000734685\n value_double: -0.00146612\n "
+ "value_double: 0.000670802\n value_double: 0.000924219\n "
+ "value_double: -0.000154409\n value_double: 0.000198231\n "
+ "value_double: -0.000340742\n value_double: -0.00159646\n "
+ "value_double: -1.19382e-05\n value_double: 0.00165203\n "
+ "value_double: 0.0017085\n value_double: -0.000199614\n "
+ "value_double: 0.000529526\n value_double: 0.000769364\n "
+ "value_double: 0.00135369\n value_double: 0.00132873\n "
+ "value_double: 0.000451174\n value_double: 0.000255218\n "
+ "value_double: 0.00102891\n value_double: -0.00160068\n "
+ "value_double: 0.000324269\n value_double: -0.000492347\n "
+ "value_double: 0.000925301\n value_double: 0.00281998\n "
+ "value_double: -0.000826404\n value_double: -0.000602903\n "
+ "value_double: 0.00126559\n value_double: 0.000924364\n "
+ "value_double: -9.19827e-05\n value_double: -5.59275e-05\n "
+ "value_double: 0.00107971\n value_double: -9.91756e-05\n "
+ "value_double: 0.000864708\n value_double: 0.00121747\n "
+ "value_double: 0.00146338\n value_double: 0.000186883\n "
+ "value_double: -0.00168195\n value_double: -0.00062029\n "
+ "value_double: 0.000658127\n value_double: 0.00115682\n "
+ "value_double: -0.00178359\n value_double: 0.000685606\n "
+ "value_double: -0.000503373\n value_double: -0.000312999\n "
+ "value_double: 0.000335383\n value_double: -1.08597e-05\n "
+ "value_double: -8.2499e-05\n value_double: -0.000469726\n "
+ "value_double: -0.00170868\n value_double: 0.000118957\n "
+ "value_double: -0.000460736\n value_double: -5.56372e-05\n "
+ "value_double: -0.00110148\n value_double: 0.00059123\n "
+ "value_double: 0.000386339\n value_double: -0.00139967\n "
+ "value_double: -0.000835664\n value_double: 0.00103421\n "
+ "value_double: -0.00104296\n value_double: -0.000687497\n "
+ "value_double: 1.1338e-05\n value_double: 0.00176484\n "
+ "value_double: 0.000531523\n value_double: -0.000986387\n "
+ "value_double: -0.00114152\n value_double: 0.000256744\n "
+ "value_double: 0.000228425\n value_double: 0.00116583\n "
+ "value_double: 0.0002726\n value_double: -0.00100828\n "
+ "value_double: -0.000950376\n value_double: -0.00229074\n "
+ "value_double: -0.000348272\n value_double: -0.000526032\n "
+ "value_double: -0.000133703\n value_double: 0.000310979\n "
+ "value_double: -0.00199278\n value_double: -0.000874469\n "
+ "value_double: -0.000631466\n value_double: 0.0010534\n "
+ "value_double: 0.00134646\n value_double: -0.00172743\n "
+ "value_double: 0.00131031\n value_double: -0.000697506\n "
+ "value_double: 0.000286747\n value_double: 0.000140759\n "
+ "value_double: 0.000568707\n value_double: 0.000108177\n "
+ "value_double: -0.00207337\n value_double: -0.00138146\n "
+ "value_double: 0.000483162\n value_double: -0.00167096\n "
+ "value_double: -0.000465813\n value_double: 0.00067724\n "
+ "value_double: 2.08388e-05\n value_double: -0.00203279\n "
+ "value_double: 7.8429e-05\n value_double: 0.00161337\n "
+ "value_double: -0.000269005\n value_double: 0.000217822\n "
+ "value_double: 0.000599886\n value_double: 0.000317549\n "
+ "value_double: 0.00146597\n value_double: -0.00210947\n "
+ "value_double: -0.000823917\n value_double: -6.83766e-05\n "
+ "value_double: 0.000656085\n value_double: 0.000117134\n "
+ "value_double: -0.000390405\n value_double: 2.39565e-05\n "
+ "value_double: 0.00104837\n value_double: -0.000563671\n "
+ "value_double: 0.000634073\n value_double: -0.000554531\n "
+ "value_double: 0.000677971\n value_double: -0.000596207\n "
+ "value_double: -0.00103335\n value_double: 0.000645199\n "
+ "value_double: 0.00162195\n value_double: 0.000239246\n "
+ "value_double: 0.00113519\n value_double: 0.000787431\n "
+ "value_double: -0.000471688\n value_double: -0.000216625\n "
+ "value_double: -0.000537156\n value_double: 0.000551816\n "
+ "value_double: 0.00094337\n value_double: -0.000708127\n "
+ "value_double: 0.000956955\n value_double: -0.000904936\n "
+ "value_double: -0.000424413\n value_double: 0.000106455\n "
+ "value_double: -0.000443952\n value_double: 0.000185436\n "
+ "value_double: 0.000944397\n value_double: -0.000760572\n "
+ "value_double: 0.000560002\n value_double: 4.09886e-05\n "
+ "value_double: -0.00075076\n value_double: -0.000701856\n "
+ "value_double: -0.000234851\n value_double: -0.000131515\n "
+ "value_double: -0.000761718\n value_double: -0.000267808\n "
+ "value_double: -0.00039682\n value_double: 0.000542953\n "
+ "value_double: -0.000817685\n value_double: 0.00103851\n "
+ "value_double: -0.000427176\n value_double: 0.000517784\n "
+ "value_double: -0.000823552\n value_double: -0.000742637\n "
+ "value_double: 0.000529213\n value_double: -0.000372805\n "
+ "value_double: 1.85745e-05\n value_double: 0.00139891\n "
+ "value_double: -0.000128417\n value_double: -0.000404316\n "
+ "value_double: -0.000671571\n value_double: 0.000490311\n "
+ "value_double: -0.00118493\n value_double: -0.000897118\n "
+ "value_double: 0.000939601\n value_double: 0.000376399\n "
+ "value_double: 0.0014709\n value_double: 0.000134806\n "
+ "value_double: -0.000294469\n value_double: -0.000569142\n "
+ "value_double: 0.00127266\n value_double: -0.00140936\n "
+ "value_double: 0.000870083\n value_double: 0.000287246\n "
+ "value_double: 0.000537685\n value_double: 0.000125569\n "
+ "value_double: 0.000360276\n value_double: -0.000186268\n "
+ "value_double: 0.0011141\n value_double: -0.000605185\n "
+ "value_double: -0.0016281\n value_double: -0.000552758\n "
+ "value_double: -0.000196755\n value_double: -0.00265188\n "
+ "value_double: 0.000480997\n value_double: 0.00018776\n "
+ "value_double: -0.00199234\n value_double: 0.000959982\n "
+ "value_double: 0.00040334\n value_double: -0.000693596\n "
+ "value_double: 0.00157678\n value_double: -0.00134499\n "
+ "value_double: 0.00121909\n value_double: -0.000328734\n "
+ "value_double: 0.000148554\n value_double: -0.000209509\n "
+ "value_double: -0.000266303\n value_double: -0.00134084\n "
+ "value_double: 5.21371e-05\n value_double: 0.0005329\n "
+ "value_double: -0.000168858\n value_double: -0.00074875\n "
+ "value_double: 0.000959397\n value_double: -0.00159476\n "
+ "value_double: -0.000368838\n value_double: 0.0006077\n "
+ "value_double: -0.00117243\n value_double: -0.00146013\n "
+ "value_double: 0.00031519\n value_double: -0.000167911\n "
+ "value_double: 0.000482571\n value_double: -0.000752268\n "
+ "value_double: -0.00042363\n value_double: 0.00121219\n "
+ "value_double: -0.000208159\n value_double: 0.000128531\n "
+ "value_double: -0.000406308\n value_double: -0.000242663\n "
+ "value_double: -3.96673e-05\n value_double: 0.00144854\n "
+ "value_double: -0.000787328\n value_double: -0.000401958\n "
+ "value_double: 0.00114091\n value_double: -0.000739546\n "
+ "value_double: 0.000483236\n value_double: -0.000916945\n "
+ "value_double: -0.00129577\n value_double: -0.00186504\n "
+ "value_double: 0.000806804\n value_double: -0.000152251\n "
+ "value_double: 0.000662576\n value_double: -0.000533236\n "
+ "value_double: 0.00151019\n value_double: 0.00127805\n "
+ "value_double: 0.00115399\n value_double: -0.00130876\n "
+ "value_double: 2.99457e-06\n value_double: 0.000820777\n "
+ "value_double: 0.000878393\n value_double: -0.000562642\n "
+ "value_double: -0.00070442\n value_double: -0.00066277\n "
+ "}\n}\nfloat_ops: 0\ntotal_float_ops: 0\n",
+ &expected));
+ EXPECT_EQ(expected.DebugString(), root.DebugString());  // Both sides are compared through their DebugString() text.
+}
+
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
new file mode 100644
index 0000000000..7610729a11
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.cc
@@ -0,0 +1,350 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+
+#include <stdio.h>
+#include <algorithm>
+#include <memory>
+#include <set>
+
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/protobuf.h"
+#include "tensorflow/core/platform/regexp.h"
+
+namespace tensorflow {
+namespace tfprof {
+// Renders a count compactly. Values below one thousand are printed verbatim;
+// larger values are scaled to thousands ("k"), millions ("m") or billions
+// ("b") and printed with two decimal places.
+string FormatNumber(int64 n) {
+  if (n >= 1000000000) return strings::Printf("%.2fb", n / 1000000000.0);
+  if (n >= 1000000) return strings::Printf("%.2fm", n / 1000000.0);
+  if (n >= 1000) return strings::Printf("%.2fk", n / 1000.0);
+  return strings::Printf("%lld", n);
+}
+
+// Renders a duration given in microseconds: "<n>us" below one thousand,
+// otherwise milliseconds ("ms") or seconds ("sec") with two decimal places.
+string FormatTime(int64 micros) {
+  if (micros >= 1000000) {
+    return strings::Printf("%.2fsec", micros / 1000000.0);
+  }
+  if (micros >= 1000) {
+    return strings::Printf("%.2fms", micros / 1000.0);
+  }
+  return strings::Printf("%lldus", micros);
+}
+
+// Renders a byte count using decimal (1000-based) units: "<n>B" below one
+// thousand, otherwise "KB" or "MB" with two decimal places. MB is the largest
+// unit used.
+string FormatMemory(int64 bytes) {
+  if (bytes >= 1000000) {
+    return strings::Printf("%.2fMB", bytes / 1000000.0);
+  }
+  if (bytes >= 1000) {
+    return strings::Printf("%.2fKB", bytes / 1000.0);
+  }
+  return strings::Printf("%lldB", bytes);
+}
+
+// Joins the dimensions of a shape with "x" between consecutive entries.
+string FormatShapes(const std::vector<int64>& shape) {
+  const char* const dim_separator = "x";
+  return str_util::Join(shape, dim_separator);
+}
+
+// Returns a copy of str in which RE2::GlobalReplace has substituted newsub
+// for the matches of oldsub. Note that oldsub is handed to RE2 as the pattern
+// argument, so it is not treated as a plain literal.
+string StringReplace(const string& str, const string& oldsub,
+                     const string& newsub) {
+  string replaced(str);
+  RE2::GlobalReplace(&replaced, oldsub, newsub);
+  return replaced;
+}
+
+// Reads the file fname through env and parses its contents as a text-format
+// proto into *graph_def. Returns the read error if the file cannot be read,
+// InvalidArgument if the text does not parse, and an OK status otherwise.
+Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def) {
+  string text;
+  const Status read_status = ReadFileToString(env, fname, &text);
+  if (!read_status.ok()) return read_status;
+  if (!protobuf::TextFormat::ParseFromString(text, graph_def)) {
+    return errors::InvalidArgument("Cannot parse proto string.");
+  }
+  return Status();
+}
+
+namespace {
+// Returns s with every leading and trailing quote character (' or ") removed.
+// Quotes in the interior are kept. Returns an empty string when s is empty or
+// consists only of quote characters.
+string StripQuote(const string& s) {
+  // Positions stay in size_type: storing them in an int narrowed npos and made
+  // the "not found" comparison depend on implementation-defined conversions.
+  const string::size_type start = s.find_first_not_of("\"\'");
+  if (start == string::npos) return "";
+  // A non-quote character exists, so the backwards search cannot return npos.
+  const string::size_type end = s.find_last_not_of("\"\'");
+  return s.substr(start, end - start + 1);
+}
+
+// Builds an INVALID_ARGUMENT status naming the option at pieces[idx] and, when
+// one exists, the token that follows it (pieces[idx + 1]) as its value. The
+// value is reported as empty when the option is the last token.
+//
+// pieces is taken by const reference so that reporting an error does not copy
+// the whole token list.
+tensorflow::Status ReturnError(const std::vector<string>& pieces, int idx) {
+  string val;
+  // Compare in the vector's own unsigned type to avoid a signed/unsigned mix.
+  if (pieces.size() > static_cast<size_t>(idx) + 1) {
+    val = pieces[idx + 1];
+  }
+  return tensorflow::Status(
+      tensorflow::error::INVALID_ARGUMENT,
+      strings::StrCat("Invalid option '", pieces[idx], "' value: '", val, "'"));
+}
+
+// Case-insensitive equality: true when s1 and s2 have the same length and
+// their str_util::Lowercase forms are identical.
+bool CaseEqual(StringPiece s1, StringPiece s2) {
+  // The length test comes first so mismatched sizes never get lowercased.
+  return s1.size() == s2.size() &&
+         str_util::Lowercase(s1) == str_util::Lowercase(s2);
+}
+
+// Parses a boolean word, ignoring case. "true", "t", "yes", "y" and "1" set
+// *value to true; "false", "f", "no", "n" and "0" set it to false. Returns
+// true when str was recognized; otherwise returns false and leaves *value
+// untouched. value must not be null.
+bool StringToBool(StringPiece str, bool* value) {
+  CHECK(value != nullptr) << "NULL output boolean given.";
+  static const char* const kTrueWords[] = {"true", "t", "yes", "y", "1"};
+  static const char* const kFalseWords[] = {"false", "f", "no", "n", "0"};
+  for (const char* word : kTrueWords) {
+    if (CaseEqual(str, word)) {
+      *value = true;
+      return true;
+    }
+  }
+  for (const char* word : kFalseWords) {
+    if (CaseEqual(str, word)) {
+      *value = false;
+      return true;
+    }
+  }
+  return false;
+}
+} // namespace
+
+// Parses one tfprof command line.
+//
+// The line is split on spaces (empty pieces are dropped). The first token must
+// be one of kCmds and is copied into *cmd. Every following token must be an
+// option from kOptions. All options except the two boolean ones (kOptions[12]
+// and kOptions[14]) require a value token, which is consumed together with
+// the option. A boolean option that is the last token, or is followed by a
+// token starting with "-", is set to true without consuming anything.
+//
+// Values are written into *opts as they are parsed, so *opts may be partially
+// updated when an error is returned.
+//
+// Returns INVALID_ARGUMENT when the line contains no tokens, the command is
+// unknown, an option is unknown, or an option value is missing or malformed.
+tensorflow::Status ParseCmdLine(const string& line, string* cmd,
+                                tensorflow::tfprof::Options* opts) {
+  std::vector<string> pieces =
+      str_util::Split(line, ' ', str_util::SkipEmpty());
+
+  std::vector<string> cmds_str(kCmds, kCmds + sizeof(kCmds) / sizeof(*kCmds));
+  // A blank line splits into zero pieces; reject it before reading pieces[0].
+  if (pieces.empty() ||
+      std::find(cmds_str.begin(), cmds_str.end(), pieces[0]) ==
+          cmds_str.end()) {
+    return tensorflow::Status(tensorflow::error::INVALID_ARGUMENT,
+                              "First string must be a valid command.");
+  }
+  *cmd = pieces[0];
+
+  const int num_pieces = static_cast<int>(pieces.size());
+
+  // Splits an optionally quoted, comma-separated value into its items.
+  auto split_list = [](const string& value) {
+    return str_util::Split(StripQuote(value), ',', str_util::SkipEmpty());
+  };
+
+  // Handles the boolean option at pieces[*pos]. A bare flag means true;
+  // otherwise the next token must parse as a boolean and is consumed by
+  // advancing *pos. Returns false (with *pos unchanged) on a malformed value.
+  auto parse_bool_option = [&pieces, num_pieces](int* pos, bool* flag) -> bool {
+    const bool bare =
+        *pos + 1 >= num_pieces || pieces[*pos + 1].find("-") == 0;
+    if (bare) {
+      *flag = true;
+      return true;
+    }
+    if (!StringToBool(pieces[*pos + 1], flag)) return false;
+    ++*pos;
+    return true;
+  };
+
+  for (int i = 1; i < num_pieces; ++i) {
+    const string& opt = pieces[i];
+
+    // Boolean options may appear bare, so handle them before requiring a
+    // value token.
+    if (opt == kOptions[12]) {
+      if (!parse_bool_option(&i, &opts->account_displayed_op_only)) {
+        return ReturnError(pieces, i);
+      }
+      continue;
+    }
+    if (opt == kOptions[14]) {
+      if (!parse_bool_option(&i, &opts->viz)) {
+        return ReturnError(pieces, i);
+      }
+      continue;
+    }
+
+    // Every remaining option (and any unknown token) needs a following value.
+    if (i + 1 >= num_pieces) {
+      return ReturnError(pieces, i);
+    }
+    const string& value = pieces[i + 1];
+
+    if (opt == kOptions[0]) {
+      if (!strings::safe_strto32(value, &opts->max_depth)) {
+        return ReturnError(pieces, i);
+      }
+    } else if (opt == kOptions[1]) {
+      if (!strings::safe_strto64(value, &opts->min_bytes)) {
+        return ReturnError(pieces, i);
+      }
+    } else if (opt == kOptions[2]) {
+      if (!strings::safe_strto64(value, &opts->min_micros)) {
+        return ReturnError(pieces, i);
+      }
+    } else if (opt == kOptions[3]) {
+      if (!strings::safe_strto64(value, &opts->min_params)) {
+        return ReturnError(pieces, i);
+      }
+    } else if (opt == kOptions[4]) {
+      if (!strings::safe_strto64(value, &opts->min_float_ops)) {
+        return ReturnError(pieces, i);
+      }
+    } else if (opt == kOptions[5]) {
+      opts->device_regexes = split_list(value);
+    } else if (opt == kOptions[6]) {
+      // The value must be exactly one of the supported sort keys.
+      std::set<string> order_by_set(
+          kOrderBy, kOrderBy + sizeof(kOrderBy) / sizeof(*kOrderBy));
+      auto order_by = order_by_set.find(value);
+      if (order_by == order_by_set.end()) {
+        return ReturnError(pieces, i);
+      }
+      opts->order_by = *order_by;
+    } else if (opt == kOptions[7]) {
+      opts->account_type_regexes = split_list(value);
+    } else if (opt == kOptions[8]) {
+      opts->start_name_regexes = split_list(value);
+    } else if (opt == kOptions[9]) {
+      opts->trim_name_regexes = split_list(value);
+    } else if (opt == kOptions[10]) {
+      opts->show_name_regexes = split_list(value);
+    } else if (opt == kOptions[11]) {
+      opts->hide_name_regexes = split_list(value);
+    } else if (opt == kOptions[13]) {
+      // Every requested item must be one of the entries in kShown.
+      std::set<string> shown_set(kShown,
+                                 kShown + sizeof(kShown) / sizeof(*kShown));
+      std::vector<string> requested_vector = split_list(value);
+      std::set<string> requested_set(requested_vector.begin(),
+                                     requested_vector.end());
+      for (const string& requested : requested_set) {
+        if (shown_set.find(requested) == shown_set.end()) {
+          return ReturnError(pieces, i);
+        }
+      }
+      opts->select = requested_set;
+    } else if (opt == kOptions[15]) {
+      opts->dump_to_file = StripQuote(value);
+    } else {
+      return ReturnError(pieces, i);
+    }
+    ++i;  // Skip the value token that was just consumed.
+  }
+  return tensorflow::Status::OK();
+}
+
+// Prints the interactive help text (commands, options, evaluation notes and
+// an example) to stdout and flushes it.
+void PrintHelp() {
+  printf(
+      "\nSee go/tfprof for detailed tutorial.\n"
+      "\nCommands\n\n"
+      " scope: Each op has its op name in TensorFlow, such as 'n1', 'n1/n2', "
+      "'n1/n2/n3'. 'n1/n2' is a child of 'n1'. 'scope' command builds "
+      "a name scope tree and aggregates statistics based on it.\n\n"
+      " graph: ops in TensorFlow are organized as a graph based on their "
+      "source (inputs) and sink (outputs). 'graph' command builds "
+      "a graph pointing *from output to input*, and aggregates "
+      "statistics based on it.\n\n"
+      " set: Set options that will be default for follow up commands.\n\n"
+      " help: Show help.\n"
+      "\nOptions\n\n"
+      "Press Enter in CLI to see default option values.\n\n"
+      " -max_depth: Show ops that are at most this number of hops from "
+      "starting op in the tree/graph structure.\n\n"
+      " -min_bytes: Show ops that request at least this number of bytes.\n\n"
+      " -min_micros: Show ops that spend at least this number of micros to "
+      "run.\n\n"
+      " -min_params: Show ops that contain at least this number of "
+      "parameters.\n\n"
+      " -min_float_ops: Show ops that contain at least this number of "
+      "float operations. Only available if an op has "
+      "op.RegisterStatistics() defined and OpLog is "
+      "provided\n\n"
+      " -device_regexes: Show ops that are placed on the specified devices. "
+      "regexes are comma-separated.\n\n"
+      " -order_by: Order the results by [name|depth|bytes|micros|params|"
+      "float_ops]\n\n"
+      " -account_type_regexes: Account and display the ops whose types match "
+      "one of the type regexes specified. tfprof "
+      "allows users to define extra op types for ops "
+      "through tensorflow.tfprof.OpLog proto. regexes "
+      "are comma-separated.\n\n"
+      " -start_name_regexes: Show ops starting from the ops that matches the "
+      "regexes, recursively. regexes are "
+      "comma-separated.\n\n"
+      " -trim_name_regexes: Hide ops starting from the ops that matches the "
+      "regexes, recursively, regexes are comma-separated. "
+      "\n\n"
+      " -show_name_regexes: Show ops that match the regexes. regexes are "
+      "comma-separated.\n\n"
+      " -hide_name_regexes: Hide ops that match the regexes. regexes are "
+      "comma-separated.\n\n"
+      ""
+      " Notes: For each op, -account_type_regexes is first evaluated, "
+      "only ops with types matching the specified regexes are accounted and "
+      "selected for display. -start/trim/show/hide_name_regexes are used "
+      "to further filter ops for display. -start_name_regexes is evaluated "
+      "first to search the starting ops to display. Descendants of starting "
+      "ops are then evaluated against show/hide_name_regexes to make display "
+      "decision. If an op matches trim_name_regexes, all its descendants are "
+      "hidden.\n"
+      "Ops statistics are *accounted even if they are hidden* as long as "
+      "they match the -account_xxx options.\n\n"
+      " -account_displayed_op_only: If True, only account the statistics of "
+      "ops eventually displayed. If False, account all "
+      "op statistics matching -account_type_regexes recursively.\n\n"
+      " -select: Comma-separated list of metrics to show: [bytes|micros|"
+      "params|float_ops|num_hidden_ops|tensor_value|device|op_types]."
+      "\n\n"
+      " -dump_to_file: Dump the output to a file, instead of terminal.\n\n"
+      ""
+      "Examples\n"
+      " Assuming a toy model:\n"
+      " input(typeB)->conv2d_1(typeA)->conv2d_2(typeA)->"
+      "fc(typeA)->cost(typeA)->summarize(typeC)\n"
+      " Command:\n"
+      " tfprof> graph -account_type_regexes typeA -start_name_regexes "
+      "cost.* -show_name_regexes conv2d.* -max_depth 10\n\n"
+      " The above command only aggregates statistics of all ops of typeA ("
+      "hence ignoring input(typeB)). It will start looking for candidate to "
+      "display from cost.* and finally displays conv2d_1 and conv2d_2.\n\n");
+  fflush(stdout);
+}
+
+} // namespace tfprof
+} // namespace tensorflow
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
new file mode 100644
index 0000000000..6c1bba04fc
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h
@@ -0,0 +1,50 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
+
+#include <string>
+#include <vector>
+
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/platform/env.h"
+
+namespace tensorflow {
+namespace tfprof {
+string FormatNumber(int64 n);  // Formats a count; values >= 1000 are scaled with a k/m/b suffix and two decimals.
+// Formats a duration given in microseconds using "us", "ms" or "sec".
+string FormatTime(int64 micros);
+// Formats a byte count using "B", "KB" or "MB" (1000-based units).
+string FormatMemory(int64 bytes);
+// Joins the given dimensions with "x".
+string FormatShapes(const std::vector<int64>& shapes);
+// Parses a command line: stores the command in *cmd and applies the options that follow it to *opts; returns a non-OK status on invalid input.
+tensorflow::Status ParseCmdLine(const string& line, string* cmd,
+ tensorflow::tfprof::Options* opts);
+// Returns a copy of str after RE2::GlobalReplace of oldsub (used as the pattern) with newsub.
+string StringReplace(const string& str, const string& oldsub,
+ const string& newsub);
+// Reads file fname via env and parses it as a text-format proto into *graph_def.
+Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def);
+// Prints the tfprof help text to stdout.
+void PrintHelp();
+
+} // namespace tfprof
+} // namespace tensorflow
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_TFPROF_TOOLS_TFPROF_INTERNAL_TFPROF_UTILS_H_
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
new file mode 100644
index 0000000000..cae6e1e3a8
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.proto
@@ -0,0 +1,19 @@
+syntax = "proto2";
+
+package tensorflow.tfprof;
+
+// Supplementary information about a single op: its name, its float-operation
+// count and any user-defined op types.
+message OpLogEntry {
+ // op name.
+ optional string name = 1;
+ // float_ops is filled by tfprof Python API when called. It requires the
+ // op has RegisterStatistics defined. Currently, Conv2D, MatMul, etc, are
+ // implemented.
+ optional int64 float_ops = 2;
+ // User can define extra op type information for an op. This allows the user
+ // to select a group of ops precisely using op_type as a key.
+ repeated string types = 3;
+}
+
+// Top-level log message: a list of OpLogEntry records.
+message OpLog {
+ repeated OpLogEntry log_entries = 1;
+} \ No newline at end of file
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
new file mode 100644
index 0000000000..d9080242d6
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_main.cc
@@ -0,0 +1,236 @@
+/* Copyright 2016 The TensorFlow Authors All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "linenoise.h"
+#include "tensorflow/c/c_api.h"
+#include "tensorflow/c/checkpoint_reader.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_options.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_stats.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/internal/tfprof_utils.h"
+#include "tensorflow/contrib/tfprof/tools/tfprof/tfprof_log.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+using tensorflow::str_util::Split;
+
+// linenoise tab-completion callback.
+//
+// While the input contains no space, candidates are the entries of
+// tfprof::kCmds that start with the input. Once a space is present, only the
+// text after the last space is completed, against tfprof::kOptions; each
+// candidate handed to linenoise is the untouched text up to and including that
+// space followed by the matching option.
+//
+// buf: the current contents of the input line.
+// lc:  completion list that matching candidates are appended to.
+void completion(const char* buf, linenoiseCompletions* lc) {
+  const tensorflow::string input(buf);
+  // find_last_of() returns size_t. Keeping it in a size_t (instead of an int)
+  // avoids truncating npos and comparing a signed value against npos.
+  const size_t last_space = input.find_last_of(' ');
+  if (last_space == tensorflow::string::npos) {
+    for (const char* cmd : tensorflow::tfprof::kCmds) {
+      if (tensorflow::string(cmd).find(input) == 0) {
+        linenoiseAddCompletion(lc, cmd);
+      }
+    }
+    return;
+  }
+
+  // Split the line at the last space: `head` is preserved verbatim, `partial`
+  // is the (possibly empty) option prefix being completed.
+  const tensorflow::string head = input.substr(0, last_space + 1);
+  const tensorflow::string partial = input.substr(last_space + 1);
+  for (const char* opt : tensorflow::tfprof::kOptions) {
+    if (tensorflow::string(opt).find(partial) == 0) {
+      linenoiseAddCompletion(lc, (head + opt).c_str());
+    }
+  }
+}
+
+// Entry point of the tfprof command-line tool.
+//
+// Parses the command-line flags, loads the graph plus the optional
+// RunMetadata, OpLog and checkpoint, and then either runs the single command
+// given as argv[1] or enters an interactive "tfprof> " prompt.
+//
+// Returns 0 on normal exit and 1 if the checkpoint cannot be opened. A graph
+// file that cannot be read fails the TF_CHECK_OK below.
+int main(int argc, char** argv) {
+  tensorflow::string FLAGS_graph_path = "";
+  tensorflow::string FLAGS_run_meta_path = "";
+  tensorflow::string FLAGS_op_log_path = "";
+  tensorflow::string FLAGS_checkpoint_path = "";
+  tensorflow::int32 FLAGS_max_depth = 4;
+  tensorflow::int64 FLAGS_min_bytes = 0;
+  tensorflow::int64 FLAGS_min_micros = 0;
+  tensorflow::int64 FLAGS_min_params = 0;
+  tensorflow::int64 FLAGS_min_float_ops = 0;
+  tensorflow::string FLAGS_device_regexes = ".*";
+  tensorflow::string FLAGS_order_by = "name";
+  tensorflow::string FLAGS_account_type_regexes = "Variable";
+  tensorflow::string FLAGS_start_name_regexes = ".*";
+  tensorflow::string FLAGS_trim_name_regexes = "";
+  tensorflow::string FLAGS_show_name_regexes = ".*";
+  tensorflow::string FLAGS_hide_name_regexes;
+  bool FLAGS_account_displayed_op_only = false;
+  tensorflow::string FLAGS_select = "params";
+  // No command-line flag is registered for viz below, so it stays false here.
+  bool FLAGS_viz = false;
+  tensorflow::string FLAGS_dump_to_file = "";
+  // Echo the raw arguments to stderr.
+  for (int i = 0; i < argc; i++) {
+    fprintf(stderr, "%s\n", argv[i]);
+  }
+
+  // Every flag is bound to the variable of the same name. Previously
+  // "account_type_regexes" was bound to FLAGS_start_name_regexes and
+  // "start_name_regexes" was not registered at all.
+  CHECK(tensorflow::ParseFlags(
+      &argc, argv,
+      {tensorflow::Flag("graph_path", &FLAGS_graph_path),
+       tensorflow::Flag("run_meta_path", &FLAGS_run_meta_path),
+       tensorflow::Flag("op_log_path", &FLAGS_op_log_path),
+       tensorflow::Flag("checkpoint_path", &FLAGS_checkpoint_path),
+       tensorflow::Flag("max_depth", &FLAGS_max_depth),
+       tensorflow::Flag("min_bytes", &FLAGS_min_bytes),
+       tensorflow::Flag("min_micros", &FLAGS_min_micros),
+       tensorflow::Flag("min_params", &FLAGS_min_params),
+       tensorflow::Flag("min_float_ops", &FLAGS_min_float_ops),
+       tensorflow::Flag("device_regexes", &FLAGS_device_regexes),
+       tensorflow::Flag("order_by", &FLAGS_order_by),
+       tensorflow::Flag("account_type_regexes", &FLAGS_account_type_regexes),
+       tensorflow::Flag("start_name_regexes", &FLAGS_start_name_regexes),
+       tensorflow::Flag("trim_name_regexes", &FLAGS_trim_name_regexes),
+       tensorflow::Flag("show_name_regexes", &FLAGS_show_name_regexes),
+       tensorflow::Flag("hide_name_regexes", &FLAGS_hide_name_regexes),
+       tensorflow::Flag("account_displayed_op_only",
+                        &FLAGS_account_displayed_op_only),
+       tensorflow::Flag("select", &FLAGS_select),
+       tensorflow::Flag("dump_to_file", &FLAGS_dump_to_file)}));
+  tensorflow::port::InitMain(argv[0], &argc, &argv);
+
+  fprintf(stderr, "%s\n", FLAGS_graph_path.c_str());
+
+  // The list-valued flags are comma-separated; empty items are dropped.
+  std::vector<tensorflow::string> device_regexes =
+      Split(FLAGS_device_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> account_type_regexes =
+      Split(FLAGS_account_type_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> start_name_regexes =
+      Split(FLAGS_start_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> trim_name_regexes =
+      Split(FLAGS_trim_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> show_name_regexes =
+      Split(FLAGS_show_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> hide_name_regexes =
+      Split(FLAGS_hide_name_regexes, ',', tensorflow::str_util::SkipEmpty());
+  std::vector<tensorflow::string> select =
+      Split(FLAGS_select, ',', tensorflow::str_util::SkipEmpty());
+
+  tensorflow::string cmd = "";
+  if (argc == 1 && FLAGS_graph_path.empty()) {
+    // Nothing to do: print a short usage summary.
+    printf("1) go/tfprof: Tutorial.\n");
+    printf("2) tfprof help: Detail help information.\n");
+    printf(
+        "3) tfprof --graph_path <GraphDef proto text file>: "
+        "Profiling model structure, tensor shape and # parameters.\n");
+    printf(
+        "4) tfprof --graph_path <GraphDef proto text file> \\\n"
+        " --run_meta_path <RunMetadata proto binary file> \\\n"
+        " --op_log_path <tensorflow::tfprof::OpLog proto binary file> "
+        "\\\n"
+        " --checkpoint_path <TensorFlow Checkpoint file>: "
+        "Profiling everything!\n");
+    return 0;
+  } else if (argc > 1) {
+    // kCmds[3] prints the help text; kCmds[0] and kCmds[1] are remembered and
+    // run once, non-interactively, after the input files are loaded.
+    if (tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[3]) {
+      tensorflow::tfprof::PrintHelp();
+      return 0;
+    }
+    if (tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[0] ||
+        tensorflow::string(argv[1]) == tensorflow::tfprof::kCmds[1]) {
+      cmd = argv[1];
+    }
+  }
+
+  printf("Reading Files...\n");
+  std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef());
+  TF_CHECK_OK(tensorflow::tfprof::ReadGraphDefText(
+      tensorflow::Env::Default(), FLAGS_graph_path, graph.get()));
+
+  // RunMetadata and OpLog are optional: when a file cannot be read, a null
+  // pointer is handed to TFStats instead. reset() deletes the unused object;
+  // release() (used previously) only dropped ownership and leaked it.
+  std::unique_ptr<tensorflow::RunMetadata> run_meta(
+      new tensorflow::RunMetadata());
+  if (!ReadBinaryProto(tensorflow::Env::Default(), FLAGS_run_meta_path,
+                       run_meta.get())
+           .ok()) {
+    run_meta.reset();
+  }
+
+  std::unique_ptr<tensorflow::tfprof::OpLog> op_log(
+      new tensorflow::tfprof::OpLog());
+  if (!ReadBinaryProto(tensorflow::Env::Default(), FLAGS_op_log_path,
+                       op_log.get())
+           .ok()) {
+    op_log.reset();
+  }
+
+  std::unique_ptr<tensorflow::checkpoint::CheckpointReader> ckpt_reader;
+  if (!FLAGS_checkpoint_path.empty()) {
+    // The status is created only where it is needed so that it is deleted on
+    // every path; it used to leak when no checkpoint path was given.
+    TF_Status* status = TF_NewStatus();
+    ckpt_reader.reset(new tensorflow::checkpoint::CheckpointReader(
+        FLAGS_checkpoint_path, status));
+    if (TF_GetCode(status) != TF_OK) {
+      fprintf(stderr, "%s\n", TF_Message(status));
+      TF_DeleteStatus(status);
+      return 1;
+    }
+    TF_DeleteStatus(status);
+  }
+
+  tensorflow::tfprof::TFStats tf_stat(std::move(graph), std::move(run_meta),
+                                      std::move(op_log),
+                                      std::move(ckpt_reader));
+  tensorflow::tfprof::Options opts(
+      FLAGS_max_depth, FLAGS_min_bytes, FLAGS_min_micros, FLAGS_min_params,
+      FLAGS_min_float_ops, device_regexes, FLAGS_order_by, account_type_regexes,
+      start_name_regexes, trim_name_regexes, show_name_regexes,
+      hide_name_regexes, FLAGS_account_displayed_op_only, select, FLAGS_viz,
+      FLAGS_dump_to_file);
+
+  // One-shot mode: a command was given on the command line.
+  if (!cmd.empty()) {
+    tf_stat.PrintGraph(cmd, opts);
+    return 0;
+  }
+
+  // Interactive mode.
+  linenoiseSetCompletionCallback(completion);
+  linenoiseHistoryLoad(".tfprof_history.txt");
+
+  for (char* line = nullptr; (line = linenoise("tfprof> ")) != nullptr;) {
+    tensorflow::string line_s = tensorflow::string(line);
+    free(line);
+
+    // An empty line shows the options currently in effect.
+    if (line_s.empty()) {
+      printf("%s", opts.ToString().c_str());
+      continue;
+    }
+    linenoiseHistoryAdd(line_s.c_str());
+    linenoiseHistorySave(".tfprof_history.txt");
+
+    // Parse into a copy so that options given with a command apply to that
+    // command only; just kCmds[2] stores them as the new defaults.
+    tensorflow::tfprof::Options new_opts = opts;
+    tensorflow::Status s =
+        tensorflow::tfprof::ParseCmdLine(line_s, &cmd, &new_opts);
+    if (!s.ok()) {
+      fprintf(stderr, "E: %s\n", s.ToString().c_str());
+      continue;
+    }
+    if (cmd == tensorflow::tfprof::kCmds[2]) {
+      opts = new_opts;
+    } else if (cmd == tensorflow::tfprof::kCmds[3]) {
+      tensorflow::tfprof::PrintHelp();
+    } else {
+      tf_stat.PrintGraph(cmd, new_opts);
+    }
+  }
+  return 0;
+}
diff --git a/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
new file mode 100644
index 0000000000..9afd41046e
--- /dev/null
+++ b/tensorflow/contrib/tfprof/tools/tfprof/tfprof_output.proto
@@ -0,0 +1,49 @@
+syntax = "proto2";
+
+import "tensorflow/core/framework/tensor_shape.proto";
+import "tensorflow/core/framework/types.proto";
+
+package tensorflow.tfprof;
+
+// A tensor value stored as its data type plus a flat list of elements.
+message TFProfTensorProto {
+ optional DataType dtype = 1;
+ // Flattened tensor in row-major order.
+ // Only one of the following arrays is set.
+ repeated double value_double = 2;
+ repeated int64 value_int64 = 3;
+ repeated string value_str = 4;
+}
+
+// One node of tfprof's output: the statistics of a single op, the totals
+// aggregated over the op and its accounted descendants, and the child nodes.
+// Field numbers do not follow declaration order; they identify the fields on
+// the wire, so keep them unchanged when editing this message.
+message TFProfNode {
+ // op name.
+ optional string name = 1;
+ // tensor value restored from checkpoint.
+ optional TFProfTensorProto tensor_value = 15;
+ // op execution time.
+ optional int64 exec_micros = 2;
+ // Total requested bytes by the op.
+ optional int64 requested_bytes = 3;
+ // Number of parameters if available.
+ optional int64 parameters = 4;
+ // Number of float operations.
+ optional int64 float_ops = 13;
+ // Number of inputs to the op.
+ optional int64 inputs = 5;
+ // Device the op is assigned to.
+ optional string device = 10;
+
+ // The following are the aggregated stats from all accounted descendants and
+ // the op itself. The actual descendants depend on the data structure used
+ // (scope, graph).
+ optional int64 total_exec_micros = 6;
+ optional int64 total_requested_bytes = 7;
+ optional int64 total_parameters = 8;
+ optional int64 total_float_ops = 14;
+ optional int64 total_inputs = 9;
+
+ // shape information, if available.
+ repeated TensorShapeProto shapes = 11;
+ // Descendants of the graph. The actual descendants depend on the data
+ // structure used (scope, graph).
+ repeated TFProfNode children = 12;
+} \ No newline at end of file