aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2016-11-09 18:16:12 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-11-09 18:23:41 -0800
commitf5bd8e1c34a0ee6f6067119744b4b1c8fd9077b3 (patch)
treec8e41475a9a0eb9a6444bea8ac5b90626d300c1f
parente01b641fc26a39ae62dc6a0a1379571e5857d8c8 (diff)
Improve tfprof doc. Support binary GraphDef.
Change: 138711410
-rw-r--r--tensorflow/contrib/tfprof/README.md72
-rw-r--r--tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py7
-rw-r--r--tensorflow/tools/tfprof/README.md181
-rw-r--r--tensorflow/tools/tfprof/internal/tfprof_show_test.cc2
-rw-r--r--tensorflow/tools/tfprof/internal/tfprof_stats_test.cc2
-rw-r--r--tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc2
-rw-r--r--tensorflow/tools/tfprof/internal/tfprof_utils.cc5
-rw-r--r--tensorflow/tools/tfprof/internal/tfprof_utils.h2
-rw-r--r--tensorflow/tools/tfprof/tfprof_main.cc4
9 files changed, 182 insertions, 95 deletions
diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md
index e103cb2121..c7ff4a2921 100644
--- a/tensorflow/contrib/tfprof/README.md
+++ b/tensorflow/contrib/tfprof/README.md
@@ -1,17 +1,11 @@
# tfprof: A Profiling Tool for TensorFlow Models
-Internal User Please Use: go/tfprof
+# Full Document in tensorflow/tools/tfprof/README.md
Author: Xin Pan (xpan@google.com, github: panyx0718)
Consultants: Jon Shlens, Pete Warden
-
-## Introduction
-
-tfprof is a profiling tool for TensorFlow that analyzes model architectures
-and measures system performance.
-
###Major Features
1. Measure model parameters, float operations, tensor shapes.
@@ -20,9 +14,63 @@ and measures system performance.
4. Explore model based on name scope or graph structure.
5. Selectively grouping/filtering/accounting/ordering ops.
-tfprof can be used as CommandLine Interface (CLI) and Python API.
-CLI locates in tensorflow/tools/tfprof.
-Python API locates in tensorflow/contrib/tfprof.
-Tutorial locates in tensorflow/tools/tfprof/README.md
+tfprof can be used as Python API, Interactive CLI and One-shot Script.
+
+## Python API Tutorials
+
+tfprof is part of TensorFlow core. Simply ```import tensorflow as tf```.
+
+### Examine the shapes and sizes of all trainable Variables.
+```python
+# Print trainable variable parameter statistics to stdout.
+param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ tf.get_default_graph(),
+ tfprof_options=tf.contrib.tfprof.model_analyzer.
+ TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
+
+# param_stats is a tensorflow.tfprof.TFProfNode proto. It organizes the
+# statistics of each graph node in a tree structure. Let's print the root below.
+sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
+```
+
+### Examine the number of floating point operations
+``` python
+# Print to stdout an analysis of the number of floating point operations in the
+# model broken down by individual operations.
+#
+# Note: Only Ops with RegisterStatistics('flops') defined have flop stats. It
+# also requires complete shape information. It is common that shape is unknown
+# statically. To complete the shape, provide run-time shape information with
+# tf.RunMetadata to the API (See next example on how to provide RunMetadata).
+tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ tf.get_default_graph(),
+ tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
+```
+
+### Examine the timing and memory usage
+You will first need to run the following set up in your model in order to
+compute the memory and timing statistics.
+
+```python
+# Generate the meta information for the model that contains the memory usage
+# and timing information.
+run_metadata = tf.RunMetadata()
+with tf.Session() as sess:
+ _ = sess.run(train_op,
+ options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
+ run_metadata=run_metadata)
+```
+
+Finally, you may run `print_model_analysis` to explore the timing and memory
+demands of the model.
+
+``` python
+# Print to stdout an analysis of the memory usage and the timing information
+# from running the graph broken down by operations.
+tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ tf.get_default_graph(),
+ run_meta=run_metadata,
+ tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)
+```
-Enjoy! \ No newline at end of file
+Users can change ```tfprof_options``` to fully leverage tfprof's power.
diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
index 1f710bc970..a89d966939 100644
--- a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
+++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py
@@ -71,6 +71,7 @@ def _get_logged_ops(graph, run_meta=None):
if run_meta:
graph = _fill_missing_graph_shape(graph, run_meta)
+ op_missing_shape = 0
logged_ops = {}
graph_def = graph.as_graph_def()
for node in graph_def.node:
@@ -78,6 +79,7 @@ def _get_logged_ops(graph, run_meta=None):
stats = ops.get_stats_for_node_def(graph, node, REGISTERED_FLOP_STATS)
except ValueError:
# Catch Exception When shape is incomplete. Skip it.
+ op_missing_shape += 1
stats = None
if not stats or not stats.value:
@@ -96,6 +98,11 @@ def _get_logged_ops(graph, run_meta=None):
logged_ops[entry.name] = entry
else:
logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES)
+ if op_missing_shape > 0 and not run_meta:
+ sys.stderr.write(
+ '%d ops no flops stats due to incomplete shapes. '
+ 'Consider passing run_meta to use run_time shapes.\n' %
+ op_missing_shape)
return logged_ops
diff --git a/tensorflow/tools/tfprof/README.md b/tensorflow/tools/tfprof/README.md
index 8618abe0d5..865a21d6a0 100644
--- a/tensorflow/tools/tfprof/README.md
+++ b/tensorflow/tools/tfprof/README.md
@@ -1,17 +1,10 @@
# tfprof: A Profiling Tool for TensorFlow Models
-Internal User Please Use: go/tfprof
-
Author: Xin Pan (xpan@google.com, github: panyx0718)
Consultants: Jon Shlens, Pete Warden
-## Introduction
-
-tfprof is a profiling tool for TensorFlow that analyzes model architectures
-and measures system performance.
-
###Major Features
1. Measure model parameters, float operations, tensor shapes.
@@ -20,17 +13,83 @@ and measures system performance.
4. Explore model based on name scope or graph structure.
5. Selectively grouping/filtering/accounting/ordering ops.
-### Interfaces
+[Python API Tutorials](#python-api-tutorials): It can be called directly from
+Python codes. Results are either printed
+to stdout or dumped to file. tensorflow.tfprof.TFProfNode proto is returned from
+the API to allow users to perform further analysis.
[CLI Tutorials](#cli-tutorials):
It supports interactive mode for exploration and single-shot mode for
scripts. Outputs can be dumped to files or printed in terminal.
-Python API Tutorials: Python API is not released yet.
+[Options](#options):
+tfprof supports many options to selectively account/display/order ops and
+statistics.
+
+## Python API Tutorials
+
+tfprof is part of TensorFlow core. Simply ```import tensorflow as tf```.
+
+### Examine the shapes and sizes of all trainable Variables.
+```python
+# Print trainable variable parameter statistics to stdout.
+param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ tf.get_default_graph(),
+ tfprof_options=tf.contrib.tfprof.model_analyzer.
+ TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
+
+# param_stats is a tensorflow.tfprof.TFProfNode proto. It organizes the
+# statistics of each graph node in a tree structure. Let's print the root below.
+sys.stdout.write('total_params: %d\n' % param_stats.total_parameters)
+```
+
+### Examine the number of floating point operations
+``` python
+# Print to stdout an analysis of the number of floating point operations in the
+# model broken down by individual operations.
+#
+# Note: Only Ops with RegisterStatistics('flops') defined have flop stats. It
+# also requires complete shape information. It is common that shape is unknown
+# statically. To complete the shape, provide run-time shape information with
+# tf.RunMetadata to the API (See next example on how to provide RunMetadata).
+tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ tf.get_default_graph(),
+ tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
+```
+
+### Examine the timing and memory usage
+You will first need to run the following set up in your model in order to
+compute the memory and timing statistics.
+
+```python
+# Generate the meta information for the model that contains the memory usage
+# and timing information.
+run_metadata = tf.RunMetadata()
+with tf.Session() as sess:
+ _ = sess.run(train_op,
+ options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE),
+ run_metadata=run_metadata)
+```
+
+Finally, you may run `print_model_analysis` to explore the timing and memory
+demands of the model.
+
+``` python
+# Print to stdout an analysis of the memory usage and the timing information
+# from running the graph broken down by operations.
+tf.contrib.tfprof.model_analyzer.print_model_analysis(
+ tf.get_default_graph(),
+ run_meta=run_metadata,
+ tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY)
+```
+
+Users can change ```tfprof_options``` to fully leverage tfprof's power.
+
## CLI Tutorials
-Tutorials are based on a 32 layers ResNet.
+Tutorials below are based on a 32 layers ResNet.
+
TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download.
### Examples
@@ -44,6 +103,12 @@ bazel build -c opt tensorflow/tools/tfprof/...
# Help information, including detail 'option' instructions.
bazel-bin/tensorflow/tools/tfprof/tfprof help
#
+# The following command start tfprof in one-shot mode.
+#
+bazel-bin/tensorflow/tools/tfprof/tfprof scope \
+ --graph_path=graph.pbtxt \
+ --max_depth=3
+#
# The following commands will start tfprof interactive mode.
#
# Profile model shapes and parameters only.
@@ -168,7 +233,8 @@ _TFProfRoot (0us/2.29sec)
Note: float operations calculation depends on
1) op.RegisterStatistics. If an op doesn’t
have RegisterStatistics defined, its float operations cannot be counted.
-2) fully defined shape is also necessary in order to calculate flops.
+2) fully defined shape is also necessary in order to calculate flops. Sometimes
+full shape is not available statically. Use RunMetadata to get run-time shape.
float operations number is provided by tensorflow::tfprof::OpLog logged from
Python API.
@@ -276,6 +342,10 @@ Second, call write_op_log to write the OpLog proto.
```python
tf.contrib.tfprof.tfprof_logger.write_op_log(
sess.graph, /tmp/my_op_log_dir, op_log)
+
+# Get run-time shape information in order to fill shapes and get flops.
+tf.contrib.tfprof.tfprof_logger.write_op_log(
+ sess.graph, /tmp/my_op_log_dir, op_log, run_meta)
```
Third, when starting the tfprof tool, specify
@@ -372,84 +442,43 @@ TensorFlow checkpoint. It defines _checkpoint_variable op type. It also
provides checkpointed tensors' values.
-## Design
-
-
-### In-memory representation
+##Options
-<b>Scope:</b> This representation organizes ops based on name scope hierarchy,
-similar to filesystem hierarchy. Hence, it is essentially a tree data structure.
-For example op1 with name “name1/name2” is a child of op2 with name “name1”.
+`-max_depth`: Show ops that are at most this number of hops from starting op in the tree/graph structure.
-<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is
-a graph structure. The graph is a “directed acyclic graph” (hopefully), with
-direction from “output to input”. The direction is design this way so that users
-can trace from “result” to its “sources”.
+`-min_bytes`: Show ops that request at least this number of bytes.
-### Command line options
+`-min_micros`: Show ops that spend at least this number of microseconds to run.
-tfprof’s major goals are to measure system performance and quicly analyze
-model architectures. Hence, its commands and options should allow users to achieve
-these 2 goals easily.
+`-min_params`: Show ops that contain at least this number of parameters.
-<b>graph:</b> It is expected that users will mostly use graph representation to
-debug system performance. Hence, tfprof supports graph command, which pulls the
-graph in-memory representation described above.
+`-min_float_ops`: Show ops that contain at least this number of float operations. Only available if an op has op.RegisterStatistics() defined and OpLog is provided
-<b>scope:</b> It is expected that some users might want to explore their model
-statistics using the name scope information they defined in the Python codes.
-Hence, tfprof supports “scope” command, which pulls the tree in-memory
-representation.
+`-device_regexes`: Show ops that are placed on the specified devices. regexes are comma-separated.
-<b>set:</b> It is used to store the options so that user doesn’t need to
-re-type the same option again and again in the follow up command line. Note that
-tfprof has traditional terminal’s history and auto-complete support.
+`-order_by`: Order the results by [name|depth|bytes|micros|params|float_ops]
-<b>help:</b> print help information.
+`-account_type_regexes`: Account and display the ops whose types match one of the type regexes specified. tfprof allows users to define extra op types for ops through the tensorflow.tfprof.OpLog proto. regexes are comma-separated.
-<b>Options:</b> Run “tfprof help” to get detailed explanations.
-
-```python
-"-max_depth",
-"-min_bytes",
-"-min_micros",
-"-min_params",
-"-min_float_ops",
-"-order_by",
-"-account_type_regexes",
-"-start_name_regexes",
-"-trim_name_regexes",
-"-show_name_regexes",
-"-hide_name_regexes",
-"-account_displayed_op_only",
-"-select",
-"-viz", # Only supported for graph command.
-"-dump_to_file",
-```
+`-start_name_regexes`: Show ops starting from the ops that matches the regexes, recursively. regexes are comma-separated.
-A key design is that stats are aggregated from descendants up to ancestors.
-`-account_type_regexes` is used to decide which ops stat is accounted. It makes
-decision based on op type. Usually set it to `.*` if no extra type information
-is added to the ops using OpLog. Intuitively, only accounted ops are displayed.
-`-min/max` and `-show/hide/trim/start` options are only used the optionally
-displayed or hide ops based on ops’ name and stats. However, they don’t prevent
-tfprof from accounting stats of hidden ops. Hence, the stat of a op can be
-aggregated by its parent even if it is hidden. `-account_displayed_op_only` is
-an option to break this rule. When it is set, only displayed ops are accounted.
+`-trim_name_regexes`: Hide ops starting from the ops that match the regexes, recursively. regexes are comma-separated.
-Regexes are all comma-separated, for example `-show_name_regexes`
-`regex1.*,regex2.*`. It is designed this way because it is convenient and comma
-is not expected to show up in op names.
+`-show_name_regexes`: Show ops that match the regexes. regexes are comma-separated.
-`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy
-(notice the indent printed) are sorted according to order_by.
+`-hide_name_regexes`: Hide ops that match the regexes. regexes are comma-separated.
-## Future Work
+Notes: For each op, `-account_type_regexes` is first evaluated, only ops with
+types matching the specified regexes are accounted and selected for display.
+`-start/trim/show/hide_name_regexes` are used to further filter ops for display.
+`-start_name_regexes` is evaluated first to search the starting ops to display.
+Descendants of starting ops are then evaluated against `-show/hide_name_regexes`
+to make display decision. If an op matches trim_name_regexes, all its
+descendants are hidden. Ops statistics are *accounted even if they are hidden*
+as long as they match the `-account_xxx` options.
-* Load SummaryWriter event logs so that it can show the latest summary value.
+`-account_displayed_op_only`: If True, only account the statistics of ops eventually displayed. If False, account all op statistics matching -account_type_regexes recursively.
-* Better sorting and aggregation of outputs. Easier comprehension.
+`-select`: Comma-separated list of metrics to show: [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types].
-* Currently, shape information is based on `graph.pbtxt`. When the shape
-information is incomplete, tfprof ignores it. See if it can use `RunMetadata`
-and `Checkpoint` to complete shape information.
+`-dump_to_file`: Dump the output to a file, instead of terminal.
diff --git a/tensorflow/tools/tfprof/internal/tfprof_show_test.cc b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc
index 1579472764..820647f627 100644
--- a/tensorflow/tools/tfprof/internal/tfprof_show_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc
@@ -38,7 +38,7 @@ class TFProfShowTest : public ::testing::Test {
io::JoinPath(testing::TensorFlowSrcRoot(),
"tools/tfprof/internal/testdata/graph.pbtxt");
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
- TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
+ TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
new tensorflow::RunMetadata());
diff --git a/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
index a6fcadbe95..2aa282ac12 100644
--- a/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc
@@ -39,7 +39,7 @@ class TFProfStatsTest : public ::testing::Test {
io::JoinPath(testing::TensorFlowSrcRoot(),
"tools/tfprof/internal/testdata/graph.pbtxt");
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
- TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
+ TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb(
new tensorflow::RunMetadata());
diff --git a/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
index 1066e6208a..baa9fce110 100644
--- a/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc
@@ -34,7 +34,7 @@ class TFProfTensorTest : public ::testing::Test {
io::JoinPath(testing::TensorFlowSrcRoot(),
"tools/tfprof/internal/testdata/graph.pbtxt");
std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef());
- TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get()));
+ TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta_pb;
std::unique_ptr<OpLog> op_log_pb;
diff --git a/tensorflow/tools/tfprof/internal/tfprof_utils.cc b/tensorflow/tools/tfprof/internal/tfprof_utils.cc
index 5783b9f475..6d557e9193 100644
--- a/tensorflow/tools/tfprof/internal/tfprof_utils.cc
+++ b/tensorflow/tools/tfprof/internal/tfprof_utils.cc
@@ -72,12 +72,15 @@ string StringReplace(const string& str, const string& oldsub,
return out;
}
-Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def) {
+Status ReadGraphDef(Env* env, const string& fname, GraphDef* graph_def) {
string out;
Status s = ReadFileToString(env, fname, &out);
if (!s.ok()) return s;
if (protobuf::TextFormat::ParseFromString(out, graph_def)) {
return Status();
+ } else if (ReadBinaryProto(tensorflow::Env::Default(), fname, graph_def)
+ .ok()) {
+ return Status();
}
return errors::InvalidArgument("Cannot parse proto string.");
}
diff --git a/tensorflow/tools/tfprof/internal/tfprof_utils.h b/tensorflow/tools/tfprof/internal/tfprof_utils.h
index 13077a8fc5..afa7a58acd 100644
--- a/tensorflow/tools/tfprof/internal/tfprof_utils.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_utils.h
@@ -40,7 +40,7 @@ tensorflow::Status ParseCmdLine(const string& line, string* cmd,
string StringReplace(const string& str, const string& oldsub,
const string& newsub);
-Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def);
+Status ReadGraphDef(Env* env, const string& fname, GraphDef* graph_def);
void PrintHelp();
diff --git a/tensorflow/tools/tfprof/tfprof_main.cc b/tensorflow/tools/tfprof/tfprof_main.cc
index f72797f0a2..92e9510ea8 100644
--- a/tensorflow/tools/tfprof/tfprof_main.cc
+++ b/tensorflow/tools/tfprof/tfprof_main.cc
@@ -172,8 +172,8 @@ int main(int argc, char** argv) {
printf("Reading Files...\n");
std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef());
- TF_CHECK_OK(tensorflow::tfprof::ReadGraphDefText(
- tensorflow::Env::Default(), FLAGS_graph_path, graph.get()));
+ TF_CHECK_OK(tensorflow::tfprof::ReadGraphDef(tensorflow::Env::Default(),
+ FLAGS_graph_path, graph.get()));
std::unique_ptr<tensorflow::RunMetadata> run_meta(
new tensorflow::RunMetadata());