diff options
author | 2016-11-09 18:16:12 -0800 | |
---|---|---|
committer | 2016-11-09 18:23:41 -0800 | |
commit | f5bd8e1c34a0ee6f6067119744b4b1c8fd9077b3 (patch) | |
tree | c8e41475a9a0eb9a6444bea8ac5b90626d300c1f | |
parent | e01b641fc26a39ae62dc6a0a1379571e5857d8c8 (diff) |
Improve tfprof doc. Support binary GraphDef.
Change: 138711410
-rw-r--r-- | tensorflow/contrib/tfprof/README.md | 72 | ||||
-rw-r--r-- | tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py | 7 | ||||
-rw-r--r-- | tensorflow/tools/tfprof/README.md | 181 | ||||
-rw-r--r-- | tensorflow/tools/tfprof/internal/tfprof_show_test.cc | 2 | ||||
-rw-r--r-- | tensorflow/tools/tfprof/internal/tfprof_stats_test.cc | 2 | ||||
-rw-r--r-- | tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc | 2 | ||||
-rw-r--r-- | tensorflow/tools/tfprof/internal/tfprof_utils.cc | 5 | ||||
-rw-r--r-- | tensorflow/tools/tfprof/internal/tfprof_utils.h | 2 | ||||
-rw-r--r-- | tensorflow/tools/tfprof/tfprof_main.cc | 4 |
9 files changed, 182 insertions, 95 deletions
diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md index e103cb2121..c7ff4a2921 100644 --- a/tensorflow/contrib/tfprof/README.md +++ b/tensorflow/contrib/tfprof/README.md @@ -1,17 +1,11 @@ # tfprof: A Profiling Tool for TensorFlow Models -Internal User Please Use: go/tfprof +# Full Document in tensorflow/tools/tfprof/README.md Author: Xin Pan (xpan@google.com, github: panyx0718) Consultants: Jon Shlens, Pete Warden - -## Introduction - -tfprof is a profiling tool for TensorFlow that analyzes model architectures -and measures system performance. - ###Major Features 1. Measure model parameters, float operations, tensor shapes. @@ -20,9 +14,63 @@ and measures system performance. 4. Explore model based on name scope or graph structure. 5. Selectively grouping/filtering/accounting/ordering ops. -tfprof can be used as CommandLine Interface (CLI) and Python API. -CLI locates in tensorflow/tools/tfprof. -Python API locates in tensorflow/contrib/tfprof. -Tutorial locates in tensorflow/tools/tfprof/README.md +tfprof can be used as Python API, Interactive CLI and One-shot Script. + +## Python API Tutorials + +tfprof is part of TensorFlow core. Simply ```import tensorflow as tf```. + +### Examine the shapes and sizes of all trainable Variables. +```python +# Print trainable variable parameter statistics to stdout. +param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis( + tf.get_default_graph(), + tfprof_options=tf.contrib.tfprof.model_analyzer. + TRAINABLE_VARS_PARAMS_STAT_OPTIONS) + +# param_stats is tensorflow.tfprof.TFProfNode proto. It organizes the statistics +# of each graph node in tree structure. Let's print the root below. +sys.stdout.write('total_params: %d\n' % param_stats.total_parameters) +``` + +### Examine the number of floating point operations +``` python +# Print to stdout an analysis of the number of floating point operations in the +# model broken down by individual operations. 
+# +# Note: Only Ops with RegisterStatistics('flops') defined have flop stats. It +# also requires complete shape information. It is common that shape is unknown +# statically. To complete the shape, provide run-time shape information with +# tf.RunMetadata to the API (See next example on how to provide RunMetadata). +tf.contrib.tfprof.model_analyzer.print_model_analysis( + tf.get_default_graph(), + tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS) +``` + +### Examine the timing and memory usage +You will first need to run the following set up in your model in order to +compute the memory and timing statistics. + +```python +# Generate the meta information for the model that contains the memory usage +# and timing information. +run_metadata = tf.RunMetadata() +with tf.Session() as sess: + _ = sess.run(train_op, + options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), + run_metadata=run_metadata) +``` + +Finally, you may run `print_model_analysis` to explore the timing and memory +demands of the model. + +``` python +# Print to stdout an analysis of the memory usage and the timing information +# from running the graph broken down by operations. +tf.contrib.tfprof.model_analyzer.print_model_analysis( + tf.get_default_graph(), + run_meta=run_metadata, + tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY) +``` -Enjoy!
\ No newline at end of file +Users can change ```tfprof_options``` to fully leverage tfprof's power. diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py index 1f710bc970..a89d966939 100644 --- a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py @@ -71,6 +71,7 @@ def _get_logged_ops(graph, run_meta=None): if run_meta: graph = _fill_missing_graph_shape(graph, run_meta) + op_missing_shape = 0 logged_ops = {} graph_def = graph.as_graph_def() for node in graph_def.node: @@ -78,6 +79,7 @@ def _get_logged_ops(graph, run_meta=None): stats = ops.get_stats_for_node_def(graph, node, REGISTERED_FLOP_STATS) except ValueError: # Catch Exception When shape is incomplete. Skip it. + op_missing_shape += 1 stats = None if not stats or not stats.value: @@ -96,6 +98,11 @@ def _get_logged_ops(graph, run_meta=None): logged_ops[entry.name] = entry else: logged_ops[v.op.name].types.append(TRAINABLE_VARIABLES) + if op_missing_shape > 0 and not run_meta: + sys.stderr.write( + '%d ops no flops stats due to incomplete shapes. ' + 'Consider passing run_meta to use run_time shapes.\n' % + op_missing_shape) return logged_ops diff --git a/tensorflow/tools/tfprof/README.md b/tensorflow/tools/tfprof/README.md index 8618abe0d5..865a21d6a0 100644 --- a/tensorflow/tools/tfprof/README.md +++ b/tensorflow/tools/tfprof/README.md @@ -1,17 +1,10 @@ # tfprof: A Profiling Tool for TensorFlow Models -Internal User Please Use: go/tfprof - Author: Xin Pan (xpan@google.com, github: panyx0718) Consultants: Jon Shlens, Pete Warden -## Introduction - -tfprof is a profiling tool for TensorFlow that analyzes model architectures -and measures system performance. - ###Major Features 1. Measure model parameters, float operations, tensor shapes. @@ -20,17 +13,83 @@ and measures system performance. 4. Explore model based on name scope or graph structure. 
5. Selectively grouping/filtering/accounting/ordering ops. -### Interfaces +[Python API Tutorials](#python-api-tutorials): It can be called directly from +Python code. Results are either printed +to stdout or dumped to file. tensorflow.tfprof.TFProfNode proto is returned from +the API to allow users to perform further analysis. [CLI Tutorials](#cli-tutorials): It supports interactive mode for exploration and single-shot mode for scripts. Outputs can be dumped to files or printed in terminal. -Python API Tutorials: Python API is not released yet. +[Options](#options): +tfprof supports many options to selectively account/display/order ops and +statistics. + +## Python API Tutorials + +tfprof is part of TensorFlow core. Simply ```import tensorflow as tf```. + +### Examine the shapes and sizes of all trainable Variables. +```python +# Print trainable variable parameter statistics to stdout. +param_stats = tf.contrib.tfprof.model_analyzer.print_model_analysis( + tf.get_default_graph(), + tfprof_options=tf.contrib.tfprof.model_analyzer. + TRAINABLE_VARS_PARAMS_STAT_OPTIONS) + +# param_stats is tensorflow.tfprof.TFProfNode proto. It organizes the statistics +# of each graph node in tree structure. Let's print the root below. +sys.stdout.write('total_params: %d\n' % param_stats.total_parameters) +``` + +### Examine the number of floating point operations +``` python +# Print to stdout an analysis of the number of floating point operations in the +# model broken down by individual operations. +# +# Note: Only Ops with RegisterStatistics('flops') defined have flop stats. It +# also requires complete shape information. It is common that shape is unknown +# statically. To complete the shape, provide run-time shape information with +# tf.RunMetadata to the API (See next example on how to provide RunMetadata). 
+tf.contrib.tfprof.model_analyzer.print_model_analysis( + tf.get_default_graph(), + tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS) +``` + +### Examine the timing and memory usage +You will first need to run the following set up in your model in order to +compute the memory and timing statistics. + +```python +# Generate the meta information for the model that contains the memory usage +# and timing information. +run_metadata = tf.RunMetadata() +with tf.Session() as sess: + _ = sess.run(train_op, + options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), + run_metadata=run_metadata) +``` + +Finally, you may run `print_model_analysis` to explore the timing and memory +demands of the model. + +``` python +# Print to stdout an analysis of the memory usage and the timing information +# from running the graph broken down by operations. +tf.contrib.tfprof.model_analyzer.print_model_analysis( + tf.get_default_graph(), + run_meta=run_metadata, + tfprof_options=tf.contrib.tfprof.model_analyzer.PRINT_ALL_TIMING_MEMORY) +``` + +Users can change ```tfprof_options``` to fully leverage tfprof's power. + ## CLI Tutorials -Tutorials are based on a 32 layers ResNet. +Tutorials below are based on a 32 layers ResNet. + TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download. ### Examples @@ -44,6 +103,12 @@ bazel build -c opt tensorflow/tools/tfprof/... # Help information, including detail 'option' instructions. bazel-bin/tensorflow/tools/tfprof/tfprof help # +# The following command start tfprof in one-shot mode. +# +bazel-bin/tensorflow/tools/tfprof/tfprof scope \ + --graph_path=graph.pbtxt \ + --max_depth=3 +# # The following commands will start tfprof interactive mode. # # Profile model shapes and parameters only. @@ -168,7 +233,8 @@ _TFProfRoot (0us/2.29sec) Note: float operations calculation depends on 1) op.RegisterStatistics. If an op doesn’t have RegisterStatistics defined, its float operations cannot be counted. 
-2) fully defined shape is also necessary in order to calculate flops. +2) fully defined shape is also necessary in order to calculate flops. Sometimes +full shape is not available statically. Use RunMetadata to get run-time shape. float operations number is provided by tensorflow::tfprof::OpLog logged from Python API. @@ -276,6 +342,10 @@ Second, call write_op_log to write the OpLog proto. ```python tf.contrib.tfprof.tfprof_logger.write_op_log( sess.graph, /tmp/my_op_log_dir, op_log) + +# Get run-time shape information in order to fill shapes and get flops. +tf.contrib.tfprof.tfprof_logger.write_op_log( + sess.graph, /tmp/my_op_log_dir, op_log, run_meta) ``` Third, when starting the tfprof tool, specify @@ -372,84 +442,43 @@ TensorFlow checkpoint. It defines _checkpoint_variable op type. It also provides checkpointed tensors' values. -## Design - - -### In-memory representation +## Options -<b>Scope:</b> This representation organizes ops based on name scope hierarchy, -similar to filesystem hierarchy. Hence, it is essentially a tree data structure. -For example op1 with name “name1/name2” is a child of op2 with name “name1”. +`-max_depth`: Show ops that are at most this number of hops from starting op in the tree/graph structure. -<b>Graph:</b> The representation organizes ops based on op inputs. Hence it is -a graph structure. The graph is a “directed acyclic graph” (hopefully), with -direction from “output to input”. The direction is design this way so that users -can trace from “result” to its “sources”. +`-min_bytes`: Show ops that request at least this number of bytes. -### Command line options +`-min_micros`: Show ops that spend at least this number of microseconds to run. -tfprof’s major goals are to measure system performance and quicly analyze -model architectures. Hence, its commands and options should allow users to achieve -these 2 goals easily. +`-min_params`: Show ops that contain at least this number of parameters. 
-<b>graph:</b> It is expected that users will mostly use graph representation to -debug system performance. Hence, tfprof supports graph command, which pulls the -graph in-memory representation described above. +`-min_float_ops`: Show ops that contain at least this number of float operations. Only available if an op has op.RegisterStatistics() defined and OpLog is provided. -<b>scope:</b> It is expected that some users might want to explore their model -statistics using the name scope information they defined in the Python codes. -Hence, tfprof supports “scope” command, which pulls the tree in-memory -representation. +`-device_regexes`: Show ops that are placed on the specified devices. regexes are comma-separated. -<b>set:</b> It is used to store the options so that user doesn’t need to -re-type the same option again and again in the follow up command line. Note that -tfprof has traditional terminal’s history and auto-complete support. +`-order_by`: Order the results by [name|depth|bytes|micros|params|float_ops] -<b>help:</b> print help information. +`-account_type_regexes`: Account and display the ops whose types match one of the type regexes specified. tfprof allows users to define extra op types for ops through tensorflow.tfprof.OpLog proto. regexes are comma-separated. -<b>Options:</b> Run “tfprof help” to get detailed explanations. - -```python -"-max_depth", -"-min_bytes", -"-min_micros", -"-min_params", -"-min_float_ops", -"-order_by", -"-account_type_regexes", -"-start_name_regexes", -"-trim_name_regexes", -"-show_name_regexes", -"-hide_name_regexes", -"-account_displayed_op_only", -"-select", -"-viz", # Only supported for graph command. -"-dump_to_file", -``` +`-start_name_regexes`: Show ops starting from the ops that match the regexes, recursively. regexes are comma-separated. -A key design is that stats are aggregated from descendants up to ancestors. -`-account_type_regexes` is used to decide which ops stat is accounted.
It makes -decision based on op type. Usually set it to `.*` if no extra type information -is added to the ops using OpLog. Intuitively, only accounted ops are displayed. -`-min/max` and `-show/hide/trim/start` options are only used the optionally -displayed or hide ops based on ops’ name and stats. However, they don’t prevent -tfprof from accounting stats of hidden ops. Hence, the stat of a op can be -aggregated by its parent even if it is hidden. `-account_displayed_op_only` is -an option to break this rule. When it is set, only displayed ops are accounted. +`-trim_name_regexes`: Hide ops starting from the ops that match the regexes, recursively. regexes are comma-separated. -Regexes are all comma-separated, for example `-show_name_regexes` -`regex1.*,regex2.*`. It is designed this way because it is convenient and comma -is not expected to show up in op names. +`-show_name_regexes`: Show ops that match the regexes. regexes are comma-separated. -`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy -(notice the indent printed) are sorted according to order_by. +`-hide_name_regexes`: Hide ops that match the regexes. regexes are comma-separated. -## Future Work +Notes: For each op, `-account_type_regexes` is first evaluated; only ops with +types matching the specified regexes are accounted and selected for display. +`-start/trim/show/hide_name_regexes` are used to further filter ops for display. +`-start_name_regexes` is evaluated first to search the starting ops to display. +Descendants of starting ops are then evaluated against `-show/hide_name_regexes` +to make the display decision. If an op matches `-trim_name_regexes`, all its +descendants are hidden. Ops statistics are *accounted even if they are hidden* +as long as they match the `-account_xxx` options. -* Load SummaryWriter event logs so that it can show the latest summary value. +`-account_displayed_op_only`: If True, only account the statistics of ops eventually displayed.
If False, account all op statistics matching -account_type_regexes recursively. -* Better sorting and aggregation of outputs. Easier comprehension. +`-select`: Comma-separated list of metrics to show: [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types]. -* Currently, shape information is based on `graph.pbtxt`. When the shape -information is incomplete, tfprof ignores it. See if it can use `RunMetadata` -and `Checkpoint` to complete shape information. +`-dump_to_file`: Dump the output to a file, instead of terminal. diff --git a/tensorflow/tools/tfprof/internal/tfprof_show_test.cc b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc index 1579472764..820647f627 100644 --- a/tensorflow/tools/tfprof/internal/tfprof_show_test.cc +++ b/tensorflow/tools/tfprof/internal/tfprof_show_test.cc @@ -38,7 +38,7 @@ class TFProfShowTest : public ::testing::Test { io::JoinPath(testing::TensorFlowSrcRoot(), "tools/tfprof/internal/testdata/graph.pbtxt"); std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef()); - TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get())); + TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get())); std::unique_ptr<tensorflow::RunMetadata> run_meta_pb( new tensorflow::RunMetadata()); diff --git a/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc index a6fcadbe95..2aa282ac12 100644 --- a/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc +++ b/tensorflow/tools/tfprof/internal/tfprof_stats_test.cc @@ -39,7 +39,7 @@ class TFProfStatsTest : public ::testing::Test { io::JoinPath(testing::TensorFlowSrcRoot(), "tools/tfprof/internal/testdata/graph.pbtxt"); std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef()); - TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get())); + TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get())); std::unique_ptr<tensorflow::RunMetadata> 
run_meta_pb( new tensorflow::RunMetadata()); diff --git a/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc index 1066e6208a..baa9fce110 100644 --- a/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc +++ b/tensorflow/tools/tfprof/internal/tfprof_tensor_test.cc @@ -34,7 +34,7 @@ class TFProfTensorTest : public ::testing::Test { io::JoinPath(testing::TensorFlowSrcRoot(), "tools/tfprof/internal/testdata/graph.pbtxt"); std::unique_ptr<tensorflow::GraphDef> graph_pb(new tensorflow::GraphDef()); - TF_CHECK_OK(ReadGraphDefText(Env::Default(), graph_path, graph_pb.get())); + TF_CHECK_OK(ReadGraphDef(Env::Default(), graph_path, graph_pb.get())); std::unique_ptr<tensorflow::RunMetadata> run_meta_pb; std::unique_ptr<OpLog> op_log_pb; diff --git a/tensorflow/tools/tfprof/internal/tfprof_utils.cc b/tensorflow/tools/tfprof/internal/tfprof_utils.cc index 5783b9f475..6d557e9193 100644 --- a/tensorflow/tools/tfprof/internal/tfprof_utils.cc +++ b/tensorflow/tools/tfprof/internal/tfprof_utils.cc @@ -72,12 +72,15 @@ string StringReplace(const string& str, const string& oldsub, return out; } -Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def) { +Status ReadGraphDef(Env* env, const string& fname, GraphDef* graph_def) { string out; Status s = ReadFileToString(env, fname, &out); if (!s.ok()) return s; if (protobuf::TextFormat::ParseFromString(out, graph_def)) { return Status(); + } else if (ReadBinaryProto(tensorflow::Env::Default(), fname, graph_def) + .ok()) { + return Status(); } return errors::InvalidArgument("Cannot parse proto string."); } diff --git a/tensorflow/tools/tfprof/internal/tfprof_utils.h b/tensorflow/tools/tfprof/internal/tfprof_utils.h index 13077a8fc5..afa7a58acd 100644 --- a/tensorflow/tools/tfprof/internal/tfprof_utils.h +++ b/tensorflow/tools/tfprof/internal/tfprof_utils.h @@ -40,7 +40,7 @@ tensorflow::Status ParseCmdLine(const string& line, string* cmd, string 
StringReplace(const string& str, const string& oldsub, const string& newsub); -Status ReadGraphDefText(Env* env, const string& fname, GraphDef* graph_def); +Status ReadGraphDef(Env* env, const string& fname, GraphDef* graph_def); void PrintHelp(); diff --git a/tensorflow/tools/tfprof/tfprof_main.cc b/tensorflow/tools/tfprof/tfprof_main.cc index f72797f0a2..92e9510ea8 100644 --- a/tensorflow/tools/tfprof/tfprof_main.cc +++ b/tensorflow/tools/tfprof/tfprof_main.cc @@ -172,8 +172,8 @@ int main(int argc, char** argv) { printf("Reading Files...\n"); std::unique_ptr<tensorflow::GraphDef> graph(new tensorflow::GraphDef()); - TF_CHECK_OK(tensorflow::tfprof::ReadGraphDefText( - tensorflow::Env::Default(), FLAGS_graph_path, graph.get())); + TF_CHECK_OK(tensorflow::tfprof::ReadGraphDef(tensorflow::Env::Default(), + FLAGS_graph_path, graph.get())); std::unique_ptr<tensorflow::RunMetadata> run_meta( new tensorflow::RunMetadata()); |