diff options
author | 2018-01-17 17:49:23 -0800 | |
---|---|---|
committer | 2018-01-17 17:53:15 -0800 | |
commit | a04353fda800b66c9f6fcb3d3699415db72fb792 (patch) | |
tree | 910b43b04f7b83553e765006ea1e413c822286fd /tensorflow/core/profiler | |
parent | fc415ed44f2b0fe732562317d2c3ac5304f431a2 (diff) |
Dynamically print helper message based on user queries.
PiperOrigin-RevId: 182298241
Diffstat (limited to 'tensorflow/core/profiler')
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_show.cc | 6 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_show.h | 3 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_show_multi.cc | 7 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_show_multi.h | 3 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_show_test.cc | 13 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_stats.cc | 47 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_stats.h | 5 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_utils.cc | 112 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_utils.h | 3 | ||||
-rw-r--r-- | tensorflow/core/profiler/tfprof_log.proto | 3 |
10 files changed, 170 insertions, 32 deletions
diff --git a/tensorflow/core/profiler/internal/tfprof_show.cc b/tensorflow/core/profiler/internal/tfprof_show.cc index cf28876089..f09cd1dad9 100644 --- a/tensorflow/core/profiler/internal/tfprof_show.cc +++ b/tensorflow/core/profiler/internal/tfprof_show.cc @@ -25,19 +25,19 @@ limitations under the License. namespace tensorflow { namespace tfprof { -const GraphNodeProto& TFShow::Show(const Options& opts) { +const GraphNodeProto& TFShow::Show(const string& prefix, const Options& opts) { if (opts.output_type == kOutput[0]) { Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0])); return ShowInternal(opts, &timeline)->proto(); } else { const ShowNode* ret = ShowInternal(opts, nullptr); if (opts.output_type == kOutput[1]) { - printf("%s", ret->formatted_str.c_str()); + printf("%s", (prefix + ret->formatted_str).c_str()); fflush(stdout); } else if (opts.output_type == kOutput[2]) { Status s = WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]), - ret->formatted_str); + prefix + ret->formatted_str); if (!s.ok()) { fprintf(stderr, "%s\n", s.ToString().c_str()); } diff --git a/tensorflow/core/profiler/internal/tfprof_show.h b/tensorflow/core/profiler/internal/tfprof_show.h index 21b21b34de..2067ea3b73 100644 --- a/tensorflow/core/profiler/internal/tfprof_show.h +++ b/tensorflow/core/profiler/internal/tfprof_show.h @@ -44,7 +44,8 @@ class TFShow { virtual ~TFShow() {} virtual void AddNode(TFGraphNode* node) = 0; virtual void Build() = 0; - const GraphNodeProto& Show(const Options& opts); + virtual const GraphNodeProto& Show(const string& prefix, + const Options& opts) final; protected: virtual const ShowNode* ShowInternal(const Options& opts, diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.cc b/tensorflow/core/profiler/internal/tfprof_show_multi.cc index eb826a7137..7c65d48d4a 100644 --- a/tensorflow/core/profiler/internal/tfprof_show_multi.cc +++ b/tensorflow/core/profiler/internal/tfprof_show_multi.cc @@ -27,19 +27,20 @@ limitations under the License. namespace tensorflow { namespace tfprof { -const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) { +const MultiGraphNodeProto& TFMultiShow::Show(const string& prefix, + const Options& opts) { if (opts.output_type == kOutput[0]) { Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0])); return ShowInternal(opts, &timeline)->proto(); } else { const ShowMultiNode* ret = ShowInternal(opts, nullptr); if (opts.output_type == kOutput[1]) { - printf("%s", ret->formatted_str.c_str()); + printf("%s", (prefix + ret->formatted_str).c_str()); fflush(stdout); } else if (opts.output_type == kOutput[2]) { Status s = WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]), - ret->formatted_str); + prefix + ret->formatted_str); if (!s.ok()) { fprintf(stderr, "%s\n", s.ToString().c_str()); } diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.h b/tensorflow/core/profiler/internal/tfprof_show_multi.h index f6c18c8029..ac0ada0449 100644 --- a/tensorflow/core/profiler/internal/tfprof_show_multi.h +++ b/tensorflow/core/profiler/internal/tfprof_show_multi.h @@ -45,7 +45,8 @@ class TFMultiShow { virtual ~TFMultiShow() {} virtual void AddNode(TFGraphNode* node) = 0; virtual void Build() = 0; - const MultiGraphNodeProto& Show(const Options& opts); + virtual const MultiGraphNodeProto& Show(const string& prefix, + const Options& opts) final; protected: virtual const ShowMultiNode* ShowInternal(const Options& opts, diff --git a/tensorflow/core/profiler/internal/tfprof_show_test.cc b/tensorflow/core/profiler/internal/tfprof_show_test.cc index 5100c8a768..625f64cae5 100644 --- a/tensorflow/core/profiler/internal/tfprof_show_test.cc +++ b/tensorflow/core/profiler/internal/tfprof_show_test.cc @@ -31,6 +31,13 @@ limitations under the License. namespace tensorflow { namespace tfprof { + +string CheckAndRemoveDoc(const string& doc) { + auto pos = doc.find("Profile:"); + CHECK(pos != doc.npos); + return doc.substr(pos + 9); +} + class TFProfShowTest : public ::testing::Test { protected: TFProfShowTest() { @@ -112,7 +119,7 @@ TEST_F(TFProfShowTest, DumpScopeMode) { "1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 11us/11us, 0us/0us, " "11us/11us)\n ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, " "0B/0B, 0us/0us, 0us/0us, 0us/0us)\n", - dump_str); + CheckAndRemoveDoc(dump_str)); EXPECT_EQ(dump_str, TestToFromProto("scope", opts)); } @@ -159,7 +166,7 @@ TEST_F(TFProfShowTest, DumpAcceleratorAndCPUMicros) { "0us/0us)\n ScalarW/Initializer/random_normal/stddev (0us/0us, " "0us/0us)\n ScalarW/read (0us/0us, 0us/0us)\n init (0us/0us, " "0us/0us)\n", - dump_str); + CheckAndRemoveDoc(dump_str)); EXPECT_EQ(dump_str, TestToFromProto("scope", opts)); } @@ -203,7 +210,7 @@ TEST_F(TFProfShowTest, DumpOpMode) { "type:0:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12\t(run*" "0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6\t(run*0|defined*1)" "\texec_time:0us\n\n", - StringReplace(dump_str, " ", "")); + StringReplace(CheckAndRemoveDoc(dump_str), " ", "")); EXPECT_EQ(dump_str, TestToFromProto("op", opts, true)); } diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc index 6e11c3f121..5b91309c80 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats.cc +++ b/tensorflow/core/profiler/internal/tfprof_stats.cc @@ -26,6 +26,9 @@ limitations under the License. namespace tensorflow { namespace tfprof { namespace { + +const char* const kProfilePrefix = "Profile:\n"; + bool CreateRunMetadataNode(const string& name, NodeDef* def) { // TODO(xpan): Better solution than blacklisting this 2 nodes. They // actually cost some resources, maybe include them. Some nodes, such @@ -48,7 +51,7 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph, std::unique_ptr<OpLogProto> op_log, std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader) : has_code_traces_(false), - miss_gpu_stream_(false), + miss_accelerator_stream_(false), ckpt_reader_(std::move(ckpt_reader)) { CHECK(graph) << "Must at least have GraphDef"; @@ -72,7 +75,7 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph, TFStats::TFStats(const string& filename, std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader) : has_code_traces_(false), - miss_gpu_stream_(false), + miss_accelerator_stream_(false), ckpt_reader_(std::move(ckpt_reader)) { string str; Status s = ReadFileToString(Env::Default(), filename, &str); @@ -144,18 +147,21 @@ const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd, if (!Validate(opts)) { return empty_graph_node_; } + string prefix = MaybeReportMissingTrace(); + prefix += QueryDoc(cmd, opts) + kProfilePrefix; + if (cmd == kCmds[0]) { - return scope_view_->Show(opts); + return scope_view_->Show(prefix, opts); } else if (cmd == kCmds[1]) { if (opts.step < 0 && opts.output_type == kOutput[0]) { for (int64 step : steps_) { Options nopts = opts; nopts.step = step; - graph_view_->Show(nopts); + graph_view_->Show(prefix, nopts); } return empty_graph_node_; } - return graph_view_->Show(opts); + return graph_view_->Show(prefix, opts); } else { fprintf(stderr, "Unknown command: %s\n", cmd.c_str()); return empty_graph_node_; @@ -167,14 +173,17 @@ const MultiGraphNodeProto& TFStats::ShowMultiGraphNode( if (!Validate(opts)) { return empty_multi_graph_node_; } + string prefix = MaybeReportMissingTrace(); + prefix += QueryDoc(cmd, opts) + kProfilePrefix; + if (cmd == kCmds[2]) { if (!has_code_traces()) { fprintf(stderr, "No code trace information\n"); return empty_multi_graph_node_; } - return code_view_->Show(opts); + return code_view_->Show(prefix, opts); } else if (cmd == kCmds[3]) { - return op_view_->Show(opts); + return op_view_->Show(prefix, opts); } else { fprintf(stderr, "Unknown command: %s\n", cmd.c_str()); return empty_multi_graph_node_; @@ -295,19 +304,21 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) { } if (has_gpu_scheduling && !has_gpu_stream) { - miss_gpu_stream_ = true; + miss_accelerator_stream_ = true; } } -void TFStats::MaybeReportMissingTrace() const { - if (miss_gpu_stream_) { - fprintf(stderr, - "\n\nFound accelerator operation but misses accelerator " - "stream stats!\n\n" - "It's likely a gpu tracing issue rather than tf-profiler issue.\n" - "If you found your operation missing accelerator time, " - "consider filing a bug to xprof-dev@!\n\n"); - } +string TFStats::MaybeReportMissingTrace() const { + string report = ""; + if (miss_accelerator_stream_) { + report += + "\n\nFound accelerator operation but misses accelerator " + "stream stats!\n\n" + "It's likely a gpu tracing issue rather than tf-profiler issue.\n" + "If you found your operation missing accelerator time, " + "consider filing a bug to xprof-dev@!\n\n"; + } + return report; } void TFStats::SerializeToString(string* content) { @@ -324,6 +335,7 @@ void TFStats::SerializeToString(string* content) { } profile.set_has_trace(has_code_traces_); + profile.set_miss_accelerator_stream(miss_accelerator_stream_); for (int64 s : steps_) { profile.add_steps(s); } @@ -340,7 +352,6 @@ void TFStats::WriteProfile(const string& filename) { } bool TFStats::Validate(const Options& opts) const { - MaybeReportMissingTrace(); if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) { fprintf(stderr, "Options -step=%lld not found.\nAvailable steps: ", opts.step); diff --git a/tensorflow/core/profiler/internal/tfprof_stats.h b/tensorflow/core/profiler/internal/tfprof_stats.h index 621285a7e9..d78abda588 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats.h +++ b/tensorflow/core/profiler/internal/tfprof_stats.h @@ -98,14 +98,13 @@ class TFStats { // For test purpose only. void AddNodeForTest(int64 step, std::unique_ptr<TFGraphNode> node); - void MaybeReportMissingTrace() const; - private: bool Validate(const Options& opts) const; + string MaybeReportMissingTrace() const; std::set<int64> steps_; bool has_code_traces_; - bool miss_gpu_stream_; + bool miss_accelerator_stream_; std::unique_ptr<TFScope> scope_view_; std::unique_ptr<TFGraph> graph_view_; std::unique_ptr<TFCode> code_view_; diff --git a/tensorflow/core/profiler/internal/tfprof_utils.cc b/tensorflow/core/profiler/internal/tfprof_utils.cc index 9eba2ccf04..2813bb46fa 100644 --- a/tensorflow/core/profiler/internal/tfprof_utils.cc +++ b/tensorflow/core/profiler/internal/tfprof_utils.cc @@ -317,5 +317,117 @@ void PrintHelp() { fflush(stdout); } +static const char* const kTotalMicrosHelp = + "total execution time: Sum of accelerator execution time and cpu execution " + "time."; +static const char* const kAccMicrosHelp = + "accelerator execution time: Time spent executing on the accelerator. " + "This is normally measured by the actual hardware library."; +static const char* const kCPUHelp = + "cpu execution time: The time from the start to the end of the operation. " + "It's the sum of actual cpu run time plus the time that it spends waiting " + "if part of computation is launched asynchronously."; +static const char* const kBytes = + "requested bytes: The memory requested by the operation, accumulatively."; +static const char* const kPeakBytes = + "peak bytes: The peak amount of memory that the operation is holding at " + "some point."; +static const char* const kResidualBytes = + "residual bytes: The memory not de-allocated after the operation finishes."; +static const char* const kOutputBytes = + "output bytes: The memory that is output from the operation (not " + "necessarilty allocated by the operation)"; +static const char* const kOccurrence = + "occurrence: The number of times it occurs"; +static const char* const kInputShapes = + "input shape: The shape of input tensors"; +static const char* const kDevice = "device: which device is placed on."; +static const char* const kFloatOps = + "flops: Number of float operations. Note: Please read the implementation " + "for the math behind it."; +static const char* const kParams = + "param: Number of parameters (in the Variable)."; +static const char* const kTensorValue = "tensor_value: Not supported now."; +static const char* const kOpTypes = + "op_types: The attributes of the operation, includes the Kernel name " + "device placed on and user-defined strings."; + +static const char* const kScope = + "scope: The nodes in the model graph are organized by their names, which " + "is hierarchical like filesystem."; +static const char* const kGraph = + "graph: The nodes in the model graph are organized by their operation " + "input and output."; +static const char* const kCode = + "code: When python trace is available, the nodes are python lines and " + "their are organized by the python call stack."; +static const char* const kOp = + "op: The nodes are operation kernel type, such as MatMul, Conv2D. Graph " + "nodes belonging to the same type are aggregated together."; +static const char* const kAdvise = + "advise: Automatically profile and discover issues. (Experimental)"; +static const char* const kSet = + "set: Set a value for an option for future use."; +static const char* const kHelp = "help: Print helping messages."; + +string QueryDoc(const string& cmd, const Options& opts) { + string cmd_help = ""; + if (cmd == kCmds[0]) { + cmd_help = kScope; + } else if (cmd == kCmds[1]) { + cmd_help = kScope; + } else if (cmd == kCmds[2]) { + cmd_help = kCode; + } else if (cmd == kCmds[3]) { + cmd_help = kOp; + } else if (cmd == kCmds[4]) { + cmd_help = kAdvise; + } else if (cmd == kCmds[5]) { + cmd_help = kSet; + } else if (cmd == kCmds[6]) { + cmd_help = kHelp; + } else { + cmd_help = "Unknown command: " + cmd; + } + + std::vector<string> helps; + for (const string& s : opts.select) { + if (s == kShown[0]) { + helps.push_back(kBytes); + } else if (s == kShown[1]) { + helps.push_back(strings::StrCat(kTotalMicrosHelp, "\n", kCPUHelp, "\n", + kAccMicrosHelp)); + } else if (s == kShown[2]) { + helps.push_back(kParams); + } else if (s == kShown[3]) { + helps.push_back(kFloatOps); + } else if (s == kShown[4]) { + helps.push_back(kTensorValue); + } else if (s == kShown[5]) { + helps.push_back(kDevice); + } else if (s == kShown[6]) { + helps.push_back(kOpTypes); + } else if (s == kShown[7]) { + helps.push_back(kOccurrence); + } else if (s == kShown[8]) { + helps.push_back(kInputShapes); + } else if (s == kShown[9]) { + helps.push_back(kAccMicrosHelp); + } else if (s == kShown[10]) { + helps.push_back(kCPUHelp); + } else if (s == kShown[11]) { + helps.push_back(kPeakBytes); + } else if (s == kShown[12]) { + helps.push_back(kResidualBytes); + } else if (s == kShown[13]) { + helps.push_back(kOutputBytes); + } else { + helps.push_back("Unknown select: " + s); + } + } + return strings::StrCat("\nDoc:\n", cmd_help, "\n", + str_util::Join(helps, "\n"), "\n\n"); +} + } // namespace tfprof } // namespace tensorflow diff --git a/tensorflow/core/profiler/internal/tfprof_utils.h b/tensorflow/core/profiler/internal/tfprof_utils.h index 985ea97af6..afca3df7f8 100644 --- a/tensorflow/core/profiler/internal/tfprof_utils.h +++ b/tensorflow/core/profiler/internal/tfprof_utils.h @@ -66,6 +66,9 @@ Status ReadProtoFile(Env* env, const string& fname, T* proto, void PrintHelp(); +// Generate helper message based on the command and options. +string QueryDoc(const string& cmd, const Options& opts); + } // namespace tfprof } // namespace tensorflow diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto index 0bf1b477ed..90b9e293ec 100644 --- a/tensorflow/core/profiler/tfprof_log.proto +++ b/tensorflow/core/profiler/tfprof_log.proto @@ -54,6 +54,9 @@ message ProfileProto { map<int64, ProfileNode> nodes = 1; // Whether or not has code traces. bool has_trace = 2; + // Whether or not the TF device tracer fails to return accelerator + // information (which could lead to 0 accelerator execution time). + bool miss_accelerator_stream = 5; // Traced steps. repeated int64 steps = 3; |