aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/profiler
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-01-17 17:49:23 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-01-17 17:53:15 -0800
commita04353fda800b66c9f6fcb3d3699415db72fb792 (patch)
tree910b43b04f7b83553e765006ea1e413c822286fd /tensorflow/core/profiler
parentfc415ed44f2b0fe732562317d2c3ac5304f431a2 (diff)
Dynamically print helper message based on user queries.
PiperOrigin-RevId: 182298241
Diffstat (limited to 'tensorflow/core/profiler')
-rw-r--r--tensorflow/core/profiler/internal/tfprof_show.cc6
-rw-r--r--tensorflow/core/profiler/internal/tfprof_show.h3
-rw-r--r--tensorflow/core/profiler/internal/tfprof_show_multi.cc7
-rw-r--r--tensorflow/core/profiler/internal/tfprof_show_multi.h3
-rw-r--r--tensorflow/core/profiler/internal/tfprof_show_test.cc13
-rw-r--r--tensorflow/core/profiler/internal/tfprof_stats.cc47
-rw-r--r--tensorflow/core/profiler/internal/tfprof_stats.h5
-rw-r--r--tensorflow/core/profiler/internal/tfprof_utils.cc112
-rw-r--r--tensorflow/core/profiler/internal/tfprof_utils.h3
-rw-r--r--tensorflow/core/profiler/tfprof_log.proto3
10 files changed, 170 insertions, 32 deletions
diff --git a/tensorflow/core/profiler/internal/tfprof_show.cc b/tensorflow/core/profiler/internal/tfprof_show.cc
index cf28876089..f09cd1dad9 100644
--- a/tensorflow/core/profiler/internal/tfprof_show.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show.cc
@@ -25,19 +25,19 @@ limitations under the License.
namespace tensorflow {
namespace tfprof {
-const GraphNodeProto& TFShow::Show(const Options& opts) {
+const GraphNodeProto& TFShow::Show(const string& prefix, const Options& opts) {
if (opts.output_type == kOutput[0]) {
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
return ShowInternal(opts, &timeline)->proto();
} else {
const ShowNode* ret = ShowInternal(opts, nullptr);
if (opts.output_type == kOutput[1]) {
- printf("%s", ret->formatted_str.c_str());
+ printf("%s", (prefix + ret->formatted_str).c_str());
fflush(stdout);
} else if (opts.output_type == kOutput[2]) {
Status s = WriteStringToFile(Env::Default(),
opts.output_options.at(kFileOpts[0]),
- ret->formatted_str);
+ prefix + ret->formatted_str);
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
}
diff --git a/tensorflow/core/profiler/internal/tfprof_show.h b/tensorflow/core/profiler/internal/tfprof_show.h
index 21b21b34de..2067ea3b73 100644
--- a/tensorflow/core/profiler/internal/tfprof_show.h
+++ b/tensorflow/core/profiler/internal/tfprof_show.h
@@ -44,7 +44,8 @@ class TFShow {
virtual ~TFShow() {}
virtual void AddNode(TFGraphNode* node) = 0;
virtual void Build() = 0;
- const GraphNodeProto& Show(const Options& opts);
+ virtual const GraphNodeProto& Show(const string& prefix,
+ const Options& opts) final;
protected:
virtual const ShowNode* ShowInternal(const Options& opts,
diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.cc b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
index eb826a7137..7c65d48d4a 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_multi.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
@@ -27,19 +27,20 @@ limitations under the License.
namespace tensorflow {
namespace tfprof {
-const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) {
+const MultiGraphNodeProto& TFMultiShow::Show(const string& prefix,
+ const Options& opts) {
if (opts.output_type == kOutput[0]) {
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
return ShowInternal(opts, &timeline)->proto();
} else {
const ShowMultiNode* ret = ShowInternal(opts, nullptr);
if (opts.output_type == kOutput[1]) {
- printf("%s", ret->formatted_str.c_str());
+ printf("%s", (prefix + ret->formatted_str).c_str());
fflush(stdout);
} else if (opts.output_type == kOutput[2]) {
Status s = WriteStringToFile(Env::Default(),
opts.output_options.at(kFileOpts[0]),
- ret->formatted_str);
+ prefix + ret->formatted_str);
if (!s.ok()) {
fprintf(stderr, "%s\n", s.ToString().c_str());
}
diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.h b/tensorflow/core/profiler/internal/tfprof_show_multi.h
index f6c18c8029..ac0ada0449 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_multi.h
+++ b/tensorflow/core/profiler/internal/tfprof_show_multi.h
@@ -45,7 +45,8 @@ class TFMultiShow {
virtual ~TFMultiShow() {}
virtual void AddNode(TFGraphNode* node) = 0;
virtual void Build() = 0;
- const MultiGraphNodeProto& Show(const Options& opts);
+ virtual const MultiGraphNodeProto& Show(const string& prefix,
+ const Options& opts) final;
protected:
virtual const ShowMultiNode* ShowInternal(const Options& opts,
diff --git a/tensorflow/core/profiler/internal/tfprof_show_test.cc b/tensorflow/core/profiler/internal/tfprof_show_test.cc
index 5100c8a768..625f64cae5 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_test.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_test.cc
@@ -31,6 +31,13 @@ limitations under the License.
namespace tensorflow {
namespace tfprof {
+
+string CheckAndRemoveDoc(const string& doc) {
+ auto pos = doc.find("Profile:");
+ CHECK(pos != doc.npos);
+ return doc.substr(pos + 9);
+}
+
class TFProfShowTest : public ::testing::Test {
protected:
TFProfShowTest() {
@@ -112,7 +119,7 @@ TEST_F(TFProfShowTest, DumpScopeMode) {
"1.28KB/1.28KB, 1.28KB/1.28KB, 1.28KB/1.28KB, 11us/11us, 0us/0us, "
"11us/11us)\n ScalarW (1, 1/1 params, 0/0 flops, 0B/0B, 0B/0B, 0B/0B, "
"0B/0B, 0us/0us, 0us/0us, 0us/0us)\n",
- dump_str);
+ CheckAndRemoveDoc(dump_str));
EXPECT_EQ(dump_str, TestToFromProto("scope", opts));
}
@@ -159,7 +166,7 @@ TEST_F(TFProfShowTest, DumpAcceleratorAndCPUMicros) {
"0us/0us)\n ScalarW/Initializer/random_normal/stddev (0us/0us, "
"0us/0us)\n ScalarW/read (0us/0us, 0us/0us)\n init (0us/0us, "
"0us/0us)\n",
- dump_str);
+ CheckAndRemoveDoc(dump_str));
EXPECT_EQ(dump_str, TestToFromProto("scope", opts));
}
@@ -203,7 +210,7 @@ TEST_F(TFProfShowTest, DumpOpMode) {
"type:0:1\t(run*0|defined*1)\texec_time:0us\ninput_type:0:2x2x6x12\t(run*"
"0|defined*1)\texec_time:0us\ninput_type:0:3x3x3x6\t(run*0|defined*1)"
"\texec_time:0us\n\n",
- StringReplace(dump_str, " ", ""));
+ StringReplace(CheckAndRemoveDoc(dump_str), " ", ""));
EXPECT_EQ(dump_str, TestToFromProto("op", opts, true));
}
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc
index 6e11c3f121..5b91309c80 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats.cc
@@ -26,6 +26,9 @@ limitations under the License.
namespace tensorflow {
namespace tfprof {
namespace {
+
+const char* const kProfilePrefix = "Profile:\n";
+
bool CreateRunMetadataNode(const string& name, NodeDef* def) {
// TODO(xpan): Better solution than blacklisting this 2 nodes. They
// actually cost some resources, maybe include them. Some nodes, such
@@ -48,7 +51,7 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph,
std::unique_ptr<OpLogProto> op_log,
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
: has_code_traces_(false),
- miss_gpu_stream_(false),
+ miss_accelerator_stream_(false),
ckpt_reader_(std::move(ckpt_reader)) {
CHECK(graph) << "Must at least have GraphDef";
@@ -72,7 +75,7 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph,
TFStats::TFStats(const string& filename,
std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader)
: has_code_traces_(false),
- miss_gpu_stream_(false),
+ miss_accelerator_stream_(false),
ckpt_reader_(std::move(ckpt_reader)) {
string str;
Status s = ReadFileToString(Env::Default(), filename, &str);
@@ -144,18 +147,21 @@ const GraphNodeProto& TFStats::ShowGraphNode(const string& cmd,
if (!Validate(opts)) {
return empty_graph_node_;
}
+ string prefix = MaybeReportMissingTrace();
+ prefix += QueryDoc(cmd, opts) + kProfilePrefix;
+
if (cmd == kCmds[0]) {
- return scope_view_->Show(opts);
+ return scope_view_->Show(prefix, opts);
} else if (cmd == kCmds[1]) {
if (opts.step < 0 && opts.output_type == kOutput[0]) {
for (int64 step : steps_) {
Options nopts = opts;
nopts.step = step;
- graph_view_->Show(nopts);
+ graph_view_->Show(prefix, nopts);
}
return empty_graph_node_;
}
- return graph_view_->Show(opts);
+ return graph_view_->Show(prefix, opts);
} else {
fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
return empty_graph_node_;
@@ -167,14 +173,17 @@ const MultiGraphNodeProto& TFStats::ShowMultiGraphNode(
if (!Validate(opts)) {
return empty_multi_graph_node_;
}
+ string prefix = MaybeReportMissingTrace();
+ prefix += QueryDoc(cmd, opts) + kProfilePrefix;
+
if (cmd == kCmds[2]) {
if (!has_code_traces()) {
fprintf(stderr, "No code trace information\n");
return empty_multi_graph_node_;
}
- return code_view_->Show(opts);
+ return code_view_->Show(prefix, opts);
} else if (cmd == kCmds[3]) {
- return op_view_->Show(opts);
+ return op_view_->Show(prefix, opts);
} else {
fprintf(stderr, "Unknown command: %s\n", cmd.c_str());
return empty_multi_graph_node_;
@@ -295,19 +304,21 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
}
if (has_gpu_scheduling && !has_gpu_stream) {
- miss_gpu_stream_ = true;
+ miss_accelerator_stream_ = true;
}
}
-void TFStats::MaybeReportMissingTrace() const {
- if (miss_gpu_stream_) {
- fprintf(stderr,
- "\n\nFound accelerator operation but misses accelerator "
- "stream stats!\n\n"
- "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
- "If you found your operation missing accelerator time, "
- "consider filing a bug to xprof-dev@!\n\n");
- }
+string TFStats::MaybeReportMissingTrace() const {
+ string report = "";
+ if (miss_accelerator_stream_) {
+ report +=
+ "\n\nFound accelerator operation but misses accelerator "
+ "stream stats!\n\n"
+ "It's likely a gpu tracing issue rather than tf-profiler issue.\n"
+ "If you found your operation missing accelerator time, "
+ "consider filing a bug to xprof-dev@!\n\n";
+ }
+ return report;
}
void TFStats::SerializeToString(string* content) {
@@ -324,6 +335,7 @@ void TFStats::SerializeToString(string* content) {
}
profile.set_has_trace(has_code_traces_);
+ profile.set_miss_accelerator_stream(miss_accelerator_stream_);
for (int64 s : steps_) {
profile.add_steps(s);
}
@@ -340,7 +352,6 @@ void TFStats::WriteProfile(const string& filename) {
}
bool TFStats::Validate(const Options& opts) const {
- MaybeReportMissingTrace();
if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) {
fprintf(stderr,
"Options -step=%lld not found.\nAvailable steps: ", opts.step);
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.h b/tensorflow/core/profiler/internal/tfprof_stats.h
index 621285a7e9..d78abda588 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.h
+++ b/tensorflow/core/profiler/internal/tfprof_stats.h
@@ -98,14 +98,13 @@ class TFStats {
// For test purpose only.
void AddNodeForTest(int64 step, std::unique_ptr<TFGraphNode> node);
- void MaybeReportMissingTrace() const;
-
private:
bool Validate(const Options& opts) const;
+ string MaybeReportMissingTrace() const;
std::set<int64> steps_;
bool has_code_traces_;
- bool miss_gpu_stream_;
+ bool miss_accelerator_stream_;
std::unique_ptr<TFScope> scope_view_;
std::unique_ptr<TFGraph> graph_view_;
std::unique_ptr<TFCode> code_view_;
diff --git a/tensorflow/core/profiler/internal/tfprof_utils.cc b/tensorflow/core/profiler/internal/tfprof_utils.cc
index 9eba2ccf04..2813bb46fa 100644
--- a/tensorflow/core/profiler/internal/tfprof_utils.cc
+++ b/tensorflow/core/profiler/internal/tfprof_utils.cc
@@ -317,5 +317,117 @@ void PrintHelp() {
fflush(stdout);
}
+static const char* const kTotalMicrosHelp =
+ "total execution time: Sum of accelerator execution time and cpu execution "
+ "time.";
+static const char* const kAccMicrosHelp =
+ "accelerator execution time: Time spent executing on the accelerator. "
+ "This is normally measured by the actual hardware library.";
+static const char* const kCPUHelp =
+ "cpu execution time: The time from the start to the end of the operation. "
+ "It's the sum of actual cpu run time plus the time that it spends waiting "
+ "if part of computation is launched asynchronously.";
+static const char* const kBytes =
+ "requested bytes: The memory requested by the operation, accumulatively.";
+static const char* const kPeakBytes =
+ "peak bytes: The peak amount of memory that the operation is holding at "
+ "some point.";
+static const char* const kResidualBytes =
+ "residual bytes: The memory not de-allocated after the operation finishes.";
+static const char* const kOutputBytes =
+ "output bytes: The memory that is output from the operation (not "
+    "necessarily allocated by the operation)";
+static const char* const kOccurrence =
+ "occurrence: The number of times it occurs";
+static const char* const kInputShapes =
+ "input shape: The shape of input tensors";
+static const char* const kDevice = "device: which device is placed on.";
+static const char* const kFloatOps =
+ "flops: Number of float operations. Note: Please read the implementation "
+ "for the math behind it.";
+static const char* const kParams =
+ "param: Number of parameters (in the Variable).";
+static const char* const kTensorValue = "tensor_value: Not supported now.";
+static const char* const kOpTypes =
+ "op_types: The attributes of the operation, includes the Kernel name "
+ "device placed on and user-defined strings.";
+
+static const char* const kScope =
+ "scope: The nodes in the model graph are organized by their names, which "
+ "is hierarchical like filesystem.";
+static const char* const kGraph =
+ "graph: The nodes in the model graph are organized by their operation "
+ "input and output.";
+static const char* const kCode =
+ "code: When python trace is available, the nodes are python lines and "
+    "they are organized by the python call stack.";
+static const char* const kOp =
+ "op: The nodes are operation kernel type, such as MatMul, Conv2D. Graph "
+ "nodes belonging to the same type are aggregated together.";
+static const char* const kAdvise =
+ "advise: Automatically profile and discover issues. (Experimental)";
+static const char* const kSet =
+ "set: Set a value for an option for future use.";
+static const char* const kHelp = "help: Print helping messages.";
+
+string QueryDoc(const string& cmd, const Options& opts) {
+ string cmd_help = "";
+ if (cmd == kCmds[0]) {
+ cmd_help = kScope;
+ } else if (cmd == kCmds[1]) {
+ cmd_help = kScope;
+ } else if (cmd == kCmds[2]) {
+ cmd_help = kCode;
+ } else if (cmd == kCmds[3]) {
+ cmd_help = kOp;
+ } else if (cmd == kCmds[4]) {
+ cmd_help = kAdvise;
+ } else if (cmd == kCmds[5]) {
+ cmd_help = kSet;
+ } else if (cmd == kCmds[6]) {
+ cmd_help = kHelp;
+ } else {
+ cmd_help = "Unknown command: " + cmd;
+ }
+
+ std::vector<string> helps;
+ for (const string& s : opts.select) {
+ if (s == kShown[0]) {
+ helps.push_back(kBytes);
+ } else if (s == kShown[1]) {
+ helps.push_back(strings::StrCat(kTotalMicrosHelp, "\n", kCPUHelp, "\n",
+ kAccMicrosHelp));
+ } else if (s == kShown[2]) {
+ helps.push_back(kParams);
+ } else if (s == kShown[3]) {
+ helps.push_back(kFloatOps);
+ } else if (s == kShown[4]) {
+ helps.push_back(kTensorValue);
+ } else if (s == kShown[5]) {
+ helps.push_back(kDevice);
+ } else if (s == kShown[6]) {
+ helps.push_back(kOpTypes);
+ } else if (s == kShown[7]) {
+ helps.push_back(kOccurrence);
+ } else if (s == kShown[8]) {
+ helps.push_back(kInputShapes);
+ } else if (s == kShown[9]) {
+ helps.push_back(kAccMicrosHelp);
+ } else if (s == kShown[10]) {
+ helps.push_back(kCPUHelp);
+ } else if (s == kShown[11]) {
+ helps.push_back(kPeakBytes);
+ } else if (s == kShown[12]) {
+ helps.push_back(kResidualBytes);
+ } else if (s == kShown[13]) {
+ helps.push_back(kOutputBytes);
+ } else {
+ helps.push_back("Unknown select: " + s);
+ }
+ }
+ return strings::StrCat("\nDoc:\n", cmd_help, "\n",
+ str_util::Join(helps, "\n"), "\n\n");
+}
+
} // namespace tfprof
} // namespace tensorflow
diff --git a/tensorflow/core/profiler/internal/tfprof_utils.h b/tensorflow/core/profiler/internal/tfprof_utils.h
index 985ea97af6..afca3df7f8 100644
--- a/tensorflow/core/profiler/internal/tfprof_utils.h
+++ b/tensorflow/core/profiler/internal/tfprof_utils.h
@@ -66,6 +66,9 @@ Status ReadProtoFile(Env* env, const string& fname, T* proto,
void PrintHelp();
+// Generate helper message based on the command and options.
+string QueryDoc(const string& cmd, const Options& opts);
+
} // namespace tfprof
} // namespace tensorflow
diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto
index 0bf1b477ed..90b9e293ec 100644
--- a/tensorflow/core/profiler/tfprof_log.proto
+++ b/tensorflow/core/profiler/tfprof_log.proto
@@ -54,6 +54,9 @@ message ProfileProto {
map<int64, ProfileNode> nodes = 1;
// Whether or not has code traces.
bool has_trace = 2;
+ // Whether or not the TF device tracer fails to return accelerator
+ // information (which could lead to 0 accelerator execution time).
+ bool miss_accelerator_stream = 5;
// Traced steps.
repeated int64 steps = 3;