aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-07-31 16:41:46 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-07-31 16:45:38 -0700
commitedac90c7c75b93ba149823257caf69ac9391c10b (patch)
tree17b0b2da59ff4fecd10c8d7fc3c1fd60845bd9c3
parent78a90370ef214015508103bb21cef2962f041c5c (diff)
Add support to generate pprof results to tf.profiler
A fun thing is, it can not only profile time,memory but also parameters, etc. PiperOrigin-RevId: 163767517
-rw-r--r--tensorflow/core/profiler/README.md23
-rw-r--r--tensorflow/core/profiler/g3doc/pprof.jpgbin0 -> 267669 bytes
-rw-r--r--tensorflow/core/profiler/internal/tfprof_code.cc393
-rw-r--r--tensorflow/core/profiler/internal/tfprof_code.h29
-rw-r--r--tensorflow/core/profiler/internal/tfprof_graph.cc5
-rw-r--r--tensorflow/core/profiler/internal/tfprof_node.h45
-rw-r--r--tensorflow/core/profiler/internal/tfprof_node_show.cc44
-rw-r--r--tensorflow/core/profiler/internal/tfprof_node_show.h22
-rw-r--r--tensorflow/core/profiler/internal/tfprof_op.cc7
-rw-r--r--tensorflow/core/profiler/internal/tfprof_options.cc7
-rw-r--r--tensorflow/core/profiler/internal/tfprof_options.h11
-rw-r--r--tensorflow/core/profiler/internal/tfprof_scope.cc7
-rw-r--r--tensorflow/core/profiler/internal/tfprof_show.cc34
-rw-r--r--tensorflow/core/profiler/internal/tfprof_show_multi.cc34
-rw-r--r--tensorflow/core/profiler/profile.proto71
-rw-r--r--tensorflow/core/profiler/tfprof_log.proto1
-rw-r--r--tensorflow/python/profiler/model_analyzer_test.py60
-rw-r--r--tensorflow/python/profiler/option_builder.py14
-rw-r--r--tensorflow/python/profiler/tfprof_logger.py3
-rw-r--r--tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt4
20 files changed, 667 insertions, 147 deletions
diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md
index d55824e6f2..e748daba7a 100644
--- a/tensorflow/core/profiler/README.md
+++ b/tensorflow/core/profiler/README.md
@@ -181,6 +181,7 @@ seq2seq_attention_model.py:363:build_graph:self._add_train_o..., cpu: 1.28sec, a
### Visualize time and memory.
```
+# The following example generates a timeline.
tfprof> graph -step 0 -max_depth 100000 -output timeline:outfile=<filename>
generating trace file.
@@ -191,9 +192,29 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file.
******************************************************
```
<left>
-[CodeTimeline](g3doc/graph_timeline.png)
+[Timeline](g3doc/graph_timeline.png)
</left>
+```
+# The following example generates a pprof graph (only supported by code view).
+# Since TensorFlow runs the graph instead of Python code, the pprof graph
+# doesn't profile the statistics of Python, but the TensorFlow graph
+# nodes created by the Python call stack.
+# Nevertheless, it surfaces the critical Python code paths for us.
+#
+# `-trim_name_regexes` trims the python call stack, which is always the same
+# for the leaves.
+# `-select accelerator_micros` picks accelerator time for the pprof graph.
+# Users can also generate a memory profile using `-select bytes`.
+tfprof> code -max_depth 100 -trim_name_regexes '^ops.py.*' -select accelerator_micros -output pprof:outfile=<filename>
+
+# Use pprof to visualize the generated file.
+pprof -png --nodecount=20 --sample_index=1 <filename>
+```
+
+<left>
+[PprofGraph](g3doc/pprof.jpg)
+</left>
### Feature Request and Bug Report
diff --git a/tensorflow/core/profiler/g3doc/pprof.jpg b/tensorflow/core/profiler/g3doc/pprof.jpg
new file mode 100644
index 0000000000..bf28967d68
--- /dev/null
+++ b/tensorflow/core/profiler/g3doc/pprof.jpg
Binary files differ
diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc
index 1746076f79..8650541550 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.cc
+++ b/tensorflow/core/profiler/internal/tfprof_code.cc
@@ -21,6 +21,8 @@ limitations under the License.
#include "tensorflow/c/c_api.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/io/zlib_compression_options.h"
+#include "tensorflow/core/lib/io/zlib_outputbuffer.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
@@ -46,66 +48,335 @@ string GetTraceString(const CodeDef::Trace& trace) {
}
return ntrace;
}
+
+// StringTable maps each string to an id.
+class StringTable {
+ public:
+ StringTable() {
+ // Pprof requires first entry in string_table to be ''.
+ string_id_[""] = 0;
+ all_strings_.push_back("");
+ }
+
+  // Returns the index of a string. If not found, inserts the string and
+  // returns the inserted index.
+ uint64 GetIndex(const string& str) {
+ auto idx = string_id_.find(str);
+ if (idx != string_id_.end()) {
+ return idx->second;
+ }
+ all_strings_.push_back(str);
+ return string_id_.insert(std::pair<string, int64>(str, string_id_.size()))
+ .first->second;
+ }
+
+ const std::vector<string>& strings() const { return all_strings_; }
+
+ private:
+ std::map<string, uint64> string_id_;
+ std::vector<string> all_strings_;
+};
+
+// FunctionTable maps each function to an id.
+class FunctionTable {
+ public:
+ explicit FunctionTable(StringTable* string_table)
+ : string_table_(string_table) {}
+
+ // Returns the index of a function. If not found, adds a function proto
+ // and returns the function index.
+ uint64 GetIndex(const string& file_path, const string& func_name,
+ uint64 func_start_line) {
+ auto key = std::tuple<string, string, uint64>(file_path, func_name,
+ func_start_line);
+ auto idx = function_table_.find(key);
+ if (idx != function_table_.end()) {
+ return idx->second.id();
+ }
+ pprof::Function* func_pb = &function_table_[key];
+ // function index should start from 1.
+ func_pb->set_id(function_table_.size());
+ func_pb->set_name(string_table_->GetIndex(func_name));
+ func_pb->set_filename(string_table_->GetIndex(file_path));
+ func_pb->set_start_line(func_start_line);
+ return func_pb->id();
+ }
+
+ const std::map<std::tuple<string, string, uint64>, pprof::Function>&
+ functions() const {
+ return function_table_;
+ }
+
+ private:
+ StringTable* string_table_;
+ std::map<std::tuple<string, string, uint64>, pprof::Function> function_table_;
+};
+
+// LocationTable maps each function call to an id.
+class LocationTable {
+ public:
+ explicit LocationTable(FunctionTable* function_table)
+ : function_table_(function_table) {}
+
+  // Returns the index of a function call location. If not found, adds a
+  // location proto and returns the location index.
+ uint64 GetIndex(const string& file_path, uint64 line_number,
+ const string& called_function_name,
+ const string& called_file_path,
+ uint64 called_func_start_line) {
+ auto key = std::tuple<string, string, uint64>(
+ file_path, called_function_name, line_number);
+ auto idx = location_table_.find(key);
+ if (idx != location_table_.end()) {
+ return idx->second.id();
+ }
+ pprof::Location* location_pb = &location_table_[key];
+ location_pb->set_id(location_table_.size());
+ pprof::Line* line_pb = location_pb->add_line();
+ line_pb->set_function_id(function_table_->GetIndex(
+ called_file_path, called_function_name, called_func_start_line));
+ line_pb->set_line(line_number);
+ return location_pb->id();
+ }
+
+ const std::map<std::tuple<string, string, uint64>, pprof::Location>&
+ locations() const {
+ return location_table_;
+ }
+
+ private:
+ FunctionTable* function_table_;
+ std::map<std::tuple<string, string, uint64>, pprof::Location> location_table_;
+};
+
+// Samples stores samples of all calls. A sample is a single call trace,
+// that is, the call path from top caller to the leaf callee.
+class Samples {
+ public:
+ explicit Samples(StringTable* string_table, const Options* opts)
+ : string_table_(string_table), opts_(opts) {}
+
+ // 'node' is the leaf of the displayed trace. It includes all graph nodes
+ // created by it. 'location_ids' contains
+ // the call stack, from callee to caller.
+ // This method adds the statistics of graph nodes created by the python
+ // call.
+ void Add(const CodeNode* node, const std::vector<uint64>& location_ids) {
+    // The displayed leaf might not be a true leaf. Retrieve the true leaves
+    // for stats.
+ std::vector<const CodeNode*> all_leaf = FetchAllLeaf(node);
+ CHECK(!all_leaf.empty()) << node->name();
+
+ for (const CodeNode* cn : all_leaf) {
+ for (auto gn_it : cn->node->graph_nodes()) {
+ const TFGraphNode* gn = gn_it.second;
+ pprof::Sample* sample_pb = &sample_table_[gn->name()];
+ for (uint64 id : location_ids) {
+ sample_pb->mutable_location_id()->Add(id);
+ }
+ pprof::Label* label_pb = sample_pb->mutable_label()->Add();
+ label_pb->set_key(string_table_->GetIndex("node_name"));
+ label_pb->set_str(string_table_->GetIndex(gn->name()));
+
+ sample_pb->mutable_value()->Add(1);
+ string type = *opts_->select.begin();
+ if (type == kShown[1]) {
+ sample_pb->mutable_value()->Add(gn->exec_micros(node->node->step()));
+ } else if (type == kShown[9]) {
+ sample_pb->mutable_value()->Add(
+ gn->accelerator_exec_micros(node->node->step()));
+ } else if (type == kShown[10]) {
+ sample_pb->mutable_value()->Add(
+ gn->cpu_exec_micros(node->node->step()));
+ } else if (type == kShown[0]) {
+ sample_pb->mutable_value()->Add(
+ gn->requested_bytes(node->node->step()));
+ } else if (type == kShown[2]) {
+ sample_pb->mutable_value()->Add(gn->parameters());
+ } else if (type == kShown[3]) {
+ sample_pb->mutable_value()->Add(gn->float_ops(node->node->step()));
+ } else {
+ fprintf(stderr, "pprof doesn't support -select=%s\n", type.c_str());
+ }
+ }
+ }
+ }
+
+ const std::map<string, pprof::Sample>& samples() const {
+ return sample_table_;
+ }
+
+ private:
+ std::vector<const CodeNode*> FetchAllLeaf(const CodeNode* root) {
+ if (root->children.empty()) {
+ return {root};
+ }
+ std::vector<const CodeNode*> ret;
+ for (auto& n : root->children) {
+ std::vector<const CodeNode*> nodes = FetchAllLeaf(n);
+ ret.insert(ret.end(), nodes.begin(), nodes.end());
+ }
+ return ret;
+ }
+
+ StringTable* string_table_;
+ const Options* opts_;
+ std::map<string, pprof::Sample> sample_table_;
+};
+
+class PprofProfileImpl : public PprofProfile {
+ public:
+ explicit PprofProfileImpl(const Options* opts)
+ : opts_(opts),
+ func_table_(new FunctionTable(&string_table_)),
+ loc_table_(new LocationTable(func_table_.get())),
+ samples_(new Samples(&string_table_, opts)) {}
+
+ uint64 AddLocation(const CodeNode* callee, const CodeNode* caller) override {
+ const string& file_path = caller->trace->file();
+ uint64 lineno = caller->trace->lineno();
+ const string& callee_file_path = callee->trace->file();
+ const string& callee_function = callee->trace->function();
+ uint64 callee_func_start_line = callee->trace->func_start_line();
+
+ return loc_table_->GetIndex(file_path, lineno, callee_function,
+ callee_file_path, callee_func_start_line);
+ }
+
+ void AddSample(const CodeNode* leaf, std::vector<uint64>* call_ids) override {
+ std::vector<uint64> reversed_call_ids;
+ std::reverse_copy(call_ids->begin(), call_ids->end(),
+ std::back_inserter(reversed_call_ids));
+ samples_->Add(leaf, reversed_call_ids);
+ }
+
+ Status WritePprofProfile(const string& filename) override {
+ pprof::Profile profile_pb;
+ Build(&profile_pb);
+
+ std::unique_ptr<WritableFile> file;
+ Status s = Env::Default()->NewWritableFile(filename, &file);
+ if (!s.ok()) return s;
+
+ int32 buf_size = 1024 * 1024;
+ io::ZlibOutputBuffer* zlib_output_buffer = new io::ZlibOutputBuffer(
+ file.get(), buf_size, buf_size, io::ZlibCompressionOptions::GZIP());
+ s = zlib_output_buffer->Init();
+ if (!s.ok()) return s;
+ s = zlib_output_buffer->Append(profile_pb.SerializeAsString());
+ if (!s.ok()) return s;
+ s = zlib_output_buffer->Close();
+ if (!s.ok()) return s;
+ fprintf(stdout, "\nRun pprof -png --nodecount=20 --sample_index=1 <%s>\n",
+ filename.c_str());
+ return s;
+ }
+
+ private:
+ void Build(pprof::Profile* profile_pb) {
+ string sample_type_description = "count";
+ auto sample_type = profile_pb->mutable_sample_type()->Add();
+ sample_type->set_type(string_table_.GetIndex(sample_type_description));
+ sample_type->set_unit(string_table_.GetIndex("count"));
+
+ string type = *opts_->select.begin();
+ sample_type_description = type;
+ sample_type = profile_pb->mutable_sample_type()->Add();
+ sample_type->set_type(string_table_.GetIndex(sample_type_description));
+ if (type == kShown[1] || type == kShown[9] || type == kShown[10]) {
+ sample_type->set_unit(string_table_.GetIndex("microseconds"));
+ if (type == kShown[1]) {
+ profile_pb->mutable_comment()->Add(string_table_.GetIndex(
+ "Sum of accelerator execution time and cpu execution time."));
+ } else if (type == kShown[9]) {
+ profile_pb->mutable_comment()->Add(
+ string_table_.GetIndex("Accelerator execution time."));
+ } else if (type == kShown[10]) {
+ profile_pb->mutable_comment()->Add(
+ string_table_.GetIndex("CPU execution time."));
+ }
+ } else if (type == kShown[0]) {
+ sample_type->set_unit(string_table_.GetIndex("bytes"));
+ profile_pb->mutable_comment()->Add(
+ string_table_.GetIndex("Sum of operation output memory."));
+ } else if (type == kShown[2]) {
+ sample_type->set_unit(string_table_.GetIndex("count"));
+ profile_pb->mutable_comment()->Add(
+ string_table_.GetIndex("Model parameters."));
+ } else if (type == kShown[3]) {
+ sample_type->set_unit(string_table_.GetIndex("count"));
+ profile_pb->mutable_comment()->Add(string_table_.GetIndex(
+ "Model float operations (Only available if defined)."));
+ } else {
+ fprintf(stderr, "pprof doesn't support selecting: %s\n", type.c_str());
+ }
+
+ for (const string& str : string_table_.strings()) {
+ *profile_pb->mutable_string_table()->Add() = str;
+ }
+ for (const auto& sample_it : samples_->samples()) {
+ // TODO(xpan): Consider swap.
+ profile_pb->mutable_sample()->Add()->MergeFrom(sample_it.second);
+ }
+ for (const auto& function_it : func_table_->functions()) {
+ profile_pb->mutable_function()->Add()->MergeFrom(function_it.second);
+ }
+ for (const auto& location_it : loc_table_->locations()) {
+ profile_pb->mutable_location()->Add()->MergeFrom(location_it.second);
+ }
+ }
+
+ const Options* opts_;
+ StringTable string_table_;
+ std::unique_ptr<FunctionTable> func_table_;
+ std::unique_ptr<LocationTable> loc_table_;
+ std::unique_ptr<Samples> samples_;
+};
} // namespace
void TFCode::AddNode(TFGraphNode* node) {
if (node->code().traces_size() == 0) {
return;
}
- TFMultiGraphNode* pre_trace_node = nullptr;
+ if (!root_) {
+ graph_root_.reset(new TFMultiGraphNode(kTFProfRoot));
+ root_.reset(new CodeNode(graph_root_.get(), nullptr));
+ }
+
+ CodeNode* pre_code_node = root_.get();
// TODO(xpan): Consider to release CodeDef after TFCode is built. It
// takes a lot of memory.
for (int i = 0; i < node->code().traces_size(); ++i) {
    // Unlike op name, which is globally unique, trace name is only unique
    // w.r.t. its parent.
const string& trace = GetTraceString(node->code().traces(i));
- if (i == 0) {
- if (!trace_root_) {
- trace_root_.reset(new TFMultiGraphNode(trace));
- }
- CHECK(trace_root_->name() == trace) << "Different trace root";
- pre_trace_node = trace_root_.get();
- continue;
- }
- pre_trace_node->AddChildren(trace);
- TFMultiGraphNode* trace_node = pre_trace_node->children().at(trace).get();
-
+ pre_code_node = pre_code_node->AddChildren(trace, &node->code().traces(i));
if (i == node->code().traces_size() - 1) {
- trace_node->AddGraphNode(node);
+ pre_code_node->node->AddGraphNode(node);
}
- pre_trace_node = trace_node;
}
}
void TFCode::Build() {
- if (root_) {
- return;
- }
- tfprof_trace_root_.reset(new TFMultiGraphNode(kTFProfRoot));
- root_.reset(new CodeNode(tfprof_trace_root_.get()));
-
- if (trace_root_) {
- code_root_ = BuildCodeNodes(trace_root_.get());
- root_->children.push_back(code_root_);
- }
-}
-
-CodeNode* TFCode::BuildCodeNodes(TFMultiGraphNode* root) {
- auto code_root = std::unique_ptr<CodeNode>(new CodeNode(root));
- CodeNode* code_root_ptr = code_root.get();
- code_nodes_.insert(std::move(code_root));
-
- for (auto it = root->children().cbegin(); it != root->children().cend();
- ++it) {
- code_root_ptr->children.push_back(BuildCodeNodes(it->second.get()));
- }
- return code_root_ptr;
}
const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
Timeline* timeline) {
- std::vector<CodeNode*> roots = Account(root_->children, opts);
root_->ResetTotalStats();
+ if (opts.output_type == kOutput[3]) {
+ if (opts.select.size() != 1) {
+ fprintf(stderr, "Can only select 1 attribute for pprof output.\n");
+ return root_.get();
+ }
+ string select = *opts.select.begin();
+ if (select != kShown[0] && select != kShown[1] && select != kShown[2] &&
+ select != kShown[3] && select != kShown[9] && select != kShown[10]) {
+ fprintf(stderr, "pprof doesn't support -select=%s\n", select.c_str());
+ return root_.get();
+ }
+ }
+
+ std::vector<CodeNode*> roots = Account(root_->children, opts);
root_->show_children.clear();
for (CodeNode* n : roots) {
root_->AggregateTotalStats(n);
@@ -121,21 +392,46 @@ const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
CodeNode* root = PrintScope({root_.get()}, opts, 1, 0)[0];
root->formatted_str = FormatLegend(opts) + root->formatted_str;
- Format(root->show_children, &root->formatted_str, root->mutable_proto());
- if (timeline) {
- timeline->GenerateCodeTimeline(root);
+ if (opts.output_type == kOutput[3]) {
+ std::vector<uint64> call_ids;
+ pprof_profile_.reset(new PprofProfileImpl(&opts));
+ Format(root, root->show_children, opts, &root->formatted_str,
+ root->mutable_proto(), &call_ids);
+ Status s = pprof_profile_->WritePprofProfile(
+ opts.output_options.at(kPprofOpts[0]));
+ if (!s.ok()) {
+ fprintf(stderr, "%s\n", s.ToString().c_str());
+ }
+ } else {
+ Format(root, root->show_children, opts, &root->formatted_str,
+ root->mutable_proto(), nullptr);
+ if (timeline) {
+ timeline->GenerateCodeTimeline(root);
+ }
}
return root;
}
-void TFCode::Format(const std::vector<CodeNode*> roots, string* display_str,
- MultiGraphNodeProto* proto) {
- for (CodeNode* node : roots) {
+void TFCode::Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
+ const Options& opts, string* display_str,
+ MultiGraphNodeProto* proto, std::vector<uint64>* call_ids) {
+ if (nodes.empty() && root->trace && opts.output_type == kOutput[3]) {
+ pprof_profile_->AddSample(root, call_ids);
+ }
+
+ for (CodeNode* node : nodes) {
+ if (root->trace && opts.output_type == kOutput[3]) {
+ uint64 loc_id = pprof_profile_->AddLocation(node, root);
+ call_ids->push_back(loc_id);
+ }
display_str->append(node->formatted_str);
MultiGraphNodeProto* child = proto->add_children();
child->MergeFrom(node->proto());
- Format(node->show_children, display_str, child);
+ Format(node, node->show_children, opts, display_str, child, call_ids);
+ if (root->trace && opts.output_type == kOutput[3]) {
+ call_ids->pop_back();
+ }
}
}
@@ -170,14 +466,15 @@ std::vector<CodeNode*> TFCode::PrintScope(const std::vector<CodeNode*> roots,
std::vector<CodeNode*> show_nodes;
for (CodeNode* node : roots) {
+ if (ShouldTrim(node, opts.trim_name_regexes) || depth > opts.max_depth) {
+ continue;
+ }
int ident = last_ident;
bool show = ShouldShow(node, opts, depth);
if (show) ident += 2;
- std::vector<CodeNode*> show_cnodes;
- if (!ShouldTrim(node, opts.trim_name_regexes) && depth <= opts.max_depth) {
- show_cnodes = PrintScope(node->show_children, opts, depth + 1, ident);
- }
+ std::vector<CodeNode*> show_cnodes =
+ PrintScope(node->show_children, opts, depth + 1, ident);
if (show) {
node->show_children.clear();
if (opts.account_displayed_op_only) {
diff --git a/tensorflow/core/profiler/internal/tfprof_code.h b/tensorflow/core/profiler/internal/tfprof_code.h
index 7338297221..7583a43a26 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.h
+++ b/tensorflow/core/profiler/internal/tfprof_code.h
@@ -32,15 +32,29 @@ limitations under the License.
#include "tensorflow/core/profiler/internal/tfprof_show_multi.h"
#include "tensorflow/core/profiler/internal/tfprof_timeline.h"
#include "tensorflow/core/profiler/internal/tfprof_utils.h"
+#include "tensorflow/core/profiler/profile.pb.h"
#include "tensorflow/core/profiler/tfprof_log.pb.h"
#include "tensorflow/core/profiler/tfprof_output.pb.h"
namespace tensorflow {
namespace tfprof {
+class PprofProfile {
+ public:
+ virtual ~PprofProfile() {}
+
+ virtual uint64 AddLocation(const CodeNode* callee,
+ const CodeNode* caller) = 0;
+
+ virtual void AddSample(const CodeNode* leaf,
+ std::vector<uint64>* call_ids) = 0;
+
+ virtual Status WritePprofProfile(const string& filename) = 0;
+};
+
class TFCode : public TFMultiShow {
public:
- explicit TFCode() : code_root_(nullptr), trace_root_(nullptr) {}
+ TFCode() {}
~TFCode() override {}
void AddNode(TFGraphNode* node) override;
@@ -48,8 +62,6 @@ class TFCode : public TFMultiShow {
void Build() override;
private:
- CodeNode* BuildCodeNodes(TFMultiGraphNode* root);
-
const ShowMultiNode* ShowInternal(const Options& opts,
Timeline* timeline) override;
@@ -63,16 +75,15 @@ class TFCode : public TFMultiShow {
std::vector<CodeNode*> Account(const std::vector<CodeNode*>& roots,
const Options& opts);
- void Format(const std::vector<CodeNode*> roots, string* display_str,
- MultiGraphNodeProto* proto);
+ void Format(const CodeNode* root, const std::vector<CodeNode*>& nodes,
+ const Options& opts, string* display_str,
+ MultiGraphNodeProto* proto, std::vector<uint64>* call_ids);
string FormatNode(CodeNode* node, const Options& opts, int64 indent);
std::unique_ptr<CodeNode> root_;
- CodeNode* code_root_;
- std::unique_ptr<TFMultiGraphNode> trace_root_;
- std::unique_ptr<TFMultiGraphNode> tfprof_trace_root_;
- std::set<std::unique_ptr<CodeNode>> code_nodes_;
+ std::unique_ptr<TFMultiGraphNode> graph_root_;
+ std::unique_ptr<PprofProfile> pprof_profile_;
};
} // namespace tfprof
} // namespace tensorflow
diff --git a/tensorflow/core/profiler/internal/tfprof_graph.cc b/tensorflow/core/profiler/internal/tfprof_graph.cc
index 8d7c44b219..e58924a614 100644
--- a/tensorflow/core/profiler/internal/tfprof_graph.cc
+++ b/tensorflow/core/profiler/internal/tfprof_graph.cc
@@ -72,6 +72,11 @@ void TFGraph::Build() {
const ShowNode* TFGraph::ShowInternal(const Options& opts, Timeline* timeline) {
root_->ResetTotalStats();
root_->show_children.clear();
+
+ if (opts.output_type == kOutput[3]) {
+ fprintf(stderr, "Only 'code' view supports pprof output now.\n");
+ return root_;
+ }
if (timeline && timeline->step() < 0) {
// TODO(xpan): Maybe pick a default step for users.
fprintf(stderr,
diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h
index 9142ad51c5..929ee3f50c 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.h
+++ b/tensorflow/core/profiler/internal/tfprof_node.h
@@ -340,6 +340,27 @@ class TFGraphNode {
return exec->second.allocator_bytes_in_use();
}
+ int64 parameters() const {
+ if (!shape().empty()) {
+ int64 params = 1;
+ bool complete_shape = true;
+ for (int64 d : shape()) {
+ // Sometimes parameters could be <0 when a dim is unknown.
+ if (d < 0) {
+ complete_shape = false;
+ break;
+ }
+ params *= d;
+ }
+ if (complete_shape) {
+ return params;
+ } else {
+ fprintf(stderr, "Incomplete shape.\n");
+ }
+ }
+ return 0;
+ }
+
int64 float_ops(int64 step) const {
// If not run, return static analysis.
if (execs_.empty()) {
@@ -400,12 +421,14 @@ class TFMultiGraphNode {
public:
TFMultiGraphNode(const string& name)
: name_(name),
+ step_(-1),
run_count_(0),
exec_micros_(0),
accelerator_exec_micros_(0),
cpu_exec_micros_(0),
requested_bytes_(0),
- float_ops_(0) {}
+ float_ops_(0),
+ parameters_(0) {}
bool SnapshotNodes(int64 step, const std::vector<string>& type_regexes) {
run_count_ = 0;
@@ -415,11 +438,13 @@ class TFMultiGraphNode {
requested_bytes_ = 0;
float_ops_ = 0;
+ parameters_ = 0;
op_types_.clear();
shapes_.clear();
devices_.clear();
snapshot_nodes_.clear();
+ step_ = step;
std::vector<const TFGraphNode*> nodes = pick_nodes(type_regexes);
if (nodes.empty()) {
@@ -436,6 +461,7 @@ class TFMultiGraphNode {
requested_bytes_ += node->requested_bytes(step);
float_ops_ += node->float_ops(step);
+ parameters_ += node->parameters();
if (node->shape().size() > 0) {
shapes_.push_back(node->shape());
}
@@ -445,6 +471,8 @@ class TFMultiGraphNode {
return true;
}
+ int64 step() const { return step_; }
+
void AddGraphNode(const TFGraphNode* node) {
if (nodes_.find(node->name()) != nodes_.end()) {
return;
@@ -456,16 +484,6 @@ class TFMultiGraphNode {
return snapshot_nodes_;
}
- void AddChildren(const string& name) {
- if (children_.find(name) != children_.end()) {
- return;
- }
- children_[name].reset(new TFMultiGraphNode(name));
- }
- const std::map<string, std::unique_ptr<TFMultiGraphNode>>& children() const {
- return children_;
- }
-
const string& name() const { return name_; }
int64 run_count() const { return run_count_; }
@@ -477,6 +495,8 @@ class TFMultiGraphNode {
int64 float_ops() const { return float_ops_; }
+ int64 parameters() const { return parameters_; }
+
const std::set<string>& devices() const { return devices_; }
const std::set<string>& op_types() const { return op_types_; }
@@ -511,6 +531,7 @@ class TFMultiGraphNode {
}
const string name_;
+ int64 step_;
// Snapshot based on type_regexes
std::set<string> op_types_;
int64 run_count_;
@@ -520,13 +541,13 @@ class TFMultiGraphNode {
int64 requested_bytes_;
int64 float_ops_;
+ int64 parameters_;
std::set<string> devices_;
std::vector<std::vector<int64>> shapes_;
std::map<string, const TFGraphNode*> snapshot_nodes_;
// Overall data held by the TFMultiGraphNode.
std::map<string, const TFGraphNode*> nodes_;
- std::map<string, std::unique_ptr<TFMultiGraphNode>> children_;
};
bool IsPlacedOnAccelerator(const string& device);
diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.cc b/tensorflow/core/profiler/internal/tfprof_node_show.cc
index 5ca57412e5..16b94fdfa1 100644
--- a/tensorflow/core/profiler/internal/tfprof_node_show.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node_show.cc
@@ -45,25 +45,7 @@ void ShowNode::ReInit(int64 step) {
(*mutable_proto()->mutable_input_shapes())[inp.first].MergeFrom(
VecToShapeProto(inp.second));
}
-
- proto_.clear_parameters();
- if (!node->shape().empty()) {
- int64 params = 1;
- bool complete_shape = true;
- for (int64 d : node->shape()) {
- // Sometimes parameters could be <0 when a dim is unknown.
- if (d < 0) {
- complete_shape = false;
- break;
- }
- params *= d;
- }
- if (complete_shape) {
- mutable_proto()->set_parameters(proto_.parameters() + params);
- } else {
- fprintf(stderr, "Incomplete shape.");
- }
- }
+ proto_.set_parameters(node->parameters());
}
GraphNodeProto* ShowNode::mutable_proto() { return &proto_; }
@@ -114,6 +96,8 @@ void ShowNode::AddSelfToTotalStats() {
}
void ShowNode::ResetTotalStats() {
+ formatted_str.clear();
+
mutable_proto()->set_total_definition_count(0);
mutable_proto()->set_total_run_count(0);
mutable_proto()->set_total_exec_micros(0);
@@ -153,26 +137,7 @@ bool ShowMultiNode::ReInit(int64 step,
mutable_proto()->set_requested_bytes(node->requested_bytes());
mutable_proto()->set_float_ops(node->float_ops());
- mutable_proto()->clear_parameters();
- if (!node->shapes().empty()) {
- for (const std::vector<int64>& shape : node->shapes()) {
- int64 params = 1;
- bool complete_shape = true;
- for (int64 d : shape) {
- // Sometimes parameters could be <0 when a dim is unknown.
- if (d < 0) {
- complete_shape = false;
- break;
- }
- params *= d;
- }
- if (complete_shape) {
- mutable_proto()->set_parameters(proto().parameters() + params);
- } else {
- fprintf(stderr, "Incomplete shape.");
- }
- }
- }
+ mutable_proto()->set_parameters(node->parameters());
return has_matched_type;
}
@@ -216,6 +181,7 @@ void ShowMultiNode::AddSelfToTotalStats() {
}
void ShowMultiNode::ResetTotalStats() {
+ formatted_str.clear();
mutable_proto()->set_total_exec_micros(0);
mutable_proto()->set_total_accelerator_exec_micros(0);
mutable_proto()->set_total_cpu_exec_micros(0);
diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.h b/tensorflow/core/profiler/internal/tfprof_node_show.h
index fea19bdca3..cdcb3f499b 100644
--- a/tensorflow/core/profiler/internal/tfprof_node_show.h
+++ b/tensorflow/core/profiler/internal/tfprof_node_show.h
@@ -111,11 +111,31 @@ class ShowMultiNode {
class CodeNode : public ShowMultiNode {
public:
- explicit CodeNode(TFMultiGraphNode* node) : ShowMultiNode(node) {}
+ explicit CodeNode(TFMultiGraphNode* node, const CodeDef::Trace* trace)
+ : ShowMultiNode(node), trace(trace) {}
~CodeNode() override {}
+ CodeNode* AddChildren(const string& name, const CodeDef::Trace* trace) {
+ auto it = children_.find(name);
+ if (it != children_.end()) {
+ return it->second.get();
+ }
+
+ graph_children_.push_back(
+ std::unique_ptr<TFMultiGraphNode>(new TFMultiGraphNode(name)));
+ auto child = &children_[name];
+ child->reset(new CodeNode(graph_children_.back().get(), trace));
+ children.push_back(child->get());
+ return child->get();
+ }
+
+ const CodeDef::Trace* trace;
std::vector<CodeNode*> children;
std::vector<CodeNode*> show_children;
+
+ private:
+ std::vector<std::unique_ptr<TFMultiGraphNode>> graph_children_;
+ std::map<string, std::unique_ptr<CodeNode>> children_;
};
class OpNode : public ShowMultiNode {
diff --git a/tensorflow/core/profiler/internal/tfprof_op.cc b/tensorflow/core/profiler/internal/tfprof_op.cc
index 46a81da80f..ab013506ec 100644
--- a/tensorflow/core/profiler/internal/tfprof_op.cc
+++ b/tensorflow/core/profiler/internal/tfprof_op.cc
@@ -103,8 +103,13 @@ void TFOp::Build() {
}
const ShowMultiNode* TFOp::ShowInternal(const Options& opts,
- Timeline* timeline) {
+ Timeline* timeline) {
root_->ResetTotalStats();
+ if (opts.output_type == kOutput[3]) {
+ fprintf(stderr, "Only 'code' view supports pprof output now.\n");
+ return root_.get();
+ }
+
if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) {
root_->formatted_str = FormatNode(root_.get(), root_.get(), opts);
}
diff --git a/tensorflow/core/profiler/internal/tfprof_options.cc b/tensorflow/core/profiler/internal/tfprof_options.cc
index 53e401dd60..2b5e340cec 100644
--- a/tensorflow/core/profiler/internal/tfprof_options.cc
+++ b/tensorflow/core/profiler/internal/tfprof_options.cc
@@ -84,6 +84,13 @@ tensorflow::Status ParseOutput(const string& output_opt, string* output_type,
required_options.insert(kFileRequiredOpts,
kFileRequiredOpts + sizeof(kFileRequiredOpts) /
sizeof(*kFileRequiredOpts));
+ } else if (*output_type == kOutput[3]) {
+ valid_options.insert(kPprofOpts,
+ kPprofOpts + sizeof(kPprofOpts) / sizeof(*kPprofOpts));
+ required_options.insert(
+ kPprofRequiredOpts,
+ kPprofRequiredOpts +
+ sizeof(kPprofRequiredOpts) / sizeof(*kPprofRequiredOpts));
}
for (const string& kv_str : kv_split) {
diff --git a/tensorflow/core/profiler/internal/tfprof_options.h b/tensorflow/core/profiler/internal/tfprof_options.h
index 6d0c213b3d..8e78ee7463 100644
--- a/tensorflow/core/profiler/internal/tfprof_options.h
+++ b/tensorflow/core/profiler/internal/tfprof_options.h
@@ -62,7 +62,8 @@ static const char* const kCmds[] = {
"scope", "graph", "code", "op", "advise", "set", "help",
};
-static const char* const kOutput[] = {"timeline", "stdout", "file", "none"};
+static const char* const kOutput[] = {"timeline", "stdout", "file", "pprof",
+ "none"};
static const char* const kTimelineOpts[] = {
"outfile",
@@ -78,6 +79,14 @@ static const char* const kFileRequiredOpts[] = {
"outfile",
};
+static const char* const kPprofOpts[] = {
+ "outfile",
+};
+
+static const char* const kPprofRequiredOpts[] = {
+ "outfile",
+};
+
struct Options {
public:
static tensorflow::Status FromProtoStr(const string& opts_proto_str,
diff --git a/tensorflow/core/profiler/internal/tfprof_scope.cc b/tensorflow/core/profiler/internal/tfprof_scope.cc
index 0f7e079098..d7293c5e52 100644
--- a/tensorflow/core/profiler/internal/tfprof_scope.cc
+++ b/tensorflow/core/profiler/internal/tfprof_scope.cc
@@ -78,8 +78,13 @@ void TFScope::Build() {
}
const ShowNode* TFScope::ShowInternal(const Options& opts, Timeline* timeline) {
- std::vector<ScopeNode*> roots = Account(root_->children, opts);
root_->ResetTotalStats();
+ if (opts.output_type == kOutput[3]) {
+ fprintf(stderr, "Only 'code' view supports pprof output now.\n");
+ return root_;
+ }
+
+ std::vector<ScopeNode*> roots = Account(root_->children, opts);
root_->show_children.clear();
for (ScopeNode* n : roots) {
root_->AggregateTotalStats(n);
diff --git a/tensorflow/core/profiler/internal/tfprof_show.cc b/tensorflow/core/profiler/internal/tfprof_show.cc
index 2828217d3d..630eba4ff2 100644
--- a/tensorflow/core/profiler/internal/tfprof_show.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show.cc
@@ -26,25 +26,27 @@ namespace tensorflow {
namespace tfprof {
const GraphNodeProto& TFShow::Show(const Options& opts) {
- if (opts.output_type == kOutput[3]) {
- return ShowInternal(opts, nullptr)->proto();
- } else if (opts.output_type == kOutput[0]) {
+ if (opts.output_type == kOutput[0]) {
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
return ShowInternal(opts, &timeline)->proto();
- } else if (opts.output_type == kOutput[2]) {
- const ShowNode* root = ShowInternal(opts, nullptr);
- Status s =
- WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
- root->formatted_str);
- if (!s.ok()) {
- fprintf(stderr, "%s\n", s.ToString().c_str());
- }
- return root->proto();
} else {
- const ShowNode* root = ShowInternal(opts, nullptr);
- printf("%s", root->formatted_str.c_str());
- fflush(stdout);
- return root->proto();
+ const ShowNode* ret = ShowInternal(opts, nullptr);
+ if (opts.output_type == kOutput[1]) {
+ printf("%s", ret->formatted_str.c_str());
+ fflush(stdout);
+ } else if (opts.output_type == kOutput[2]) {
+ Status s = WriteStringToFile(Env::Default(),
+ opts.output_options.at(kFileOpts[0]),
+ ret->formatted_str);
+ if (!s.ok()) {
+ fprintf(stderr, "%s\n", s.ToString().c_str());
+ }
+ } else if (opts.output_type == kOutput[3] ||
+ opts.output_type == kOutput[4]) {
+ } else {
+ fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
+ }
+ return ret->proto();
}
}
diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.cc b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
index 199aef7685..34b3e9e3f0 100644
--- a/tensorflow/core/profiler/internal/tfprof_show_multi.cc
+++ b/tensorflow/core/profiler/internal/tfprof_show_multi.cc
@@ -28,25 +28,27 @@ namespace tensorflow {
namespace tfprof {
const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) {
- if (opts.output_type == kOutput[3]) {
- return ShowInternal(opts, nullptr)->proto();
- } else if (opts.output_type == kOutput[0]) {
+ if (opts.output_type == kOutput[0]) {
Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0]));
return ShowInternal(opts, &timeline)->proto();
- } else if (opts.output_type == kOutput[2]) {
- const ShowMultiNode* root = ShowInternal(opts, nullptr);
- Status s =
- WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]),
- root->formatted_str);
- if (!s.ok()) {
- fprintf(stderr, "%s\n", s.ToString().c_str());
- }
- return root->proto();
} else {
- const ShowMultiNode* root = ShowInternal(opts, nullptr);
- printf("%s", root->formatted_str.c_str());
- fflush(stdout);
- return root->proto();
+ const ShowMultiNode* ret = ShowInternal(opts, nullptr);
+ if (opts.output_type == kOutput[1]) {
+ printf("%s", ret->formatted_str.c_str());
+ fflush(stdout);
+ } else if (opts.output_type == kOutput[2]) {
+ Status s = WriteStringToFile(Env::Default(),
+ opts.output_options.at(kFileOpts[0]),
+ ret->formatted_str);
+ if (!s.ok()) {
+ fprintf(stderr, "%s\n", s.ToString().c_str());
+ }
+ } else if (opts.output_type == kOutput[3] ||
+ opts.output_type == kOutput[4]) {
+ } else {
+ fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str());
+ }
+ return ret->proto();
}
}
diff --git a/tensorflow/core/profiler/profile.proto b/tensorflow/core/profiler/profile.proto
new file mode 100644
index 0000000000..27aa904c4a
--- /dev/null
+++ b/tensorflow/core/profiler/profile.proto
@@ -0,0 +1,71 @@
+// This proto is intended to match the format expected by the pprof tool.
+syntax = "proto3";
+
+package tensorflow.tfprof.pprof;
+
+message Profile {
+ repeated ValueType sample_type = 1;
+ repeated Sample sample = 2;
+ repeated Mapping mapping = 3;
+ repeated Location location = 4;
+ repeated Function function = 5;
+ repeated string string_table = 6;
+ int64 drop_frames = 7;
+ int64 keep_frames = 8;
+ int64 time_nanos = 9;
+ int64 duration_nanos = 10;
+ ValueType period_type = 11;
+ int64 period = 12;
+ repeated int64 comment = 13;
+ int64 default_sample_type = 14;
+}
+
+message ValueType {
+ int64 type = 1;
+ int64 unit = 2;
+}
+
+message Sample {
+ repeated uint64 location_id = 1;
+ repeated int64 value = 2;
+ repeated Label label = 3;
+}
+
+message Label {
+ int64 key = 1;
+ int64 str = 2;
+ int64 num = 3;
+}
+
+message Mapping {
+ uint64 id = 1;
+ uint64 memory_start = 2;
+ uint64 memory_limit = 3;
+ uint64 file_offset = 4;
+ int64 filename = 5;
+ int64 build_id = 6;
+ bool has_functions = 7;
+ bool has_filenames = 8;
+ bool has_line_numbers = 9;
+ bool has_inline_frames = 10;
+}
+
+message Location {
+ uint64 id = 1;
+ uint64 mapping_id = 2;
+ uint64 address = 3;
+ repeated Line line = 4;
+}
+
+message Line {
+ uint64 function_id = 1;
+ int64 line = 2;
+}
+
+message Function {
+ uint64 id = 1;
+ int64 name = 2;
+ int64 system_name = 3;
+ int64 filename = 4;
+ int64 start_line = 5;
+}
diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto
index 048449b437..006ee2b7a5 100644
--- a/tensorflow/core/profiler/tfprof_log.proto
+++ b/tensorflow/core/profiler/tfprof_log.proto
@@ -10,6 +10,7 @@ message CodeDef {
int32 lineno = 2;
string function = 3;
string line = 4;
+ int32 func_start_line = 5;
}
}
diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py
index 43da587b2c..7c55f9eaf3 100644
--- a/tensorflow/python/profiler/model_analyzer_test.py
+++ b/tensorflow/python/profiler/model_analyzer_test.py
@@ -17,7 +17,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
+import gzip
+import io
import os
+
+from tensorflow.core.profiler import profile_pb2
from tensorflow.core.protobuf import config_pb2
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.client import session
@@ -200,7 +204,6 @@ class PrintModelAnalysisTest(test.TestCase):
def testTimeline(self):
ops.reset_default_graph()
- opts = builder.trainable_variables_parameter()
outfile = os.path.join(test.get_temp_dir(), 'timeline')
opts = (builder(builder.trainable_variables_parameter())
.with_max_depth(100000)
@@ -312,6 +315,61 @@ class PrintModelAnalysisTest(test.TestCase):
checker = advice_pb.checkers['ExpensiveOperationChecker']
self.assertGreater(len(checker.reports), 0)
+ def pprof_test_helper(self, attribute, should_fail=False):
+ ops.reset_default_graph()
+ outfile = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz')
+ opts = (builder(builder.time_and_memory())
+ .select([attribute])
+ .with_max_depth(100000)
+ .with_node_names(trim_name_regexes=['ops.py.*'])
+ .with_pprof_output(outfile).build())
+
+ with session.Session() as sess:
+ x = lib.BuildFullModel()
+
+ sess.run(variables.global_variables_initializer())
+ run_meta = config_pb2.RunMetadata()
+ _ = sess.run(
+ x,
+ options=config_pb2.RunOptions(
+ trace_level=config_pb2.RunOptions.FULL_TRACE),
+ run_metadata=run_meta)
+
+ _ = model_analyzer.profile(
+ sess.graph, run_meta, cmd='code', options=opts)
+
+ if should_fail:
+ self.assertFalse(gfile.Exists(outfile))
+ return
+
+ profile_pb = profile_pb2.Profile()
+ with gfile.Open(outfile, 'rb') as f:
+ with gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf:
+ profile_pb.ParseFromString(gzipf.read())
+
+ self.assertGreater(len(profile_pb.sample), 10)
+ self.assertGreater(len(profile_pb.location), 10)
+ self.assertGreater(len(profile_pb.function), 10)
+ self.assertGreater(len(profile_pb.string_table), 30)
+
+ has_rnn = False
+ has_loop = False
+ for s in profile_pb.string_table:
+ if s.find('rnn') > 0:
+ has_rnn = True
+ if s.find('while') > 0:
+ has_loop = True
+ self.assertFalse(s.startswith('ops.py'))
+ self.assertTrue(has_rnn)
+ self.assertTrue(has_loop)
+
+ def testPprof(self):
+ for attr in ['micros', 'bytes', 'accelerator_micros', 'cpu_micros',
+ 'params', 'float_ops']:
+ self.pprof_test_helper(attr)
+ for attr in ['op_types', 'device', 'input_shapes']:
+ self.pprof_test_helper(attr, True)
+
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/python/profiler/option_builder.py b/tensorflow/python/profiler/option_builder.py
index 87f82709be..1a91060da8 100644
--- a/tensorflow/python/profiler/option_builder.py
+++ b/tensorflow/python/profiler/option_builder.py
@@ -353,6 +353,20 @@ class ProfileOptionBuilder(object):
self._options['output'] = 'timeline:outfile=%s' % timeline_file
return self
+ def with_pprof_output(self, pprof_file):
+ """Generate a pprof profile gzip file.
+
+ To use the pprof file:
+ pprof -png --nodecount=20 --sample_index=1 <pprof_file>
+
+ Args:
+ pprof_file: filename for output, usually suffixed with .pb.gz.
+ Returns:
+ self.
+ """
+ self._options['output'] = 'pprof:outfile=%s' % pprof_file
+ return self
+
def order_by(self, attribute):
# pylint: disable=line-too-long
"""Order the displayed profiler nodes based on a attribute.
diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py
index e50c52884a..675eb98f8e 100644
--- a/tensorflow/python/profiler/tfprof_logger.py
+++ b/tensorflow/python/profiler/tfprof_logger.py
@@ -98,12 +98,13 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True,
add_entry = True
if add_trace:
- for tb in op.traceback:
+ for tb in op.traceback_with_start_lines:
trace = entry.code_def.traces.add()
trace.file = tb[0] if tb[0] else 'none'
trace.lineno = tb[1] if tb[1] else -1
trace.function = tb[2] if tb[2] else 'none'
trace.line = tb[3] if tb[3] else 'none'
+ trace.func_start_line = tb[4] if tb[4] else -1
add_entry = True
if add_entry:
diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt
index 347187a890..862b2aa7cc 100644
--- a/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt
+++ b/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt
@@ -75,6 +75,10 @@ tf_class {
argspec: "args=[\'self\', \'start_name_regexes\', \'show_name_regexes\', \'hide_name_regexes\', \'trim_name_regexes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], "
}
member_method {
+ name: "with_pprof_output"
+ argspec: "args=[\'self\', \'pprof_file\'], varargs=None, keywords=None, defaults=None"
+ }
+ member_method {
name: "with_stdout_output"
argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None"
}