diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-07-31 16:41:46 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-07-31 16:45:38 -0700 |
commit | edac90c7c75b93ba149823257caf69ac9391c10b (patch) | |
tree | 17b0b2da59ff4fecd10c8d7fc3c1fd60845bd9c3 | |
parent | 78a90370ef214015508103bb21cef2962f041c5c (diff) |
Add support to generate pprof results to tf.profiler
A fun thing is, it can not only profile time and memory,
but also parameters, etc.
PiperOrigin-RevId: 163767517
20 files changed, 667 insertions, 147 deletions
diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md index d55824e6f2..e748daba7a 100644 --- a/tensorflow/core/profiler/README.md +++ b/tensorflow/core/profiler/README.md @@ -181,6 +181,7 @@ seq2seq_attention_model.py:363:build_graph:self._add_train_o..., cpu: 1.28sec, a ### Visualize time and memory. ``` +# The following example generates a timeline. tfprof> graph -step 0 -max_depth 100000 -output timeline:outfile=<filename> generating trace file. @@ -191,9 +192,29 @@ Open a Chrome browser, enter URL chrome://tracing and load the timeline file. ****************************************************** ``` <left> -[CodeTimeline](g3doc/graph_timeline.png) +[Timeline](g3doc/graph_timeline.png) </left> +``` +# The following example generates a pprof graph (only supported by code view). +# Since TensorFlow runs the graph instead of Python code, the pprof graph +# doesn't profile the statistics of Python, but the TensorFlow graph +# nodes created by the Python call stack. +# Nevertheless, it pops critical Python code path for us. +# +# `-trim_name_regexes` trims the python call stack, which are always the same +# for the leaves. +# `-select accelerator_micros` pick accelerator time for pprof graph. User +# can also generate memory profile using `-select bytes` +tfprof> code -max_depth 100 -trim_name_regexes '^ops.py.*' -select accelerator_micros -output pprof:outfile=<filename> + +# Use pprof to visualize the generated file. 
+pprof -png --nodecount=20 --sample_index=1 <filename> +``` + +<left> +[PprofGraph](g3doc/pprof.jpg) +</left> ### Feature Request and Bug Report diff --git a/tensorflow/core/profiler/g3doc/pprof.jpg b/tensorflow/core/profiler/g3doc/pprof.jpg Binary files differnew file mode 100644 index 0000000000..bf28967d68 --- /dev/null +++ b/tensorflow/core/profiler/g3doc/pprof.jpg diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc index 1746076f79..8650541550 100644 --- a/tensorflow/core/profiler/internal/tfprof_code.cc +++ b/tensorflow/core/profiler/internal/tfprof_code.cc @@ -21,6 +21,8 @@ limitations under the License. #include "tensorflow/c/c_api.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/io/zlib_compression_options.h" +#include "tensorflow/core/lib/io/zlib_outputbuffer.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" @@ -46,66 +48,335 @@ string GetTraceString(const CodeDef::Trace& trace) { } return ntrace; } + +// StringTable maps each string to an id. +class StringTable { + public: + StringTable() { + // Pprof requires first entry in string_table to be ''. + string_id_[""] = 0; + all_strings_.push_back(""); + } + + // Returns the index of a string. If not found, inserts the string and + // return the inserted index. + uint64 GetIndex(const string& str) { + auto idx = string_id_.find(str); + if (idx != string_id_.end()) { + return idx->second; + } + all_strings_.push_back(str); + return string_id_.insert(std::pair<string, int64>(str, string_id_.size())) + .first->second; + } + + const std::vector<string>& strings() const { return all_strings_; } + + private: + std::map<string, uint64> string_id_; + std::vector<string> all_strings_; +}; + +// FunctionTable maps each function to an id. 
+class FunctionTable { + public: + explicit FunctionTable(StringTable* string_table) + : string_table_(string_table) {} + + // Returns the index of a function. If not found, adds a function proto + // and returns the function index. + uint64 GetIndex(const string& file_path, const string& func_name, + uint64 func_start_line) { + auto key = std::tuple<string, string, uint64>(file_path, func_name, + func_start_line); + auto idx = function_table_.find(key); + if (idx != function_table_.end()) { + return idx->second.id(); + } + pprof::Function* func_pb = &function_table_[key]; + // function index should start from 1. + func_pb->set_id(function_table_.size()); + func_pb->set_name(string_table_->GetIndex(func_name)); + func_pb->set_filename(string_table_->GetIndex(file_path)); + func_pb->set_start_line(func_start_line); + return func_pb->id(); + } + + const std::map<std::tuple<string, string, uint64>, pprof::Function>& + functions() const { + return function_table_; + } + + private: + StringTable* string_table_; + std::map<std::tuple<string, string, uint64>, pprof::Function> function_table_; +}; + +// LocationTable maps each function call to an id. +class LocationTable { + public: + explicit LocationTable(FunctionTable* function_table) + : function_table_(function_table) {} + + // Returns the index of a function call localtion. If not found, adds a + // location proto and returns the location index. 
+ uint64 GetIndex(const string& file_path, uint64 line_number, + const string& called_function_name, + const string& called_file_path, + uint64 called_func_start_line) { + auto key = std::tuple<string, string, uint64>( + file_path, called_function_name, line_number); + auto idx = location_table_.find(key); + if (idx != location_table_.end()) { + return idx->second.id(); + } + pprof::Location* location_pb = &location_table_[key]; + location_pb->set_id(location_table_.size()); + pprof::Line* line_pb = location_pb->add_line(); + line_pb->set_function_id(function_table_->GetIndex( + called_file_path, called_function_name, called_func_start_line)); + line_pb->set_line(line_number); + return location_pb->id(); + } + + const std::map<std::tuple<string, string, uint64>, pprof::Location>& + locations() const { + return location_table_; + } + + private: + FunctionTable* function_table_; + std::map<std::tuple<string, string, uint64>, pprof::Location> location_table_; +}; + +// Samples stores samples of all calls. A sample is a single call trace, +// that is, the call path from top caller to the leaf callee. +class Samples { + public: + explicit Samples(StringTable* string_table, const Options* opts) + : string_table_(string_table), opts_(opts) {} + + // 'node' is the leaf of the displayed trace. It includes all graph nodes + // created by it. 'location_ids' contains + // the call stack, from callee to caller. + // This method adds the statistics of graph nodes created by the python + // call. + void Add(const CodeNode* node, const std::vector<uint64>& location_ids) { + // displayed leaf might not be true leaf. Retrive the true leaves for + // stats. 
+ std::vector<const CodeNode*> all_leaf = FetchAllLeaf(node); + CHECK(!all_leaf.empty()) << node->name(); + + for (const CodeNode* cn : all_leaf) { + for (auto gn_it : cn->node->graph_nodes()) { + const TFGraphNode* gn = gn_it.second; + pprof::Sample* sample_pb = &sample_table_[gn->name()]; + for (uint64 id : location_ids) { + sample_pb->mutable_location_id()->Add(id); + } + pprof::Label* label_pb = sample_pb->mutable_label()->Add(); + label_pb->set_key(string_table_->GetIndex("node_name")); + label_pb->set_str(string_table_->GetIndex(gn->name())); + + sample_pb->mutable_value()->Add(1); + string type = *opts_->select.begin(); + if (type == kShown[1]) { + sample_pb->mutable_value()->Add(gn->exec_micros(node->node->step())); + } else if (type == kShown[9]) { + sample_pb->mutable_value()->Add( + gn->accelerator_exec_micros(node->node->step())); + } else if (type == kShown[10]) { + sample_pb->mutable_value()->Add( + gn->cpu_exec_micros(node->node->step())); + } else if (type == kShown[0]) { + sample_pb->mutable_value()->Add( + gn->requested_bytes(node->node->step())); + } else if (type == kShown[2]) { + sample_pb->mutable_value()->Add(gn->parameters()); + } else if (type == kShown[3]) { + sample_pb->mutable_value()->Add(gn->float_ops(node->node->step())); + } else { + fprintf(stderr, "pprof doesn't support -select=%s\n", type.c_str()); + } + } + } + } + + const std::map<string, pprof::Sample>& samples() const { + return sample_table_; + } + + private: + std::vector<const CodeNode*> FetchAllLeaf(const CodeNode* root) { + if (root->children.empty()) { + return {root}; + } + std::vector<const CodeNode*> ret; + for (auto& n : root->children) { + std::vector<const CodeNode*> nodes = FetchAllLeaf(n); + ret.insert(ret.end(), nodes.begin(), nodes.end()); + } + return ret; + } + + StringTable* string_table_; + const Options* opts_; + std::map<string, pprof::Sample> sample_table_; +}; + +class PprofProfileImpl : public PprofProfile { + public: + explicit PprofProfileImpl(const 
Options* opts) + : opts_(opts), + func_table_(new FunctionTable(&string_table_)), + loc_table_(new LocationTable(func_table_.get())), + samples_(new Samples(&string_table_, opts)) {} + + uint64 AddLocation(const CodeNode* callee, const CodeNode* caller) override { + const string& file_path = caller->trace->file(); + uint64 lineno = caller->trace->lineno(); + const string& callee_file_path = callee->trace->file(); + const string& callee_function = callee->trace->function(); + uint64 callee_func_start_line = callee->trace->func_start_line(); + + return loc_table_->GetIndex(file_path, lineno, callee_function, + callee_file_path, callee_func_start_line); + } + + void AddSample(const CodeNode* leaf, std::vector<uint64>* call_ids) override { + std::vector<uint64> reversed_call_ids; + std::reverse_copy(call_ids->begin(), call_ids->end(), + std::back_inserter(reversed_call_ids)); + samples_->Add(leaf, reversed_call_ids); + } + + Status WritePprofProfile(const string& filename) override { + pprof::Profile profile_pb; + Build(&profile_pb); + + std::unique_ptr<WritableFile> file; + Status s = Env::Default()->NewWritableFile(filename, &file); + if (!s.ok()) return s; + + int32 buf_size = 1024 * 1024; + io::ZlibOutputBuffer* zlib_output_buffer = new io::ZlibOutputBuffer( + file.get(), buf_size, buf_size, io::ZlibCompressionOptions::GZIP()); + s = zlib_output_buffer->Init(); + if (!s.ok()) return s; + s = zlib_output_buffer->Append(profile_pb.SerializeAsString()); + if (!s.ok()) return s; + s = zlib_output_buffer->Close(); + if (!s.ok()) return s; + fprintf(stdout, "\nRun pprof -png --nodecount=20 --sample_index=1 <%s>\n", + filename.c_str()); + return s; + } + + private: + void Build(pprof::Profile* profile_pb) { + string sample_type_description = "count"; + auto sample_type = profile_pb->mutable_sample_type()->Add(); + sample_type->set_type(string_table_.GetIndex(sample_type_description)); + sample_type->set_unit(string_table_.GetIndex("count")); + + string type = 
*opts_->select.begin(); + sample_type_description = type; + sample_type = profile_pb->mutable_sample_type()->Add(); + sample_type->set_type(string_table_.GetIndex(sample_type_description)); + if (type == kShown[1] || type == kShown[9] || type == kShown[10]) { + sample_type->set_unit(string_table_.GetIndex("microseconds")); + if (type == kShown[1]) { + profile_pb->mutable_comment()->Add(string_table_.GetIndex( + "Sum of accelerator execution time and cpu execution time.")); + } else if (type == kShown[9]) { + profile_pb->mutable_comment()->Add( + string_table_.GetIndex("Accelerator execution time.")); + } else if (type == kShown[10]) { + profile_pb->mutable_comment()->Add( + string_table_.GetIndex("CPU execution time.")); + } + } else if (type == kShown[0]) { + sample_type->set_unit(string_table_.GetIndex("bytes")); + profile_pb->mutable_comment()->Add( + string_table_.GetIndex("Sum of operation output memory.")); + } else if (type == kShown[2]) { + sample_type->set_unit(string_table_.GetIndex("count")); + profile_pb->mutable_comment()->Add( + string_table_.GetIndex("Model parameters.")); + } else if (type == kShown[3]) { + sample_type->set_unit(string_table_.GetIndex("count")); + profile_pb->mutable_comment()->Add(string_table_.GetIndex( + "Model float operations (Only available if defined).")); + } else { + fprintf(stderr, "pprof doesn't support selecting: %s\n", type.c_str()); + } + + for (const string& str : string_table_.strings()) { + *profile_pb->mutable_string_table()->Add() = str; + } + for (const auto& sample_it : samples_->samples()) { + // TODO(xpan): Consider swap. 
+ profile_pb->mutable_sample()->Add()->MergeFrom(sample_it.second); + } + for (const auto& function_it : func_table_->functions()) { + profile_pb->mutable_function()->Add()->MergeFrom(function_it.second); + } + for (const auto& location_it : loc_table_->locations()) { + profile_pb->mutable_location()->Add()->MergeFrom(location_it.second); + } + } + + const Options* opts_; + StringTable string_table_; + std::unique_ptr<FunctionTable> func_table_; + std::unique_ptr<LocationTable> loc_table_; + std::unique_ptr<Samples> samples_; +}; } // namespace void TFCode::AddNode(TFGraphNode* node) { if (node->code().traces_size() == 0) { return; } - TFMultiGraphNode* pre_trace_node = nullptr; + if (!root_) { + graph_root_.reset(new TFMultiGraphNode(kTFProfRoot)); + root_.reset(new CodeNode(graph_root_.get(), nullptr)); + } + + CodeNode* pre_code_node = root_.get(); // TODO(xpan): Consider to release CodeDef after TFCode is built. It // takes a lot of memory. for (int i = 0; i < node->code().traces_size(); ++i) { // Unlike op name, which is globally unique, trace name is only unique // w.r.t. it's parent. 
const string& trace = GetTraceString(node->code().traces(i)); - if (i == 0) { - if (!trace_root_) { - trace_root_.reset(new TFMultiGraphNode(trace)); - } - CHECK(trace_root_->name() == trace) << "Different trace root"; - pre_trace_node = trace_root_.get(); - continue; - } - pre_trace_node->AddChildren(trace); - TFMultiGraphNode* trace_node = pre_trace_node->children().at(trace).get(); - + pre_code_node = pre_code_node->AddChildren(trace, &node->code().traces(i)); if (i == node->code().traces_size() - 1) { - trace_node->AddGraphNode(node); + pre_code_node->node->AddGraphNode(node); } - pre_trace_node = trace_node; } } void TFCode::Build() { - if (root_) { - return; - } - tfprof_trace_root_.reset(new TFMultiGraphNode(kTFProfRoot)); - root_.reset(new CodeNode(tfprof_trace_root_.get())); - - if (trace_root_) { - code_root_ = BuildCodeNodes(trace_root_.get()); - root_->children.push_back(code_root_); - } -} - -CodeNode* TFCode::BuildCodeNodes(TFMultiGraphNode* root) { - auto code_root = std::unique_ptr<CodeNode>(new CodeNode(root)); - CodeNode* code_root_ptr = code_root.get(); - code_nodes_.insert(std::move(code_root)); - - for (auto it = root->children().cbegin(); it != root->children().cend(); - ++it) { - code_root_ptr->children.push_back(BuildCodeNodes(it->second.get())); - } - return code_root_ptr; } const ShowMultiNode* TFCode::ShowInternal(const Options& opts, Timeline* timeline) { - std::vector<CodeNode*> roots = Account(root_->children, opts); root_->ResetTotalStats(); + if (opts.output_type == kOutput[3]) { + if (opts.select.size() != 1) { + fprintf(stderr, "Can only select 1 attribute for pprof output.\n"); + return root_.get(); + } + string select = *opts.select.begin(); + if (select != kShown[0] && select != kShown[1] && select != kShown[2] && + select != kShown[3] && select != kShown[9] && select != kShown[10]) { + fprintf(stderr, "pprof doesn't support -select=%s\n", select.c_str()); + return root_.get(); + } + } + + std::vector<CodeNode*> roots = 
Account(root_->children, opts); root_->show_children.clear(); for (CodeNode* n : roots) { root_->AggregateTotalStats(n); @@ -121,21 +392,46 @@ const ShowMultiNode* TFCode::ShowInternal(const Options& opts, CodeNode* root = PrintScope({root_.get()}, opts, 1, 0)[0]; root->formatted_str = FormatLegend(opts) + root->formatted_str; - Format(root->show_children, &root->formatted_str, root->mutable_proto()); - if (timeline) { - timeline->GenerateCodeTimeline(root); + if (opts.output_type == kOutput[3]) { + std::vector<uint64> call_ids; + pprof_profile_.reset(new PprofProfileImpl(&opts)); + Format(root, root->show_children, opts, &root->formatted_str, + root->mutable_proto(), &call_ids); + Status s = pprof_profile_->WritePprofProfile( + opts.output_options.at(kPprofOpts[0])); + if (!s.ok()) { + fprintf(stderr, "%s\n", s.ToString().c_str()); + } + } else { + Format(root, root->show_children, opts, &root->formatted_str, + root->mutable_proto(), nullptr); + if (timeline) { + timeline->GenerateCodeTimeline(root); + } } return root; } -void TFCode::Format(const std::vector<CodeNode*> roots, string* display_str, - MultiGraphNodeProto* proto) { - for (CodeNode* node : roots) { +void TFCode::Format(const CodeNode* root, const std::vector<CodeNode*>& nodes, + const Options& opts, string* display_str, + MultiGraphNodeProto* proto, std::vector<uint64>* call_ids) { + if (nodes.empty() && root->trace && opts.output_type == kOutput[3]) { + pprof_profile_->AddSample(root, call_ids); + } + + for (CodeNode* node : nodes) { + if (root->trace && opts.output_type == kOutput[3]) { + uint64 loc_id = pprof_profile_->AddLocation(node, root); + call_ids->push_back(loc_id); + } display_str->append(node->formatted_str); MultiGraphNodeProto* child = proto->add_children(); child->MergeFrom(node->proto()); - Format(node->show_children, display_str, child); + Format(node, node->show_children, opts, display_str, child, call_ids); + if (root->trace && opts.output_type == kOutput[3]) { + 
call_ids->pop_back(); + } } } @@ -170,14 +466,15 @@ std::vector<CodeNode*> TFCode::PrintScope(const std::vector<CodeNode*> roots, std::vector<CodeNode*> show_nodes; for (CodeNode* node : roots) { + if (ShouldTrim(node, opts.trim_name_regexes) || depth > opts.max_depth) { + continue; + } int ident = last_ident; bool show = ShouldShow(node, opts, depth); if (show) ident += 2; - std::vector<CodeNode*> show_cnodes; - if (!ShouldTrim(node, opts.trim_name_regexes) && depth <= opts.max_depth) { - show_cnodes = PrintScope(node->show_children, opts, depth + 1, ident); - } + std::vector<CodeNode*> show_cnodes = + PrintScope(node->show_children, opts, depth + 1, ident); if (show) { node->show_children.clear(); if (opts.account_displayed_op_only) { diff --git a/tensorflow/core/profiler/internal/tfprof_code.h b/tensorflow/core/profiler/internal/tfprof_code.h index 7338297221..7583a43a26 100644 --- a/tensorflow/core/profiler/internal/tfprof_code.h +++ b/tensorflow/core/profiler/internal/tfprof_code.h @@ -32,15 +32,29 @@ limitations under the License. 
#include "tensorflow/core/profiler/internal/tfprof_show_multi.h" #include "tensorflow/core/profiler/internal/tfprof_timeline.h" #include "tensorflow/core/profiler/internal/tfprof_utils.h" +#include "tensorflow/core/profiler/profile.pb.h" #include "tensorflow/core/profiler/tfprof_log.pb.h" #include "tensorflow/core/profiler/tfprof_output.pb.h" namespace tensorflow { namespace tfprof { +class PprofProfile { + public: + virtual ~PprofProfile() {} + + virtual uint64 AddLocation(const CodeNode* callee, + const CodeNode* caller) = 0; + + virtual void AddSample(const CodeNode* leaf, + std::vector<uint64>* call_ids) = 0; + + virtual Status WritePprofProfile(const string& filename) = 0; +}; + class TFCode : public TFMultiShow { public: - explicit TFCode() : code_root_(nullptr), trace_root_(nullptr) {} + TFCode() {} ~TFCode() override {} void AddNode(TFGraphNode* node) override; @@ -48,8 +62,6 @@ class TFCode : public TFMultiShow { void Build() override; private: - CodeNode* BuildCodeNodes(TFMultiGraphNode* root); - const ShowMultiNode* ShowInternal(const Options& opts, Timeline* timeline) override; @@ -63,16 +75,15 @@ class TFCode : public TFMultiShow { std::vector<CodeNode*> Account(const std::vector<CodeNode*>& roots, const Options& opts); - void Format(const std::vector<CodeNode*> roots, string* display_str, - MultiGraphNodeProto* proto); + void Format(const CodeNode* root, const std::vector<CodeNode*>& nodes, + const Options& opts, string* display_str, + MultiGraphNodeProto* proto, std::vector<uint64>* call_ids); string FormatNode(CodeNode* node, const Options& opts, int64 indent); std::unique_ptr<CodeNode> root_; - CodeNode* code_root_; - std::unique_ptr<TFMultiGraphNode> trace_root_; - std::unique_ptr<TFMultiGraphNode> tfprof_trace_root_; - std::set<std::unique_ptr<CodeNode>> code_nodes_; + std::unique_ptr<TFMultiGraphNode> graph_root_; + std::unique_ptr<PprofProfile> pprof_profile_; }; } // namespace tfprof } // namespace tensorflow diff --git 
a/tensorflow/core/profiler/internal/tfprof_graph.cc b/tensorflow/core/profiler/internal/tfprof_graph.cc index 8d7c44b219..e58924a614 100644 --- a/tensorflow/core/profiler/internal/tfprof_graph.cc +++ b/tensorflow/core/profiler/internal/tfprof_graph.cc @@ -72,6 +72,11 @@ void TFGraph::Build() { const ShowNode* TFGraph::ShowInternal(const Options& opts, Timeline* timeline) { root_->ResetTotalStats(); root_->show_children.clear(); + + if (opts.output_type == kOutput[3]) { + fprintf(stderr, "Only 'code' view supports pprof output now.\n"); + return root_; + } if (timeline && timeline->step() < 0) { // TODO(xpan): Maybe pick a default step for users. fprintf(stderr, diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h index 9142ad51c5..929ee3f50c 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.h +++ b/tensorflow/core/profiler/internal/tfprof_node.h @@ -340,6 +340,27 @@ class TFGraphNode { return exec->second.allocator_bytes_in_use(); } + int64 parameters() const { + if (!shape().empty()) { + int64 params = 1; + bool complete_shape = true; + for (int64 d : shape()) { + // Sometimes parameters could be <0 when a dim is unknown. + if (d < 0) { + complete_shape = false; + break; + } + params *= d; + } + if (complete_shape) { + return params; + } else { + fprintf(stderr, "Incomplete shape.\n"); + } + } + return 0; + } + int64 float_ops(int64 step) const { // If not run, return static analysis. 
if (execs_.empty()) { @@ -400,12 +421,14 @@ class TFMultiGraphNode { public: TFMultiGraphNode(const string& name) : name_(name), + step_(-1), run_count_(0), exec_micros_(0), accelerator_exec_micros_(0), cpu_exec_micros_(0), requested_bytes_(0), - float_ops_(0) {} + float_ops_(0), + parameters_(0) {} bool SnapshotNodes(int64 step, const std::vector<string>& type_regexes) { run_count_ = 0; @@ -415,11 +438,13 @@ class TFMultiGraphNode { requested_bytes_ = 0; float_ops_ = 0; + parameters_ = 0; op_types_.clear(); shapes_.clear(); devices_.clear(); snapshot_nodes_.clear(); + step_ = step; std::vector<const TFGraphNode*> nodes = pick_nodes(type_regexes); if (nodes.empty()) { @@ -436,6 +461,7 @@ class TFMultiGraphNode { requested_bytes_ += node->requested_bytes(step); float_ops_ += node->float_ops(step); + parameters_ += node->parameters(); if (node->shape().size() > 0) { shapes_.push_back(node->shape()); } @@ -445,6 +471,8 @@ class TFMultiGraphNode { return true; } + int64 step() const { return step_; } + void AddGraphNode(const TFGraphNode* node) { if (nodes_.find(node->name()) != nodes_.end()) { return; @@ -456,16 +484,6 @@ class TFMultiGraphNode { return snapshot_nodes_; } - void AddChildren(const string& name) { - if (children_.find(name) != children_.end()) { - return; - } - children_[name].reset(new TFMultiGraphNode(name)); - } - const std::map<string, std::unique_ptr<TFMultiGraphNode>>& children() const { - return children_; - } - const string& name() const { return name_; } int64 run_count() const { return run_count_; } @@ -477,6 +495,8 @@ class TFMultiGraphNode { int64 float_ops() const { return float_ops_; } + int64 parameters() const { return parameters_; } + const std::set<string>& devices() const { return devices_; } const std::set<string>& op_types() const { return op_types_; } @@ -511,6 +531,7 @@ class TFMultiGraphNode { } const string name_; + int64 step_; // Snapshot based on type_regexes std::set<string> op_types_; int64 run_count_; @@ -520,13 +541,13 @@ 
class TFMultiGraphNode { int64 requested_bytes_; int64 float_ops_; + int64 parameters_; std::set<string> devices_; std::vector<std::vector<int64>> shapes_; std::map<string, const TFGraphNode*> snapshot_nodes_; // Overall data held by the TFMultiGraphNode. std::map<string, const TFGraphNode*> nodes_; - std::map<string, std::unique_ptr<TFMultiGraphNode>> children_; }; bool IsPlacedOnAccelerator(const string& device); diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.cc b/tensorflow/core/profiler/internal/tfprof_node_show.cc index 5ca57412e5..16b94fdfa1 100644 --- a/tensorflow/core/profiler/internal/tfprof_node_show.cc +++ b/tensorflow/core/profiler/internal/tfprof_node_show.cc @@ -45,25 +45,7 @@ void ShowNode::ReInit(int64 step) { (*mutable_proto()->mutable_input_shapes())[inp.first].MergeFrom( VecToShapeProto(inp.second)); } - - proto_.clear_parameters(); - if (!node->shape().empty()) { - int64 params = 1; - bool complete_shape = true; - for (int64 d : node->shape()) { - // Sometimes parameters could be <0 when a dim is unknown. 
- if (d < 0) { - complete_shape = false; - break; - } - params *= d; - } - if (complete_shape) { - mutable_proto()->set_parameters(proto_.parameters() + params); - } else { - fprintf(stderr, "Incomplete shape."); - } - } + proto_.set_parameters(node->parameters()); } GraphNodeProto* ShowNode::mutable_proto() { return &proto_; } @@ -114,6 +96,8 @@ void ShowNode::AddSelfToTotalStats() { } void ShowNode::ResetTotalStats() { + formatted_str.clear(); + mutable_proto()->set_total_definition_count(0); mutable_proto()->set_total_run_count(0); mutable_proto()->set_total_exec_micros(0); @@ -153,26 +137,7 @@ bool ShowMultiNode::ReInit(int64 step, mutable_proto()->set_requested_bytes(node->requested_bytes()); mutable_proto()->set_float_ops(node->float_ops()); - mutable_proto()->clear_parameters(); - if (!node->shapes().empty()) { - for (const std::vector<int64>& shape : node->shapes()) { - int64 params = 1; - bool complete_shape = true; - for (int64 d : shape) { - // Sometimes parameters could be <0 when a dim is unknown. 
- if (d < 0) { - complete_shape = false; - break; - } - params *= d; - } - if (complete_shape) { - mutable_proto()->set_parameters(proto().parameters() + params); - } else { - fprintf(stderr, "Incomplete shape."); - } - } - } + mutable_proto()->set_parameters(node->parameters()); return has_matched_type; } @@ -216,6 +181,7 @@ void ShowMultiNode::AddSelfToTotalStats() { } void ShowMultiNode::ResetTotalStats() { + formatted_str.clear(); mutable_proto()->set_total_exec_micros(0); mutable_proto()->set_total_accelerator_exec_micros(0); mutable_proto()->set_total_cpu_exec_micros(0); diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.h b/tensorflow/core/profiler/internal/tfprof_node_show.h index fea19bdca3..cdcb3f499b 100644 --- a/tensorflow/core/profiler/internal/tfprof_node_show.h +++ b/tensorflow/core/profiler/internal/tfprof_node_show.h @@ -111,11 +111,31 @@ class ShowMultiNode { class CodeNode : public ShowMultiNode { public: - explicit CodeNode(TFMultiGraphNode* node) : ShowMultiNode(node) {} + explicit CodeNode(TFMultiGraphNode* node, const CodeDef::Trace* trace) + : ShowMultiNode(node), trace(trace) {} ~CodeNode() override {} + CodeNode* AddChildren(const string& name, const CodeDef::Trace* trace) { + auto it = children_.find(name); + if (it != children_.end()) { + return it->second.get(); + } + + graph_children_.push_back( + std::unique_ptr<TFMultiGraphNode>(new TFMultiGraphNode(name))); + auto child = &children_[name]; + child->reset(new CodeNode(graph_children_.back().get(), trace)); + children.push_back(child->get()); + return child->get(); + } + + const CodeDef::Trace* trace; std::vector<CodeNode*> children; std::vector<CodeNode*> show_children; + + private: + std::vector<std::unique_ptr<TFMultiGraphNode>> graph_children_; + std::map<string, std::unique_ptr<CodeNode>> children_; }; class OpNode : public ShowMultiNode { diff --git a/tensorflow/core/profiler/internal/tfprof_op.cc b/tensorflow/core/profiler/internal/tfprof_op.cc index 
46a81da80f..ab013506ec 100644 --- a/tensorflow/core/profiler/internal/tfprof_op.cc +++ b/tensorflow/core/profiler/internal/tfprof_op.cc @@ -103,8 +103,13 @@ void TFOp::Build() { } const ShowMultiNode* TFOp::ShowInternal(const Options& opts, - Timeline* timeline) { + Timeline* timeline) { root_->ResetTotalStats(); + if (opts.output_type == kOutput[3]) { + fprintf(stderr, "Only 'code' view supports pprof output now.\n"); + return root_.get(); + } + if (opts.output_type == kOutput[1] || opts.output_type == kOutput[2]) { root_->formatted_str = FormatNode(root_.get(), root_.get(), opts); } diff --git a/tensorflow/core/profiler/internal/tfprof_options.cc b/tensorflow/core/profiler/internal/tfprof_options.cc index 53e401dd60..2b5e340cec 100644 --- a/tensorflow/core/profiler/internal/tfprof_options.cc +++ b/tensorflow/core/profiler/internal/tfprof_options.cc @@ -84,6 +84,13 @@ tensorflow::Status ParseOutput(const string& output_opt, string* output_type, required_options.insert(kFileRequiredOpts, kFileRequiredOpts + sizeof(kFileRequiredOpts) / sizeof(*kFileRequiredOpts)); + } else if (*output_type == kOutput[3]) { + valid_options.insert(kPprofOpts, + kPprofOpts + sizeof(kPprofOpts) / sizeof(*kPprofOpts)); + required_options.insert( + kPprofRequiredOpts, + kPprofRequiredOpts + + sizeof(kPprofRequiredOpts) / sizeof(*kPprofRequiredOpts)); } for (const string& kv_str : kv_split) { diff --git a/tensorflow/core/profiler/internal/tfprof_options.h b/tensorflow/core/profiler/internal/tfprof_options.h index 6d0c213b3d..8e78ee7463 100644 --- a/tensorflow/core/profiler/internal/tfprof_options.h +++ b/tensorflow/core/profiler/internal/tfprof_options.h @@ -62,7 +62,8 @@ static const char* const kCmds[] = { "scope", "graph", "code", "op", "advise", "set", "help", }; -static const char* const kOutput[] = {"timeline", "stdout", "file", "none"}; +static const char* const kOutput[] = {"timeline", "stdout", "file", "pprof", + "none"}; static const char* const kTimelineOpts[] = { "outfile", @@ 
-78,6 +79,14 @@ static const char* const kFileRequiredOpts[] = { "outfile", }; +static const char* const kPprofOpts[] = { + "outfile", +}; + +static const char* const kPprofRequiredOpts[] = { + "outfile", +}; + struct Options { public: static tensorflow::Status FromProtoStr(const string& opts_proto_str, diff --git a/tensorflow/core/profiler/internal/tfprof_scope.cc b/tensorflow/core/profiler/internal/tfprof_scope.cc index 0f7e079098..d7293c5e52 100644 --- a/tensorflow/core/profiler/internal/tfprof_scope.cc +++ b/tensorflow/core/profiler/internal/tfprof_scope.cc @@ -78,8 +78,13 @@ void TFScope::Build() { } const ShowNode* TFScope::ShowInternal(const Options& opts, Timeline* timeline) { - std::vector<ScopeNode*> roots = Account(root_->children, opts); root_->ResetTotalStats(); + if (opts.output_type == kOutput[3]) { + fprintf(stderr, "Only 'code' view supports pprof output now.\n"); + return root_; + } + + std::vector<ScopeNode*> roots = Account(root_->children, opts); root_->show_children.clear(); for (ScopeNode* n : roots) { root_->AggregateTotalStats(n); diff --git a/tensorflow/core/profiler/internal/tfprof_show.cc b/tensorflow/core/profiler/internal/tfprof_show.cc index 2828217d3d..630eba4ff2 100644 --- a/tensorflow/core/profiler/internal/tfprof_show.cc +++ b/tensorflow/core/profiler/internal/tfprof_show.cc @@ -26,25 +26,27 @@ namespace tensorflow { namespace tfprof { const GraphNodeProto& TFShow::Show(const Options& opts) { - if (opts.output_type == kOutput[3]) { - return ShowInternal(opts, nullptr)->proto(); - } else if (opts.output_type == kOutput[0]) { + if (opts.output_type == kOutput[0]) { Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0])); return ShowInternal(opts, &timeline)->proto(); - } else if (opts.output_type == kOutput[2]) { - const ShowNode* root = ShowInternal(opts, nullptr); - Status s = - WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]), - root->formatted_str); - if (!s.ok()) { - fprintf(stderr, "%s\n", 
s.ToString().c_str()); - } - return root->proto(); } else { - const ShowNode* root = ShowInternal(opts, nullptr); - printf("%s", root->formatted_str.c_str()); - fflush(stdout); - return root->proto(); + const ShowNode* ret = ShowInternal(opts, nullptr); + if (opts.output_type == kOutput[1]) { + printf("%s", ret->formatted_str.c_str()); + fflush(stdout); + } else if (opts.output_type == kOutput[2]) { + Status s = WriteStringToFile(Env::Default(), + opts.output_options.at(kFileOpts[0]), + ret->formatted_str); + if (!s.ok()) { + fprintf(stderr, "%s\n", s.ToString().c_str()); + } + } else if (opts.output_type == kOutput[3] || + opts.output_type == kOutput[4]) { + } else { + fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str()); + } + return ret->proto(); } } diff --git a/tensorflow/core/profiler/internal/tfprof_show_multi.cc b/tensorflow/core/profiler/internal/tfprof_show_multi.cc index 199aef7685..34b3e9e3f0 100644 --- a/tensorflow/core/profiler/internal/tfprof_show_multi.cc +++ b/tensorflow/core/profiler/internal/tfprof_show_multi.cc @@ -28,25 +28,27 @@ namespace tensorflow { namespace tfprof { const MultiGraphNodeProto& TFMultiShow::Show(const Options& opts) { - if (opts.output_type == kOutput[3]) { - return ShowInternal(opts, nullptr)->proto(); - } else if (opts.output_type == kOutput[0]) { + if (opts.output_type == kOutput[0]) { Timeline timeline(opts.step, opts.output_options.at(kTimelineOpts[0])); return ShowInternal(opts, &timeline)->proto(); - } else if (opts.output_type == kOutput[2]) { - const ShowMultiNode* root = ShowInternal(opts, nullptr); - Status s = - WriteStringToFile(Env::Default(), opts.output_options.at(kFileOpts[0]), - root->formatted_str); - if (!s.ok()) { - fprintf(stderr, "%s\n", s.ToString().c_str()); - } - return root->proto(); } else { - const ShowMultiNode* root = ShowInternal(opts, nullptr); - printf("%s", root->formatted_str.c_str()); - fflush(stdout); - return root->proto(); + const ShowMultiNode* ret = 
ShowInternal(opts, nullptr); + if (opts.output_type == kOutput[1]) { + printf("%s", ret->formatted_str.c_str()); + fflush(stdout); + } else if (opts.output_type == kOutput[2]) { + Status s = WriteStringToFile(Env::Default(), + opts.output_options.at(kFileOpts[0]), + ret->formatted_str); + if (!s.ok()) { + fprintf(stderr, "%s\n", s.ToString().c_str()); + } + } else if (opts.output_type == kOutput[3] || + opts.output_type == kOutput[4]) { + } else { + fprintf(stderr, "Unknown output type: %s\n", opts.output_type.c_str()); + } + return ret->proto(); } } diff --git a/tensorflow/core/profiler/profile.proto b/tensorflow/core/profiler/profile.proto new file mode 100644 index 0000000000..27aa904c4a --- /dev/null +++ b/tensorflow/core/profiler/profile.proto @@ -0,0 +1,71 @@ +// This proto intends to match format expected by pprof tool. +syntax = "proto3"; + +package tensorflow.tfprof.pprof; + +message Profile { + repeated ValueType sample_type = 1; + repeated Sample sample = 2; + repeated Mapping mapping = 3; + repeated Location location = 4; + repeated Function function = 5; + repeated string string_table = 6; + int64 drop_frames = 7; + int64 keep_frames = 8; + int64 time_nanos = 9; + int64 duration_nanos = 10; + ValueType period_type = 11; + int64 period = 12; + repeated int64 comment = 13; + int64 default_sample_type = 14; +} + +message ValueType { + int64 type = 1; + int64 unit = 2; +} + +message Sample { + repeated uint64 location_id = 1; + repeated int64 value = 2; + repeated Label label = 3; +} + +message Label { + int64 key = 1; + int64 str = 2; + int64 num = 3; +} + +message Mapping { + uint64 id = 1; + uint64 memory_start = 2; + uint64 memory_limit = 3; + uint64 file_offset = 4; + int64 filename = 5; + int64 build_id = 6; + bool has_functions = 7; + bool has_filenames = 8; + bool has_line_numbers = 9; + bool has_inline_frames = 10; +} + +message Location { + uint64 id = 1; + uint64 mapping_id = 2; + uint64 address = 3; + repeated Line line = 4; +} + +message Line 
{ + uint64 function_id = 1; + int64 line = 2; +} + +message Function { + uint64 id = 1; + int64 name = 2; + int64 system_name = 3; + int64 filename = 4; + int64 start_line = 5; +} diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto index 048449b437..006ee2b7a5 100644 --- a/tensorflow/core/profiler/tfprof_log.proto +++ b/tensorflow/core/profiler/tfprof_log.proto @@ -10,6 +10,7 @@ message CodeDef { int32 lineno = 2; string function = 3; string line = 4; + int32 func_start_line = 5; } } diff --git a/tensorflow/python/profiler/model_analyzer_test.py b/tensorflow/python/profiler/model_analyzer_test.py index 43da587b2c..7c55f9eaf3 100644 --- a/tensorflow/python/profiler/model_analyzer_test.py +++ b/tensorflow/python/profiler/model_analyzer_test.py @@ -17,7 +17,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import gzip +import io import os + +from tensorflow.core.profiler import profile_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.client import session @@ -200,7 +204,6 @@ class PrintModelAnalysisTest(test.TestCase): def testTimeline(self): ops.reset_default_graph() - opts = builder.trainable_variables_parameter() outfile = os.path.join(test.get_temp_dir(), 'timeline') opts = (builder(builder.trainable_variables_parameter()) .with_max_depth(100000) @@ -312,6 +315,61 @@ class PrintModelAnalysisTest(test.TestCase): checker = advice_pb.checkers['ExpensiveOperationChecker'] self.assertGreater(len(checker.reports), 0) + def pprof_test_helper(self, attribute, should_fail=False): + ops.reset_default_graph() + outfile = os.path.join(test.get_temp_dir(), attribute + '_pprof.pb.gz') + opts = (builder(builder.time_and_memory()) + .select([attribute]) + .with_max_depth(100000) + .with_node_names(trim_name_regexes=['ops.py.*']) + .with_pprof_output(outfile).build()) + + with 
session.Session() as sess: + x = lib.BuildFullModel() + + sess.run(variables.global_variables_initializer()) + run_meta = config_pb2.RunMetadata() + _ = sess.run( + x, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE), + run_metadata=run_meta) + + _ = model_analyzer.profile( + sess.graph, run_meta, cmd='code', options=opts) + + if should_fail: + self.assertFalse(gfile.Exists(outfile)) + return + + profile_pb = profile_pb2.Profile() + with gfile.Open(outfile, 'rb') as f: + with gzip.GzipFile(fileobj=io.BytesIO(f.read())) as gzipf: + profile_pb.ParseFromString(gzipf.read()) + + self.assertGreater(len(profile_pb.sample), 10) + self.assertGreater(len(profile_pb.location), 10) + self.assertGreater(len(profile_pb.function), 10) + self.assertGreater(len(profile_pb.string_table), 30) + + has_rnn = False + has_loop = False + for s in profile_pb.string_table: + if s.find('rnn') > 0: + has_rnn = True + if s.find('while') > 0: + has_loop = True + self.assertFalse(s.startswith('ops.py')) + self.assertTrue(has_rnn) + self.assertTrue(has_loop) + + def testPprof(self): + for attr in ['micros', 'bytes', 'accelerator_micros', 'cpu_micros', + 'params', 'float_ops']: + self.pprof_test_helper(attr) + for attr in ['op_types', 'device', 'input_shapes']: + self.pprof_test_helper(attr, True) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/profiler/option_builder.py b/tensorflow/python/profiler/option_builder.py index 87f82709be..1a91060da8 100644 --- a/tensorflow/python/profiler/option_builder.py +++ b/tensorflow/python/profiler/option_builder.py @@ -353,6 +353,20 @@ class ProfileOptionBuilder(object): self._options['output'] = 'timeline:outfile=%s' % timeline_file return self + def with_pprof_output(self, pprof_file): + """Generate a pprof profile gzip file. + + To use the pprof file: + pprof -png --nodecount=20 --sample_index=1 <pprof_file> + + Args: + pprof_file: filename for output, usually suffixed with .pb.gz. + Returns: + self. 
+ """ + self._options['output'] = 'pprof:outfile=%s' % pprof_file + return self + def order_by(self, attribute): # pylint: disable=line-too-long """Order the displayed profiler nodes based on a attribute. diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py index e50c52884a..675eb98f8e 100644 --- a/tensorflow/python/profiler/tfprof_logger.py +++ b/tensorflow/python/profiler/tfprof_logger.py @@ -98,12 +98,13 @@ def _get_logged_ops(graph, run_meta=None, add_trace=True, add_entry = True if add_trace: - for tb in op.traceback: + for tb in op.traceback_with_start_lines: trace = entry.code_def.traces.add() trace.file = tb[0] if tb[0] else 'none' trace.lineno = tb[1] if tb[1] else -1 trace.function = tb[2] if tb[2] else 'none' trace.line = tb[3] if tb[3] else 'none' + trace.func_start_line = tb[4] if tb[4] else -1 add_entry = True if add_entry: diff --git a/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt b/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt index 347187a890..862b2aa7cc 100644 --- a/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.profiler.-profile-option-builder.pbtxt @@ -75,6 +75,10 @@ tf_class { argspec: "args=[\'self\', \'start_name_regexes\', \'show_name_regexes\', \'hide_name_regexes\', \'trim_name_regexes\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\'], " } member_method { + name: "with_pprof_output" + argspec: "args=[\'self\', \'pprof_file\'], varargs=None, keywords=None, defaults=None" + } + member_method { name: "with_stdout_output" argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" } |