diff options
author | 2017-10-12 16:39:22 -0700 | |
---|---|---|
committer | 2017-10-12 16:43:56 -0700 | |
commit | 19708cc7d8e34e830a716d3f9896294489d3b535 (patch) | |
tree | 2fe7b78976ee65a5175ccf4121c168a7c8d9c2d3 /tensorflow/core/profiler | |
parent | 1002f974f58b23c528436e34c06384b8bffb2485 (diff) |
A few profiler improvements.
1. Use an id_to_string map to reduce the profile size (2/3 in xception)
2. Dedup code view's function names by prefixing the file base name.
3. Remove the code view display heuristic that doesn't work in some cases.
4. Make the profile_context thread-safe.
PiperOrigin-RevId: 172031528
Diffstat (limited to 'tensorflow/core/profiler')
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_code.cc | 62 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_code.h | 3 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_node.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_node.h | 70 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_node_show.h | 8 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_stats.cc | 19 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_stats.h | 2 | ||||
-rw-r--r-- | tensorflow/core/profiler/tfprof_log.proto | 21 |
8 files changed, 122 insertions, 65 deletions
diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc index c9c0baa908..2c4f52e3ad 100644 --- a/tensorflow/core/profiler/internal/tfprof_code.cc +++ b/tensorflow/core/profiler/internal/tfprof_code.cc @@ -36,7 +36,7 @@ namespace { const char* const kGradientSuffix = " (gradient)"; // Convert to Trace proto into a short readable string. -string GetTraceString(const CodeDef::Trace& trace) { +string GetTraceString(const CallStack::Trace& trace) { string ntrace = io::Basename(trace.file()).ToString(); ntrace += strings::StrCat(":", trace.lineno()); if (trace.function().length() < 20) { @@ -112,7 +112,11 @@ class FunctionTable { pprof::Function* func_pb = &function_table_[key]; // function index should start from 1. func_pb->set_id(function_table_.size()); - func_pb->set_name(string_table_->GetIndex(func_name)); + + string file_base = io::Basename(file_path).ToString(); + file_base = file_base.substr(0, file_base.find_last_of(".")); + func_pb->set_name( + string_table_->GetIndex(strings::StrCat(file_base, ":", func_name))); func_pb->set_filename(string_table_->GetIndex(file_path)); func_pb->set_start_line(func_start_line); return func_pb->id(); @@ -142,6 +146,7 @@ class LocationTable { uint64 called_func_start_line) { auto key = std::tuple<string, string, uint64>( file_path, called_function_name, line_number); + auto idx = location_table_.find(key); if (idx != location_table_.end()) { return idx->second.id(); @@ -376,10 +381,9 @@ class PprofProfileImpl : public PprofProfile { } // namespace void TFCode::AddNode(TFGraphNode* node) { - if (node->code().traces_size() == 0) { + if (!node->call_stack() || node->call_stack()->traces().empty()) { return; } - // We infer the forward operation name from gradient op name. So, we can // map gradient op traces to forward op traces. // E.g. gradient node of 'inp_1/Conv2D' would be 'gradients/inp_1/Conv2D_grad. 
@@ -397,42 +401,26 @@ void TFCode::AddNode(TFGraphNode* node) { forward_nodes_[node->name()] = node; } - // Track if this is the first trace (first node). If true, add all - // traces to common_traces_. Otherwise, remove uncommon traces from - // common traces_. - bool first_trace = false; if (!root_) { graph_root_.reset(new TFMultiGraphNode(kTFProfRoot)); root_.reset(new CodeNode(graph_root_.get(), nullptr, "")); - first_trace = true; } CodeNode* pre_code_node = root_.get(); // TODO(xpan): Consider to release CodeDef after TFCode is built. It // takes a lot of memory. std::set<string> traces; - for (int i = 0; i < node->code().traces_size(); ++i) { + for (int i = 0; i < node->call_stack()->traces().size(); ++i) { // Unlike op name, which is globally unique, trace name is only unique // w.r.t. it's parent. - const string& trace = GetTraceString(node->code().traces(i)); + const string& trace = GetTraceString(node->call_stack()->traces().at(i)); traces.insert(trace); - pre_code_node = - pre_code_node->AddChildren(trace, &node->code().traces(i), ""); - if (i == node->code().traces_size() - 1) { + pre_code_node = pre_code_node->AddChildren( + trace, &node->call_stack()->traces().at(i), ""); + if (i == node->call_stack()->traces().size() - 1) { pre_code_node->node->AddGraphNode(node); } } - if (first_trace) { - common_traces_.insert(traces.begin(), traces.end()); - } else { - for (auto it = common_traces_.begin(); it != common_traces_.end();) { - if (traces.find(*it) == traces.end()) { - common_traces_.erase(it++); - } else { - ++it; - } - } - } } void TFCode::Build() { @@ -447,12 +435,12 @@ void TFCode::Build() { TFGraphNode* fn = forward_it->second; CodeNode* leaf = nullptr; CodeNode* pre_code_node = root_.get(); - for (int i = 0; i < fn->code().traces_size(); ++i) { + for (int i = 0; i < fn->call_stack()->traces().size(); ++i) { const string& trace = - GetTraceString(fn->code().traces(i)) + kGradientSuffix; - pre_code_node = pre_code_node->AddChildren(trace, 
&fn->code().traces(i), - kGradientSuffix); - if (i == fn->code().traces_size() - 1) { + GetTraceString(fn->call_stack()->traces().at(i)) + kGradientSuffix; + pre_code_node = pre_code_node->AddChildren( + trace, &fn->call_stack()->traces().at(i), kGradientSuffix); + if (i == fn->call_stack()->traces().size() - 1) { leaf = pre_code_node; } } @@ -463,17 +451,6 @@ void TFCode::Build() { if (unaccounted_nodes > 0) { fprintf(stderr, "%lld gradient nodes not accounted\n", unaccounted_nodes); } - - // For trace that all traces share, such as "main", "apply_op", people - // are unlikely inerested. We track them and hide them from display. - if (forward_nodes_.size() > 100) { - std::set<string> tmp = common_traces_; - for (const string& t : tmp) { - common_traces_.insert(t + kGradientSuffix); - } - } else { - common_traces_.clear(); - } } const ShowMultiNode* TFCode::ShowInternal(const Options& opts, @@ -590,8 +567,7 @@ std::vector<CodeNode*> TFCode::PrintScope(const std::vector<CodeNode*> roots, continue; } int ident = last_ident; - bool show = ShouldShow(node, opts, depth) && - common_traces_.find(node->name()) == common_traces_.end(); + bool show = ShouldShow(node, opts, depth); if (show) ident += 2; std::vector<CodeNode*> show_cnodes = diff --git a/tensorflow/core/profiler/internal/tfprof_code.h b/tensorflow/core/profiler/internal/tfprof_code.h index 82bac8f415..a118752fce 100644 --- a/tensorflow/core/profiler/internal/tfprof_code.h +++ b/tensorflow/core/profiler/internal/tfprof_code.h @@ -85,9 +85,6 @@ class TFCode : public TFMultiShow { string FormatNode(CodeNode* node, const Options& opts, int64 indent) const; string FormatNodeMemory(CodeNode* node, int64 bytes, int64 total_bytes) const; - // Common traces track the code path that all traces share. Such as - // "main()", "create_op", etc. 
- std::set<string> common_traces_; std::unique_ptr<CodeNode> root_; std::unique_ptr<TFMultiGraphNode> graph_root_; std::unique_ptr<PprofProfile> pprof_profile_; diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc index e2be2cf4cf..f283fafc0f 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.cc +++ b/tensorflow/core/profiler/internal/tfprof_node.cc @@ -227,7 +227,7 @@ std::vector<int64> ShapeProtoToVec(const TensorShapeProto& shape_pb) { return shape_vec; } -TensorShapeProto VecToShapeProto(const std::vector<int64> shape_vec) { +TensorShapeProto VecToShapeProto(const std::vector<int64>& shape_vec) { TensorShapeProto shape_pb; if (shape_vec.empty()) { shape_pb.set_unknown_rank(true); diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h index 95d199e5b9..34bc0a581d 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.h +++ b/tensorflow/core/profiler/internal/tfprof_node.h @@ -38,10 +38,52 @@ namespace tensorflow { namespace tfprof { std::vector<int64> ShapeProtoToVec(const TensorShapeProto& shape_pb); -TensorShapeProto VecToShapeProto(const std::vector<int64> shape_vec); +TensorShapeProto VecToShapeProto(const std::vector<int64>& shape_vec); class TFGraphNode; +class CallStack { + public: + class Trace { + public: + Trace(const CodeDef::Trace* trace, + const std::map<int64, string>* id_to_string) + : trace_(trace), id_to_string_(id_to_string) {} + + const int32 lineno() const { return trace_->lineno(); } + string file() const { + // Backward compatible with old proto files. + if (!trace_->file().empty()) return trace_->file(); + return id_to_string_->at(trace_->file_id()); + } + string function() const { + // Backward compatible with old proto files. 
+ if (!trace_->function().empty()) return trace_->function(); + return id_to_string_->at(trace_->function_id()); + } + int32 func_start_line() const { return trace_->func_start_line(); } + + private: + const CodeDef::Trace* trace_; + const std::map<int64, string>* id_to_string_; + }; + + CallStack(const CodeDef& def, const std::map<int64, string>* id_to_string) + : def_(def) { + traces_.reserve(def.traces_size()); + for (const auto& t : def_.traces()) { + traces_.emplace_back(&t, id_to_string); + } + } + + const CodeDef& code_def() const { return def_; } + const std::vector<Trace>& traces() const { return traces_; } + + private: + std::vector<Trace> traces_; + CodeDef def_; +}; + class ExecStep { public: ExecStep() {} @@ -195,8 +237,9 @@ class ExecStep { class TFGraphNode { public: - TFGraphNode(const ProfileNode& node, const ProfileProto& profile) { - FromProto(node, profile); + TFGraphNode(const ProfileNode& node, const ProfileProto& profile, + const std::map<int64, string>* id_to_string) { + FromProto(node, profile, id_to_string); } TFGraphNode(const NodeDef* node, int64 id) { @@ -247,7 +290,12 @@ class TFGraphNode { void AddFloatOps(int64 float_ops) { node_.set_float_ops(float_ops); } // TODO(xpan): This could take a lot of memory. 
- void AddCode(const CodeDef& code) { node_.mutable_trace()->MergeFrom(code); } + void AddCode(const CodeDef& code, + const std::map<int64, string>* id_to_string) { + if (!call_stack_) { + call_stack_.reset(new CallStack(code, id_to_string)); + } + } const string& name() const { return node_.name(); } int64 id() const { return node_.id(); } @@ -311,13 +359,21 @@ class TFGraphNode { int64 id = nodes_map.at(s.first)->id(); (*node_.mutable_src_output_index())[id] = s.second; } + + if (call_stack_) { + node_.clear_trace(); + node_.mutable_trace()->MergeFrom(call_stack_->code_def()); + } return node_; } - void FromProto(const ProfileNode& node, const ProfileProto& profile) { + void FromProto(const ProfileNode& node, const ProfileProto& profile, + const std::map<int64, string>* id_to_string) { node_.Clear(); node_.MergeFrom(node); + call_stack_.reset(new CallStack(node.trace(), id_to_string)); + op_types_.clear(); op_types_.insert(node_.op_types().begin(), node_.op_types().end()); @@ -554,7 +610,7 @@ class TFGraphNode { // Otherwise, return dynamic float_ops. return node_.float_ops() * run_count(step); } - const CodeDef& code() { return node_.trace(); } + const CallStack* call_stack() { return call_stack_.get(); } string canonical_device() const { return node_.canonical_device(); } string host_device() const { return node_.host_device(); } const std::set<string>& op_types() const { return op_types_; } @@ -582,6 +638,8 @@ class TFGraphNode { ProfileNode node_; + std::unique_ptr<CallStack> call_stack_; + std::vector<int64> shape_; // Won't missing input_idx. But some shapes might be empty (unknown). 
std::map<int, std::vector<int64>> input_shapes_; diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.h b/tensorflow/core/profiler/internal/tfprof_node_show.h index d3c5ffd7f6..3788bf3e80 100644 --- a/tensorflow/core/profiler/internal/tfprof_node_show.h +++ b/tensorflow/core/profiler/internal/tfprof_node_show.h @@ -111,12 +111,12 @@ class ShowMultiNode { class CodeNode : public ShowMultiNode { public: - CodeNode(TFMultiGraphNode* node, const CodeDef::Trace* trace, + CodeNode(TFMultiGraphNode* node, const CallStack::Trace* trace, const string& suffix) : ShowMultiNode(node), trace_(trace), suffix_(suffix) {} ~CodeNode() override {} - CodeNode* AddChildren(const string& name, const CodeDef::Trace* trace, + CodeNode* AddChildren(const string& name, const CallStack::Trace* trace, const string suffix) { auto it = children_.find(name); if (it != children_.end()) { @@ -133,7 +133,7 @@ class CodeNode : public ShowMultiNode { bool has_trace() const { return trace_ != nullptr; } const int32 lineno() const { return trace_->lineno(); } - string file() const { return trace_->file() + suffix_; } + string file() const { return trace_->file(); } string function() const { return trace_->function() + suffix_; } int32 func_start_line() const { return trace_->func_start_line(); } @@ -141,7 +141,7 @@ class CodeNode : public ShowMultiNode { std::vector<CodeNode*> show_children; private: - const CodeDef::Trace* trace_; + const CallStack::Trace* trace_; string suffix_; std::vector<std::unique_ptr<TFMultiGraphNode>> graph_children_; std::map<string, std::unique_ptr<CodeNode>> children_; diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc index eb84bada13..b4b98141f3 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats.cc +++ b/tensorflow/core/profiler/internal/tfprof_stats.cc @@ -81,9 +81,12 @@ TFStats::TFStats(const string& filename, fprintf(stderr, "Failed to parse profile\n"); return; } - + for (const auto& 
entry : profile.id_to_string()) { + id_to_string_[entry.first] = entry.second; + } for (const auto& node_pb : profile.nodes()) { - std::unique_ptr<TFGraphNode> node(new TFGraphNode(node_pb.second, profile)); + std::unique_ptr<TFGraphNode> node( + new TFGraphNode(node_pb.second, profile, &id_to_string_)); nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>( node_pb.second.name(), std::move(node))); } @@ -216,6 +219,11 @@ void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) { if (!op_log) { return; } + for (const auto& entry : op_log->id_to_string()) { + if (id_to_string_.find(entry.first) == id_to_string_.end()) { + id_to_string_[entry.first] = entry.second; + } + } for (const OpLogEntry& entry : op_log->log_entries()) { auto node = nodes_map_.find(entry.name()); if (node == nodes_map_.end()) continue; @@ -227,9 +235,7 @@ void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) { } if (entry.has_code_def()) { has_code_traces_ = true; - if (node->second->code().traces_size() == 0) { - node->second->AddCode(entry.code_def()); - } + node->second->AddCode(entry.code_def(), &id_to_string_); } } } @@ -269,6 +275,9 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) { void TFStats::WriteProfile(const string& filename) { ProfileProto profile; + for (const auto& entry : id_to_string_) { + (*profile.mutable_id_to_string())[entry.first] = entry.second; + } for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) { if (it->second->id() < 0) { continue; diff --git a/tensorflow/core/profiler/internal/tfprof_stats.h b/tensorflow/core/profiler/internal/tfprof_stats.h index 3b1251152d..bb4baea738 100644 --- a/tensorflow/core/profiler/internal/tfprof_stats.h +++ b/tensorflow/core/profiler/internal/tfprof_stats.h @@ -109,6 +109,8 @@ class TFStats { std::map<string, std::unique_ptr<TFGraphNode>> nodes_map_; GraphNodeProto empty_graph_node_; MultiGraphNodeProto empty_multi_graph_node_; + + std::map<int64, string> 
id_to_string_; }; } // namespace tfprof diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto index ae571e2540..a1410c7c79 100644 --- a/tensorflow/core/profiler/tfprof_log.proto +++ b/tensorflow/core/profiler/tfprof_log.proto @@ -8,10 +8,17 @@ import "tensorflow/core/framework/attr_value.proto"; message CodeDef { repeated Trace traces = 1; message Trace { - string file = 1; + string file = 1 [deprecated = true]; // deprecated by file_id. + int64 file_id = 6; + int32 lineno = 2; - string function = 3; - string line = 4; + + string function = 3 [deprecated = true]; // deprecated by function_id. + int64 function_id = 7; + + string line = 4 [deprecated = true]; // deprecated line_id. + int64 line_id = 8; + int32 func_start_line = 5; } } @@ -32,6 +39,10 @@ message OpLogEntry { message OpLogProto { repeated OpLogEntry log_entries = 1; + + // Maps from id of CodeDef file,function,line to its string + // In the future can also map other id of other fields to string. + map<int64, string> id_to_string = 2; } // A proto representation of the profiler's profile. @@ -44,6 +55,10 @@ message ProfileProto { bool has_trace = 2; // Traced steps. repeated int64 steps = 3; + + // Maps from id of CodeDef file,function,line to its string + // In the future can also map other id of other fields to string. + map<int64, string> id_to_string = 4; } message ProfileNode { |