author    A. Unique TensorFlower <gardener@tensorflow.org>   2017-10-12 16:39:22 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>    2017-10-12 16:43:56 -0700
commit    19708cc7d8e34e830a716d3f9896294489d3b535 (patch)
tree      2fe7b78976ee65a5175ccf4121c168a7c8d9c2d3 /tensorflow/core/profiler
parent    1002f974f58b23c528436e34c06384b8bffb2485 (diff)
A few profiler improvements.
1. Use an id_to_string map to reduce the profile size (2/3 in xception).
2. Dedup code view's function name with an extra file base name.
3. Remove a code view display heuristic that doesn't work in some cases.
4. Make the profile_context thread-safe.

PiperOrigin-RevId: 172031528
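Item 1 works by interning long strings (file paths, function names) once and referring to them by integer id elsewhere in the profile. A rough, hypothetical sketch of that idea follows; the class and method names are illustrative, not the actual tfprof StringTable/FunctionTable API.

    #include <cstdint>
    #include <map>
    #include <string>

    // Illustrative string table: each distinct string gets a stable id, and
    // only the id -> string map is serialized with the profile (compare
    // ProfileProto.id_to_string and OpLogProto.id_to_string in the diff below).
    class StringTable {
     public:
      int64_t GetIndex(const std::string& s) {
        auto it = ids_.find(s);
        if (it != ids_.end()) return it->second;
        const int64_t id = static_cast<int64_t>(ids_.size()) + 1;  // 0 = unset
        ids_[s] = id;
        id_to_string_[id] = s;
        return id;
      }
      const std::map<int64_t, std::string>& id_to_string() const {
        return id_to_string_;
      }

     private:
      std::map<std::string, int64_t> ids_;
      std::map<int64_t, std::string> id_to_string_;
    };

Repeated stack frames then cost one int64 per file/function field instead of a full string, which is where the size reduction comes from.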
Diffstat (limited to 'tensorflow/core/profiler')
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_code.cc      | 62
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_code.h       |  3
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_node.cc      |  2
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_node.h       | 70
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_node_show.h  |  8
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_stats.cc     | 19
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_stats.h      |  2
-rw-r--r--  tensorflow/core/profiler/tfprof_log.proto             | 21
8 files changed, 122 insertions, 65 deletions
diff --git a/tensorflow/core/profiler/internal/tfprof_code.cc b/tensorflow/core/profiler/internal/tfprof_code.cc
index c9c0baa908..2c4f52e3ad 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.cc
+++ b/tensorflow/core/profiler/internal/tfprof_code.cc
@@ -36,7 +36,7 @@ namespace {
const char* const kGradientSuffix = " (gradient)";
// Convert a Trace proto into a short readable string.
-string GetTraceString(const CodeDef::Trace& trace) {
+string GetTraceString(const CallStack::Trace& trace) {
string ntrace = io::Basename(trace.file()).ToString();
ntrace += strings::StrCat(":", trace.lineno());
if (trace.function().length() < 20) {
@@ -112,7 +112,11 @@ class FunctionTable {
pprof::Function* func_pb = &function_table_[key];
// function index should start from 1.
func_pb->set_id(function_table_.size());
- func_pb->set_name(string_table_->GetIndex(func_name));
+
+ string file_base = io::Basename(file_path).ToString();
+ file_base = file_base.substr(0, file_base.find_last_of("."));
+ func_pb->set_name(
+ string_table_->GetIndex(strings::StrCat(file_base, ":", func_name)));
func_pb->set_filename(string_table_->GetIndex(file_path));
func_pb->set_start_line(func_start_line);
return func_pb->id();
@@ -142,6 +146,7 @@ class LocationTable {
uint64 called_func_start_line) {
auto key = std::tuple<string, string, uint64>(
file_path, called_function_name, line_number);
+
auto idx = location_table_.find(key);
if (idx != location_table_.end()) {
return idx->second.id();
@@ -376,10 +381,9 @@ class PprofProfileImpl : public PprofProfile {
} // namespace
void TFCode::AddNode(TFGraphNode* node) {
- if (node->code().traces_size() == 0) {
+ if (!node->call_stack() || node->call_stack()->traces().empty()) {
return;
}
-
// We infer the forward operation name from gradient op name. So, we can
// map gradient op traces to forward op traces.
// E.g. gradient node of 'inp_1/Conv2D' would be 'gradients/inp_1/Conv2D_grad.
@@ -397,42 +401,26 @@ void TFCode::AddNode(TFGraphNode* node) {
forward_nodes_[node->name()] = node;
}
- // Track if this is the first trace (first node). If true, add all
- // traces to common_traces_. Otherwise, remove uncommon traces from
- // common traces_.
- bool first_trace = false;
if (!root_) {
graph_root_.reset(new TFMultiGraphNode(kTFProfRoot));
root_.reset(new CodeNode(graph_root_.get(), nullptr, ""));
- first_trace = true;
}
CodeNode* pre_code_node = root_.get();
// TODO(xpan): Consider to release CodeDef after TFCode is built. It
// takes a lot of memory.
std::set<string> traces;
- for (int i = 0; i < node->code().traces_size(); ++i) {
+ for (int i = 0; i < node->call_stack()->traces().size(); ++i) {
// Unlike op name, which is globally unique, trace name is only unique
// w.r.t. its parent.
- const string& trace = GetTraceString(node->code().traces(i));
+ const string& trace = GetTraceString(node->call_stack()->traces().at(i));
traces.insert(trace);
- pre_code_node =
- pre_code_node->AddChildren(trace, &node->code().traces(i), "");
- if (i == node->code().traces_size() - 1) {
+ pre_code_node = pre_code_node->AddChildren(
+ trace, &node->call_stack()->traces().at(i), "");
+ if (i == node->call_stack()->traces().size() - 1) {
pre_code_node->node->AddGraphNode(node);
}
}
- if (first_trace) {
- common_traces_.insert(traces.begin(), traces.end());
- } else {
- for (auto it = common_traces_.begin(); it != common_traces_.end();) {
- if (traces.find(*it) == traces.end()) {
- common_traces_.erase(it++);
- } else {
- ++it;
- }
- }
- }
}
void TFCode::Build() {
@@ -447,12 +435,12 @@ void TFCode::Build() {
TFGraphNode* fn = forward_it->second;
CodeNode* leaf = nullptr;
CodeNode* pre_code_node = root_.get();
- for (int i = 0; i < fn->code().traces_size(); ++i) {
+ for (int i = 0; i < fn->call_stack()->traces().size(); ++i) {
const string& trace =
- GetTraceString(fn->code().traces(i)) + kGradientSuffix;
- pre_code_node = pre_code_node->AddChildren(trace, &fn->code().traces(i),
- kGradientSuffix);
- if (i == fn->code().traces_size() - 1) {
+ GetTraceString(fn->call_stack()->traces().at(i)) + kGradientSuffix;
+ pre_code_node = pre_code_node->AddChildren(
+ trace, &fn->call_stack()->traces().at(i), kGradientSuffix);
+ if (i == fn->call_stack()->traces().size() - 1) {
leaf = pre_code_node;
}
}
@@ -463,17 +451,6 @@ void TFCode::Build() {
if (unaccounted_nodes > 0) {
fprintf(stderr, "%lld gradient nodes not accounted\n", unaccounted_nodes);
}
-
- // For trace that all traces share, such as "main", "apply_op", people
- // are unlikely inerested. We track them and hide them from display.
- if (forward_nodes_.size() > 100) {
- std::set<string> tmp = common_traces_;
- for (const string& t : tmp) {
- common_traces_.insert(t + kGradientSuffix);
- }
- } else {
- common_traces_.clear();
- }
}
const ShowMultiNode* TFCode::ShowInternal(const Options& opts,
@@ -590,8 +567,7 @@ std::vector<CodeNode*> TFCode::PrintScope(const std::vector<CodeNode*> roots,
continue;
}
int ident = last_ident;
- bool show = ShouldShow(node, opts, depth) &&
- common_traces_.find(node->name()) == common_traces_.end();
+ bool show = ShouldShow(node, opts, depth);
if (show) ident += 2;
std::vector<CodeNode*> show_cnodes =
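Aside: the name dedup added to FunctionTable::GetIndex above (item 2 of the commit message) amounts to prefixing the pprof function name with the file's base name, extension stripped. A hedged, standalone sketch using plain std::string instead of io::Basename/strings::StrCat; the helper name is made up.

    #include <string>

    // Illustrative only: prefix a function name with the file's base name
    // (directory and extension stripped) so that functions with the same
    // name in different files stay distinct in the pprof output.
    std::string DedupedFunctionName(const std::string& file_path,
                                    const std::string& func_name) {
      std::string base = file_path.substr(file_path.find_last_of('/') + 1);
      const std::string::size_type dot = base.find_last_of('.');
      if (dot != std::string::npos) base = base.substr(0, dot);
      return base + ":" + func_name;  // e.g. ("model.py", "build") -> "model:build"
    }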
diff --git a/tensorflow/core/profiler/internal/tfprof_code.h b/tensorflow/core/profiler/internal/tfprof_code.h
index 82bac8f415..a118752fce 100644
--- a/tensorflow/core/profiler/internal/tfprof_code.h
+++ b/tensorflow/core/profiler/internal/tfprof_code.h
@@ -85,9 +85,6 @@ class TFCode : public TFMultiShow {
string FormatNode(CodeNode* node, const Options& opts, int64 indent) const;
string FormatNodeMemory(CodeNode* node, int64 bytes, int64 total_bytes) const;
- // Common traces track the code path that all traces share. Such as
- // "main()", "create_op", etc.
- std::set<string> common_traces_;
std::unique_ptr<CodeNode> root_;
std::unique_ptr<TFMultiGraphNode> graph_root_;
std::unique_ptr<PprofProfile> pprof_profile_;
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index e2be2cf4cf..f283fafc0f 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -227,7 +227,7 @@ std::vector<int64> ShapeProtoToVec(const TensorShapeProto& shape_pb) {
return shape_vec;
}
-TensorShapeProto VecToShapeProto(const std::vector<int64> shape_vec) {
+TensorShapeProto VecToShapeProto(const std::vector<int64>& shape_vec) {
TensorShapeProto shape_pb;
if (shape_vec.empty()) {
shape_pb.set_unknown_rank(true);
diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h
index 95d199e5b9..34bc0a581d 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.h
+++ b/tensorflow/core/profiler/internal/tfprof_node.h
@@ -38,10 +38,52 @@ namespace tensorflow {
namespace tfprof {
std::vector<int64> ShapeProtoToVec(const TensorShapeProto& shape_pb);
-TensorShapeProto VecToShapeProto(const std::vector<int64> shape_vec);
+TensorShapeProto VecToShapeProto(const std::vector<int64>& shape_vec);
class TFGraphNode;
+class CallStack {
+ public:
+ class Trace {
+ public:
+ Trace(const CodeDef::Trace* trace,
+ const std::map<int64, string>* id_to_string)
+ : trace_(trace), id_to_string_(id_to_string) {}
+
+ const int32 lineno() const { return trace_->lineno(); }
+ string file() const {
+ // Backward compatible with old proto files.
+ if (!trace_->file().empty()) return trace_->file();
+ return id_to_string_->at(trace_->file_id());
+ }
+ string function() const {
+ // Backward compatible with old proto files.
+ if (!trace_->function().empty()) return trace_->function();
+ return id_to_string_->at(trace_->function_id());
+ }
+ int32 func_start_line() const { return trace_->func_start_line(); }
+
+ private:
+ const CodeDef::Trace* trace_;
+ const std::map<int64, string>* id_to_string_;
+ };
+
+ CallStack(const CodeDef& def, const std::map<int64, string>* id_to_string)
+ : def_(def) {
+ traces_.reserve(def.traces_size());
+ for (const auto& t : def_.traces()) {
+ traces_.emplace_back(&t, id_to_string);
+ }
+ }
+
+ const CodeDef& code_def() const { return def_; }
+ const std::vector<Trace>& traces() const { return traces_; }
+
+ private:
+ std::vector<Trace> traces_;
+ CodeDef def_;
+};
+
class ExecStep {
public:
ExecStep() {}
@@ -195,8 +237,9 @@ class ExecStep {
class TFGraphNode {
public:
- TFGraphNode(const ProfileNode& node, const ProfileProto& profile) {
- FromProto(node, profile);
+ TFGraphNode(const ProfileNode& node, const ProfileProto& profile,
+ const std::map<int64, string>* id_to_string) {
+ FromProto(node, profile, id_to_string);
}
TFGraphNode(const NodeDef* node, int64 id) {
@@ -247,7 +290,12 @@ class TFGraphNode {
void AddFloatOps(int64 float_ops) { node_.set_float_ops(float_ops); }
// TODO(xpan): This could take a lot of memory.
- void AddCode(const CodeDef& code) { node_.mutable_trace()->MergeFrom(code); }
+ void AddCode(const CodeDef& code,
+ const std::map<int64, string>* id_to_string) {
+ if (!call_stack_) {
+ call_stack_.reset(new CallStack(code, id_to_string));
+ }
+ }
const string& name() const { return node_.name(); }
int64 id() const { return node_.id(); }
@@ -311,13 +359,21 @@ class TFGraphNode {
int64 id = nodes_map.at(s.first)->id();
(*node_.mutable_src_output_index())[id] = s.second;
}
+
+ if (call_stack_) {
+ node_.clear_trace();
+ node_.mutable_trace()->MergeFrom(call_stack_->code_def());
+ }
return node_;
}
- void FromProto(const ProfileNode& node, const ProfileProto& profile) {
+ void FromProto(const ProfileNode& node, const ProfileProto& profile,
+ const std::map<int64, string>* id_to_string) {
node_.Clear();
node_.MergeFrom(node);
+ call_stack_.reset(new CallStack(node.trace(), id_to_string));
+
op_types_.clear();
op_types_.insert(node_.op_types().begin(), node_.op_types().end());
@@ -554,7 +610,7 @@ class TFGraphNode {
// Otherwise, return dynamic float_ops.
return node_.float_ops() * run_count(step);
}
- const CodeDef& code() { return node_.trace(); }
+ const CallStack* call_stack() { return call_stack_.get(); }
string canonical_device() const { return node_.canonical_device(); }
string host_device() const { return node_.host_device(); }
const std::set<string>& op_types() const { return op_types_; }
@@ -582,6 +638,8 @@ class TFGraphNode {
ProfileNode node_;
+ std::unique_ptr<CallStack> call_stack_;
+
std::vector<int64> shape_;
// Won't missing input_idx. But some shapes might be empty (unknown).
std::map<int, std::vector<int64>> input_shapes_;
diff --git a/tensorflow/core/profiler/internal/tfprof_node_show.h b/tensorflow/core/profiler/internal/tfprof_node_show.h
index d3c5ffd7f6..3788bf3e80 100644
--- a/tensorflow/core/profiler/internal/tfprof_node_show.h
+++ b/tensorflow/core/profiler/internal/tfprof_node_show.h
@@ -111,12 +111,12 @@ class ShowMultiNode {
class CodeNode : public ShowMultiNode {
public:
- CodeNode(TFMultiGraphNode* node, const CodeDef::Trace* trace,
+ CodeNode(TFMultiGraphNode* node, const CallStack::Trace* trace,
const string& suffix)
: ShowMultiNode(node), trace_(trace), suffix_(suffix) {}
~CodeNode() override {}
- CodeNode* AddChildren(const string& name, const CodeDef::Trace* trace,
+ CodeNode* AddChildren(const string& name, const CallStack::Trace* trace,
const string suffix) {
auto it = children_.find(name);
if (it != children_.end()) {
@@ -133,7 +133,7 @@ class CodeNode : public ShowMultiNode {
bool has_trace() const { return trace_ != nullptr; }
const int32 lineno() const { return trace_->lineno(); }
- string file() const { return trace_->file() + suffix_; }
+ string file() const { return trace_->file(); }
string function() const { return trace_->function() + suffix_; }
int32 func_start_line() const { return trace_->func_start_line(); }
@@ -141,7 +141,7 @@ class CodeNode : public ShowMultiNode {
std::vector<CodeNode*> show_children;
private:
- const CodeDef::Trace* trace_;
+ const CallStack::Trace* trace_;
string suffix_;
std::vector<std::unique_ptr<TFMultiGraphNode>> graph_children_;
std::map<string, std::unique_ptr<CodeNode>> children_;
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc
index eb84bada13..b4b98141f3 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats.cc
@@ -81,9 +81,12 @@ TFStats::TFStats(const string& filename,
fprintf(stderr, "Failed to parse profile\n");
return;
}
-
+ for (const auto& entry : profile.id_to_string()) {
+ id_to_string_[entry.first] = entry.second;
+ }
for (const auto& node_pb : profile.nodes()) {
- std::unique_ptr<TFGraphNode> node(new TFGraphNode(node_pb.second, profile));
+ std::unique_ptr<TFGraphNode> node(
+ new TFGraphNode(node_pb.second, profile, &id_to_string_));
nodes_map_.insert(std::pair<string, std::unique_ptr<TFGraphNode>>(
node_pb.second.name(), std::move(node)));
}
@@ -216,6 +219,11 @@ void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
if (!op_log) {
return;
}
+ for (const auto& entry : op_log->id_to_string()) {
+ if (id_to_string_.find(entry.first) == id_to_string_.end()) {
+ id_to_string_[entry.first] = entry.second;
+ }
+ }
for (const OpLogEntry& entry : op_log->log_entries()) {
auto node = nodes_map_.find(entry.name());
if (node == nodes_map_.end()) continue;
@@ -227,9 +235,7 @@ void TFStats::AddOpLogProto(std::unique_ptr<OpLogProto> op_log) {
}
if (entry.has_code_def()) {
has_code_traces_ = true;
- if (node->second->code().traces_size() == 0) {
- node->second->AddCode(entry.code_def());
- }
+ node->second->AddCode(entry.code_def(), &id_to_string_);
}
}
}
@@ -269,6 +275,9 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) {
void TFStats::WriteProfile(const string& filename) {
ProfileProto profile;
+ for (const auto& entry : id_to_string_) {
+ (*profile.mutable_id_to_string())[entry.first] = entry.second;
+ }
for (auto it = nodes_map_.begin(); it != nodes_map_.end(); it++) {
if (it->second->id() < 0) {
continue;
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.h b/tensorflow/core/profiler/internal/tfprof_stats.h
index 3b1251152d..bb4baea738 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.h
+++ b/tensorflow/core/profiler/internal/tfprof_stats.h
@@ -109,6 +109,8 @@ class TFStats {
std::map<string, std::unique_ptr<TFGraphNode>> nodes_map_;
GraphNodeProto empty_graph_node_;
MultiGraphNodeProto empty_multi_graph_node_;
+
+ std::map<int64, string> id_to_string_;
};
} // namespace tfprof
diff --git a/tensorflow/core/profiler/tfprof_log.proto b/tensorflow/core/profiler/tfprof_log.proto
index ae571e2540..a1410c7c79 100644
--- a/tensorflow/core/profiler/tfprof_log.proto
+++ b/tensorflow/core/profiler/tfprof_log.proto
@@ -8,10 +8,17 @@ import "tensorflow/core/framework/attr_value.proto";
message CodeDef {
repeated Trace traces = 1;
message Trace {
- string file = 1;
+ string file = 1 [deprecated = true]; // deprecated by file_id.
+ int64 file_id = 6;
+
int32 lineno = 2;
- string function = 3;
- string line = 4;
+
+ string function = 3 [deprecated = true]; // deprecated by function_id.
+ int64 function_id = 7;
+
+ string line = 4 [deprecated = true]; // deprecated by line_id.
+ int64 line_id = 8;
+
int32 func_start_line = 5;
}
}
@@ -32,6 +39,10 @@ message OpLogEntry {
message OpLogProto {
repeated OpLogEntry log_entries = 1;
+
+ // Maps from the id of a CodeDef file, function, or line to its string.
+ // In the future this can also map ids of other fields to strings.
+ map<int64, string> id_to_string = 2;
}
// A proto representation of the profiler's profile.
@@ -44,6 +55,10 @@ message ProfileProto {
bool has_trace = 2;
// Traced steps.
repeated int64 steps = 3;
+
+ // Maps from the id of a CodeDef file, function, or line to its string.
+ // In the future this can also map ids of other fields to strings.
+ map<int64, string> id_to_string = 4;
}
message ProfileNode {
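For completeness, a sketch of how a producer could fill the new id-based proto fields. It assumes the standard protobuf-generated C++ accessors for the messages above (add_traces, set_file_id, set_function_id, mutable_id_to_string) and a tensorflow.tfprof package; the Intern helper and overall flow are hypothetical, not code from this commit.

    #include <cstdint>
    #include <map>
    #include <string>

    #include "tensorflow/core/profiler/tfprof_log.pb.h"  // generated from the proto above

    // Hypothetical helper: intern a string into OpLogProto.id_to_string and
    // return its id (0 is left unused so "no id" stays distinguishable).
    int64_t Intern(std::map<std::string, int64_t>* ids,
                   tensorflow::tfprof::OpLogProto* log,
                   const std::string& s) {
      auto it = ids->find(s);
      if (it != ids->end()) return it->second;
      const int64_t id = static_cast<int64_t>(ids->size()) + 1;
      (*ids)[s] = id;
      (*log->mutable_id_to_string())[id] = s;
      return id;
    }

    // Append one stack frame, writing file_id/function_id instead of the
    // deprecated inline strings.
    void AddTrace(std::map<std::string, int64_t>* ids,
                  tensorflow::tfprof::OpLogProto* log,
                  tensorflow::tfprof::CodeDef* code,
                  const std::string& file, int lineno,
                  const std::string& function, int func_start_line) {
      auto* trace = code->add_traces();
      trace->set_file_id(Intern(ids, log, file));
      trace->set_lineno(lineno);
      trace->set_function_id(Intern(ids, log, function));
      trace->set_func_start_line(func_start_line);
    }

On the reading side, CallStack::Trace in tfprof_node.h (earlier in this diff) falls back from the deprecated inline strings to these ids, which keeps old profile files loadable.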