about summary refs log tree commit diff homepage
path: root/tensorflow/core/profiler/internal/tfprof_node.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/profiler/internal/tfprof_node.h')
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_node.h  135
1 file changed, 107 insertions, 28 deletions
diff --git a/tensorflow/core/profiler/internal/tfprof_node.h b/tensorflow/core/profiler/internal/tfprof_node.h
index 34bc0a581d..e2d0563a07 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.h
+++ b/tensorflow/core/profiler/internal/tfprof_node.h
@@ -105,8 +105,22 @@ class ExecStep {
const {
return op_execs_;
}
+ const std::map<string, std::vector<std::pair<int64, int64>>>& cpu_execs()
+ const {
+ return cpu_execs_;
+ }
+
int64 all_start_micros() const { return exec_.all_start_micros(); }
int64 latest_end_micros() const { return exec_.latest_end_micros(); }
+ int64 lastest_schedule_end_micros() const {
+ int64 ret = 0;
+ for (const auto& exec : cpu_execs_) {
+ for (const auto& pair : exec.second) {
+ ret = std::max(ret, pair.first + pair.second);
+ }
+ }
+ return ret;
+ }
int64 requested_bytes() const { return exec_.requested_bytes(); }
int64 peak_bytes() const { return exec_.peak_bytes(); }
@@ -127,6 +141,8 @@ class ExecStep {
return exec_.allocator_bytes_in_use();
}
+ const std::vector<Allocation>& allocations() const { return allocations_; }
+
const ExecProfile& ToProto() {
exec_.mutable_accelerator_execs()->clear();
for (const auto& e : accelerator_execs_) {
@@ -161,6 +177,11 @@ class ExecStep {
mem_pb.set_ptr(mem.second.second);
}
+ exec_.mutable_allocations()->Clear();
+ for (const auto& r : allocations_) {
+ exec_.add_allocations()->MergeFrom(r);
+ }
+
return exec_;
}
@@ -175,6 +196,8 @@ class ExecStep {
cpu_execs_.clear();
op_execs_.clear();
+ allocations_.clear();
+
for (const auto& exec_time : exec_.accelerator_execs()) {
auto& exec = accelerator_execs_[exec_time.first];
auto& op_exec = op_execs_[exec_time.first];
@@ -196,6 +219,10 @@ class ExecStep {
mem.first = output_mem.second.bytes();
mem.second = output_mem.second.ptr();
}
+
+ for (const auto& r : exec_.allocations()) {
+ allocations_.push_back(r);
+ }
}
private:
@@ -215,6 +242,9 @@ class ExecStep {
std::set<string> devices_;
// output_idx -> {output_bytes, memory_ptr}
std::map<int32, std::pair<int64, uint64>> output_memory_;
+
+ // The history of accelerator allocations and deallocations of this step.
+ std::vector<Allocation> allocations_;
};
#define GRAPH_NODE_BYTES(type) \
@@ -238,11 +268,15 @@ class ExecStep {
class TFGraphNode {
public:
TFGraphNode(const ProfileNode& node, const ProfileProto& profile,
- const std::map<int64, string>* id_to_string) {
+ const std::map<int64, string>* id_to_string,
+ const std::map<string, std::unique_ptr<TFGraphNode>>* nodes_map) {
+ nodes_map_ = nodes_map;
FromProto(node, profile, id_to_string);
}
- TFGraphNode(const NodeDef* node, int64 id) {
+ TFGraphNode(const NodeDef* node, int64 id,
+ const std::map<string, std::unique_ptr<TFGraphNode>>* nodes_map) {
+ nodes_map_ = nodes_map;
node_.set_id(id);
node_.set_name(node->name());
node_.set_op(node->op());
@@ -269,17 +303,9 @@ class TFGraphNode {
op_types_.insert(node->op());
}
- void AddInput(TFGraphNode* input, int32 output_idx, int input_idx) {
- src_output_idx_[input->name()] = output_idx;
-
- inputs_[input_idx] = input->name();
- const auto& output_shape = input->output_shapes().find(output_idx);
- // Always create an empty vec even if the shape info might be missing.
- std::vector<int64>& shape_vec = input_shapes_[input_idx];
- if (output_shape != input->output_shapes().end()) {
- shape_vec.assign(output_shape->second.begin(),
- output_shape->second.end());
- }
+ void AddInput(const string& input, int64 output_index, int input_idx) {
+ inputs_[input_idx] = input;
+ src_output_idx_[input] = output_index;
}
void AddOpType(const string& op_type) { op_types_.insert(op_type); }
@@ -416,9 +442,6 @@ class TFGraphNode {
}
const std::map<int32, string>& inputs() const { return inputs_; }
- const std::map<string, int32>& src_output_idx() const {
- return src_output_idx_;
- }
// Number of times the graph node is executed. When step < 0, the
// average number of times executed across all steps.
@@ -526,14 +549,30 @@ class TFGraphNode {
return exec->second.latest_end_micros();
}
+ int64 lastest_schedule_end_micros(int64 step) const {
+ auto exec = execs_.find(step);
+ if (exec == execs_.end()) {
+ return 0;
+ }
+ return exec->second.lastest_schedule_end_micros();
+ }
+
const std::map<string, std::vector<std::pair<int64, int64>>>& op_execs(
int64 step) const {
auto exec = execs_.find(step);
if (exec == execs_.end()) {
- return empty_op_execs_;
+ return empty_execs_;
}
return exec->second.op_execs();
}
+ const std::map<string, std::vector<std::pair<int64, int64>>>& cpu_execs(
+ int64 step) const {
+ auto exec = execs_.find(step);
+ if (exec == execs_.end()) {
+ return empty_execs_;
+ }
+ return exec->second.cpu_execs();
+ }
const std::map<int64, ExecStep>& all_op_execs() const { return execs_; }
@@ -551,12 +590,12 @@ class TFGraphNode {
}
return exec->second.host_temp_bytes();
}
- int64 accelerator_persistent_bytes(int64 step) const {
- auto exec = execs_.find(step);
- if (exec == execs_.end()) {
- return 0;
+ int64 accelerator_persistent_bytes() const {
+ int64 persistent_bytes = 0;
+ for (const auto& exec : execs_) {
+ persistent_bytes += exec.second.accelerator_persistent_bytes();
}
- return exec->second.accelerator_persistent_bytes();
+ return persistent_bytes;
}
int64 host_persistent_bytes(int64 step) const {
auto exec = execs_.find(step);
@@ -581,6 +620,14 @@ class TFGraphNode {
return exec->second.allocator_bytes_in_use();
}
+ const std::vector<Allocation>& allocations(int64 step) const {
+ auto exec = execs_.find(step);
+ if (exec == execs_.end()) {
+ return empty_allocations_;
+ }
+ return exec->second.allocations();
+ }
+
int64 parameters() const {
if (!shape().empty()) {
int64 params = 1;
@@ -628,18 +675,44 @@ class TFGraphNode {
const std::map<int, std::vector<int64>>& output_shapes() const {
return output_shapes_;
}
- const std::map<int, std::vector<int64>>& input_shapes() const {
- return input_shapes_;
+
+ const std::map<int, std::vector<int64>> input_shapes() const {
+ std::map<int, std::vector<int64>> input_shapes;
+ for (const auto& inp : inputs_) {
+ // Always create an empty vec even if the shape info might be missing.
+ std::vector<int64>& shape_vec = input_shapes[inp.first];
+ if (!nodes_map_) continue;
+ auto input_it = nodes_map_->find(inp.second);
+ if (input_it == nodes_map_->end()) continue;
+ auto output_it = src_output_idx_.find(inp.second);
+ if (output_it == src_output_idx_.end()) continue;
+
+ const TFGraphNode* input_node = input_it->second.get();
+ if (!input_node) continue;
+ const auto& output_shapes = input_node->output_shapes();
+ const auto& output_shape = output_shapes.find(output_it->second);
+ if (output_shape == output_shapes.end()) continue;
+
+ if (output_shape != input_node->output_shapes().end()) {
+ shape_vec.assign(output_shape->second.begin(),
+ output_shape->second.end());
+ }
+ }
+ return input_shapes;
}
private:
+ // maps graph node name to TFGraphNode. Not owned.
+ const std::map<string, std::unique_ptr<TFGraphNode>>* nodes_map_;
+ // inputs to the node. input index -> input node name.
std::map<int, string> inputs_;
+ // The output index of the source node.
std::map<string, int32> src_output_idx_;
-
+ // proto for serialize/deserialized representation of the node.
ProfileNode node_;
-
+ // Python call stack that creates the name.
std::unique_ptr<CallStack> call_stack_;
-
+ // Shape of the node (e.g. Variable) if available.
std::vector<int64> shape_;
// Won't missing input_idx. But some shapes might be empty (unknown).
std::map<int, std::vector<int64>> input_shapes_;
@@ -651,8 +724,10 @@ class TFGraphNode {
std::map<int64, ExecStep> execs_;
+ // Placeholder for empty cases.
std::map<int32, std::pair<int64, uint64>> empty_output_memory_;
- std::map<string, std::vector<std::pair<int64, int64>>> empty_op_execs_;
+ std::map<string, std::vector<std::pair<int64, int64>>> empty_execs_;
+ std::vector<Allocation> empty_allocations_;
};
class TFMultiGraphNode {
@@ -806,6 +881,10 @@ class TFMultiGraphNode {
};
bool IsPlacedOnAccelerator(const string& device);
+bool CountAsAcceleratorTime(const string& device);
+bool CountAsCPUTime(const string& device);
+bool IsCanonicalDevice(const string& device);
+
} // namespace tfprof
} // namespace tensorflow