about summary refs log tree commit diff homepage
path: root/tensorflow/tools/tfprof/internal/tfprof_node.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/tools/tfprof/internal/tfprof_node.h')
-rw-r--r-- tensorflow/tools/tfprof/internal/tfprof_node.h 68
1 files changed, 39 insertions, 29 deletions
diff --git a/tensorflow/tools/tfprof/internal/tfprof_node.h b/tensorflow/tools/tfprof/internal/tfprof_node.h
index 235904ea6c..8e57db7ba2 100644
--- a/tensorflow/tools/tfprof/internal/tfprof_node.h
+++ b/tensorflow/tools/tfprof/internal/tfprof_node.h
@@ -23,12 +23,12 @@ limitations under the License.
#include "tensorflow/core/framework/allocation_description.pb.h"
#include "tensorflow/core/framework/attr_value.pb.h"
-#include "tensorflow/core/framework/cost_graph.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/step_stats.pb.h"
#include "tensorflow/core/framework/tensor_description.pb.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/tools/tfprof/internal/tfprof_options.h"
#include "tensorflow/tools/tfprof/tfprof_log.pb.h"
@@ -41,10 +41,6 @@ class TFGraphNode {
: node_(node),
code_(nullptr),
step_stat_(nullptr),
- op_start_micros_(0),
- op_schedule_micros_(0),
- kernel_compute_micros_(0),
- all_spent_micros_(0),
requested_bytes_(0),
float_ops_(0) {
if (!node) return;
@@ -69,38 +65,51 @@ class TFGraphNode {
update_shape(shape_vec);
}
op_types_.insert(node->op());
- device_ = node->device();
}
TFGraphNode() : TFGraphNode(nullptr) {}
- void AddInput(TFGraphNode* input) { inputs_[input->name()] = input; }
+ void AddInput(TFGraphNode* input, int64 output_idx) {
+ inputs_[input->name()] = input;
+ output_idx_[input->name()] = output_idx;
+ }
void AddOpType(const string& op_type) { op_types_.insert(op_type); }
void AddStepStat(const string& device, const NodeExecStats* step_stat);
- // Add CostGraphDef::Node.
- void AddNodeStat(const CostGraphDef::Node* cost_node);
-
void AddFloatOps(int64 float_ops) { float_ops_ = float_ops; }
void AddCode(const CodeDef* code) { code_ = code; }
const string& name() const { return node_->name(); }
const NodeDef* node_def() { return node_; }
+
+ const NodeExecStats* step_stats() const { return step_stat_; }
+
const std::map<string, TFGraphNode*>& inputs() const { return inputs_; }
- int64 op_start_micros() { return op_start_micros_; }
- // This is time spent in Op::Compute(), which is GPU kernel schedule time.
- // Currently not used.
- int64 op_schedule_micros() { return op_schedule_micros_; }
+ const std::map<string, int64>& output_idx() { return output_idx_; }
+
// This is time spent in kernel execution.
- int64 kernel_compute_micros() const { return kernel_compute_micros_; }
- int64 all_spent_micros() { return all_spent_micros_; }
+ int64 kernel_exec_micros() const {
+ if (!step_stat_) return 0;
+ int64 total = 0;
+ for (const auto& execs : op_kernel_execs_) {
+ for (const auto& exec : execs.second) {
+ total += exec.second;
+ }
+ }
+ return total;
+ }
+ const std::map<string, std::vector<std::pair<int64, int64>>>&
+ op_kernel_execs() const {
+ return op_kernel_execs_;
+ }
+
int64 requested_bytes() const { return requested_bytes_; }
int64 float_ops() const { return float_ops_; }
const CodeDef* code() { return code_; }
- string device() const { return device_; }
+ std::set<string> devices() const { return devices_; }
const std::set<string>& op_types() const { return op_types_; }
const std::vector<int64>& shape() const { return shape_; }
@@ -109,17 +118,19 @@ class TFGraphNode {
void update_shape(const std::vector<int64>& shape) { shape_ = shape; }
std::map<string, TFGraphNode*> inputs_;
+ std::map<string, int64> output_idx_;
+
const NodeDef* node_;
const CodeDef* code_;
const NodeExecStats* step_stat_;
std::vector<int64> shape_;
std::set<string> op_types_;
- string device_;
- int64 op_start_micros_;
- int64 op_schedule_micros_;
- int64 kernel_compute_micros_;
- int64 all_spent_micros_;
+
+ // device -> vector of {op_start_micros, op_kernel_exec_micros} pairs.
+ std::map<string, std::vector<std::pair<int64, int64>>> op_kernel_execs_;
+
+ std::set<string> devices_;
int64 requested_bytes_;
int64 float_ops_;
};
@@ -128,7 +139,7 @@ class TFCodeNode {
public:
TFCodeNode(const string& trace)
: trace_(trace),
- kernel_compute_micros_(0),
+ kernel_exec_micros_(0),
requested_bytes_(0),
float_ops_(0) {}
@@ -138,16 +149,15 @@ class TFCodeNode {
}
nodes_[node->name()] = node;
- kernel_compute_micros_ += node->kernel_compute_micros();
+ kernel_exec_micros_ += node->kernel_exec_micros();
requested_bytes_ += node->requested_bytes();
float_ops_ += node->float_ops();
op_types_.insert(node->op_types().begin(), node->op_types().end());
if (node->shape().size() > 0) {
shapes_.push_back(node->shape());
}
- if (!node->device().empty()) {
- devices_.insert(node->device());
- }
+ std::set<string> devices = node->devices();
+ devices_.insert(devices.begin(), devices.end());
}
const std::map<string, const TFGraphNode*>& graph_nodes() const {
return nodes_;
@@ -165,7 +175,7 @@ class TFCodeNode {
const string& name() const { return trace_; }
- int64 kernel_compute_micros() const { return kernel_compute_micros_; }
+ int64 kernel_exec_micros() const { return kernel_exec_micros_; }
int64 requested_bytes() const { return requested_bytes_; }
@@ -180,7 +190,7 @@ class TFCodeNode {
private:
const string trace_;
std::set<string> op_types_;
- int64 kernel_compute_micros_;
+ int64 kernel_exec_micros_;
int64 requested_bytes_;
int64 float_ops_;