// tensorflow/core/profiler/tfprof_output.proto

syntax = "proto3";

import "tensorflow/core/framework/tensor_shape.proto";
import "tensorflow/core/framework/types.proto";

package tensorflow.tfprof;

// The values of a tensor, stored as a data type plus a flat list of values.
// Used by GraphNodeProto.tensor_value.
message TFProfTensorProto {
  // Data type of the tensor. DataType is declared outside this file
  // (presumably in tensorflow/core/framework/types.proto -- confirm).
  DataType dtype = 1;
  // Flattened tensor values in row-major order.
  // Only one of the following arrays is set.
  // NOTE(review): this exclusivity is a convention only; the fields are plain
  // repeated fields rather than members of a oneof, so the schema does not
  // enforce it.
  repeated double value_double = 2;
  repeated int64 value_int64 = 3;
  repeated string value_str = 4;
}

// A node in a TensorFlow graph. Used by the scope and graph views.
//
// Field numbers below are not in declaration order. They identify the fields
// on the wire, so they must not be renumbered to tidy up.
message GraphNodeProto {
  // Op name.
  string name = 1;
  // Tensor value restored from checkpoint.
  TFProfTensorProto tensor_value = 15;
  // Op execution time.
  // A node can be defined once but run multiple times in tf.while_loop.
  // The times sum up all the different runs.
  int64 run_count = 21;
  int64 exec_micros = 2;
  int64 accelerator_exec_micros = 17;
  int64 cpu_exec_micros = 18;

  // Total bytes requested by the op.
  int64 requested_bytes = 3;
  // Max bytes allocated and being used by the op at a point.
  int64 peak_bytes = 24;
  // Total bytes requested by the op and not released before end.
  int64 residual_bytes = 25;
  // Total bytes output by the op (not necessarily allocated by the op).
  int64 output_bytes = 26;

  // Number of parameters, if available.
  int64 parameters = 4;
  // Number of float operations.
  int64 float_ops = 13;
  // Devices the op is assigned to.
  // Since an op can fire multiple kernel calls, there can be multiple devices.
  repeated string devices = 10;

  // The following are the aggregated stats from all *accounted* children and
  // the node itself. The actual children depend on the data structure used.
  // In graph view, children are inputs recursively.
  // In scope view, children are nodes under the name scope.
  // NOTE(review): total_definition_count has no per-node counterpart in this
  // message; presumably it counts the node definitions aggregated here --
  // confirm.
  int64 total_definition_count = 23;
  int64 total_run_count = 22;
  int64 total_exec_micros = 6;
  int64 total_accelerator_exec_micros = 19;
  int64 total_cpu_exec_micros = 20;

  // Aggregated counterparts of the *_bytes fields above.
  int64 total_requested_bytes = 7;
  int64 total_peak_bytes = 27;
  int64 total_residual_bytes = 28;
  int64 total_output_bytes = 29;

  // Aggregated counterparts of parameters and float_ops above.
  int64 total_parameters = 8;
  int64 total_float_ops = 14;

  // Shape information, if available.
  // TODO(xpan): Why is this repeated?
  repeated TensorShapeProto shapes = 11;

  // Shapes of the inputs, keyed by an int32.
  // NOTE(review): presumably the key is the input index -- confirm.
  map<int32, TensorShapeProto> input_shapes = 16;

  // Descendants of this node. The actual descendants depend on the data
  // structure used (scope, graph).
  repeated GraphNodeProto children = 12;
}

// A node that groups multiple GraphNodeProto.
// Depending on the 'view', the semantics of the MultiGraphNodeProto
// are different:
// code view: A node groups all TensorFlow graph nodes created by the
//            Python code.
// op view:   A node groups all TensorFlow graph nodes that are of the type
//            of the op (e.g. MatMul, Conv2D).
//
// Field numbers below are not in declaration order. They identify the fields
// on the wire, so they must not be renumbered to tidy up.
message MultiGraphNodeProto {
  // Name of the node.
  string name = 1;

  // Code execution time.
  int64 exec_micros = 2;
  int64 accelerator_exec_micros = 12;
  int64 cpu_exec_micros = 13;

  // Total requested bytes by the code.
  // NOTE(review): the three comments below say "op", with the same wording as
  // GraphNodeProto; presumably they describe the grouped nodes here -- confirm.
  int64 requested_bytes = 3;
  // Max bytes allocated and being used by the op at a point.
  int64 peak_bytes = 16;
  // Total bytes requested by the op and not released before end.
  int64 residual_bytes = 17;
  // Total bytes output by the op (not necessarily allocated by the op).
  int64 output_bytes = 18;

  // Number of parameters, if available.
  int64 parameters = 4;
  // Number of float operations.
  int64 float_ops = 5;

  // The following are the aggregated stats from descendants.
  // The actual descendants depend on the data structure used.
  int64 total_exec_micros = 6;
  int64 total_accelerator_exec_micros = 14;
  int64 total_cpu_exec_micros = 15;

  // Aggregated counterparts of the *_bytes fields above.
  int64 total_requested_bytes = 7;
  int64 total_peak_bytes = 19;
  int64 total_residual_bytes = 20;
  int64 total_output_bytes = 21;

  // Aggregated counterparts of parameters and float_ops above.
  int64 total_parameters = 8;
  int64 total_float_ops = 9;

  // TensorFlow graph nodes contained by the MultiGraphNodeProto.
  repeated GraphNodeProto graph_nodes = 10;
  // Descendants of the node. The actual descendants depend on the data
  // structure used.
  repeated MultiGraphNodeProto children = 11;
}

// Reports grouped by the checker that produced them.
message AdviceProto {
  // Checker name -> a list of reports from the checker.
  map<string, Checker> checkers = 1;
  // The reports from a single checker.
  message Checker {
    // NOTE(review): numbering starts at 2 and no field 1 is declared or
    // reserved here; confirm whether 1 was used previously before assigning it.
    repeated string reports = 2;
  }
}