1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
#ifndef TENSORFLOW_GRAPH_COSTMODEL_H_
#define TENSORFLOW_GRAPH_COSTMODEL_H_
#include <unordered_map>
#include <vector>
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/types.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
namespace tensorflow {
// As the alias name indicates, keys are node names and values are cost ids.
// Consumed by CostModel::MergeFromStats().
using NodeNameToCostIdMap = std::unordered_map<string, int32>;

// Forward declaration; this header only refers to StepStats by reference.
class StepStats;
// Runtime statistics collected for the nodes of one Graph. For every node
// a CostModel tracks:
//   * how many times the node has executed,
//   * the accumulated execution time of the node, in microseconds,
//   * the accumulated size, in bytes, of each of the node's outputs.
//
// This class is NOT thread-safe.
class CostModel {
 public:
  // When "is_global" is true, costs are indexed by Node::cost_id;
  // when it is false, they are indexed by Node::id.
  explicit CostModel(bool is_global) : is_global_(is_global) {}

  // Sets min_count_ as a function of the median count for a Node. The
  // resulting value is then used to suppress the time/size costs of
  // operations that run infrequently.
  // NOTE(tucker): Maybe this should move to a subclass of CostModel.
  void SuppressInfrequent();

  // Reports the indexing mode chosen at construction time.
  bool is_global() const { return is_global_; }

  // Initializes the cost model for 'g'.
  void InitFromGraph(const Graph& g);

  // Merges the costs held by "cm" into this model.
  // REQUIRES: is_global_ is true for this and for "cm"
  void MergeFromGlobal(const CostModel& cm);

  // Merges the costs held by "cm", which were computed relative to "g".
  // REQUIRES: is_global_ is true for this, and false for "cm".
  void MergeFromLocal(const Graph& g, const CostModel& cm);

  // NOTE(review): undocumented in the original header. Presumably merges the
  // costs found in "ss", using "map" to translate node names into cost ids --
  // confirm against the implementation.
  void MergeFromStats(const NodeNameToCostIdMap& map, const StepStats& ss);

  // Sets the number of outputs of "node" to "num_outputs".
  void SetNumOutputs(const Node* node, int num_outputs);

  // Notes that "node" has been executed "num_count" additional times.
  void RecordCount(const Node* node, int num_count);

  // Number of times "node" has been executed.
  int32 TotalCount(const Node* node) const;

  // Notes that "output_slot" of "node" has produced tensors whose
  // aggregated size is "bytes".
  void RecordSize(const Node* node, int output_slot, Bytes bytes);

  // Total bytes of the tensors produced on "output_slot" of "node".
  Bytes TotalBytes(const Node* node, int output_slot) const;

  // Predicted size of the tensor that a single execution of "node"
  // produces on "output_slot".
  Bytes SizeEstimate(const Node* node, int output_slot) const;

  // Notes that executions of "node" have taken "time" microseconds.
  void RecordTime(const Node* node, Microseconds time);

  // Total execution time accumulated for "node".
  Microseconds TotalTime(const Node* node) const;

  // Predicted duration of a single execution of "node".
  Microseconds TimeEstimate(const Node* node) const;

  // Checks that an estimate is available for every OP node in "graph".
  void CheckInitialized(const Graph& graph) const;

  // ---- Helpers that encapsulate static estimation heuristics ----

  // Estimates how long copying "b" bytes over the network takes, assuming
  // a fixed cost of "network_latency_millis" milliseconds plus an estimated
  // bandwidth of "estimated_gbps" gigabits per second. Note the unit of the
  // bandwidth: gigabits, not gigabytes.
  static Microseconds CopyTimeEstimate(Bytes b, double network_latency_millis,
                                       double estimated_gbps);

  // NOTE(review): undocumented in the original header. Presumably a time
  // estimate for performing "mathops" math operations -- confirm against the
  // implementation.
  static Microseconds ComputationTimeEstimate(int64 mathops);

  // Writes the contents of the CostModel to the INFO log.
  void WriteToLog();

 private:
  // Indexing mode, fixed at construction; see Id().
  const bool is_global_;

  // Index under which the statistics of "n" are kept: the node's plain id
  // for a non-global model, its cost_id for a global one.
  int Id(const Node* n) const {
    if (!is_global_) {
      return n->id();
    }
    return n->cost_id();
  }

  // Grows the vectors below so that they are large enough for "id".
  void Ensure(int id);

  // Nodes and Edges whose count is below this value get time/byte
  // estimates of 0.
  int32 min_count_ = 0;

  // Per-node number of executions.
  std::vector<int32> count_;

  // Per-node cumulative execution time.
  std::vector<Microseconds> time_;

  // Per-node cumulative Bytes output on each channel.
  std::vector<gtl::InlinedVector<Bytes, 2>> slot_bytes_;

  TF_DISALLOW_COPY_AND_ASSIGN(CostModel);
};
} // namespace tensorflow
#endif // TENSORFLOW_GRAPH_COSTMODEL_H_
|