#ifndef TENSORFLOW_GRAPH_COSTMODEL_H_ #define TENSORFLOW_GRAPH_COSTMODEL_H_ #include #include #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/graph/types.h" #include "tensorflow/core/lib/gtl/array_slice.h" namespace tensorflow { typedef std::unordered_map NodeNameToCostIdMap; class StepStats; // CostModel keeps track of the following runtime statistics for nodes // of a single Graph: // * The total number of times a node has executed. // * The accumulated execution time (in microseconds) of a node. // * The accumulated size (in bytes) of each node's output. // // This class is NOT thread-safe. class CostModel { public: // If "global" is true, maintains costs based on Node::cost_id, otherwise // maintains costs based on Node::id. explicit CostModel(bool is_global) : is_global_(is_global) {} // Assigns min_count_ as a function of the median count for a Node. // This value is then used for suppressing the time/size costs of // infrequent operations. // NOTE(tucker): Maybe this should move to a subclass of CostModel. void SuppressInfrequent(); bool is_global() const { return is_global_; } // Initializes cost model for 'g'. void InitFromGraph(const Graph& g); // Merges costs from cm. // REQUIRES: is_global_ is true for this and for "cm" void MergeFromGlobal(const CostModel& cm); // Merges costs from "cm", which has been computed relative to "g". // REQUIRES: is_global_ is true for this, and false for "cm". void MergeFromLocal(const Graph& g, const CostModel& cm); void MergeFromStats(const NodeNameToCostIdMap& map, const StepStats& ss); // Sets the number of outputs of "node". void SetNumOutputs(const Node* node, int num_outputs); // Records that "node" has executed "num_count" more times. void RecordCount(const Node* node, int num_count); // Returns how many times "node" has been executed. int32 TotalCount(const Node* node) const; // Records that "output_slot" of "node" has produced tensors of // aggregated "bytes". void RecordSize(const Node* node, int output_slot, Bytes bytes); // Returns total bytes of tensors produced by "node"s output slot. Bytes TotalBytes(const Node* node, int output_slot) const; // Returns a prediction for the size of the tensor at the // output_slot produced by one execution of "node". Bytes SizeEstimate(const Node* node, int output_slot) const; // Records that Executions of "node" have taken "time" microseconds. void RecordTime(const Node* node, Microseconds time); // Returns the total execution time for "node". Microseconds TotalTime(const Node* node) const; // Returns a prediction for one execution of "node". Microseconds TimeEstimate(const Node* node) const; // Check that an estimate is available for every OP node in graph. void CheckInitialized(const Graph& graph) const; // Helper routines to encapsulate static estimatation heuristics // Compute an estimate of the time to copy "b" bytes over the network, // given a fixed cost of "network_latency_millis" milliseconds and // an estimated bandwidth of "estimated_gbps" gigabits per second (note that // this value is in gigabits, not gigabytes). static Microseconds CopyTimeEstimate(Bytes b, double network_latency_millis, double estimated_gbps); static Microseconds ComputationTimeEstimate(int64 mathops); // Write the contents of the CostModel to the INFO log. void WriteToLog(); private: const bool is_global_; inline int Id(const Node* n) const { if (is_global_) { return n->cost_id(); } else { return n->id(); } } // Resizes vectors so that they are large enough for "id". void Ensure(int id); // Nodes and Edges whose count is < this value // get type/byte estimates of 0. int32 min_count_ = 0; // Number of times each Node has been executed. std::vector count_; // Cumulative execution time. std::vector time_; // Cumulative Bytes output on each channel. std::vector > slot_bytes_; TF_DISALLOW_COPY_AND_ASSIGN(CostModel); }; } // namespace tensorflow #endif // TENSORFLOW_GRAPH_COSTMODEL_H_