aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/grappler/costs/virtual_scheduler.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2017-10-09 18:59:24 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-10-09 19:03:17 -0700
commitd08cb107e6eeedd74c44f0d3654753b141cfa645 (patch)
treec103b0c7697f03db6eafb82d5e52f4a338104d90 /tensorflow/core/grappler/costs/virtual_scheduler.cc
parent103d383a6c73363d16034c57fa7da6aea7876912 (diff)
Scheduler exports tensor size info to RunMetadata. In addition, tensor size histogram
is printed out optionally (use vmodule=analytical_cost_estimator=1 or 2). PiperOrigin-RevId: 171619454
Diffstat (limited to 'tensorflow/core/grappler/costs/virtual_scheduler.cc')
-rw-r--r--tensorflow/core/grappler/costs/virtual_scheduler.cc71
1 files changed, 53 insertions, 18 deletions
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 99ea75f703..1ae6fac8c8 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include <math.h>
+#include "tensorflow/core/framework/allocation_description.pb.h"
#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/tensor.pb.h"
@@ -26,7 +27,9 @@ limitations under the License.
#include "tensorflow/core/grappler/costs/utils.h"
#include "tensorflow/core/grappler/op_types.h"
#include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/util/device_name_utils.h"
namespace tensorflow {
@@ -51,7 +54,7 @@ Costs CombineCosts(const Costs& left, const Costs& right) {
result.max_per_op_streaming =
std::max(left.max_per_op_streaming, right.max_per_op_streaming);
}
- VLOG(3) << "costs execution_time=" << result.execution_time.count()
+ VLOG(4) << "costs execution_time=" << result.execution_time.count()
<< " max_memory=" << result.max_memory
<< " max_per_op_buffers=" << result.max_per_op_buffers
<< " max_per_op_streaming=" << result.max_per_op_streaming;
@@ -544,7 +547,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
auto& device_op_cost = FindOrCreateZero(op_name, &device.op_to_cost);
device_op_cost = CombineCosts(device_op_cost, node_costs);
- VLOG(2) << "Op scheduled -- name: " << node->name() << ", op: " << node->op()
+ VLOG(3) << "Op scheduled -- name: " << node->name() << ", op: " << node->op()
<< ", device: " << node->device()
<< ", ready: " << node_state.time_ready.count()
<< ", scheduled: " << node_state.time_scheduled.count()
@@ -649,12 +652,12 @@ Costs VirtualScheduler::Summary() const {
<< ", execution_time = " << state.GetCurrTime().count()
<< ", memory usage: "
<< "persistenst = "
- << Round2(persistent_memory_usage / 1024.0 / 1024.0 / 1024.0)
- << " GB, peak = "
- << Round2(state.max_memory_usage / 1024.0 / 1024.0 / 1024.0)
- << " GB, total = "
- << Round2(max_memory_usage / 1024.0 / 1024.0 / 1024.0)
- << " GB, at the end: " << state.memory_usage << " B";
+ << strings::HumanReadableNumBytes(persistent_memory_usage)
+ << ", peak = "
+ << strings::HumanReadableNumBytes(state.max_memory_usage)
+ << ", total = " << strings::HumanReadableNumBytes(max_memory_usage)
+ << ", at the end: "
+ << strings::HumanReadableNumBytes(state.memory_usage);
VLOG(1) << "Per-op execution time (and memory usage at peak memory usage):";
@@ -668,16 +671,20 @@ Costs VirtualScheduler::Summary() const {
for (const auto& op_cost_pair : state.op_to_cost) {
const auto& op = op_cost_pair.first;
const auto& cost = op_cost_pair.second.execution_time.count();
- const float mem_usage_gb =
- Round2(op_to_memory[op] / 1024.0 / 1024.0 / 1024.0);
- int64 op_mem_usage = op_to_memory.at(op);
+ int64 op_mem_usage = 0;
+ auto it = op_to_memory.find(op);
+ if (it != op_to_memory.end()) {
+ op_mem_usage = it->second;
+ }
+
const float mem_usage_percent =
max_memory_usage > 0 ? Round2(100.0 * op_mem_usage / max_memory_usage)
: 0.0;
if (cost || mem_usage_percent > 1.0) {
// Print out only non-zero cost ops or ops with > 1% memory usage.
- VLOG(1) << " + " << op << " : " << cost << " (" << mem_usage_gb
- << " GB [" << mem_usage_percent << "%] "
+ VLOG(1) << " + " << op << " : " << cost << " ("
+ << strings::HumanReadableNumBytes(op_mem_usage) << " ["
+ << mem_usage_percent << "%] "
<< (persisent_ops.count(op) > 0 ? ": persistent op)" : ")");
}
}
@@ -686,11 +693,13 @@ Costs VirtualScheduler::Summary() const {
}
}
- // Also log the op description and their corresponding counts.
- VLOG(2) << "Node description, counts, cost:";
- for (const auto& item : op_counts_) {
- VLOG(2) << "Node: " << item.first << ", Count: " << item.second
- << ", Individual Cost: " << op_costs_.at(item.first);
+ if (VLOG_IS_ON(2)) {
+ // Also log the op description and their corresponding counts.
+ VLOG(2) << "Node description, counts, cost:";
+ for (const auto& item : op_counts_) {
+ VLOG(2) << "Node: " << item.first << ", Count: " << item.second
+ << ", Individual Cost: " << op_costs_.at(item.first);
+ }
}
VLOG(1) << "Critical path execution time: "
@@ -709,6 +718,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
for (const auto& node_def : device.second.nodes_executed) {
const NodeState& nodestate = node_map_.at(node_def);
NodeExecStats* node_stats = device_stepstats->add_node_stats();
+ uint64 total_output_size = 0;
for (int slot = 0; slot < nodestate.output_properties.size(); slot++) {
const auto& properties = nodestate.output_properties[slot];
NodeOutput* no = node_stats->add_output();
@@ -716,6 +726,14 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
TensorDescription* tensor_descr = no->mutable_tensor_description();
tensor_descr->set_dtype(properties.dtype());
*tensor_descr->mutable_shape() = properties.shape();
+ // Optional allocation description.
+ const auto tensor_size =
+ CalculateOutputSize(nodestate.output_properties, slot);
+ total_output_size += tensor_size;
+ tensor_descr->mutable_allocation_description()->set_requested_bytes(
+ tensor_size);
+ tensor_descr->mutable_allocation_description()->set_allocated_bytes(
+ tensor_size);
}
node_stats->set_timeline_label(node_def->op());
node_stats->set_node_name(node_def->name());
@@ -728,6 +746,23 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
node_stats->set_all_end_rel_micros(
nodestate.time_finished.asMicroSeconds().count() -
nodestate.time_scheduled.asMicroSeconds().count());
+ auto* mem_stats = node_stats->mutable_memory_stats();
+ // VirtualScheduler does not specify scratch pad memory usage.
+ mem_stats->set_host_temp_memory_size(0);
+ mem_stats->set_device_temp_memory_size(0);
+ int64 host_persistent_memory_size = 0;
+ int64 device_persistent_memory_size = 0;
+ if (IsPersistentNode(node_def)) {
+ if (device.first.find("cpu") != string::npos ||
+ device.first.find("CPU") != string::npos) {
+ host_persistent_memory_size = total_output_size;
+ } else {
+ device_persistent_memory_size = total_output_size;
+ }
+ }
+ mem_stats->set_host_persistent_memory_size(host_persistent_memory_size);
+ mem_stats->set_device_persistent_memory_size(
+ device_persistent_memory_size);
*device_partition_graph->mutable_node()->Add() = *node_def;
}
}