Scheduler exports tensor size info to RunMetadata. In addition, tensor size histogram

is printed out optionally (use vmodule=analytical_cost_estimator=1 or 2). PiperOrigin-RevId: 171619454
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-10-09 18:59:24 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-10-09 19:03:17 -0700
commit: d08cb107e6eeedd74c44f0d3654753b141cfa645 (patch)
tree: c103b0c7697f03db6eafb82d5e52f4a338104d90 /tensorflow/core/grappler/costs/virtual_scheduler.cc
parent: 103d383a6c73363d16034c57fa7da6aea7876912 (diff)
1 files changed, 53 insertions, 18 deletions
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 99ea75f703..1ae6fac8c8 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <math.h>
 
+#include "tensorflow/core/framework/allocation_description.pb.h"
 #include "tensorflow/core/framework/attr_value.pb.h"
 #include "tensorflow/core/framework/node_def.pb.h"
 #include "tensorflow/core/framework/tensor.pb.h"
@@ -26,7 +27,9 @@ limitations under the License.
 #include "tensorflow/core/grappler/costs/utils.h"
 #include "tensorflow/core/grappler/op_types.h"
 #include "tensorflow/core/grappler/utils.h"
+#include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/platform/logging.h"
 #include "tensorflow/core/util/device_name_utils.h"
 
 namespace tensorflow {
@@ -51,7 +54,7 @@ Costs CombineCosts(const Costs& left, const Costs& right) {
     result.max_per_op_streaming =
         std::max(left.max_per_op_streaming, right.max_per_op_streaming);
   }
-  VLOG(3) << "costs execution_time=" << result.execution_time.count()
+  VLOG(4) << "costs execution_time=" << result.execution_time.count()
           << " max_memory=" << result.max_memory
           << " max_per_op_buffers=" << result.max_per_op_buffers
           << " max_per_op_streaming=" << result.max_per_op_streaming;
@@ -544,7 +547,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
   auto& device_op_cost = FindOrCreateZero(op_name, &device.op_to_cost);
   device_op_cost = CombineCosts(device_op_cost, node_costs);
 
-  VLOG(2) << "Op scheduled -- name: " << node->name() << ", op: " << node->op()
+  VLOG(3) << "Op scheduled -- name: " << node->name() << ", op: " << node->op()
           << ", device: " << node->device()
           << ", ready: " << node_state.time_ready.count()
           << ", scheduled: " << node_state.time_scheduled.count()
@@ -649,12 +652,12 @@ Costs VirtualScheduler::Summary() const {
             << ", execution_time = " << state.GetCurrTime().count()
             << ", memory usage: "
             << "persistenst = "
-            << Round2(persistent_memory_usage / 1024.0 / 1024.0 / 1024.0)
-            << " GB, peak = "
-            << Round2(state.max_memory_usage / 1024.0 / 1024.0 / 1024.0)
-            << " GB, total = "
-            << Round2(max_memory_usage / 1024.0 / 1024.0 / 1024.0)
-            << " GB, at the end: " << state.memory_usage << " B";
+            << strings::HumanReadableNumBytes(persistent_memory_usage)
+            << ", peak = "
+            << strings::HumanReadableNumBytes(state.max_memory_usage)
+            << ", total = " << strings::HumanReadableNumBytes(max_memory_usage)
+            << ", at the end: "
+            << strings::HumanReadableNumBytes(state.memory_usage);
 
     VLOG(1) << "Per-op execution time (and memory usage at peak memory usage):";
 
@@ -668,16 +671,20 @@ Costs VirtualScheduler::Summary() const {
     for (const auto& op_cost_pair : state.op_to_cost) {
       const auto& op = op_cost_pair.first;
       const auto& cost = op_cost_pair.second.execution_time.count();
-      const float mem_usage_gb =
-          Round2(op_to_memory[op] / 1024.0 / 1024.0 / 1024.0);
-      int64 op_mem_usage = op_to_memory.at(op);
+      int64 op_mem_usage = 0;
+      auto it = op_to_memory.find(op);
+      if (it != op_to_memory.end()) {
+        op_mem_usage = it->second;
+      }
+
       const float mem_usage_percent =
           max_memory_usage > 0 ? Round2(100.0 * op_mem_usage / max_memory_usage)
                                : 0.0;
       if (cost || mem_usage_percent > 1.0) {
         // Print out only non-zero cost ops or ops with > 1% memory usage.
-        VLOG(1) << " + " << op << " : " << cost << " (" << mem_usage_gb
-                << " GB [" << mem_usage_percent << "%] "
+        VLOG(1) << " + " << op << " : " << cost << " ("
+                << strings::HumanReadableNumBytes(op_mem_usage) << " ["
+                << mem_usage_percent << "%] "
                 << (persisent_ops.count(op) > 0 ? ": persistent op)" : ")");
       }
     }
@@ -686,11 +693,13 @@ Costs VirtualScheduler::Summary() const {
     }
   }
 
-  // Also log the op description and their corresponding counts.
-  VLOG(2) << "Node description, counts, cost:";
-  for (const auto& item : op_counts_) {
-    VLOG(2) << "Node: " << item.first << ", Count: " << item.second
-            << ", Individual Cost: " << op_costs_.at(item.first);
+  if (VLOG_IS_ON(2)) {
+    // Also log the op description and their corresponding counts.
+    VLOG(2) << "Node description, counts, cost:";
+    for (const auto& item : op_counts_) {
+      VLOG(2) << "Node: " << item.first << ", Count: " << item.second
+              << ", Individual Cost: " << op_costs_.at(item.first);
+    }
   }
 
   VLOG(1) << "Critical path execution time: "
@@ -709,6 +718,7 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
       for (const auto& node_def : device.second.nodes_executed) {
         const NodeState& nodestate = node_map_.at(node_def);
         NodeExecStats* node_stats = device_stepstats->add_node_stats();
+        uint64 total_output_size = 0;
         for (int slot = 0; slot < nodestate.output_properties.size(); slot++) {
           const auto& properties = nodestate.output_properties[slot];
           NodeOutput* no = node_stats->add_output();
@@ -716,6 +726,14 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
           TensorDescription* tensor_descr = no->mutable_tensor_description();
           tensor_descr->set_dtype(properties.dtype());
           *tensor_descr->mutable_shape() = properties.shape();
+          // Optional allocation description.
+          const auto tensor_size =
+              CalculateOutputSize(nodestate.output_properties, slot);
+          total_output_size += tensor_size;
+          tensor_descr->mutable_allocation_description()->set_requested_bytes(
+              tensor_size);
+          tensor_descr->mutable_allocation_description()->set_allocated_bytes(
+              tensor_size);
         }
         node_stats->set_timeline_label(node_def->op());
         node_stats->set_node_name(node_def->name());
@@ -728,6 +746,23 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
         node_stats->set_all_end_rel_micros(
             nodestate.time_finished.asMicroSeconds().count() -
             nodestate.time_scheduled.asMicroSeconds().count());
+        auto* mem_stats = node_stats->mutable_memory_stats();
+        // VirtualScheduler does not specify scratch pad memory usage.
+        mem_stats->set_host_temp_memory_size(0);
+        mem_stats->set_device_temp_memory_size(0);
+        int64 host_persistent_memory_size = 0;
+        int64 device_persistent_memory_size = 0;
+        if (IsPersistentNode(node_def)) {
+          if (device.first.find("cpu") != string::npos ||
+              device.first.find("CPU") != string::npos) {
+            host_persistent_memory_size = total_output_size;
+          } else {
+            device_persistent_memory_size = total_output_size;
+          }
+        }
+        mem_stats->set_host_persistent_memory_size(host_persistent_memory_size);
+        mem_stats->set_device_persistent_memory_size(
+            device_persistent_memory_size);
         *device_partition_graph->mutable_node()->Add() = *node_def;
       }
     }
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-10-09 18:59:24 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-10-09 19:03:17 -0700
commit	d08cb107e6eeedd74c44f0d3654753b141cfa645 (patch)
tree	c103b0c7697f03db6eafb82d5e52f4a338104d90 /tensorflow/core/grappler/costs/virtual_scheduler.cc
parent	103d383a6c73363d16034c57fa7da6aea7876912 (diff)