Add option to use compute_memory_overlap; if true, use max of memory_cost and compute_cost, instead of sum for op level cost in analytical cost estimator.

PiperOrigin-RevId: 162782658
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-07-21 14:07:14 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-07-21 14:11:06 -0700
commit: 74057361032bc4d9a9fbfced1f433b06c06c09ec (patch)
tree: bf6453e743bcd002619166a525bcf9907388b1e1 /tensorflow/core/grappler/costs
parent: 51cbb58ca5147218b3995dc124bd92927d93e913 (diff)
3 files changed, 25 insertions, 1 deletions
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index 5148a4b99e..f13b426b3c 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -268,6 +268,9 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
                           Eigen::internal::scalar_quotient_op<float>>::Cost},
       {"TruncateMod", Eigen::internal::functor_traits<
                           Eigen::internal::scalar_mod_op<float>>::Cost}};
+
+  // By default, use sum of memory_time and compute_time for execution_time.
+  compute_memory_overlap_ = false;
 }
 
 Costs OpLevelCostEstimator::PredictCosts(const OpInfo& op_features) const {
@@ -395,7 +398,11 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
   Costs costs;
   costs.compute_time = compute_cost;
   costs.memory_time = memory_cost;
-  costs.execution_time = compute_cost + memory_cost;
+  if (compute_memory_overlap_) {
+    costs.execution_time = std::max(compute_cost, memory_cost);
+  } else {
+    costs.execution_time = compute_cost + memory_cost;
+  }
   costs.inaccurate = found_unknown_shapes;
   return costs;
 }
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.h b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
index 59ced70ba6..36ef6a5c61 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.h
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.h
@@ -150,6 +150,9 @@ class OpLevelCostEstimator {
   std::map<string, int> elementwise_ops_;
   typedef std::function<Costs(const OpInfo& op_feature)> CostImpl;
   std::map<string, CostImpl> device_cost_impl_;
+  // If true, assume compute and memory overlap; hence, the op cost is max of
+  // compute_time and memory_time, insteaf of sum of those two.
+  bool compute_memory_overlap_;
 
  private:
   friend class OpLevelCostEstimatorTest;
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
index 1f0e02c160..0cbfb10017 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
@@ -157,6 +157,10 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
                                                  found_unknown_shapes);
   }
 
+  void SetComputeMemoryOverlap(bool value) {
+    estimator_.compute_memory_overlap_ = value;
+  }
+
   OpLevelCostEstimator estimator_;
 };
 
@@ -168,6 +172,16 @@ TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) {
   EXPECT_TRUE(cost.inaccurate);
 }
 
+TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) {
+  SetComputeMemoryOverlap(true);
+  auto cost = PredictCosts(DescribeOp("Dummy", 1000, 1));
+  EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
+  EXPECT_EQ(Costs::Duration(200), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(2000), cost.execution_time);  // max(2000, 200)
+  EXPECT_TRUE(cost.inaccurate);
+  SetComputeMemoryOverlap(false);  // Set it back to default.
+}
+
 TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) {
   auto cost = PredictCosts(DescribeOp("Mul", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-07-21 14:07:14 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-07-21 14:11:06 -0700
commit	74057361032bc4d9a9fbfced1f433b06c06c09ec (patch)
tree	bf6453e743bcd002619166a525bcf9907388b1e1 /tensorflow/core/grappler/costs
parent	51cbb58ca5147218b3995dc124bd92927d93e913 (diff)