about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Peter Ma <pcma@google.com> 2018-08-10 15:03:22 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-08-10 15:13:09 -0700
commit: 2625345c727b14f8e770d4f980fe86e9ccc8b03d (patch)
tree: ba456d8a49c073e56c8f78d26bc17f413937fff0
parent: 83a1435684149e381521de528c3af40daa784570 (diff)
Add two counters to the Costs struct: the total number of ops processed/predicted, and the number of ops predicted with unknown shapes
PiperOrigin-RevId: 208274158
-rw-r--r-- tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc 4
-rw-r--r-- tensorflow/core/grappler/costs/cost_estimator.h 8
-rw-r--r-- tensorflow/core/grappler/costs/op_level_cost_estimator.cc 23
-rw-r--r-- tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc 151
-rw-r--r-- tensorflow/core/grappler/costs/virtual_scheduler.cc 20
-rw-r--r-- tensorflow/core/grappler/costs/virtual_scheduler.h 1
-rw-r--r-- tensorflow/core/grappler/costs/virtual_scheduler_test.cc 7
7 files changed, 195 insertions, 19 deletions
diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
index f241922471..a9a1abfa98 100644
--- a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc
@@ -103,6 +103,9 @@ TEST_F(AnalyticalCostEstimatorTest, SimpleTest) {
TF_ASSERT_OK(estimator.PredictCosts(item.graph, &cost_graph, &summary));
EXPECT_EQ(Costs::NanoSeconds(9151), summary.execution_time);
+ // Note there are 17 nodes in total (RandomUniform creates 2 nodes), but
+ // grappler will not process "label", so we expect 15 here instead.
+ EXPECT_EQ(15, summary.num_ops_total);
// Make this estimate accurate:
// TODO(http://b/70031255): Accurate estimator for RandomUniform op needed
@@ -110,6 +113,7 @@ TEST_F(AnalyticalCostEstimatorTest, SimpleTest) {
//
// Change to EXPECT_FALSE when the above TODOs are done:
EXPECT_TRUE(summary.inaccurate);
+ EXPECT_EQ(0, summary.num_ops_with_unknown_shapes);
}
} // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h
index fe8a876f8a..e91f0cc9da 100644
--- a/tensorflow/core/grappler/costs/cost_estimator.h
+++ b/tensorflow/core/grappler/costs/cost_estimator.h
@@ -109,8 +109,16 @@ struct Costs {
int64 max_per_op_buffers; // Sum of all buffers used by the ops.
int64 max_per_op_streaming; // Ignore largest input buffer, assuming it
// streams from main memory.
+
+ // Total number of ops included in this Costs.
+ // Default-initialized to one.
+ int64 num_ops_total = 1;
// If the time estimation is inaccurate.
bool inaccurate = false;
+ // Number of ops that are estimated with unknown shapes.
+ int64 num_ops_with_unknown_shapes = 0;
+ // TODO(pcma): include a counter for total inaccurate ops and counters for
+ // other reasons causing the inaccuracy
// Max possible memory usage per device.
std::unordered_map<string, uint64> estimated_max_memory_per_device;
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
index 5b303f6ccb..6406a4bdbf 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc
@@ -449,6 +449,7 @@ Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const {
if (found_unknown_shapes || !is_known_elementwise_op) {
costs.inaccurate = true;
}
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
return costs;
}
@@ -469,6 +470,7 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost(
const double total_io_bytes = input_size + output_size;
Costs costs = PredictOpCountBasedCost(operations, total_io_bytes, op_info);
costs.inaccurate = unknown_shapes;
+ costs.num_ops_with_unknown_shapes = unknown_shapes;
costs.max_memory = output_size;
return costs;
}
@@ -627,6 +629,7 @@ int64 OpLevelCostEstimator::CountMatMulOperations(
if (op_features.inputs_size() < 2) {
LOG(ERROR) << "Need 2 inputs but got " << op_features.inputs_size();
+ // TODO(pcma): Try to separate invalid inputs from unknown shapes
*found_unknown_shapes = true;
return 0;
}
@@ -694,11 +697,13 @@ int64 OpLevelCostEstimator::CountBatchMatMulOperations(
const OpInfo& op_features, bool* found_unknown_shapes) const {
if (op_features.op() != kBatchMatMul) {
LOG(ERROR) << "Invalid Operation: " << op_features.op();
+ // TODO(pcma): Try to separate invalid inputs from unknown shapes
*found_unknown_shapes = true;
return 0;
}
if (op_features.inputs_size() != 2) {
LOG(ERROR) << "Expected 2 inputs but got " << op_features.inputs_size();
+ // TODO(pcma): Try to separate invalid inputs from unknown shapes
*found_unknown_shapes = true;
return 0;
}
@@ -858,6 +863,7 @@ int64 OpLevelCostEstimator::CountConv2DBackpropInputOperations(
"kDepthwiseConv2dNativeBackpropInput";
if (op_features.inputs_size() < 2) {
+ // TODO(pcma): Try to separate invalid inputs from unknown shapes
*found_unknown_shapes = true;
return ops;
}
@@ -935,6 +941,7 @@ int64 OpLevelCostEstimator::CountConv2DBackpropFilterOperations(
}
if (op_features.inputs_size() < 1) {
+ // TODO(pcma): Try to separate invalid inputs from unknown shapes
*found_unknown_shapes = true;
return ops;
}
@@ -1037,6 +1044,7 @@ Costs OpLevelCostEstimator::PredictConv2D(const OpContext& op_context) const {
auto costs = PredictOpCountBasedCost(
CountConv2DOperations(op_features, &found_unknown_shapes), op_features);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
return costs;
}
@@ -1049,6 +1057,7 @@ Costs OpLevelCostEstimator::PredictConv2DBackpropInput(
op_features, nullptr, &found_unknown_shapes),
op_features);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
return costs;
}
@@ -1061,6 +1070,7 @@ Costs OpLevelCostEstimator::PredictConv2DBackpropFilter(
op_features, nullptr, &found_unknown_shapes),
op_features);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
return costs;
}
@@ -1148,6 +1158,7 @@ Costs OpLevelCostEstimator::PredictFusedConv2DBiasActivation(
// Construct component operations and run the cost computation.
auto costs = PredictFusedOp(op_context_with_output, component_ops);
costs.inaccurate |= found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = costs.inaccurate;
return costs;
}
@@ -1157,6 +1168,7 @@ Costs OpLevelCostEstimator::PredictMatMul(const OpContext& op_context) const {
auto costs = PredictOpCountBasedCost(
CountMatMulOperations(op_features, &found_unknown_shapes), op_features);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
return costs;
}
@@ -1171,6 +1183,7 @@ Costs OpLevelCostEstimator::PredictIdentity(const OpContext& op_context) const {
VLOG(1) << "Op:" << op_features.op() << " Execution Time 0 (ns)";
Costs result = Costs::ZeroCosts();
result.max_memory = CalculateOutputSize(op_features, &result.inaccurate);
+ result.num_ops_with_unknown_shapes = result.inaccurate;
// Assign the minimum amount of time we can represent to the identity op since
// it tends to be really cheap.
result.compute_time = kMinComputeTime;
@@ -1184,6 +1197,7 @@ Costs OpLevelCostEstimator::PredictVariable(const OpContext& op_context) const {
Costs result = Costs::ZeroCosts();
result.persistent_memory =
CalculateOutputSize(op_features, &result.inaccurate);
+ result.num_ops_with_unknown_shapes = result.inaccurate;
result.compute_time = kMinComputeTime;
result.execution_time = result.execution_time;
@@ -1198,6 +1212,7 @@ Costs OpLevelCostEstimator::PredictBatchMatMul(
CountBatchMatMulOperations(op_features, &found_unknown_shapes),
op_features);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
return costs;
}
@@ -1205,6 +1220,7 @@ Costs OpLevelCostEstimator::PredictMetadata(const OpContext& op_context) const {
const auto& op_features = op_context.op_info;
Costs costs = Costs::ZeroCosts();
costs.max_memory = CalculateOutputSize(op_features, &costs.inaccurate);
+ costs.num_ops_with_unknown_shapes = costs.inaccurate;
// Metadata operations are so cheap we assume they take the minimum amount of
// time we can represent (1 ns).
costs.compute_time = kMinComputeTime;
@@ -1249,6 +1265,7 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice(
const double total_io = input_size + output_size;
Costs costs = PredictOpCountBasedCost(op_count, total_io, op_info);
costs.inaccurate = unknown_shapes;
+ costs.num_ops_with_unknown_shapes = unknown_shapes;
costs.max_memory = output_size;
return costs;
@@ -1390,6 +1407,7 @@ Costs OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context) const {
Costs costs = PredictOpCountBasedCost(
ops, total_input_size + total_output_size, op_info);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
costs.max_memory = total_output_size;
return costs;
}
@@ -1432,6 +1450,7 @@ Costs OpLevelCostEstimator::PredictMaxPoolGrad(
Costs costs = PredictOpCountBasedCost(
ops, total_input_size + total_output_size, op_info);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
costs.max_memory = total_output_size;
return costs;
}
@@ -1464,6 +1483,7 @@ Costs OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context) const {
Costs costs = PredictOpCountBasedCost(
ops, total_input_size + total_output_size, op_info);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
costs.max_memory = total_output_size;
return costs;
}
@@ -1516,6 +1536,7 @@ Costs OpLevelCostEstimator::PredictAvgPoolGrad(
Costs costs = PredictOpCountBasedCost(
ops, total_input_size + total_output_size, op_info);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
costs.max_memory = total_output_size;
return costs;
}
@@ -1562,6 +1583,7 @@ Costs OpLevelCostEstimator::PredictFusedBatchNorm(
ops, total_input_size + total_output_size + total_internal_read_size,
op_info);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
costs.max_memory = total_output_size;
return costs;
}
@@ -1595,6 +1617,7 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad(
ops, total_input_size + total_output_size + total_internal_read_size,
op_info);
costs.inaccurate = found_unknown_shapes;
+ costs.num_ops_with_unknown_shapes = found_unknown_shapes;
costs.max_memory = total_output_size;
return costs;
}
diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
index 77352f6652..7271a29319 100644
--- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
+++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc
@@ -488,7 +488,9 @@ TEST_F(OpLevelCostEstimatorTest, TestGatherCosts) {
EXPECT_EQ(Costs::Duration(130), cost.memory_time);
EXPECT_EQ(Costs::Duration(16), cost.compute_time);
EXPECT_EQ(Costs::Duration(146), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, TestGatherCostsWithoutOutput) {
@@ -504,7 +506,9 @@ TEST_F(OpLevelCostEstimatorTest, TestGatherCostsWithoutOutput) {
EXPECT_EQ(Costs::Duration(0), cost.memory_time);
EXPECT_EQ(Costs::Duration(0), cost.compute_time);
EXPECT_EQ(Costs::Duration(0), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) {
@@ -522,7 +526,9 @@ TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) {
EXPECT_EQ(Costs::Duration(81), cost.memory_time);
EXPECT_EQ(Costs::Duration(10), cost.compute_time);
EXPECT_EQ(Costs::Duration(91), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) {
@@ -530,7 +536,9 @@ TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) {
EXPECT_EQ(Costs::Duration(8400), cost.memory_time);
EXPECT_EQ(Costs::Duration(1000), cost.compute_time);
EXPECT_EQ(Costs::Duration(9400), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, Conv2DExecutionTime) {
@@ -538,7 +546,9 @@ TEST_F(OpLevelCostEstimatorTest, Conv2DExecutionTime) {
EXPECT_EQ(Costs::Duration(233780), cost.memory_time);
EXPECT_EQ(Costs::Duration(354877440), cost.compute_time);
EXPECT_EQ(Costs::Duration(355111220), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, DepthwiseConv2dNativeExecutionTime) {
@@ -547,7 +557,9 @@ TEST_F(OpLevelCostEstimatorTest, DepthwiseConv2dNativeExecutionTime) {
EXPECT_EQ(Costs::Duration(112340), cost.memory_time);
EXPECT_EQ(Costs::Duration(4158720), cost.compute_time);
EXPECT_EQ(Costs::Duration(4271060), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) {
@@ -555,7 +567,9 @@ TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) {
EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
EXPECT_EQ(Costs::Duration(0), cost.compute_time);
EXPECT_EQ(Costs::Duration(2000), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) {
@@ -564,7 +578,9 @@ TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) {
EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
EXPECT_EQ(Costs::Duration(0), cost.compute_time);
EXPECT_EQ(Costs::Duration(2000), cost.execution_time); // max(2000, 200)
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
SetComputeMemoryOverlap(false); // Set it back to default.
}
@@ -576,7 +592,9 @@ TEST_F(OpLevelCostEstimatorTest,
EXPECT_EQ(Costs::Duration(825345), cost.memory_time);
EXPECT_EQ(Costs::Duration(355321038), cost.compute_time);
EXPECT_EQ(Costs::Duration(356146383), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_HWIO) {
@@ -586,7 +604,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_HWIO) {
EXPECT_EQ(Costs::Duration(1416808), cost.memory_time);
EXPECT_EQ(Costs::Duration(355616770), cost.compute_time);
EXPECT_EQ(Costs::Duration(357033578), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_OIHW) {
@@ -596,7 +616,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_OIHW) {
EXPECT_EQ(Costs::Duration(1416808), cost.memory_time);
EXPECT_EQ(Costs::Duration(355616770), cost.compute_time);
EXPECT_EQ(Costs::Duration(357033578), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_HWIO) {
@@ -606,7 +628,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_HWIO) {
EXPECT_EQ(Costs::Duration(1416808), cost.memory_time);
EXPECT_EQ(Costs::Duration(355616770), cost.compute_time);
EXPECT_EQ(Costs::Duration(357033578), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_OIHW) {
@@ -616,7 +640,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_OIHW) {
EXPECT_EQ(Costs::Duration(1416808), cost.memory_time);
EXPECT_EQ(Costs::Duration(355616770), cost.compute_time);
EXPECT_EQ(Costs::Duration(357033578), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
// TODO(yaozhang): Update once NCHW_VECT_C is supported.
@@ -627,7 +653,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_VECT_C_OIHW) {
EXPECT_EQ(Costs::Duration(0), cost.memory_time);
EXPECT_EQ(Costs::Duration(0), cost.compute_time);
EXPECT_EQ(Costs::Duration(0), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
// TODO(yaozhang): Update once OIHW_VECT_I is supported.
@@ -638,7 +666,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_OIHW_VECT_I) {
EXPECT_EQ(Costs::Duration(0), cost.memory_time);
EXPECT_EQ(Costs::Duration(0), cost.compute_time);
EXPECT_EQ(Costs::Duration(0), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) {
@@ -646,7 +676,9 @@ TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) {
EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
EXPECT_EQ(Costs::Duration(200), cost.compute_time);
EXPECT_EQ(Costs::Duration(2200), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) {
@@ -654,7 +686,9 @@ TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) {
EXPECT_EQ(Costs::Duration(3600), cost.memory_time);
EXPECT_EQ(Costs::Duration(400), cost.compute_time);
EXPECT_EQ(Costs::Duration(4000), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, ModExecutionTime) {
@@ -662,7 +696,9 @@ TEST_F(OpLevelCostEstimatorTest, ModExecutionTime) {
EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
EXPECT_EQ(Costs::Duration(1600), cost.compute_time);
EXPECT_EQ(Costs::Duration(3600), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) {
@@ -670,28 +706,77 @@ TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) {
EXPECT_EQ(Costs::Duration(800), cost.memory_time);
EXPECT_EQ(Costs::Duration(100), cost.compute_time);
EXPECT_EQ(Costs::Duration(900), cost.execution_time);
+ EXPECT_EQ(1, cost.num_ops_total);
EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
}
TEST_F(OpLevelCostEstimatorTest, UnknownOrPartialShape) {
- EXPECT_FALSE(PredictCosts(DescribeMatMul(2, 4, 7, 7)).inaccurate);
- EXPECT_TRUE(PredictCosts(DescribeMatMul(-1, 4, 7, 7)).inaccurate);
- EXPECT_TRUE(PredictCosts(DescribeMatMul(2, 4, -1, 7)).inaccurate);
-
- EXPECT_FALSE(PredictCosts(DescribeConvolution(16, 19, 19, 48, 48, 5, 5, 256))
- .inaccurate);
- EXPECT_TRUE(PredictCosts(DescribeConvolution(16, -1, 19, 48, 48, 5, 5, 256))
- .inaccurate);
+ {
+ auto cost = PredictCosts(DescribeMatMul(2, 4, 7, 7));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost = PredictCosts(DescribeMatMul(-1, 4, 7, 7));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(1, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost = PredictCosts(DescribeMatMul(2, 4, -1, 7));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(1, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost =
+ PredictCosts(DescribeConvolution(16, 19, 19, 48, 48, 5, 5, 256));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost =
+ PredictCosts(DescribeConvolution(16, -1, 19, 48, 48, 5, 5, 256));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(1, cost.num_ops_with_unknown_shapes);
+ }
}
TEST_F(OpLevelCostEstimatorTest, BatchMatMul) {
- EXPECT_TRUE(PredictCosts(DescribeBatchMatMul({}, {})).inaccurate);
- EXPECT_TRUE(PredictCosts(DescribeBatchMatMul({2, 4}, {})).inaccurate);
- EXPECT_FALSE(PredictCosts(DescribeBatchMatMul({2, 4}, {4, 2})).inaccurate);
- EXPECT_FALSE(
- PredictCosts(DescribeBatchMatMul({1, 2, 4}, {1, 4, 2})).inaccurate);
- EXPECT_FALSE(
- PredictCosts(DescribeBatchMatMul({2, 4}, {1, 3, 4, 2})).inaccurate);
+ {
+ auto cost = PredictCosts(DescribeBatchMatMul({}, {}));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(1, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost = PredictCosts(DescribeBatchMatMul({2, 4}, {}));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_TRUE(cost.inaccurate);
+ EXPECT_EQ(1, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost = PredictCosts(DescribeBatchMatMul({2, 4}, {4, 2}));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost = PredictCosts(DescribeBatchMatMul({1, 2, 4}, {1, 4, 2}));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
+ }
+ {
+ auto cost = PredictCosts(DescribeBatchMatMul({2, 4}, {1, 3, 4, 2}));
+ EXPECT_EQ(1, cost.num_ops_total);
+ EXPECT_FALSE(cost.inaccurate);
+ EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
+ }
bool matmul_inaccurate = false;
bool batch_matmul_inaccurate = false;
EXPECT_EQ(
@@ -813,7 +898,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) {
EXPECT_EQ(Costs::Duration(1075200), costs.execution_time);
EXPECT_EQ(Costs::Duration(307200), costs.compute_time);
EXPECT_EQ(Costs::Duration(768000), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 1x1 window with 2x2 stride: used for shortcut in resnet-50.
@@ -821,7 +908,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) {
EXPECT_EQ(Costs::Duration(499200), costs.execution_time);
EXPECT_EQ(Costs::Duration(38400), costs.compute_time);
EXPECT_EQ(Costs::Duration(460800), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 2x2 window with 3x3 stride.
@@ -829,7 +918,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) {
EXPECT_EQ(Costs::Duration(561792), costs.execution_time);
EXPECT_EQ(Costs::Duration(56448), costs.compute_time);
EXPECT_EQ(Costs::Duration(505344), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
}
@@ -849,7 +940,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPoolGrad) {
EXPECT_EQ(Costs::Duration(1996800), costs.execution_time);
EXPECT_EQ(Costs::Duration(614400), costs.compute_time);
EXPECT_EQ(Costs::Duration(1382400), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 1x1 window with 2x2 stride: used for shortcut in resnet-50.
@@ -857,7 +950,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPoolGrad) {
EXPECT_EQ(Costs::Duration(1536000), costs.execution_time);
EXPECT_EQ(Costs::Duration(153600), costs.compute_time);
EXPECT_EQ(Costs::Duration(1382400), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 2x2 window with 3x3 stride.
@@ -865,7 +960,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPoolGrad) {
EXPECT_EQ(Costs::Duration(1514112), costs.execution_time);
EXPECT_EQ(Costs::Duration(210048), costs.compute_time);
EXPECT_EQ(Costs::Duration(1304064), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
}
@@ -884,7 +981,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPool) {
EXPECT_EQ(Costs::Duration(1113600), costs.execution_time);
EXPECT_EQ(Costs::Duration(345600), costs.compute_time);
EXPECT_EQ(Costs::Duration(768000), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 1x1 window with 2x2 stride: used for shortcut in resnet-50.
@@ -892,7 +991,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPool) {
EXPECT_EQ(Costs::Duration(499200), costs.execution_time);
EXPECT_EQ(Costs::Duration(38400), costs.compute_time);
EXPECT_EQ(Costs::Duration(460800), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 2x2 window with 3x3 stride.
@@ -900,7 +1001,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPool) {
EXPECT_EQ(Costs::Duration(580608), costs.execution_time);
EXPECT_EQ(Costs::Duration(75264), costs.compute_time);
EXPECT_EQ(Costs::Duration(505344), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
}
@@ -920,7 +1023,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPoolGrad) {
EXPECT_EQ(Costs::Duration(1305602), costs.execution_time);
EXPECT_EQ(Costs::Duration(537600), costs.compute_time);
EXPECT_EQ(Costs::Duration(768002), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 1x1 window with 2x2 stride: used for shortcut in resnet-50.
@@ -928,7 +1033,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPoolGrad) {
EXPECT_EQ(Costs::Duration(960002), costs.execution_time);
EXPECT_EQ(Costs::Duration(192000), costs.compute_time);
EXPECT_EQ(Costs::Duration(768002), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
// 2x2 window with 3x3 stride.
@@ -936,7 +1043,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPoolGrad) {
EXPECT_EQ(Costs::Duration(862082), costs.execution_time);
EXPECT_EQ(Costs::Duration(172416), costs.compute_time);
EXPECT_EQ(Costs::Duration(689666), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
}
@@ -953,7 +1062,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) {
EXPECT_EQ(Costs::Duration(614737), costs.execution_time);
EXPECT_EQ(Costs::Duration(153706), costs.compute_time);
EXPECT_EQ(Costs::Duration(461031), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
@@ -961,7 +1072,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) {
EXPECT_EQ(Costs::Duration(204913), costs.execution_time);
EXPECT_EQ(Costs::Duration(51236), costs.compute_time);
EXPECT_EQ(Costs::Duration(153677), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
@@ -969,7 +1082,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) {
EXPECT_EQ(Costs::Duration(384154), costs.execution_time);
EXPECT_EQ(Costs::Duration(76800), costs.compute_time);
EXPECT_EQ(Costs::Duration(307354), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
@@ -978,6 +1093,8 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) {
EXPECT_EQ(Costs::Duration(25600), costs.compute_time);
EXPECT_EQ(Costs::Duration(102452), costs.memory_time);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(1, costs.num_ops_total);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
}
@@ -994,7 +1111,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNormGrad) {
EXPECT_EQ(Costs::Duration(1037050), costs.execution_time);
EXPECT_EQ(Costs::Duration(422496), costs.compute_time);
EXPECT_EQ(Costs::Duration(614554), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
{
@@ -1002,7 +1121,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNormGrad) {
EXPECT_EQ(Costs::Duration(6503809), costs.execution_time);
EXPECT_EQ(Costs::Duration(2649677), costs.compute_time);
EXPECT_EQ(Costs::Duration(3854132), costs.memory_time);
+ EXPECT_EQ(1, costs.num_ops_total);
EXPECT_FALSE(costs.inaccurate);
+ EXPECT_EQ(0, costs.num_ops_with_unknown_shapes);
}
}
} // end namespace grappler
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index f31d22e105..6e3ebdee12 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -47,9 +47,11 @@ Costs CombineCosts(const Costs& left, const Costs& right) {
result.execution_time += right.execution_time;
result.compute_time += right.compute_time;
result.memory_time += right.memory_time;
- if (right.inaccurate) {
- result.inaccurate = true;
- }
+
+ result.num_ops_total += right.num_ops_total;
+ if (right.inaccurate) result.inaccurate = true;
+ result.num_ops_with_unknown_shapes += right.num_ops_with_unknown_shapes;
+
if (right.max_memory != kMemoryUnknown) {
result.max_memory += right.max_memory;
}
@@ -283,6 +285,7 @@ VirtualScheduler::VirtualScheduler(const GrapplerItem* grappler_item,
grappler_item_(grappler_item),
use_static_shapes_(use_static_shapes),
placer_(cluster) {
+ graph_costs_.num_ops_total = 0;
initialized_ = false;
}
@@ -845,6 +848,11 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
}
Costs VirtualScheduler::Summary() const {
+ // Overall statement about accuracy
+ VLOG(1) << graph_costs_.num_ops_total << " ops processed in total, with "
+ << graph_costs_.num_ops_with_unknown_shapes
+ << " having unknown shapes";
+
// Print out basic execution summary.
VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count();
VLOG(1) << "Expected compute time: " << graph_costs_.compute_time.count();
@@ -906,6 +914,12 @@ Costs VirtualScheduler::Summary() const {
<< ", at the end: "
<< strings::HumanReadableNumBytes(state.memory_usage);
+ // Overall statement about accuracy
+ VLOG(1) << state.device_costs.num_ops_total
+ << " ops processed in total, with "
+ << state.device_costs.num_ops_with_unknown_shapes
+ << " having unknown shapes";
+
VLOG(1) << "Per-op execution time / compute time / memory time "
"(and memory usage at peak memory usage):";
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h
index 353ca6f071..0e66e8a463 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.h
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.h
@@ -114,6 +114,7 @@ struct DeviceState {
DeviceState() {
device_costs = Costs::ZeroCosts();
+ device_costs.num_ops_total = 0;
memory_usage = 0;
max_memory_usage = 0;
}
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
index f9154e42f9..b1373d8317 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc
@@ -942,7 +942,6 @@ versions {
// target_node.
std::unordered_map<string, OpContext> RunScheduler(
const string& target_node) {
- Costs zero_costs = Costs::ZeroCosts();
std::unordered_map<string, OpContext> ops_executed;
bool more_nodes = true;
do {
@@ -1632,6 +1631,9 @@ TEST_F(VirtualSchedulerTest, SummaryCostTest) {
// Misc - 5 * 1us
// Total: 13000005
EXPECT_EQ(13000005, c.execution_time.asMicroSeconds().count());
+ EXPECT_EQ(grappler_item_->graph.node_size(), c.num_ops_total);
+ EXPECT_FALSE(c.inaccurate);
+ EXPECT_EQ(0, c.num_ops_with_unknown_shapes);
}
// Like the above SummaryCostTest, but makes sure the stepstats timeline is
@@ -1645,6 +1647,9 @@ TEST_F(VirtualSchedulerTest, SummaryCostStepStatsTest) {
Costs c = scheduler_->Summary(&metadata);
StepStats stepstats = metadata.step_stats();
EXPECT_EQ(13000005, c.execution_time.asMicroSeconds().count());
+ EXPECT_EQ(grappler_item_->graph.node_size(), c.num_ops_total);
+ EXPECT_FALSE(c.inaccurate);
+ EXPECT_EQ(0, c.num_ops_with_unknown_shapes);
// Should only be 1 device!
EXPECT_EQ(1, stepstats.dev_stats().size());