From 2625345c727b14f8e770d4f980fe86e9ccc8b03d Mon Sep 17 00:00:00 2001 From: Peter Ma Date: Fri, 10 Aug 2018 15:03:22 -0700 Subject: Add two counters in Costs Struct for number of ops processed/predicted in total, and number of ops predicted with unknown shapes PiperOrigin-RevId: 208274158 --- .../costs/analytical_cost_estimator_test.cc | 4 + tensorflow/core/grappler/costs/cost_estimator.h | 8 ++ .../core/grappler/costs/op_level_cost_estimator.cc | 23 ++++ .../grappler/costs/op_level_cost_estimator_test.cc | 151 +++++++++++++++++++-- .../core/grappler/costs/virtual_scheduler.cc | 20 ++- tensorflow/core/grappler/costs/virtual_scheduler.h | 1 + .../core/grappler/costs/virtual_scheduler_test.cc | 7 +- 7 files changed, 195 insertions(+), 19 deletions(-) diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc index f241922471..a9a1abfa98 100644 --- a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc @@ -103,6 +103,9 @@ TEST_F(AnalyticalCostEstimatorTest, SimpleTest) { TF_ASSERT_OK(estimator.PredictCosts(item.graph, &cost_graph, &summary)); EXPECT_EQ(Costs::NanoSeconds(9151), summary.execution_time); + // Note there are totally 17 nodes (RandomUniform creates 2 nodes), but + // grappler will not process "label", therefore we have 15 here instead + EXPECT_EQ(15, summary.num_ops_total); // Make this estimate accurate: // TODO(http://b/70031255): Accurate estimator for RandomUniform op needed @@ -110,6 +113,7 @@ TEST_F(AnalyticalCostEstimatorTest, SimpleTest) { // // Change to EXPECT_FALSE when the above TODOs are done: EXPECT_TRUE(summary.inaccurate); + EXPECT_EQ(0, summary.num_ops_with_unknown_shapes); } } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/cost_estimator.h b/tensorflow/core/grappler/costs/cost_estimator.h index fe8a876f8a..e91f0cc9da 100644 --- 
a/tensorflow/core/grappler/costs/cost_estimator.h +++ b/tensorflow/core/grappler/costs/cost_estimator.h @@ -109,8 +109,16 @@ struct Costs { int64 max_per_op_buffers; // Sum of all buffers used by the ops. int64 max_per_op_streaming; // Ignore largest input buffer, assuming it // streams from main memory. + + // Number of ops included in this Costs in total. + // Default initialized to be one. + int64 num_ops_total = 1; // If the time estimation is inaccurate. bool inaccurate = false; + // Number of ops that are estimated with unknown shapes. + int64 num_ops_with_unknown_shapes = 0; + // TODO(pcma): include a counter for total inaccurate ops and counters for + // other reasons causing the inaccuracy // Max possible memory usage per device. std::unordered_map<string, uint64> estimated_max_memory_per_device; diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc index 5b303f6ccb..6406a4bdbf 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator.cc @@ -449,6 +449,7 @@ Costs OpLevelCostEstimator::PredictCwiseOp(const OpContext& op_context) const { if (found_unknown_shapes || !is_known_elementwise_op) { costs.inaccurate = true; } + costs.num_ops_with_unknown_shapes = found_unknown_shapes; return costs; } @@ -469,6 +470,7 @@ Costs OpLevelCostEstimator::PredictOpCountBasedCost( const double total_io_bytes = input_size + output_size; Costs costs = PredictOpCountBasedCost(operations, total_io_bytes, op_info); costs.inaccurate = unknown_shapes; + costs.num_ops_with_unknown_shapes = unknown_shapes; costs.max_memory = output_size; return costs; } @@ -627,6 +629,7 @@ int64 OpLevelCostEstimator::CountMatMulOperations( if (op_features.inputs_size() < 2) { LOG(ERROR) << "Need 2 inputs but got " << op_features.inputs_size(); + // TODO(pcma): Try to separate invalid inputs from unknown shapes *found_unknown_shapes = true; return 0; } @@ -694,11 
+697,13 @@ int64 OpLevelCostEstimator::CountBatchMatMulOperations( const OpInfo& op_features, bool* found_unknown_shapes) const { if (op_features.op() != kBatchMatMul) { LOG(ERROR) << "Invalid Operation: " << op_features.op(); + // TODO(pcma): Try to separate invalid inputs from unknown shapes *found_unknown_shapes = true; return 0; } if (op_features.inputs_size() != 2) { LOG(ERROR) << "Expected 2 inputs but got " << op_features.inputs_size(); + // TODO(pcma): Try to separate invalid inputs from unknown shapes *found_unknown_shapes = true; return 0; } @@ -858,6 +863,7 @@ int64 OpLevelCostEstimator::CountConv2DBackpropInputOperations( "kDepthwiseConv2dNativeBackpropInput"; if (op_features.inputs_size() < 2) { + // TODO(pcma): Try to separate invalid inputs from unknown shapes *found_unknown_shapes = true; return ops; } @@ -935,6 +941,7 @@ int64 OpLevelCostEstimator::CountConv2DBackpropFilterOperations( } if (op_features.inputs_size() < 1) { + // TODO(pcma): Try to separate invalid inputs from unknown shapes *found_unknown_shapes = true; return ops; } @@ -1037,6 +1044,7 @@ Costs OpLevelCostEstimator::PredictConv2D(const OpContext& op_context) const { auto costs = PredictOpCountBasedCost( CountConv2DOperations(op_features, &found_unknown_shapes), op_features); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; return costs; } @@ -1049,6 +1057,7 @@ Costs OpLevelCostEstimator::PredictConv2DBackpropInput( op_features, nullptr, &found_unknown_shapes), op_features); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; return costs; } @@ -1061,6 +1070,7 @@ Costs OpLevelCostEstimator::PredictConv2DBackpropFilter( op_features, nullptr, &found_unknown_shapes), op_features); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; return costs; } @@ -1148,6 +1158,7 @@ Costs OpLevelCostEstimator::PredictFusedConv2DBiasActivation( // 
Construct component operations and run the cost computation. auto costs = PredictFusedOp(op_context_with_output, component_ops); costs.inaccurate |= found_unknown_shapes; + costs.num_ops_with_unknown_shapes = costs.inaccurate; return costs; } @@ -1157,6 +1168,7 @@ Costs OpLevelCostEstimator::PredictMatMul(const OpContext& op_context) const { auto costs = PredictOpCountBasedCost( CountMatMulOperations(op_features, &found_unknown_shapes), op_features); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; return costs; } @@ -1171,6 +1183,7 @@ Costs OpLevelCostEstimator::PredictIdentity(const OpContext& op_context) const { VLOG(1) << "Op:" << op_features.op() << " Execution Time 0 (ns)"; Costs result = Costs::ZeroCosts(); result.max_memory = CalculateOutputSize(op_features, &result.inaccurate); + result.num_ops_with_unknown_shapes = result.inaccurate; // Assign the minimum amount of time we can represent to the identity op since // it tends to be really cheap. 
result.compute_time = kMinComputeTime; @@ -1184,6 +1197,7 @@ Costs OpLevelCostEstimator::PredictVariable(const OpContext& op_context) const { Costs result = Costs::ZeroCosts(); result.persistent_memory = CalculateOutputSize(op_features, &result.inaccurate); + result.num_ops_with_unknown_shapes = result.inaccurate; result.compute_time = kMinComputeTime; result.execution_time = result.execution_time; @@ -1198,6 +1212,7 @@ Costs OpLevelCostEstimator::PredictBatchMatMul( CountBatchMatMulOperations(op_features, &found_unknown_shapes), op_features); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; return costs; } @@ -1205,6 +1220,7 @@ Costs OpLevelCostEstimator::PredictMetadata(const OpContext& op_context) const { const auto& op_features = op_context.op_info; Costs costs = Costs::ZeroCosts(); costs.max_memory = CalculateOutputSize(op_features, &costs.inaccurate); + costs.num_ops_with_unknown_shapes = costs.inaccurate; // Metadata operations are so cheap we assume they take the minimum amount of // time we can represent (1 ns). 
costs.compute_time = kMinComputeTime; @@ -1249,6 +1265,7 @@ Costs OpLevelCostEstimator::PredictGatherOrSlice( const double total_io = input_size + output_size; Costs costs = PredictOpCountBasedCost(op_count, total_io, op_info); costs.inaccurate = unknown_shapes; + costs.num_ops_with_unknown_shapes = unknown_shapes; costs.max_memory = output_size; return costs; @@ -1390,6 +1407,7 @@ Costs OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context) const { Costs costs = PredictOpCountBasedCost( ops, total_input_size + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; return costs; } @@ -1432,6 +1450,7 @@ Costs OpLevelCostEstimator::PredictMaxPoolGrad( Costs costs = PredictOpCountBasedCost( ops, total_input_size + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; return costs; } @@ -1464,6 +1483,7 @@ Costs OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context) const { Costs costs = PredictOpCountBasedCost( ops, total_input_size + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; return costs; } @@ -1516,6 +1536,7 @@ Costs OpLevelCostEstimator::PredictAvgPoolGrad( Costs costs = PredictOpCountBasedCost( ops, total_input_size + total_output_size, op_info); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; return costs; } @@ -1562,6 +1583,7 @@ Costs OpLevelCostEstimator::PredictFusedBatchNorm( ops, total_input_size + total_output_size + total_internal_read_size, op_info); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; return 
costs; } @@ -1595,6 +1617,7 @@ Costs OpLevelCostEstimator::PredictFusedBatchNormGrad( ops, total_input_size + total_output_size + total_internal_read_size, op_info); costs.inaccurate = found_unknown_shapes; + costs.num_ops_with_unknown_shapes = found_unknown_shapes; costs.max_memory = total_output_size; return costs; } diff --git a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc index 77352f6652..7271a29319 100644 --- a/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/op_level_cost_estimator_test.cc @@ -488,7 +488,9 @@ TEST_F(OpLevelCostEstimatorTest, TestGatherCosts) { EXPECT_EQ(Costs::Duration(130), cost.memory_time); EXPECT_EQ(Costs::Duration(16), cost.compute_time); EXPECT_EQ(Costs::Duration(146), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, TestGatherCostsWithoutOutput) { @@ -504,7 +506,9 @@ TEST_F(OpLevelCostEstimatorTest, TestGatherCostsWithoutOutput) { EXPECT_EQ(Costs::Duration(0), cost.memory_time); EXPECT_EQ(Costs::Duration(0), cost.compute_time); EXPECT_EQ(Costs::Duration(0), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) { @@ -522,7 +526,9 @@ TEST_F(OpLevelCostEstimatorTest, TestSliceCosts) { EXPECT_EQ(Costs::Duration(81), cost.memory_time); EXPECT_EQ(Costs::Duration(10), cost.compute_time); EXPECT_EQ(Costs::Duration(91), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) { @@ -530,7 +536,9 @@ TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) { EXPECT_EQ(Costs::Duration(8400), cost.memory_time); 
EXPECT_EQ(Costs::Duration(1000), cost.compute_time); EXPECT_EQ(Costs::Duration(9400), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, Conv2DExecutionTime) { @@ -538,7 +546,9 @@ TEST_F(OpLevelCostEstimatorTest, Conv2DExecutionTime) { EXPECT_EQ(Costs::Duration(233780), cost.memory_time); EXPECT_EQ(Costs::Duration(354877440), cost.compute_time); EXPECT_EQ(Costs::Duration(355111220), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, DepthwiseConv2dNativeExecutionTime) { @@ -547,7 +557,9 @@ TEST_F(OpLevelCostEstimatorTest, DepthwiseConv2dNativeExecutionTime) { EXPECT_EQ(Costs::Duration(112340), cost.memory_time); EXPECT_EQ(Costs::Duration(4158720), cost.compute_time); EXPECT_EQ(Costs::Duration(4271060), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) { @@ -555,7 +567,9 @@ TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) { EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(0), cost.compute_time); EXPECT_EQ(Costs::Duration(2000), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) { @@ -564,7 +578,9 @@ TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) { EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(0), cost.compute_time); EXPECT_EQ(Costs::Duration(2000), cost.execution_time); // max(2000, 200) + EXPECT_EQ(1, cost.num_ops_total); EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); SetComputeMemoryOverlap(false); // Set it back to default. 
} @@ -576,7 +592,9 @@ TEST_F(OpLevelCostEstimatorTest, EXPECT_EQ(Costs::Duration(825345), cost.memory_time); EXPECT_EQ(Costs::Duration(355321038), cost.compute_time); EXPECT_EQ(Costs::Duration(356146383), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_HWIO) { @@ -586,7 +604,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_HWIO) { EXPECT_EQ(Costs::Duration(1416808), cost.memory_time); EXPECT_EQ(Costs::Duration(355616770), cost.compute_time); EXPECT_EQ(Costs::Duration(357033578), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_OIHW) { @@ -596,7 +616,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_OIHW) { EXPECT_EQ(Costs::Duration(1416808), cost.memory_time); EXPECT_EQ(Costs::Duration(355616770), cost.compute_time); EXPECT_EQ(Costs::Duration(357033578), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_HWIO) { @@ -606,7 +628,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_HWIO) { EXPECT_EQ(Costs::Duration(1416808), cost.memory_time); EXPECT_EQ(Costs::Duration(355616770), cost.compute_time); EXPECT_EQ(Costs::Duration(357033578), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_OIHW) { @@ -616,7 +640,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNHWC_OIHW) { EXPECT_EQ(Costs::Duration(1416808), cost.memory_time); EXPECT_EQ(Costs::Duration(355616770), cost.compute_time); 
EXPECT_EQ(Costs::Duration(357033578), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } // TODO(yaozhang): Update once NCHW_VECT_C is supported. @@ -627,7 +653,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_VECT_C_OIHW) { EXPECT_EQ(Costs::Duration(0), cost.memory_time); EXPECT_EQ(Costs::Duration(0), cost.compute_time); EXPECT_EQ(Costs::Duration(0), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } // TODO(yaozhang): Update once OIHW_VECT_I is supported. @@ -638,7 +666,9 @@ TEST_F(OpLevelCostEstimatorTest, FusedConv2DBiasActivationNCHW_OIHW_VECT_I) { EXPECT_EQ(Costs::Duration(0), cost.memory_time); EXPECT_EQ(Costs::Duration(0), cost.compute_time); EXPECT_EQ(Costs::Duration(0), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) { @@ -646,7 +676,9 @@ TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) { EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(200), cost.compute_time); EXPECT_EQ(Costs::Duration(2200), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) { @@ -654,7 +686,9 @@ TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) { EXPECT_EQ(Costs::Duration(3600), cost.memory_time); EXPECT_EQ(Costs::Duration(400), cost.compute_time); EXPECT_EQ(Costs::Duration(4000), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, ModExecutionTime) { @@ -662,7 +696,9 @@ TEST_F(OpLevelCostEstimatorTest, ModExecutionTime) { 
EXPECT_EQ(Costs::Duration(2000), cost.memory_time); EXPECT_EQ(Costs::Duration(1600), cost.compute_time); EXPECT_EQ(Costs::Duration(3600), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) { @@ -670,28 +706,77 @@ TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) { EXPECT_EQ(Costs::Duration(800), cost.memory_time); EXPECT_EQ(Costs::Duration(100), cost.compute_time); EXPECT_EQ(Costs::Duration(900), cost.execution_time); + EXPECT_EQ(1, cost.num_ops_total); EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); } TEST_F(OpLevelCostEstimatorTest, UnknownOrPartialShape) { - EXPECT_FALSE(PredictCosts(DescribeMatMul(2, 4, 7, 7)).inaccurate); - EXPECT_TRUE(PredictCosts(DescribeMatMul(-1, 4, 7, 7)).inaccurate); - EXPECT_TRUE(PredictCosts(DescribeMatMul(2, 4, -1, 7)).inaccurate); - - EXPECT_FALSE(PredictCosts(DescribeConvolution(16, 19, 19, 48, 48, 5, 5, 256)) - .inaccurate); - EXPECT_TRUE(PredictCosts(DescribeConvolution(16, -1, 19, 48, 48, 5, 5, 256)) - .inaccurate); + { + auto cost = PredictCosts(DescribeMatMul(2, 4, 7, 7)); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } + { + auto cost = PredictCosts(DescribeMatMul(-1, 4, 7, 7)); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(1, cost.num_ops_with_unknown_shapes); + } + { + auto cost = PredictCosts(DescribeMatMul(2, 4, -1, 7)); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(1, cost.num_ops_with_unknown_shapes); + } + { + auto cost = + PredictCosts(DescribeConvolution(16, 19, 19, 48, 48, 5, 5, 256)); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } + { + auto cost = + PredictCosts(DescribeConvolution(16, -1, 19, 48, 48, 5, 5, 256)); + 
EXPECT_EQ(1, cost.num_ops_total); + EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(1, cost.num_ops_with_unknown_shapes); + } } TEST_F(OpLevelCostEstimatorTest, BatchMatMul) { - EXPECT_TRUE(PredictCosts(DescribeBatchMatMul({}, {})).inaccurate); - EXPECT_TRUE(PredictCosts(DescribeBatchMatMul({2, 4}, {})).inaccurate); - EXPECT_FALSE(PredictCosts(DescribeBatchMatMul({2, 4}, {4, 2})).inaccurate); - EXPECT_FALSE( - PredictCosts(DescribeBatchMatMul({1, 2, 4}, {1, 4, 2})).inaccurate); - EXPECT_FALSE( - PredictCosts(DescribeBatchMatMul({2, 4}, {1, 3, 4, 2})).inaccurate); + { + auto cost = PredictCosts(DescribeBatchMatMul({}, {})); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(1, cost.num_ops_with_unknown_shapes); + } + { + auto cost = PredictCosts(DescribeBatchMatMul({2, 4}, {})); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_TRUE(cost.inaccurate); + EXPECT_EQ(1, cost.num_ops_with_unknown_shapes); + } + { + auto cost = PredictCosts(DescribeBatchMatMul({2, 4}, {4, 2})); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } + { + auto cost = PredictCosts(DescribeBatchMatMul({1, 2, 4}, {1, 4, 2})); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } + { + auto cost = PredictCosts(DescribeBatchMatMul({2, 4}, {1, 3, 4, 2})); + EXPECT_EQ(1, cost.num_ops_total); + EXPECT_FALSE(cost.inaccurate); + EXPECT_EQ(0, cost.num_ops_with_unknown_shapes); + } bool matmul_inaccurate = false; bool batch_matmul_inaccurate = false; EXPECT_EQ( @@ -813,7 +898,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) { EXPECT_EQ(Costs::Duration(1075200), costs.execution_time); EXPECT_EQ(Costs::Duration(307200), costs.compute_time); EXPECT_EQ(Costs::Duration(768000), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 1x1 window with 2x2 stride: 
used for shortcut in resnet-50. @@ -821,7 +908,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) { EXPECT_EQ(Costs::Duration(499200), costs.execution_time); EXPECT_EQ(Costs::Duration(38400), costs.compute_time); EXPECT_EQ(Costs::Duration(460800), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 2x2 window with 3x3 stride. @@ -829,7 +918,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) { EXPECT_EQ(Costs::Duration(561792), costs.execution_time); EXPECT_EQ(Costs::Duration(56448), costs.compute_time); EXPECT_EQ(Costs::Duration(505344), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } } @@ -849,7 +940,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPoolGrad) { EXPECT_EQ(Costs::Duration(1996800), costs.execution_time); EXPECT_EQ(Costs::Duration(614400), costs.compute_time); EXPECT_EQ(Costs::Duration(1382400), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 1x1 window with 2x2 stride: used for shortcut in resnet-50. @@ -857,7 +950,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPoolGrad) { EXPECT_EQ(Costs::Duration(1536000), costs.execution_time); EXPECT_EQ(Costs::Duration(153600), costs.compute_time); EXPECT_EQ(Costs::Duration(1382400), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 2x2 window with 3x3 stride. 
@@ -865,7 +960,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictMaxPoolGrad) { EXPECT_EQ(Costs::Duration(1514112), costs.execution_time); EXPECT_EQ(Costs::Duration(210048), costs.compute_time); EXPECT_EQ(Costs::Duration(1304064), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } } @@ -884,7 +981,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPool) { EXPECT_EQ(Costs::Duration(1113600), costs.execution_time); EXPECT_EQ(Costs::Duration(345600), costs.compute_time); EXPECT_EQ(Costs::Duration(768000), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 1x1 window with 2x2 stride: used for shortcut in resnet-50. @@ -892,7 +991,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPool) { EXPECT_EQ(Costs::Duration(499200), costs.execution_time); EXPECT_EQ(Costs::Duration(38400), costs.compute_time); EXPECT_EQ(Costs::Duration(460800), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 2x2 window with 3x3 stride. @@ -900,7 +1001,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPool) { EXPECT_EQ(Costs::Duration(580608), costs.execution_time); EXPECT_EQ(Costs::Duration(75264), costs.compute_time); EXPECT_EQ(Costs::Duration(505344), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } } @@ -920,7 +1023,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPoolGrad) { EXPECT_EQ(Costs::Duration(1305602), costs.execution_time); EXPECT_EQ(Costs::Duration(537600), costs.compute_time); EXPECT_EQ(Costs::Duration(768002), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 1x1 window with 2x2 stride: used for shortcut in resnet-50. 
@@ -928,7 +1033,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPoolGrad) { EXPECT_EQ(Costs::Duration(960002), costs.execution_time); EXPECT_EQ(Costs::Duration(192000), costs.compute_time); EXPECT_EQ(Costs::Duration(768002), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { // 2x2 window with 3x3 stride. @@ -936,7 +1043,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictAvgPoolGrad) { EXPECT_EQ(Costs::Duration(862082), costs.execution_time); EXPECT_EQ(Costs::Duration(172416), costs.compute_time); EXPECT_EQ(Costs::Duration(689666), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } } @@ -953,7 +1062,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) { EXPECT_EQ(Costs::Duration(614737), costs.execution_time); EXPECT_EQ(Costs::Duration(153706), costs.compute_time); EXPECT_EQ(Costs::Duration(461031), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { @@ -961,7 +1072,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) { EXPECT_EQ(Costs::Duration(204913), costs.execution_time); EXPECT_EQ(Costs::Duration(51236), costs.compute_time); EXPECT_EQ(Costs::Duration(153677), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { @@ -969,7 +1082,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) { EXPECT_EQ(Costs::Duration(384154), costs.execution_time); EXPECT_EQ(Costs::Duration(76800), costs.compute_time); EXPECT_EQ(Costs::Duration(307354), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { @@ -978,6 +1093,8 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNorm) { EXPECT_EQ(Costs::Duration(25600), 
costs.compute_time); EXPECT_EQ(Costs::Duration(102452), costs.memory_time); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(1, costs.num_ops_total); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } } @@ -994,7 +1111,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNormGrad) { EXPECT_EQ(Costs::Duration(1037050), costs.execution_time); EXPECT_EQ(Costs::Duration(422496), costs.compute_time); EXPECT_EQ(Costs::Duration(614554), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } { @@ -1002,7 +1121,9 @@ TEST_F(OpLevelCostEstimatorTest, PredictFusedBatchNormGrad) { EXPECT_EQ(Costs::Duration(6503809), costs.execution_time); EXPECT_EQ(Costs::Duration(2649677), costs.compute_time); EXPECT_EQ(Costs::Duration(3854132), costs.memory_time); + EXPECT_EQ(1, costs.num_ops_total); EXPECT_FALSE(costs.inaccurate); + EXPECT_EQ(0, costs.num_ops_with_unknown_shapes); } } } // end namespace grappler diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index f31d22e105..6e3ebdee12 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -47,9 +47,11 @@ Costs CombineCosts(const Costs& left, const Costs& right) { result.execution_time += right.execution_time; result.compute_time += right.compute_time; result.memory_time += right.memory_time; - if (right.inaccurate) { - result.inaccurate = true; - } + + result.num_ops_total += right.num_ops_total; + if (right.inaccurate) result.inaccurate = true; + result.num_ops_with_unknown_shapes += right.num_ops_with_unknown_shapes; + if (right.max_memory != kMemoryUnknown) { result.max_memory += right.max_memory; } @@ -283,6 +285,7 @@ VirtualScheduler::VirtualScheduler(const GrapplerItem* grappler_item, grappler_item_(grappler_item), use_static_shapes_(use_static_shapes), placer_(cluster) { + graph_costs_.num_ops_total = 0; 
initialized_ = false; } @@ -845,6 +848,11 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { } Costs VirtualScheduler::Summary() const { + // Overall statement about accuracy + VLOG(1) << graph_costs_.num_ops_total << " ops processed in total, with " + << graph_costs_.num_ops_with_unknown_shapes + << " having unknown shapes"; + // Print out basic execution summary. VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count(); VLOG(1) << "Expected compute time: " << graph_costs_.compute_time.count(); @@ -906,6 +914,12 @@ Costs VirtualScheduler::Summary() const { << ", at the end: " << strings::HumanReadableNumBytes(state.memory_usage); + // Overall statement about accuracy + VLOG(1) << state.device_costs.num_ops_total + << " ops processed in total, with " + << state.device_costs.num_ops_with_unknown_shapes + << " having unknown shapes"; + VLOG(1) << "Per-op execution time / compute time / memory time " "(and memory usage at peak memory usage):"; diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h index 353ca6f071..0e66e8a463 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.h +++ b/tensorflow/core/grappler/costs/virtual_scheduler.h @@ -114,6 +114,7 @@ struct DeviceState { DeviceState() { device_costs = Costs::ZeroCosts(); + device_costs.num_ops_total = 0; memory_usage = 0; max_memory_usage = 0; } diff --git a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc index f9154e42f9..b1373d8317 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler_test.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler_test.cc @@ -942,7 +942,6 @@ versions { // target_node. 
std::unordered_map<string, OpContext> RunScheduler( const string& target_node) { - Costs zero_costs = Costs::ZeroCosts(); std::unordered_map<string, OpContext> ops_executed; bool more_nodes = true; do { @@ -1632,6 +1631,9 @@ TEST_F(VirtualSchedulerTest, SummaryCostTest) { // Misc - 5 * 1us // Total: 13000005 EXPECT_EQ(13000005, c.execution_time.asMicroSeconds().count()); + EXPECT_EQ(grappler_item_->graph.node_size(), c.num_ops_total); + EXPECT_FALSE(c.inaccurate); + EXPECT_EQ(0, c.num_ops_with_unknown_shapes); } // Like the above SummaryCostTest, but makes sure the stepstats timeline is @@ -1645,6 +1647,9 @@ TEST_F(VirtualSchedulerTest, SummaryCostStepStatsTest) { Costs c = scheduler_->Summary(&metadata); StepStats stepstats = metadata.step_stats(); EXPECT_EQ(13000005, c.execution_time.asMicroSeconds().count()); + EXPECT_EQ(grappler_item_->graph.node_size(), c.num_ops_total); + EXPECT_FALSE(c.inaccurate); + EXPECT_EQ(0, c.num_ops_with_unknown_shapes); // Should only be 1 device! EXPECT_EQ(1, stepstats.dev_stats().size()); -- cgit v1.2.3