author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-06-22 05:06:44 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2018-06-22 05:09:48 -0700
commit    b353c139489e67c08cba92a15779e30dbc870bf9 (patch)
tree      a932cc75fa90b1295325a90c1e5749f1c601c0b2
parent    4613ceab3bf55e98dc529dad62c385c6a0b6ea7e (diff)
Enable duplicating instructions with same input and output size in fusion
PiperOrigin-RevId: 201669139
-rw-r--r--  tensorflow/compiler/xla/service/instruction_fusion.cc  24
1 file changed, 24 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index 861f3eafed..9ac8635767 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -238,6 +238,30 @@ InstructionFusion::ComputeGloballyUnfusable(
if (EffectivelyAtMostUnary(producer)) {
continue;
}
+
+ // If the total size of the inputs is less than or equal to the total size
+ // of the outputs for the producer, then duplicating it won't increase the
+ // memory traffic. In that case, we do not forbid fusion of the operation
+ // here.
+ auto total_size = [](const Shape& shape) {
+ int64 size = 0;
+ ShapeUtil::ForEachSubshape(
+ shape,
+ [&size](const Shape& subshape, const ShapeIndex& shape_index) {
+ if (ShapeUtil::IsArray(subshape)) {
+ size += ShapeUtil::ElementsIn(subshape);
+ }
+ });
+ return size;
+ };
+ int64 operands_size = 0;
+ for (const HloInstruction* op : producer->operands()) {
+ operands_size += total_size(op->shape());
+ }
+ if (operands_size <= total_size(producer->shape())) {
+ continue;
+ }
+
// Otherwise we will forbid fusing the op unless we can fuse it into
// all of its consumers on all paths.
//
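The rationale behind the new check: when a producer is duplicated into each of its consumers, its operands are re-read once per consumer; when it is left unfused, its output is materialized and re-read once per consumer instead. If the operands together are no larger than the output, duplication therefore cannot add memory traffic. Below is a minimal, self-contained C++ sketch of that comparison. SimpleShape, ElementsIn, and DuplicationIsFree are hypothetical stand-ins, not XLA API; only the size comparison (total operand elements vs. producer output elements) mirrors the patch.

// A minimal sketch of the size heuristic, independent of XLA. The types
// and helpers below are hypothetical; only the final comparison mirrors
// instruction_fusion.cc.
#include <cstdint>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// Hypothetical stand-in for an array shape: just its dimensions.
struct SimpleShape {
  std::vector<int64_t> dims;
};

// Total element count of one shape (cf. ShapeUtil::ElementsIn).
int64_t ElementsIn(const SimpleShape& shape) {
  return std::accumulate(shape.dims.begin(), shape.dims.end(), int64_t{1},
                         std::multiplies<int64_t>());
}

// True when duplicating a producer cannot increase memory traffic: each
// duplicate re-reads the operands, which together are no larger than the
// output that every consumer would otherwise re-read.
bool DuplicationIsFree(const std::vector<SimpleShape>& operand_shapes,
                       const SimpleShape& output_shape) {
  int64_t operands_size = 0;
  for (const SimpleShape& s : operand_shapes) {
    operands_size += ElementsIn(s);
  }
  return operands_size <= ElementsIn(output_shape);
}

int main() {
  // Broadcast-like producer: 10 input elements, 10x100 output elements.
  // Inputs (10) <= outputs (1000), so duplication is free -- prints 1.
  std::cout << DuplicationIsFree({{{10}}}, {{10, 100}}) << "\n";

  // Reduction-like producer: 10x100 input elements, 10 output elements.
  // Inputs (1000) > outputs (10), so duplication is forbidden -- prints 0.
  std::cout << DuplicationIsFree({{{10, 100}}}, {{10}}) << "\n";
}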