diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-06-22 05:06:44 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-06-22 05:09:48 -0700 |
commit | b353c139489e67c08cba92a15779e30dbc870bf9 (patch) | |
tree | a932cc75fa90b1295325a90c1e5749f1c601c0b2 | |
parent | 4613ceab3bf55e98dc529dad62c385c6a0b6ea7e (diff) |
Enable duplicating instructions with same input and output size in fusion
PiperOrigin-RevId: 201669139
-rw-r--r-- | tensorflow/compiler/xla/service/instruction_fusion.cc | 24 |
1 file changed, 24 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 861f3eafed..9ac8635767 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -238,6 +238,30 @@ InstructionFusion::ComputeGloballyUnfusable( if (EffectivelyAtMostUnary(producer)) { continue; } + + // If the total size of the inputs is less than or equal to the total size + // of the outputs for the producer then duplicating it won't increase the + // memory traffic. In that case, we do not forbid fusion of the operation + // here. + auto total_size = [](const Shape& shape) { + int64 size = 0; + ShapeUtil::ForEachSubshape( + shape, + [&size](const Shape& subshape, const ShapeIndex& shape_index) { + if (ShapeUtil::IsArray(subshape)) { + size += ShapeUtil::ElementsIn(subshape); + } + }); + return size; + }; + int64 operands_size = 0; + for (const HloInstruction* op : producer->operands()) { + operands_size += total_size(op->shape()); + } + if (operands_size <= total_size(producer->shape())) { + continue; + } + // Otherwise we will forbid fusing the op unless we can fuse it into // all of its consumers on all paths. // |