about summary refs log tree commit diff homepage
path: root/tensorflow/compiler/xla/service/gpu/instruction_fusion.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/instruction_fusion.h')
-rw-r--r-- tensorflow/compiler/xla/service/gpu/instruction_fusion.h 13
1 files changed, 13 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.h b/tensorflow/compiler/xla/service/gpu/instruction_fusion.h
index f629d9ff2c..c91f6343a6 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.h
@@ -27,6 +27,19 @@ class GpuInstructionFusion : public InstructionFusion {
explicit GpuInstructionFusion(bool may_duplicate)
: InstructionFusion(GpuInstructionFusion::IsExpensive, may_duplicate) {}
+ // Maximum number of operands plus outputs allowed on a single fusion node.
+ // Exposed publicly mainly for tests.
+ static constexpr int64 kMaxOperandsAndOutputsPerFusion = 64;
+
+ // Determines whether the combination of `a` and `b` into a (possibly
+ // multi-output) fusion would be "too large" -- i.e., have more operands and
+ // outputs than is allowed.
+ //
+ // Called by `ShouldFuse` and `ShouldFuseIntoMultiOutput`; it is public so
+ // that other fusion passes (e.g. GPU multi-output fusion) can reuse it.
+ static bool FusionWouldBeTooLarge(const HloInstruction* a,
+ const HloInstruction* b);
+
static bool IsExpensive(const HloInstruction& instruction);
bool ShouldFuse(HloInstruction* consumer, int64 operand_index) override;