about summary refs log tree commit diff homepage
path: root/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/instruction_fusion.cc')
-rw-r--r-- tensorflow/compiler/xla/service/gpu/instruction_fusion.cc | 6
1 files changed, 3 insertions, 3 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
index 0f2c83aeb2..de8d021321 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
@@ -26,7 +26,7 @@ namespace gpu {
namespace {
-bool IsFusile(const HloInstruction& hlo) {
+bool IsFusible(const HloInstruction& hlo) {
// Don't fuse get-tuple-element on GPU: We can, but it's slower than not
// fusing. We never generate kernels for unfused GTEs. Instead, if an
// unfused GTE is an input to a kernel (including a fusion kernel), we
@@ -41,7 +41,7 @@ bool IsFusile(const HloInstruction& hlo) {
hlo.opcode() == HloOpcode::kDynamicUpdateSlice ||
hlo.opcode() == HloOpcode::kFusion ||
hlo.opcode() == HloOpcode::kGather ||
- hlo.opcode() == HloOpcode::kPad ||
+ hlo.opcode() == HloOpcode::kIota || hlo.opcode() == HloOpcode::kPad ||
hlo.opcode() == HloOpcode::kReduce ||
hlo.opcode() == HloOpcode::kReduceWindow ||
hlo.opcode() == HloOpcode::kReshape ||
@@ -245,7 +245,7 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
return true;
}
- if (!IsFusile(*producer) || !IsFusile(*consumer) ||
+ if (!IsFusible(*producer) || !IsFusible(*consumer) ||
!InstructionFusion::ShouldFuse(consumer, operand_index)) {
return false;
}