about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- tensorflow/compiler/jit/xla_fusion_optimizer.cc | 8
-rw-r--r-- tensorflow/compiler/jit/xla_fusion_optimizer_test.cc | 2
-rw-r--r-- tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc | 4
-rw-r--r-- tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc | 2
-rw-r--r-- tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc | 4
-rw-r--r-- tensorflow/compiler/xla/service/gpu/instruction_fusion.cc | 4
-rw-r--r-- tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc | 2
-rw-r--r-- tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc | 12
-rw-r--r-- tensorflow/compiler/xla/service/hlo_instruction.cc | 2
-rw-r--r-- tensorflow/compiler/xla/service/hlo_instruction.h | 2
-rw-r--r-- tensorflow/compiler/xla/service/hlo_instructions.cc | 2
-rw-r--r-- tensorflow/compiler/xla/service/instruction_fusion.cc | 24
-rw-r--r-- tensorflow/compiler/xla/service/instruction_fusion.h | 2
-rw-r--r-- tensorflow/compiler/xla/service/instruction_fusion_test.cc | 6
14 files changed, 38 insertions, 38 deletions
diff --git a/tensorflow/compiler/jit/xla_fusion_optimizer.cc b/tensorflow/compiler/jit/xla_fusion_optimizer.cc
index 915c5afa79..07cfab6151 100644
--- a/tensorflow/compiler/jit/xla_fusion_optimizer.cc
+++ b/tensorflow/compiler/jit/xla_fusion_optimizer.cc
@@ -41,8 +41,8 @@ static bool IsShapeConsumerOp(const Node& node) {
}
// Returns true if the op can be decomposed into XLA ops for which
-// there are fusable elemental implementations.
-bool IsXlaFusable(const NodeDef& node) {
+// there are fusible elemental implementations.
+static bool IsXlaFusible(const NodeDef& node) {
static const std::unordered_set<std::string>* elementwise_ops =
new std::unordered_set<std::string>(
{// tf2xla/kernels/aggregate_ops.cc
@@ -176,9 +176,9 @@ Status XlaFusionOptimizer::Optimize(grappler::Cluster* cluster,
TF_RETURN_IF_ERROR(DeviceToDeviceType(node->def().device(), &device_type));
if (device_type.type_string().find("XLA") != string::npos) continue;
- // Assume all fusable ops are registered.
+ // Assume all fusible ops are registered.
// TODO(hpucha): Check for registration if possible.
- if (!IsXlaFusable(node->def())) {
+ if (!IsXlaFusible(node->def())) {
continue;
}
diff --git a/tensorflow/compiler/jit/xla_fusion_optimizer_test.cc b/tensorflow/compiler/jit/xla_fusion_optimizer_test.cc
index b77b207908..68e19c8a13 100644
--- a/tensorflow/compiler/jit/xla_fusion_optimizer_test.cc
+++ b/tensorflow/compiler/jit/xla_fusion_optimizer_test.cc
@@ -73,7 +73,7 @@ TEST_F(XlaFusionOptimizerTest, Chains) {
EXPECT_TRUE(clusters.find("D") == clusters.cend());
}
-TEST_F(XlaFusionOptimizerTest, FusableOps) {
+TEST_F(XlaFusionOptimizerTest, FusibleOps) {
GraphDef graph;
{
GraphDefBuilder builder(GraphDefBuilder::kFailImmediately);
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
index b40d264c03..7f867fa149 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc
@@ -78,7 +78,7 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
}
if (!CanBeLoopFused(*producer)) {
- VLOG(2) << "Producer is not fusile.";
+ VLOG(2) << "Producer is not fusible.";
return false;
}
@@ -140,7 +140,7 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
}
if (CanBeLoopFused(*consumer)) {
- VLOG(2) << "Fusing: consumer is elementwise or fusile.";
+ VLOG(2) << "Fusing: consumer is elementwise or fusible.";
return true;
}
diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
index c3e03056f0..28aaa28cdb 100644
--- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc
@@ -567,7 +567,7 @@ TEST_F(OpcodeFusionTest, DynamicSliceWithDynamicUpdateSlice) {
HloOpcode::kParameter, HloOpcode::kParameter});
}
-TEST_F(OpcodeFusionTest, MessOfFusileNodes) {
+TEST_F(OpcodeFusionTest, MessOfFusibleNodes) {
auto module = CreateNewModule();
HloComputation::Builder builder(TestName());
diff --git a/tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc
index b68ac67574..22721051e5 100644
--- a/tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/cpu/tests/cpu_fusion_test.cc
@@ -129,8 +129,8 @@ TEST_F(CpuFusionTest, FuseElementwiseOpChain) {
error_spec_);
}
-TEST_F(CpuFusionTest, ElementwiseOpChainWithNonfusableInstruction) {
- // Test a chain of fusable ops with a non-fusable op (a reduce) thrown in the
+TEST_F(CpuFusionTest, ElementwiseOpChainWithNonfusibleInstruction) {
+ // Test a chain of fusible ops with a non-fusible op (a reduce) thrown in the
// middle.
auto module = CreateNewModule();
auto builder = HloComputation::Builder(TestName());
diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
index 0f2c83aeb2..0bcaaee2b7 100644
--- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc
@@ -26,7 +26,7 @@ namespace gpu {
namespace {
-bool IsFusile(const HloInstruction& hlo) {
+bool IsFusible(const HloInstruction& hlo) {
// Don't fuse get-tuple-element on GPU: We can, but it's slower than not
// fusing. We never generate kernels for unfused GTEs. Instead, if an
// unfused GTE is an input to a kernel (including a fusion kernel), we
@@ -245,7 +245,7 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
return true;
}
- if (!IsFusile(*producer) || !IsFusile(*consumer) ||
+ if (!IsFusible(*producer) || !IsFusible(*consumer) ||
!InstructionFusion::ShouldFuse(consumer, operand_index)) {
return false;
}
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
index 84043689bd..9c7b508e10 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc
@@ -3345,7 +3345,7 @@ bool IrEmitterUnnested::CheckAndEmitHloWithTile021(HloInstruction* hlo) {
// if there's a Right Choice.
//
// This is only sound if tiled transposes are the only place where we use
- // shared memory in fusions. If in the future other fusile ops use shared
+ // shared memory in fusions. If in the future other fusible ops use shared
// memory, we'll have to adjust this heuristic.
constexpr int kMinBlocksPerCore = 3;
constexpr int64 kShmemPerCore = 48 * 1024;
diff --git a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
index 9fb6f569ae..7a43f0be54 100644
--- a/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
+++ b/tensorflow/compiler/xla/service/gpu/multi_output_fusion.cc
@@ -145,7 +145,7 @@ bool GpuMultiOutputFusion::IsFusible(HloInstruction* instr) {
// with any other instruction.
// TODO(b/112957171): This should use the same isFusible logic as
// instruction_fusion.
- return instr->IsFusable() &&
+ return instr->IsFusible() &&
(IsInputFusibleReduction(instr) ||
(instr->opcode() == HloOpcode::kFusion &&
instr->fusion_kind() == HloInstruction::FusionKind::kLoop) ||
@@ -204,7 +204,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
tensorflow::gtl::FlatSet<HloInstruction*> to_fuse;
// Keep a list of the instructions to fuse after making all the fusion
// decisions. We first aggressively add instructions to potential_fusion_list,
- // then filter out instructions that will be no longer fusable because of
+ // then filter out instructions that will be no longer fusible because of
// reachability change. This avoids recalculating reachability on a large set
// of instructions.
std::vector<std::pair<HloInstruction*, HloInstruction*>>
@@ -220,7 +220,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
continue;
}
if (!IsInputFusibleReduction(consumer)) {
- VLOG(3) << consumer->name() << " is not an input-fusable reduction.";
+ VLOG(3) << consumer->name() << " is not an input-fusible reduction.";
continue;
}
VLOG(3) << consumer->name()
@@ -229,8 +229,8 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
auto consumer_operands = consumer->operands();
for (size_t i = 0; i < consumer_operands.size(); ++i) {
HloInstruction* producer = consumer_operands[i];
- if (!producer->IsFusable()) {
- VLOG(3) << producer->name() << " is not fusable.";
+ if (!producer->IsFusible()) {
+ VLOG(3) << producer->name() << " is not fusible.";
continue;
}
const bool is_loop_fusion =
@@ -270,7 +270,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
}
}
- // Filter out pairs that will be no longer fusable because of reachability
+ // Filter out pairs that will be no longer fusible because of reachability
// change.
for (auto& fusion_pair : potential_fusion_list) {
HloInstruction* producer = fusion_pair.first;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 2bb9de686f..3e077d8aec 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -2169,7 +2169,7 @@ void HloInstruction::set_tracing(HloInstruction* trace_instruction) {
bool HloInstruction::IsFused() const { return parent_->IsFusionComputation(); }
-bool HloInstruction::IsFusable() const {
+bool HloInstruction::IsFusible() const {
// Instructions which are traced should not be fused.
if (tracing()) {
return false;
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 948e33a0a3..01437f66cd 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -1029,7 +1029,7 @@ class HloInstruction {
// Returns true if this instruction can be legally fused into a fusion
// instruction.
- bool IsFusable() const;
+ bool IsFusible() const;
// Returns the sharding applied to this operator.
// REQUIRES: has_sharding() is true.
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index a0de253eda..b407cfeb50 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -1152,7 +1152,7 @@ HloInstruction* HloFusionInstruction::FuseInstructionInternal(
HloInstruction* HloFusionInstruction::CloneAndFuseInternal(
HloInstruction* instruction_to_fuse, bool add_output) {
- CHECK(instruction_to_fuse->IsFusable()) << instruction_to_fuse->ToString();
+ CHECK(instruction_to_fuse->IsFusible()) << instruction_to_fuse->ToString();
VLOG(3) << "CloneAndFuseInternal:\n" << instruction_to_fuse->ToString();
HloInstruction* clone = nullptr;
if (called_computations().empty()) {
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index be59ce8281..6207cdfb0d 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -189,13 +189,13 @@ bool InstructionFusion::CanFuseOnAllPaths(
if (consumer == producer) {
return true;
}
- if (!consumer->IsFusable()) {
+ if (!consumer->IsFusible()) {
return false;
}
for (int64 i = 0, e = consumer->operand_count(); i < e; ++i) {
auto* consumer_operand = consumer->mutable_operand(i);
// If the operand is not on a path to the producer, it doesn't matter
- // whether it's fusable.
+ // whether it's fusible.
if (!reachability_->IsReachable(producer, consumer_operand)) {
continue;
}
@@ -205,7 +205,7 @@ bool InstructionFusion::CanFuseOnAllPaths(
}
// The producer is reachable from consumer_operand which means we need
// to be able to fuse consumer_operand into consumer in order for
- // producer to be fusable into consumer on all paths.
+ // producer to be fusible into consumer on all paths.
// Perform the recursive step: make sure producer can be fused into
// consumer_operand on all paths.
if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_duplicate)) {
@@ -216,7 +216,7 @@ bool InstructionFusion::CanFuseOnAllPaths(
}
InstructionFusion::HloInstructionSet
-InstructionFusion::ComputeGloballyUnfusable(
+InstructionFusion::ComputeGloballyUnfusible(
tensorflow::gtl::ArraySlice<HloInstruction*> post_order) {
// Forbid fusion of producers that:
// a) Need to be duplicated, unless they can be fused into all consumers
@@ -270,19 +270,19 @@ InstructionFusion::ComputeGloballyUnfusable(
// all of its consumers on all paths.
//
// That means, that for:
- // A --> B (fusable)
- // \-> C (non-fusable)
+ // A --> B (fusible)
+ // \-> C (non-fusible)
// A will be not allowed to be fused into B, as it cannot be fused into C.
//
// Similarly, for:
// A -------------> B
// \-> C -> D -/
// If:
- // - A is fusable into B and C, and D is fusable into B
- // - C is *not* fusable into D
+ // - A is fusible into B and C, and D is fusible into B
+ // - C is *not* fusible into D
// A will be not allowed to be fused into B, as it cannot be fused via
// all paths.
- if (producer->IsFusable() &&
+ if (producer->IsFusible() &&
CanFuseOnAllPaths(producer, consumer, do_not_duplicate)) {
continue;
}
@@ -318,7 +318,7 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
InsertOrDie(&post_order_index, post_order[i], i);
}
- HloInstructionSet do_not_duplicate = ComputeGloballyUnfusable(post_order);
+ HloInstructionSet do_not_duplicate = ComputeGloballyUnfusible(post_order);
// Instruction fusion effectively fuses edges in the computation graph
// (producer instruction -> consumer instruction) so we iterate over all
@@ -341,7 +341,7 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
// consistent.
post_order_index.erase(instruction);
- if (!instruction->IsFusable() &&
+ if (!instruction->IsFusible() &&
instruction->opcode() != HloOpcode::kFusion) {
continue;
}
@@ -413,7 +413,7 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
for (int64 i : sorted_operand_numbers) {
HloInstruction* operand = instruction->mutable_operand(i);
- if (!operand->IsFusable()) {
+ if (!operand->IsFusible()) {
continue;
}
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h
index 8489c3d9ad..9802d4cfc1 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.h
+++ b/tensorflow/compiler/xla/service/instruction_fusion.h
@@ -122,7 +122,7 @@ class InstructionFusion : public HloPassInterface {
// Computes the set of nodes that we do not want to fuse into any of their
// consumers based on a global analysis of the HLO graph.
- HloInstructionSet ComputeGloballyUnfusable(
+ HloInstructionSet ComputeGloballyUnfusible(
tensorflow::gtl::ArraySlice<HloInstruction*> post_order);
// Used to determine if an HLO is expensive. Expensive operations will not be
diff --git a/tensorflow/compiler/xla/service/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/instruction_fusion_test.cc
index 9e7a15f033..da1ad90959 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion_test.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion_test.cc
@@ -158,7 +158,7 @@ TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfParameterUnfused) {
.ValueOrDie());
}
-TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusable) {
+TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusible) {
HloComputation::Builder builder(TestName());
auto shape = ShapeUtil::MakeShape(F32, {16, 16});
auto param0 =
@@ -216,7 +216,7 @@ TEST_F(InstructionFusionTest, FuseCheapNonDuplicatableOps) {
EXPECT_EQ(Count(*module, HloOpcode::kAdd), 1) << module->ToString();
}
-TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusableRecursively) {
+TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusibleRecursively) {
// Make sure we do not duplicate the add, as we cannot fuse through the rng.
//
// p0 -> add -------------------------> sub
@@ -309,7 +309,7 @@ TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusableRecursively) {
EXPECT_EQ(Count(*module, HloOpcode::kAdd), 2) << module->ToString();
// A variant of the above that allows the algorithm to put add2 into the set
- // of unfusable ops to short-circuit the decision whether add1 should be fused
+ // of unfusible ops to short-circuit the decision whether add1 should be fused
// into sub2.
//
// /---------------\