aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-05-18 07:47:41 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-05-18 07:50:02 -0700
commit56b466583339e4bb110572a0b48b46b42d11e8eb (patch)
treebbde4307dd19900d246cb31aeb6e03b412f559db /tensorflow/compiler/xla/service/gpu/pad_insertion.cc
parent2934484b3a4802c3b4644e6fc9a2b1c647d2eb9a (diff)
Modify PadInsertion pass so that it matches other passes.
Currently, PadInsertion only iterates over the instructions in the entry_computation. Other passes iterate over MakeNonfusionComputations. When we run on HloSnapshots derived from TPU benchmarks, this makes a difference, because it seems none of the convolutions are inside the entry computation.
PiperOrigin-RevId: 197145067
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/pad_insertion.cc')
-rw-r--r--tensorflow/compiler/xla/service/gpu/pad_insertion.cc42
1 file changed, 27 insertions(+), 15 deletions(-)
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
index 7bda4e2fcd..c8f0d4185c 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc
@@ -370,26 +370,38 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution(
return true;
}
-StatusOr<bool> PadInsertion::Run(HloModule* module) {
+StatusOr<bool> PadInsertion::RunOnComputation(HloComputation* computation) {
bool changed = false;
- for (HloInstruction* instruction :
- module->entry_computation()->MakeInstructionPostOrder()) {
- if (IsCustomCallToDnnConvolution(*instruction)) {
- const auto& target = instruction->custom_call_target();
- if (target == kCudnnConvForwardCallTarget) {
- changed |= CanonicalizeForwardConvolution(instruction);
- } else if (target == kCudnnConvBackwardFilterCallTarget) {
- changed |= CanonicalizeBackwardFilterConvolution(instruction);
- } else if (target == kCudnnConvBackwardInputCallTarget) {
- changed |= CanonicalizeBackwardInputConvolution(instruction);
- } else {
- LOG(FATAL) << "Unknown custom call target for cudnn conv: "
- << instruction->ToString();
- }
+ std::vector<HloInstruction*> convs;
+ for (auto* instr : computation->instructions()) {
+ if (IsCustomCallToDnnConvolution(*instr)) {
+ convs.push_back(instr);
+ }
+ }
+ for (HloInstruction* instruction : convs) {
+ const auto& target = instruction->custom_call_target();
+ if (target == kCudnnConvForwardCallTarget) {
+ changed |= CanonicalizeForwardConvolution(instruction);
+ } else if (target == kCudnnConvBackwardFilterCallTarget) {
+ changed |= CanonicalizeBackwardFilterConvolution(instruction);
+ } else if (target == kCudnnConvBackwardInputCallTarget) {
+ changed |= CanonicalizeBackwardInputConvolution(instruction);
+ } else {
+ LOG(FATAL) << "Unknown custom call target for cudnn conv: "
+ << instruction->ToString();
}
}
return changed;
}
+StatusOr<bool> PadInsertion::Run(HloModule* module) {
+ bool changed = false;
+ for (HloComputation* computation : module->MakeNonfusionComputations()) {
+ TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation));
+ changed |= result;
+ }
+ return changed;
+}
+
} // namespace gpu
} // namespace xla