about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/cpu/ir_emitter.h
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2017-10-11 13:04:02 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-10-11 13:07:54 -0700
commit: 8c857092026c67d3868664daa4c2ee2d39f1b4dd (patch)
tree: c4132d2060e4e59e543bf9258037d35f0e355a7d /tensorflow/compiler/xla/service/cpu/ir_emitter.h
parent: 208dabe771f049b0b331f14adf5d8728a7eae931 (diff)
[XLA:CPU] Add an in-place implementation of fused-dynamic-update-slice.
This implementation, which applies when a loop-fusion node's root is a dynamic-update-slice whose input operand and output share the same buffer slice, is much faster than the out-of-place implementation. This patch also unifies the implementation of the CPU and GPU versions of this algorithm.

PiperOrigin-RevId: 171863142
Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/ir_emitter.h')
-rw-r--r-- tensorflow/compiler/xla/service/cpu/ir_emitter.h | 4
1 file changed, 4 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index ba02f5f778..53c4b6f241 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -236,6 +236,10 @@ class IrEmitter : public DfsHloVisitorWithDefault {
// Gets an IrArray representing the given hlo.
llvm_ir::IrArray GetIrArrayFor(const HloInstruction* hlo);
+ // Gets a list of IrArrays, one for each of hlo's operands.
+ std::vector<llvm_ir::IrArray> GetIrArraysForOperandsOf(
+ const HloInstruction* hlo);
+
// Augments IrArray with aliasing information.
void AddAliasingInformationToIrArray(const HloInstruction& hlo,
llvm_ir::IrArray* array) {