about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2018-05-21 20:41:26 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-05-21 20:43:56 -0700
commit: eab53f2cea0506d869b14713c6c532e0bbfd9c52 (patch)
tree: 60ef8fa706ad613261484666e66411df38d3969f /tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
parent: c0bf28ecc311759ac80e12515ad931b077aae635 (diff)
[XLA:GPU] Implement trivial (one-replica) cross-replica-sum on XLA:GPU.
Also fix the CPU implementation to work in the case when there are multiple operands to the cross-replica-sum op.
PiperOrigin-RevId: 197506311
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h')
-rw-r--r-- tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h | 1
1 file changed, 1 insertion, 0 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
index b842f480c6..14780de96d 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h
@@ -76,6 +76,7 @@ class IrEmitterUnnested : public IrEmitter {
Status HandleInfeed(HloInstruction* xla_infeed) override;
Status HandleRng(HloInstruction* random) override;
Status HandleSelect(HloInstruction* select) override;
+ Status HandleCrossReplicaSum(HloInstruction* crs) override;
Status EmitTargetElementLoop(
const HloInstruction& hlo,