aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/layout_assignment.cc
diff options
context:
space:
mode:
authorGravatar Benjamin Kramer <kramerb@google.com>2018-10-09 13:32:24 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-10-09 13:40:43 -0700
commit5d9a7fdf4f02c2db487a03e7ad2d520f8847c4e3 (patch)
treea77d90f9328b7e0e859a15ab3b5d765774954b5a /tensorflow/compiler/xla/service/layout_assignment.cc
parent9989788be25c846d087ac70b76cf78759a209a3e (diff)
[XLA:GPU] Add an implementation of scatter for GPU
This has a simple kernel that runs on every element of the updates tensor, figures out the right indices to perform the update, and applies it with an atomic operation. Currently we emit a CAS for plain (i.e. non-add) updates, which is inefficient. Also TuplePointsToAnalysis doesn't know that it should alias the operand and output buffers of a scatter, which would avoid a copy. PiperOrigin-RevId: 216412467
Diffstat (limited to 'tensorflow/compiler/xla/service/layout_assignment.cc')
-rw-r--r--tensorflow/compiler/xla/service/layout_assignment.cc2
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc
index ad65b147c1..2cf5fc94ac 100644
--- a/tensorflow/compiler/xla/service/layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/layout_assignment.cc
@@ -1908,6 +1908,7 @@ bool LayoutAssignment::InstructionCanChangeLayout(
case HloOpcode::kRemainder:
case HloOpcode::kReverse:
case HloOpcode::kRoundNearestAfz:
+ case HloOpcode::kScatter:
case HloOpcode::kSelect:
case HloOpcode::kSelectAndScatter:
case HloOpcode::kShiftLeft:
@@ -1946,7 +1947,6 @@ bool LayoutAssignment::InstructionCanChangeLayout(
case HloOpcode::kReduce:
case HloOpcode::kReshape:
case HloOpcode::kRng:
- case HloOpcode::kScatter:
case HloOpcode::kSend:
case HloOpcode::kSendDone:
case HloOpcode::kAfterAll: