diff options
author | Benjamin Kramer <kramerb@google.com> | 2018-10-09 14:19:07 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-10-09 14:28:11 -0700 |
commit | fa1542234857acf56af6e7f0dbe8d2084a18fa00 (patch) | |
tree | 1254448bf59e0fc3330d421059f53e0258dc56b6 /tensorflow/compiler/xla/service/gpu/ir_emitter.cc | |
parent | b145f46b735fe1e383be6629cafaa5269b07b7fb (diff) |
[XLA:GPU] Pattern match atomic "apply" into an atomic store
Otherwise we'd emit a CAS loop.
PiperOrigin-RevId: 216421161
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/ir_emitter.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/ir_emitter.cc | 15 |
1 file changed, 15 insertions(+), 0 deletions(-)
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
index b7c37bcf3c..47102347cb 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc
@@ -179,6 +179,21 @@ bool IrEmitter::MaybeEmitDirectAtomicOperation(
   bool is_atomic_integral = element_type == S32 || element_type == U32 ||
                             element_type == S64 || element_type == U64;
   llvm::Value* source = Load(source_address, "source");
+
+  // kCopy of RHS -> atomic store.
+  if (root_opcode == HloOpcode::kCopy &&
+      (element_type == F32 || is_atomic_integral) &&
+      computation.root_instruction()->operand(0)->opcode() ==
+          HloOpcode::kParameter &&
+      computation.root_instruction()->operand(0)->parameter_number() == 1) {
+    llvm::StoreInst* store = Store(source, output_address);
+    store->setAtomic(llvm::AtomicOrdering::Unordered);
+    // Derive a minimum alignment from the type. The optimizer can increase it
+    // later.
+    store->setAlignment(ShapeUtil::ByteSizeOfPrimitiveType(element_type));
+    return true;
+  }
+
   if (root_opcode == HloOpcode::kAdd) {
     // NVPTX supports atomicAdd on F32 and integer types.
     if (element_type == F32) {