diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-10-27 09:00:51 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-10-27 09:04:01 -0700 |
commit | 4198e27be8115585ad6b5b141383fb7dc7856c24 (patch) | |
tree | 244405e6ef96cb098d8abbf2547a8f22dfb4c72d /tensorflow/compiler/xla/service/instruction_fusion.cc | |
parent | 4ae245a7db3d0457c4324ee7df8d020ba83b3c60 (diff) |
[XLA:CPU] [XLA:GPU] Adds compiler support for C64 primitive type, including relevant elementwise unary and binary op lowering for CPU and GPU.
We use a named LLVM struct "complex64", laid out the same as std::complex<float>. This named struct is accessed via the llvm::Module, which required changes to accessors of PrimitiveTypeToIrType & friends.
Ops that require atan2 (in particular, angle and log) are only supported on GPU at this point, because LLVM lacks a CPU intrinsic for atan or atan2, whereas libdevice provides these functions for GPU.
PiperOrigin-RevId: 173676849
Diffstat (limited to 'tensorflow/compiler/xla/service/instruction_fusion.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/instruction_fusion.cc | 15 |
1 file changed, 11 insertions, 4 deletions
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 0271f41697..fae3ca8ad2 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -32,17 +32,16 @@ namespace xla { const HloInstruction& instruction) { switch (instruction.opcode()) { // Cheap instructions. - case HloOpcode::kAbs: case HloOpcode::kAdd: case HloOpcode::kBitcast: case HloOpcode::kBroadcast: case HloOpcode::kCeil: case HloOpcode::kClamp: + case HloOpcode::kComplex: case HloOpcode::kConcatenate: case HloOpcode::kConstant: case HloOpcode::kConvert: case HloOpcode::kCopy: - case HloOpcode::kCos: case HloOpcode::kDynamicSlice: case HloOpcode::kDynamicUpdateSlice: case HloOpcode::kEq: @@ -50,6 +49,7 @@ namespace xla { case HloOpcode::kGe: case HloOpcode::kGetTupleElement: case HloOpcode::kGt: + case HloOpcode::kImag: case HloOpcode::kInfeed: case HloOpcode::kIsFinite: case HloOpcode::kLe: @@ -64,6 +64,7 @@ namespace xla { case HloOpcode::kNegate: case HloOpcode::kOutfeed: case HloOpcode::kPad: + case HloOpcode::kReal: case HloOpcode::kReducePrecision: case HloOpcode::kReshape: case HloOpcode::kReverse: @@ -72,15 +73,21 @@ namespace xla { case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: - case HloOpcode::kSign: - case HloOpcode::kSin: case HloOpcode::kSlice: case HloOpcode::kSubtract: case HloOpcode::kTranspose: case HloOpcode::kTuple: return false; + // Cheap instructions for reals, but expensive for complex. + case HloOpcode::kAbs: + case HloOpcode::kCos: + case HloOpcode::kSign: + case HloOpcode::kSin: + return ShapeUtil::ElementIsComplex(instruction.shape()); + // Expensive instructions. + case HloOpcode::kAtan2: case HloOpcode::kBatchNormTraining: case HloOpcode::kBatchNormInference: case HloOpcode::kBatchNormGrad: |