about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/instruction_fusion.cc
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org>, 2017-10-27 09:00:51 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2017-10-27 09:04:01 -0700
commit: 4198e27be8115585ad6b5b141383fb7dc7856c24 (patch)
tree: 244405e6ef96cb098d8abbf2547a8f22dfb4c72d /tensorflow/compiler/xla/service/instruction_fusion.cc
parent: 4ae245a7db3d0457c4324ee7df8d020ba83b3c60 (diff)
[XLA:CPU] [XLA:GPU] Adds compiler support for C64 primitive type, including relevant elementwise unary and binary op lowering for CPU and GPU.
We use a named LLVM struct "complex64", laid out the same as std::complex<float>. This named struct is accessed via the llvm::Module, which required changes to the accessors of PrimitiveTypeToIrType & friends. Ops that require atan2 (in particular, angle and log) are only supported on GPU at this point, because LLVM lacks a CPU intrinsic for atan or atan2, whereas libdevice provides one for GPU.

PiperOrigin-RevId: 173676849
Diffstat (limited to 'tensorflow/compiler/xla/service/instruction_fusion.cc')
-rw-r--r-- tensorflow/compiler/xla/service/instruction_fusion.cc 15
1 file changed, 11 insertions, 4 deletions
diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc
index 0271f41697..fae3ca8ad2 100644
--- a/tensorflow/compiler/xla/service/instruction_fusion.cc
+++ b/tensorflow/compiler/xla/service/instruction_fusion.cc
@@ -32,17 +32,16 @@ namespace xla {
const HloInstruction& instruction) {
switch (instruction.opcode()) {
// Cheap instructions.
- case HloOpcode::kAbs:
case HloOpcode::kAdd:
case HloOpcode::kBitcast:
case HloOpcode::kBroadcast:
case HloOpcode::kCeil:
case HloOpcode::kClamp:
+ case HloOpcode::kComplex:
case HloOpcode::kConcatenate:
case HloOpcode::kConstant:
case HloOpcode::kConvert:
case HloOpcode::kCopy:
- case HloOpcode::kCos:
case HloOpcode::kDynamicSlice:
case HloOpcode::kDynamicUpdateSlice:
case HloOpcode::kEq:
@@ -50,6 +49,7 @@ namespace xla {
case HloOpcode::kGe:
case HloOpcode::kGetTupleElement:
case HloOpcode::kGt:
+ case HloOpcode::kImag:
case HloOpcode::kInfeed:
case HloOpcode::kIsFinite:
case HloOpcode::kLe:
@@ -64,6 +64,7 @@ namespace xla {
case HloOpcode::kNegate:
case HloOpcode::kOutfeed:
case HloOpcode::kPad:
+ case HloOpcode::kReal:
case HloOpcode::kReducePrecision:
case HloOpcode::kReshape:
case HloOpcode::kReverse:
@@ -72,15 +73,21 @@ namespace xla {
case HloOpcode::kShiftLeft:
case HloOpcode::kShiftRightArithmetic:
case HloOpcode::kShiftRightLogical:
- case HloOpcode::kSign:
- case HloOpcode::kSin:
case HloOpcode::kSlice:
case HloOpcode::kSubtract:
case HloOpcode::kTranspose:
case HloOpcode::kTuple:
return false;
+ // Cheap instructions for reals, but expensive for complex.
+ case HloOpcode::kAbs:
+ case HloOpcode::kCos:
+ case HloOpcode::kSign:
+ case HloOpcode::kSin:
+ return ShapeUtil::ElementIsComplex(instruction.shape());
+
// Expensive instructions.
+ case HloOpcode::kAtan2:
case HloOpcode::kBatchNormTraining:
case HloOpcode::kBatchNormInference:
case HloOpcode::kBatchNormGrad: