diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/ir_emitter.h')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/ir_emitter.h | 22 |
1 files changed, 9 insertions, 13 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h index d2dd335f10..e89967a378 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h @@ -79,7 +79,6 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleCrossReplicaSum(HloInstruction* crs) override; Status HandleInfeed(HloInstruction* infeed) override; Status HandleOutfeed(HloInstruction* outfeed) override; - Status HandleSort(HloInstruction* sort) override; Status HandleSend(HloInstruction* send) override; Status HandleSendDone(HloInstruction* send_done) override; Status HandleRecv(HloInstruction* recv) override; @@ -96,6 +95,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { Status HandleBatchNormInference(HloInstruction* batch_norm) override; Status HandleBatchNormTraining(HloInstruction* batch_norm) override; Status HandleBatchNormGrad(HloInstruction* batch_norm) override; + Status HandleIota(HloInstruction* iota) override; Status FinishVisit(HloInstruction* root) override { return Status::OK(); } @@ -162,7 +162,7 @@ class IrEmitter : public DfsHloVisitorWithDefault { // The following fields track the IR emission state. According to LLVM memory // management rules, their memory is owned by the module. - llvm::IRBuilder<> ir_builder_; + llvm::IRBuilder<> b_; // Mapping from HLO to its underlying LLVM value. HloToIrBindings bindings_; @@ -171,17 +171,6 @@ class IrEmitter : public DfsHloVisitorWithDefault { const HloModuleConfig& hlo_module_config_; private: - // Emits a series of nested loops for iterating over an operand array in the - // dot operation. Loops are constructed in major to minor dimension layout - // order. No loop is emitted for the given reduction_dimension. The function - // returns an IrArray index for the given operand_array containing the indvars - // of the loops. All dimensions of the index are filled except for the - // reduction dimension. name_suffix is the string to append to the names of - // LLVM constructs (eg, basic blocks) constructed by this method. - llvm_ir::IrArray::Index EmitOperandArrayLoopNest( - const llvm_ir::IrArray& operand_array, int64 reduction_dimension, - tensorflow::StringPiece name_suffix, llvm_ir::ForLoopNest* loop_nest); - // A helper method for EmitAtomicOperationForNestedComputation. Certain // computations, such as floating-point addition and integer maximization, can // be simply implemented using an LLVM atomic instruction. If "computation" is @@ -198,6 +187,13 @@ class IrEmitter : public DfsHloVisitorWithDefault { llvm::Value* output_address, llvm::Value* source_address); + // A helper method for HandleSort(). It adds the inner comparison loop where + // we compare elements pointed to by 'keys_index' and 'compare_keys_index'. + void EmitCompareLoop(int64 dimension_to_sort, + const llvm_ir::IrArray::Index& keys_index, + const llvm_ir::IrArray::Index& compare_keys_index, + const llvm_ir::IrArray& keys_array); + StatusOr<llvm::Value*> ComputeNestedElement( const HloComputation& computation, tensorflow::gtl::ArraySlice<llvm::Value*> parameter_elements); |