about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/gpu/ir_emitter.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/ir_emitter.h')
-rw-r--r-- tensorflow/compiler/xla/service/gpu/ir_emitter.h 22
1 files changed, 9 insertions, 13 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
index d2dd335f10..e89967a378 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h
@@ -79,7 +79,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
Status HandleCrossReplicaSum(HloInstruction* crs) override;
Status HandleInfeed(HloInstruction* infeed) override;
Status HandleOutfeed(HloInstruction* outfeed) override;
- Status HandleSort(HloInstruction* sort) override;
Status HandleSend(HloInstruction* send) override;
Status HandleSendDone(HloInstruction* send_done) override;
Status HandleRecv(HloInstruction* recv) override;
@@ -96,6 +95,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
Status HandleBatchNormInference(HloInstruction* batch_norm) override;
Status HandleBatchNormTraining(HloInstruction* batch_norm) override;
Status HandleBatchNormGrad(HloInstruction* batch_norm) override;
+ Status HandleIota(HloInstruction* iota) override;
Status FinishVisit(HloInstruction* root) override { return Status::OK(); }
@@ -162,7 +162,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
// The following fields track the IR emission state. According to LLVM memory
// management rules, their memory is owned by the module.
- llvm::IRBuilder<> ir_builder_;
+ llvm::IRBuilder<> b_;
// Mapping from HLO to its underlying LLVM value.
HloToIrBindings bindings_;
@@ -171,17 +171,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
const HloModuleConfig& hlo_module_config_;
private:
- // Emits a series of nested loops for iterating over an operand array in the
- // dot operation. Loops are constructed in major to minor dimension layout
- // order. No loop is emitted for the given reduction_dimension. The function
- // returns an IrArray index for the given operand_array containing the indvars
- // of the loops. All dimensions of the index are filled except for the
- // reduction dimension. name_suffix is the string to append to the names of
- // LLVM constructs (eg, basic blocks) constructed by this method.
- llvm_ir::IrArray::Index EmitOperandArrayLoopNest(
- const llvm_ir::IrArray& operand_array, int64 reduction_dimension,
- tensorflow::StringPiece name_suffix, llvm_ir::ForLoopNest* loop_nest);
-
// A helper method for EmitAtomicOperationForNestedComputation. Certain
// computations, such as floating-point addition and integer maximization, can
// be simply implemented using an LLVM atomic instruction. If "computation" is
@@ -198,6 +187,13 @@ class IrEmitter : public DfsHloVisitorWithDefault {
llvm::Value* output_address,
llvm::Value* source_address);
+ // A helper method for HandleSort(). It adds the inner comparison loop where
+ // we compare elements pointed to by 'keys_index' and 'compare_keys_index'.
+ void EmitCompareLoop(int64 dimension_to_sort,
+ const llvm_ir::IrArray::Index& keys_index,
+ const llvm_ir::IrArray::Index& compare_keys_index,
+ const llvm_ir::IrArray& keys_array);
+
StatusOr<llvm::Value*> ComputeNestedElement(
const HloComputation& computation,
tensorflow::gtl::ArraySlice<llvm::Value*> parameter_elements);