Diffstat (limited to 'tensorflow/compiler/xla/service/llvm_ir')
 tensorflow/compiler/xla/service/llvm_ir/BUILD        |  14 +
 tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc |  33 +
 tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h  |  11 +
 tensorflow/compiler/xla/service/llvm_ir/sort_util.cc | 201 +
 tensorflow/compiler/xla/service/llvm_ir/sort_util.h  |  34 +
 5 files changed, 293 insertions(+), 0 deletions(-)
diff --git a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD
index c14a5bfb53..462be543bc 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/BUILD
+++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD
@@ -181,6 +181,20 @@ cc_library(
)
cc_library(
+ name = "sort_util",
+ srcs = ["sort_util.cc"],
+ hdrs = ["sort_util.h"],
+ deps = [
+ ":ir_array",
+ ":llvm_loop",
+ ":llvm_util",
+ "//tensorflow/compiler/xla:shape_util",
+ "//tensorflow/core:lib",
+ "@llvm//:core",
+ ],
+)
+
+cc_library(
name = "tuple_ops",
srcs = ["tuple_ops.cc"],
hdrs = ["tuple_ops.h"],
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc
index c9ae7d3afd..1227534779 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc
@@ -262,5 +262,38 @@ IrArray::Index ForLoopNest::AddLoopsForShapeOnDimensions(
return index;
}
+IrArray::Index ForLoopNest::EmitOperandArrayLoopNest(
+ const llvm_ir::IrArray& operand_array, int64 dimension_to_skip,
+ tensorflow::StringPiece name_suffix) {
+ // Prepares the dimension list we will use to emit the loop nest. Outermost
+ // loops are added first. Add loops in major-to-minor order, and skip the
+ // 'dimension_to_skip' dimension.
+ std::vector<int64> dimensions;
+ const Shape& shape = operand_array.GetShape();
+ for (int64 dimension : LayoutUtil::MinorToMajor(shape)) {
+ if (dimension != dimension_to_skip) {
+ dimensions.push_back(dimension);
+ }
+ }
+ // MinorToMajor() lists dimensions in minor-to-major order, so reverse the
+ // list to get the documented major-to-minor loop order.
+ std::reverse(dimensions.begin(), dimensions.end());
+
+ // Create the loop nest with one for-loop for each dimension of the operand,
+ // except for 'dimension_to_skip'.
+ llvm_ir::IrArray::Index index =
+ AddLoopsForShapeOnDimensions(shape, dimensions, name_suffix);
+ // Verify every dimension except the 'dimension_to_skip' dimension was set in
+ // the index.
+ for (size_t dimension = 0; dimension < index.size(); ++dimension) {
+ if (dimension == dimension_to_skip) {
+ DCHECK_EQ(nullptr, index[dimension]);
+ } else {
+ DCHECK_NE(nullptr, index[dimension]);
+ }
+ }
+ return index;
+}
+
} // namespace llvm_ir
} // namespace xla
diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h
index 0dd5b9d3b2..b3266022db 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h
+++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h
@@ -248,6 +248,17 @@ class ForLoopNest {
const Shape& shape, tensorflow::gtl::ArraySlice<int64> dimensions,
tensorflow::StringPiece suffix);
+ // Emits a series of nested loops for iterating over an operand array. Loops
+ // are constructed in major to minor dimension layout order. No loop is
+ // emitted for the given 'dimension_to_skip'. The function returns an IrArray
+ // index for the given operand_array containing the indvars of the loops. All
+ // dimensions of the index are filled except for 'dimension_to_skip'.
+ // name_suffix is the string to append to the names of LLVM constructs (e.g.,
+ // basic blocks) constructed by this method.
+ IrArray::Index EmitOperandArrayLoopNest(const llvm_ir::IrArray& operand_array,
+ int64 dimension_to_skip,
+ tensorflow::StringPiece name_suffix);
+
// Convenience methods which return particular basic blocks of the outermost
// or innermost loops. These methods return nullptr if no loops have been
// added yet.
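
To make the new helper's contract concrete, here is a minimal usage sketch mirroring the call site added in sort_util.cc below; 'keys_array' (an llvm_ir::IrArray), an int64 'bound', and 'ir_builder' are assumed to be in scope, and dimension 0 is the one skipped:

  ForLoopNest loop_nest("example", ir_builder);
  // Emits loops over every dimension of keys_array except dimension 0; the
  // returned index has a nullptr hole at entry 0.
  IrArray::Index index = loop_nest.EmitOperandArrayLoopNest(
      keys_array, /*dimension_to_skip=*/0, "example");
  // The caller fills the hole with its own induction variable before using
  // the index to address elements.
  std::unique_ptr<ForLoop> inner_loop = loop_nest.AddLoop(
      /*start_index=*/0, /*end_index=*/bound, /*suffix=*/"inner");
  SetToFirstInsertPoint(inner_loop->GetBodyBasicBlock(), ir_builder);
  index[0] = inner_loop->GetIndVarValue();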
diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
new file mode 100644
index 0000000000..16a9a5aaeb
--- /dev/null
+++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc
@@ -0,0 +1,201 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/llvm_ir/sort_util.h"
+
+// IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "tensorflow/compiler/xla/primitive_util.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h"
+#include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h"
+#include "tensorflow/compiler/xla/shape_util.h"
+#include "tensorflow/core/lib/core/bits.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace llvm_ir {
+
+namespace {
+// Emits the inner compare-and-swap: the elements pointed to by 'keys_index'
+// and 'compare_keys_index' are compared and swapped into ascending order.
+void EmitCompareLoop(int64 dimension_to_sort,
+ const llvm_ir::IrArray::Index& keys_index,
+ const llvm_ir::IrArray::Index& compare_keys_index,
+ const llvm_ir::IrArray& keys_array,
+ llvm::IRBuilder<>* ir_builder) {
+ // TODO(b/26783907): parallelize this loop.
+
+ // if (is_smaller_index &&
+ // compare_keys[dimension_to_sort] < dimension_to_sort_bound)
+ llvm::Value* is_smaller_index = ir_builder->CreateICmpSLT(
+ keys_index[dimension_to_sort], compare_keys_index[dimension_to_sort]);
+ int64 dimension_to_sort_bound =
+ keys_array.GetShape().dimensions(dimension_to_sort);
+ auto if_data = llvm_ir::EmitIfThenElse(
+ ir_builder->CreateAnd(
+ is_smaller_index,
+ ir_builder->CreateICmpSLT(
+ compare_keys_index[dimension_to_sort],
+ keys_index.GetConstantWithIndexType(dimension_to_sort_bound))),
+ "smaller_comparison_index", ir_builder, /*emit_else=*/false);
+ SetToFirstInsertPoint(if_data.true_block, ir_builder);
+ auto key1 = keys_array.EmitReadArrayElement(keys_index, ir_builder);
+ auto key2 = keys_array.EmitReadArrayElement(compare_keys_index, ir_builder);
+ auto key_type = keys_array.GetShape().element_type();
+ auto comparison =
+ primitive_util::IsFloatingPointType(key_type)
+ // TODO(b/26783907): Figure out how to handle NaNs.
+ ? ir_builder->CreateFCmp(llvm::FCmpInst::FCMP_ULT, key1, key2)
+ : ir_builder->CreateICmp(
+ primitive_util::IsSignedIntegralType(key_type)
+ ? llvm::ICmpInst::ICMP_SLT
+ : llvm::ICmpInst::ICMP_ULT,
+ key1, key2);
+ auto min_key = ir_builder->CreateSelect(comparison, key1, key2);
+ auto max_key = ir_builder->CreateSelect(comparison, key2, key1);
+ keys_array.EmitWriteArrayElement(keys_index, min_key, ir_builder);
+ keys_array.EmitWriteArrayElement(compare_keys_index, max_key, ir_builder);
+}
+} // namespace
+
+Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
+ tensorflow::StringPiece name,
+ llvm::IRBuilder<>* ir_builder) {
+ const Shape& keys_shape = keys_array.GetShape();
+
+ // TODO(b/26783907): This case can probably be avoided with the Algebraic
+ // Simplifier.
+ if (ShapeUtil::IsScalar(keys_shape)) {
+ return Status::OK();
+ }
+
+ // Create loop nests which loop through the operand dimensions. The sort
+ // dimension is handled in three separate innermost loops which perform the
+ // sorting.
+ ForLoopNest loop_nest(name, ir_builder);
+ IrArray::Index keys_index =
+ loop_nest.EmitOperandArrayLoopNest(keys_array, dimension_to_sort, "keys");
+
+ // 'compare_keys_index' is the index of the element that 'keys_index' should
+ // be compared to.
+ IrArray::Index compare_keys_index(keys_index.GetType());
+ for (size_t dimension = 0; dimension < keys_index.size(); ++dimension) {
+ if (dimension != dimension_to_sort) {
+ compare_keys_index.push_back(keys_index[dimension]);
+ } else {
+ compare_keys_index.push_back(nullptr);
+ }
+ }
+
+ // Create the sorting loops which do the sorting.
+ int64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort);
+ std::unique_ptr<ForLoop> stages_loop = loop_nest.AddLoop(
+ /*start_index=*/0,
+ /*end_index=*/
+ tensorflow::Log2Ceiling64(dimension_to_sort_bound),
+ /*suffix=*/"sort_stages");
+ std::unique_ptr<ForLoop> mask_loop = loop_nest.AddLoop(
+ /*suffix=*/"mask",
+ /*start_index=*/keys_index.GetConstantWithIndexType(0),
+ /*end_index=*/stages_loop->GetIndVarValue());
+ std::unique_ptr<ForLoop> compare_loop = loop_nest.AddLoop(
+ /*start_index=*/0,
+ /*end_index=*/dimension_to_sort_bound,
+ /*suffix=*/"compare");
+
+ // Naive C++ code for the inner loops (without parallelization):
+ //
+ // for (int64 stage = 0; stage < Log2Ceiling(dimension_to_sort_bound);
+ // ++stage) {
+ // int64 first_xor_mask = (1LL << (stage + 1)) - 1;
+ // for (int64 i = 0; i < dimension_to_sort_bound; ++i) {
+ // int64 j = i ^ first_xor_mask;
+ // if (i < j && j < dimension_to_sort_bound) {
+ // int64 min_key = std::min(keys[i], keys[j]);
+ // keys[j] = std::max(keys[i], keys[j]);
+ // keys[i] = min_key;
+ // }
+ // }
+ // for (int64 mask = 0; mask < stage; ++mask) {
+ //       int64 later_xor_mask = 1LL << (stage - (mask + 1));
+ // for (int64 i = 0; i < dimension_to_sort_bound; ++i) {
+ // int64 j = i ^ later_xor_mask;
+ // if (i < j && j < dimension_to_sort_bound) {
+ // int64 min_key = std::min(keys[i], keys[j]);
+ // keys[j] = std::max(keys[i], keys[j]);
+ // keys[i] = min_key;
+ // }
+ // }
+ // }
+ // }
+ //
+ // This follows the algorithm described on Wikipedia:
+ // https://en.wikipedia.org/wiki/Bitonic_sorter
+
+ SetToFirstInsertPoint(stages_loop->GetBodyBasicBlock(), ir_builder);
+ // The first xor mask of a stage is 2^(stage + 1) - 1.
+ auto first_xor_mask = ir_builder->CreateSub(
+ ir_builder->CreateShl(
+ keys_index.GetConstantWithIndexType(1),
+ ir_builder->CreateAdd(stages_loop->GetIndVarValue(),
+ keys_index.GetConstantWithIndexType(1))),
+ keys_index.GetConstantWithIndexType(1));
+ std::unique_ptr<ForLoop> first_compare_loop = ForLoop::EmitForLoop(
+ /*prefix=*/"first_compare",
+ /*start_index=*/keys_index.GetConstantWithIndexType(0),
+ /*end_index=*/
+ keys_index.GetConstantWithIndexType(dimension_to_sort_bound),
+ /*step=*/keys_index.GetConstantWithIndexType(1),
+ /*ir_builder=*/ir_builder);
+
+ SetToFirstInsertPoint(first_compare_loop->GetBodyBasicBlock(), ir_builder);
+ // 'first_compare_loop' iterates through the 'dimension_to_sort' dimension.
+ keys_index[dimension_to_sort] = first_compare_loop->GetIndVarValue();
+ compare_keys_index[dimension_to_sort] = ir_builder->CreateXor(
+ first_compare_loop->GetIndVarValue(), first_xor_mask);
+ EmitCompareLoop(dimension_to_sort, keys_index, compare_keys_index, keys_array,
+ ir_builder);
+
+ SetToFirstInsertPoint(compare_loop->GetPreheaderBasicBlock(), ir_builder);
+ // The later masks of a stage are 2^(stage - (mask_loop_ind_var + 1)).
+ auto later_xor_mask = ir_builder->CreateShl(
+ keys_index.GetConstantWithIndexType(1),
+ ir_builder->CreateSub(
+ stages_loop->GetIndVarValue(),
+ ir_builder->CreateAdd(mask_loop->GetIndVarValue(),
+ keys_index.GetConstantWithIndexType(1))));
+
+ SetToFirstInsertPoint(compare_loop->GetBodyBasicBlock(), ir_builder);
+ // 'compare_loop' iterates through the 'dimension_to_sort' dimension.
+ keys_index[dimension_to_sort] = compare_loop->GetIndVarValue();
+ compare_keys_index[dimension_to_sort] =
+ ir_builder->CreateXor(compare_loop->GetIndVarValue(), later_xor_mask);
+ EmitCompareLoop(dimension_to_sort, keys_index, compare_keys_index, keys_array,
+ ir_builder);
+
+ // Set the IR builder insert point to the exit basic block of the outermost
+ // loop so that later instructions are inserted after this loop nest.
+ ir_builder->SetInsertPoint(loop_nest.GetOuterLoopExitBasicBlock());
+
+ return Status::OK();
+}
+
+} // namespace llvm_ir
+} // namespace xla
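
To see the comparison schedule apart from the IR plumbing, the following is a self-contained CPU rendering of the pseudocode in the comment above ('NaiveSortInPlace' is an illustrative name, not part of the change). Skipping out-of-range partners (j >= bound) is equivalent to padding the array to a power of two with +infinity, which is why non-power-of-two bounds still sort correctly:

  #include <algorithm>
  #include <cstdint>
  #include <iostream>
  #include <vector>

  // CPU version of the comparison network emitted by EmitSortInPlace.
  void NaiveSortInPlace(std::vector<int64_t>& keys) {
    const int64_t bound = static_cast<int64_t>(keys.size());
    int64_t stages = 0;
    while ((int64_t{1} << stages) < bound) ++stages;  // Log2Ceiling(bound)
    // Mirrors EmitCompareLoop: only the smaller, in-range index swaps.
    auto compare_and_swap = [&](int64_t i, int64_t j) {
      if (i < j && j < bound && keys[j] < keys[i]) std::swap(keys[i], keys[j]);
    };
    for (int64_t stage = 0; stage < stages; ++stage) {
      // The first pass of each stage uses the "reversal" mask 2^(stage+1) - 1.
      const int64_t first_xor_mask = (int64_t{1} << (stage + 1)) - 1;
      for (int64_t i = 0; i < bound; ++i) {
        compare_and_swap(i, i ^ first_xor_mask);
      }
      // Later passes use masks 2^(stage - 1), ..., 2^0.
      for (int64_t mask = 0; mask < stage; ++mask) {
        const int64_t later_xor_mask = int64_t{1} << (stage - (mask + 1));
        for (int64_t i = 0; i < bound; ++i) {
          compare_and_swap(i, i ^ later_xor_mask);
        }
      }
    }
  }

  int main() {
    std::vector<int64_t> keys = {5, 1, 4, 1, 5, 9, 2, 6, 5, 3};
    NaiveSortInPlace(keys);
    for (int64_t k : keys) std::cout << k << ' ';  // 1 1 2 3 4 5 5 5 6 9
  }

For bound = 8 the schedule is: stage 0 uses mask 1; stage 1 uses masks 3, then 1; stage 2 uses masks 7, then 2, then 1. This is the bitonic network from the Wikipedia reference cited in the comment. Note also that the emitted float comparison uses FCMP_ULT, which is true whenever either operand is NaN, so a pair containing a NaN is never swapped; the TODO above tracks defining proper NaN semantics.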
diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
new file mode 100644
index 0000000000..fc45bfab12
--- /dev/null
+++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h
@@ -0,0 +1,34 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_
+
+#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+namespace llvm_ir {
+// Emits LLVM IR to sort the 'dimension_to_sort' dimension of 'keys_array' into
+// ascending order.
+Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
+ tensorflow::StringPiece name,
+ llvm::IRBuilder<>* ir_builder);
+} // namespace llvm_ir
+} // namespace xla
+
+#endif // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_
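
For reference, a hypothetical call site in a backend emitter might look like the sketch below; 'GetIrArrayFor' and 'ir_builder_' are illustrative stand-ins rather than part of this change, and because the sort happens in place, the emitter is assumed to have already copied the operand into the sort instruction's output buffer:

  Status HandleSort(HloInstruction* sort) {
    // dimensions(0) is assumed to hold the sort dimension of the Sort HLO;
    // GetIrArrayFor is a hypothetical accessor for the output buffer.
    return llvm_ir::EmitSortInPlace(sort->dimensions(0), GetIrArrayFor(sort),
                                    llvm_ir::IrName(sort), &ir_builder_);
  }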