1 files changed, 218 insertions, 14 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc
index deb7f28d84..51353eea6e 100644
--- a/tensorflow/compiler/xla/service/hlo_evaluator.cc
+++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc
@@ -25,6 +25,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/index_util.h"
 #include "tensorflow/compiler/xla/layout_util.h"
+#include "tensorflow/compiler/xla/literal.h"
 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/compiler/xla/map_util.h"
 #include "tensorflow/compiler/xla/primitive_util.h"
@@ -135,7 +136,6 @@ StatusOr<std::unique_ptr<Literal>> Compare<complex64>(
 
 }  // namespace
 
-
 HloEvaluator::HloEvaluator(int64 max_loop_iterations)
     : max_loop_iterations_(max_loop_iterations) {
   typed_visitors_[PRED] = MakeUnique<HloEvaluatorTypedVisitor<bool>>(this);
@@ -330,6 +330,24 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::EvaluateElementwiseUnaryOp(
   return result;
 }
 
+StatusOr<std::unique_ptr<Literal>> HloEvaluator::EvaluateDotOp(
+    const DotDimensionNumbers& dim_numbers, const Literal& lhs,
+    const Literal& rhs) {
+  std::unique_ptr<HloInstruction> lhs_instr =
+      HloInstruction::CreateConstant(lhs.CloneToUnique());
+  std::unique_ptr<HloInstruction> rhs_instr =
+      HloInstruction::CreateConstant(rhs.CloneToUnique());
+
+  TF_ASSIGN_OR_RETURN(
+      Shape dot_shape,
+      ShapeInference::InferDotOpShape(lhs.shape(), rhs.shape(), dim_numbers));
+
+  std::unique_ptr<HloInstruction> cloned_instruction =
+      HloInstruction::CreateDot(dot_shape, lhs_instr.get(), rhs_instr.get(),
+                                dim_numbers);
+  return Evaluate(cloned_instruction.get());
+}
+
 Status HloEvaluator::HandleParameter(HloInstruction* parameter) {
   CHECK_LT(parameter->parameter_number(), arg_literals_.size());
   const Literal* input_literal = arg_literals_[parameter->parameter_number()];
@@ -382,7 +400,7 @@ Status HloEvaluator::HandleConcatenate(HloInstruction* concatenate) {
         ShapeUtil::GetDimension(operand_shape, concat_dim);
   }
 
-  auto result_literal = Literal::CreateFromDimensions(
+  auto result_literal = LiteralUtil::CreateFromDimensions(
       reference_shape.element_type(), concat_dimensions);
   DimensionVector source_indices(rank, 0);
   DimensionVector dest_indices(concat_dimensions.size(), 0);
@@ -533,7 +551,7 @@ Status HloEvaluator::HandleTuple(HloInstruction* tuple) {
     operand_literals.push_back(&GetEvaluatedLiteralFor(operand));
   }
 
-  evaluated_[tuple] = Literal::MakeTuple(operand_literals);
+  evaluated_[tuple] = LiteralUtil::MakeTuple(operand_literals);
   return Status::OK();
 }
 
@@ -757,6 +775,12 @@ class OutputWindowIndexToInputIndex {
     return ArraySlice<int64>(input_index_);
   }
 
+  // Returns for a given 'input_dim' the corresponding output dimension index,
+  // or -1 if 'input_dim' is an elided window dimension.
+  int64 input_dim_value_to_output_index(int64 input_dim) {
+    return input_dim_value_to_output_index_[input_dim];
+  }
+
  private:
   // Propagates window dimensions from the output index to input_index_ by
   // mutating input_index_ in place.
@@ -774,7 +798,7 @@ class OutputWindowIndexToInputIndex {
 
   // input_dim_value_to_index_vector_[i] tells us how to compute dimension i of
   // the input index from the output index. See
-  // PropagateOutputIndexToInputIndex.
+  // PropagateOutputIndexWindowDimsToInputIndex.
   std::vector<int64> input_dim_value_to_output_index_;
 
   // The result computed by this functor.  operator() returns an ArraySlice into
@@ -827,6 +851,8 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) {
   // corresponding index in the input shape.
   std::vector<int64> input_index(operand.shape().dimensions_size());
   std::vector<int64> output_index(gather->shape().dimensions_size());
+  std::vector<int64> input_gather_index_clamped(
+      operand.shape().dimensions_size());
 
   OutputGatherIndexToInputIndex output_gather_index_to_input_index(
       &gather->gather_dimension_numbers(), /*input_shape=*/operand.shape(),
@@ -848,14 +874,26 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) {
       output_index[i] = output_gather_index[i] + output_window_index[i];
       DCHECK_LT(output_index[i], shape.dimensions(i));
     }
+    for (int i = 0, e = input_gather_index.size(); i < e; i++) {
+      int64 output_dim =
+          output_window_index_to_input_index.input_dim_value_to_output_index(i);
+      // If 'output_dim' is -1, it means 'i' is an elided window dim. This means
+      // we set the iteration index to 0, so for the purpose of the following
+      // calculations we can consider the output dimension size to be 1.
+      int64 output_dim_size =
+          output_dim == -1 ? 1 : shape.dimensions(output_dim);
+      // Clamp the gather index so that the gather region fits in the operand.
+      // input_gather_index_clamped[i] = clamp(input_gather_index[i], 0,
+      //                                       operand_shape.dimensions(i) -
+      //                                       output_dim_size);
+      input_gather_index_clamped[i] =
+          std::min(operand_shape.dimensions(i) - output_dim_size,
+                   std::max(0LL, input_gather_index[i]));
+    }
     for (int i = 0, e = input_index.size(); i < e; i++) {
-      // TODO(b/74360564): We should implement whatever out of bounds behavior
-      // we decide for dynamic-slice here as well.
-      input_index[i] = (input_gather_index[i] + input_window_index[i]) %
-                       operand_shape.dimensions(i);
-      if (input_index[i] < 0) {
-        input_index[i] += operand_shape.dimensions(i);
-      }
+      input_index[i] = input_gather_index_clamped[i] + input_window_index[i];
+      DCHECK_GE(input_index[i], 0);
+      DCHECK_LT(input_index[i], operand_shape.dimensions(i));
     }
     TF_RETURN_IF_ERROR(
         result->CopyElementFrom(operand, input_index, output_index));
@@ -903,7 +941,7 @@ Status HloEvaluator::HandleBroadcast(HloInstruction* broadcast) {
 }
 
 Status HloEvaluator::HandleAfterAll(HloInstruction* token) {
-  evaluated_[token] = Literal::CreateToken();
+  evaluated_[token] = LiteralUtil::CreateToken();
   return Status::OK();
 }
 
@@ -1024,8 +1062,6 @@ Status HloEvaluator::HandleSelect(HloInstruction* select) {
   const auto& on_false = GetEvaluatedLiteralFor(select->operand(2));
 
   // If predicate is of scalar type, no element-wise selection would be needed.
-  // This would also handle output array of tuple types as the DefaultAction
-  // would go through the HloEvaluatorTypedVisitor which doesn't handle tuples.
   if (ShapeUtil::IsScalar(pred.shape())) {
     if (pred.Get<bool>({})) {
       evaluated_[select] = on_true.CloneToUnique();
@@ -1038,6 +1074,19 @@ Status HloEvaluator::HandleSelect(HloInstruction* select) {
   return DefaultAction(select);
 }
 
+Status HloEvaluator::HandleTupleSelect(HloInstruction* tuple_select) {
+  const auto& pred = GetEvaluatedLiteralFor(tuple_select->operand(0));
+  const auto& on_true = GetEvaluatedLiteralFor(tuple_select->operand(1));
+  const auto& on_false = GetEvaluatedLiteralFor(tuple_select->operand(2));
+
+  if (pred.Get<bool>({})) {
+    evaluated_[tuple_select] = on_true.CloneToUnique();
+  } else {
+    evaluated_[tuple_select] = on_false.CloneToUnique();
+  }
+  return Status::OK();
+}
+
 Status HloEvaluator::HandleWhile(HloInstruction* while_hlo) {
   HloComputation* cond_comp = while_hlo->while_condition();
   HloComputation* body_comp = while_hlo->while_body();
@@ -1068,6 +1117,161 @@ Status HloEvaluator::HandleWhile(HloInstruction* while_hlo) {
   return Status::OK();
 }
 
+// Key-value sort is a special snowflake: it's templated on two different
+// element types, one for the keys, and one for the values. Jump through some
+// hoops to make this work.
+namespace {
+template <typename KeyType, typename ValueType>
+StatusOr<std::unique_ptr<Literal>> EvaluateSortInternal(
+    HloInstruction* sort, const Literal& keys_literal,
+    const Literal& values_literal) {
+  auto rank = ShapeUtil::Rank(keys_literal.shape());
+  TF_RET_CHECK(
+      ShapeUtil::SameDimensions(keys_literal.shape(), values_literal.shape()))
+      << "Sort keys and values must have the same dimensions";
+  TF_RET_CHECK(rank > 0 && rank <= 2)
+      << "Sort is only supported for rank-1 and rank-2 shapes, rank is: "
+      << rank;
+  TF_RET_CHECK(sort->operand_count() == 2) << "Expected key-value sort";
+  // We need to sort and array of keys and an array of values, where the
+  // sorted order of the values is determined by the keys. The simplest(?)
+  // way to do this is to go to an array-of-pairs representation, sort the
+  // array using the keys, and then go back to pair-of-arrays.
+  VLOG(3) << "HandleSort keys_literal: " << keys_literal.ToString();
+  VLOG(3) << "HandleSort values_literal: " << values_literal.ToString();
+
+  auto sort_r1 = [](const Literal& keys_literal,
+                    const Literal& values_literal) {
+    const auto& keys_data = keys_literal.data<KeyType>();
+    const auto& values_data = values_literal.data<ValueType>();
+
+    using kv_pair = std::pair<KeyType, ValueType>;
+    std::vector<kv_pair> key_value_vector;
+    CHECK_EQ(keys_data.size(), values_data.size());
+    key_value_vector.reserve(keys_data.size());
+    for (int i = 0; i < keys_data.size(); ++i) {
+      key_value_vector.push_back(std::make_pair(keys_data[i], values_data[i]));
+    }
+    std::sort(key_value_vector.begin(), key_value_vector.end(),
+              [](const kv_pair& a, const kv_pair& b) {
+                return SafeLess<KeyType>(a.first, b.first);
+              });
+    std::vector<KeyType> result_keys;
+    std::vector<ValueType> result_values;
+    for (const auto& key_value : key_value_vector) {
+      result_keys.push_back(key_value.first);
+      result_values.push_back(key_value.second);
+    }
+    auto result_keys_literal = MakeUnique<Literal>(keys_literal.shape());
+    result_keys_literal->PopulateR1(
+        tensorflow::gtl::ArraySlice<KeyType>(result_keys));
+    auto result_values_literal = MakeUnique<Literal>(values_literal.shape());
+    result_values_literal->PopulateR1(
+        tensorflow::gtl::ArraySlice<ValueType>(result_values));
+    return std::make_pair(std::move(result_keys_literal),
+                          std::move(result_values_literal));
+  };
+
+  std::unique_ptr<Literal> result_tuple;
+  if (rank == 1) {
+    auto result_pair = sort_r1(keys_literal, values_literal);
+    result_tuple = LiteralUtil::MakeTuple(
+        {result_pair.first.get(), result_pair.second.get()});
+  } else {
+    // For R2 sort, the desired semantics are to sort each matrix row
+    // independently.
+    auto keys_result_literal = MakeUnique<Literal>(keys_literal.shape());
+    auto values_result_literal = MakeUnique<Literal>(values_literal.shape());
+    int64 r1_length = keys_literal.shape().dimensions(1);
+    for (int64 row = 0; row < keys_literal.shape().dimensions(0); ++row) {
+      TF_ASSIGN_OR_RETURN(auto keys_r1_slice,
+                          keys_literal.Slice({row, 0}, {row + 1, r1_length})
+                              ->Reshape({r1_length}));
+      TF_ASSIGN_OR_RETURN(auto values_r1_slice,
+                          values_literal.Slice({row, 0}, {row + 1, r1_length})
+                              ->Reshape({r1_length}));
+      auto r1_result_pair = sort_r1(*keys_r1_slice, *values_r1_slice);
+      TF_ASSIGN_OR_RETURN(auto sorted_keys,
+                          r1_result_pair.first->Reshape({1, r1_length}));
+      TF_ASSIGN_OR_RETURN(auto sorted_values,
+                          r1_result_pair.second->Reshape({1, r1_length}));
+      TF_RETURN_IF_ERROR(keys_result_literal->CopySliceFrom(
+          *sorted_keys, {0, 0}, {row, 0}, {1, r1_length}));
+      TF_RETURN_IF_ERROR(values_result_literal->CopySliceFrom(
+          *sorted_values, {0, 0}, {row, 0}, {1, r1_length}));
+    }
+    result_tuple = LiteralUtil::MakeTuple(
+        {keys_result_literal.get(), values_result_literal.get()});
+  }
+
+  VLOG(3) << "HandleSort result_tuple: " << result_tuple->ToString();
+  return std::move(result_tuple);
+}
+
+template <typename KeyType>
+StatusOr<std::unique_ptr<Literal>> EvaluateSortCurried(
+    HloInstruction* sort, const Literal& keys_literal,
+    const Literal& values_literal) {
+  switch (sort->operand(1)->shape().element_type()) {
+    case F32:
+      return EvaluateSortInternal<KeyType, float>(sort, keys_literal,
+                                                  values_literal);
+    case U32:
+      return EvaluateSortInternal<KeyType, uint32>(sort, keys_literal,
+                                                   values_literal);
+    case S32:
+      return EvaluateSortInternal<KeyType, int32>(sort, keys_literal,
+                                                  values_literal);
+    case BF16:
+      return EvaluateSortInternal<KeyType, bfloat16>(sort, keys_literal,
+                                                     values_literal);
+    default:
+      return InvalidArgument("Unsupported type for Sort");
+  }
+}
+
+StatusOr<std::unique_ptr<Literal>> EvaluateSort(HloInstruction* sort,
+                                                const Literal& keys_literal,
+                                                const Literal& values_literal) {
+  switch (sort->operand(0)->shape().element_type()) {
+    case F32:
+      return EvaluateSortCurried<float>(sort, keys_literal, values_literal);
+    case U32:
+      return EvaluateSortCurried<uint32>(sort, keys_literal, values_literal);
+    case S32:
+      return EvaluateSortCurried<int32>(sort, keys_literal, values_literal);
+    case BF16:
+      return EvaluateSortCurried<bfloat16>(sort, keys_literal, values_literal);
+    default:
+      return InvalidArgument("Unsupported type for Sort");
+  }
+}
+}  // namespace
+
+Status HloEvaluator::HandleSort(HloInstruction* sort) {
+  const int64 sort_dim = sort->dimensions(0);
+  const int64 rank = ShapeUtil::Rank(sort->operand(0)->shape());
+  if (sort_dim != rank - 1) {
+    return Unimplemented(
+        "Trying to support along dimension %lld, which is not the last "
+        "dimension",
+        sort_dim);
+  }
+
+  if (!ShapeUtil::IsTuple(sort->shape())) {
+    return DefaultAction(sort);
+  } else {
+    auto result = EvaluateSort(sort, GetEvaluatedLiteralFor(sort->operand(0)),
+                               GetEvaluatedLiteralFor(sort->operand(1)));
+    if (result.ok()) {
+      evaluated_[sort] = std::move(result.ValueOrDie());
+      return Status::OK();
+    } else {
+      return result.status();
+    }
+  }
+}
+
 Status HloEvaluator::Preprocess(HloInstruction* hlo) {
   VLOG(2) << "About to visit HLO: " << hlo->ToString();
   return Status::OK();