aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/indexed_array_analysis.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/compiler/xla/service/indexed_array_analysis.cc')
-rw-r--r--tensorflow/compiler/xla/service/indexed_array_analysis.cc180
1 files changed, 176 insertions, 4 deletions
diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
index 1985d20578..8b2df32567 100644
--- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc
+++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc
@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/lib/gtl/flatset.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
+#include "tensorflow/core/lib/gtl/optional.h"
#include "tensorflow/core/lib/strings/strcat.h"
namespace xla {
@@ -160,6 +161,12 @@ StatusOr<Analysis::Array*> IndexedArrayAnalysis::ComputeArrayFor(
computed_array,
ComputeArrayForReshape(instr->shape(),
FindOrDie(cache_, instr->operand(0))));
+ } else if (instr->opcode() == HloOpcode::kDot) {
+ TF_ASSIGN_OR_RETURN(
+ computed_array,
+ ComputeArrayForDot(instr->shape(), instr->dot_dimension_numbers(),
+ FindOrDie(cache_, instr->operand(0)),
+ FindOrDie(cache_, instr->operand(1))));
} else {
computed_array = nullptr;
}
@@ -290,8 +297,7 @@ StatusOr<Analysis::Array*> IndexedArrayAnalysis::ComputeArrayForGather(
}
if (auto* indexed = dynamic_cast<ScalarIndexedArray*>(source)) {
- auto it = c_find(indexed->output_dims(), source_dim);
- if (it != indexed->output_dims().end()) {
+ if (c_linear_search(indexed->output_dims(), source_dim)) {
return FoldGatherOfGather(indexed, indices, source_dim, output_dims,
shape);
}
@@ -956,11 +962,177 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseUnaryOp(HloOpcode opcode,
return Construct<ScalarIndexedConstantArray>(
new_source, scalar_indexed_const->indices(),
scalar_indexed_const->source_dim(),
- std::vector<int64>(scalar_indexed_const->output_dims().begin(),
- scalar_indexed_const->output_dims().end()),
+ ArraySliceToVector(scalar_indexed_const->output_dims()),
scalar_indexed_const->shape());
}
+namespace {
+
+// Returns the non-contracting non-batch dimension (as per `contracting_dims`
+// and `batch_dims`) if there is exactly one, otherwise returns nullopt.
+gtl::optional<int64> GetOnlyNonContractingNonBatchDim(
+ int64 rank, ArraySlice<int64> contracting_dims,
+ ArraySlice<int64> batch_dims) {
+ gtl::optional<int64> result;
+ for (int64 dim = 0; dim < rank; dim++) {
+ if (!ArrayContains(contracting_dims, dim) &&
+ !ArrayContains(batch_dims, dim)) {
+ if (result.has_value()) {
+ return gtl::nullopt;
+ }
+ result = dim;
+ }
+ }
+ return result;
+}
+
+// Returns true if `indexed_array`, which is either the LHS or the RHS of a Dot
+// HLO, can be folded into the dot operation. For now these conditions are both
+// necessary and sufficient.
+//
+// `tag` describes the caller. Used only for logging.
+//
+// `contracting_dims` and `batch_dims` are the contracting and batch dimensions
+// of whatever operand `indexed_array` is to the dot (LHS or RHS).
+bool CanFoldDotIntoIndexedArray(
+ tensorflow::StringPiece tag,
+ Analysis::ScalarIndexedConstantArray* indexed_array,
+ ArraySlice<int64> contracting_dims, ArraySlice<int64> batch_dims) {
+ gtl::optional<int64> non_contracting_non_batch_dim =
+ GetOnlyNonContractingNonBatchDim(ShapeUtil::Rank(indexed_array->shape()),
+ contracting_dims, batch_dims);
+ if (!non_contracting_non_batch_dim.has_value()) {
+ VLOG(3) << tag << ": multiple or no non-contracting non-batch dimensions";
+ return false;
+ }
+
+ if (indexed_array->output_dims().size() != 1 ||
+ indexed_array->output_dims()[0] != *non_contracting_non_batch_dim) {
+ VLOG(3) << tag << ": output dims != the lhs non-contracting non-batch dim";
+ return false;
+ }
+
+ int64 indexed_array_rank = ShapeUtil::Rank(indexed_array->shape());
+ if (indexed_array->source_dim() < (indexed_array_rank - 2)) {
+ // This restriction can be lifted by inserting reshape nodes.
+ VLOG(3) << tag
+ << ": source dim is not in the low two dims, won't be able to form "
+ "a matmul";
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace
+
+StatusOr<Analysis::Array*>
+IndexedArrayAnalysis::ComputeArrayForDotWithIndexedLhs(
+ const Shape& shape, const DotDimensionNumbers& dim_numbers,
+ ScalarIndexedConstantArray* lhs, ConstantArray* rhs) {
+ VLOG(3) << "ComputeArrayForDotWithIndexedLhs(" << ToString(lhs) << " "
+ << ToString(rhs);
+ if (!CanFoldDotIntoIndexedArray(
+ "ComputeArrayForDotWithIndexedLhs", lhs, /*contracting_dims=*/
+ AsInt64Slice(dim_numbers.lhs_contracting_dimensions()),
+ /*batch_dims=*/AsInt64Slice(dim_numbers.lhs_batch_dimensions()))) {
+ return nullptr;
+ }
+
+ int64 lhs_rank = ShapeUtil::Rank(lhs->shape());
+ DotDimensionNumbers new_dim_numbers = dim_numbers;
+ new_dim_numbers.set_lhs_contracting_dimensions(
+ 0, lhs->source_dim() == (lhs_rank - 1) ? (lhs_rank - 2) : (lhs_rank - 1));
+
+ TF_ASSIGN_OR_RETURN(Literal * literal_for_new_source,
+ TakeOwnership(HloEvaluator{}.EvaluateDotOp(
+ new_dim_numbers, lhs->literal(), *rhs->literal())));
+
+ // The new source dimension is wherever the non-batch non-contracting LHS
+ // dimension "went".
+ int64 new_source_dim = dim_numbers.lhs_batch_dimensions_size() +
+ dim_numbers.rhs_batch_dimensions_size();
+
+ ConstantArray* new_source = Construct<ConstantArray>(literal_for_new_source);
+ return Construct<ScalarIndexedConstantArray>(
+ new_source, lhs->indices(), new_source_dim,
+ ArraySliceToVector(lhs->output_dims()), shape);
+}
+
+StatusOr<Analysis::Array*>
+IndexedArrayAnalysis::ComputeArrayForDotWithIndexedRhs(
+ const Shape& shape, const DotDimensionNumbers& dim_numbers,
+ ConstantArray* lhs, ScalarIndexedConstantArray* rhs) {
+ VLOG(3) << "ComputeArrayForDotWithIndexedRhs(" << ToString(lhs) << " "
+ << ToString(rhs);
+ if (!CanFoldDotIntoIndexedArray(
+ "ComputeArrayForDotWithIndexedRhs", rhs, /*contracting_dims=*/
+ AsInt64Slice(dim_numbers.rhs_contracting_dimensions()),
+ /*batch_dims=*/AsInt64Slice(dim_numbers.rhs_batch_dimensions()))) {
+ return nullptr;
+ }
+
+ int64 rhs_rank = ShapeUtil::Rank(rhs->shape());
+
+ DotDimensionNumbers new_dim_numbers = dim_numbers;
+ new_dim_numbers.set_rhs_contracting_dimensions(
+ 0, rhs->source_dim() == (rhs_rank - 1) ? (rhs_rank - 2) : (rhs_rank - 1));
+
+ TF_ASSIGN_OR_RETURN(Literal * literal_for_new_source,
+ TakeOwnership(HloEvaluator{}.EvaluateDotOp(
+ new_dim_numbers, *lhs->literal(), rhs->literal())));
+
+ // The new source dimension is wherever the non-batch non-contracting RHS
+ // dimension "went".
+ int64 new_source_dim = dim_numbers.lhs_batch_dimensions_size() +
+ dim_numbers.rhs_batch_dimensions_size() + 1;
+
+ ConstantArray* new_source = Construct<ConstantArray>(literal_for_new_source);
+ return Construct<ScalarIndexedConstantArray>(
+ new_source, rhs->indices(), new_source_dim,
+ ArraySliceToVector(rhs->output_dims()), shape);
+}
+
+StatusOr<Analysis::Array*> IndexedArrayAnalysis::ComputeArrayForDot(
+ const Shape& shape, const DotDimensionNumbers& dim_numbers, Array* lhs,
+ Array* rhs) {
+ // Intuitively, if
+ //
+ // - The LHS of a dot product is a gathered sequence of rows from a constant
+ // array (i.e. LHS[I,J] = Const[Indices[I],J]) and the RHS is a constant
+ //
+ // OR
+ //
+ // - If the RHS of a dot product is a gathered sequence of columns from a
+ // constant array (i.e. RHS[I,J] = Const[I, Indices[J]]) and the LHS is a
+ // constant
+ //
+ // then the result of the dot product itself is a gather from a constant
+ // array. E.g. Dot(LHS, ConstRhs) where LHS[I,J] = Const[Indices[I],J] can be
+ // rewritten as Result where Result[I,J] = Dot(Const, ConstRhs)[Indices[I],
+ // J].
+ //
+ // We do a general version of this rewrite here.
+ VLOG(3) << "ComputeArrayForDot(" << ToString(lhs) << " " << ToString(rhs);
+ if (auto* lhs_indexed_array =
+ dynamic_cast<ScalarIndexedConstantArray*>(lhs)) {
+ if (auto* rhs_constant = dynamic_cast<ConstantArray*>(rhs)) {
+ return ComputeArrayForDotWithIndexedLhs(shape, dim_numbers,
+ lhs_indexed_array, rhs_constant);
+ }
+ }
+
+ if (auto* rhs_indexed_array =
+ dynamic_cast<ScalarIndexedConstantArray*>(rhs)) {
+ if (auto* lhs_constant = dynamic_cast<ConstantArray*>(lhs)) {
+ return ComputeArrayForDotWithIndexedRhs(shape, dim_numbers, lhs_constant,
+ rhs_indexed_array);
+ }
+ }
+
+ return nullptr;
+}
+
tensorflow::StringPiece IndexedArrayAnalysisPrinterPass::name() const {
return "indexed-array-analysis-printer-pass";
}