/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h" #include #include #include #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" namespace xla { string BufferAlias::ToString() const { return tensorflow::strings::StrCat( "BufferAlias(", instruction_->FullyQualifiedName(), "[", tensorflow::str_util::Join(index_, ","), "])"); } std::ostream& operator<<(std::ostream& out, const BufferAlias& buffer_alias) { out << buffer_alias.ToString(); return out; } bool PointsToSet::IsAmbiguous() const { bool ambiguous = false; ForEachElement( [&ambiguous](const ShapeIndex& /*index*/, const std::vector& points_to) { ambiguous |= points_to.size() > 1; }); return ambiguous; } bool PointsToSet::IsDistinct() const { bool distinct = true; std::set all_points_to; ForEachElement([&distinct, &all_points_to]( const ShapeIndex& /*index*/, const std::vector& points_to) { for (auto& buffer : points_to) { if (all_points_to.count(buffer) != 0) { distinct = false; } all_points_to.insert(buffer); } }); return distinct; } size_t PointsToSet::size() const { // Because pointed-to elements may be duplicated we have to create a flattened // set and return the size. return CreateFlattenedSet().size(); } tensorflow::gtl::FlatSet PointsToSet::CreateFlattenedSet() const { tensorflow::gtl::FlatSet flat_set; ForEachElement([&flat_set](const ShapeIndex& /*index*/, const std::vector& buffers) { flat_set.insert(buffers.begin(), buffers.end()); }); return flat_set; } bool PointsToSet::ContainsBuffer(const LogicalBuffer& buffer) const { bool found = false; ForEachElement([&found, &buffer](const ShapeIndex& /*index*/, const std::vector& pointed_to_buffers) { if (!found && std::find(pointed_to_buffers.begin(), pointed_to_buffers.end(), &buffer) != pointed_to_buffers.end()) { found = true; } }); return found; } bool PointsToSet::ContainsBufferAtIndex(const LogicalBuffer& buffer, const ShapeIndex& index) const { const std::vector& pointed_to_buffers = element(index); return std::find(pointed_to_buffers.begin(), pointed_to_buffers.end(), &buffer) != pointed_to_buffers.end(); } void PointsToSet::AddPointedToBuffer(const LogicalBuffer& buffer, const ShapeIndex& index) { if (ContainsBufferAtIndex(buffer, index)) { return; } mutable_element(index)->push_back(&buffer); } const std::set& PointsToSet::tuple_sources( const ShapeIndex& index) const { return tuple_sources_.element(index); } void PointsToSet::add_tuple_source(const ShapeIndex& index, HloInstruction* tuple) { tuple_sources_.mutable_element(index)->insert(tuple); } /* static */ StatusOr> TuplePointsToAnalysis::Run(const HloModule* module, Colorer colorer) { std::unique_ptr analysis( new TuplePointsToAnalysis(module, std::move(colorer))); TF_RETURN_IF_ERROR(analysis->Analyze()); return std::move(analysis); } /* static */ StatusOr> TuplePointsToAnalysis::Run(const HloModule* module) { return Run(module, DefaultColorer()); } Status TuplePointsToAnalysis::Analyze() { points_to_.clear(); for (auto& computation : module_->computations()) { TF_RETURN_IF_ERROR(computation->Accept(this)); TF_RETURN_IF_ERROR( PopulateDefinedBuffersAndAliases(computation->instructions())); // Run points-to analysis on fusion instructions in 'computation'. for (auto& instruction : computation->instructions()) { if (instruction->opcode() != HloOpcode::kFusion) { continue; } TF_RETURN_IF_ERROR(instruction->fused_expression_root()->Accept(this)); TF_RETURN_IF_ERROR( PopulateDefinedBuffersAndAliases(instruction->fused_instructions())); } } XLA_VLOG_LINES(3, ToString()); return Status::OK(); } Status TuplePointsToAnalysis::PopulateDefinedBuffersAndAliases( const std::list>& instructions) { for (auto& instruction : instructions) { TF_RETURN_IF_ERROR(GatherBuffersDefinedByInstruction( instruction.get(), &instruction_defined_buffers_[instruction.get()])); const PointsToSet& points_to_set = GetPointsToSet(instruction.get()); points_to_set.ForEachElement( [this, &instruction]( const ShapeIndex& index, const std::vector& pointed_to_buffers) { for (const LogicalBuffer* buffer : pointed_to_buffers) { if (buffer_aliases_.count(buffer) == 0) { buffer_aliases_.insert({buffer, std::vector()}); } buffer_aliases_[buffer].emplace_back(instruction.get(), index); } }); } return Status::OK(); } const LogicalBuffer& TuplePointsToAnalysis::NewLogicalBuffer( HloInstruction* instruction, const ShapeIndex& index) { CHECK_EQ(logical_buffers_.size(), next_buffer_id_); logical_buffers_.push_back(MakeUnique( instruction, index, next_buffer_id_, colorer_(instruction, index))); ++next_buffer_id_; return *logical_buffers_.back(); } Status TuplePointsToAnalysis::DefaultAction(HloInstruction* hlo_instruction) { // Create trivial points-to set for instruction. Each points-to set at index i // contains a single element LogicalBuffer(hlo_instruction, i). This indicates // that this instruction is the source of all buffers in its own output. PointsToSet& points_to_set = CreateEmptyPointsToSet(hlo_instruction); points_to_set.ForEachMutableElement( [this, hlo_instruction](const ShapeIndex& index, std::vector* buffers) { const LogicalBuffer& buffer = NewLogicalBuffer(hlo_instruction, index); buffers->push_back(&buffer); }); if (ShapeUtil::IsTuple(hlo_instruction->shape())) { // If the hlo instruction is a tuple-shaped, then trivially the instruction // itself is the source of the tuple. points_to_set.add_tuple_source({}, hlo_instruction); } return Status::OK(); } Status TuplePointsToAnalysis::HandleGetTupleElement( HloInstruction* get_tuple_element, HloInstruction* operand) { // GetTupleElement forwards a pointer to a particular element of the tuple // operand. int64 element_index = get_tuple_element->tuple_index(); PointsToSet& points_to_set = CreateEmptyPointsToSet(get_tuple_element); const PointsToSet& operand_points_to_set = *FindOrDie(points_to_, operand); // Copy the points-to set (and tuple sources) at index {element_index} of the // operand to the points-to set for this GetTupleElement instruction. points_to_set.ForEachMutableElement( [&, this](const ShapeIndex& target_index, std::vector* points_to) { // Construct an index into the operand by prepending element_index to // the index for the GetTupleElement instruction's points-to set. ShapeIndex src_index; src_index.push_back(element_index); for (auto element : target_index) { src_index.push_back(element); } *points_to = operand_points_to_set.element(src_index); for (HloInstruction* tuple : operand_points_to_set.tuple_sources(src_index)) { points_to_set.add_tuple_source(target_index, tuple); } }); return Status::OK(); } Status TuplePointsToAnalysis::HandleCopy(HloInstruction* copy) { // A kCopy instruction performs a shallow copy of the operand. The top-level // buffer (index={}) is newly created, but all other buffers (in the case of a // tuple shape) come from the operand PointsToSet& points_to_set = CreateCopiedPointsToSet(copy, copy->operand(0)); points_to_set.mutable_element(/*index=*/{})->clear(); points_to_set.AddPointedToBuffer(NewLogicalBuffer(copy, /*index=*/{}), /*index=*/{}); return Status::OK(); } Status TuplePointsToAnalysis::HandleBitcast(HloInstruction* bitcast) { // A kBitcast instruction aliases its operand. That is, the buffer of its // result *is* the buffer of its operand, so just copy the operands points-to // set. CreateCopiedPointsToSet(bitcast, bitcast->operand(0)); return Status::OK(); } Status TuplePointsToAnalysis::HandleTuple( HloInstruction* tuple, tensorflow::gtl::ArraySlice operands) { PointsToSet& points_to_set = CreateEmptyPointsToSet(tuple); points_to_set.AddPointedToBuffer(NewLogicalBuffer(tuple, /*index=*/{}), /*index=*/{}); // A tuple contains references to all input operands and transitively any // references in those operands. for (int64 i = 0; i < operands.size(); ++i) { const PointsToSet& operand_points_to_set = *FindOrDie(points_to_, operands[i]); // Copy the points-to set (and tuple sources) of the operand into the // respective subtree of the tuple instructions points-to set. operand_points_to_set.ForEachElement( [&points_to_set, &operand_points_to_set, i]( const ShapeIndex& src_index, const std::vector& points_to) { ShapeIndex target_index; target_index.push_back(i); for (auto element : src_index) { target_index.push_back(element); } *points_to_set.mutable_element(target_index) = points_to; for (HloInstruction* tuple : operand_points_to_set.tuple_sources(src_index)) { points_to_set.add_tuple_source(target_index, tuple); } }); } points_to_set.add_tuple_source({}, tuple); return Status::OK(); } Status TuplePointsToAnalysis::HandleSelect(HloInstruction* select, HloInstruction* /*pred*/, HloInstruction* on_true, HloInstruction* on_false) { // Select allocates a new buffer and then shallow copies the on_true or // on_false buffer into this new buffer. Which side is chosen cannot be // determined statically so conservatively set the points-to set to the union // of these on_true and on_false operands. // // First create a copy of the on_true points-to set (and tuple sources), then // add in elements of the on_false points-to set (tuple sources). PointsToSet& points_to_set = CreateCopiedPointsToSet(select, on_true); const PointsToSet& false_points_to_set = *FindOrDie(points_to_, on_false); points_to_set.ForEachMutableElement( [&](const ShapeIndex& index, std::vector* buffers) { for (const LogicalBuffer* false_buffer : false_points_to_set.element(index)) { points_to_set.AddPointedToBuffer(*false_buffer, index); } for (HloInstruction* tuple : false_points_to_set.tuple_sources(index)) { points_to_set.add_tuple_source(index, tuple); } }); // Select creates a new (top-level) buffer to store its result, so its // respective element in the points-to set should contain only itself. points_to_set.mutable_element({})->clear(); points_to_set.AddPointedToBuffer(NewLogicalBuffer(select, /*index=*/{}), /*index=*/{}); return Status::OK(); } const PointsToSet& TuplePointsToAnalysis::GetPointsToSet( const HloInstruction* hlo_instruction) const { return *FindOrDie(points_to_, hlo_instruction); } PointsToSet& TuplePointsToAnalysis::CreateEmptyPointsToSet( const HloInstruction* instruction) { CHECK_EQ(0, points_to_.count(instruction)); points_to_[instruction] = MakeUnique(instruction->shape()); return *FindOrDie(points_to_, instruction); } bool TuplePointsToAnalysis::InstructionDefinesBufferAtIndex( const HloInstruction* instruction, const ShapeIndex& index) const { const std::vector& buffers = GetPointsToSet(instruction).element(index); return (buffers.size() == 1 && buffers[0]->instruction() == instruction); } Status TuplePointsToAnalysis::VerifyBuffer(const LogicalBuffer& buffer) const { if (!InstructionDefinesBufferAtIndex(buffer.instruction(), buffer.index())) { return FailedPrecondition( "LogicalBuffer %s is ill-defined: instruction %s does not define a " "buffer at that index", buffer.ToString().c_str(), buffer.instruction()->name().c_str()); } if (buffer.id() < 0 || buffer.id() >= next_buffer_id_) { return FailedPrecondition( "LogicalBuffer %s is ill-defined: invalid id %lld", buffer.ToString().c_str(), buffer.id()); } if (GetBuffer(buffer.id()).instruction() != buffer.instruction() || GetBuffer(buffer.id()).index() != buffer.index()) { return FailedPrecondition( "LogicalBuffer %s is ill-defined: buffer with same id differs: %s", buffer.ToString().c_str(), GetBuffer(buffer.id()).ToString().c_str()); } return Status::OK(); } const LogicalBuffer& TuplePointsToAnalysis::GetBuffer( LogicalBuffer::Id id) const { CHECK_GE(id, 0); CHECK_LT(id, logical_buffers_.size()); return *logical_buffers_[id]; } StatusOr TuplePointsToAnalysis::GetBufferDefinedAt( const HloInstruction* instruction, const ShapeIndex& index) const { const std::vector& buffers = GetPointsToSet(instruction).element(index); if (buffers.size() != 1 || buffers[0]->instruction() != instruction) { return FailedPrecondition( "instruction %s does not define buffer at index {%s}", instruction->name().c_str(), tensorflow::str_util::Join(index, ",").c_str()); } return buffers[0]; } const std::vector& TuplePointsToAnalysis::GetBufferAliases( const LogicalBuffer& buffer) const { return buffer_aliases_.at(&buffer); } const std::vector& TuplePointsToAnalysis::GetBuffersDefinedByInstruction( const HloInstruction* instruction) const { return instruction_defined_buffers_.at(instruction); } Status TuplePointsToAnalysis::GatherBuffersDefinedByInstruction( const HloInstruction* instruction, std::vector* buffers) { GetPointsToSet(instruction) .ForEachElement( [this, buffers, instruction]( const ShapeIndex& index, const std::vector& source_buffers) { // Add buffers which 'instruction' is the source of. CHECK(!source_buffers.empty()); if (source_buffers.size() == 1 && source_buffers[0]->instruction() == instruction) { // If this instruction is the source of this buffer the // indices must match. DCHECK(source_buffers[0]->index() == index); buffers->push_back(source_buffers[0]); } else { // If the points-to set includes more than one buffer then // necessarily this instruction did not produce the // buffer. for (const LogicalBuffer* source_buffer : source_buffers) { DCHECK(source_buffer->instruction() != instruction); } } }); return Status::OK(); } PointsToSet& TuplePointsToAnalysis::CreateCopiedPointsToSet( const HloInstruction* instruction, const HloInstruction* src) { // PointsToSet doesn't have a copy constructor so copy over element-by-element // from src PointsToSet. PointsToSet& dst_points_to_set = CreateEmptyPointsToSet(instruction); const PointsToSet& src_points_to_set = GetPointsToSet(src); dst_points_to_set.ForEachMutableElement( [this, &dst_points_to_set, &src_points_to_set]( const ShapeIndex& index, std::vector* buffers) { *buffers = src_points_to_set.element(index); for (auto& tuple_source : src_points_to_set.tuple_sources(index)) { dst_points_to_set.add_tuple_source(index, tuple_source); } }); return *FindOrDie(points_to_, instruction); } string TuplePointsToAnalysis::ToString() const { string output = tensorflow::strings::Printf( "TuplePointsToSet for module %s:\n", module_->name().c_str()); for (const auto& computation : module_->computations()) { const char* entry = computation.get() == module_->entry_computation() ? "entry " : ""; tensorflow::strings::StrAppend(&output, entry, "computation ", computation->name(), ":\n"); for (const HloInstruction* instruction : computation->MakeInstructionPostOrder()) { InstructionToString(instruction, &output); if (instruction->opcode() == HloOpcode::kFusion) { for (auto& fused : instruction->fused_instructions()) { InstructionToString(fused.get(), &output); } } } } tensorflow::strings::StrAppend(&output, "LogicalBuffers:\n"); for (auto& buffer : logical_buffers_) { tensorflow::strings::StrAppend(&output, " buffer ", buffer->ToString(), ":\n"); for (const BufferAlias& buffer_alias : buffer_aliases_.at(buffer.get())) { tensorflow::strings::StrAppend(&output, " alias ", buffer_alias.ToString(), "\n"); } } return output; } void TuplePointsToAnalysis::InstructionToString( const HloInstruction* instruction, string* output) const { const string prefix = instruction->IsFused() ? " " : ""; tensorflow::strings::StrAppend(output, prefix, " instruction ", instruction->ToShortString(), ":\n"); const PointsToSet& points_to_set = GetPointsToSet(instruction); points_to_set.ForEachElement([&prefix, &output]( const ShapeIndex& index, const std::vector& points_to) { tensorflow::strings::StrAppend( output, prefix, " {", tensorflow::str_util::Join(index, ","), "}: ", tensorflow::str_util::Join( points_to, ", ", [](string* out, const LogicalBuffer* source) { out->append(source->ToString()); }), "\n"); }); } } // namespace xla