diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/buffer_assignment.h')
-rw-r--r-- | tensorflow/compiler/xla/service/buffer_assignment.h | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h new file mode 100644 index 0000000000..af455de298 --- /dev/null +++ b/tensorflow/compiler/xla/service/buffer_assignment.h @@ -0,0 +1,358 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ + +#include <functional> +#include <iosfwd> +#include <memory> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#include "tensorflow/compiler/xla/service/buffer_liveness.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/logical_buffer.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +// This class abstracts an allocation of contiguous memory which can hold the +// values described by LogicalBuffers. A BufferAllocation may hold different +// LogicalBuffers at different times, but currently never more than one +// LogicalBuffer simultaneously. The abstraction includes information required +// by the backends for allocation, use, and deallocation of the buffer. This +// includes the LogicalBuffers which are held in this allocation through the +// execution of the computation. +class BufferAllocation { + public: + // Holds a unique identifier for each allocation. Values are assigned + // contiguously and can be used as array indexes. + using Index = int64; + + BufferAllocation(Index index, int64 size, bool is_thread_local) + : index_(index), size_(size), is_thread_local_(is_thread_local) {} + ~BufferAllocation() {} + + // Adds a LogicalBuffer to the set assigned to this buffer. + void AddAssignment(const LogicalBuffer& buffer); + + // Whether this allocation is used in a parallel calling context such as + // inside of a map or reduce computation. Such allocations need to be thread + // local. + bool is_thread_local() const { return is_thread_local_; } + + // Whether this allocation holds a LogicalBuffer from a parameter of the entry + // computation. These buffers have lifetimes which may be longer than the + // XLA computation. + bool is_entry_computation_parameter() const { + return is_entry_computation_parameter_; + } + // If this allocation holds a Buffer from a parameter of the entry + // computation, this methods returns the parameter number. CHECKs otherwise. + int64 parameter_number() const { + CHECK(is_entry_computation_parameter_); + return parameter_number_; + } + // Sets that this allocation holds a LogicalBuffer from a parameter of the + // entry computation. + void set_entry_computation_parameter(int64 parameter_number) { + is_entry_computation_parameter_ = true; + parameter_number_ = parameter_number; + } + + // Returns/sets whether this allocation is assigned a LogicalBuffer which may + // be live out of the entry computation. + bool maybe_live_out() const { return maybe_live_out_; } + void set_maybe_live_out(bool value) { maybe_live_out_ = value; } + + // Returns the size of the allocation. Necessarily this must be at least as + // large as any LogicalBuffer assigned to this allocation. + int64 size() const { return size_; } + + // Access to the logical buffers assigned to this allocation. + const std::vector<const LogicalBuffer*>& assigned_buffers() const { + return assigned_buffers_; + } + + Index index() const { return index_; } + + string ToString() const; + + // Whether the buffer is a parameter to or live out of the entry computation. + bool IsInputOrOutput() const { + return is_entry_computation_parameter() || maybe_live_out(); + } + + // Whether the buffer is a temporary buffer allocated before + // Executable::ExecuteOnStream. + bool IsPreallocatedTempBuffer() const { + // Parameters do not need temporary buffers. + return !is_entry_computation_parameter() && + // LogicalBuffers that maybe pointed to by the output should live out + // of the computation. + !maybe_live_out() && + // Thread-local buffers are allocated using `alloca`s. + !is_thread_local(); + } + + bool operator==(const BufferAllocation& other) const { + return index_ == other.index_; + } + bool operator!=(const BufferAllocation& other) const { + return !(*this == other); + } + bool operator<(const BufferAllocation& other) const { + return index() < other.index(); + } + + private: + // The index of the allocation in the BufferAssignment. + Index index_; + + // Size of the allocation in bytes. + int64 size_; + + // Whether this buffer needs to be thread-local. + bool is_thread_local_; + + // Whether this allocation holds an entry computation parameter. Entry + // computation parameters are special be cause they have lifetimes which may + // outlast the computation. + bool is_entry_computation_parameter_ = false; + + // If this allocation holds an entry computation parameter, this field + // indicates the index (starting from 0) of the parameter. + int64 parameter_number_ = 0; + + // Whether the allocation contains a LogicalBuffer which may be live-out of + // the entry computation. Note that this flag is conservatively computed by + // TuplePointsToAnalysis. That is, an allocation marked `maybe_live_out_` + // might not actually escape. + bool maybe_live_out_ = false; + + // The set of buffers assigned to this allocation. + std::vector<const LogicalBuffer*> assigned_buffers_; +}; + +// Add stream operator for nicer output of CHECK/RET_CHECK failures. +std::ostream& operator<<(std::ostream& out, const BufferAllocation& s); + +// This class encapsulates an assignment of the LogicalBuffers in an XLA +// module to a set of BufferAllocations. +class BufferAssignment { + public: + // Returns the vector containing all buffer allocations in this assignment. + const std::vector<BufferAllocation>& Allocations() const { + return allocations_; + } + + // Returns whether the given buffer has been assigned an allocation. + bool HasAllocation(const LogicalBuffer& buffer) const; + + // Returns the allocation that a particular LogicalBuffer has been assigned + // to. CHECKs if buffer has not been assigned an allocation. + const BufferAllocation& GetAssignedAllocation( + const LogicalBuffer& buffer) const; + + // Returns the allocation with the given index. CHECKs if no allocation exists + // with the given index. + const BufferAllocation& GetAllocation(BufferAllocation::Index index) const; + + // Builds and returns a vector containing the allocations which might contain + // the subvalue at the given index of given instruction. + std::set<BufferAllocation> GetAllocations(const HloInstruction* instruction, + const ShapeIndex& index) const; + + // Convenience function which returns whether the top-level buffer of the + // instruction (index == {}) is assigned an allocation. + bool HasTopLevelAllocation(const HloInstruction* instruction) const; + + // Convenience function which returns the unique buffer allocation containing + // the buffer at the given index of the given instruction. If an allocation is + // not assigned or the allocation cannot be determined at compile time then an + // error is returned. + StatusOr<const BufferAllocation*> GetUniqueAllocation( + const HloInstruction* instruction, const ShapeIndex& index) const; + // Like GetUniqueAllocation but fixes the index to the top-level of the shape + // (index = {}). + StatusOr<const BufferAllocation*> GetUniqueTopLevelAllocation( + const HloInstruction* instruction) const; + // Like GetUniqueTopLevelAllocation but returns the allocation for the output + // of the entry computation of the HLO module (ie, the result of the XLA + // computation). + StatusOr<const BufferAllocation*> GetUniqueTopLevelOutputAllocation() const; + + // Returns the set LogicalBuffers which may be the source of the value at the + // given index and instruction. + const std::vector<const LogicalBuffer*>& GetSourceBuffers( + const HloInstruction* instruction, const ShapeIndex& index) const { + return GetPointsToSet(instruction).element(index); + } + + // Returns the underlying points-to analysis used for this assignment. + const TuplePointsToAnalysis& points_to_analysis() const { + return liveness_->points_to_analysis(); + } + + string ToString() const; + + private: + // Only BufferAssigner can build or modify BufferAssignments. + friend class BufferAssigner; + + explicit BufferAssignment(const HloModule* module, + std::unique_ptr<BufferLiveness> liveness) + : module_(module), liveness_(std::move(liveness)) {} + + // Creates and returns a new BufferAllocation. Ownership is maintained + // internally. The allocation initially has only the given LogicalBuffer + // assigned to it. `is_thread_local` indicates whether this buffer needs to be + // thread-local. + BufferAllocation* NewAllocation(const LogicalBuffer& buffer, int64 size, + bool is_thread_local); + + // Adds a LogicalBuffer to the set assigned to the given allocation. + void AddAssignment(const LogicalBuffer& buffer, BufferAllocation* allocation); + + // Returns the BufferLiveness object used to construct this assignment. + const BufferLiveness& liveness() { return *liveness_; } + + // Convenience function which returns the PointsToSet for the given + // instruction. Extracted from the liveness object. + const PointsToSet& GetPointsToSet(const HloInstruction* instruction) const; + + // Mutable accessors for allocations. + BufferAllocation* GetMutableAssignedAllocation(const LogicalBuffer& buffer); + BufferAllocation* GetMutableAllocation(BufferAllocation::Index index); + + // The vector of buffer allocations. Indexed by BufferAllocation::Index. + std::vector<BufferAllocation> allocations_; + + // Maps Buffers to the index of the BufferAllocation which holds the buffer. + std::map<const LogicalBuffer*, BufferAllocation::Index> + allocation_index_for_buffer_; + + const HloModule* module_; + std::unique_ptr<BufferLiveness> liveness_; + + TF_DISALLOW_COPY_AND_ASSIGN(BufferAssignment); +}; + +// A class which constructs a buffer assignment. +class BufferAssigner { + public: + // Build and return a BufferAssignment for the given module. The given + // HloOrdering is used to determine buffer liveness. buffer_size is a function + // which returns the size of a LogicalBuffer. If hlos_to_allocate is not null + // then only instructions in this vector are considered for buffer + // assignment. If hlos_to_allocate is null then all instructions are + // considered. If 'colocate_related_buffers' is true, related LogicalBuffers + // will be colocated in the same allocation (i.e buffers for while result + // will share an allocation with buffers related to that same while + // instruction: init operand, condition/body parameter and body result). + using BufferSizeFunction = std::function<int64(const LogicalBuffer&)>; + static StatusOr<std::unique_ptr<BufferAssignment>> Run( + const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering, + BufferSizeFunction buffer_size, bool colocate_related_buffers, + const std::vector<const HloInstruction*>* hlos_to_allocate = nullptr); + + // Overload of Run which uses ShapeUtil::ByteSizeOf to determine buffer size + // and assigns buffers to all HLO instructions in the module. + static StatusOr<std::unique_ptr<BufferAssignment>> Run( + const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering, + int64 pointer_size); + + private: + explicit BufferAssigner(BufferSizeFunction buffer_size, + bool colocate_related_buffers) + : buffer_size_(std::move(buffer_size)), + colocate_related_buffers_(colocate_related_buffers) {} + virtual ~BufferAssigner() = default; + + // Create a buffer assignment. + StatusOr<std::unique_ptr<BufferAssignment>> CreateAssignment( + const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering, + const std::vector<const HloInstruction*>* hlos_to_allocate = nullptr); + + // Assigns buffers to the instructions in the given computation. "assignment" + // is modified to reflect the new buffer assignments. If is_thread_local is + // true, then all assigned buffers have the is_thread_local flag set to + // true. If hlos_to_allocate is not null it indicates which HLOs to include in + // buffer assignment. If null, all instructions in the computation are + // included. + tensorflow::Status AssignBuffersForComputation( + const HloComputation* computation, bool is_thread_local, + const std::unordered_set<const HloInstruction*>* hlos_to_allocate, + BufferAssignment* assignment); + + // Tries to assign the given instruction to the given buffer. Returns if the + // assignment was successful. + bool MaybeAssignBuffer(BufferAllocation* allocation, + const LogicalBuffer& buffer, + BufferAssignment* assignment); + + using ColocatedBufferSet = std::vector<const LogicalBuffer*>; + + // Returns a vector of ColocatedBufferSet objects, where each + // ColocatedBufferSet aggregates a set of related LogicalBuffers from 'module' + // which should be colocated in the same buffer allocation. + std::vector<ColocatedBufferSet> BuildColocatedBufferSets( + const HloModule* module, const TuplePointsToAnalysis& points_to_analysis); + + // For each buffer set in 'colocated_buffer_sets', assigns all buffers in the + // same set to the same buffer allocation in 'assignment'. + void AssignColocatedBufferSets( + const std::vector<ColocatedBufferSet>& colocated_buffer_sets, + BufferAssignment* assignment); + + // Checks that points-to set of 'instruction' is unambiguous and distinct + // (ensured by CopyInsertion), then adds buffer from point-to set at 'index' + // to 'colocated_buffer_set'. + void AddBufferToColocatedBufferSet( + const HloInstruction* instruction, const ShapeIndex& index, + const TuplePointsToAnalysis& points_to_analysis, + BufferAssigner::ColocatedBufferSet* colocated_buffer_set); + + const HloModule* module_; + + // Function which returns the buffer size for a given shape. + BufferSizeFunction buffer_size_; + + // Indicates whether related buffers should share the same buffer allocation. + const bool colocate_related_buffers_; + + // Set of colocated buffers populated in AssignColocatedBufferSets. + std::unordered_set<const LogicalBuffer*> colocated_buffers_; + + // Set of allocations containing colocated buffers. + std::unordered_set<BufferAllocation::Index> colocated_buffer_allocations_; + + TF_DISALLOW_COPY_AND_ASSIGN(BufferAssigner); +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_ |