1 files changed, 358 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
new file mode 100644
index 0000000000..af455de298
--- /dev/null
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -0,0 +1,358 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_
+
+#include <functional>
+#include <iosfwd>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "tensorflow/compiler/xla/service/buffer_liveness.h"
+#include "tensorflow/compiler/xla/service/hlo_computation.h"
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/logical_buffer.h"
+#include "tensorflow/compiler/xla/statusor.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/xla_data.pb.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/macros.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+
+// This class abstracts an allocation of contiguous memory which can hold the
+// values described by LogicalBuffers. A BufferAllocation may hold different
+// LogicalBuffers at different times, but currently never more than one
+// LogicalBuffer simultaneously. The abstraction includes information required
+// by the backends for allocation, use, and deallocation of the buffer. This
+// includes the LogicalBuffers which are held in this allocation through the
+// execution of the computation.
+class BufferAllocation {
+ public:
+  // Holds a unique identifier for each allocation. Values are assigned
+  // contiguously and can be used as array indexes.
+  using Index = int64;
+
+  // No destructor is declared so the implicit move operations stay available.
+  BufferAllocation(Index index, int64 size, bool is_thread_local)
+      : index_(index), size_(size), is_thread_local_(is_thread_local) {}
+
+  // Adds a LogicalBuffer to the set assigned to this buffer.
+  void AddAssignment(const LogicalBuffer& buffer);
+
+  // Whether this allocation is used in a parallel calling context such as
+  // inside of a map or reduce computation. Such allocations need to be thread
+  // local.
+  bool is_thread_local() const { return is_thread_local_; }
+
+  // Whether this allocation holds a LogicalBuffer from a parameter of the entry
+  // computation. These buffers have lifetimes which may be longer than the
+  // XLA computation.
+  bool is_entry_computation_parameter() const {
+    return is_entry_computation_parameter_;
+  }
+  // If this allocation holds a LogicalBuffer from a parameter of the entry
+  // computation, this method returns the parameter number. CHECKs otherwise.
+  int64 parameter_number() const {
+    CHECK(is_entry_computation_parameter_);
+    return parameter_number_;
+  }
+  // Sets that this allocation holds a LogicalBuffer from a parameter of the
+  // entry computation.
+  void set_entry_computation_parameter(int64 parameter_number) {
+    is_entry_computation_parameter_ = true;
+    parameter_number_ = parameter_number;
+  }
+
+  // Returns/sets whether this allocation is assigned a LogicalBuffer which may
+  // be live out of the entry computation.
+  bool maybe_live_out() const { return maybe_live_out_; }
+  void set_maybe_live_out(bool value) { maybe_live_out_ = value; }
+
+  // Returns the size of the allocation. Necessarily this must be at least as
+  // large as any LogicalBuffer assigned to this allocation.
+  int64 size() const { return size_; }
+
+  // Access to the logical buffers assigned to this allocation.
+  const std::vector<const LogicalBuffer*>& assigned_buffers() const {
+    return assigned_buffers_;
+  }
+
+  Index index() const { return index_; }
+
+  string ToString() const;
+
+  // Whether the buffer is a parameter to or live out of the entry computation.
+  bool IsInputOrOutput() const {
+    return is_entry_computation_parameter() || maybe_live_out();
+  }
+
+  // Whether the buffer is a temporary buffer allocated before
+  // Executable::ExecuteOnStream.
+  bool IsPreallocatedTempBuffer() const {
+    // Parameters do not need temporary buffers.
+    return !is_entry_computation_parameter() &&
+           // LogicalBuffers that the output may point to should live out
+           // of the computation.
+           !maybe_live_out() &&
+           // Thread-local buffers are allocated using `alloca`s.
+           !is_thread_local();
+  }
+
+  bool operator==(const BufferAllocation& other) const {
+    return index_ == other.index_;
+  }
+  bool operator!=(const BufferAllocation& other) const {
+    return !(*this == other);
+  }
+  bool operator<(const BufferAllocation& other) const {
+    return index_ < other.index_;
+  }
+
+ private:
+  // The index of the allocation in the BufferAssignment.
+  Index index_;
+
+  // Size of the allocation in bytes.
+  int64 size_;
+
+  // Whether this buffer needs to be thread-local.
+  bool is_thread_local_;
+
+  // Whether this allocation holds an entry computation parameter. Entry
+  // computation parameters are special because they have lifetimes which may
+  // outlast the computation.
+  bool is_entry_computation_parameter_ = false;
+
+  // If this allocation holds an entry computation parameter, this field
+  // indicates the index (starting from 0) of the parameter.
+  int64 parameter_number_ = 0;
+
+  // Whether the allocation contains a LogicalBuffer which may be live-out of
+  // the entry computation. Note that this flag is conservatively computed by
+  // TuplePointsToAnalysis.  That is, an allocation marked `maybe_live_out_`
+  // might not actually escape.
+  bool maybe_live_out_ = false;
+
+  // The set of buffers assigned to this allocation.
+  std::vector<const LogicalBuffer*> assigned_buffers_;
+};
+
+// Streams a BufferAllocation, for nicer output of CHECK/RET_CHECK failures.
+std::ostream& operator<<(std::ostream& out, const BufferAllocation& s);
+
+// This class encapsulates an assignment of the LogicalBuffers in an XLA
+// module to a set of BufferAllocations.
+class BufferAssignment {
+ public:
+  // Returns the vector containing all buffer allocations in this assignment.
+  const std::vector<BufferAllocation>& Allocations() const {
+    return allocations_;
+  }
+
+  // Returns whether the given buffer has been assigned an allocation.
+  bool HasAllocation(const LogicalBuffer& buffer) const;
+
+  // Returns the allocation that a particular LogicalBuffer has been assigned
+  // to. CHECKs if buffer has not been assigned an allocation.
+  const BufferAllocation& GetAssignedAllocation(
+      const LogicalBuffer& buffer) const;
+
+  // Returns the allocation with the given index. CHECKs if no allocation exists
+  // with the given index.
+  const BufferAllocation& GetAllocation(BufferAllocation::Index index) const;
+
+  // Builds and returns the set of allocations which might contain the
+  // subvalue at the given index of the given instruction.
+  std::set<BufferAllocation> GetAllocations(const HloInstruction* instruction,
+                                            const ShapeIndex& index) const;
+
+  // Convenience function which returns whether the top-level buffer of the
+  // instruction (index == {}) is assigned an allocation.
+  bool HasTopLevelAllocation(const HloInstruction* instruction) const;
+
+  // Convenience function which returns the unique buffer allocation containing
+  // the buffer at the given index of the given instruction. If an allocation is
+  // not assigned or the allocation cannot be determined at compile time then an
+  // error is returned.
+  StatusOr<const BufferAllocation*> GetUniqueAllocation(
+      const HloInstruction* instruction, const ShapeIndex& index) const;
+  // Like GetUniqueAllocation but fixes the index to the top-level of the shape
+  // (index = {}).
+  StatusOr<const BufferAllocation*> GetUniqueTopLevelAllocation(
+      const HloInstruction* instruction) const;
+  // Like GetUniqueTopLevelAllocation but returns the allocation for the output
+  // of the entry computation of the HLO module (ie, the result of the XLA
+  // computation).
+  StatusOr<const BufferAllocation*> GetUniqueTopLevelOutputAllocation() const;
+
+  // Returns the set of LogicalBuffers which may be the source of the value at
+  // the given index and instruction.
+  const std::vector<const LogicalBuffer*>& GetSourceBuffers(
+      const HloInstruction* instruction, const ShapeIndex& index) const {
+    return GetPointsToSet(instruction).element(index);
+  }
+
+  // Returns the underlying points-to analysis used for this assignment.
+  const TuplePointsToAnalysis& points_to_analysis() const {
+    return liveness_->points_to_analysis();
+  }
+
+  string ToString() const;
+
+ private:
+  // Only BufferAssigner can build or modify BufferAssignments.
+  friend class BufferAssigner;
+
+  explicit BufferAssignment(const HloModule* module,
+                            std::unique_ptr<BufferLiveness> liveness)
+      : module_(module), liveness_(std::move(liveness)) {}
+
+  // Creates and returns a new BufferAllocation owned by this assignment and
+  // initially holding only `buffer`; `is_thread_local` marks it thread-local.
+  // NOTE(review): allocations_ is a std::vector, so the returned pointer may
+  // be invalidated by a later NewAllocation call -- confirm callers cope.
+  BufferAllocation* NewAllocation(const LogicalBuffer& buffer, int64 size,
+                                  bool is_thread_local);
+
+  // Adds a LogicalBuffer to the set assigned to the given allocation.
+  void AddAssignment(const LogicalBuffer& buffer, BufferAllocation* allocation);
+
+  // Returns the BufferLiveness object used to construct this assignment.
+  const BufferLiveness& liveness() const { return *liveness_; }
+
+  // Convenience function which returns the PointsToSet for the given
+  // instruction. Extracted from the liveness object.
+  const PointsToSet& GetPointsToSet(const HloInstruction* instruction) const;
+
+  // Mutable accessors for allocations.
+  BufferAllocation* GetMutableAssignedAllocation(const LogicalBuffer& buffer);
+  BufferAllocation* GetMutableAllocation(BufferAllocation::Index index);
+
+  // The vector of buffer allocations. Indexed by BufferAllocation::Index.
+  std::vector<BufferAllocation> allocations_;
+
+  // Maps each LogicalBuffer to the index of the BufferAllocation holding it.
+  std::map<const LogicalBuffer*, BufferAllocation::Index>
+      allocation_index_for_buffer_;
+
+  const HloModule* module_;
+  std::unique_ptr<BufferLiveness> liveness_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(BufferAssignment);
+};
+
+// A class which constructs a buffer assignment.
+class BufferAssigner {
+ public:
+  // Build and return a BufferAssignment for the given module. The given
+  // HloOrdering is used to determine buffer liveness. buffer_size is a function
+  // which returns the size of a LogicalBuffer. If hlos_to_allocate is not null
+  // then only instructions in this vector are considered for buffer
+  // assignment. If hlos_to_allocate is null then all instructions are
+  // considered. If 'colocate_related_buffers' is true, related LogicalBuffers
+  // will be colocated in the same allocation (i.e. buffers for while result
+  // will share an allocation with buffers related to that same while
+  // instruction: init operand, condition/body parameter and body result).
+  using BufferSizeFunction = std::function<int64(const LogicalBuffer&)>;
+  static StatusOr<std::unique_ptr<BufferAssignment>> Run(
+      const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering,
+      BufferSizeFunction buffer_size, bool colocate_related_buffers,
+      const std::vector<const HloInstruction*>* hlos_to_allocate = nullptr);
+
+  // Overload of Run which uses ShapeUtil::ByteSizeOf to determine buffer size
+  // and assigns buffers to all HLO instructions in the module.
+  static StatusOr<std::unique_ptr<BufferAssignment>> Run(
+      const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering,
+      int64 pointer_size);
+
+ private:
+  explicit BufferAssigner(BufferSizeFunction buffer_size,
+                          bool colocate_related_buffers)
+      : buffer_size_(std::move(buffer_size)),
+        colocate_related_buffers_(colocate_related_buffers) {}
+  virtual ~BufferAssigner() = default;
+
+  // Create a buffer assignment.
+  StatusOr<std::unique_ptr<BufferAssignment>> CreateAssignment(
+      const HloModule* module, std::unique_ptr<HloOrdering> hlo_ordering,
+      const std::vector<const HloInstruction*>* hlos_to_allocate = nullptr);
+
+  // Assigns buffers to the instructions in the given computation. "assignment"
+  // is modified to reflect the new buffer assignments. If is_thread_local is
+  // true, then all assigned buffers have the is_thread_local flag set to
+  // true. If hlos_to_allocate is not null it indicates which HLOs to include in
+  // buffer assignment. If null, all instructions in the computation are
+  // included.
+  tensorflow::Status AssignBuffersForComputation(
+      const HloComputation* computation, bool is_thread_local,
+      const std::unordered_set<const HloInstruction*>* hlos_to_allocate,
+      BufferAssignment* assignment);
+
+  // Tries to assign the given buffer to the given allocation. Returns whether
+  // the assignment was successful.
+  bool MaybeAssignBuffer(BufferAllocation* allocation,
+                         const LogicalBuffer& buffer,
+                         BufferAssignment* assignment);
+
+  using ColocatedBufferSet = std::vector<const LogicalBuffer*>;
+
+  // Returns a vector of ColocatedBufferSet objects, where each
+  // ColocatedBufferSet aggregates a set of related LogicalBuffers from 'module'
+  // which should be colocated in the same buffer allocation.
+  std::vector<ColocatedBufferSet> BuildColocatedBufferSets(
+      const HloModule* module, const TuplePointsToAnalysis& points_to_analysis);
+
+  // For each buffer set in 'colocated_buffer_sets', assigns all buffers in the
+  // same set to the same buffer allocation in 'assignment'.
+  void AssignColocatedBufferSets(
+      const std::vector<ColocatedBufferSet>& colocated_buffer_sets,
+      BufferAssignment* assignment);
+
+  // Checks that points-to set of 'instruction' is unambiguous and distinct
+  // (ensured by CopyInsertion), then adds buffer from points-to set at 'index'
+  // to 'colocated_buffer_set'.
+  void AddBufferToColocatedBufferSet(
+      const HloInstruction* instruction, const ShapeIndex& index,
+      const TuplePointsToAnalysis& points_to_analysis,
+      BufferAssigner::ColocatedBufferSet* colocated_buffer_set);
+
+  const HloModule* module_ = nullptr;
+
+  // Function which returns the buffer size for a given LogicalBuffer.
+  BufferSizeFunction buffer_size_;
+
+  // Indicates whether related buffers should share the same buffer allocation.
+  const bool colocate_related_buffers_;
+
+  // Set of colocated buffers populated in AssignColocatedBufferSets.
+  std::unordered_set<const LogicalBuffer*> colocated_buffers_;
+
+  // Set of allocations containing colocated buffers.
+  std::unordered_set<BufferAllocation::Index> colocated_buffer_allocations_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(BufferAssigner);
+};
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_BUFFER_ASSIGNMENT_H_