author      2018-07-24 13:28:26 -0700
committer   2018-07-24 13:33:31 -0700
commit      d6d95beac43b9f85c57e8302711db53a48b96e65
tree        2e3d5a4255f9ef5f3f7c8b4c6aa76ff2d6b190d3
parent      3e9adc8b51a209c30e79812dc844827f8706bc3f
Remove BufferAllocation::is_reusable() and introduce is_readonly(); NFC
- Instead of remembering is_reusable_, remember BufferAllocation::is_tuple_ and
compute is_reusable() from is_tuple() and is_thread_local().
- Introduce is_readonly() which tells us whether an allocation holds readonly
data. In the future this will return true for constant buffer allocations
but today only entry parameters are readonly.
is_reusable() is about lifetime, whereas is_readonly() is about write access. In
particular, we sometimes "re-use" readonly allocations, e.g. when the init value
of a while loop is an entry parameter and the while body is readonly.
PiperOrigin-RevId: 205881338
4 files changed, 48 insertions, 49 deletions
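
To make the lifetime-versus-write-access distinction in the commit message concrete, here is a minimal, self-contained C++ sketch. The AllocationSketch type and its members are hypothetical stand-ins, not the real BufferAllocation class; they only mirror how the patch derives is_reusable() from the thread-local and tuple flags and is_readonly() from the entry-parameter flag.

// A minimal sketch (hypothetical AllocationSketch type, not the real
// BufferAllocation class) of the two predicates this change separates.
#include <cassert>

struct AllocationSketch {
  bool is_thread_local = false;
  bool is_tuple = false;
  bool is_entry_computation_parameter = false;

  // Reuse is about lifetime: thread-local and tuple allocations are excluded,
  // mirroring how the patch computes is_reusable() from the two flags.
  bool is_reusable() const { return !is_thread_local && !is_tuple; }

  // Readonly is about write access: today only entry parameters qualify.
  bool is_readonly() const { return is_entry_computation_parameter; }
};

int main() {
  // The case called out in the message: a while-loop init value that is an
  // entry parameter is readonly, yet still "reusable" in the lifetime sense.
  AllocationSketch param;
  param.is_entry_computation_parameter = true;
  assert(param.is_readonly() && param.is_reusable());

  // A tuple-shaped allocation is writable but not reusable.
  AllocationSketch tuple;
  tuple.is_tuple = true;
  assert(!tuple.is_readonly() && !tuple.is_reusable());
  return 0;
}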
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc
index 783e3f7e73..bcca9f46d3 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.cc
+++ b/tensorflow/compiler/xla/service/buffer_assignment.cc
@@ -270,7 +270,7 @@ BufferAllocationProto BufferAllocation::ToProto() const {
   proto.set_index(index_);
   proto.set_size(size_);
   proto.set_is_thread_local(is_thread_local_);
-  proto.set_is_reusable(is_reusable_);
+  proto.set_is_tuple(is_tuple_);
   proto.set_color(color_.value());
   if (is_entry_computation_parameter_) {
     proto.set_is_entry_computation_parameter(true);
@@ -491,20 +491,16 @@ BufferAssignment::GetUniqueTopLevelOutputSlice() const {
 }
 
 BufferAllocation* BufferAssignment::NewEmptyAllocation(
-    int64 size, bool is_thread_local, bool is_reusable,
-    LogicalBuffer::Color color) {
+    int64 size, LogicalBuffer::Color color) {
   BufferAllocation::Index index = allocations_.size();
-  allocations_.emplace_back(index, size, is_thread_local, is_reusable, color);
+  allocations_.emplace_back(index, size, color);
   BufferAllocation* allocation = &allocations_.back();
   return allocation;
 }
 
 BufferAllocation* BufferAssignment::NewAllocation(const LogicalBuffer& buffer,
-                                                  int64 size,
-                                                  bool is_thread_local,
-                                                  bool is_reusable) {
-  BufferAllocation* allocation =
-      NewEmptyAllocation(size, is_thread_local, is_reusable, buffer.color());
+                                                  int64 size) {
+  BufferAllocation* allocation = NewEmptyAllocation(size, buffer.color());
   AddAssignment(allocation, buffer, /*offset=*/0, size);
   allocation->peak_buffers_.push_back(&buffer);
   return allocation;
@@ -517,7 +513,8 @@ void BufferAssignment::AddAssignment(BufferAllocation* allocation,
   CHECK_EQ(0, allocation_index_for_buffer_.count(&buffer))
       << "LogicalBuffer " << buffer << " already has an allocation.";
   CHECK(allocation->is_reusable() || allocation->assigned_buffers().empty())
-      << "Non-reusable allocation already assigned a buffer";
+      << "Non-reusable allocation already assigned a buffer: "
+      << allocation->ToString();
 
   TF_CHECK_OK(points_to_analysis().VerifyBuffer(buffer));
 
@@ -751,8 +748,8 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     return false;
   }
 
-  if (allocation->is_entry_computation_parameter()) {
-    VLOG(4) << "Can't assign: allocation holds parameter";
+  if (allocation->is_readonly()) {
+    VLOG(4) << "Can't assign: allocation is readonly";
     return false;
   }
 
@@ -923,9 +920,7 @@ Status BufferAssigner::AssignBuffersForComputation(
       // computations do not need special allocations because they live inside
      // callers.
       BufferAllocation* allocation =
-          assignment->NewAllocation(*buffer, buffer_size,
-                                    /*is_thread_local=*/false,
-                                    /*is_reusable=*/false);
+          assignment->NewAllocation(*buffer, buffer_size);
       allocation->set_entry_computation_parameter(
           instruction->parameter_number(), buffer->index());
       VLOG(3) << "New allocation #" << allocation->index()
@@ -934,20 +929,18 @@ Status BufferAssigner::AssignBuffersForComputation(
     }
 
     if (is_thread_local) {
-      // We do not reuse thread-local buffers for now, because they are
-      // dynamically allocated and their lifetimes are hard to compute.
-      BufferAllocation* allocation = assignment->NewAllocation(
-          *buffer, buffer_size, is_thread_local, /*is_reusable=*/false);
+      BufferAllocation* allocation =
+          assignment->NewAllocation(*buffer, buffer_size);
+      allocation->set_is_thread_local(true);
       VLOG(3) << "New allocation #" << allocation->index()
               << " for thread-local: " << *buffer;
       continue;
     }
 
     if (ShapeUtil::IsTuple(buffer->shape())) {
-      // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend
-      // assumes longer buffer liveness than indicated by the analysis.
-      BufferAllocation* allocation = assignment->NewAllocation(
-          *buffer, buffer_size, is_thread_local, /*is_reusable=*/false);
+      BufferAllocation* allocation =
+          assignment->NewAllocation(*buffer, buffer_size);
+      allocation->set_is_tuple(true);
       VLOG(3) << "New allocation #" << allocation->index()
              << " for tuple-shaped buffer: " << *buffer;
       continue;
@@ -1030,8 +1023,8 @@ Status BufferAssigner::AssignBuffersForComputation(
     }
 
     if (!assignment->HasAllocation(*buffer)) {
-      BufferAllocation* allocation = assignment->NewAllocation(
-          *buffer, buffer_size, is_thread_local, /*is_reusable=*/true);
+      BufferAllocation* allocation =
+          assignment->NewAllocation(*buffer, buffer_size);
       allocation_indices.push_back(allocation->index());
       VLOG(3) << "New allocation #" << allocation->index()
               << " for: " << *buffer;
@@ -1227,8 +1220,8 @@ void BufferAssigner::AssignBuffersFromHeapSimulator(
         result.fragmentation_size;
   }
 
-  BufferAllocation* allocation = assignment->NewEmptyAllocation(
-      result.heap_size, /*is_thread_local=*/false, /*is_reusable=*/true, color);
+  BufferAllocation* allocation =
+      assignment->NewEmptyAllocation(result.heap_size, color);
   for (const auto& buffer_chunk : result.chunk_map) {
     // TODO(lauj) Remove this down_cast after downstream users of
     // BufferAllocation::assigned_buffers() are updated to use BufferValue.
@@ -1584,9 +1577,7 @@ void BufferAssigner::AssignColocatedBufferSets(
        // allocations for each colocated buffer set. When liveness has
        // module-level scope, we can allow buffers to be shared across
        // computations (in some cases).
-        allocation = assignment->NewAllocation(*buffer, buffer_size,
-                                               /*is_thread_local=*/false,
-                                               /*is_reusable=*/true);
+        allocation = assignment->NewAllocation(*buffer, buffer_size);
        if (entry_parameter_number >= 0) {
          // This colocated buffer set contains an entry parameter and other
          // logical buffers which use the parameter as read-only in a while
diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h b/tensorflow/compiler/xla/service/buffer_assignment.h
index ad0b0bf7c2..8844b6e3ba 100644
--- a/tensorflow/compiler/xla/service/buffer_assignment.h
+++ b/tensorflow/compiler/xla/service/buffer_assignment.h
@@ -58,13 +58,8 @@ class BufferAllocation {
   // contiguously and can be used as array indexes.
   using Index = int64;
 
-  BufferAllocation(Index index, int64 size, bool is_thread_local,
-                   bool is_reusable, LogicalBuffer::Color color)
-      : index_(index),
-        size_(size),
-        is_thread_local_(is_thread_local),
-        is_reusable_(is_reusable),
-        color_(color) {}
+  BufferAllocation(Index index, int64 size, LogicalBuffer::Color color)
+      : index_(index), size_(size), color_(color) {}
   ~BufferAllocation() {}
 
   // Returns the index of this allocation.
@@ -74,9 +69,26 @@ class BufferAllocation {
   // inside of a map or reduce computation. Such allocations need to be thread
   // local.
   bool is_thread_local() const { return is_thread_local_; }
+  void set_is_thread_local(bool is_thread_local) {
+    is_thread_local_ = is_thread_local;
+  }
 
   // Whether this allocation can be used by more than one logical buffer.
-  bool is_reusable() const { return is_reusable_; }
+  bool is_reusable() const {
+    // We do not reuse thread-local buffers for now, because they are
+    // dynamically allocated and their lifetimes are hard to compute.
+    //
+    // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend
+    // assumes longer buffer liveness than indicated by the analysis.
+    return !is_thread_local() && !is_tuple();
+  }
+
+  // Whether this allocation is readonly i.e. backed by memory we cannot write
+  // to.
+  bool is_readonly() const { return is_entry_computation_parameter(); }
+
+  bool is_tuple() const { return is_tuple_; }
+  void set_is_tuple(bool is_tuple) { is_tuple_ = is_tuple; }
 
   // Whether this allocation holds a LogicalBuffer from a parameter of the entry
   // computation. These buffers have lifetimes which may be longer than the
@@ -256,10 +268,10 @@ class BufferAllocation {
   int64 size_;
 
   // Whether this buffer needs to be thread-local.
-  bool is_thread_local_;
+  bool is_thread_local_ = false;
 
-  // Whether this buffer is usable by more than one logical buffer.
-  bool is_reusable_;
+  // Whether this buffer holds a tuple.
+  bool is_tuple_ = false;
 
   // Color of the allocation.
   LogicalBuffer::Color color_;
@@ -426,14 +438,11 @@ class BufferAssignment {
   // Creates and returns a new BufferAllocation, with no assigned
   // LogicalBuffers. Ownership is maintained internally.
-  BufferAllocation* NewEmptyAllocation(int64 size, bool is_thread_local,
-                                       bool is_reusable,
-                                       LogicalBuffer::Color color);
+  BufferAllocation* NewEmptyAllocation(int64 size, LogicalBuffer::Color color);
 
   // Helper that calls NewEmptyAllocation and AddAssignment in one call,
   // creating an allocation containing a single LogicalBuffer.
-  BufferAllocation* NewAllocation(const LogicalBuffer& buffer, int64 size,
-                                  bool is_thread_local, bool is_reusable);
+  BufferAllocation* NewAllocation(const LogicalBuffer& buffer, int64 size);
 
   // Adds a LogicalBuffer to the set assigned to the given allocation.
   void AddAssignment(BufferAllocation* allocation, const LogicalBuffer& buffer,
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 87abc0e74f..50d7f1823c 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -244,7 +244,7 @@ message BufferAllocationProto {
   int64 index = 1;
   int64 size = 2;
   bool is_thread_local = 3;
-  bool is_reusable = 4;
+  bool is_tuple = 11;
   bool is_entry_computation_parameter = 5;
   int64 parameter_number = 6;
   repeated int64 parameter_shape_index = 10;
diff --git a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
index 93a8c130e1..e5370eca56 100644
--- a/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
+++ b/tensorflow/compiler/xla/service/llvm_ir/alias_analysis.cc
@@ -28,8 +28,7 @@ namespace llvm_ir {
 // Sentry allocation used to represent parameters of the entry computation in
 // alias_scope_metadata_ and noalias_metadata_.
 static const BufferAllocation* kParameterAllocation = new BufferAllocation(
-    /*index=*/-1, /*size=*/0, /*is_thread_local=*/false, /*is_reusable=*/false,
-    LogicalBuffer::Color(0));
+    /*index=*/-1, /*size=*/0, LogicalBuffer::Color(0));
 
 void AliasAnalysis::AddAliasingInformationToIrArray(const HloInstruction& hlo,
                                                     llvm_ir::IrArray* array,