aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/hlo_computation.cc
diff options
context:
space:
mode:
authorGravatar Mark Heffernan <meheff@google.com>2017-11-02 22:12:33 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-11-02 22:16:19 -0700
commit7bb2d57b0b051d1cf8dd74d3276bf5a452774172 (patch)
treed5b07beacebcc425454978eb87ffecfe728d4281 /tensorflow/compiler/xla/service/hlo_computation.cc
parent8a7f5c47dcb71deb71df4a72f3cf829904c5a28e (diff)
Rewrite CopyInsertion to use module-scoped HloAliasAnalysis. The net effect (number of copies inserted) is roughly similar to the existing implementation, but the new implementation is much more general. The new implementation can handle entry argument buffer reuse with minimal modification, for example.
Some unnecessary copies are still added due to deficiencies in buffer assignment (b/62548313), but these can be removed when buffer assignment also uses HloAliasAnalysis. Also address a few issues uncovered with this cl: (1) For inplace dynamic slice in llvm backends, truncate, do not wrap, the slice. This matches the behavior of the non-inplace variant. (2) Disable SelectBetweenPredTuples test on GPU. The test introduces top-level buffer ambiguity which is not tolerated by the gpu backend. (3) When deserializing HLO from a proto, do not uniquify instruction names in fused computations. (4) In dataflow analysis, don't deallocate deleted HloValues during propagation. (5) In dataflow analysis, fix issue with live_out_of_computation property. PiperOrigin-RevId: 174423881
Diffstat (limited to 'tensorflow/compiler/xla/service/hlo_computation.cc')
-rw-r--r--tensorflow/compiler/xla/service/hlo_computation.cc13
1 files changed, 8 insertions, 5 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc
index b853444da4..a9c7fdc4e5 100644
--- a/tensorflow/compiler/xla/service/hlo_computation.cc
+++ b/tensorflow/compiler/xla/service/hlo_computation.cc
@@ -412,16 +412,18 @@ HloComputationProto HloComputation::ToProto() const {
/* static */ StatusOr<std::unique_ptr<HloComputation>>
HloComputation::CreateFromProto(
HloModule* module, const HloComputationProto& proto,
- tensorflow::gtl::FlatMap<string, HloComputation*>* computation_map,
+ const tensorflow::gtl::FlatMap<string, HloComputation*>& computation_map,
+ const std::function<void(std::unique_ptr<HloComputation>)>&
+ add_fused_computation,
HloInstruction* fusion_instruction) {
std::vector<std::unique_ptr<HloInstruction>> instructions;
tensorflow::gtl::FlatMap<string, HloInstruction*> instruction_map;
int64 parameter_count = 0;
for (const HloInstructionProto& instruction_proto : proto.instructions()) {
- TF_ASSIGN_OR_RETURN(
- std::unique_ptr<HloInstruction> instruction,
- HloInstruction::CreateFromProto(module, instruction_proto,
- instruction_map, computation_map));
+ TF_ASSIGN_OR_RETURN(std::unique_ptr<HloInstruction> instruction,
+ HloInstruction::CreateFromProto(
+ module, instruction_proto, instruction_map,
+ computation_map, add_fused_computation));
if (instruction->opcode() == HloOpcode::kParameter) {
parameter_count++;
}
@@ -531,6 +533,7 @@ StatusOr<HloInstruction*> HloComputation::DeepCopyInstruction(
if (indices_to_copy != nullptr &&
!ShapeUtil::Compatible(instruction->shape(), indices_to_copy->shape())) {
+ LOG(FATAL) << "DEATH!";
return FailedPrecondition(
"Can't deep copy instruction %s: given shape tree of indices to copy "
"has incompatible shape",