diff options
author | 2018-06-19 07:34:09 -0700 | |
---|---|---|
committer | 2018-06-19 07:38:03 -0700 | |
commit | 124fadcf1cc6a4b95f91c69e67b5fb592556e363 (patch) | |
tree | b9d277d0cb2ee4deec3e950b5d259dbff2d38c49 | |
parent | 1e3caf55ba86cd6ea36b8b9dfe5e7670ace29c05 (diff) |
Performance microtweaks: Pass by reference rather than by value; pre-reserve capacity when total vectoroid size is known.
PiperOrigin-RevId: 201172723
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc | 6 |
| -rw-r--r-- | tensorflow/compiler/xla/service/hlo_query.cc | 4 |
| -rw-r--r-- | tensorflow/compiler/xla/service/hlo_query.h | 4 |
| -rw-r--r-- | tensorflow/compiler/xla/service/shape_inference.cc | 2 |
| -rw-r--r-- | tensorflow/compiler/xla/shape_util.cc | 1 |

5 files changed, 11 insertions, 6 deletions
```diff
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
index bb47a42805..c9574c87a3 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
@@ -120,9 +120,10 @@ Status IrEmitterNested::EmitTargetElementLoop(
   // For MOF we give the loop emitter an array for every output it should
   // generate.
   if (hlo.IsMultiOutputFusion()) {
+    const int64 num_elems = ShapeUtil::TupleElementCount(hlo.shape());
     std::vector<llvm_ir::IrArray> target_arrays;
-    for (int64 i = 0, e = ShapeUtil::TupleElementCount(hlo.shape()); i != e;
-         ++i) {
+    target_arrays.reserve(num_elems);
+    for (int64 i = 0; i != num_elems; ++i) {
       target_arrays.push_back(GetIrArray(hlo, hlo, {i}));
     }
     TF_RETURN_IF_ERROR(
@@ -130,6 +131,7 @@ Status IrEmitterNested::EmitTargetElementLoop(
             .EmitLoop());
 
     std::vector<llvm::Value*> tuple_operand_ptrs;
+    tuple_operand_ptrs.reserve(num_elems);
     for (const llvm_ir::IrArray& array : target_arrays) {
       tuple_operand_ptrs.push_back(array.GetBasePointer());
     }
diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc
index d45038f1f4..2418c19f3d 100644
--- a/tensorflow/compiler/xla/service/hlo_query.cc
+++ b/tensorflow/compiler/xla/service/hlo_query.cc
@@ -61,7 +61,7 @@ bool AllOperandsAreConstants(const HloInstruction& instruction) {
 }
 
 HloInstruction* GetMatchingOperand(
-    std::function<bool(const HloInstruction*)> matcher,
+    const std::function<bool(const HloInstruction*)>& matcher,
     HloInstruction* instruction) {
   for (HloInstruction* op : instruction->operands()) {
     if (matcher(op)) {
@@ -72,7 +72,7 @@ HloInstruction* GetMatchingOperand(
 }
 
 bool MatchBinaryInstructionOperand(
-    std::function<bool(const HloInstruction*)> matcher,
+    const std::function<bool(const HloInstruction*)>& matcher,
     HloInstruction* instruction, HloInstruction** matching_operand,
     HloInstruction** other_operand) {
   CHECK_EQ(instruction->operand_count(), 2);
diff --git a/tensorflow/compiler/xla/service/hlo_query.h b/tensorflow/compiler/xla/service/hlo_query.h
index c79347bbf9..c0826a6aee 100644
--- a/tensorflow/compiler/xla/service/hlo_query.h
+++ b/tensorflow/compiler/xla/service/hlo_query.h
@@ -45,7 +45,7 @@ bool IsScalarConstant(const HloInstruction* instruction);
 // multiple matching operands, then the first matching operand is returned. If
 // there are no matching operands then nullptr is returned.
 HloInstruction* GetMatchingOperand(
-    std::function<bool(const HloInstruction*)> matcher,
+    const std::function<bool(const HloInstruction*)>& matcher,
     HloInstruction* instruction);
 
 // Returns whether a binary instruction has a matching operand. Sets
@@ -53,7 +53,7 @@ HloInstruction* GetMatchingOperand(
 // other_operand. Note: in the case where both operands match, the first operand
 // of the instruction is returned.
 bool MatchBinaryInstructionOperand(
-    std::function<bool(const HloInstruction*)> matcher,
+    const std::function<bool(const HloInstruction*)>& matcher,
     HloInstruction* instruction, HloInstruction** matching_operand,
     HloInstruction** other_operand);
 
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index e25f5e67c7..4606d8f202 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -939,6 +939,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
     HloOpcode opcode,
     tensorflow::gtl::ArraySlice<const HloInstruction*> operands) {
   std::vector<const Shape*> operand_shapes;
+  operand_shapes.reserve(operands.size());
   for (const HloInstruction* operand : operands) {
     operand_shapes.push_back(&operand->shape());
   }
@@ -954,6 +955,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
   switch (opcode) {
     case HloOpcode::kTuple: {
       Shape result = ShapeUtil::MakeTupleShape({});
+      result.mutable_tuple_shapes()->Reserve(operand_shapes.size());
       for (const Shape* shape : operand_shapes) {
         ShapeUtil::AppendShapeToTuple(*shape, &result);
       }
diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc
index e9d7178e3d..ba09b63859 100644
--- a/tensorflow/compiler/xla/shape_util.cc
+++ b/tensorflow/compiler/xla/shape_util.cc
@@ -264,6 +264,7 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
     tensorflow::gtl::ArraySlice<Shape> shapes) {
   Shape result;
   result.set_element_type(TUPLE);
+  result.mutable_tuple_shapes()->Reserve(shapes.size());
   for (const auto& shape : shapes) {
     AppendShapeToTuple(shape, &result);
   }
```