about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2018-06-19 07:34:09 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-06-19 07:38:03 -0700
commit 124fadcf1cc6a4b95f91c69e67b5fb592556e363 (patch)
tree b9d277d0cb2ee4deec3e950b5d259dbff2d38c49 /tensorflow/compiler/xla/service
parent 1e3caf55ba86cd6ea36b8b9dfe5e7670ace29c05 (diff)
Performance microtweaks: Pass by reference rather than by value; pre-reserve capacity when total vectoroid size is known.
PiperOrigin-RevId: 201172723
Diffstat (limited to 'tensorflow/compiler/xla/service')
-rw-r--r--tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc6
-rw-r--r--tensorflow/compiler/xla/service/hlo_query.cc4
-rw-r--r--tensorflow/compiler/xla/service/hlo_query.h4
-rw-r--r--tensorflow/compiler/xla/service/shape_inference.cc2
4 files changed, 10 insertions, 6 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
index bb47a42805..c9574c87a3 100644
--- a/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
+++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_nested.cc
@@ -120,9 +120,10 @@ Status IrEmitterNested::EmitTargetElementLoop(
// For MOF we give the loop emitter an array for every output it should
// generate.
if (hlo.IsMultiOutputFusion()) {
+ const int64 num_elems = ShapeUtil::TupleElementCount(hlo.shape());
std::vector<llvm_ir::IrArray> target_arrays;
- for (int64 i = 0, e = ShapeUtil::TupleElementCount(hlo.shape()); i != e;
- ++i) {
+ target_arrays.reserve(num_elems);
+ for (int64 i = 0; i != num_elems; ++i) {
target_arrays.push_back(GetIrArray(hlo, hlo, {i}));
}
TF_RETURN_IF_ERROR(
@@ -130,6 +131,7 @@ Status IrEmitterNested::EmitTargetElementLoop(
.EmitLoop());
std::vector<llvm::Value*> tuple_operand_ptrs;
+ tuple_operand_ptrs.reserve(num_elems);
for (const llvm_ir::IrArray& array : target_arrays) {
tuple_operand_ptrs.push_back(array.GetBasePointer());
}
diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc
index d45038f1f4..2418c19f3d 100644
--- a/tensorflow/compiler/xla/service/hlo_query.cc
+++ b/tensorflow/compiler/xla/service/hlo_query.cc
@@ -61,7 +61,7 @@ bool AllOperandsAreConstants(const HloInstruction& instruction) {
}
HloInstruction* GetMatchingOperand(
- std::function<bool(const HloInstruction*)> matcher,
+ const std::function<bool(const HloInstruction*)>& matcher,
HloInstruction* instruction) {
for (HloInstruction* op : instruction->operands()) {
if (matcher(op)) {
@@ -72,7 +72,7 @@ HloInstruction* GetMatchingOperand(
}
bool MatchBinaryInstructionOperand(
- std::function<bool(const HloInstruction*)> matcher,
+ const std::function<bool(const HloInstruction*)>& matcher,
HloInstruction* instruction, HloInstruction** matching_operand,
HloInstruction** other_operand) {
CHECK_EQ(instruction->operand_count(), 2);
diff --git a/tensorflow/compiler/xla/service/hlo_query.h b/tensorflow/compiler/xla/service/hlo_query.h
index c79347bbf9..c0826a6aee 100644
--- a/tensorflow/compiler/xla/service/hlo_query.h
+++ b/tensorflow/compiler/xla/service/hlo_query.h
@@ -45,7 +45,7 @@ bool IsScalarConstant(const HloInstruction* instruction);
// multiple matching operands, then the first matching operand is returned. If
// there are no matching operands then nullptr is returned.
HloInstruction* GetMatchingOperand(
- std::function<bool(const HloInstruction*)> matcher,
+ const std::function<bool(const HloInstruction*)>& matcher,
HloInstruction* instruction);
// Returns whether a binary instruction has a matching operand. Sets
@@ -53,7 +53,7 @@ HloInstruction* GetMatchingOperand(
// other_operand. Note: in the case where both operands match, the first operand
// of the instruction is returned.
bool MatchBinaryInstructionOperand(
- std::function<bool(const HloInstruction*)> matcher,
+ const std::function<bool(const HloInstruction*)>& matcher,
HloInstruction* instruction, HloInstruction** matching_operand,
HloInstruction** other_operand);
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc
index e25f5e67c7..4606d8f202 100644
--- a/tensorflow/compiler/xla/service/shape_inference.cc
+++ b/tensorflow/compiler/xla/service/shape_inference.cc
@@ -939,6 +939,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
HloOpcode opcode,
tensorflow::gtl::ArraySlice<const HloInstruction*> operands) {
std::vector<const Shape*> operand_shapes;
+ operand_shapes.reserve(operands.size());
for (const HloInstruction* operand : operands) {
operand_shapes.push_back(&operand->shape());
}
@@ -954,6 +955,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation,
switch (opcode) {
case HloOpcode::kTuple: {
Shape result = ShapeUtil::MakeTupleShape({});
+ result.mutable_tuple_shapes()->Reserve(operand_shapes.size());
for (const Shape* shape : operand_shapes) {
ShapeUtil::AppendShapeToTuple(*shape, &result);
}