about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/cpu/ir_emitter.h
diff options
context:
space:
mode:
author: Sanjoy Das <sanjoy@google.com> 2018-05-09 19:39:58 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-05-09 19:42:49 -0700
commit: bb8315f0cf066266647c6eacdf575ac8f5e9989e (patch)
tree: 3701a5004258519f0baa4420416008be22dc0114 /tensorflow/compiler/xla/service/cpu/ir_emitter.h
parent: f79dbc73c5b2c0debb916280e4436d98890ed03b (diff)
Don't call into Eigen unless the input and output tensors are aligned
We teach TargetMachineFeatures about the alignment required for Eigen GEMM and Conv and then pipe TargetMachineFeatures through the places that need to decide whether a dot or a conv needs to be lowered to a call to Eigen.

I also had to fix a minor bug in our LLVM IR implementation for convolution.

PiperOrigin-RevId: 196065557
Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/ir_emitter.h')
-rw-r--r-- tensorflow/compiler/xla/service/cpu/ir_emitter.h 7
1 files changed, 2 insertions, 5 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
index 5a04076080..f49cfc1dc3 100644
--- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h
+++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h
@@ -76,7 +76,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
instruction_to_profile_idx,
std::unordered_map<const HloComputation*, int64>
computation_to_profile_idx,
- llvm::TargetMachine* target_machine,
+ const TargetMachineFeatures* target_machine,
ExternalConstantPool* external_constant_pool);
~IrEmitter() override;
@@ -514,9 +514,6 @@ class IrEmitter : public DfsHloVisitorWithDefault {
// Calculate the alignment of a buffer allocated for a given primitive type.
int MinimumAlignmentForPrimitiveType(PrimitiveType primitive_type);
- // Calculate the alignment of a buffer with a particular size.
- int MinimumAlignmentForBufferSize(int64 buffer_size);
-
// Returns the number of bytes within the shape.
int64 ByteSizeOf(const Shape& shape) const;
@@ -536,7 +533,7 @@ class IrEmitter : public DfsHloVisitorWithDefault {
bool is_top_level_computation_;
- TargetMachineFeatures target_machine_features_;
+ const TargetMachineFeatures& target_machine_features_;
int64 external_global_constant_counter_ = 0;
ExternalConstantPool* external_constant_pool_;