Don't call into Eigen unless the input and output tensors are aligned

We teach TargetMachineFeatures about the alignment required for Eigen GEMM and Conv, and then pipe TargetMachineFeatures through the places that need to decide whether a dot or a conv should be lowered to a call to Eigen. I also had to fix a minor bug in our LLVM IR implementation for convolution.

PiperOrigin-RevId: 196065557
author: Sanjoy Das <sanjoy@google.com> 2018-05-09 19:39:58 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-05-09 19:42:49 -0700
commit: bb8315f0cf066266647c6eacdf575ac8f5e9989e (patch)
tree: 3701a5004258519f0baa4420416008be22dc0114 /tensorflow/compiler/xla/service/cpu/BUILD
parent: f79dbc73c5b2c0debb916280e4436d98890ed03b (diff)
1 files changed, 32 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD
index 7e6d58c7fa..790163fca6 100644
--- a/tensorflow/compiler/xla/service/cpu/BUILD
+++ b/tensorflow/compiler/xla/service/cpu/BUILD
@@ -296,6 +296,15 @@ cc_library(
 )
 
 cc_library(
+    name = "target_machine_features_fake",
+    testonly = 1,
+    hdrs = ["target_machine_features_fake.h"],
+    deps = [
+        ":target_machine_features",
+    ],
+)
+
+cc_library(
     name = "ir_function",
     srcs = ["ir_function.cc"],
     hdrs = ["ir_function.h"],
@@ -336,6 +345,7 @@ cc_library(
     deps = [
         ":cpu_options",
         ":cpu_runtime",
+        ":ir_emission_utils",
         ":target_machine_features",
         ":vector_support_library",
         "//tensorflow/compiler/xla:shape_util",
@@ -660,6 +670,7 @@ cc_library(
     hdrs = ["ir_emission_utils.h"],
     deps = [
         ":cpu_runtime",
+        ":target_machine_features",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:window_util",
         "//tensorflow/compiler/xla/service:hlo",
@@ -672,6 +683,7 @@ tf_cc_test(
     srcs = ["ir_emission_utils_test.cc"],
     deps = [
         ":ir_emission_utils",
+        ":target_machine_features_fake",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:test_helpers",
         "//tensorflow/compiler/xla:util",
@@ -690,6 +702,7 @@ cc_library(
     deps = [
         ":dot_op_emitter",
         ":ir_emission_utils",
+        ":target_machine_features",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla/service:computation_layout",
         "//tensorflow/compiler/xla/service:layout_assignment",
@@ -703,6 +716,7 @@ tf_cc_test(
     srcs = ["cpu_layout_assignment_test.cc"],
     deps = [
         ":cpu_layout_assignment",
+        ":target_machine_features_fake",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_layout",
         "//tensorflow/compiler/xla:shape_util",
@@ -727,6 +741,7 @@ cc_library(
     deps = [
         ":cpu_runtime",
         ":ir_emission_utils",
+        ":target_machine_features",
         "//tensorflow/compiler/xla:shape_util",
         "//tensorflow/compiler/xla:util",
         "//tensorflow/compiler/xla:xla_data_proto",
@@ -741,6 +756,7 @@ tf_cc_test(
     srcs = ["conv_canonicalization_test.cc"],
     deps = [
         ":conv_canonicalization",
+        ":target_machine_features_fake",
         "//tensorflow/compiler/xla:test",
         "//tensorflow/compiler/xla:test_helpers",
         "//tensorflow/compiler/xla:util",
@@ -779,6 +795,7 @@ cc_library(
         ":dot_op_emitter",
         ":ir_emission_utils",
         ":shape_partition",
+        ":target_machine_features",
         "//tensorflow/compiler/xla/service:hlo",
         "//tensorflow/compiler/xla/service:hlo_cost_analysis",
         "//tensorflow/compiler/xla/service:hlo_pass",
@@ -791,6 +808,7 @@ tf_cc_test(
     deps = [
         ":cpu_executable",
         ":parallel_task_assignment",
+        ":target_machine_features_fake",
         "//tensorflow/compiler/xla:literal_util",
         "//tensorflow/compiler/xla:shape_layout",
         "//tensorflow/compiler/xla:shape_util",
@@ -913,3 +931,17 @@ tf_cc_test(
         "//tensorflow/core:test",
     ],
 )
+
+tf_cc_test(
+    name = "cpu_eigen_tensor_alignment_test",
+    size = "small",
+    srcs = ["cpu_eigen_tensor_alignment_test.cc"],
+    deps = [
+        ":dot_op_emitter",
+        ":ir_emission_utils",
+        ":target_machine_features_fake",
+        "//tensorflow/compiler/xla:test",
+        "//tensorflow/compiler/xla/tests:xla_internal_test_main",
+        "//tensorflow/compiler/xla/tools/parser:hlo_parser",
+    ],
+)
author	Sanjoy Das <sanjoy@google.com>	2018-05-09 19:39:58 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-05-09 19:42:49 -0700
commit	bb8315f0cf066266647c6eacdf575ac8f5e9989e (patch)
tree	3701a5004258519f0baa4420416008be22dc0114 /tensorflow/compiler/xla/service/cpu/BUILD
parent	f79dbc73c5b2c0debb916280e4436d98890ed03b (diff)