Enable half precision convolution for the CPU and GPU backends.

Enhance the CPU IR emitter to support F16 dot and convolution operations. Add a CPU runtime implementation for F16 convolution. Enhance the GPU backend to handle the F16 convolution thunk. Convert some F32 XLA convolution tests to support both F32 and F16, and disable those tests for the CPU backend due to b/72509305.

PiperOrigin-RevId: 185862438
author: Bixia Zheng <bixia@google.com> 2018-02-15 10:39:04 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-02-15 10:42:55 -0800
commit: b91155edb661e074b716d7051c2cb71cbf9ec759 (patch)
tree: 48cc74ee878c9313e9cb3f8a10e8227dcdd96ede /tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
parent: c356d2800182ef7430a70baa2b1b75ea854f9adf (diff)
1 files changed, 19 insertions, 1 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
index d0b0e11ac0..5afccc6a86 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
@@ -22,6 +22,24 @@ limitations under the License.
 using tensorflow::int64;
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
+__xla_cpu_runtime_EigenSingleThreadedConvF16(
+    const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
+    Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols,
+    int64 input_channels, int64 kernel_rows, int64 kernel_cols,
+    int64 kernel_channels, int64 kernel_filters, int64 output_rows,
+    int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top,
+    int64 padding_bottom, int64 padding_left, int64 padding_right,
+    int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation,
+    int64 rhs_col_dilation) {
+  tensorflow::xla::EigenConvImpl(
+      Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
+      input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
+      kernel_filters, output_rows, output_cols, row_stride, col_stride,
+      padding_top, padding_bottom, padding_left, padding_right,
+      lhs_row_dilation, lhs_col_dilation, rhs_row_dilation, rhs_col_dilation);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
 __xla_cpu_runtime_EigenSingleThreadedConvF32(
     const void* run_options_ptr, float* out, float* lhs, float* rhs,
     int64 input_batch, int64 input_rows, int64 input_cols, int64 input_channels,
@@ -30,7 +48,7 @@ __xla_cpu_runtime_EigenSingleThreadedConvF32(
     int64 row_stride, int64 col_stride, int64 padding_top, int64 padding_bottom,
     int64 padding_left, int64 padding_right, int64 lhs_row_dilation,
     int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) {
-  tensorflow::xla::EigenConvF32Impl(
+  tensorflow::xla::EigenConvImpl(
       Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
       input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
       kernel_filters, output_rows, output_cols, row_stride, col_stride,
author	Bixia Zheng <bixia@google.com>	2018-02-15 10:39:04 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2018-02-15 10:42:55 -0800
commit	b91155edb661e074b716d7051c2cb71cbf9ec759 (patch)
tree	48cc74ee878c9313e9cb3f8a10e8227dcdd96ede /tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
parent	c356d2800182ef7430a70baa2b1b75ea854f9adf (diff)