about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
diff options
context:
space:
mode:
author: Bixia Zheng <bixia@google.com>, 2018-02-15 10:39:04 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2018-02-15 10:42:55 -0800
commit: b91155edb661e074b716d7051c2cb71cbf9ec759 (patch)
tree: 48cc74ee878c9313e9cb3f8a10e8227dcdd96ede /tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
parent: c356d2800182ef7430a70baa2b1b75ea854f9adf (diff)
Enable half precision convolution for the CPU and GPU backends.

Enhance the CPU IR emitter to support F16 dot and convolution operations. Add a CPU runtime implementation for F16 convolution. Enhance the GPU backend to handle the F16 convolution thunk. Convert some F32 XLA convolution tests to support both F32 and F16, and disable the tests for the CPU backend due to b/72509305.

PiperOrigin-RevId: 185862438
Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc')
-rw-r--r-- tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc | 20
1 file changed, 19 insertions, 1 deletion
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
index d0b0e11ac0..5afccc6a86 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
@@ -22,6 +22,24 @@ limitations under the License.
using tensorflow::int64;
TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
+__xla_cpu_runtime_EigenSingleThreadedConvF16(
+ const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
+ Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols,
+ int64 input_channels, int64 kernel_rows, int64 kernel_cols,
+ int64 kernel_channels, int64 kernel_filters, int64 output_rows,
+ int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top,
+ int64 padding_bottom, int64 padding_left, int64 padding_right,
+ int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation,
+ int64 rhs_col_dilation) {
+ tensorflow::xla::EigenConvImpl(
+ Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
+ input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
+ kernel_filters, output_rows, output_cols, row_stride, col_stride,
+ padding_top, padding_bottom, padding_left, padding_right,
+ lhs_row_dilation, lhs_col_dilation, rhs_row_dilation, rhs_col_dilation);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
__xla_cpu_runtime_EigenSingleThreadedConvF32(
const void* run_options_ptr, float* out, float* lhs, float* rhs,
int64 input_batch, int64 input_rows, int64 input_cols, int64 input_channels,
@@ -30,7 +48,7 @@ __xla_cpu_runtime_EigenSingleThreadedConvF32(
int64 row_stride, int64 col_stride, int64 padding_top, int64 padding_bottom,
int64 padding_left, int64 padding_right, int64 lhs_row_dilation,
int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) {
- tensorflow::xla::EigenConvF32Impl(
+ tensorflow::xla::EigenConvImpl(
Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
kernel_filters, output_rows, output_cols, row_stride, col_stride,