diff options
author | Bixia Zheng <bixia@google.com> | 2018-02-15 10:39:04 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-02-15 10:42:55 -0800 |
commit | b91155edb661e074b716d7051c2cb71cbf9ec759 (patch) | |
tree | 48cc74ee878c9313e9cb3f8a10e8227dcdd96ede /tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc | |
parent | c356d2800182ef7430a70baa2b1b75ea854f9adf (diff) |
Enable half precision convolution for the CPU and GPU backends.
Enhance the CPU IR emitter to support F16 dot operation and convolution
operation.
Add a CPU runtime implementation for F16 convolution.
Enhance the GPU backend to handle F16 convolution thunk.
Convert some F32 xla convolution tests to support both F32 and F16 and disable
the tests for the CPU backend due to b/72509305.
PiperOrigin-RevId: 185862438
Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc | 20 |
1 files changed, 19 insertions, 1 deletions
```diff
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
index d0b0e11ac0..5afccc6a86 100644
--- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
+++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.cc
@@ -22,6 +22,24 @@ limitations under the License.
 using tensorflow::int64;
 
 TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
+__xla_cpu_runtime_EigenSingleThreadedConvF16(
+    const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs,
+    Eigen::half* rhs, int64 input_batch, int64 input_rows, int64 input_cols,
+    int64 input_channels, int64 kernel_rows, int64 kernel_cols,
+    int64 kernel_channels, int64 kernel_filters, int64 output_rows,
+    int64 output_cols, int64 row_stride, int64 col_stride, int64 padding_top,
+    int64 padding_bottom, int64 padding_left, int64 padding_right,
+    int64 lhs_row_dilation, int64 lhs_col_dilation, int64 rhs_row_dilation,
+    int64 rhs_col_dilation) {
+  tensorflow::xla::EigenConvImpl(
+      Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
+      input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
+      kernel_filters, output_rows, output_cols, row_stride, col_stride,
+      padding_top, padding_bottom, padding_left, padding_right,
+      lhs_row_dilation, lhs_col_dilation, rhs_row_dilation, rhs_col_dilation);
+}
+
+TF_ATTRIBUTE_NO_SANITIZE_MEMORY void
 __xla_cpu_runtime_EigenSingleThreadedConvF32(
     const void* run_options_ptr, float* out, float* lhs, float* rhs,
     int64 input_batch, int64 input_rows, int64 input_cols, int64 input_channels,
@@ -30,7 +48,7 @@ __xla_cpu_runtime_EigenSingleThreadedConvF32(
     int64 row_stride, int64 col_stride, int64 padding_top, int64 padding_bottom,
     int64 padding_left, int64 padding_right, int64 lhs_row_dilation,
     int64 lhs_col_dilation, int64 rhs_row_dilation, int64 rhs_col_dilation) {
-  tensorflow::xla::EigenConvF32Impl(
+  tensorflow::xla::EigenConvImpl(
       Eigen::DefaultDevice(), out, lhs, rhs, input_batch, input_rows,
       input_cols, input_channels, kernel_rows, kernel_cols, kernel_channels,
       kernel_filters, output_rows, output_cols, row_stride, col_stride,
```