diff options
Diffstat (limited to 'tensorflow/core/kernels/slice_op.cc')
-rw-r--r-- | tensorflow/core/kernels/slice_op.cc | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index dc33a25cec..e2978eccbd 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -56,6 +56,9 @@ gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
+#ifdef TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SYCLDevice;
+#endif // TENSORFLOW_USE_SYCL
 
 // Shared code that is not dependent on the type of T. We do this to reduce
 // code size by not duplicating all this for all T (float, double, int32, etc.)
@@ -300,4 +303,58 @@ REGISTER_KERNEL_BUILDER(Name("Slice")
 
 #endif // GOOGLE_CUDA
 
+#ifdef TENSORFLOW_USE_SYCL
+// Forward declarations of the functor specializations for SYCL.
+namespace functor {
+#define DECLARE_SYCL_SPEC(T, NDIM) \
+  template <> \
+  void Slice<SYCLDevice, T, NDIM>::operator()( \
+      const SYCLDevice& d, typename TTypes<T, NDIM>::Tensor output,\
+      typename TTypes<T, NDIM>::ConstTensor input, \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices, \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes); \
+  extern template struct Slice<SYCLDevice, T, NDIM>;
+
+#define DECLARE_FOR_N(T) \
+  DECLARE_SYCL_SPEC(T, 1); \
+  DECLARE_SYCL_SPEC(T, 2); \
+  DECLARE_SYCL_SPEC(T, 3); \
+  DECLARE_SYCL_SPEC(T, 4); \
+  DECLARE_SYCL_SPEC(T, 5); \
+  DECLARE_SYCL_SPEC(T, 6);
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N);
+DECLARE_FOR_N(int32);
+
+#undef DECLARE_FOR_N
+#undef DECLARE_SYCL_SPEC
+} // namespace functor
+
+#define REGISTER_SYCL(type) \
+  REGISTER_KERNEL_BUILDER(Name("Slice") \
+                              .Device(DEVICE_SYCL) \
+                              .TypeConstraint<type>("T") \
+                              .HostMemory("begin") \
+                              .HostMemory("size") \
+                              .TypeConstraint<int32>("Index"), \
+                          SliceOp<SYCLDevice, type>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL);
+
+// A special SYCL kernel for int32.
+// TODO(b/25387198): Also enable int32 in device memory. This kernel
+// registration requires all int32 inputs and outputs to be in host memory.
+REGISTER_KERNEL_BUILDER(Name("Slice")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int32>("Index")
+                            .HostMemory("input")
+                            .HostMemory("begin")
+                            .HostMemory("size")
+                            .HostMemory("output"),
+                        SliceOp<CPUDevice, int32>);
+
+#undef REGISTER_SYCL
+
+#endif // TENSORFLOW_USE_SYCL
 } // namespace tensorflow