1 files changed, 57 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index dc33a25cec..e2978eccbd 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -56,6 +56,9 @@ gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
+#ifdef TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SYCLDevice;  // Device for the SYCL Slice kernels.
+#endif // TENSORFLOW_USE_SYCL
 
 // Shared code that is not dependent on the type of T.  We do this to reduce
 // code size by not duplicating all this for all T (float, double, int32, etc.)
@@ -300,4 +303,58 @@ REGISTER_KERNEL_BUILDER(Name("Slice")
 
 #endif  // GOOGLE_CUDA
 
+#ifdef TENSORFLOW_USE_SYCL
+// Forward-declares the SYCL Slice functor specializations for ranks 1-6.
+namespace functor {
+#define DECLARE_SYCL_SPEC(T, NDIM)                                 \
+  template <>                                                      \
+  void Slice<SYCLDevice, T, NDIM>::operator()(                     \
+      const SYCLDevice& d, typename TTypes<T, NDIM>::Tensor output,\
+      typename TTypes<T, NDIM>::ConstTensor input,                 \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,       \
+      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes);        \
+  extern template struct Slice<SYCLDevice, T, NDIM>;
+
+#define DECLARE_FOR_N(T)   \
+  DECLARE_SYCL_SPEC(T, 1); \
+  DECLARE_SYCL_SPEC(T, 2); \
+  DECLARE_SYCL_SPEC(T, 3); \
+  DECLARE_SYCL_SPEC(T, 4); \
+  DECLARE_SYCL_SPEC(T, 5); \
+  DECLARE_SYCL_SPEC(T, 6);
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N);
+DECLARE_FOR_N(int32);
+
+#undef DECLARE_FOR_N
+#undef DECLARE_SYCL_SPEC
+}  // namespace functor
+
+#define REGISTER_SYCL(type)                                    \
+  REGISTER_KERNEL_BUILDER(Name("Slice")                        \
+                              .Device(DEVICE_SYCL)             \
+                              .TypeConstraint<type>("T")       \
+                              .HostMemory("begin")             \
+                              .HostMemory("size")              \
+                              .TypeConstraint<int32>("Index"), \
+                          SliceOp<SYCLDevice, type>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL);  // int32 Index; begin/size on host.
+
+// A special SYCL kernel for int32; it reuses the CPUDevice implementation.
+// TODO(b/25387198): Also enable int32 in device memory. This kernel
+// registration requires all int32 inputs and outputs to be in host memory.
+REGISTER_KERNEL_BUILDER(Name("Slice")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int32>("Index")
+                            .HostMemory("input")
+                            .HostMemory("begin")
+                            .HostMemory("size")
+                            .HostMemory("output"),
+                        SliceOp<CPUDevice, int32>);
+
+#undef REGISTER_SYCL
+
+#endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow