aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/slice_op.cc
diff options
context:
space:
mode:
authorGravatar Jonathan Hseu <jhseu@google.com>2016-12-22 15:38:30 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2016-12-22 15:48:41 -0800
commitbed8383c27a0a7225e6fc7ff59a2cd6388fb4d09 (patch)
treeb70cfc88f95f318195f8610ffb960e98604348d1 /tensorflow/core/kernels/slice_op.cc
parent1e5bd8cdd62033d1f7ea928fcbec521bb48bb1f5 (diff)
Merge changes from github.
Change: 142805270
Diffstat (limited to 'tensorflow/core/kernels/slice_op.cc')
-rw-r--r--tensorflow/core/kernels/slice_op.cc57
1 files changed, 57 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc
index dc33a25cec..e2978eccbd 100644
--- a/tensorflow/core/kernels/slice_op.cc
+++ b/tensorflow/core/kernels/slice_op.cc
@@ -56,6 +56,9 @@ gtl::InlinedVector<int64, 4> IntTensorToInt64Vec(const Tensor& tensor) {
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
+#ifdef TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SYCLDevice;  // Device tag used by the SYCL Slice declarations and kernel registrations in this file.
+#endif // TENSORFLOW_USE_SYCL
// Shared code that is not dependent on the type of T. We do this to reduce
// code size by not duplicating all this for all T (float, double, int32, etc.)
@@ -300,4 +303,58 @@ REGISTER_KERNEL_BUILDER(Name("Slice")
#endif // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+// Forward declarations of the Slice functor specializations for SYCL, one per (type, rank) pair for ranks 1 through 6.
+namespace functor {
+#define DECLARE_SYCL_SPEC(T, NDIM) \
+ template <> \
+ void Slice<SYCLDevice, T, NDIM>::operator()( \
+ const SYCLDevice& d, typename TTypes<T, NDIM>::Tensor output,\
+ typename TTypes<T, NDIM>::ConstTensor input, \
+ const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices, \
+ const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes); \
+ extern template struct Slice<SYCLDevice, T, NDIM>;
+
+#define DECLARE_FOR_N(T) \
+ DECLARE_SYCL_SPEC(T, 1); \
+ DECLARE_SYCL_SPEC(T, 2); \
+ DECLARE_SYCL_SPEC(T, 3); \
+ DECLARE_SYCL_SPEC(T, 4); \
+ DECLARE_SYCL_SPEC(T, 5); \
+ DECLARE_SYCL_SPEC(T, 6);
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_FOR_N);  // Applies DECLARE_FOR_N to every type in the macro's list (list defined elsewhere).
+DECLARE_FOR_N(int32);  // NOTE(review): the int32 kernel registered in this file uses SliceOp<CPUDevice, int32>; confirm this SYCL int32 declaration is needed.
+
+#undef DECLARE_FOR_N
+#undef DECLARE_SYCL_SPEC
+} // namespace functor
+
+#define REGISTER_SYCL(type) \
+ REGISTER_KERNEL_BUILDER(Name("Slice") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T") \
+ .HostMemory("begin") \
+ .HostMemory("size") \
+ .TypeConstraint<int32>("Index"), \
+ SliceOp<SYCLDevice, type>)
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_SYCL);  // One SYCL "Slice" kernel per listed type; only "begin" and "size" are declared as host-memory inputs.
+
+// A special SYCL kernel for int32.
+// TODO(b/25387198): Also enable int32 in device memory. This kernel
+// registration requires all int32 inputs and outputs to be in host memory.
+REGISTER_KERNEL_BUILDER(Name("Slice")
+ .Device(DEVICE_SYCL)
+ .TypeConstraint<int32>("T")
+ .TypeConstraint<int32>("Index")
+ .HostMemory("input")
+ .HostMemory("begin")
+ .HostMemory("size")
+ .HostMemory("output"),
+ SliceOp<CPUDevice, int32>);  // Uses the CPUDevice implementation: input, begin, size and output are all declared as host memory.
+
+#undef REGISTER_SYCL
+
+#endif // TENSORFLOW_USE_SYCL
} // namespace tensorflow