diff options
Diffstat (limited to 'tensorflow/core/kernels/shape_ops.cc')
-rw-r--r-- | tensorflow/core/kernels/shape_ops.cc | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc index 165fc64a84..cce5c3adb0 100644 --- a/tensorflow/core/kernels/shape_ops.cc +++ b/tensorflow/core/kernels/shape_ops.cc @@ -63,6 +63,40 @@ REGISTER_KERNEL_BUILDER(Name("Shape") .TypeConstraint<int64>("out_type"), ShapeOp<int64>); +#ifdef TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("Shape") \ + .Device(DEVICE_SYCL) \ + .HostMemory("output") \ + .TypeConstraint<int32>("out_type") \ + .TypeConstraint<type>("T"), \ + ShapeOp<int32>); \ + REGISTER_KERNEL_BUILDER(Name("Shape") \ + .Device(DEVICE_SYCL) \ + .HostMemory("output") \ + .TypeConstraint<int64>("out_type") \ + .TypeConstraint<type>("T"), \ + ShapeOp<int64>); + +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_SYCL_KERNEL); +#undef REGISTER_SYCL_KERNEL + +REGISTER_KERNEL_BUILDER(Name("Shape") + .Device(DEVICE_SYCL) + .HostMemory("input") + .HostMemory("output") + .TypeConstraint<int32>("T") + .TypeConstraint<int32>("out_type"), + ShapeOp<int32>); +REGISTER_KERNEL_BUILDER(Name("Shape") + .Device(DEVICE_SYCL) + .HostMemory("input") + .HostMemory("output") + .TypeConstraint<int32>("T") + .TypeConstraint<int64>("out_type"), + ShapeOp<int64>); +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER(Name("Shape") \ @@ -193,6 +227,34 @@ class RankOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("Rank").Device(DEVICE_CPU).HostMemory("output"), RankOp); +#ifdef TENSORFLOW_USE_SYCL +#define REGISTER_SYCL_KERNEL(type) \ + REGISTER_KERNEL_BUILDER(Name("Rank") \ + .Device(DEVICE_SYCL) \ + .TypeConstraint<type>("T") \ + .HostMemory("output"), \ + RankOp); +REGISTER_SYCL_KERNEL(float); +#undef REGISTER_SYCL_KERNEL + +// A special SYCL kernel for int32 and bool. +// TODO(b/25387198): Also enable int32 in device memory. This kernel +// registration requires all int32 inputs and outputs to be in host memory. 
+REGISTER_KERNEL_BUILDER(Name("Rank") + .Device(DEVICE_SYCL) + .TypeConstraint<int32>("T") + .HostMemory("input") + .HostMemory("output"), + RankOp); + +REGISTER_KERNEL_BUILDER(Name("Rank") + .Device(DEVICE_SYCL) + .TypeConstraint<bool>("T") + .HostMemory("input") + .HostMemory("output"), + RankOp); +#endif // TENSORFLOW_USE_SYCL + #if GOOGLE_CUDA #define REGISTER_GPU_KERNEL(type) \ REGISTER_KERNEL_BUILDER(Name("Rank") \ |