about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/kernels/pack_op.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/pack_op.cc')
-rw-r--r-- tensorflow/core/kernels/pack_op.cc 18
1 files changed, 8 insertions, 10 deletions
diff --git a/tensorflow/core/kernels/pack_op.cc b/tensorflow/core/kernels/pack_op.cc
index cb78aacb0d..edaa10761e 100644
--- a/tensorflow/core/kernels/pack_op.cc
+++ b/tensorflow/core/kernels/pack_op.cc
@@ -118,6 +118,12 @@ class PackOp : public OpKernel {
return;
}
#endif // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+ if (std::is_same<Device, SYCLDevice>::value) {
+ ConcatSYCL<T>(c->eigen_sycl_device(), inputs_flat, &output_flat);
+ return;
+ }
+#endif // TENSORFLOW_USE_SYCL
ConcatCPU<T>(c->device(), inputs_flat, &output_flat);
}
}
@@ -166,26 +172,18 @@ REGISTER_KERNEL_BUILDER(Name("Pack")
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL
-
#define REGISTER_SYCL(type) \
REGISTER_KERNEL_BUILDER( \
Name("Pack").Device(DEVICE_SYCL).TypeConstraint<type>("T"), \
PackOp<SYCLDevice, type>)
-REGISTER_SYCL(float);
-REGISTER_SYCL(double);
-#undef REGISTER_SYCL
-
-// A special GPU kernel for int32.
-// TODO(b/25387198): Also enable int32 in device memory. This kernel
-// registration requires all int32 inputs and outputs to be in host memory.
+TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL);
REGISTER_KERNEL_BUILDER(Name("Pack")
.Device(DEVICE_SYCL)
.HostMemory("values")
.HostMemory("output")
.TypeConstraint<int32>("T"),
PackOp<CPUDevice, int32>);
-
+#undef REGISTER_SYCL
#endif // TENSORFLOW_USE_SYCL
-
} // namespace tensorflow