about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Olivia Nordquist <nolivia@google.com> 2016-10-04 11:04:42 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-10-04 12:18:19 -0700
commit: e92e24be0d3a60882444d4b8efbb6e386d10b4d2 (patch)
tree: 19c01d6efff467b9129d1f961cad2609a5130c81
parent: 19aeb9d301df55533ed39dc4d655a8c6c777d94d (diff)
Enable Tile for complex inputs on GPU (GitHub issue #2577).
Change: 135135194
-rw-r--r-- tensorflow/core/kernels/tile_ops.cc 27
-rw-r--r-- tensorflow/core/kernels/tile_ops_gpu.cu.cc 7
2 files changed, 33 insertions, 1 deletion
diff --git a/tensorflow/core/kernels/tile_ops.cc b/tensorflow/core/kernels/tile_ops.cc
index d021544316..9d9ff68df1 100644
--- a/tensorflow/core/kernels/tile_ops.cc
+++ b/tensorflow/core/kernels/tile_ops.cc
@@ -244,6 +244,8 @@ TF_CALL_int16(HANDLE_TYPE_NAME_GPU);
TF_CALL_int32(HANDLE_TYPE_NAME_GPU);
TF_CALL_int64(HANDLE_TYPE_NAME_GPU);
TF_CALL_half(HANDLE_TYPE_NAME_GPU);
+TF_CALL_complex64(HANDLE_TYPE_NAME_GPU);
+TF_CALL_complex128(HANDLE_TYPE_NAME_GPU);
#endif // GOOGLE_CUDA
#undef HANDLE_TYPE_NAME_CPU
@@ -320,6 +322,8 @@ class TileGradientOp : public OpKernel {
TF_CALL_int16(HANDLE_TYPE_NAME);
TF_CALL_int64(HANDLE_TYPE_NAME);
TF_CALL_half(HANDLE_TYPE_NAME);
+ TF_CALL_complex64(HANDLE_TYPE_NAME);
+ TF_CALL_complex128(HANDLE_TYPE_NAME);
#undef HANDLE_TYPE_NAME
#undef HANDLE_TYPE
@@ -479,6 +483,8 @@ TF_CALL_int16(HANDLE_TYPE_NAME_GPU);
TF_CALL_int32(HANDLE_TYPE_NAME_GPU);
TF_CALL_int64(HANDLE_TYPE_NAME_GPU);
TF_CALL_half(HANDLE_TYPE_NAME_GPU);
+TF_CALL_complex64(HANDLE_TYPE_NAME_GPU);
+TF_CALL_complex128(HANDLE_TYPE_NAME_GPU);
#endif // GOOGLE_CUDA
#undef HANDLE_TYPE_NAME_CPU
@@ -515,6 +521,16 @@ REGISTER_KERNEL_BUILDER(Name("Tile")
.TypeConstraint<int16>("T")
.HostMemory("multiples"),
TileOp<GPUDevice>);
+REGISTER_KERNEL_BUILDER(Name("Tile")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<complex64>("T")
+ .HostMemory("multiples"),
+ TileOp<GPUDevice>);
+REGISTER_KERNEL_BUILDER(Name("Tile")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<complex128>("T")
+ .HostMemory("multiples"),
+ TileOp<GPUDevice>);
REGISTER_KERNEL_BUILDER(Name("TileGrad")
.Device(DEVICE_GPU)
@@ -536,5 +552,16 @@ REGISTER_KERNEL_BUILDER(Name("TileGrad")
.TypeConstraint<int16>("T")
.HostMemory("multiples"),
TileGradientOp<GPUDevice>);
+REGISTER_KERNEL_BUILDER(Name("TileGrad")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<complex64>("T")
+ .HostMemory("multiples"),
+ TileGradientOp<GPUDevice>);
+REGISTER_KERNEL_BUILDER(Name("TileGrad")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<complex128>("T")
+ .HostMemory("multiples"),
+ TileGradientOp<GPUDevice>);
+
#endif // GOOGLE_CUDA
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/tile_ops_gpu.cu.cc b/tensorflow/core/kernels/tile_ops_gpu.cu.cc
index 6a83ad4b4f..30808da0e0 100644
--- a/tensorflow/core/kernels/tile_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/tile_ops_gpu.cu.cc
@@ -18,6 +18,7 @@ limitations under the License.
#define EIGEN_USE_GPU
#include <stdio.h>
+#include "tensorflow/core/framework/numeric_types.h"
#include "tensorflow/core/kernels/tile_ops_impl.h"
namespace tensorflow {
@@ -31,7 +32,9 @@ typedef Eigen::GpuDevice GPUDevice;
DEFINE_DIM(T, 3) \
DEFINE_DIM(T, 4) \
DEFINE_DIM(T, 5) \
- DEFINE_DIM(T, 6)
+ DEFINE_DIM(T, 6) \
+ DEFINE_DIM(T, 7) \
+ DEFINE_DIM(T, 8)
#define DEFINE_DIM(T, NDIM) \
template struct Tile<GPUDevice, T, NDIM>; \
@@ -44,6 +47,8 @@ DEFINE_TYPE(Eigen::half)
DEFINE_TYPE(int64)
DEFINE_TYPE(int32)
DEFINE_TYPE(int16)
+DEFINE_TYPE(complex64)
+DEFINE_TYPE(complex128)
// NOTE(keveman): Eigen's int8 and string versions don't compile yet with nvcc.
#undef DEFINE_DIM