about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/kernels/reduction_ops_sum.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/reduction_ops_sum.cc')
-rw-r--r-- tensorflow/core/kernels/reduction_ops_sum.cc | 90
1 files changed, 64 insertions, 26 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index c1f4f3475a..5318d8c133 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -17,26 +17,39 @@ limitations under the License.
namespace tensorflow {
-#define REGISTER_CPU_KERNELS(type) \
- REGISTER_KERNEL_BUILDER( \
- Name("Sum") \
- .Device(DEVICE_CPU) \
- .TypeConstraint<type>("T") \
- .TypeConstraint<int32>("Tidx"), \
- ReductionOp<CPUDevice, type, Eigen::internal::SumReducer<type>>);
+#define REGISTER_CPU_KERNELS(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sum") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int32>("Tidx"), \
+ ReductionOp<CPUDevice, type, int32, Eigen::internal::SumReducer<type>>); \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sum") \
+ .Device(DEVICE_CPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int64>("Tidx"), \
+ ReductionOp<CPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
#if GOOGLE_CUDA
-#define REGISTER_GPU_KERNELS(type) \
- REGISTER_KERNEL_BUILDER( \
- Name("Sum") \
- .Device(DEVICE_GPU) \
- .TypeConstraint<type>("T") \
- .TypeConstraint<int32>("Tidx") \
- .HostMemory("reduction_indices"), \
- ReductionOp<GPUDevice, type, Eigen::internal::SumReducer<type>>);
+#define REGISTER_GPU_KERNELS(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sum") \
+ .Device(DEVICE_GPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int32>("Tidx") \
+ .HostMemory("reduction_indices"), \
+ ReductionOp<GPUDevice, type, int32, Eigen::internal::SumReducer<type>>); \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Sum") \
+ .Device(DEVICE_GPU) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int64>("Tidx") \
+ .HostMemory("reduction_indices"), \
+ ReductionOp<GPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
TF_CALL_complex64(REGISTER_GPU_KERNELS);
TF_CALL_complex128(REGISTER_GPU_KERNELS);
@@ -53,19 +66,35 @@ REGISTER_KERNEL_BUILDER(
.HostMemory("input")
.HostMemory("output")
.HostMemory("reduction_indices"),
- ReductionOp<CPUDevice, int32, Eigen::internal::SumReducer<int32>>);
+ ReductionOp<CPUDevice, int32, int32, Eigen::internal::SumReducer<int32>>);
+REGISTER_KERNEL_BUILDER(
+ Name("Sum")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<int32>("T")
+ .TypeConstraint<int64>("Tidx")
+ .HostMemory("input")
+ .HostMemory("output")
+ .HostMemory("reduction_indices"),
+ ReductionOp<CPUDevice, int32, int64, Eigen::internal::SumReducer<int32>>);
#endif
#ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type) \
- REGISTER_KERNEL_BUILDER( \
- Name("Sum") \
- .Device(DEVICE_SYCL) \
- .TypeConstraint<type>("T") \
- .TypeConstraint<int32>("Tidx") \
- .HostMemory("reduction_indices"), \
- ReductionOp<SYCLDevice, type, Eigen::internal::SumReducer<type>>);
+#define REGISTER_SYCL_KERNELS(type) \
+ REGISTER_KERNEL_BUILDER(Name("Sum") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int32>("Tidx") \
+ .HostMemory("reduction_indices"), \
+ ReductionOp<SYCLDevice, type, int32, \
+ Eigen::internal::SumReducer<type>>); \
+ REGISTER_KERNEL_BUILDER(Name("Sum") \
+ .Device(DEVICE_SYCL) \
+ .TypeConstraint<type>("T") \
+ .TypeConstraint<int64>("Tidx") \
+ .HostMemory("reduction_indices"), \
+ ReductionOp<SYCLDevice, type, int64, \
+ Eigen::internal::SumReducer<type>>);
REGISTER_SYCL_KERNELS(float);
REGISTER_SYCL_KERNELS(double);
@@ -77,8 +106,17 @@ REGISTER_KERNEL_BUILDER(
.HostMemory("input")
.HostMemory("output")
.HostMemory("reduction_indices"),
- ReductionOp<CPUDevice, int32, Eigen::internal::SumReducer<int32>>);
+ ReductionOp<CPUDevice, int32, int32, Eigen::internal::SumReducer<int32>>);
+REGISTER_KERNEL_BUILDER(
+ Name("Sum")
+ .Device(DEVICE_SYCL)
+ .TypeConstraint<int32>("T")
+ .TypeConstraint<int64>("Tidx")
+ .HostMemory("input")
+ .HostMemory("output")
+ .HostMemory("reduction_indices"),
+ ReductionOp<CPUDevice, int32, int64, Eigen::internal::SumReducer<int32>>);
#undef REGISTER_SYCL_KERNELS
-#endif // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
} // namespace tensorflow