diff options
Diffstat (limited to 'tensorflow/core/kernels/reduction_ops_sum.cc')
-rw-r--r-- | tensorflow/core/kernels/reduction_ops_sum.cc | 90 |
1 file changed, 64 insertions, 26 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_sum.cc b/tensorflow/core/kernels/reduction_ops_sum.cc
index c1f4f3475a..5318d8c133 100644
--- a/tensorflow/core/kernels/reduction_ops_sum.cc
+++ b/tensorflow/core/kernels/reduction_ops_sum.cc
@@ -17,26 +17,39 @@ limitations under the License.
 
 namespace tensorflow {
 
-#define REGISTER_CPU_KERNELS(type)        \
-  REGISTER_KERNEL_BUILDER(                \
-      Name("Sum")                         \
-          .Device(DEVICE_CPU)             \
-          .TypeConstraint<type>("T")      \
-          .TypeConstraint<int32>("Tidx"), \
-      ReductionOp<CPUDevice, type, Eigen::internal::SumReducer<type>>);
+#define REGISTER_CPU_KERNELS(type)                                             \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("Sum")                                                              \
+          .Device(DEVICE_CPU)                                                  \
+          .TypeConstraint<type>("T")                                           \
+          .TypeConstraint<int32>("Tidx"),                                      \
+      ReductionOp<CPUDevice, type, int32, Eigen::internal::SumReducer<type>>); \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("Sum")                                                              \
+          .Device(DEVICE_CPU)                                                  \
+          .TypeConstraint<type>("T")                                           \
+          .TypeConstraint<int64>("Tidx"),                                      \
+      ReductionOp<CPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
 TF_CALL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS
 
 #if GOOGLE_CUDA
 
-#define REGISTER_GPU_KERNELS(type)          \
-  REGISTER_KERNEL_BUILDER(                  \
-      Name("Sum")                           \
-          .Device(DEVICE_GPU)               \
-          .TypeConstraint<type>("T")        \
-          .TypeConstraint<int32>("Tidx")    \
-          .HostMemory("reduction_indices"), \
-      ReductionOp<GPUDevice, type, Eigen::internal::SumReducer<type>>);
+#define REGISTER_GPU_KERNELS(type)                                             \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("Sum")                                                              \
+          .Device(DEVICE_GPU)                                                  \
+          .TypeConstraint<type>("T")                                           \
+          .TypeConstraint<int32>("Tidx")                                       \
+          .HostMemory("reduction_indices"),                                    \
+      ReductionOp<GPUDevice, type, int32, Eigen::internal::SumReducer<type>>); \
+  REGISTER_KERNEL_BUILDER(                                                     \
+      Name("Sum")                                                              \
+          .Device(DEVICE_GPU)                                                  \
+          .TypeConstraint<type>("T")                                           \
+          .TypeConstraint<int64>("Tidx")                                       \
+          .HostMemory("reduction_indices"),                                    \
+      ReductionOp<GPUDevice, type, int64, Eigen::internal::SumReducer<type>>);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 TF_CALL_complex64(REGISTER_GPU_KERNELS);
 TF_CALL_complex128(REGISTER_GPU_KERNELS);
@@ -53,19 +66,35 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("input")
         .HostMemory("output")
         .HostMemory("reduction_indices"),
-    ReductionOp<CPUDevice, int32, Eigen::internal::SumReducer<int32>>);
+    ReductionOp<CPUDevice, int32, int32, Eigen::internal::SumReducer<int32>>);
+REGISTER_KERNEL_BUILDER(
+    Name("Sum")
+        .Device(DEVICE_GPU)
+        .TypeConstraint<int32>("T")
+        .TypeConstraint<int64>("Tidx")
+        .HostMemory("input")
+        .HostMemory("output")
+        .HostMemory("reduction_indices"),
+    ReductionOp<CPUDevice, int32, int64, Eigen::internal::SumReducer<int32>>);
 
 #endif
 
 #ifdef TENSORFLOW_USE_SYCL
-#define REGISTER_SYCL_KERNELS(type)         \
-  REGISTER_KERNEL_BUILDER(                  \
-      Name("Sum")                           \
-          .Device(DEVICE_SYCL)              \
-          .TypeConstraint<type>("T")        \
-          .TypeConstraint<int32>("Tidx")    \
-          .HostMemory("reduction_indices"), \
-      ReductionOp<SYCLDevice, type, Eigen::internal::SumReducer<type>>);
+#define REGISTER_SYCL_KERNELS(type)                                        \
+  REGISTER_KERNEL_BUILDER(Name("Sum")                                      \
+                              .Device(DEVICE_SYCL)                         \
+                              .TypeConstraint<type>("T")                   \
+                              .TypeConstraint<int32>("Tidx")               \
+                              .HostMemory("reduction_indices"),            \
+                          ReductionOp<SYCLDevice, type, int32,             \
+                                      Eigen::internal::SumReducer<type>>); \
+  REGISTER_KERNEL_BUILDER(Name("Sum")                                      \
+                              .Device(DEVICE_SYCL)                         \
+                              .TypeConstraint<type>("T")                   \
+                              .TypeConstraint<int64>("Tidx")               \
+                              .HostMemory("reduction_indices"),            \
+                          ReductionOp<SYCLDevice, type, int64,             \
+                                      Eigen::internal::SumReducer<type>>);
 REGISTER_SYCL_KERNELS(float);
 REGISTER_SYCL_KERNELS(double);
 
@@ -77,8 +106,17 @@ REGISTER_KERNEL_BUILDER(
         .HostMemory("input")
         .HostMemory("output")
         .HostMemory("reduction_indices"),
-    ReductionOp<CPUDevice, int32, Eigen::internal::SumReducer<int32>>);
+    ReductionOp<CPUDevice, int32, int32, Eigen::internal::SumReducer<int32>>);
+REGISTER_KERNEL_BUILDER(
+    Name("Sum")
+        .Device(DEVICE_SYCL)
+        .TypeConstraint<int32>("T")
+        .TypeConstraint<int64>("Tidx")
+        .HostMemory("input")
+        .HostMemory("output")
+        .HostMemory("reduction_indices"),
+    ReductionOp<CPUDevice, int32, int64, Eigen::internal::SumReducer<int32>>);
 #undef REGISTER_SYCL_KERNELS
-#endif // TENSORFLOW_USE_SYCL
+#endif  // TENSORFLOW_USE_SYCL
 
 }  // namespace tensorflow