diff options
author | 2016-03-11 10:59:37 -0800 | |
---|---|---|
committer | 2016-03-11 11:42:44 -0800 | |
commit | a2b966692ffec38d5e6f1e35cf21ef9ab0ea01fc (patch) | |
tree | 2bc5440ca7794aa381dd0a2290080c8f79c9fa80 | |
parent | f6f77accbb9c978b5d3562922425bffd01690f79 (diff) |
Fixed a bug in the mean reduction code for GPUs
Change: 116988587
-rw-r--r-- | tensorflow/core/kernels/reduction_ops_gpu.cu.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/kernels/reduction_ops_mean.cc | 15 |
2 files changed, 16 insertions, 1 deletion
```diff
diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
index d5d9f44da2..dee47cf928 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
@@ -52,7 +52,7 @@ struct ReduceFunctor<GPUDevice, Eigen::internal::MeanReducer<T> > {
     Index num_coeffs_to_reduce = 1;
     for (int i = 0; i < Eigen::internal::array_size<ReductionAxes>::value;
          ++i) {
-      num_coeffs_to_reduce *= in.dimension(i);
+      num_coeffs_to_reduce *= in.dimension(reduction_axes[i]);
     }
     T scale = T(1.0) / num_coeffs_to_reduce;
     out.device(d) = (in * scale).sum(reduction_axes);
diff --git a/tensorflow/core/kernels/reduction_ops_mean.cc b/tensorflow/core/kernels/reduction_ops_mean.cc
index 4f7a89b040..cd2ec01ea4 100644
--- a/tensorflow/core/kernels/reduction_ops_mean.cc
+++ b/tensorflow/core/kernels/reduction_ops_mean.cc
@@ -24,4 +24,19 @@ namespace tensorflow {
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS
 
+#if GOOGLE_CUDA
+
+#define REGISTER_GPU_KERNELS(type) \
+  REGISTER_KERNEL_BUILDER( \
+      Name("Mean") \
+          .Device(DEVICE_GPU) \
+          .TypeConstraint<type>("T") \
+          .HostMemory("reduction_indices"), \
+      ReductionOp<GPUDevice, type, Eigen::internal::MeanReducer<type>>);
+REGISTER_GPU_KERNELS(float);
+REGISTER_GPU_KERNELS(double);
+#undef REGISTER_GPU_KERNELS
+
+#endif
+
 }  // namespace tensorflow
```