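Fixed a bug in the mean reduction code for GPUs: the number of coefficients used to compute the scale factor is now the product of the sizes of the reduced axes (in.dimension(reduction_axes[i])) rather than of the first N input dimensions (in.dimension(i)). Also registered the "Mean" GPU kernels for float and double.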

Change: 116988587
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-11 10:59:37 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-03-11 11:42:44 -0800
commit: a2b966692ffec38d5e6f1e35cf21ef9ab0ea01fc (patch)
tree: 2bc5440ca7794aa381dd0a2290080c8f79c9fa80
parent: f6f77accbb9c978b5d3562922425bffd01690f79 (diff)
2 files changed, 16 insertions, 1 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
index d5d9f44da2..dee47cf928 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
@@ -52,7 +52,7 @@ struct ReduceFunctor<GPUDevice, Eigen::internal::MeanReducer<T> > {
     Index num_coeffs_to_reduce = 1;
     for (int i = 0; i < Eigen::internal::array_size<ReductionAxes>::value;
          ++i) {
-      num_coeffs_to_reduce *= in.dimension(i);
+      num_coeffs_to_reduce *= in.dimension(reduction_axes[i]);
     }
     T scale = T(1.0) / num_coeffs_to_reduce;
     out.device(d) = (in * scale).sum(reduction_axes);
diff --git a/tensorflow/core/kernels/reduction_ops_mean.cc b/tensorflow/core/kernels/reduction_ops_mean.cc
index 4f7a89b040..cd2ec01ea4 100644
--- a/tensorflow/core/kernels/reduction_ops_mean.cc
+++ b/tensorflow/core/kernels/reduction_ops_mean.cc
@@ -24,4 +24,19 @@ namespace tensorflow {
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
 #undef REGISTER_CPU_KERNELS
 
+#if GOOGLE_CUDA
+
+#define REGISTER_GPU_KERNELS(type)          \
+  REGISTER_KERNEL_BUILDER(                  \
+      Name("Mean")                          \
+          .Device(DEVICE_GPU)               \
+          .TypeConstraint<type>("T")        \
+          .HostMemory("reduction_indices"), \
+      ReductionOp<GPUDevice, type, Eigen::internal::MeanReducer<type>>);
+REGISTER_GPU_KERNELS(float);
+REGISTER_GPU_KERNELS(double);
+#undef REGISTER_GPU_KERNELS
+
+#endif
+
 }  // namespace tensorflow
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-03-11 10:59:37 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-03-11 11:42:44 -0800
commit	a2b966692ffec38d5e6f1e35cf21ef9ab0ea01fc (patch)
tree	2bc5440ca7794aa381dd0a2290080c8f79c9fa80
parent	f6f77accbb9c978b5d3562922425bffd01690f79 (diff)