about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-11 10:59:37 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-03-11 11:42:44 -0800
commit: a2b966692ffec38d5e6f1e35cf21ef9ab0ea01fc (patch)
tree: 2bc5440ca7794aa381dd0a2290080c8f79c9fa80
parent: f6f77accbb9c978b5d3562922425bffd01690f79 (diff)
Fixed a bug in the mean reduction code for GPUs
Change: 116988587
-rw-r--r-- tensorflow/core/kernels/reduction_ops_gpu.cu.cc 2
-rw-r--r-- tensorflow/core/kernels/reduction_ops_mean.cc 15
2 files changed, 16 insertions, 1 deletion
diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
index d5d9f44da2..dee47cf928 100644
--- a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
+++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
@@ -52,7 +52,7 @@ struct ReduceFunctor<GPUDevice, Eigen::internal::MeanReducer<T> > {
Index num_coeffs_to_reduce = 1;
for (int i = 0; i < Eigen::internal::array_size<ReductionAxes>::value;
++i) {
- num_coeffs_to_reduce *= in.dimension(i);
+ num_coeffs_to_reduce *= in.dimension(reduction_axes[i]);
}
T scale = T(1.0) / num_coeffs_to_reduce;
out.device(d) = (in * scale).sum(reduction_axes);
diff --git a/tensorflow/core/kernels/reduction_ops_mean.cc b/tensorflow/core/kernels/reduction_ops_mean.cc
index 4f7a89b040..cd2ec01ea4 100644
--- a/tensorflow/core/kernels/reduction_ops_mean.cc
+++ b/tensorflow/core/kernels/reduction_ops_mean.cc
@@ -24,4 +24,19 @@ namespace tensorflow {
TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
#undef REGISTER_CPU_KERNELS
+#if GOOGLE_CUDA
+
+#define REGISTER_GPU_KERNELS(type) \
+ REGISTER_KERNEL_BUILDER( \
+ Name("Mean") \
+ .Device(DEVICE_GPU) \
+ .TypeConstraint<type>("T") \
+ .HostMemory("reduction_indices"), \
+ ReductionOp<GPUDevice, type, Eigen::internal::MeanReducer<type>>);
+REGISTER_GPU_KERNELS(float);
+REGISTER_GPU_KERNELS(double);
+#undef REGISTER_GPU_KERNELS
+
+#endif
+
} // namespace tensorflow