about summary refs log tree commit diff homepage
path: root/tensorflow/core/kernels/reduction_ops_common.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/reduction_ops_common.h')
-rw-r--r-- tensorflow/core/kernels/reduction_ops_common.h 25
1 files changed, 25 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h
index 1bc7e14187..0d309c2185 100644
--- a/tensorflow/core/kernels/reduction_ops_common.h
+++ b/tensorflow/core/kernels/reduction_ops_common.h
@@ -268,6 +268,31 @@ struct ReduceFunctor<CPUDevice, Reducer>
// Generic SYCLDevice specialization of ReduceFunctor: it adds no members of
// its own and takes everything from ReduceFunctorBase<SYCLDevice, Reducer>.
template <typename Reducer>
struct ReduceFunctor<SYCLDevice, Reducer>
: ReduceFunctorBase<SYCLDevice, Reducer>{};
+
+template <typename T>  // Specialization of ReduceFunctor for SYCLDevice combined with Eigen's MeanReducer<T>.
+struct ReduceFunctor<SYCLDevice, Eigen::internal::MeanReducer<T> > {  // Provides its own Reduce and FillIdentity; no base class is used here.
+ template <typename OUT_T, typename IN_T, typename ReductionAxes>
+ static void Reduce(const SYCLDevice& d, OUT_T out, IN_T in,  // Assigns to `out` (on device `d`) the sum of `in * scale` over `reduction_axes`, where scale = 1 / (product of the reduced dimension sizes).
+ const ReductionAxes& reduction_axes,  // Indexed with reduction_axes[i]; each entry is passed to in.dimension().
+ const Eigen::internal::MeanReducer<T>& reducer) {  // `reducer` is not referenced in this body.
+ typedef typename IN_T::Index Index;  // Index type taken from the input expression type.
+ // Eigen sum reductions are much faster on GPU than mean reductions:
+ // Simply trigger them by computing the sum of the weighted inputs. NOTE(review): says "GPU" but the device here is SYCLDevice -- confirm the claim applies.
+ Index num_coeffs_to_reduce = 1;  // Running product of the sizes of the reduced dimensions.
+ for (int i = 0; i < Eigen::internal::array_size<ReductionAxes>::value;  // One iteration per entry of ReductionAxes (compile-time count).
+ ++i) {
+ num_coeffs_to_reduce *= in.dimension(reduction_axes[i]);
+ }
+ T scale = T(1.0) / num_coeffs_to_reduce;  // NOTE(review): divides by zero if any reduced dimension has size 0 -- confirm how callers treat empty reductions.
+ out.device(d) = (in * scale).sum(reduction_axes);  // Scale every input first, then sum along the reduction axes.
+ }
+ // FillIdentity: forwards the device, output and reducer unchanged to FillIdentityEigenImpl (defined outside this hunk).
+ template <typename OUT_T>
+ static void FillIdentity(const SYCLDevice& d, OUT_T out,
+ const Eigen::internal::MeanReducer<T>& reducer) {
+ FillIdentityEigenImpl(d, out, reducer);
+ }
+};
#endif // TENSORFLOW_USE_SYCL
} // namespace functor