1 files changed, 65 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
new file mode 100644
index 0000000000..8e29d2d06c
--- /dev/null
+++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
@@ -0,0 +1,65 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/kernels/reduction_ops.h"
+
+namespace tensorflow {
+namespace functor {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+// Derive Index type. int (32-bit) or long (64-bit) depending on the
+// compile-time configuration. "float" here is not relevant.
+// TODO(zhifengc): Moves the definition to TTypes.
+typedef TTypes<float>::Tensor::Index Index;
+
+template <>
+struct ReduceFunctor<GPUDevice> {
+  template <typename OUT_T, typename IN_T, typename ReductionAxes,
+            typename Reducer>
+  static void Reduce(const GPUDevice& d, OUT_T out, IN_T in,
+                     const ReductionAxes& reduction_axes,
+                     const Reducer& reducer) {
+    ReduceEigenImpl(d, To32Bit(out), To32Bit(in), reduction_axes, reducer);
+  }
+};
+
+// T: the data type
+// REDUCER: the reducer functor
+// NUM_AXES: the number of axes to reduce
+// IN_DIMS: the number of dimensions of the input tensor
+#define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES)                        \
+  template void ReduceFunctor<GPUDevice>::Reduce(                    \
+      const GPUDevice& d, TTypes<T, IN_DIMS - NUM_AXES>::Tensor out, \
+      TTypes<T, IN_DIMS>::ConstTensor in,                            \
+      const Eigen::array<Index, NUM_AXES>& reduction_axes,           \
+      const REDUCER& reducer);
+
+#define DEFINE_FOR_TYPE_AND_R(T, R) \
+  DEFINE(T, R, 1, 1);               \
+  DEFINE(T, R, 2, 1);               \
+  DEFINE(T, R, 3, 1);               \
+  DEFINE(T, R, 3, 2);
+
+#define DEFINE_FOR_ALL_REDUCERS(T)                          \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>); \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>); \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>); \
+  DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer<T>)
+
+DEFINE_FOR_ALL_REDUCERS(float);
+#undef DEFINE_FOR_ALL_REDUCERS
+
+DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::SumReducer<complex64>);
+DEFINE_FOR_TYPE_AND_R(bool, AllReducer);
+DEFINE_FOR_TYPE_AND_R(bool, AnyReducer);
+#undef DEFINE_FOR_TYPE_AND_R
+
+#undef DEFINE
+
+}  // end namespace functor
+}  // end namespace tensorflow
+
+#endif  // GOOGLE_CUDA