aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/reduction_ops_gpu.cu.cc')
-rw-r--r--tensorflow/core/kernels/reduction_ops_gpu.cu.cc65
1 files changed, 65 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
new file mode 100644
index 0000000000..8e29d2d06c
--- /dev/null
+++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc
@@ -0,0 +1,65 @@
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/framework/numeric_types.h"
+#include "tensorflow/core/kernels/reduction_ops.h"
+
+namespace tensorflow {
+namespace functor {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+// Derive Index type. int (32-bit) or long (64-bit) depending on the
+// compile-time configuration. "float" here is not relevant.
+// TODO(zhifengc): Moves the definition to TTypes.
+typedef TTypes<float>::Tensor::Index Index;
+
+template <>
+struct ReduceFunctor<GPUDevice> {
+ template <typename OUT_T, typename IN_T, typename ReductionAxes,
+ typename Reducer>
+ static void Reduce(const GPUDevice& d, OUT_T out, IN_T in,
+ const ReductionAxes& reduction_axes,
+ const Reducer& reducer) {
+ ReduceEigenImpl(d, To32Bit(out), To32Bit(in), reduction_axes, reducer);
+ }
+};
+
+// T: the data type
+// REDUCER: the reducer functor
+// NUM_AXES: the number of axes to reduce
+// IN_DIMS: the number of dimensions of the input tensor
+#define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES) \
+ template void ReduceFunctor<GPUDevice>::Reduce( \
+ const GPUDevice& d, TTypes<T, IN_DIMS - NUM_AXES>::Tensor out, \
+ TTypes<T, IN_DIMS>::ConstTensor in, \
+ const Eigen::array<Index, NUM_AXES>& reduction_axes, \
+ const REDUCER& reducer);
+
+#define DEFINE_FOR_TYPE_AND_R(T, R) \
+ DEFINE(T, R, 1, 1); \
+ DEFINE(T, R, 2, 1); \
+ DEFINE(T, R, 3, 1); \
+ DEFINE(T, R, 3, 2);
+
+#define DEFINE_FOR_ALL_REDUCERS(T) \
+ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>); \
+ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>); \
+ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>); \
+ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer<T>)
+
+DEFINE_FOR_ALL_REDUCERS(float);
+#undef DEFINE_FOR_ALL_REDUCERS
+
+DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::SumReducer<complex64>);
+DEFINE_FOR_TYPE_AND_R(bool, AllReducer);
+DEFINE_FOR_TYPE_AND_R(bool, AnyReducer);
+#undef DEFINE_FOR_TYPE_AND_R
+
+#undef DEFINE
+
+} // end namespace functor
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA