diff options
Diffstat (limited to 'tensorflow/core/kernels/reduction_ops_gpu.cu.cc')
-rw-r--r-- | tensorflow/core/kernels/reduction_ops_gpu.cu.cc | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc new file mode 100644 index 0000000000..8e29d2d06c --- /dev/null +++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc @@ -0,0 +1,65 @@ +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "tensorflow/core/framework/numeric_types.h" +#include "tensorflow/core/kernels/reduction_ops.h" + +namespace tensorflow { +namespace functor { + +typedef Eigen::GpuDevice GPUDevice; + +// Derive Index type. int (32-bit) or long (64-bit) depending on the +// compile-time configuration. "float" here is not relevant. +// TODO(zhifengc): Moves the definition to TTypes. +typedef TTypes<float>::Tensor::Index Index; + +template <> +struct ReduceFunctor<GPUDevice> { + template <typename OUT_T, typename IN_T, typename ReductionAxes, + typename Reducer> + static void Reduce(const GPUDevice& d, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Reducer& reducer) { + ReduceEigenImpl(d, To32Bit(out), To32Bit(in), reduction_axes, reducer); + } +}; + +// T: the data type +// REDUCER: the reducer functor +// NUM_AXES: the number of axes to reduce +// IN_DIMS: the number of dimensions of the input tensor +#define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES) \ + template void ReduceFunctor<GPUDevice>::Reduce( \ + const GPUDevice& d, TTypes<T, IN_DIMS - NUM_AXES>::Tensor out, \ + TTypes<T, IN_DIMS>::ConstTensor in, \ + const Eigen::array<Index, NUM_AXES>& reduction_axes, \ + const REDUCER& reducer); + +#define DEFINE_FOR_TYPE_AND_R(T, R) \ + DEFINE(T, R, 1, 1); \ + DEFINE(T, R, 2, 1); \ + DEFINE(T, R, 3, 1); \ + DEFINE(T, R, 3, 2); + +#define DEFINE_FOR_ALL_REDUCERS(T) \ + DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer<T>); \ + DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer<T>); \ + DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer<T>); \ + DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer<T>) + +DEFINE_FOR_ALL_REDUCERS(float); +#undef DEFINE_FOR_ALL_REDUCERS + +DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::SumReducer<complex64>); +DEFINE_FOR_TYPE_AND_R(bool, AllReducer); +DEFINE_FOR_TYPE_AND_R(bool, AnyReducer); +#undef DEFINE_FOR_TYPE_AND_R + +#undef DEFINE + +} // end namespace functor +} // end namespace tensorflow + +#endif // GOOGLE_CUDA |