#if GOOGLE_CUDA #define EIGEN_USE_GPU #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/kernels/reduction_ops.h" namespace tensorflow { namespace functor { typedef Eigen::GpuDevice GPUDevice; // Derive Index type. int (32-bit) or long (64-bit) depending on the // compile-time configuration. "float" here is not relevant. // TODO(zhifengc): Moves the definition to TTypes. typedef TTypes::Tensor::Index Index; template <> struct ReduceFunctor { template static void Reduce(const GPUDevice& d, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, const Reducer& reducer) { ReduceEigenImpl(d, To32Bit(out), To32Bit(in), reduction_axes, reducer); } }; // T: the data type // REDUCER: the reducer functor // NUM_AXES: the number of axes to reduce // IN_DIMS: the number of dimensions of the input tensor #define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES) \ template void ReduceFunctor::Reduce( \ const GPUDevice& d, TTypes::Tensor out, \ TTypes::ConstTensor in, \ const Eigen::array& reduction_axes, \ const REDUCER& reducer); #define DEFINE_FOR_TYPE_AND_R(T, R) \ DEFINE(T, R, 1, 1); \ DEFINE(T, R, 2, 1); \ DEFINE(T, R, 3, 1); \ DEFINE(T, R, 3, 2); #define DEFINE_FOR_ALL_REDUCERS(T) \ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::SumReducer); \ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MinReducer); \ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::MaxReducer); \ DEFINE_FOR_TYPE_AND_R(T, Eigen::internal::ProdReducer) DEFINE_FOR_ALL_REDUCERS(float); #undef DEFINE_FOR_ALL_REDUCERS DEFINE_FOR_TYPE_AND_R(complex64, Eigen::internal::SumReducer); DEFINE_FOR_TYPE_AND_R(bool, AllReducer); DEFINE_FOR_TYPE_AND_R(bool, AnyReducer); #undef DEFINE_FOR_TYPE_AND_R #undef DEFINE } // end namespace functor } // end namespace tensorflow #endif // GOOGLE_CUDA