#if GOOGLE_CUDA

#define EIGEN_USE_GPU

#include "tensorflow/core/kernels/aggregate_ops.h"

#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/framework/tensor_types.h"

namespace tensorflow {

typedef Eigen::GpuDevice GPUDevice;

// Partial specialization for a GPUDevice, that uses the Eigen implementation.
namespace functor {

template <typename T>
struct Add2Functor<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<T>::ConstFlat in1,
                  typename TTypes<T>::ConstFlat in2) {
    Add2EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2);
  }
};

template <typename T>
struct Add3Functor<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<T>::ConstFlat in1,
                  typename TTypes<T>::ConstFlat in2,
                  typename TTypes<T>::ConstFlat in3) {
    Add3EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3);
  }
};

template <typename T>
struct Add4Functor<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<T>::ConstFlat in1,
                  typename TTypes<T>::ConstFlat in2,
                  typename TTypes<T>::ConstFlat in3,
                  typename TTypes<T>::ConstFlat in4) {
    Add4EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3, in4);
  }
};

template <typename T>
struct Add5Functor<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<T>::ConstFlat in1,
                  typename TTypes<T>::ConstFlat in2,
                  typename TTypes<T>::ConstFlat in3,
                  typename TTypes<T>::ConstFlat in4,
                  typename TTypes<T>::ConstFlat in5) {
    Add5EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3, in4, in5);
  }
};

template <typename T>
struct Add6Functor<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<T>::ConstFlat in1,
                  typename TTypes<T>::ConstFlat in2,
                  typename TTypes<T>::ConstFlat in3,
                  typename TTypes<T>::ConstFlat in4,
                  typename TTypes<T>::ConstFlat in5,
                  typename TTypes<T>::ConstFlat in6) {
    Add6EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6);
  }
};

template <typename T>
struct Add7Functor<GPUDevice, T> {
  void operator()(const GPUDevice& d, typename TTypes<T>::Flat out,
                  typename TTypes<T>::ConstFlat in1,
                  typename TTypes<T>::ConstFlat in2,
                  typename TTypes<T>::ConstFlat in3,
                  typename TTypes<T>::ConstFlat in4,
                  typename TTypes<T>::ConstFlat in5,
                  typename TTypes<T>::ConstFlat in6,
                  typename TTypes<T>::ConstFlat in7) {
    Add7EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
                                         in7);
  }
};

template <typename T>
struct Add8Functor<GPUDevice, T> {
  void operator()(
      const GPUDevice& d, typename TTypes<T>::Flat out,
      typename TTypes<T>::ConstFlat in1, typename TTypes<T>::ConstFlat in2,
      typename TTypes<T>::ConstFlat in3, typename TTypes<T>::ConstFlat in4,
      typename TTypes<T>::ConstFlat in5, typename TTypes<T>::ConstFlat in6,
      typename TTypes<T>::ConstFlat in7, typename TTypes<T>::ConstFlat in8) {
    Add8EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
                                         in7, in8);
  }
};

// Add8p is like Add8, except that the underlying Eigen implementation
// accumulates into the existing contents of |out| (+=) rather than
// assigning to it.
template <typename T>
struct Add8pFunctor<GPUDevice, T> {
  void operator()(
      const GPUDevice& d, typename TTypes<T>::Flat out,
      typename TTypes<T>::ConstFlat in1, typename TTypes<T>::ConstFlat in2,
      typename TTypes<T>::ConstFlat in3, typename TTypes<T>::ConstFlat in4,
      typename TTypes<T>::ConstFlat in5, typename TTypes<T>::ConstFlat in6,
      typename TTypes<T>::ConstFlat in7, typename TTypes<T>::ConstFlat in8) {
    Add8pEigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
                                          in7, in8);
  }
};

template <typename T>
struct Add9Functor<GPUDevice, T> {
  void operator()(
      const GPUDevice& d, typename TTypes<T>::Flat out,
      typename TTypes<T>::ConstFlat in1, typename TTypes<T>::ConstFlat in2,
      typename TTypes<T>::ConstFlat in3, typename TTypes<T>::ConstFlat in4,
      typename TTypes<T>::ConstFlat in5, typename TTypes<T>::ConstFlat in6,
      typename TTypes<T>::ConstFlat in7, typename TTypes<T>::ConstFlat in8,
      typename TTypes<T>::ConstFlat in9) {
    Add9EigenImpl<GPUDevice, T>::Compute(d, out, in1, in2, in3, in4, in5, in6,
                                         in7, in8, in9);
  }
};

}  // end namespace functor
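
// Rough sketch of how an op kernel is expected to invoke one of these
// functors for two GPU inputs (simplified, hypothetical names; see
// aggregate_ops.cc for the actual AddN kernel):
//
//   functor::Add2Functor<GPUDevice, float> add2;
//   add2(ctx->eigen_device<GPUDevice>(), output->flat<float>(),
//        ctx->input(0).flat<float>(), ctx->input(1).flat<float>());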

// Instantiate the GPU implementation for float.
template struct functor::Add2Functor<GPUDevice, float>;
template struct functor::Add3Functor<GPUDevice, float>;
template struct functor::Add4Functor<GPUDevice, float>;
template struct functor::Add5Functor<GPUDevice, float>;
template struct functor::Add6Functor<GPUDevice, float>;
template struct functor::Add7Functor<GPUDevice, float>;
template struct functor::Add8Functor<GPUDevice, float>;
template struct functor::Add8pFunctor<GPUDevice, float>;
template struct functor::Add9Functor<GPUDevice, float>;

}  // end namespace tensorflow

#endif  // GOOGLE_CUDA