From e2999d4c388f3bc556a556befdcb51b1139e9d92 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Fri, 2 Aug 2019 11:18:13 -0700
Subject: Fix performance regressions due to https://bitbucket.org/eigen/eigen/pull-requests/662.

The change caused the device struct to be copied for each expression evaluation, and caused, e.g., a 10% regression in the TensorFlow multinomial op on GPU:

Benchmark                       Time(ns)  CPU(ns)  Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4   128173    231326   2922    1.610G items/s

VS

Benchmark                       Time(ns)  CPU(ns)  Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4   146683    246914   2719    1.509G items/s
---
 unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 1d48b5eed..a3a79d4e9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -164,7 +164,7 @@ struct TensorEvaluator
  protected:
   EvaluatorPointerType m_data;
   Dimensions m_dims;
-  const Device m_device;
+  const Device EIGEN_DEVICE_REF m_device;
 };
 
 namespace {
@@ -302,7 +302,7 @@ struct TensorEvaluator
  protected:
   EvaluatorPointerType m_data;
   Dimensions m_dims;
-  const Device m_device;
+  const Device EIGEN_DEVICE_REF m_device;
 };
 
 
@@ -480,7 +480,7 @@ struct TensorEvaluator, Device>
 
  private:
-  const Device m_device;
+  const Device EIGEN_DEVICE_REF m_device;
   const UnaryOp m_functor;
   TensorEvaluator m_argImpl;
 };
@@ -603,7 +603,7 @@ struct TensorEvaluator m_leftImpl;
   TensorEvaluator m_rightImpl;
-- 
cgit v1.2.3