about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
diff options
context:
space:
mode:
author: Rasmus Munk Larsen <rmlarsen@google.com> 2019-08-02 11:18:13 -0700
committer: Rasmus Munk Larsen <rmlarsen@google.com> 2019-08-02 11:18:13 -0700
commit: e2999d4c388f3bc556a556befdcb51b1139e9d92 (patch)
tree: dd0d03cd89ce8dbc2b74b22d741a21b9e6bc0752 /unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
parent: f22b7283a3822524d03e4d93e6144bc1c9dd13a5 (diff)
Fix performance regressions due to https://bitbucket.org/eigen/eigen/pull-requests/662.
The change caused the device struct to be copied for each expression evaluation, and caused, e.g., a 10% regression in the TensorFlow multinomial op on GPU:

Benchmark                          Time(ns)  CPU(ns)  Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4        128173   231326        2922  1.610G items/s

VS

Benchmark                          Time(ns)  CPU(ns)  Iterations
----------------------------------------------------------------------
BM_Multinomial_gpu_1_100000_4        146683   246914        2719  1.509G items/s
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 8
1 file changed, 4 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 1d48b5eed..a3a79d4e9 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -164,7 +164,7 @@ struct TensorEvaluator
protected:
EvaluatorPointerType m_data;
Dimensions m_dims;
- const Device m_device;
+ const Device EIGEN_DEVICE_REF m_device;
};
namespace {
@@ -302,7 +302,7 @@ struct TensorEvaluator<const Derived, Device>
protected:
EvaluatorPointerType m_data;
Dimensions m_dims;
- const Device m_device;
+ const Device EIGEN_DEVICE_REF m_device;
};
@@ -480,7 +480,7 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
private:
- const Device m_device;
+ const Device EIGEN_DEVICE_REF m_device;
const UnaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
};
@@ -603,7 +603,7 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
}
#endif
private:
- const Device m_device;
+ const Device EIGEN_DEVICE_REF m_device;
const BinaryOp m_functor;
TensorEvaluator<LeftArgType, Device> m_leftImpl;
TensorEvaluator<RightArgType, Device> m_rightImpl;