aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-08-08 19:29:59 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-08-08 19:29:59 -0700
commit64e68cbe87d18f40978fba31797fc07c2fbb9a1f (patch)
treee7057cb080b3a6f4f87aed8737c301e5e5a91a78 /unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
parent5157ce8cbf48f8b8844622a5d159f191935cb51a (diff)
Don't attempt to optimize partial reductions when the optimized implementation doesn't buy anything.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h24
1 files changed, 20 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 0e1576954..9df697e4c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -505,12 +505,20 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
(reducing_inner_dims || ReducingInnerMostDims)) {
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
- if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
+ if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
m_result = data;
}
Op reducer(m_reducer);
- return internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
+ if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
+ if (m_result) {
+ m_device.deallocate(m_result);
+ m_result = NULL;
+ }
+ return true;
+ } else {
+ return (m_result != NULL);
+ }
}
bool preserving_inner_dims = true;
@@ -525,12 +533,20 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
preserving_inner_dims) {
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
- if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
+ if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
m_result = data;
}
Op reducer(m_reducer);
- return internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
+ if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
+ if (m_result) {
+ m_device.deallocate(m_result);
+ m_result = NULL;
+ }
+ return true;
+ } else {
+ return (m_result != NULL);
+ }
}
}
return true;