aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-14 22:13:21 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-14 22:13:21 -0700
commit33c702c79fe227a5b22229c26af276d359a6cb1d (patch)
tree86ca1888d05abb165b3ddbe892cfff7b5464573d /unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
parent756292f8aa124c842d1e6d9beeb0c416c0d9a7f3 (diff)
Added support for fast integer divisions by a constant
Sped up tensor slicing by a factor of 3 by using these fast integer divisions.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h26
1 files changed, 16 insertions, 10 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 2b1b503cf..ca3735d64 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -305,8 +305,10 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
for (int i = 0; i < NumDims; ++i) {
if (i > 0) {
m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
+ m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
} else {
m_outputStrides[0] = 1;
+ m_fastOutputStrides[0] = 1;
}
}
}
@@ -331,7 +333,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
{
Index inputIndex = 0;
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
+ const Index idx = index / m_fastOutputStrides[i];
inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
index -= idx * m_outputStrides[i];
}
@@ -349,8 +351,8 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1};
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / m_outputStrides[i];
- const Index idx1 = indices[1] / m_outputStrides[i];
+ const Index idx0 = indices[0] / m_fastOutputStrides[i];
+ const Index idx1 = indices[1] / m_fastOutputStrides[i];
inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
indices[0] -= idx0 * m_outputStrides[i];
@@ -379,6 +381,7 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
private:
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
+ array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
array<Index, NumDims> m_inputStrides;
const StartIndices m_offsets;
TensorEvaluator<ArgType, Device> m_impl;
@@ -418,9 +421,11 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
for (int i = 0; i < NumDims; ++i) {
if (i > 0) {
m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
+ m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
} else {
m_outputStrides[0] = 1;
- }
+ m_fastOutputStrides[0] = 1;
+ }
}
}
@@ -444,7 +449,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
Index inputIndex = 0;
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
+ const Index idx = index / m_fastOutputStrides[i];
inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
index -= idx * m_outputStrides[i];
}
@@ -460,8 +465,8 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1};
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / m_outputStrides[i];
- const Index idx1 = indices[1] / m_outputStrides[i];
+ const Index idx0 = indices[0] / m_fastOutputStrides[i];
+ const Index idx1 = indices[1] / m_fastOutputStrides[i];
inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
indices[0] -= idx0 * m_outputStrides[i];
@@ -489,7 +494,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
Index inputIndex = 0;
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx = index / m_outputStrides[i];
+ const Index idx = index / m_fastOutputStrides[i];
inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
index -= idx * m_outputStrides[i];
}
@@ -504,8 +509,8 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
Index inputIndices[] = {0, 0};
Index indices[] = {index, index + packetSize - 1};
for (int i = NumDims - 1; i > 0; --i) {
- const Index idx0 = indices[0] / m_outputStrides[i];
- const Index idx1 = indices[1] / m_outputStrides[i];
+ const Index idx0 = indices[0] / m_fastOutputStrides[i];
+ const Index idx1 = indices[1] / m_fastOutputStrides[i];
inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
indices[0] -= idx0 * m_outputStrides[i];
@@ -532,6 +537,7 @@ struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
private:
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
+ array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
array<Index, NumDims> m_inputStrides;
const StartIndices m_offsets;
TensorEvaluator<ArgType, Device> m_impl;