about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
diff options
context:
space:
mode:
author: Eugene Zhulenev <ezhulenev@google.com> 2019-11-11 10:32:57 -0800
committer: Eugene Zhulenev <ezhulenev@google.com> 2019-11-11 10:32:57 -0800
commit c952b8dfda1cc161ae8e06c151a3323826e3ddad (patch)
tree e7e01d57d31bbbf1ea0445552bfd15e0a8b46d36 /unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
parent ebf04fb3e8dcab201d56fd1e3df4bf9c2bdaefc5 (diff)
Break loop dependence in TensorGenerator block access
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h | 27
1 file changed, 21 insertions, 6 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
index f590c71be..7f57281a0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
@@ -269,20 +269,35 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
CoeffReturnType* block_buffer = block_storage.data();
+ static const int packet_size = PacketType<CoeffReturnType, Device>::size;
+
+ static const int inner_dim = is_col_major ? 0 : NumDims - 1;
+ const Index inner_dim_size = it[0].size;
+ const Index inner_dim_vectorized = inner_dim_size - packet_size;
+
while (it[NumDims - 1].count < it[NumDims - 1].size) {
- // Generate data for the inner-most dimension.
- for (Index i = 0; i < it[0].size; ++i) {
+ Index i = 0;
+ // Generate data for the vectorized part of the inner-most dimension.
+ for (; i <= inner_dim_vectorized; i += packet_size) {
+ for (Index j = 0; j < packet_size; ++j) {
+ array<Index, NumDims> j_coords = coords; // Break loop dependence.
+ j_coords[inner_dim] += j;
+ *(block_buffer + offset + i + j) = m_generator(j_coords);
+ }
+ coords[inner_dim] += packet_size;
+ }
+ // Finalize non-vectorized part of the inner-most dimension.
+ for (; i < inner_dim_size; ++i) {
*(block_buffer + offset + i) = m_generator(coords);
- coords[is_col_major ? 0 : NumDims - 1]++;
+ coords[inner_dim]++;
}
- coords[is_col_major ? 0 : NumDims - 1] =
- initial_coords[is_col_major ? 0 : NumDims - 1];
+ coords[inner_dim] = initial_coords[inner_dim];
// For the 1d tensor we need to generate only one inner-most dimension.
if (NumDims == 1) break;
// Update offset.
- for (Index i = 1; i < NumDims; ++i) {
+ for (i = 1; i < NumDims; ++i) {
if (++it[i].count < it[i].size) {
offset += it[i].stride;
coords[is_col_major ? i : NumDims - 1 - i]++;