author     Eugene Zhulenev <ezhulenev@google.com>   2018-08-01 12:35:19 -0700
committer  Eugene Zhulenev <ezhulenev@google.com>   2018-08-01 12:35:19 -0700
commit     64abdf1d7eb17174f571751346dd0cbadcf3bc52
tree       a112affc194ca8a976e5bba18e46fa9fc9d2179a /unsupported/Eigen/CXX11/src/Tensor
parent     385b3ff12f1dd41a096908a0103873a768a8597d
Fix typo + get rid of redundant member variables for block sizes
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
6 files changed, 32 insertions, 42 deletions
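
All six files get the same treatment: the per-evaluator member m_block_total_size_max, computed eagerly in the constructor, is replaced by a value derived on demand inside getResourceRequirements(), and evaluators that previously lacked a device handle now store a const Device&. Below is a minimal, self-contained sketch of the before/after shape of the change; Device and the two evaluator structs are simplified stand-ins, not Eigen's real classes.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

// Stand-in for Eigen's Device types; only the cache-size query matters here.
struct Device {
  std::size_t lastLevelCacheSize() const { return 8 * 1024 * 1024; }
};

using Index = std::ptrdiff_t;  // Eigen::Index is likewise a signed type.

// Before: every evaluator cached the block-size cap as a member at
// construction time, even when block-based evaluation was never used.
struct EvaluatorBefore {
  explicit EvaluatorBefore(const Device& device)
      : m_block_total_size_max(std::max<Index>(
            1, device.lastLevelCacheSize() / sizeof(float))) {}
  Index blockTotalSizeMax() const { return m_block_total_size_max; }
  Index m_block_total_size_max;  // the redundant member this patch removes
};

// After: keep a reference to the device and derive the cap on demand;
// no extra state per evaluator.
struct EvaluatorAfter {
  explicit EvaluatorAfter(const Device& device) : m_device(device) {}
  Index blockTotalSizeMax() const {
    return std::max<Index>(1, m_device.lastLevelCacheSize() / sizeof(float));
  }
  const Device& m_device;  // must outlive the evaluator, as in Eigen
};

int main() {
  Device d;
  std::cout << EvaluatorBefore(d).blockTotalSizeMax() << "\n"
            << EvaluatorAfter(d).blockTotalSizeMax() << "\n";  // both 2097152
}
```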
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index b6dbe5a22..cca14aafd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   // Block based access to the XprType (input) tensor.
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;
-  // We do block based broadcasting using a a trick with 2x tensor rank and 0
+  // We do block based broadcasting using a trick with 2x tensor rank and 0
   // strides. See block method implementation for details.
   using BroadcastDimensions = DSizes<Index, 2 * NumDims>;
   using BroadcastTensorBlock = internal::TensorBlock<ScalarNoConst, Index, 2 * NumDims, Layout>;
@@ -589,8 +589,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    Index l1_cache_scalars = m_device.firstLevelCacheSize() / sizeof(Scalar);
-    Index block_total_size_max = numext::maxi(Index(1), l1_cache_scalars);
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
         block_total_size_max));
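
The typo fix above sits next to a comment describing Eigen's block-broadcasting trick: double the tensor rank and give the broadcast dimensions a stride of 0, so that indexing along them keeps re-reading the same source data. Here is a toy illustration of the zero-stride idea; it is not the actual block() implementation.

```cpp
#include <array>
#include <cstddef>
#include <iostream>

int main() {
  // Source: a 1x3 row, logically broadcast to 4x3. A dimension of size 1
  // broadcast to size N can be addressed with stride 0: every index along
  // that dimension maps back to the same source element.
  std::array<float, 3> src = {10.f, 20.f, 30.f};
  const std::ptrdiff_t stride_row = 0;  // broadcast dimension: stride 0
  const std::ptrdiff_t stride_col = 1;
  for (std::ptrdiff_t i = 0; i < 4; ++i) {    // broadcast rows
    for (std::ptrdiff_t j = 0; j < 3; ++j) {  // real columns
      std::cout << src[i * stride_row + j * stride_col] << ' ';
    }
    std::cout << '\n';  // prints "10 20 30" four times
  }
}
```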
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 7579ab507..aca2ead12 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -202,9 +202,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
         m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
       }
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }
 }
@@ -290,9 +287,11 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -370,13 +369,14 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   {
     Index inputIndex;
     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
-        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
+        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims - 1)) {
       // m_stride is equal to 1, so let's avoid the integer division.
       eigen_assert(m_stride == 1);
       inputIndex = index * m_inputStride + m_inputOffset;
-    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
-               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
-      // m_stride is aways greater than index, so let's avoid the integer division.
+    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
+               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
+      // m_stride is always greater than index, so let's avoid the integer
+      // division.
       eigen_assert(m_stride > index);
       inputIndex = index + m_inputOffset;
     } else {
@@ -392,7 +392,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   Index m_stride;
   Index m_inputOffset;
   Index m_inputStride;
-  Index m_block_total_size_max;
   DSizes<Index, NumInputDims> m_inputStrides;
   TensorEvaluator<ArgType, Device> m_impl;
   const internal::DimensionId<DimId> m_dim;
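
For context on the reflowed branches in the @@ -370 hunk: the two fast paths correspond to chipping the innermost or the outermost dimension. The checks below verify those index formulas on a hypothetical column-major 3x2 matrix; the member names in the comments mirror the diff, but the concrete values are worked out just for this toy case.

```cpp
#include <cassert>
#include <cstddef>

// Flat index of (row, col) in a column-major rows x cols matrix.
std::ptrdiff_t flat(std::ptrdiff_t row, std::ptrdiff_t col,
                    std::ptrdiff_t rows) {
  return row + col * rows;
}

int main() {
  const std::ptrdiff_t rows = 3, cols = 2;

  // Chipping dim 0 at row k yields a length-`cols` vector; the evaluator's
  // fast path computes inputIndex = index * m_inputStride + m_inputOffset,
  // where here m_inputStride = rows and m_inputOffset = k (the m_stride == 1
  // branch).
  const std::ptrdiff_t k = 1;
  for (std::ptrdiff_t index = 0; index < cols; ++index)
    assert(index * rows + k == flat(k, index, rows));

  // Chipping the last dim at column c yields a length-`rows` vector; the
  // fast path is inputIndex = index + m_inputOffset, where here
  // m_inputOffset = c * rows (the m_stride > index branch, since
  // m_stride = rows and index < rows).
  const std::ptrdiff_t c = 1;
  for (std::ptrdiff_t index = 0; index < rows; ++index)
    assert(index + c * rows == flat(index, c, rows));
  return 0;
}
```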
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
index 39759b6c3..a8247be90 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -259,7 +259,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
 #else
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
 #endif
-      : m_impl(op.expression(), device)
+      : m_device(device), m_impl(op.expression(), device)
 #ifdef EIGEN_USE_SYCL
       , m_op(op)
 #endif
@@ -404,9 +404,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     } else {
       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -551,9 +548,11 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
@@ -743,8 +742,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
   internal::TensorIntDivisor<Index> m_fastOutputDepth;

   Scalar m_paddingValue;
-  Index m_block_total_size_max;

+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
 #ifdef EIGEN_USE_SYCL
   // Required for SYCL in order to construct the expression tree on the device
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 2630311b8..6ddded0bd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -560,9 +560,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
       }
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -672,9 +669,11 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -761,7 +760,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
   Dimensions m_dimensions;
   bool m_is_identity;
   const StartIndices m_offsets;
-  Index m_block_total_size_max;
 };
@@ -1047,9 +1045,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
       m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
     }
   }
-  m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                        device.lastLevelCacheSize() /
-                                        sizeof(Scalar));
 }

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -1128,7 +1123,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
   DSizes<Index, NumDims> m_dimensions;
   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
   const Strides m_strides;
-  std::size_t m_block_total_size_max;
   //use by sycl
   const StartIndices m_exprStartIndices;
   //use by sycl
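
Besides deleting the member, the TensorStridingSlicingOp hunk removes the one place that computed the maximum in std::size_t with a static_cast'ed 1. A sketch of the type issue the explicit template argument avoids, with std::max standing in for numext::maxi:

```cpp
#include <algorithm>
#include <cstddef>

using Index = std::ptrdiff_t;  // signed, like Eigen::Index

// Stand-in for Device::lastLevelCacheSize(), which reports bytes as size_t.
std::size_t lastLevelCacheSize() { return 8u * 1024u * 1024u; }

Index blockTotalSizeMax() {
  // std::max(1, lastLevelCacheSize() / sizeof(float));
  // would not compile: the two arguments deduce to int vs std::size_t.
  // Pinning the template argument converts both sides to one signed type,
  // which is what numext::maxi<Eigen::Index>(...) does in the patch and why
  // the old static_cast<std::size_t>(1) workaround can go away.
  return std::max<Index>(1, lastLevelCacheSize() / sizeof(float));
}

int main() { return blockTotalSizeMax() == (8 * 1024 * 1024) / 4 ? 0 : 1; }
```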
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index c41783106..73675e7dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -572,9 +572,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
             : (static_cast<int>(Layout) == static_cast<int>(ColMajor))
                   ? m_preservedStrides[0]
                   : m_preservedStrides[NumOutputDims - 1];
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -771,9 +768,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -1204,9 +1203,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
   // Indexed by reduced dimensions.
   array<Index, NumReducedDims> m_reducedDims;

-  // Block size for tiled (aka TensorBlock) evaluation.
-  Index m_block_total_size_max;
-
   // Evaluator for the input expression.
   TensorEvaluator<ArgType, Device> m_impl;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 77f47bf64..f94c1380d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -124,8 +124,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_shuffle(op.shufflePermutation())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
+                                                        const Device& device)
+      : m_device(device),
+        m_impl(op.expression(), device),
+        m_shuffle(op.shufflePermutation())
   {
     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
     const Shuffle& shuffle = op.shufflePermutation();
@@ -162,9 +165,6 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     for (int i = 0; i < NumDims; ++i) {
       m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.firstLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -226,9 +226,10 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
-        internal::TensorBlockShapeType::kUniformAllDims,
-        m_block_total_size_max));
+        internal::TensorBlockShapeType::kUniformAllDims, block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -384,7 +385,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   array<Index, NumDims> m_inputStrides;
   array<Index, NumDims> m_unshuffledInputStrides;
-  Index m_block_total_size_max;
+
+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
   /// required by sycl
   Shuffle m_shuffle;
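
Worth noting: shuffling requests kUniformAllDims where the other evaluators request kSkewedInnerDims. The sketch below only mimics the apparent intent of the two strategies and is not Eigen's block-sizing algorithm: skewed blocks maximize the inner extent for sequential access, while uniform blocks keep extents balanced, which suits a shuffle where input and output iteration orders disagree.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

using Index = std::ptrdiff_t;

// Illustrative only: split a total block-size budget across `dims` either
// skewed toward the inner (first) dimensions or uniformly across all of them.
std::vector<Index> blockDims(const std::vector<Index>& dims, Index budget,
                             bool skewed) {
  std::vector<Index> block(dims.size(), 1);
  if (skewed) {
    // Give the innermost dimension as much as possible, then move outward.
    Index remaining = budget;
    for (std::size_t i = 0; i < dims.size() && remaining > 1; ++i) {
      block[i] = std::min(dims[i], remaining);
      remaining /= block[i];
    }
  } else {
    // Grow all dimensions together until the budget is exhausted.
    bool grew = true;
    while (grew) {
      grew = false;
      for (std::size_t i = 0; i < dims.size(); ++i) {
        Index total = 1;
        for (Index b : block) total *= b;
        if (block[i] < dims[i] && total / block[i] * (block[i] + 1) <= budget) {
          ++block[i];
          grew = true;
        }
      }
    }
  }
  return block;
}

int main() {
  std::vector<Index> dims = {1024, 1024};
  for (bool skewed : {true, false}) {
    auto b = blockDims(dims, 4096, skewed);
    std::cout << (skewed ? "skewed:  " : "uniform: ") << b[0] << " x " << b[1]
              << "\n";  // skewed: 1024 x 4, uniform: 64 x 64
  }
}
```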