aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <eugene.zhulenev@gmail.com>2019-12-18 20:07:00 +0000
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2019-12-18 20:07:00 +0000
commitae07801dd8d295657f28b006e1e4999edf835052 (patch)
tree08a91a4368c15d365127344f920bd10f8e437db2 /unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
parent72166d0e6eaf12a99f449e26f402f926bef2bb50 (diff)
Tensor block evaluation cost model
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h95
1 files changed, 76 insertions, 19 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index dc9af3aa8..e89f40213 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -73,14 +73,68 @@ EIGEN_STRONG_INLINE DSizes<std::ptrdiff_t, sizeof...(Indices)> strides(
enum class TensorBlockShapeType { kUniformAllDims, kSkewedInnerDims };
struct TensorBlockResourceRequirements {
- TensorBlockShapeType shape_type;
- size_t size;
+ TensorBlockShapeType shape_type; // target block shape
+ size_t size; // target block size
+ TensorOpCost cost_per_coeff; // cost of computing a single block element
+
+ template <typename Scalar>
+ EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
+ TensorBlockShapeType shape_type, size_t size_in_bytes,
+ TensorOpCost cost) {
+ const size_t size = numext::maxi(size_t(1), size_in_bytes / sizeof(Scalar));
+ return {shape_type, size, cost};
+ }
+
+ template <typename Scalar>
+ EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
+ TensorBlockShapeType shape_type, size_t size_in_bytes) {
+ // This default cost per coefficient is valid for most materialized tensor
+ // block evaluation implementations, because they typically just read
+ // coefficients from the underlying tensor storage, and write to the tensor
+ // block buffer (scratch or destination memory, reads and writes have linear
+ // access pattern). We ignore the fixed cost of block evaluation, because in
+ // practice it should negligible.
+ //
+ // Lazy block evaluation adds the cost of calling a functor for each
+ // coefficient.
+ //
+ // All non-trivial block evaluation implementations must provide their own
+ // cost approximation (e.g. shuffling inner dimension has a much higher cost
+ // because it reads memory randomly, although the total number of moved
+ // bytes is the same).
+ return withShapeAndSize<Scalar>(shape_type, size_in_bytes,
+ {/*bytes_loaded=*/sizeof(Scalar),
+ /*bytes_stored=*/sizeof(Scalar),
+ /*compute_cycles=*/0});
+ }
+
+ template <typename Scalar>
+ EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements skewed(
+ size_t size_in_bytes) {
+ return withShapeAndSize<Scalar>(TensorBlockShapeType::kSkewedInnerDims,
+ size_in_bytes);
+ }
+
+ template <typename Scalar>
+ EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements uniform(
+ size_t size_in_bytes) {
+ return withShapeAndSize<Scalar>(TensorBlockShapeType::kUniformAllDims,
+ size_in_bytes);
+ }
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE TensorBlockResourceRequirements
- merge(const TensorBlockResourceRequirements &lhs,
- const TensorBlockResourceRequirements &rhs) {
- return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
+ merge(const TensorBlockResourceRequirements& lhs,
+ const TensorBlockResourceRequirements& rhs) {
+ return {merge(lhs.shape_type, rhs.shape_type), // shape_type
+ merge(lhs.size, rhs.size), // size
+ merge(lhs.cost_per_coeff, rhs.cost_per_coeff)}; // cost_per_coeff
+ }
+
+ EIGEN_DEVICE_FUNC TensorBlockResourceRequirements& addCostPerCoeff(
+ TensorOpCost cost) {
+ cost_per_coeff += cost;
+ return *this;
}
// This is a resource requirement that should be returned from expressions
@@ -88,10 +142,10 @@ struct TensorBlockResourceRequirements {
// expression with raw buffer access).
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE TensorBlockResourceRequirements any() {
- return {TensorBlockShapeType::kUniformAllDims, 1};
+ return {TensorBlockShapeType::kUniformAllDims, 1, {0, 0, 0}};
}
-private:
+ private:
using Requirements = TensorBlockResourceRequirements;
EIGEN_DEVICE_FUNC
@@ -100,13 +154,19 @@ private:
}
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE TensorBlockShapeType merge(TensorBlockShapeType lhs,
- TensorBlockShapeType rhs) {
+ static EIGEN_STRONG_INLINE TensorBlockShapeType
+ merge(TensorBlockShapeType lhs, TensorBlockShapeType rhs) {
return (lhs == TensorBlockShapeType::kSkewedInnerDims ||
rhs == TensorBlockShapeType::kSkewedInnerDims)
? TensorBlockShapeType::kSkewedInnerDims
: TensorBlockShapeType::kUniformAllDims;
}
+
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE TensorOpCost merge(TensorOpCost lhs_cost,
+ TensorOpCost rhs_cost) {
+ return lhs_cost + rhs_cost;
+ }
};
// -------------------------------------------------------------------------- //
@@ -131,8 +191,9 @@ class TensorBlockDescriptor {
class DestinationBuffer {
public:
enum DestinationBufferKind : int {
- // The above explicit specification of "int" as the enum basetype is needed
- // to get around a HIPCC link error ("the field type is not amp-compatible")
+ // The above explicit specification of "int" as the enum basetype is
+ // needed to get around a HIPCC link error ("the field type is not
+ // amp-compatible")
// which is issued for class members with the enum type.
// TODO(rocm):
// remove the "int" basetype once HIPCC has been fixed to not error out
@@ -280,7 +341,7 @@ class TensorBlockMapper {
TensorBlockMapper() = default;
TensorBlockMapper(const DSizes<IndexType, NumDims>& dimensions,
- const TensorBlockResourceRequirements& requirements)
+ const TensorBlockResourceRequirements& requirements)
: m_tensor_dimensions(dimensions), m_requirements(requirements) {
// Compute block dimensions and the total number of blocks.
InitializeBlockDimensions();
@@ -299,8 +360,8 @@ class TensorBlockMapper {
return m_block_dimensions;
}
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- BlockDescriptor blockDescriptor(IndexType block_index) const {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockDescriptor
+ blockDescriptor(IndexType block_index) const {
static const bool isColMajor = Layout == static_cast<int>(ColMajor);
IndexType offset = 0;
@@ -416,7 +477,7 @@ class TensorBlockMapper {
eigen_assert(m_block_dimensions.TotalSize() >=
numext::mini<IndexType>(target_block_size,
- m_tensor_dimensions.TotalSize()));
+ m_tensor_dimensions.TotalSize()));
// Calculate block counts by dimension and total block count.
DSizes<IndexType, NumDims> block_count;
@@ -761,7 +822,6 @@ class TensorMaterializedBlock {
template <typename UnaryOp, typename ArgTensorBlock>
class TensorCwiseUnaryBlock {
-
static const bool NoArgBlockAccess =
internal::is_void<typename ArgTensorBlock::XprType>::value;
@@ -793,7 +853,6 @@ class TensorCwiseUnaryBlock {
template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock>
class TensorCwiseBinaryBlock {
-
static const bool NoArgBlockAccess =
internal::is_void<typename LhsTensorBlock::XprType>::value ||
internal::is_void<typename RhsTensorBlock::XprType>::value;
@@ -840,7 +899,6 @@ class TensorCwiseBinaryBlock {
template <typename BlockFactory, typename ArgTensorBlock>
class TensorUnaryExprBlock {
-
typedef typename ArgTensorBlock::XprType ArgXprType;
static const bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
@@ -872,7 +930,6 @@ class TensorUnaryExprBlock {
template <typename BlockFactory, typename Arg1TensorBlock,
typename Arg2TensorBlock, typename Arg3TensorBlock>
class TensorTernaryExprBlock {
-
typedef typename Arg1TensorBlock::XprType Arg1XprType;
typedef typename Arg2TensorBlock::XprType Arg2XprType;
typedef typename Arg3TensorBlock::XprType Arg3XprType;