From ae07801dd8d295657f28b006e1e4999edf835052 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev
Date: Wed, 18 Dec 2019 20:07:00 +0000
Subject: Tensor block evaluation cost model

---
 unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h   |  95 ++++++++++++----
 .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h    |   8 +-
 .../Eigen/CXX11/src/Tensor/TensorChipping.h        |   6 +-
 .../Eigen/CXX11/src/Tensor/TensorEvaluator.h       |  23 ++--
 .../Eigen/CXX11/src/Tensor/TensorExecutor.h        |   7 +-
 .../Eigen/CXX11/src/Tensor/TensorGenerator.h       |   8 +-
 .../Eigen/CXX11/src/Tensor/TensorMorphing.h        |   5 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h |   5 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h |  10 +-
 .../Eigen/CXX11/src/Tensor/TensorShuffling.h       |  21 ++--
 unsupported/test/cxx11_tensor_block_access.cpp     | 119 ++++++++++-----------
 unsupported/test/cxx11_tensor_block_eval.cpp       |   3 +-
 unsupported/test/cxx11_tensor_block_io.cpp         |  10 +-
 13 files changed, 194 insertions(+), 126 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index dc9af3aa8..e89f40213 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -73,14 +73,68 @@ EIGEN_STRONG_INLINE DSizes<IndexType, NumDims> strides(
 enum class TensorBlockShapeType { kUniformAllDims, kSkewedInnerDims };
 
 struct TensorBlockResourceRequirements {
-  TensorBlockShapeType shape_type;
-  size_t size;
+  TensorBlockShapeType shape_type;  // target block shape
+  size_t size;                      // target block size
+  TensorOpCost cost_per_coeff;      // cost of computing a single block element
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
+      TensorBlockShapeType shape_type, size_t size_in_bytes,
+      TensorOpCost cost) {
+    const size_t size = numext::maxi(size_t(1), size_in_bytes / sizeof(Scalar));
+    return {shape_type, size, cost};
+  }
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements withShapeAndSize(
+      TensorBlockShapeType shape_type, size_t size_in_bytes) {
+    // This default cost per coefficient is valid for most materialized tensor
+    // block evaluation implementations, because they typically just read
+    // coefficients from the underlying tensor storage and write to the tensor
+    // block buffer (scratch or destination memory; reads and writes have a
+    // linear access pattern). We ignore the fixed cost of block evaluation,
+    // because in practice it should be negligible.
+    //
+    // Lazy block evaluation adds the cost of calling a functor for each
+    // coefficient.
+    //
+    // All non-trivial block evaluation implementations must provide their own
+    // cost approximation (e.g. shuffling the inner dimension has a much higher
+    // cost because it reads memory randomly, although the total number of
+    // moved bytes is the same).
+    return withShapeAndSize<Scalar>(shape_type, size_in_bytes,
+                                    {/*bytes_loaded=*/sizeof(Scalar),
+                                     /*bytes_stored=*/sizeof(Scalar),
+                                     /*compute_cycles=*/0});
+  }
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements skewed(
+      size_t size_in_bytes) {
+    return withShapeAndSize<Scalar>(TensorBlockShapeType::kSkewedInnerDims,
+                                    size_in_bytes);
+  }
+
+  template <typename Scalar>
+  EIGEN_DEVICE_FUNC static TensorBlockResourceRequirements uniform(
+      size_t size_in_bytes) {
+    return withShapeAndSize<Scalar>(TensorBlockShapeType::kUniformAllDims,
+                                    size_in_bytes);
+  }
 
   EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE TensorBlockResourceRequirements
-  merge(const TensorBlockResourceRequirements &lhs,
-        const TensorBlockResourceRequirements &rhs) {
-    return {merge(lhs.shape_type, rhs.shape_type), merge(rhs.size, lhs.size)};
+  merge(const TensorBlockResourceRequirements& lhs,
+        const TensorBlockResourceRequirements& rhs) {
+    return {merge(lhs.shape_type, rhs.shape_type),           // shape_type
+            merge(lhs.size, rhs.size),                       // size
+            merge(lhs.cost_per_coeff, rhs.cost_per_coeff)};  // cost_per_coeff
+  }
+
+  EIGEN_DEVICE_FUNC TensorBlockResourceRequirements& addCostPerCoeff(
+      TensorOpCost cost) {
+    cost_per_coeff += cost;
+    return *this;
   }
 
   // This is a resource requirement that should be returned from expressions
@@ -88,10 +142,10 @@ struct TensorBlockResourceRequirements {
   // expression with raw buffer access).
   EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE TensorBlockResourceRequirements
   any() {
-    return {TensorBlockShapeType::kUniformAllDims, 1};
+    return {TensorBlockShapeType::kUniformAllDims, 1, {0, 0, 0}};
   }
 
-private:
+ private:
   using Requirements = TensorBlockResourceRequirements;
 
   EIGEN_DEVICE_FUNC
@@ -100,13 +154,19 @@ private:
   }
 
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE TensorBlockShapeType merge(TensorBlockShapeType lhs,
-                                                        TensorBlockShapeType rhs) {
+  static EIGEN_STRONG_INLINE TensorBlockShapeType
+  merge(TensorBlockShapeType lhs, TensorBlockShapeType rhs) {
     return (lhs == TensorBlockShapeType::kSkewedInnerDims ||
             rhs == TensorBlockShapeType::kSkewedInnerDims)
                ? TensorBlockShapeType::kSkewedInnerDims
               : TensorBlockShapeType::kUniformAllDims;
   }
+
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE TensorOpCost merge(TensorOpCost lhs_cost,
+                                                TensorOpCost rhs_cost) {
+    return lhs_cost + rhs_cost;
+  }
 };
 
 // -------------------------------------------------------------------------- //
@@ -131,8 +191,9 @@ class TensorBlockDescriptor {
   class DestinationBuffer {
    public:
     enum DestinationBufferKind : int {
-      // The above explicit specification of "int" as the enum basetype is needed
-      // to get around a HIPCC link error ("the field type is not amp-compatible")
+      // The above explicit specification of "int" as the enum basetype is
+      // needed to get around a HIPCC link error ("the field type is not
+      // amp-compatible")
       // which is issued for class members with the enum type.
       // TODO(rocm):
       // remove the "int" basetype once HIPCC has been fixed to not error out
@@ -280,7 +341,7 @@ class TensorBlockMapper {
 
   TensorBlockMapper() = default;
   TensorBlockMapper(const DSizes<IndexType, NumDims>& dimensions,
-                   const TensorBlockResourceRequirements& requirements)
+                    const TensorBlockResourceRequirements& requirements)
       : m_tensor_dimensions(dimensions), m_requirements(requirements) {
     // Compute block dimensions and the total number of blocks.
     InitializeBlockDimensions();
@@ -299,8 +360,8 @@ class TensorBlockMapper {
     return m_block_dimensions;
   }
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  BlockDescriptor blockDescriptor(IndexType block_index) const {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockDescriptor
+  blockDescriptor(IndexType block_index) const {
     static const bool isColMajor = Layout == static_cast<int>(ColMajor);
 
     IndexType offset = 0;
@@ -416,7 +477,7 @@ class TensorBlockMapper {
 
     eigen_assert(m_block_dimensions.TotalSize() >=
                  numext::mini<IndexType>(target_block_size,
-                                        m_tensor_dimensions.TotalSize()));
+                                         m_tensor_dimensions.TotalSize()));
 
     // Calculate block counts by dimension and total block count.
     DSizes<IndexType, NumDims> block_count;
@@ -761,7 +822,6 @@ class TensorMaterializedBlock {
 
 template <typename UnaryOp, typename ArgTensorBlock>
 class TensorCwiseUnaryBlock {
-
   static const bool NoArgBlockAccess =
       internal::is_void<typename ArgTensorBlock::XprType>::value;
 
@@ -793,7 +853,6 @@ class TensorCwiseUnaryBlock {
 
 template <typename BinaryOp, typename LhsTensorBlock, typename RhsTensorBlock>
 class TensorCwiseBinaryBlock {
-
   static const bool NoArgBlockAccess =
       internal::is_void<typename LhsTensorBlock::XprType>::value ||
       internal::is_void<typename RhsTensorBlock::XprType>::value;
 
@@ -840,7 +899,6 @@ class TensorCwiseBinaryBlock {
 
 template <typename BlockFactory, typename ArgTensorBlock>
 class TensorUnaryExprBlock {
-
   typedef typename ArgTensorBlock::XprType ArgXprType;
   static const bool NoArgBlockAccess = internal::is_void<ArgXprType>::value;
 
@@ -872,7 +930,6 @@ class TensorUnaryExprBlock {
 
 template <typename BlockFactory, typename Arg1TensorBlock,
           typename Arg2TensorBlock, typename Arg3TensorBlock>
 class TensorTernaryExprBlock {
-
   typedef typename Arg1TensorBlock::XprType Arg1XprType;
   typedef typename Arg2TensorBlock::XprType Arg2XprType;
   typedef typename Arg3TensorBlock::XprType Arg3XprType;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 620c8741c..3408f90d1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -620,12 +620,10 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.firstLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
-        m_impl.getResourceRequirements());
+        m_impl.getResourceRequirements(),
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index f51a8559d..5b28e706d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -296,11 +296,9 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.lastLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
         m_impl.getResourceRequirements());
   }
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 146cc325e..d4532b72c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -521,7 +521,9 @@ struct TensorEvaluator<const TensorCwiseUnaryOp<UnaryOp, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    return m_argImpl.getResourceRequirements();
+    static const double functor_cost = internal::functor_traits<UnaryOp>::Cost;
+    return m_argImpl.getResourceRequirements().addCostPerCoeff(
+        {0, 0, functor_cost / PacketSize});
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
@@ -654,9 +656,11 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightAr
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
+    static const double functor_cost = internal::functor_traits<BinaryOp>::Cost;
     return internal::TensorBlockResourceRequirements::merge(
-        m_leftImpl.getResourceRequirements(),
-        m_rightImpl.getResourceRequirements());
+               m_leftImpl.getResourceRequirements(),
+               m_rightImpl.getResourceRequirements())
+        .addCostPerCoeff({0, 0, functor_cost / PacketSize});
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
@@ -934,11 +938,16 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
+    auto then_req = m_thenImpl.getResourceRequirements();
+    auto else_req = m_elseImpl.getResourceRequirements();
+
+    auto merged_req =
+        internal::TensorBlockResourceRequirements::merge(then_req, else_req);
+    merged_req.cost_per_coeff =
+        then_req.cost_per_coeff.cwiseMax(else_req.cost_per_coeff);
+
     return internal::TensorBlockResourceRequirements::merge(
-        m_condImpl.getResourceRequirements(),
-        internal::TensorBlockResourceRequirements::merge(
-            m_thenImpl.getResourceRequirements(),
-            m_elseImpl.getResourceRequirements()));
+        m_condImpl.getResourceRequirements(), merged_req);
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index b90791d8d..93bab11b1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -245,8 +245,8 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
       evaluator.getResourceRequirements();
 
   // Update target block size based on cost model.
-  TensorOpCost cost = evaluator.costPerCoeff(Vectorizable);
-  double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(1, cost);
+  double taskSize = TensorCostModel<ThreadPoolDevice>::taskSize(
+      1, requirements.cost_per_coeff);
   requirements.size = static_cast<size_t>(1.0 / taskSize);
 
   TensorBlockMapper block_mapper(
@@ -259,7 +259,8 @@ TensorExecutorTilingContext<TensorBlockMapper> GetTensorExecutorTilingContext(
       align *
      divup<size_t>(block_size * sizeof(typename Evaluator::Scalar), align);
 
-  return {block_mapper, cost * block_size, aligned_blocksize};
+  return {block_mapper, requirements.cost_per_coeff * block_size,
+          aligned_blocksize};
 }
 
 template
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
index fb4b5e246..b1ff1d8b1 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGenerator.h
@@ -166,10 +166,10 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-    return {internal::TensorBlockShapeType::kSkewedInnerDims,
-            target_block_size};
+    const size_t target_size = m_device.firstLevelCacheSize();
+    // TODO(ezhulenev): Generator should have a cost.
+    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+        target_size);
   }
 
   struct BlockIteratorState {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 5c2036626..879a67ea4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -634,10 +634,9 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+    const size_t target_size = m_device.lastLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
         m_impl.getResourceRequirements());
   }
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 201bea6bb..e070d0b93 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -229,10 +229,9 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
+    const size_t target_size = m_device.lastLevelCacheSize();
     return internal::TensorBlockResourceRequirements::merge(
-        {internal::TensorBlockShapeType::kSkewedInnerDims, target_block_size},
+        internal::TensorBlockResourceRequirements::skewed<Scalar>(target_size),
         m_impl.getResourceRequirements());
   }
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
index c4ac81db8..2fc85c13c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReverse.h
@@ -246,10 +246,12 @@ struct TensorEvaluator<const TensorReverseOp<ReverseDimensions, ArgType>, Device
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   internal::TensorBlockResourceRequirements getResourceRequirements() const {
-    const size_t target_block_size =
-        numext::maxi<size_t>(1, m_device.lastLevelCacheSize() / sizeof(Scalar));
-    return {internal::TensorBlockShapeType::kSkewedInnerDims,
-            target_block_size};
+    const size_t target_size = m_device.lastLevelCacheSize();
+    // Block evaluation reads the underlying memory in reverse order, and the
+    // default cost model does not properly catch this in bytes stored/loaded.
+    return internal::TensorBlockResourceRequirements::skewed<Scalar>(
+               target_size)
+        .addCostPerCoeff({0, 0, 24});
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 1a6891ffd..597ca64cd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -249,14 +249,21 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     static const int inner_dim =
         Layout == static_cast<int>(ColMajor) ? 0 : NumDims - 1;
 
-    const size_t target_block_size = numext::maxi<size_t>(
-        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
-
+    const size_t target_size = m_device.firstLevelCacheSize();
     const bool inner_dim_shuffled = m_shuffle[inner_dim] != inner_dim;
-    return {inner_dim_shuffled
-                ? internal::TensorBlockShapeType::kUniformAllDims
-                : internal::TensorBlockShapeType::kSkewedInnerDims,
-            target_block_size};
+
+    // A shuffled inner dimension leads to random memory access, which is not
+    // captured by the default cost model's bytes loaded/stored. We add this
+    // cost explicitly. The number of cycles was picked based on benchmarks.
+    // TODO(ezhulenev): This number was picked based on very questionable
+    // benchmarks; add benchmarks that are representative of real workloads.
+    using BlockRequirements = internal::TensorBlockResourceRequirements;
+    if (inner_dim_shuffled) {
+      return BlockRequirements::uniform<Scalar>(target_size)
+          .addCostPerCoeff({0, 0, NumDims * 28});
+    } else {
+      return BlockRequirements::skewed<Scalar>(target_size);
+    }
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorBlock
diff --git a/unsupported/test/cxx11_tensor_block_access.cpp b/unsupported/test/cxx11_tensor_block_access.cpp
index 33dc2535a..5fb12e0e0 100644
--- a/unsupported/test/cxx11_tensor_block_access.cpp
+++ b/unsupported/test/cxx11_tensor_block_access.cpp
@@ -21,6 +21,7 @@ using Eigen::RowMajor;
 using Eigen::ColMajor;
 
 using Eigen::internal::TensorBlockShapeType;
+static TensorOpCost zeroCost() { return {0, 0, 0}; }
 
 template <typename T>
 static const T& choose(int layout, const T& col, const T& row) {
@@ -73,7 +74,7 @@ static void test_block_mapper_sanity()
 
   // Test uniform blocks.
   TensorBlockMapper uniform_block_mapper(
-      tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100});
+      tensor_dims, {TensorBlockShapeType::kUniformAllDims, 100, zeroCost()});
 
   VERIFY_IS_EQUAL(uniform_block_mapper.blockCount(), 100);
   VERIFY_IS_EQUAL(uniform_block_mapper.blockTotalSize(), 100);
@@ -85,7 +86,7 @@ static void test_block_mapper_sanity()
 
   // Test skewed to inner dims blocks.
   TensorBlockMapper skewed_block_mapper(
-      tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100});
+      tensor_dims, {TensorBlockShapeType::kSkewedInnerDims, 100, zeroCost()});
 
   VERIFY_IS_EQUAL(skewed_block_mapper.blockCount(), 100);
   VERIFY_IS_EQUAL(skewed_block_mapper.blockTotalSize(), 100);
@@ -130,7 +131,8 @@ static void test_block_mapper_maps_every_element() {
   std::set<Index> coeff_set;
 
   // Try different combinations of block types and sizes.
-  TensorBlockMapper block_mapper(dims, {RandomShape(), RandomTargetSize(dims)});
+  TensorBlockMapper block_mapper(
+      dims, {RandomShape(), RandomTargetSize(dims), zeroCost()});
 
   for (int i = 0; i < block_mapper.blockCount(); ++i) {
     auto block = block_mapper.blockDescriptor(i);
@@ -233,9 +235,8 @@ static void test_uniform_block_shape()
   // Test shape 'UniformAllDims' with uniform 'max_coeff count'.
   DSizes<Index, 5> dims(11, 5, 6, 17, 7);
   const Index max_coeff_count = 5 * 5 * 5 * 5 * 5;
-  TensorBlockMapper
-      block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                          max_coeff_count});
+  TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                        max_coeff_count, zeroCost()});
   TensorBlock block = block_mapper.blockDescriptor(0);
   for (int i = 0; i < 5; ++i) {
     VERIFY_IS_EQUAL(5, block.dimensions()[i]);
@@ -248,9 +249,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 5 * 5 * 5;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
@@ -260,9 +260,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 5 * 6;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(6, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
@@ -276,9 +275,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 5 * 5 * 5;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
@@ -288,9 +286,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 5 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
@@ -304,9 +301,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 7 * 5;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -317,9 +313,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const Index max_coeff_count = 5 * 5 * 5 * 6 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(6, block.dimensions()[3]);
@@ -333,9 +328,8 @@ static void test_uniform_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(7, 5, 6, 17, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -346,9 +340,8 @@ static void test_uniform_block_shape()
   } else {
     DSizes<Index, 5> dims(7, 5, 6, 9, 7);
     const Index max_coeff_count = 7 * 5 * 6 * 9 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(dims, {TensorBlockShapeType::kUniformAllDims,
+                                          max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(9, block.dimensions()[3]);
@@ -369,9 +362,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 10 * 1 * 1 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(10, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
@@ -381,9 +374,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 1 * 6;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(6, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
@@ -396,9 +389,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 1 * 1 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     for (int i = 1; i < 5; ++i) {
@@ -408,9 +401,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 1 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     for (int i = 3; i >= 0; --i) {
@@ -424,9 +417,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 3 * 1 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     VERIFY_IS_EQUAL(3, block.dimensions()[1]);
@@ -437,9 +430,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 1 * 15 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(15, block.dimensions()[3]);
@@ -454,9 +447,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 5 * 1 * 1;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -468,9 +461,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 1 * 1 * 5 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(17, block.dimensions()[3]);
@@ -485,9 +478,9 @@ static void test_skewed_inner_dim_block_shape()
   if (Layout == ColMajor) {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(11, block.dimensions()[0]);
     VERIFY_IS_EQUAL(5, block.dimensions()[1]);
@@ -498,9 +491,9 @@ static void test_skewed_inner_dim_block_shape()
   } else {
     DSizes<Index, 5> dims(11, 5, 6, 17, 7);
     const Index max_coeff_count = 11 * 5 * 6 * 17 * 7;
-    TensorBlockMapper
-        block_mapper(dims, {TensorBlockShapeType::kSkewedInnerDims,
-                            max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims,
+        {TensorBlockShapeType::kSkewedInnerDims, max_coeff_count, zeroCost()});
     TensorBlock block = block_mapper.blockDescriptor(0);
     VERIFY_IS_EQUAL(7, block.dimensions()[4]);
     VERIFY_IS_EQUAL(17, block.dimensions()[3]);
@@ -524,7 +517,8 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
   DSizes<Index, 1> dims(0);
   for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
-    TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
+    TensorBlockMapper block_mapper(
+        dims, {block_shape, max_coeff_count, zeroCost()});
     VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
     VERIFY(block_mapper.blockTotalSize() >= 1);
   }
@@ -537,7 +531,8 @@ static void test_empty_dims(const internal::TensorBlockShapeType block_shape)
     for (int dim2 = 0; dim2 < 3; ++dim2) {
       DSizes<Index, 2> dims(dim1, dim2);
       for (size_t max_coeff_count = 0; max_coeff_count < 2; ++max_coeff_count) {
-        TensorBlockMapper block_mapper(dims, {block_shape, max_coeff_count});
+        TensorBlockMapper block_mapper(
+            dims, {block_shape, max_coeff_count, zeroCost()});
         if (dim1 * dim2 == 0) {
           VERIFY_IS_EQUAL(block_mapper.blockCount(), 0);
         }
diff --git a/unsupported/test/cxx11_tensor_block_eval.cpp b/unsupported/test/cxx11_tensor_block_eval.cpp
index 4a785dcdc..81f0c90da 100644
--- a/unsupported/test/cxx11_tensor_block_eval.cpp
+++ b/unsupported/test/cxx11_tensor_block_eval.cpp
@@ -64,7 +64,8 @@ static TensorBlockParams<NumDims> SkewedInnerBlock(
   using BlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
   BlockMapper block_mapper(dims,
                            {internal::TensorBlockShapeType::kSkewedInnerDims,
-                            internal::random<size_t>(1, dims.TotalSize())});
+                            internal::random<size_t>(1, dims.TotalSize()),
+                            {0, 0, 0}});
 
   Index total_blocks = block_mapper.blockCount();
   Index block_index = internal::random<Index>(0, total_blocks - 1);
diff --git a/unsupported/test/cxx11_tensor_block_io.cpp b/unsupported/test/cxx11_tensor_block_io.cpp
index 25584433e..b8600eaea 100644
--- a/unsupported/test/cxx11_tensor_block_io.cpp
+++ b/unsupported/test/cxx11_tensor_block_io.cpp
@@ -75,8 +75,8 @@ static void test_block_io_copy_data_from_source_to_target() {
   // Construct a tensor block mapper.
   using TensorBlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
-  TensorBlockMapper block_mapper(dims, {RandomBlockShape(),
-                                        RandomTargetBlockSize(dims)});
+  TensorBlockMapper block_mapper(
+      dims, {RandomBlockShape(), RandomTargetBlockSize(dims), {0, 0, 0}});
 
   // We will copy data from input to output through this buffer.
   Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());
@@ -146,8 +146,10 @@ static void test_block_io_copy_using_reordered_dimensions() {
   // NOTE: Tensor block mapper works with shuffled dimensions.
   using TensorBlockMapper = internal::TensorBlockMapper<NumDims, Layout, Index>;
-  TensorBlockMapper block_mapper(output_tensor_dims, {RandomBlockShape(),
-                                 RandomTargetBlockSize(output_tensor_dims)});
+  TensorBlockMapper block_mapper(output_tensor_dims,
+                                 {RandomBlockShape(),
+                                  RandomTargetBlockSize(output_tensor_dims),
+                                  {0, 0, 0}});
 
   // We will copy data from input to output through this buffer.
   Tensor<T, NumDims, Layout> block(block_mapper.blockDimensions());
-- 
cgit v1.2.3
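
Taken together, the TensorBlock.h hunks define a small algebra on resource requirements: shape types merge toward kSkewedInnerDims, target sizes merge (the merge(size_t, size_t) body is elided by the hunk context above), and per-coefficient costs add. Note also how the TensorSelectOp hunk deliberately deviates from plain merge, overriding the summed then/else cost with their cwiseMax before merging with the condition's requirements. The following standalone sketch models that algebra outside of Eigen; all names are hypothetical stand-ins for the types in the patch, and the size merge is assumed to take the larger of the two targets.

    // Standalone sketch (not Eigen code): the requirements algebra from this
    // patch, assuming that merging sizes keeps the larger target.
    #include <algorithm>
    #include <cstddef>

    struct OpCost {  // stand-in for Eigen's TensorOpCost
      double bytes_loaded, bytes_stored, compute_cycles;
      OpCost operator+(const OpCost& o) const {
        return {bytes_loaded + o.bytes_loaded, bytes_stored + o.bytes_stored,
                compute_cycles + o.compute_cycles};
      }
    };

    enum class BlockShape { kUniformAllDims, kSkewedInnerDims };

    struct BlockRequirements {  // stand-in for TensorBlockResourceRequirements
      BlockShape shape;
      size_t size;  // target block size in coefficients
      OpCost cost;  // cost of computing a single block coefficient

      static BlockRequirements merge(const BlockRequirements& l,
                                     const BlockRequirements& r) {
        const bool skewed = l.shape == BlockShape::kSkewedInnerDims ||
                            r.shape == BlockShape::kSkewedInnerDims;
        return {skewed ? BlockShape::kSkewedInnerDims
                       : BlockShape::kUniformAllDims,
                std::max(l.size, r.size),  // assumed; elided in the hunk above
                l.cost + r.cost};          // both subtrees are paid per coeff
      }

      BlockRequirements& addCostPerCoeff(OpCost c) {
        cost = cost + c;
        return *this;
      }
    };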
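
The TensorExecutor.h hunk is where the accumulated cost feeds back into block sizing: requirements.size becomes 1.0 / taskSize(1, cost_per_coeff), i.e. the number of coefficients whose estimated cost adds up to roughly one scheduler task. The toy computation below illustrates only this inverse relationship; kTaskCycles and the cycle conversion are made-up stand-ins, not Eigen's actual TensorCostModel.

    // Toy illustration (not Eigen's TensorCostModel): cheaper coefficients
    // yield larger blocks, so each block stays roughly one task of work.
    #include <cstddef>

    double toyTaskSize(double num_coeffs, double cycles_per_coeff) {
      const double kTaskCycles = 100000;  // hypothetical task granularity
      return num_coeffs * cycles_per_coeff / kTaskCycles;
    }

    int main() {
      // Expensive expression, e.g. a deep chain of fused functors.
      size_t expensive = static_cast<size_t>(1.0 / toyTaskSize(1, 50));  // 2000
      // Cheap copy-like expression: 2 cycles/coeff -> 50000-coeff blocks.
      size_t cheap = static_cast<size_t>(1.0 / toyTaskSize(1, 2));
      return expensive < cheap ? 0 : 1;
    }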
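
Finally, the TensorReverse.h and TensorShuffling.h hunks show the escape hatch for evaluators whose memory traffic is not captured by bytes loaded/stored: they append extra compute cycles per coefficient. Continuing the sketch above (hypothetical names, float scalar assumed), a shuffle-like evaluator would report:

    // Continues the BlockRequirements sketch; constants mirror the patch.
    BlockRequirements shuffleRequirements(size_t target_bytes, int num_dims,
                                          bool inner_dim_shuffled) {
      const size_t coeffs = std::max<size_t>(1, target_bytes / sizeof(float));
      // Default per-coefficient cost: one scalar read and one scalar write.
      BlockRequirements req{inner_dim_shuffled ? BlockShape::kUniformAllDims
                                               : BlockShape::kSkewedInnerDims,
                            coeffs,
                            {sizeof(float), sizeof(float), 0}};
      if (inner_dim_shuffled) {
        // Random reads are invisible to bytes loaded/stored, so charge
        // benchmark-derived cycles per coefficient (28 per dimension).
        req.addCostPerCoeff({0, 0, num_dims * 28.0});
      }
      return req;
    }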