PR 544: Set requestedAlignment correctly for SliceVectorizedTraversals

Commit aa110e681b8b2237757a652ba47da49e1fbd2cd6 optimised the multiplication of small dynamically sized matrices by restricting the packet size to a maximum of 4, increasing the chances that SIMD instructions are used in the computation. However, it introduced a mismatch between the packet size and the requestedAlignment. This mismatch can lead to crashes when the destination is not aligned. This patch fixes the issue by ensuring that the AssignmentTraits are correctly computed when using a restricted packet size. * * * Bind LinearPacketType to MaxPacketSize This commit applies any packet size limit specified when instantiating copy_using_evaluator_traits to the LinearPacketType, provided that the size of the destination is not known at compile time. * * * Add unit test for restricted packet assignment A new unit test is added to check that multiplication of small dynamically sized matrices works correctly when the packet size is restricted to 4 and the destination is unaligned.
author: Mark D Ryan <mark.d.ryan@intel.com> 2018-11-13 16:15:08 +0100
committer: Mark D Ryan <mark.d.ryan@intel.com> 2018-11-13 16:15:08 +0100
commit: 670d56441cbf115652630c890bac1ba41cd20106 (patch)
tree: 336bd0bddcbf7823f527fd5abd24cf23e10c3f67 /test/evaluators.cpp
parent: 3dc08450468b01ffdd311f3422b3ebfa875f02a0 (diff)
1 files changed, 25 insertions, 0 deletions
diff --git a/test/evaluators.cpp b/test/evaluators.cpp
index f4fdaf053..ec000f1eb 100644
--- a/test/evaluators.cpp
+++ b/test/evaluators.cpp
@@ -90,6 +90,12 @@ namespace Eigen {
     {
       call_assignment_no_alias(dst.expression(), src, func);
     }
+
+    template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
+    EIGEN_DEVICE_FUNC void call_restricted_packet_assignment(const NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+    { // NoAlias overload: unwraps the destination via dst.expression() and forwards src/func to the _no_alias routine
+      call_restricted_packet_assignment_no_alias(dst.expression(), src, func);
+    }
   }
   
 }
@@ -496,4 +502,23 @@ EIGEN_DECLARE_TEST(evaluators)
     VERIFY_IS_EQUAL( get_cost(a*(a+b)), 1);
     VERIFY_IS_EQUAL( get_cost(a.lazyProduct(a+b)), 15);
   }
+
+  {
+    // Regression test (PR 544): call_restricted_packet_assignment must handle an unaligned destination.
+    const size_t M = 2;
+    const size_t K = 2;
+    const size_t N = 5;
+    float *destMem = new float[(M*N) + 1];  // one spare float so the M*N destination can start at destMem+1
+    // EIGEN_MAX_ALIGN_BYTES is 0 when alignment is disabled; clamp the divisor to 1 to avoid a modulo by zero.
+    float *dest = (internal::UIntPtr(destMem)%(EIGEN_MAX_ALIGN_BYTES > 0 ? EIGEN_MAX_ALIGN_BYTES : 1)) == 0 ? destMem+1 : destMem;
+
+    const Matrix<float, Dynamic, Dynamic, RowMajor> a = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(M, K);
+    const Matrix<float, Dynamic, Dynamic, RowMajor> b = Matrix<float, Dynamic, Dynamic, RowMajor>::Random(K, N);
+    Map<Matrix<float, Dynamic, Dynamic, RowMajor> > z(dest, M, N);
+    Product<Matrix<float, Dynamic, Dynamic, RowMajor>, Matrix<float, Dynamic, Dynamic, RowMajor>, LazyProduct> tmp(a,b);
+    internal::call_restricted_packet_assignment(z.noalias(), tmp.derived(), internal::assign_op<float, float>());
+
+    VERIFY_IS_APPROX(z, a*b);
+    delete[] destMem;
+  }
 }
author	Mark D Ryan <mark.d.ryan@intel.com>	2018-11-13 16:15:08 +0100
committer	Mark D Ryan <mark.d.ryan@intel.com>	2018-11-13 16:15:08 +0100
commit	670d56441cbf115652630c890bac1ba41cd20106 (patch)
tree	336bd0bddcbf7823f527fd5abd24cf23e10c3f67 /test/evaluators.cpp
parent	3dc08450468b01ffdd311f3422b3ebfa875f02a0 (diff)