diff options
-rw-r--r-- | Eigen/src/Core/Assign.h | 68 | ||||
-rw-r--r-- | Eigen/src/Core/util/Meta.h | 2 | ||||
-rw-r--r-- | test/packetmath.cpp | 39 |
3 files changed, 70 insertions, 39 deletions
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index 758858165..7bdf312c2 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -299,28 +299,58 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling> *** Linear vectorization *** ***************************/ +// template<typename Derived1, typename Derived2> +// struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> +// { +// static void run(Derived1 &dst, const Derived2 &src) +// { +// const int size = dst.size(); +// const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; +// const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0 +// : ei_alignmentOffset(&dst.coeffRef(0), size); +// const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; +// +// for(int index = 0; index < alignedStart; index++) +// dst.copyCoeff(index, src); +// +// for(int index = alignedStart; index < alignedEnd; index += packetSize) +// { +// dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src); +// } +// +// for(int index = alignedEnd; index < size; index++) +// dst.copyCoeff(index, src); +// } +// }; template<typename Derived1, typename Derived2> struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling> { - static void run(Derived1 &dst, const Derived2 &src) - { - const int size = dst.size(); - const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; - const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0 - : ei_alignmentOffset(&dst.coeffRef(0), size); - const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - - for(int index = 0; index < alignedStart; index++) - dst.copyCoeff(index, src); - - for(int index = alignedStart; index < alignedEnd; index += packetSize) - { - dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src); - } - - for(int index = alignedEnd; index < size; index++) - dst.copyCoeff(index, src); - } + static void run(Derived1 &dst, const Derived2 &src) + { + asm("#begin"); + const int size = dst.size(); + const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size; + const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0 + : ei_alignmentOffset(&dst.coeffRef(0), size); + const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + asm("#unaligned start"); + + for(int index = 0; index < alignedStart; index++) + dst.copyCoeff(index, src); + asm("#aligned middle"); + + for(int index = alignedStart; index < alignedEnd; index += packetSize) + { + dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src); + } + + asm("#unaligned end"); + + for(int index = alignedEnd; index < size; index++) + dst.copyCoeff(index, src); + asm("#end"); + } }; template<typename Derived1, typename Derived2> diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 97e455d83..bbb780053 100644 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -54,7 +54,7 @@ template<typename T> struct ei_unpointer<T*const> { typedef T type; }; template<typename T> struct ei_unconst { typedef T type; }; template<typename T> struct ei_unconst<const T> { typedef T type; }; -template<typename T> struct ei_unconst<const T&> { typedef T& type; }; +// template<typename T> struct ei_unconst<const T&> { typedef T& type; }; template<typename T> struct ei_cleantype { typedef T type; }; template<typename T> struct ei_cleantype<const T> { typedef typename ei_cleantype<T>::type type; }; diff --git a/test/packetmath.cpp b/test/packetmath.cpp index d7bfec94e..4d08f8020 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -86,23 +86,24 @@ template<typename Scalar> void packetmath() VERIFY(areApprox(data1, data2+offset, PacketSize) && "ei_pstoreu"); } - if (!ei_is_same_type<Scalar,double>::ret) + for (int offset=0; offset<PacketSize; ++offset) { - for (int offset=0; offset<PacketSize; ++offset) - { - packets[0] = ei_pload(data1); - packets[1] = ei_pload(data1+PacketSize); - if (offset==0) ei_palign<0>(packets[0], packets[1]); - else if (offset==1) ei_palign<1>(packets[0], packets[1]); - else if (offset==2) ei_palign<2>(packets[0], packets[1]); - else if (offset==3) ei_palign<3>(packets[0], packets[1]); - ei_pstore(data2, packets[0]); - - for (int i=0; i<PacketSize; ++i) - ref[i] = data1[i+offset]; - - VERIFY(areApprox(ref, data2, PacketSize) && "ei_palign"); - } + packets[0] = ei_pload(data1); + packets[1] = ei_pload(data1+PacketSize); + if (offset==0) ei_palign<0>(packets[0], packets[1]); + else if (offset==1) ei_palign<1>(packets[0], packets[1]); + else if (offset==2) ei_palign<2>(packets[0], packets[1]); + else if (offset==3) ei_palign<3>(packets[0], packets[1]); + ei_pstore(data2, packets[0]); + + for (int i=0; i<PacketSize; ++i) + ref[i] = data1[i+offset]; + + typedef Matrix<Scalar, PacketSize, 1> Vector; + std::cout << Vector(data1).transpose() << " | " << Vector(data1+PacketSize).transpose() << "\n"; + std::cout << " " << offset << " => " << Vector(ref).transpose() << " == " << Vector(data2).transpose() << "\n"; + + VERIFY(areApprox(ref, data2, PacketSize) && "ei_palign"); } CHECK_CWISE(REF_ADD, ei_padd); @@ -143,9 +144,9 @@ template<typename Scalar> void packetmath() void test_packetmath() { for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST( packetmath<float>() ); - CALL_SUBTEST( packetmath<double>() ); - CALL_SUBTEST( packetmath<int>() ); +// CALL_SUBTEST( packetmath<float>() ); +// CALL_SUBTEST( packetmath<double>() ); +// CALL_SUBTEST( packetmath<int>() ); packetmath<std::complex<float> >(); } } |