diff options
author | Gael Guennebaud <g.gael@free.fr> | 2009-03-08 10:14:07 +0000 |
---|---|---|
committer | Gael Guennebaud <g.gael@free.fr> | 2009-03-08 10:14:07 +0000 |
commit | 7718a8ed83bde47dbf4bc41e4d70ffd4c9fc1efb (patch) | |
tree | 95352f9dc3d4ed2af802a0f027ea73be2da3b1cd /Eigen/src/Geometry/arch | |
parent | f9790a649c7bd5dd321a330a9f5d423c56a6ed83 (diff) |
Slight optimization of the SSE-based integer multiplication (thanks to Rohit Garg).
Diffstat (limited to 'Eigen/src/Geometry/arch')
-rw-r--r-- | Eigen/src/Geometry/arch/Geometry_SSE.h | 17 |
1 file changed, 7 insertions, 10 deletions
```diff
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
index 6c2933898..9be5ecd6f 100644
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -26,9 +26,6 @@
 #ifndef EIGEN_GEOMETRY_SSE_H
 #define EIGEN_GEOMETRY_SSE_H
 
-#define vec4f_swizzle(v,p,q,r,s) (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), \
-  ((s)<<6|(r)<<4|(q)<<2|(p)))))
-
 template<> inline Quaternion<float>
 ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quaternion<float>& _b)
 {
@@ -36,14 +33,14 @@ ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quate
   Quaternion<float> res;
   __m128 a = _a.coeffs().packet<Aligned>(0);
   __m128 b = _b.coeffs().packet<Aligned>(0);
-  __m128 flip1 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle(a,1,2,0,2),
-                                       vec4f_swizzle(b,2,0,1,2)),mask);
-  __m128 flip2 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle(a,3,3,3,1),
-                                       vec4f_swizzle(b,0,1,2,1)),mask);
+  __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2),
+                                       ei_vec4f_swizzle1(b,2,0,1,2)),mask);
+  __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1),
+                                       ei_vec4f_swizzle1(b,0,1,2,1)),mask);
   ei_pstore(&res.x(),
-            _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle(b,3,3,3,3)),
-                                  _mm_mul_ps(vec4f_swizzle(a,2,0,1,0),
-                                             vec4f_swizzle(b,1,2,0,0))),
+            _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)),
+                                  _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0),
+                                             ei_vec4f_swizzle1(b,1,2,0,0))),
                        _mm_add_ps(flip1,flip2)));
   return res;
 }
```