about | summary | refs | log | tree | commit | diff | homepage
path: root/Eigen/src/Geometry/arch
diff options
context:
space:
mode:
author: Gael Guennebaud <g.gael@free.fr> 2009-03-08 10:14:07 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2009-03-08 10:14:07 +0000
commit: 7718a8ed83bde47dbf4bc41e4d70ffd4c9fc1efb (patch)
tree: 95352f9dc3d4ed2af802a0f027ea73be2da3b1cd /Eigen/src/Geometry/arch
parent: f9790a649c7bd5dd321a330a9f5d423c56a6ed83 (diff)
slight optimization of SSE base integer mul (thanks to Rohit Garg)
Diffstat (limited to 'Eigen/src/Geometry/arch')
-rw-r--r-- Eigen/src/Geometry/arch/Geometry_SSE.h 17
1 file changed, 7 insertions, 10 deletions
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h
index 6c2933898..9be5ecd6f 100644
--- a/Eigen/src/Geometry/arch/Geometry_SSE.h
+++ b/Eigen/src/Geometry/arch/Geometry_SSE.h
@@ -26,9 +26,6 @@
#ifndef EIGEN_GEOMETRY_SSE_H
#define EIGEN_GEOMETRY_SSE_H
-#define vec4f_swizzle(v,p,q,r,s) (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), \
- ((s)<<6|(r)<<4|(q)<<2|(p)))))
-
template<> inline Quaternion<float>
ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quaternion<float>& _b)
{
@@ -36,14 +33,14 @@ ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quate
Quaternion<float> res;
__m128 a = _a.coeffs().packet<Aligned>(0);
__m128 b = _b.coeffs().packet<Aligned>(0);
- __m128 flip1 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle(a,1,2,0,2),
- vec4f_swizzle(b,2,0,1,2)),mask);
- __m128 flip2 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle(a,3,3,3,1),
- vec4f_swizzle(b,0,1,2,1)),mask);
+ __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2),
+ ei_vec4f_swizzle1(b,2,0,1,2)),mask);
+ __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1),
+ ei_vec4f_swizzle1(b,0,1,2,1)),mask);
ei_pstore(&res.x(),
- _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle(b,3,3,3,3)),
- _mm_mul_ps(vec4f_swizzle(a,2,0,1,0),
- vec4f_swizzle(b,1,2,0,0))),
+ _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)),
+ _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0),
+ ei_vec4f_swizzle1(b,1,2,0,0))),
_mm_add_ps(flip1,flip2)));
return res;
}