diff options
Diffstat (limited to 'Eigen/src/Geometry/arch/Geometry_SSE.h')
-rw-r--r-- | Eigen/src/Geometry/arch/Geometry_SSE.h | 59 |
1 files changed, 31 insertions, 28 deletions
diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index 7d82be694..cbe695c72 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -26,8 +26,10 @@ #ifndef EIGEN_GEOMETRY_SSE_H #define EIGEN_GEOMETRY_SSE_H +namespace internal { + template<class Derived, class OtherDerived> -struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned> +struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned> { inline static Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { @@ -35,31 +37,31 @@ struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned> Quaternion<float> res; __m128 a = _a.coeffs().template packet<Aligned>(0); __m128 b = _b.coeffs().template packet<Aligned>(0); - __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2), - ei_vec4f_swizzle1(b,2,0,1,2)),mask); - __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1), - ei_vec4f_swizzle1(b,0,1,2,1)),mask); - ei_pstore(&res.x(), - _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)), - _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0), - ei_vec4f_swizzle1(b,1,2,0,0))), + __m128 flip1 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,1,2,0,2), + vec4f_swizzle1(b,2,0,1,2)),mask); + __m128 flip2 = _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a,3,3,3,1), + vec4f_swizzle1(b,0,1,2,1)),mask); + pstore(&res.x(), + _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), + _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), + vec4f_swizzle1(b,1,2,0,0))), _mm_add_ps(flip1,flip2))); return res; } }; template<typename VectorLhs,typename VectorRhs> -struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> +struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> { - inline static typename ei_plain_matrix_type<VectorLhs>::type + inline static typename plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs, const VectorRhs& rhs) { __m128 a = lhs.template packet<VectorLhs::Flags&AlignedBit ? Aligned : Unaligned>(0); __m128 b = rhs.template packet<VectorRhs::Flags&AlignedBit ? Aligned : Unaligned>(0); - __m128 mul1=_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,3),ei_vec4f_swizzle1(b,2,0,1,3)); - __m128 mul2=_mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,3),ei_vec4f_swizzle1(b,1,2,0,3)); - typename ei_plain_matrix_type<VectorLhs>::type res; - ei_pstore(&res.x(),_mm_sub_ps(mul1,mul2)); + __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); + __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); + typename plain_matrix_type<VectorLhs>::type res; + pstore(&res.x(),_mm_sub_ps(mul1,mul2)); return res; } }; @@ -68,7 +70,7 @@ struct ei_cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> template<class Derived, class OtherDerived> -struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned> +struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned> { inline static Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { @@ -79,10 +81,10 @@ struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned const double* a = _a.coeffs().data(); Packet2d b_xy = _b.coeffs().template packet<Aligned>(0); Packet2d b_zw = _b.coeffs().template packet<Aligned>(2); - Packet2d a_xx = ei_pset1<Packet2d>(a[0]); - Packet2d a_yy = ei_pset1<Packet2d>(a[1]); - Packet2d a_zz = ei_pset1<Packet2d>(a[2]); - Packet2d a_ww = ei_pset1<Packet2d>(a[3]); + Packet2d a_xx = pset1<Packet2d>(a[0]); + Packet2d a_yy = pset1<Packet2d>(a[1]); + Packet2d a_zz = pset1<Packet2d>(a[2]); + Packet2d a_ww = pset1<Packet2d>(a[3]); // two temporaries: Packet2d t1, t2; @@ -92,13 +94,13 @@ struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned * t2 = zz*xy - xx*zw * res.xy = t1 +/- swap(t2) */ - t1 = ei_padd(ei_pmul(a_ww, b_xy), ei_pmul(a_yy, b_zw)); - t2 = ei_psub(ei_pmul(a_zz, b_xy), ei_pmul(a_xx, b_zw)); + t1 = padd(pmul(a_ww, b_xy), pmul(a_yy, b_zw)); + t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); #ifdef __SSE3__ EIGEN_UNUSED_VARIABLE(mask) - ei_pstore(&res.x(), _mm_addsub_pd(t1, ei_preverse(t2))); + pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); #else - ei_pstore(&res.x(), ei_padd(t1, ei_pxor(mask,ei_preverse(t2)))); + pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); #endif /* @@ -106,18 +108,19 @@ struct ei_quat_product<Architecture::SSE, Derived, OtherDerived, double, Aligned * t2 = zz*zw + xx*xy * res.zw = t1 -/+ swap(t2) = swap( swap(t1) +/- t2) */ - t1 = ei_psub(ei_pmul(a_ww, b_zw), ei_pmul(a_yy, b_xy)); - t2 = ei_padd(ei_pmul(a_zz, b_zw), ei_pmul(a_xx, b_xy)); + t1 = psub(pmul(a_ww, b_zw), pmul(a_yy, b_xy)); + t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); #ifdef __SSE3__ EIGEN_UNUSED_VARIABLE(mask) - ei_pstore(&res.z(), ei_preverse(_mm_addsub_pd(ei_preverse(t1), t2))); + pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); #else - ei_pstore(&res.z(), ei_psub(t1, ei_pxor(mask,ei_preverse(t2)))); + pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); #endif return res; } }; +} // end namespace internal #endif // EIGEN_GEOMETRY_SSE_H |