about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/src/MoreVectorization/MathFunctions.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/src/MoreVectorization/MathFunctions.h')
-rw-r--r-- unsupported/Eigen/src/MoreVectorization/MathFunctions.h 47
1 files changed, 25 insertions, 22 deletions
diff --git a/unsupported/Eigen/src/MoreVectorization/MathFunctions.h b/unsupported/Eigen/src/MoreVectorization/MathFunctions.h
index a998a0615..bc948d0bd 100644
--- a/unsupported/Eigen/src/MoreVectorization/MathFunctions.h
+++ b/unsupported/Eigen/src/MoreVectorization/MathFunctions.h
@@ -26,12 +26,14 @@
#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
+namespace internal {
+
/** \internal \returns the arcsin of \a a (coeff-wise) */
-template<typename Packet> inline static Packet ei_pasin(Packet a) { return std::asin(a); }
+template<typename Packet> inline static Packet pasin(Packet a) { return std::asin(a); }
#ifdef EIGEN_VECTORIZE_SSE
-template<> EIGEN_DONT_INLINE Packet4f ei_pasin(Packet4f x)
+template<> EIGEN_DONT_INLINE Packet4f pasin(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
_EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);
@@ -48,9 +50,9 @@ template<> EIGEN_DONT_INLINE Packet4f ei_pasin(Packet4f x)
_EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
_EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);
- Packet4f a = ei_pabs(x);//got the absolute value
+ Packet4f a = pabs(x);//got the absolute value
- Packet4f sign_bit= _mm_and_ps(x, ei_p4f_sign_mask);//extracted the sign bit
+ Packet4f sign_bit= _mm_and_ps(x, p4f_sign_mask);//extracted the sign bit
Packet4f z1,z2;//will need them during computation
@@ -58,34 +60,34 @@ template<> EIGEN_DONT_INLINE Packet4f ei_pasin(Packet4f x)
//will compute the two branches for asin
//so first compare with half
- Packet4f branch_mask= _mm_cmpgt_ps(a, ei_p4f_half);//this is to select which branch to take
+ Packet4f branch_mask= _mm_cmpgt_ps(a, p4f_half);//this is to select which branch to take
//both will be taken, and finally results will be merged
//the branch for values >0.5
{
//the core series expansion
- z1=ei_pmadd(ei_p4f_minus_half,a,ei_p4f_half);
- Packet4f x1=ei_psqrt(z1);
- Packet4f s1=ei_pmadd(ei_p4f_asin1, z1, ei_p4f_asin2);
- Packet4f s2=ei_pmadd(s1, z1, ei_p4f_asin3);
- Packet4f s3=ei_pmadd(s2,z1, ei_p4f_asin4);
- Packet4f s4=ei_pmadd(s3,z1, ei_p4f_asin5);
- Packet4f temp=ei_pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd
- z1=ei_pmadd(temp,x1,x1);
- z1=ei_padd(z1,z1);
- z1=ei_psub(ei_p4f_pi_over_2,z1);
+ z1=pmadd(p4f_minus_half,a,p4f_half);
+ Packet4f x1=psqrt(z1);
+ Packet4f s1=pmadd(p4f_asin1, z1, p4f_asin2);
+ Packet4f s2=pmadd(s1, z1, p4f_asin3);
+ Packet4f s3=pmadd(s2,z1, p4f_asin4);
+ Packet4f s4=pmadd(s3,z1, p4f_asin5);
+ Packet4f temp=pmul(s4,z1);//not really a madd but a mul by z so that the next term can be a madd
+ z1=pmadd(temp,x1,x1);
+ z1=padd(z1,z1);
+ z1=psub(p4f_pi_over_2,z1);
}
{
//the core series expansion
Packet4f x2=a;
- z2=ei_pmul(x2,x2);
- Packet4f s1=ei_pmadd(ei_p4f_asin1, z2, ei_p4f_asin2);
- Packet4f s2=ei_pmadd(s1, z2, ei_p4f_asin3);
- Packet4f s3=ei_pmadd(s2,z2, ei_p4f_asin4);
- Packet4f s4=ei_pmadd(s3,z2, ei_p4f_asin5);
- Packet4f temp=ei_pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd
- z2=ei_pmadd(temp,x2,x2);
+ z2=pmul(x2,x2);
+ Packet4f s1=pmadd(p4f_asin1, z2, p4f_asin2);
+ Packet4f s2=pmadd(s1, z2, p4f_asin3);
+ Packet4f s3=pmadd(s2,z2, p4f_asin4);
+ Packet4f s4=pmadd(s3,z2, p4f_asin5);
+ Packet4f temp=pmul(s4,z2);//not really a madd but a mul by z so that the next term can be a madd
+ z2=pmadd(temp,x2,x2);
}
/* select the correct result from the two branch evaluations */
@@ -97,6 +99,7 @@ template<> EIGEN_DONT_INLINE Packet4f ei_pasin(Packet4f x)
return _mm_xor_ps(z, sign_bit);
}
+} // end namespace internal
#endif