diff options
author | Benoit Jacob <jacob.benoit.1@gmail.com> | 2009-04-14 13:15:53 +0000 |
---|---|---|
committer | Benoit Jacob <jacob.benoit.1@gmail.com> | 2009-04-14 13:15:53 +0000 |
commit | 2bb1c9e8dc78c31c90d07b296bba2cf81521c5cb (patch) | |
tree | 81295b788900a45509255cb640219daf502cb6dc /unsupported | |
parent | 804a239d308bea55722c59b70c95459336230488 (diff) |
finally commit Rohit's work as the start of a new (currently
unsupported) module, MoreVectorization.
CCMAIL:rpg.314@gmail.com
Diffstat (limited to 'unsupported')
-rw-r--r-- | unsupported/Eigen/CMakeLists.txt | 2 | ||||
-rw-r--r-- | unsupported/Eigen/MoreVectorization | 16 | ||||
-rw-r--r-- | unsupported/Eigen/src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | unsupported/Eigen/src/MoreVectorization/CMakeLists.txt | 6 | ||||
-rw-r--r-- | unsupported/Eigen/src/MoreVectorization/MathFunctions.h | 103 |
5 files changed, 127 insertions, 1 deletions
diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt index eb54f88ac..49c6817fb 100644 --- a/unsupported/Eigen/CMakeLists.txt +++ b/unsupported/Eigen/CMakeLists.txt @@ -1,4 +1,4 @@ -set(Eigen_HEADERS AdolcForward BVH IterativeSolvers) +set(Eigen_HEADERS AdolcForward BVH IterativeSolvers MoreVectorization) install(FILES ${Eigen_HEADERS} diff --git a/unsupported/Eigen/MoreVectorization b/unsupported/Eigen/MoreVectorization new file mode 100644 index 000000000..e60526a3d --- /dev/null +++ b/unsupported/Eigen/MoreVectorization @@ -0,0 +1,16 @@ +#ifndef EIGEN_MOREVECTORIZATION_MODULE_H +#define EIGEN_MOREVECTORIZATION_MODULE_H + +#include <Eigen/Core> + +namespace Eigen { + +/** \ingroup Unsupported_modules + * \defgroup MoreVectorization additional vectorization module + */ + +#include "src/MoreVectorization/MathFunctions.h" + +} + +#endif // EIGEN_MOREVECTORIZATION_MODULE_H diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt index c2f63db20..3a688afd8 100644 --- a/unsupported/Eigen/src/CMakeLists.txt +++ b/unsupported/Eigen/src/CMakeLists.txt @@ -1,3 +1,4 @@ ADD_SUBDIRECTORY(IterativeSolvers) ADD_SUBDIRECTORY(BVH) ADD_SUBDIRECTORY(AutoDiff) +ADD_SUBDIRECTORY(MoreVectorization) diff --git a/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt b/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt new file mode 100644 index 000000000..1b887cc8e --- /dev/null +++ b/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_MoreVectorization_SRCS "*.h") + +INSTALL(FILES + ${Eigen_MoreVectorization_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/MoreVectorization COMPONENT Devel + ) diff --git a/unsupported/Eigen/src/MoreVectorization/MathFunctions.h b/unsupported/Eigen/src/MoreVectorization/MathFunctions.h new file mode 100644 index 000000000..351131472 --- /dev/null +++ b/unsupported/Eigen/src/MoreVectorization/MathFunctions.h @@ -0,0 +1,103 @@ +// This file 
is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Rohit Garg <rpg.314@gmail.com> +// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. 
#ifndef EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H
#define EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H

/** \internal \returns the arcsin of \a a (coeff-wise)
  *
  * Generic fallback: simply forwards to std::asin, so it works for any type
  * that std::asin accepts. Packet types get their own specializations below.
  */
template<typename Packet> inline static Packet ei_pasin(Packet a) { return std::asin(a); }

#ifdef EIGEN_VECTORIZE_SSE

/** \internal \returns the arcsin of the four floats in \a x (coeff-wise)
  *
  * Branch-free evaluation: both range-reduction paths are computed for every
  * lane and the right one is picked afterwards with a bit mask.
  *
  * With a = |x| and the polynomial
  *   P(z) = (((asin1*z + asin2)*z + asin3)*z + asin4)*z + asin5
  * the two paths are (assuming ei_pmadd(a,b,c) evaluates a*b+c -- TODO confirm
  * against the SSE PacketMath header):
  *   - a >  0.5 : z = 0.5 - 0.5*a, s = sqrt(z), result = pi/2 - 2*(s + s*z*P(z))
  *   - a <= 0.5 : z = a*a,                      result = a + a*z*P(z)
  * The sign bit of \a x is finally re-applied to the selected result.
  *
  * NOTE(review): the coefficients look like those of the Cephes single
  * precision asinf polynomial -- confirm the origin before changing them.
  */
template<> EIGEN_DONT_INLINE Packet4f ei_pasin(Packet4f x)
{
  _EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
  _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5);

  // only the top bit of each 32-bit lane is set: isolates the IEEE sign bit
  _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);

  _EIGEN_DECLARE_CONST_Packet4f(pi_over_2, 3.141592654*0.5);

  // polynomial coefficients, highest degree first (asin1) to constant term (asin5)
  _EIGEN_DECLARE_CONST_Packet4f(asin1, 4.2163199048E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin2, 2.4181311049E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin3, 4.5470025998E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin4, 7.4953002686E-2);
  _EIGEN_DECLARE_CONST_Packet4f(asin5, 1.6666752422E-1);

  Packet4f a = ei_pabs(x); // absolute value of the input

  Packet4f sign_bit = _mm_and_ps(x, ei_p4f_sign_mask); // sign bit of each lane, re-applied at the end

  Packet4f z1, z2; // results of the two branches

  // Lanes where |x| > 0.5 are all-ones in the mask, the others all-zeros.
  // Both branches are always evaluated; the mask only drives the final merge.
  Packet4f branch_mask = _mm_cmpgt_ps(a, ei_p4f_half);

  // branch for |x| > 0.5
  {
    z1 = ei_pmadd(ei_p4f_minus_half, a, ei_p4f_half); // z = 0.5 - 0.5*a
    Packet4f x1 = ei_psqrt(z1);
    // core series expansion, Horner scheme in z
    Packet4f s1 = ei_pmadd(ei_p4f_asin1, z1, ei_p4f_asin2);
    Packet4f s2 = ei_pmadd(s1, z1, ei_p4f_asin3);
    Packet4f s3 = ei_pmadd(s2, z1, ei_p4f_asin4);
    Packet4f s4 = ei_pmadd(s3, z1, ei_p4f_asin5);
    Packet4f temp = ei_pmul(s4, z1); // plain mul by z so that the next step can be a madd
    z1 = ei_pmadd(temp, x1, x1);     // x1 + x1*z*P(z)
    z1 = ei_padd(z1, z1);            // doubled ...
    z1 = ei_psub(ei_p4f_pi_over_2, z1); // ... and subtracted from pi/2
  }

  // branch for |x| <= 0.5
  {
    Packet4f x2 = a;
    z2 = ei_pmul(x2, x2); // z = a*a
    // core series expansion, same polynomial as above
    Packet4f s1 = ei_pmadd(ei_p4f_asin1, z2, ei_p4f_asin2);
    Packet4f s2 = ei_pmadd(s1, z2, ei_p4f_asin3);
    Packet4f s3 = ei_pmadd(s2, z2, ei_p4f_asin4);
    Packet4f s4 = ei_pmadd(s3, z2, ei_p4f_asin5);
    Packet4f temp = ei_pmul(s4, z2); // plain mul by z so that the next step can be a madd
    z2 = ei_pmadd(temp, x2, x2);     // x2 + x2*z*P(z)
  }

  /* select the correct result from the two branch evaluations */
  z1 = _mm_and_ps(branch_mask, z1);
  z2 = _mm_andnot_ps(branch_mask, z2);
  Packet4f z = _mm_or_ps(z1, z2);

  /* update the sign */
  return _mm_xor_ps(z, sign_bit);
}

#endif // EIGEN_VECTORIZE_SSE

#endif // EIGEN_MOREVECTORIZATION_MATHFUNCTIONS_H