aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SSE/Complex.h
diff options
context:
space:
mode:
authorGravatar Gael Guennebaud <g.gael@free.fr>2010-07-05 16:18:09 +0200
committerGravatar Gael Guennebaud <g.gael@free.fr>2010-07-05 16:18:09 +0200
commite1eccfad3f3b4e9e2ac94ca9a43256144ef72888 (patch)
tree0f4be317c45e33da92ec26f261f7d27e3d752033 /Eigen/src/Core/arch/SSE/Complex.h
parentefb79600b98b210f09d030f5c307b99c45ef0ad5 (diff)
add intitial support for the vectorization of complex<float>
Diffstat (limited to 'Eigen/src/Core/arch/SSE/Complex.h')
-rw-r--r--Eigen/src/Core/arch/SSE/Complex.h144
1 files changed, 144 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h
new file mode 100644
index 000000000..ab8bf8b84
--- /dev/null
+++ b/Eigen/src/Core/arch/SSE/Complex.h
@@ -0,0 +1,144 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_COMPLEX_SSE_H
+#define EIGEN_COMPLEX_SSE_H
+
+struct Packet2cf
+{
+ EIGEN_STRONG_INLINE Packet2cf() {}
+ EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
+ __m128 v;
+};
+
+typedef __m128d Packet1cd;
+
+template<> struct ei_packet_traits<std::complex<float> > : ei_default_packet_traits
+{
+ typedef Packet2cf type; enum {size=2};
+};
+
+template<> struct ei_unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1<std::complex<float> >(const std::complex<float>& from)
+{
+ Packet2cf res;
+ res.v = _mm_loadl_pi(res.v, (const __m64*)&from);
+ return Packet2cf(_mm_movelh_ps(res.v,res.v));
+}
+
+// template<> EIGEN_STRONG_INLINE Packet4f ei_plset<std::complex<float> >(const std::complex<float> & a) { }
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ei_psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a)
+{
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
+ return Packet2cf(_mm_xor_ps(a.v,mask));
+
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ // TODO optimize it for SSE3 and 4
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+ return Packet2cf(_mm_add_ps(_mm_mul_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a.v), 0xa0)), b.v),
+ _mm_xor_ps(_mm_mul_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a.v), 0xf5)),
+ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b.v), 0xb1 ))), mask)));
+}
+
+// template<> EIGEN_STRONG_INLINE Packet2cf ei_pmadd<Packet2cf>(const Packet2cf& a, const Packet2cf& b, const Packet2cf& c)
+// {std::cerr << __LINE__ << "\n";
+// // TODO optimize it for SSE3 and 4
+// const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
+// return Packet2cf(_mm_add_ps(c.v,
+// _mm_add_ps(_mm_mul_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a.v), 0xa0)), b.v),
+// _mm_xor_ps(_mm_mul_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a.v), 0xf5)),
+// _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b.v), 0xb1 ))), mask))));
+// }
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
+{
+ // TODO optimize it for SSE3 and 4
+ const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
+ Packet2cf res(_mm_add_ps(_mm_mul_ps(a.v, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b.v), 0xa0))),
+ _mm_xor_ps(_mm_mul_ps(_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a.v), 0xb1)),
+ _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b.v), 0xf5 ))), mask)));
+ __m128 s = _mm_mul_ps(b.v,b.v);
+ return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(s), 0xb1)))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ei_por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); }
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_pload <std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(_mm_load_ps((const float*)from)); }
+template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu<std::complex<float> >(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); }
+
+template<> EIGEN_STRONG_INLINE void ei_pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps((float*)to, from.v); }
+template<> EIGEN_STRONG_INLINE void ei_pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstoreu((float*)to, from.v); }
+
+template<> EIGEN_STRONG_INLINE void ei_prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+
+template<> EIGEN_STRONG_INLINE std::complex<float> ei_pfirst<Packet2cf>(const Packet2cf& a)
+{
+ std::complex<float> res;
+ _mm_storel_pi((__m64*)&res, a.v);
+ return res;
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(ei_preverse(_mm_castps_pd(a.v)))); }
+
+// template<> EIGEN_STRONG_INLINE Packet2cf ei_pabs(const Packet2cf& a) {}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux<Packet2cf>(const Packet2cf& a)
+{
+ return ei_pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2cf ei_preduxp<Packet2cf>(const Packet2cf* vecs)
+{
+ return Packet2cf(_mm_add_ps(_mm_movelh_ps(vecs[0].v,vecs[1].v), _mm_movehl_ps(vecs[1].v,vecs[0].v)));
+}
+
+template<> EIGEN_STRONG_INLINE std::complex<float> ei_predux_mul<Packet2cf>(const Packet2cf& a)
+{
+ return ei_pfirst(ei_pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
+}
+
+template<int Offset>
+struct ei_palign_impl<Offset,Packet2cf>
+{
+ EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
+ {
+ if (Offset==1)
+ {
+ first.v = _mm_movehl_ps(first.v, first.v);
+ first.v = _mm_movelh_ps(first.v, second.v);
+ }
+ }
+};
+
+#endif // EIGEN_COMPLEX_SSE_H