From ca67c6015031d9740034e98774ff8de5f5bbf865 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <rk_eigen@redking.me.uk>
Date: Mon, 10 Jun 2013 15:59:03 +0200
Subject: Fix bug #591: minor optimization in NEON vectorization support

---
 Eigen/src/Core/arch/NEON/Complex.h | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'Eigen/src/Core/arch/NEON/Complex.h')
diff --git a/Eigen/src/Core/arch/NEON/Complex.h b/Eigen/src/Core/arch/NEON/Complex.h
index 795b4be73..f183d31de 100644
--- a/Eigen/src/Core/arch/NEON/Complex.h
+++ b/Eigen/src/Core/arch/NEON/Complex.h
@@ -68,7 +68,6 @@ template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
 {
   Packet4f v1, v2;
-  float32x2_t a_lo, a_hi;
 
   // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
   v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
@@ -81,9 +80,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
   // Conjugate v2 
   v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR));
   // Swap real/imag elements in v2.
-  a_lo = vrev64_f32(vget_low_f32(v2));
-  a_hi = vrev64_f32(vget_high_f32(v2));
-  v2 = vcombine_f32(a_lo, a_hi);
+  v2 = vrev64q_f32(v2);
   // Add and return the result
   return Packet2cf(vaddq_f32(v1, v2));
 }
@@ -241,13 +238,10 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
   // TODO optimize it for AltiVec
   Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
   Packet4f s, rev_s;
-  float32x2_t a_lo, a_hi;
 
   // this computes the norm
   s = vmulq_f32(b.v, b.v);
-  a_lo = vrev64_f32(vget_low_f32(s));
-  a_hi = vrev64_f32(vget_high_f32(s));
-  rev_s = vcombine_f32(a_lo, a_hi);
+  rev_s = vrev64q_f32(s);
 
   return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
 }
-- 
cgit v1.2.3