From 9312a5bf5cd72f45558f402077b0c95683ee0fea Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Wed, 30 Jun 2021 15:53:06 -0700
Subject: Implement a generic vectorized version of Smith's algorithms for
 complex division.

---
 .../Core/arch/Default/GenericPacketMathFunctions.h   | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h')
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index c9fbaf68b..f1e10c898 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -757,6 +757,26 @@ Packet pcos_float(const Packet& x)
   return psincos_float<false>(x);
 }
 
+template<typename Packet>
+EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+EIGEN_UNUSED Packet pdiv_complex(const Packet& x, const Packet& y) {
+  typedef typename unpacket_traits<Packet>::as_real RealPacket;
+  // In the following we annotate the code for the case where the inputs
+  // are a pair length-2 SIMD vectors representing a single pair of complex
+  // numbers x = a + i*b, y = c + i*d.
+  const RealPacket y_abs = pabs(y.v);  // |c|, |d|
+  const RealPacket y_abs_flip = pcplxflip(Packet(y_abs)).v; // |d|, |c|
+  const RealPacket y_max = pmax(y_abs, y_abs_flip); // max(|c|, |d|), max(|c|, |d|)
+  const RealPacket y_scaled = pdiv(y.v, y_max);  // c / max(|c|, |d|), d / max(|c|, |d|)
+  // Compute scaled denominator.
+  const RealPacket y_scaled_sq = pmul(y_scaled, y_scaled); // c'**2, d'**2
+  const RealPacket denom = y_scaled_sq + pcplxflip(Packet(y_scaled_sq)).v;
+  Packet result_scaled = pmul(x, pconj(Packet(y_scaled)));  // a * c' + b * d', -a * d + b * c
+  // Divide elementwise by denom.
+  result_scaled = Packet(pdiv(result_scaled.v, denom));
+  // Rescale result
+  return Packet(pdiv(result_scaled.v, y_max));
+}
 
 template<typename Packet>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-- 
cgit v1.2.3