add SSE2 versions of sin, cos, log, exp using code from Julien

Pommier. They are for float only, and they return exactly the same result as the standard versions in about 90% of the cases. Otherwise the max error is below 1e-7. However, for very large values (>1e3) the accuracy of sin and cos slighlty decrease. They are about 3 or 4 times faster than 4 calls to their respective standard versions. So, is it ok to enable them by default in their respective functors ?
author: Gael Guennebaud <g.gael@free.fr> 2009-03-25 12:26:13 +0000
committer: Gael Guennebaud <g.gael@free.fr> 2009-03-25 12:26:13 +0000
commit: 17860e578cab13d5aacf8b5e6e373e59403352ba (patch)
tree: 285e8ef963751a1a96ea127457ff51cf44a6cf16 /test
parent: 64fbd93cd904790e831aa5404698c5aa30f54be4 (diff)
1 files changed, 65 insertions, 0 deletions
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index b1da6e44b..858bbb9c3 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -1,6 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra. Eigen itself is part of the KDE project.
 //
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
@@ -49,6 +50,34 @@ template<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int s
   VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
 }
 
+template<bool Cond,typename Packet>
+struct packet_helper
+{
+  template<typename T>
+  inline Packet load(const T* from) const { return ei_pload(from); }
+  
+  template<typename T>
+  inline void store(T* to, const Packet& x) const { ei_pstore(to,x); }
+};
+
+template<typename Packet>
+struct packet_helper<false,Packet>
+{
+  template<typename T>
+  inline T load(const T* from) const { return *from; }
+  
+  template<typename T>
+  inline void store(T* to, const T& x) const { *to = x; }
+};
+
+#define CHECK_CWISE1_IF(COND, REFOP, POP) if(COND) { \
+  packet_helper<COND,Packet> h; \
+  for (int i=0; i<PacketSize; ++i) \
+    ref[i] = REFOP(data1[i]); \
+  h.store(data2, POP(h.load(data1))); \
+  VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
+}
+
 #define REF_ADD(a,b) ((a)+(b))
 #define REF_SUB(a,b) ((a)-(b))
 #define REF_MUL(a,b) ((a)*(b))
@@ -167,6 +196,39 @@ template<typename Scalar> void packetmath()
   VERIFY(areApprox(ref, data2, PacketSize) && "ei_preverse");
 }
 
+template<typename Scalar> void packetmath_real()
+{
+  typedef typename ei_packet_traits<Scalar>::type Packet;
+  const int PacketSize = ei_packet_traits<Scalar>::size;
+
+  const int size = PacketSize*4;
+  EIGEN_ALIGN_128 Scalar data1[ei_packet_traits<Scalar>::size*4];
+  EIGEN_ALIGN_128 Scalar data2[ei_packet_traits<Scalar>::size*4];
+  EIGEN_ALIGN_128 Scalar ref[ei_packet_traits<Scalar>::size*4];
+  
+  for (int i=0; i<size; ++i)
+  {
+    data1[i] = ei_random<Scalar>(-1e3,1e3);
+    data2[i] = ei_random<Scalar>(-1e3,1e3);
+  }
+  CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasSin, ei_sin, ei_psin);
+  CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasCos, ei_cos, ei_pcos);
+  
+  for (int i=0; i<size; ++i)
+  {
+    data1[i] = ei_random<Scalar>(-87,88);
+    data2[i] = ei_random<Scalar>(-87,88);
+  }
+  CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasExp, ei_exp, ei_pexp);
+  
+  for (int i=0; i<size; ++i)
+  {
+    data1[i] = ei_random<Scalar>(0,1e6);
+    data2[i] = ei_random<Scalar>(0,1e6);
+  }
+  CHECK_CWISE1_IF(ei_packet_traits<Scalar>::HasLog, ei_log, ei_plog);
+}
+
 void test_packetmath()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -174,5 +236,8 @@ void test_packetmath()
     CALL_SUBTEST( packetmath<double>() );
     CALL_SUBTEST( packetmath<int>() );
     CALL_SUBTEST( packetmath<std::complex<float> >() );
+    
+    CALL_SUBTEST( packetmath_real<float>() );
+    CALL_SUBTEST( packetmath_real<double>() );
   }
 }
author	Gael Guennebaud <g.gael@free.fr>	2009-03-25 12:26:13 +0000
committer	Gael Guennebaud <g.gael@free.fr>	2009-03-25 12:26:13 +0000
commit	17860e578cab13d5aacf8b5e6e373e59403352ba (patch)
tree	285e8ef963751a1a96ea127457ff51cf44a6cf16 /test
parent	64fbd93cd904790e831aa5404698c5aa30f54be4 (diff)