* merge

* remove a ctor in QuaternionBase as it gives a strange error with GCC 4.4.2.
author: Benoit Jacob <jacob.benoit.1@gmail.com> 2009-11-09 09:08:03 -0500
committer: Benoit Jacob <jacob.benoit.1@gmail.com> 2009-11-09 09:08:03 -0500
commit: 92749eed11d000300cfa54654f1043cd52399ed8 (patch)
tree: ba227522582b2f9f4280ed1404e74c654e21ccb3 /unsupported/Eigen/src
parent: 4b366b07be4e409239c61158a23d93e8ebf3811b (diff)
parent: 670651e2e0932c5edfe2a2da4b9f3c42af3b7dec (diff)
4 files changed, 427 insertions, 422 deletions
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 2fb733a99..c4607c2b8 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -29,7 +29,7 @@ namespace Eigen {
 
 template<typename A, typename B>
 struct ei_make_coherent_impl {
-  static void run(A& a, B& b) {}
+  static void run(A&, B&) {}
 };
 
 // resize a to match b is a.size()==0, and conversely.
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h b/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h
index 69ea9144e..03c82b7e8 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffVector.h
@@ -35,7 +35,7 @@ namespace Eigen {
   * This class represents a scalar value while tracking its respective derivatives.
   *
   * It supports the following list of global math function:
-  *  - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, 
+  *  - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos,
   *  - ei_abs, ei_sqrt, ei_pow, ei_exp, ei_log, ei_sin, ei_cos,
   *  - ei_conj, ei_real, ei_imag, ei_abs2.
   *
@@ -48,130 +48,150 @@ template<typename ValueType, typename JacobianType>
 class AutoDiffVector
 {
   public:
-    typedef typename ei_traits<ValueType>::Scalar Scalar;
-    
+    //typedef typename ei_traits<ValueType>::Scalar Scalar;
+    typedef typename ei_traits<ValueType>::Scalar BaseScalar;
+    typedef AutoDiffScalar<Matrix<BaseScalar,JacobianType::RowsAtCompileTime,1> > ActiveScalar;
+    typedef ActiveScalar Scalar;
+    typedef AutoDiffScalar<typename JacobianType::ColXpr> CoeffType;
+
     inline AutoDiffVector() {}
-    
+
     inline AutoDiffVector(const ValueType& values)
       : m_values(values)
     {
       m_jacobian.setZero();
     }
-    
+
+
+    CoeffType operator[] (int i) { return CoeffType(m_values[i], m_jacobian.col(i)); }
+    const CoeffType operator[] (int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }
+
+    CoeffType operator() (int i) { return CoeffType(m_values[i], m_jacobian.col(i)); }
+    const CoeffType operator() (int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }
+
+    CoeffType coeffRef(int i) { return CoeffType(m_values[i], m_jacobian.col(i)); }
+    const CoeffType coeffRef(int i) const { return CoeffType(m_values[i], m_jacobian.col(i)); }
+
+    int size() const { return m_values.size(); }
+
+    // FIXME here we could return an expression of the sum
+    Scalar sum() const { return Scalar(m_values.sum(), m_jacobian.rowwise().sum()); } // pairs the sum of the values with the row-wise sum of the jacobian
+
+
     inline AutoDiffVector(const ValueType& values, const JacobianType& jac)
       : m_values(values), m_jacobian(jac)
     {}
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
     inline AutoDiffVector(const AutoDiffVector<OtherValueType, OtherJacobianType>& other)
       : m_values(other.values()), m_jacobian(other.jacobian())
     {}
-    
+
     inline AutoDiffVector(const AutoDiffVector& other)
       : m_values(other.values()), m_jacobian(other.jacobian())
     {}
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
-    inline AutoDiffScalar& operator=(const AutoDiffVector<OtherValueType, OtherJacobianType>& other)
+    inline AutoDiffVector& operator=(const AutoDiffVector<OtherValueType, OtherJacobianType>& other)
     {
       m_values = other.values();
       m_jacobian = other.jacobian();
       return *this;
     }
-    
+
     inline AutoDiffVector& operator=(const AutoDiffVector& other)
     {
       m_values = other.values();
       m_jacobian = other.jacobian();
       return *this;
     }
-    
+
     inline const ValueType& values() const { return m_values; }
     inline ValueType& values() { return m_values; }
-    
+
     inline const JacobianType& jacobian() const { return m_jacobian; }
     inline JacobianType& jacobian() { return m_jacobian; }
-    
+
     template<typename OtherValueType,typename OtherJacobianType>
     inline const AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,JacobianType,OtherJacobianType> >
-    operator+(const AutoDiffScalar<OtherDerType>& other) const
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,ValueType,OtherValueType>::Type,
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,JacobianType,OtherJacobianType>::Type >
+    operator+(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) const
     {
       return AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_sum_op<Scalar>,JacobianType,OtherJacobianType> >(
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,ValueType,OtherValueType>::Type,
+      typename MakeCwiseBinaryOp<ei_scalar_sum_op<BaseScalar>,JacobianType,OtherJacobianType>::Type >(
         m_values + other.values(),
         m_jacobian + other.jacobian());
     }
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
     inline AutoDiffVector&
-    operator+=(const AutoDiffVector<OtherValueType,OtherDerType>& other)
+    operator+=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other)
     {
       m_values += other.values();
       m_jacobian += other.jacobian();
       return *this;
     }
-    
+
     template<typename OtherValueType,typename OtherJacobianType>
     inline const AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,JacobianType,OtherJacobianType> >
-    operator-(const AutoDiffScalar<OtherDerType>& other) const
+      typename MakeCwiseBinaryOp<ei_scalar_difference_op<BaseScalar>,ValueType,OtherValueType>::Type,
+      typename MakeCwiseBinaryOp<ei_scalar_difference_op<BaseScalar>,JacobianType,OtherJacobianType>::Type >
+    operator-(const AutoDiffVector<OtherValueType,OtherJacobianType>& other) const // functors use BaseScalar, as in operator+ (Scalar is now the AutoDiffScalar typedef)
     {
       return AutoDiffVector<
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,ValueType,OtherValueType> >
-      CwiseBinaryOp<ei_scalar_difference_op<Scalar>,JacobianType,OtherJacobianType> >(
-        m_values - other.values(),
-        m_jacobian - other.jacobian());
+        typename MakeCwiseBinaryOp<ei_scalar_difference_op<BaseScalar>,ValueType,OtherValueType>::Type,
+        typename MakeCwiseBinaryOp<ei_scalar_difference_op<BaseScalar>,JacobianType,OtherJacobianType>::Type >(
+          m_values - other.values(),
+          m_jacobian - other.jacobian());
     }
-    
+
     template<typename OtherValueType, typename OtherJacobianType>
     inline AutoDiffVector&
-    operator-=(const AutoDiffVector<OtherValueType,OtherDerType>& other)
+    operator-=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other)
     {
       m_values -= other.values();
       m_jacobian -= other.jacobian();
       return *this;
     }
-    
+
     inline const AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, JacobianType> >
+      typename MakeCwiseUnaryOp<ei_scalar_opposite_op<BaseScalar>, ValueType>::Type, // BaseScalar, not Scalar: Scalar is now the AutoDiffScalar typedef
+      typename MakeCwiseUnaryOp<ei_scalar_opposite_op<BaseScalar>, JacobianType>::Type >
     operator-() const
     {
       return AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, JacobianType> >(
-        -m_values,
-        -m_jacobian);
+        typename MakeCwiseUnaryOp<ei_scalar_opposite_op<BaseScalar>, ValueType>::Type,
+        typename MakeCwiseUnaryOp<ei_scalar_opposite_op<BaseScalar>, JacobianType>::Type >(
+          -m_values,
+          -m_jacobian);
     }
-    
+
     inline const AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >
-    operator*(const Scalar& other) const
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<BaseScalar>, ValueType>::Type,
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<BaseScalar>, JacobianType>::Type>
+    operator*(const BaseScalar& other) const // functor scalar matches the BaseScalar factor (Scalar is now the AutoDiffScalar typedef)
     {
       return AutoDiffVector<
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >(
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<BaseScalar>, ValueType>::Type,
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<BaseScalar>, JacobianType>::Type >(
           m_values * other,
-          (m_jacobian * other));
+          m_jacobian * other);
     }
-    
+
     friend inline const AutoDiffVector<
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-      CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>::Type,
+      typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType>::Type >
     operator*(const Scalar& other, const AutoDiffVector& v)
     {
       return AutoDiffVector<
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>
-        CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType> >(
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, ValueType>::Type,
+        typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, JacobianType>::Type >(
           v.values() * other,
           v.jacobian() * other);
     }
-    
+
 //     template<typename OtherValueType,typename OtherJacobianType>
 //     inline const AutoDiffVector<
 //       CwiseBinaryOp<ei_scalar_multiple_op<Scalar>, ValueType, OtherValueType>
@@ -188,25 +208,25 @@ class AutoDiffVector
 //             m_values.cwise() * other.values(),
 //             (m_jacobian * other.values()).nestByValue() + (m_values * other.jacobian()).nestByValue());
 //     }
-    
+
     inline AutoDiffVector& operator*=(const Scalar& other)
     {
       m_values *= other;
       m_jacobian *= other;
       return *this;
     }
-    
+
     template<typename OtherValueType,typename OtherJacobianType>
     inline AutoDiffVector& operator*=(const AutoDiffVector<OtherValueType,OtherJacobianType>& other)
     {
       *this = *this * other;
       return *this;
     }
-    
+
   protected:
     ValueType m_values;
     JacobianType m_jacobian;
-    
+
 };
 
 }
diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
index e1f67f334..a66b7398c 100644
--- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
@@ -166,6 +166,7 @@
         m_plans.clear();
       }
 
+      // complex-to-complex forward FFT
       inline
       void fwd( Complex * dst,const Complex *src,int nfft)
       {
@@ -177,9 +178,6 @@
       void fwd( Complex * dst,const Scalar * src,int nfft) 
       {
           get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft);
-          int nhbins=(nfft>>1)+1;
-          for (int k=nhbins;k < nfft; ++k )
-              dst[k] = conj(dst[nfft-k]);
       }
 
       // inverse complex-to-complex
@@ -187,12 +185,6 @@
       void inv(Complex * dst,const Complex  *src,int nfft)
       {
         get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
-
-        //TODO move scaling to Eigen::FFT
-        // scaling
-        Scalar s = Scalar(1.)/nfft;
-        for (int k=0;k<nfft;++k)
-          dst[k] *= s;
       }
 
       // half-complex to scalar
@@ -200,11 +192,6 @@
       void inv( Scalar * dst,const Complex * src,int nfft) 
       {
         get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
-
-        //TODO move scaling to Eigen::FFT
-        Scalar s = Scalar(1.)/nfft;
-        for (int k=0;k<nfft;++k)
-          dst[k] *= s;
       }
 
   protected:
@@ -222,3 +209,5 @@
           return m_plans[key];
       }
   };
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
+
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index c068d8765..5c958d1ec 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -27,388 +27,384 @@
   // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft
   // Copyright 2003-2009 Mark Borgerding
 
-  template <typename _Scalar>
-    struct ei_kiss_cpx_fft
+template <typename _Scalar>
+struct ei_kiss_cpx_fft
+{
+  typedef _Scalar Scalar;
+  typedef std::complex<Scalar> Complex;
+  std::vector<Complex> m_twiddles;
+  std::vector<int> m_stageRadix;
+  std::vector<int> m_stageRemainder;
+  std::vector<Complex> m_scratchBuf;
+  bool m_inverse;
+
+  inline
+    void make_twiddles(int nfft,bool inverse)
     {
-      typedef _Scalar Scalar;
-      typedef std::complex<Scalar> Complex;
-      std::vector<Complex> m_twiddles;
-      std::vector<int> m_stageRadix;
-      std::vector<int> m_stageRemainder;
-      std::vector<Complex> m_scratchBuf;
-      bool m_inverse;
-
-      void make_twiddles(int nfft,bool inverse)
-      {
-        m_inverse = inverse;
-        m_twiddles.resize(nfft);
-        Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
-        for (int i=0;i<nfft;++i)
-          m_twiddles[i] = exp( Complex(0,i*phinc) );
+      m_inverse = inverse;
+      m_twiddles.resize(nfft);
+      Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+      for (int i=0;i<nfft;++i)
+        m_twiddles[i] = exp( Complex(0,i*phinc) );
+    }
+
+  void factorize(int nfft)
+  {
+    //start factoring out 4's, then 2's, then 3,5,7,9,...
+    int n= nfft;
+    int p=4;
+    do {
+      while (n % p) {
+        switch (p) {
+          case 4: p = 2; break;
+          case 2: p = 3; break;
+          default: p += 2; break;
+        }
+        if (p*p>n)
+          p=n;// impossible to have a factor > sqrt(n)
       }
+      n /= p;
+      m_stageRadix.push_back(p);
+      m_stageRemainder.push_back(n);
+      if ( p > 5 )
+        m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic
+    }while(n>1);
+  }
+
+  template <typename _Src>
+    inline
+    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
+    {
+      int p = m_stageRadix[stage];
+      int m = m_stageRemainder[stage];
+      Complex * Fout_beg = xout;
+      Complex * Fout_end = xout + p*m;
 
-      void factorize(int nfft)
-      {
-        //start factoring out 4's, then 2's, then 3,5,7,9,...
-        int n= nfft;
-        int p=4;
-        do {
-          while (n % p) {
-            switch (p) {
-              case 4: p = 2; break;
-              case 2: p = 3; break;
-              default: p += 2; break;
-            }
-            if (p*p>n)
-              p=n;// impossible to have a factor > sqrt(n)
-          }
-          n /= p;
-          m_stageRadix.push_back(p);
-          m_stageRemainder.push_back(n);
-          if ( p > 5 )
-              m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic
-        }while(n>1);
+      if (m>1) {
+        do{
+          // recursive call:
+          // DFT of size m*p performed by doing
+          // p instances of smaller DFTs of size m, 
+          // each one takes a decimated version of the input
+          work(stage+1, xout , xin, fstride*p,in_stride);
+          xin += fstride*in_stride;
+        }while( (xout += m) != Fout_end );
+      }else{
+        do{
+          *xout = *xin;
+          xin += fstride*in_stride;
+        }while(++xout != Fout_end );
       }
-
-      template <typename _Src>
-        void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
-        {
-          int p = m_stageRadix[stage];
-          int m = m_stageRemainder[stage];
-          Complex * Fout_beg = xout;
-          Complex * Fout_end = xout + p*m;
-
-          if (m>1) {
-            do{
-              // recursive call:
-              // DFT of size m*p performed by doing
-              // p instances of smaller DFTs of size m, 
-              // each one takes a decimated version of the input
-              work(stage+1, xout , xin, fstride*p,in_stride);
-              xin += fstride*in_stride;
-            }while( (xout += m) != Fout_end );
-          }else{
-            do{
-              *xout = *xin;
-              xin += fstride*in_stride;
-            }while(++xout != Fout_end );
-          }
-          xout=Fout_beg;
-
-          // recombine the p smaller DFTs 
-          switch (p) {
-            case 2: bfly2(xout,fstride,m); break;
-            case 3: bfly3(xout,fstride,m); break;
-            case 4: bfly4(xout,fstride,m); break;
-            case 5: bfly5(xout,fstride,m); break;
-            default: bfly_generic(xout,fstride,m,p); break;
-          }
-        }
-
-      inline
-      void bfly2( Complex * Fout, const size_t fstride, int m)
-      {
-        for (int k=0;k<m;++k) {
-          Complex t = Fout[m+k] * m_twiddles[k*fstride];
-          Fout[m+k] = Fout[k] - t;
-          Fout[k] += t;
-        }
+      xout=Fout_beg;
+
+      // recombine the p smaller DFTs 
+      switch (p) {
+        case 2: bfly2(xout,fstride,m); break;
+        case 3: bfly3(xout,fstride,m); break;
+        case 4: bfly4(xout,fstride,m); break;
+        case 5: bfly5(xout,fstride,m); break;
+        default: bfly_generic(xout,fstride,m,p); break;
       }
+    }
 
-      inline
-      void bfly4( Complex * Fout, const size_t fstride, const size_t m)
-      {
-        Complex scratch[6];
-        int negative_if_inverse = m_inverse * -2 +1;
-        for (size_t k=0;k<m;++k) {
-          scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
-          scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
-          scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
-          scratch[5] = Fout[k] - scratch[1];
-
-          Fout[k] += scratch[1];
-          scratch[3] = scratch[0] + scratch[2];
-          scratch[4] = scratch[0] - scratch[2];
-          scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
-
-          Fout[k+2*m]  = Fout[k] - scratch[3];
-          Fout[k] += scratch[3];
-          Fout[k+m] = scratch[5] + scratch[4];
-          Fout[k+3*m] = scratch[5] - scratch[4];
-        }
+  inline
+    void bfly2( Complex * Fout, const size_t fstride, int m)
+    {
+      for (int k=0;k<m;++k) {
+        Complex t = Fout[m+k] * m_twiddles[k*fstride];
+        Fout[m+k] = Fout[k] - t;
+        Fout[k] += t;
       }
+    }
 
-      inline
-      void bfly3( Complex * Fout, const size_t fstride, const size_t m)
-      {
-        size_t k=m;
-        const size_t m2 = 2*m;
-        Complex *tw1,*tw2;
-        Complex scratch[5];
-        Complex epi3;
-        epi3 = m_twiddles[fstride*m];
-
-        tw1=tw2=&m_twiddles[0];
-
-        do{
-          scratch[1]=Fout[m] * *tw1;
-          scratch[2]=Fout[m2] * *tw2;
-
-          scratch[3]=scratch[1]+scratch[2];
-          scratch[0]=scratch[1]-scratch[2];
-          tw1 += fstride;
-          tw2 += fstride*2;
-          Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
-          scratch[0] *= epi3.imag();
-          *Fout += scratch[3];
-          Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
-          Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
-          ++Fout;
-        }while(--k);
+  inline
+    void bfly4( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex scratch[6];
+      int negative_if_inverse = m_inverse * -2 +1;
+      for (size_t k=0;k<m;++k) {
+        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+        scratch[5] = Fout[k] - scratch[1];
+
+        Fout[k] += scratch[1];
+        scratch[3] = scratch[0] + scratch[2];
+        scratch[4] = scratch[0] - scratch[2];
+        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+        Fout[k+2*m]  = Fout[k] - scratch[3];
+        Fout[k] += scratch[3];
+        Fout[k+m] = scratch[5] + scratch[4];
+        Fout[k+3*m] = scratch[5] - scratch[4];
       }
+    }
 
-      inline
-      void bfly5( Complex * Fout, const size_t fstride, const size_t m)
-      {
-        Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-        size_t u;
-        Complex scratch[13];
-        Complex * twiddles = &m_twiddles[0];
-        Complex *tw;
-        Complex ya,yb;
-        ya = twiddles[fstride*m];
-        yb = twiddles[fstride*2*m];
-
-        Fout0=Fout;
-        Fout1=Fout0+m;
-        Fout2=Fout0+2*m;
-        Fout3=Fout0+3*m;
-        Fout4=Fout0+4*m;
-
-        tw=twiddles;
-        for ( u=0; u<m; ++u ) {
-          scratch[0] = *Fout0;
-
-          scratch[1]  = *Fout1 * tw[u*fstride];
-          scratch[2]  = *Fout2 * tw[2*u*fstride];
-          scratch[3]  = *Fout3 * tw[3*u*fstride];
-          scratch[4]  = *Fout4 * tw[4*u*fstride];
-
-          scratch[7] = scratch[1] + scratch[4];
-          scratch[10] = scratch[1] - scratch[4];
-          scratch[8] = scratch[2] + scratch[3];
-          scratch[9] = scratch[2] - scratch[3];
-
-          *Fout0 +=  scratch[7];
-          *Fout0 +=  scratch[8];
-
-          scratch[5] = scratch[0] + Complex(
-              (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
-              (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
-              );
-
-          scratch[6] = Complex(
-              (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
-              -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+  inline
+    void bfly3( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      size_t k=m;
+      const size_t m2 = 2*m;
+      Complex *tw1,*tw2;
+      Complex scratch[5];
+      Complex epi3;
+      epi3 = m_twiddles[fstride*m];
+
+      tw1=tw2=&m_twiddles[0];
+
+      do{
+        scratch[1]=Fout[m] * *tw1;
+        scratch[2]=Fout[m2] * *tw2;
+
+        scratch[3]=scratch[1]+scratch[2];
+        scratch[0]=scratch[1]-scratch[2];
+        tw1 += fstride;
+        tw2 += fstride*2;
+        Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
+        scratch[0] *= epi3.imag();
+        *Fout += scratch[3];
+        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+        ++Fout;
+      }while(--k);
+    }
+
+  inline
+    void bfly5( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+      size_t u;
+      Complex scratch[13];
+      Complex * twiddles = &m_twiddles[0];
+      Complex *tw;
+      Complex ya,yb;
+      ya = twiddles[fstride*m];
+      yb = twiddles[fstride*2*m];
+
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      tw=twiddles;
+      for ( u=0; u<m; ++u ) {
+        scratch[0] = *Fout0;
+
+        scratch[1]  = *Fout1 * tw[u*fstride];
+        scratch[2]  = *Fout2 * tw[2*u*fstride];
+        scratch[3]  = *Fout3 * tw[3*u*fstride];
+        scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+        scratch[7] = scratch[1] + scratch[4];
+        scratch[10] = scratch[1] - scratch[4];
+        scratch[8] = scratch[2] + scratch[3];
+        scratch[9] = scratch[2] - scratch[3];
+
+        *Fout0 +=  scratch[7];
+        *Fout0 +=  scratch[8];
+
+        scratch[5] = scratch[0] + Complex(
+            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+            );
+
+        scratch[6] = Complex(
+            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+            );
+
+        *Fout1 = scratch[5] - scratch[6];
+        *Fout4 = scratch[5] + scratch[6];
+
+        scratch[11] = scratch[0] +
+          Complex(
+              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
               );
 
-          *Fout1 = scratch[5] - scratch[6];
-          *Fout4 = scratch[5] + scratch[6];
-
-          scratch[11] = scratch[0] +
-            Complex(
-                (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
-                (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
-                );
+        scratch[12] = Complex(
+            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+            );
 
-          scratch[12] = Complex(
-              -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
-              (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
-              );
-
-          *Fout2=scratch[11]+scratch[12];
-          *Fout3=scratch[11]-scratch[12];
+        *Fout2=scratch[11]+scratch[12];
+        *Fout3=scratch[11]-scratch[12];
 
-          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-        }
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
       }
+    }
+
+  /* perform the butterfly for one stage of a mixed radix FFT */
+  inline
+    void bfly_generic(
+        Complex * Fout,
+        const size_t fstride,
+        int m,
+        int p
+        )
+    {
+      int u,k,q1,q;
+      Complex * twiddles = &m_twiddles[0];
+      Complex t;
+      int Norig = m_twiddles.size();
+      Complex * scratchbuf = &m_scratchBuf[0];
+
+      for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          scratchbuf[q1] = Fout[ k  ];
+          k += m;
+        }
 
-      /* perform the butterfly for one stage of a mixed radix FFT */
-      inline
-      void bfly_generic(
-          Complex * Fout,
-          const size_t fstride,
-          int m,
-          int p
-          )
-      {
-        int u,k,q1,q;
-        Complex * twiddles = &m_twiddles[0];
-        Complex t;
-        int Norig = m_twiddles.size();
-        Complex * scratchbuf = &m_scratchBuf[0];
-
-        for ( u=0; u<m; ++u ) {
-          k=u;
-          for ( q1=0 ; q1<p ; ++q1 ) {
-            scratchbuf[q1] = Fout[ k  ];
-            k += m;
-          }
-
-          k=u;
-          for ( q1=0 ; q1<p ; ++q1 ) {
-            int twidx=0;
-            Fout[ k ] = scratchbuf[0];
-            for (q=1;q<p;++q ) {
-              twidx += fstride * k;
-              if (twidx>=Norig) twidx-=Norig;
-              t=scratchbuf[q] * twiddles[twidx];
-              Fout[ k ] += t;
-            }
-            k += m;
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          int twidx=0;
+          Fout[ k ] = scratchbuf[0];
+          for (q=1;q<p;++q ) {
+            twidx += fstride * k;
+            if (twidx>=Norig) twidx-=Norig;
+            t=scratchbuf[q] * twiddles[twidx];
+            Fout[ k ] += t;
           }
+          k += m;
         }
       }
-    };
-
-  template <typename _Scalar>
-    struct ei_kissfft_impl
+    }
+};
+
+template <typename _Scalar>
+struct ei_kissfft_impl
+{
+  typedef _Scalar Scalar;
+  typedef std::complex<Scalar> Complex;
+
+  void clear() 
+  {
+    m_plans.clear();
+    m_realTwiddles.clear();
+  }
+
+  inline
+    void fwd( Complex * dst,const Complex *src,int nfft)
     {
-      typedef _Scalar Scalar;
-      typedef std::complex<Scalar> Complex;
-
-      void clear() 
-      {
-        m_plans.clear();
-        m_realTwiddles.clear();
-      }
-
-      template <typename _Src>
-      inline
-        void fwd( Complex * dst,const _Src *src,int nfft)
-        {
-          get_plan(nfft,false).work(0, dst, src, 1,1);
+      get_plan(nfft,false).work(0, dst, src, 1,1);
+    }
+
+  // real-to-complex forward FFT
+  // perform two FFTs of src even and src odd
+  // then twiddle to recombine them into the half-spectrum format
+  // only the first nfft/2+1 bins (the half-spectrum) are written to dst; the conjugate-symmetric half is not filled in
+  inline
+    void fwd( Complex * dst,const Scalar * src,int nfft) 
+    {
+      if ( nfft&3  ) {
+        // use generic mode when nfft is not a multiple of 4
+        m_tmpBuf1.resize(nfft);
+        get_plan(nfft,false).work(0, &m_tmpBuf1[0], src, 1,1);
+        std::copy(m_tmpBuf1.begin(),m_tmpBuf1.begin()+(nfft>>1)+1,dst );
+      }else{
+        int ncfft = nfft>>1;
+        int ncfft2 = nfft>>2;
+        Complex * rtw = real_twiddles(ncfft2);
+
+        // use optimized mode for real input when nfft is a multiple of 4
+        fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
+        Complex dc = dst[0].real() +  dst[0].imag();
+        Complex nyquist = dst[0].real() -  dst[0].imag();
+        int k;
+        for ( k=1;k <= ncfft2 ; ++k ) {
+          Complex fpk = dst[k];
+          Complex fpnk = conj(dst[ncfft-k]);
+          Complex f1k = fpk + fpnk;
+          Complex f2k = fpk - fpnk;
+          Complex tw= f2k * rtw[k-1];
+          dst[k] =  (f1k + tw) * Scalar(.5);
+          dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
         }
+        dst[0] = dc;
+        dst[ncfft] = nyquist;
+      }
+    }
 
-      // real-to-complex forward FFT
-      // perform two FFTs of src even and src odd
-      // then twiddle to recombine them into the half-spectrum format
-      // then fill in the conjugate symmetric half
-      inline
-      void fwd( Complex * dst,const Scalar * src,int nfft) 
-      {
-        if ( nfft&3  ) {
-          // use generic mode for odd
-          get_plan(nfft,false).work(0, dst, src, 1,1);
-        }else{
-          int ncfft = nfft>>1;
-          int ncfft2 = nfft>>2;
-          Complex * rtw = real_twiddles(ncfft2);
-
-          // use optimized mode for even real
-          fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
-          Complex dc = dst[0].real() +  dst[0].imag();
-          Complex nyquist = dst[0].real() -  dst[0].imag();
-          int k;
-          for ( k=1;k <= ncfft2 ; ++k ) {
-            Complex fpk = dst[k];
-            Complex fpnk = conj(dst[ncfft-k]);
-            Complex f1k = fpk + fpnk;
-            Complex f2k = fpk - fpnk;
-            Complex tw= f2k * rtw[k-1];
-            dst[k] =  (f1k + tw) * Scalar(.5);
-            dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
-          }
+  // inverse complex-to-complex
+  inline
+    void inv(Complex * dst,const Complex  *src,int nfft)
+    {
+      get_plan(nfft,true).work(0, dst, src, 1,1);
+    }
 
-          // place conjugate-symmetric half at the end for completeness
-          // TODO: make this configurable ( opt-out )
-          for ( k=1;k < ncfft ; ++k )
-            dst[nfft-k] = conj(dst[k]);
-          dst[0] = dc;
-          dst[ncfft] = nyquist;
+  // half-complex to scalar
+  inline
+    void inv( Scalar * dst,const Complex * src,int nfft) 
+    {
+      if (nfft&3) {
+        m_tmpBuf1.resize(nfft);
+        m_tmpBuf2.resize(nfft);
+        std::copy(src,src+(nfft>>1)+1,m_tmpBuf1.begin() );
+        for (int k=1;k<(nfft>>1)+1;++k)
+          m_tmpBuf1[nfft-k] = conj(m_tmpBuf1[k]);
+        inv(&m_tmpBuf2[0],&m_tmpBuf1[0],nfft);
+        for (int k=0;k<nfft;++k)
+          dst[k] = m_tmpBuf2[k].real();
+      }else{
+        // optimized version for multiple of 4
+        int ncfft = nfft>>1;
+        int ncfft2 = nfft>>2;
+        Complex * rtw = real_twiddles(ncfft2);
+        m_tmpBuf1.resize(ncfft);
+        m_tmpBuf1[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
+        for (int k = 1; k <= ncfft / 2; ++k) {
+          Complex fk = src[k];
+          Complex fnkc = conj(src[ncfft-k]);
+          Complex fek = fk + fnkc;
+          Complex tmp = fk - fnkc;
+          Complex fok = tmp * conj(rtw[k-1]);
+          m_tmpBuf1[k] = fek + fok;
+          m_tmpBuf1[ncfft-k] = conj(fek - fok);
         }
+        get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf1[0], 1,1);
       }
+    }
 
-      // inverse complex-to-complex
-      inline
-      void inv(Complex * dst,const Complex  *src,int nfft)
-      {
-        get_plan(nfft,true).work(0, dst, src, 1,1);
-        scale(dst, nfft, Scalar(1)/nfft );
-      }
+  protected:
+  typedef ei_kiss_cpx_fft<Scalar> PlanData;
+  typedef std::map<int,PlanData> PlanMap;
 
-      // half-complex to scalar
-      inline
-      void inv( Scalar * dst,const Complex * src,int nfft) 
-      {
-        if (nfft&3) {
-          m_tmpBuf.resize(nfft);
-          inv(&m_tmpBuf[0],src,nfft);
-          for (int k=0;k<nfft;++k)
-            dst[k] = m_tmpBuf[k].real();
-        }else{
-          // optimized version for multiple of 4
-          int ncfft = nfft>>1;
-          int ncfft2 = nfft>>2;
-          Complex * rtw = real_twiddles(ncfft2);
-          m_tmpBuf.resize(ncfft);
-          m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
-          for (int k = 1; k <= ncfft / 2; ++k) {
-            Complex fk = src[k];
-            Complex fnkc = conj(src[ncfft-k]);
-            Complex fek = fk + fnkc;
-            Complex tmp = fk - fnkc;
-            Complex fok = tmp * conj(rtw[k-1]);
-            m_tmpBuf[k] = fek + fok;
-            m_tmpBuf[ncfft-k] = conj(fek - fok);
-          }
-          scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft );
-          get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf[0], 1,1);
-        }
-      }
+  PlanMap m_plans;
+  std::map<int, std::vector<Complex> > m_realTwiddles;
+  std::vector<Complex> m_tmpBuf1;
+  std::vector<Complex> m_tmpBuf2;
 
-      protected:
-      typedef ei_kiss_cpx_fft<Scalar> PlanData;
-      typedef std::map<int,PlanData> PlanMap;
-
-      PlanMap m_plans;
-      std::map<int, std::vector<Complex> > m_realTwiddles;
-      std::vector<Complex> m_tmpBuf;
-
-      inline
-      int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
-
-      inline
-      PlanData & get_plan(int nfft,bool inverse)
-      {
-        // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
-        PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
-        if ( pd.m_twiddles.size() == 0 ) {
-          pd.make_twiddles(nfft,inverse);
-          pd.factorize(nfft);
-        }
-        return pd;
-      }
+  inline
+    int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
 
-      inline
-      Complex * real_twiddles(int ncfft2)
-      {
-        std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
-        if ( (int)twidref.size() != ncfft2 ) {
-          twidref.resize(ncfft2);
-          int ncfft= ncfft2<<1;
-          Scalar pi =  acos( Scalar(-1) );
-          for (int k=1;k<=ncfft2;++k) 
-            twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) );
-        }
-        return &twidref[0];
+  inline
+    PlanData & get_plan(int nfft,bool inverse)
+    {
+      // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
+      PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
+      if ( pd.m_twiddles.size() == 0 ) {
+        pd.make_twiddles(nfft,inverse);
+        pd.factorize(nfft);
       }
+      return pd;
+    }
 
-      // TODO move scaling up into Eigen::FFT
-      inline
-      void scale(Complex *dst,int n,Scalar s) 
-      {
-        for (int k=0;k<n;++k)
-          dst[k] *= s;
+  inline
+    Complex * real_twiddles(int ncfft2)
+    {
+      std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
+      if ( (int)twidref.size() != ncfft2 ) {
+        twidref.resize(ncfft2);
+        int ncfft= ncfft2<<1;
+        Scalar pi =  acos( Scalar(-1) );
+        for (int k=1;k<=ncfft2;++k) 
+          twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) );
       }
-    };
+      return &twidref[0];
+    }
+};
+
+/* vim: set filetype=cpp et sw=2 ts=2 ai: */
+
author	Benoit Jacob <jacob.benoit.1@gmail.com>	2009-11-09 09:08:03 -0500
committer	Benoit Jacob <jacob.benoit.1@gmail.com>	2009-11-09 09:08:03 -0500
commit	92749eed11d000300cfa54654f1043cd52399ed8 (patch)
tree	ba227522582b2f9f4280ed1404e74c654e21ccb3 /unsupported/Eigen/src
parent	4b366b07be4e409239c61158a23d93e8ebf3811b (diff)
parent	670651e2e0932c5edfe2a2da4b9f3c42af3b7dec (diff)