From 92ca9fc032ecaa7f9595f5c5f7d9d8df6c972bbe Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Tue, 19 May 2009 00:21:04 -0400
Subject: initial pass of FFT module -- includes complex 1-d case only

---
 unsupported/Eigen/src/FFT/simple_fft_traits.h | 296 ++++++++++++++++++++++++++
 1 file changed, 296 insertions(+)
 create mode 100644 unsupported/Eigen/src/FFT/simple_fft_traits.h

(limited to 'unsupported/Eigen/src')
diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h
new file mode 100644
index 000000000..fe9d24b84
--- /dev/null
+++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h
@@ -0,0 +1,296 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include <complex>
+#include <vector>
+
+namespace Eigen {
+
+  template <typename _Scalar>
+  struct simple_fft_traits
+  {
+    typedef _Scalar Scalar;
+    typedef std::complex<Scalar> Complex;
+    simple_fft_traits() : m_nfft(0) {} 
+
+    void prepare(int nfft,bool inverse,Complex * dst,const Complex *src)
+    {
+      if (m_nfft == nfft) {
+        // reuse the twiddles, conjugate if necessary
+        if (inverse != m_inverse)
+          for (int i=0;i<nfft;++i)
+            m_twiddles[i] = conj( m_twiddles[i] );
+        m_inverse = inverse;
+        return;
+      }
+      m_nfft = nfft;
+      m_inverse = inverse;
+      m_twiddles.resize(nfft);
+      Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+      for (int i=0;i<nfft;++i)
+        m_twiddles[i] = exp( Complex(0,i*phinc) );
+
+      m_stageRadix.resize(0);
+      m_stageRemainder.resize(0);
+      //factorize
+      //start factoring out 4's, then 2's, then 3,5,7,9,...
+      int n= nfft;
+      int p=4;
+      do {
+        while (n % p) {
+          switch (p) {
+            case 4: p = 2; break;
+            case 2: p = 3; break;
+            default: p += 2; break;
+          }
+          if (p*p>n)
+            p=n;// no more factors
+        }
+        n /= p;
+        m_stageRadix.push_back(p);
+        m_stageRemainder.push_back(n);
+      }while(n>1);
+    }
+
+    void exec(Complex * dst, const Complex * src)
+    {
+      work(0, dst, src, 1,1);
+    }
+
+    void postprocess(Complex *dst) 
+    {
+        if (m_inverse) {
+            Scalar scale = 1./m_nfft;
+            for (int k=0;k<m_nfft;++k)
+                dst[k] *= scale;
+        }
+    }
+  private:
+    void work( int stage,Complex * Fout, const Complex * f, size_t fstride,size_t in_stride)
+    {
+      int p = m_stageRadix[stage];
+      int m = m_stageRemainder[stage];
+      Complex * Fout_beg = Fout;
+      Complex * Fout_end = Fout + p*m;
+
+      if (m==1) {
+        do{
+          *Fout = *f;
+          f += fstride*in_stride;
+        }while(++Fout != Fout_end );
+      }else{
+        do{
+          // recursive call:
+          // DFT of size m*p performed by doing
+          // p instances of smaller DFTs of size m, 
+          // each one takes a decimated version of the input
+          work(stage+1, Fout , f, fstride*p,in_stride);
+          f += fstride*in_stride;
+        }while( (Fout += m) != Fout_end );
+      }
+
+      Fout=Fout_beg;
+
+      // recombine the p smaller DFTs 
+      switch (p) {
+        case 2: bfly2(Fout,fstride,m); break;
+        case 3: bfly3(Fout,fstride,m); break;
+        case 4: bfly4(Fout,fstride,m); break;
+        case 5: bfly5(Fout,fstride,m); break;
+        default: bfly_generic(Fout,fstride,m,p); break;
+      }
+    }
+
+    void bfly2( Complex * Fout, const size_t fstride, int m)
+    {
+      for (int k=0;k<m;++k) {
+        Complex t = Fout[m+k] * m_twiddles[k*fstride];
+        Fout[m+k] = Fout[k] - t;
+        Fout[k] += t;
+      }
+    }
+
+    void bfly4( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex scratch[7];
+      int negative_if_inverse = m_inverse * -2 +1;
+      for (size_t k=0;k<m;++k) {
+        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+        scratch[5] = Fout[k] - scratch[1];
+
+        Fout[k] += scratch[1];
+        scratch[3] = scratch[0] + scratch[2];
+        scratch[4] = scratch[0] - scratch[2];
+        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+        Fout[k+2*m]  = Fout[k] - scratch[3];
+        Fout[k] += scratch[3];
+        Fout[k+m] = scratch[5] + scratch[4];
+        Fout[k+3*m] = scratch[5] - scratch[4];
+      }
+    }
+
+    void bfly3( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      size_t k=m;
+      const size_t m2 = 2*m;
+      Complex *tw1,*tw2;
+      Complex scratch[5];
+      Complex epi3;
+      epi3 = m_twiddles[fstride*m];
+
+      tw1=tw2=&m_twiddles[0];
+
+      do{
+        scratch[1]=Fout[m] * *tw1;
+        scratch[2]=Fout[m2] * *tw2;
+
+        scratch[3]=scratch[1]+scratch[2];
+        scratch[0]=scratch[1]-scratch[2];
+        tw1 += fstride;
+        tw2 += fstride*2;
+
+        Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
+
+        scratch[0] *= epi3.imag();
+
+        *Fout += scratch[3];
+
+        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+
+        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+        ++Fout;
+      }while(--k);
+    }
+
+    void bfly5( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+      size_t u;
+      Complex scratch[13];
+      Complex * twiddles = &m_twiddles[0];
+      Complex *tw;
+      Complex ya,yb;
+      ya = twiddles[fstride*m];
+      yb = twiddles[fstride*2*m];
+
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      tw=twiddles;
+      for ( u=0; u<m; ++u ) {
+        scratch[0] = *Fout0;
+
+        scratch[1]  = *Fout1 * tw[u*fstride];
+        scratch[2]  = *Fout2 * tw[2*u*fstride];
+        scratch[3]  = *Fout3 * tw[3*u*fstride];
+        scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+        scratch[7] = scratch[1] + scratch[4];
+        scratch[10] = scratch[1] - scratch[4];
+        scratch[8] = scratch[2] + scratch[3];
+        scratch[9] = scratch[2] - scratch[3];
+
+        *Fout0 +=  scratch[7];
+        *Fout0 +=  scratch[8];
+
+        scratch[5] = scratch[0] + Complex(
+            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+            );
+
+        scratch[6] = Complex(
+            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+            );
+
+        *Fout1 = scratch[5] - scratch[6];
+        *Fout4 = scratch[5] + scratch[6];
+
+        scratch[11] = scratch[0] +
+          Complex(
+              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+              );
+
+        scratch[12] = Complex(
+            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+            );
+
+        *Fout2=scratch[11]+scratch[12];
+        *Fout3=scratch[11]-scratch[12];
+
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+    }
+
+    /* perform the butterfly for one stage of a mixed radix FFT */
+    void bfly_generic(
+        Complex * Fout,
+        const size_t fstride,
+        int m,
+        int p
+        )
+    {
+      int u,k,q1,q;
+      Complex * twiddles = &m_twiddles[0];
+      Complex t;
+      int Norig = m_nfft;
+      Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
+
+      for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          scratchbuf[q1] = Fout[ k  ];
+          k += m;
+        }
+
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          int twidx=0;
+          Fout[ k ] = scratchbuf[0];
+          for (q=1;q<p;++q ) {
+            twidx += fstride * k;
+            if (twidx>=Norig) twidx-=Norig;
+            t=scratchbuf[q] * twiddles[twidx];
+            Fout[ k ] += t;
+          }
+          k += m;
+        }
+      }
+    }
+
+    int m_nfft;
+    bool m_inverse;
+    std::vector<Complex> m_twiddles;
+    std::vector<int> m_stageRadix;
+    std::vector<int> m_stageRemainder;
+  };
+}
-- 
cgit v1.2.3


From 68cad98bc935e53102a9432560085b81c5766743 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Tue, 19 May 2009 00:34:38 -0400
Subject: indent level change

---
 unsupported/Eigen/src/FFT/simple_fft_traits.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h
index fe9d24b84..6fbbeac2e 100644
--- a/unsupported/Eigen/src/FFT/simple_fft_traits.h
+++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h
@@ -80,13 +80,15 @@ namespace Eigen {
 
     void postprocess(Complex *dst) 
     {
-        if (m_inverse) {
-            Scalar scale = 1./m_nfft;
-            for (int k=0;k<m_nfft;++k)
-                dst[k] *= scale;
-        }
+      if (m_inverse) {
+        Scalar scale = 1./m_nfft;
+        for (int k=0;k<m_nfft;++k)
+          dst[k] *= scale;
+      }
     }
-  private:
+
+    private:
+
     void work( int stage,Complex * Fout, const Complex * f, size_t fstride,size_t in_stride)
     {
       int p = m_stageRadix[stage];
-- 
cgit v1.2.3


From 8b4afe3debb47bf15ea291a7f2d21d863d546536 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Fri, 22 May 2009 22:37:59 -0400
Subject: added non-optimized real forward FFT (no inverse yet)

---
 unsupported/Eigen/FFT.h                       |  27 ++++--
 unsupported/Eigen/src/FFT/simple_fft_traits.h |  10 ++-
 unsupported/test/FFT.cpp                      | 117 ++++++++++++++++++--------
 3 files changed, 108 insertions(+), 46 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h
index 03490d2c5..a1f87a609 100644
--- a/unsupported/Eigen/FFT.h
+++ b/unsupported/Eigen/FFT.h
@@ -57,21 +57,36 @@ class FFT
 
     FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { }
 
-    void fwd( Complex * dst, const Complex * src, int nfft)
+    template <typename _Input>
+    void fwd( Complex * dst, const _Input * src, int nfft)
     {
       m_traits.prepare(nfft,false,dst,src);
       m_traits.exec(dst,src);
       m_traits.postprocess(dst);
     }
 
-    void inv( Complex * dst, const Complex * src, int nfft)
+    template <typename _Input>
+    void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src) 
     {
-      m_traits.prepare(nfft,true,dst,src);
-      m_traits.exec(dst,src);
-      m_traits.postprocess(dst);
+        dst.resize( src.size() );
+        fwd( &dst[0],&src[0],src.size() );
+    }
+
+    template <typename _Output>
+    void inv( _Output * dst, const Complex * src, int nfft)
+    {
+        m_traits.prepare(nfft,true,dst,src);
+        m_traits.exec(dst,src);
+        m_traits.postprocess(dst);
+    }
+
+    template <typename _Output>
+    void inv( std::vector<_Output> & dst, const std::vector<Complex> & src) 
+    {
+        dst.resize( src.size() );
+        inv( &dst[0],&src[0],src.size() );
     }
 
-    // TODO: fwd,inv for Scalar
     // TODO: multi-dimensional FFTs
     // TODO: handle Eigen MatrixBase
 
diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h
index 6fbbeac2e..5a910dd1f 100644
--- a/unsupported/Eigen/src/FFT/simple_fft_traits.h
+++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h
@@ -34,7 +34,8 @@ namespace Eigen {
     typedef std::complex<Scalar> Complex;
     simple_fft_traits() : m_nfft(0) {} 
 
-    void prepare(int nfft,bool inverse,Complex * dst,const Complex *src)
+    template <typename _Src>
+    void prepare(int nfft,bool inverse,Complex * dst,const _Src *src)
     {
       if (m_nfft == nfft) {
         // reuse the twiddles, conjugate if necessary
@@ -73,7 +74,8 @@ namespace Eigen {
       }while(n>1);
     }
 
-    void exec(Complex * dst, const Complex * src)
+    template <typename _Src>
+    void exec(Complex * dst, const _Src * src)
     {
       work(0, dst, src, 1,1);
     }
@@ -89,7 +91,9 @@ namespace Eigen {
 
     private:
 
-    void work( int stage,Complex * Fout, const Complex * f, size_t fstride,size_t in_stride)
+   
+    template <typename _Src>
+    void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride)
     {
       int p = m_stageRadix[stage];
       int m = m_stageRemainder[stage];
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index 8347bb76b..ef03359e2 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -25,55 +25,98 @@
 #include "main.h"
 #include <unsupported/Eigen/FFT.h>
 
+
 using namespace std;
 
+template < typename T>
+complex<long double>  promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+
+complex<long double>  promote(float x) { return complex<long double>( x); }
+complex<long double>  promote(double x) { return complex<long double>( x); }
+complex<long double>  promote(long double x) { return complex<long double>( x); }
+    
+
+    template <typename T1,typename T2>
+    long double fft_rmse( const vector<T1> & fftbuf,const vector<T2> & timebuf)
+    {
+        long double totalpower=0;
+        long double difpower=0;
+        for (size_t k0=0;k0<fftbuf.size();++k0) {
+            complex<long double> acc = 0;
+            long double phinc = -2.*k0* M_PIl / timebuf.size();
+            for (size_t k1=0;k1<timebuf.size();++k1) {
+                acc +=  promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
+            }
+            totalpower += norm(acc);
+            complex<long double> x = promote(fftbuf[k0]); 
+            complex<long double> dif = acc - x;
+            difpower += norm(dif);
+            cerr << k0 << ":" << acc << " " <<  x << endl;
+        }
+        cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
+        return sqrt(difpower/totalpower);
+    }
+
+    template <typename T1,typename T2>
+    long double dif_rmse( const vector<T1> buf1,const vector<T2> buf2)
+    {
+        long double totalpower=0;
+        long double difpower=0;
+        size_t n = min( buf1.size(),buf2.size() );
+        for (size_t k=0;k<n;++k) {
+            totalpower += (norm( buf1[k] ) + norm(buf2[k]) )/2.;
+            difpower += norm(buf1[k] - buf2[k]);
+        }
+        return sqrt(difpower/totalpower);
+    }
+
+template <class T>
+void test_scalar(int nfft)
+{
+    typedef typename Eigen::FFT<T>::Complex Complex;
+    typedef typename Eigen::FFT<T>::Scalar Scalar;
+
+    FFT<T> fft;
+    vector<Scalar> inbuf(nfft);
+    vector<Complex> outbuf;
+    for (int k=0;k<nfft;++k)
+        inbuf[k]= (T)(rand()/(double)RAND_MAX - .5);
+    fft.fwd( outbuf,inbuf);
+    VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check
+}
+
 template <class T>
-void test_fft(int nfft)
+void test_complex(int nfft)
 {
     typedef typename Eigen::FFT<T>::Complex Complex;
 
     FFT<T> fft;
 
     vector<Complex> inbuf(nfft);
-    vector<Complex> buf3(nfft);
-    vector<Complex> outbuf(nfft);
+    vector<Complex> outbuf;
+    vector<Complex> buf3;
     for (int k=0;k<nfft;++k)
-        inbuf[k]= Complex( 
-                (T)(rand()/(double)RAND_MAX - .5),
-                (T)(rand()/(double)RAND_MAX - .5) );
-    fft.fwd( &outbuf[0] , &inbuf[0] ,nfft);
-    fft.inv( &buf3[0] , &outbuf[0] ,nfft);
-
-    long double totalpower=0;
-    long double difpower=0;
-    for (int k0=0;k0<nfft;++k0) {
-        complex<long double> acc = 0;
-        long double phinc = 2*k0* M_PIl / nfft;
-        for (int k1=0;k1<nfft;++k1) {
-            complex<long double> x(inbuf[k1].real(),inbuf[k1].imag()); 
-            acc += x * exp( complex<long double>(0,-k1*phinc) );
-        }
-        totalpower += norm(acc);
-        complex<long double> x(outbuf[k0].real(),outbuf[k0].imag()); 
-        complex<long double> dif = acc - x;
-        difpower += norm(dif);
-    }
-    long double rmse = sqrt(difpower/totalpower);
-    VERIFY( rmse < 1e-5 );// gross check
-
-    totalpower=0;
-    difpower=0;
-    for (int k=0;k<nfft;++k) {
-        totalpower += norm( inbuf[k] );
-        difpower += norm(inbuf[k] - buf3[k]);
-    }
-    rmse = sqrt(difpower/totalpower);
-    VERIFY( rmse < 1e-5 );// gross check
+        inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
+    fft.fwd( outbuf , inbuf);
+
+    VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check
+
+    fft.inv( buf3 , outbuf);
+
+    VERIFY( dif_rmse(inbuf,buf3) < 1e-5 );// gross check
 }
 
 void test_FFT()
 {
-  CALL_SUBTEST(( test_fft<float>(32) )); CALL_SUBTEST(( test_fft<double>(32) )); CALL_SUBTEST(( test_fft<long double>(32) ));
-  CALL_SUBTEST(( test_fft<float>(1024) )); CALL_SUBTEST(( test_fft<double>(1024) )); CALL_SUBTEST(( test_fft<long double>(1024) ));
-  CALL_SUBTEST(( test_fft<float>(2*3*4*5*7) )); CALL_SUBTEST(( test_fft<double>(2*3*4*5*7) )); CALL_SUBTEST(( test_fft<long double>(2*3*4*5*7) ));
+  CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) ); CALL_SUBTEST( test_complex<long double>(32) );
+  CALL_SUBTEST( test_complex<float>(1024) ); CALL_SUBTEST( test_complex<double>(1024) ); CALL_SUBTEST( test_complex<long double>(1024) );
+  CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) ); CALL_SUBTEST( test_complex<long double>(3*8) );
+  CALL_SUBTEST( test_complex<float>(5*32) ); CALL_SUBTEST( test_complex<double>(5*32) ); CALL_SUBTEST( test_complex<long double>(5*32) );
+  CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) ); CALL_SUBTEST( test_complex<long double>(2*3*4) );
+  CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
+  CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
+
+  CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) ); CALL_SUBTEST( test_scalar<long double>(32) );
+  CALL_SUBTEST( test_scalar<float>(1024) ); CALL_SUBTEST( test_scalar<double>(1024) ); CALL_SUBTEST( test_scalar<long double>(1024) );
+  CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
 }
-- 
cgit v1.2.3


From 9c0fcd0f6213143216710a5b215aa2bb4a857ce5 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Sat, 23 May 2009 10:09:48 -0400
Subject: started real-input optimization, added benchmark for FFT

---
 bench/benchFFT.cpp                            |  64 +++++++++++++
 unsupported/Eigen/FFT.h                       |   8 +-
 unsupported/Eigen/src/FFT/simple_fft_traits.h | 125 ++++++++++++++++++--------
 unsupported/test/FFT.cpp                      |   3 +-
 4 files changed, 157 insertions(+), 43 deletions(-)
 create mode 100644 bench/benchFFT.cpp

(limited to 'unsupported/Eigen/src')

diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp
new file mode 100644
index 000000000..041576b75
--- /dev/null
+++ b/bench/benchFFT.cpp
@@ -0,0 +1,64 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include <complex>
+#include <vector>
+#include <Eigen/Core>
+#include <bench/BenchTimer.h>
+#include <unsupported/Eigen/FFT.h>
+
+using namespace Eigen;
+using namespace std;
+
+#ifndef NFFT
+#define NFFT 1024
+#endif
+
+#ifndef TYPE
+#define TYPE float
+#endif
+
+#ifndef NITS 
+#define NITS (10000000/NFFT)
+#endif
+
+int main() 
+{
+  vector<complex<TYPE> > inbuf(NFFT);
+  vector<complex<TYPE> > outbuf(NFFT);
+  Eigen::FFT<TYPE> fft;
+
+  fft.fwd( outbuf , inbuf);
+
+  BenchTimer timer;
+  timer.reset();
+  for (int k=0;k<8;++k) {
+      timer.start();
+      for(int i = 0; i < NITS; i++)
+          fft.fwd( outbuf , inbuf);
+      timer.stop();
+  }
+  double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS );
+  cout << "NFFT=" << NFFT << "  " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
+}
diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h
index a1f87a609..c466423b7 100644
--- a/unsupported/Eigen/FFT.h
+++ b/unsupported/Eigen/FFT.h
@@ -60,9 +60,7 @@ class FFT
     template <typename _Input>
     void fwd( Complex * dst, const _Input * src, int nfft)
     {
-      m_traits.prepare(nfft,false,dst,src);
-      m_traits.exec(dst,src);
-      m_traits.postprocess(dst);
+        m_traits.fwd(dst,src,nfft);
     }
 
     template <typename _Input>
@@ -75,9 +73,7 @@ class FFT
     template <typename _Output>
     void inv( _Output * dst, const Complex * src, int nfft)
     {
-        m_traits.prepare(nfft,true,dst,src);
-        m_traits.exec(dst,src);
-        m_traits.postprocess(dst);
+        m_traits.inv( dst,src,nfft );
     }
 
     template <typename _Output>
diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h
index 5a910dd1f..33433ae03 100644
--- a/unsupported/Eigen/src/FFT/simple_fft_traits.h
+++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h
@@ -35,7 +35,73 @@ namespace Eigen {
     simple_fft_traits() : m_nfft(0) {} 
 
     template <typename _Src>
-    void prepare(int nfft,bool inverse,Complex * dst,const _Src *src)
+    void fwd( Complex * dst,const _Src *src,int nfft)
+    {
+        prepare(nfft,false);
+        work(0, dst, src, 1,1);
+        scale(dst);
+    }
+
+    void fwd( Complex * dst,const Scalar * src,int nfft) 
+    {
+        if ( nfft&1 ) {
+            // use generic mode for odd
+            prepare(nfft,false);
+            work(0, dst, src, 1,1);
+            scale(dst);
+        }else{
+            int ncfft = nfft>>1;
+            // use optimized mode for even real
+            prepare(nfft,false);
+            work(0,dst, reinterpret_cast<const Complex*> (src),2,1);
+            Complex dc = dst[0].real() +  dst[0].imag();
+            Complex nyquist = dst[0].real() -  dst[0].imag();
+            
+            int k;
+            for ( k=1;k <= ncfft/2 ; ++k ) {
+/**
+        fpk    = st->tmpbuf[k];
+        fpnk.r =   st->tmpbuf[ncfft-k].r;
+        fpnk.i = - st->tmpbuf[ncfft-k].i;
+
+        C_ADD( f1k, fpk , fpnk );
+        C_SUB( f2k, fpk , fpnk );
+        C_MUL( tw , f2k , st->super_twiddles[k-1]);
+
+        freqdata[k].r = HALF_OF(f1k.r + tw.r);
+        freqdata[k].i = HALF_OF(f1k.i + tw.i);
+        freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
+        freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
+ */
+                Complex fpk = dst[k];
+                Complex fpnk = conj(dst[ncfft-k]);
+
+                Complex f1k = fpk + fpnk;
+                Complex f2k = fpnk - fpk;
+                //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles
+                Complex tw = f2k * m_twiddles[2*k];;
+                
+                dst[k] =  (f1k + tw) * Scalar(.5);
+                dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
+            }
+            // place conjugate-symmetric half at the end for completeness
+            // TODO: make this configurable ( opt-out )
+            for ( k=1;k < ncfft ; ++k )
+                dst[nfft-k] = conj(dst[k]);
+
+            dst[0] = dc;
+            dst[ncfft] = nyquist;
+        }
+    }
+
+    void inv(Complex * dst,const Complex  *src,int nfft)
+    {
+        prepare(nfft,true);
+        work(0, dst, src, 1,1);
+        scale(dst);
+    }
+
+    void prepare(int nfft,bool inverse)
     {
       if (m_nfft == nfft) {
         // reuse the twiddles, conjugate if necessary
@@ -74,57 +140,49 @@ namespace Eigen {
       }while(n>1);
     }
 
-    template <typename _Src>
-    void exec(Complex * dst, const _Src * src)
-    {
-      work(0, dst, src, 1,1);
-    }
-
-    void postprocess(Complex *dst) 
+    void scale(Complex *dst) 
     {
       if (m_inverse) {
-        Scalar scale = 1./m_nfft;
+        Scalar s = 1./m_nfft;
         for (int k=0;k<m_nfft;++k)
-          dst[k] *= scale;
+          dst[k] *= s;
       }
     }
 
     private:
 
-   
     template <typename _Src>
-    void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride)
+    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
     {
       int p = m_stageRadix[stage];
       int m = m_stageRemainder[stage];
-      Complex * Fout_beg = Fout;
-      Complex * Fout_end = Fout + p*m;
+      Complex * Fout_beg = xout;
+      Complex * Fout_end = xout + p*m;
 
-      if (m==1) {
-        do{
-          *Fout = *f;
-          f += fstride*in_stride;
-        }while(++Fout != Fout_end );
-      }else{
+      if (m>1) {
         do{
           // recursive call:
           // DFT of size m*p performed by doing
           // p instances of smaller DFTs of size m, 
           // each one takes a decimated version of the input
-          work(stage+1, Fout , f, fstride*p,in_stride);
-          f += fstride*in_stride;
-        }while( (Fout += m) != Fout_end );
+          work(stage+1, xout , xin, fstride*p,in_stride);
+          xin += fstride*in_stride;
+        }while( (xout += m) != Fout_end );
+      }else{
+          do{
+              *xout = *xin;
+              xin += fstride*in_stride;
+          }while(++xout != Fout_end );
       }
-
-      Fout=Fout_beg;
+      xout=Fout_beg;
 
       // recombine the p smaller DFTs 
       switch (p) {
-        case 2: bfly2(Fout,fstride,m); break;
-        case 3: bfly3(Fout,fstride,m); break;
-        case 4: bfly4(Fout,fstride,m); break;
-        case 5: bfly5(Fout,fstride,m); break;
-        default: bfly_generic(Fout,fstride,m,p); break;
+        case 2: bfly2(xout,fstride,m); break;
+        case 3: bfly3(xout,fstride,m); break;
+        case 4: bfly4(xout,fstride,m); break;
+        case 5: bfly5(xout,fstride,m); break;
+        default: bfly_generic(xout,fstride,m,p); break;
       }
     }
 
@@ -139,7 +197,7 @@ namespace Eigen {
 
     void bfly4( Complex * Fout, const size_t fstride, const size_t m)
     {
-      Complex scratch[7];
+      Complex scratch[6];
       int negative_if_inverse = m_inverse * -2 +1;
       for (size_t k=0;k<m;++k) {
         scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
@@ -178,15 +236,10 @@ namespace Eigen {
         scratch[0]=scratch[1]-scratch[2];
         tw1 += fstride;
         tw2 += fstride*2;
-
         Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
-
         scratch[0] *= epi3.imag();
-
         *Fout += scratch[3];
-
         Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
-
         Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
         ++Fout;
       }while(--k);
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index ef03359e2..13e98ba77 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -115,8 +115,9 @@ void test_FFT()
   CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) ); CALL_SUBTEST( test_complex<long double>(2*3*4) );
   CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
   CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
-
+/*
   CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) ); CALL_SUBTEST( test_scalar<long double>(32) );
   CALL_SUBTEST( test_scalar<float>(1024) ); CALL_SUBTEST( test_scalar<double>(1024) ); CALL_SUBTEST( test_scalar<long double>(1024) );
   CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
+  */
 }
-- 
cgit v1.2.3


From 304798817268706463f3ff645c8c8b2c887c090a Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Sat, 23 May 2009 12:50:07 -0400
Subject: scalar forward FFT optimized for even size, converts to complex for odd

---
 unsupported/Eigen/src/FFT/simple_fft_traits.h | 152 +++++++++++++++-----------
 unsupported/test/FFT.cpp                      |  10 +-
 2 files changed, 95 insertions(+), 67 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h
index 33433ae03..f7dd2b9cf 100644
--- a/unsupported/Eigen/src/FFT/simple_fft_traits.h
+++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h
@@ -24,6 +24,7 @@
 
 #include <complex>
 #include <vector>
+#include <iostream>
 
 namespace Eigen {
 
@@ -39,51 +40,54 @@ namespace Eigen {
     {
         prepare(nfft,false);
         work(0, dst, src, 1,1);
-        scale(dst);
     }
 
+    // real-to-complex forward FFT
+    // perform two FFTs of src even and src odd
+    // then twiddle to recombine them into the half-spectrum format
+    // then fill in the conjugate symmetric half
     void fwd( Complex * dst,const Scalar * src,int nfft) 
     {
         if ( nfft&1 ) {
             // use generic mode for odd
             prepare(nfft,false);
             work(0, dst, src, 1,1);
-            scale(dst);
         }else{
             int ncfft = nfft>>1;
             // use optimized mode for even real
-            prepare(nfft,false);
-            work(0,dst, reinterpret_cast<const Complex*> (src),2,1);
+            fwd( dst, reinterpret_cast<const Complex*> (src),ncfft);
+            make_real_twiddles(nfft);
+            std::cerr << "dst[0] = " << dst[0] << "\n";
             Complex dc = dst[0].real() +  dst[0].imag();
             Complex nyquist = dst[0].real() -  dst[0].imag();
-            
             int k;
-            for ( k=1;k <= ncfft/2 ; ++k ) {
-/**
-        fpk    = st->tmpbuf[k];
-        fpnk.r =   st->tmpbuf[ncfft-k].r;
-        fpnk.i = - st->tmpbuf[ncfft-k].i;
-
-        C_ADD( f1k, fpk , fpnk );
-        C_SUB( f2k, fpk , fpnk );
-        C_MUL( tw , f2k , st->super_twiddles[k-1]);
-
-        freqdata[k].r = HALF_OF(f1k.r + tw.r);
-        freqdata[k].i = HALF_OF(f1k.i + tw.i);
-        freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
-        freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
- */
+#if 0
+            using namespace std;
+            cerr << "desired:\n";
+            for ( k=1;k <= (ncfft>>1) ; ++k ) {
+                Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
+                cerr << k << " " << x << "angle(x):" << arg(x) << "\n";
+            }
+            dc=0;
+            cerr << "twiddles:\n";
+            for (k=0;k<ncfft;++k) {
+                Complex x = m_twiddles[k];
+                cerr << k << " " << x << "angle(-x):" << arg(-x) << "\n";
+            }
+#endif
+            for ( k=1;k <= (ncfft>>1) ; ++k ) {
                 Complex fpk = dst[k];
                 Complex fpnk = conj(dst[ncfft-k]);
-
                 Complex f1k = fpk + fpnk;
-                Complex f2k = fpnk - fpk;
-                //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles
-                Complex tw = f2k * m_twiddles[2*k];;
-                
+                Complex f2k = fpk - fpnk;
+                //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
+                Complex tw= f2k * m_realTwiddles[k-1];
+
                 dst[k] =  (f1k + tw) * Scalar(.5);
                 dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
             }
+ 
+
             // place conjugate-symmetric half at the end for completeness
             // TODO: make this configurable ( opt-out )
             for ( k=1;k < ncfft ; ++k )
@@ -98,55 +102,74 @@ namespace Eigen {
     {
         prepare(nfft,true);
         work(0, dst, src, 1,1);
-        scale(dst);
+        scale(dst, Scalar(1)/m_nfft );
     }
 
     void prepare(int nfft,bool inverse)
     {
-      if (m_nfft == nfft) {
-        // reuse the twiddles, conjugate if necessary
-        if (inverse != m_inverse)
-          for (int i=0;i<nfft;++i)
-            m_twiddles[i] = conj( m_twiddles[i] );
+        make_twiddles(nfft,inverse);
+        factorize(nfft);
+    }
+
+    void make_real_twiddles(int nfft)
+    {
+        int ncfft2 = nfft>>2;
+        if ( m_realTwiddles.size() != ncfft2) {
+            m_realTwiddles.resize(ncfft2);
+            int ncfft= nfft>>1;
+            for (int k=1;k<=ncfft2;++k) 
+                m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
+        }
+    }
+
+    void make_twiddles(int nfft,bool inverse)
+    {
+        if ( m_twiddles.size() == nfft) {
+            // reuse the twiddles, conjugate if necessary
+            if (inverse != m_inverse)
+                for (int i=0;i<nfft;++i)
+                    m_twiddles[i] = conj( m_twiddles[i] );
+        }else{
+            m_twiddles.resize(nfft);
+            Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+            for (int i=0;i<nfft;++i)
+                m_twiddles[i] = exp( Complex(0,i*phinc) );
+        }
         m_inverse = inverse;
-        return;
-      }
-      m_nfft = nfft;
-      m_inverse = inverse;
-      m_twiddles.resize(nfft);
-      Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
-      for (int i=0;i<nfft;++i)
-        m_twiddles[i] = exp( Complex(0,i*phinc) );
-
-      m_stageRadix.resize(0);
-      m_stageRemainder.resize(0);
-      //factorize
-      //start factoring out 4's, then 2's, then 3,5,7,9,...
-      int n= nfft;
-      int p=4;
-      do {
-        while (n % p) {
-          switch (p) {
-            case 4: p = 2; break;
-            case 2: p = 3; break;
-            default: p += 2; break;
-          }
-          if (p*p>n)
-            p=n;// no more factors
+    }
+
+    void factorize(int nfft)
+    {
+        if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft)
+        {
+            m_stageRadix.resize(0);
+            m_stageRemainder.resize(0);
+            //factorize
+            //start factoring out 4's, then 2's, then 3,5,7,9,...
+            int n= nfft;
+            int p=4;
+            do {
+                while (n % p) {
+                    switch (p) {
+                        case 4: p = 2; break;
+                        case 2: p = 3; break;
+                        default: p += 2; break;
+                    }
+                    if (p*p>n)
+                        p=n;// no more factors
+                }
+                n /= p;
+                m_stageRadix.push_back(p);
+                m_stageRemainder.push_back(n);
+            }while(n>1);
         }
-        n /= p;
-        m_stageRadix.push_back(p);
-        m_stageRemainder.push_back(n);
-      }while(n>1);
+        m_nfft = nfft;
     }
 
-    void scale(Complex *dst) 
+    void scale(Complex *dst,Scalar s) 
     {
-      if (m_inverse) {
-        Scalar s = 1./m_nfft;
         for (int k=0;k<m_nfft;++k)
-          dst[k] *= s;
-      }
+            dst[k] *= s;
     }
 
     private:
@@ -349,6 +372,7 @@ namespace Eigen {
     int m_nfft;
     bool m_inverse;
     std::vector<Complex> m_twiddles;
+    std::vector<Complex> m_realTwiddles;
     std::vector<int> m_stageRadix;
     std::vector<int> m_stageRemainder;
   };
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index 13e98ba77..41c7fed7b 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -41,6 +41,7 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
     {
         long double totalpower=0;
         long double difpower=0;
+        cerr <<"idx\ttruth\t\tvalue\n";
         for (size_t k0=0;k0<fftbuf.size();++k0) {
             complex<long double> acc = 0;
             long double phinc = -2.*k0* M_PIl / timebuf.size();
@@ -51,7 +52,7 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
             complex<long double> x = promote(fftbuf[k0]); 
             complex<long double> dif = acc - x;
             difpower += norm(dif);
-            cerr << k0 << ":" << acc << " " <<  x << endl;
+            cerr << k0 << "\t" << acc << "\t" <<  x << endl;
         }
         cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
         return sqrt(difpower/totalpower);
@@ -108,6 +109,7 @@ void test_complex(int nfft)
 
 void test_FFT()
 {
+#if 0
   CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) ); CALL_SUBTEST( test_complex<long double>(32) );
   CALL_SUBTEST( test_complex<float>(1024) ); CALL_SUBTEST( test_complex<double>(1024) ); CALL_SUBTEST( test_complex<long double>(1024) );
   CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) ); CALL_SUBTEST( test_complex<long double>(3*8) );
@@ -115,9 +117,11 @@ void test_FFT()
   CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) ); CALL_SUBTEST( test_complex<long double>(2*3*4) );
   CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
   CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
-/*
+#endif
+
+#if 1
   CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) ); CALL_SUBTEST( test_scalar<long double>(32) );
   CALL_SUBTEST( test_scalar<float>(1024) ); CALL_SUBTEST( test_scalar<double>(1024) ); CALL_SUBTEST( test_scalar<long double>(1024) );
   CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
-  */
+#endif
 }
-- 
cgit v1.2.3


From 326ea773908c2d7e46101085af8f72d20b3f8cbc Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Sat, 23 May 2009 22:50:07 -0400
Subject: added FFT inverse complex-to-scalar interface (not yet optimized)

---
 bench/benchFFT.cpp                            | 73 +++++++++++++++++++--------
 unsupported/Eigen/src/FFT/simple_fft_traits.h | 31 +++++-------
 unsupported/test/FFT.cpp                      | 21 +++++---
 3 files changed, 81 insertions(+), 44 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp
index 041576b75..84cc49fe3 100644
--- a/bench/benchFFT.cpp
+++ b/bench/benchFFT.cpp
@@ -31,34 +31,67 @@
 using namespace Eigen;
 using namespace std;
 
-#ifndef NFFT
-#define NFFT 1024
-#endif
+
+template <typename T>
+string nameof();
+
+template <> string nameof<float>() {return "float";}
+template <> string nameof<double>() {return "double";}
+template <> string nameof<long double>() {return "long double";}
 
 #ifndef TYPE
 #define TYPE float
 #endif
 
-#ifndef NITS 
-#define NITS (10000000/NFFT)
+#ifndef NFFT
+#define NFFT 1024
+#endif
+#ifndef NDATA
+#define NDATA 1000000
 #endif
 
-int main() 
+using namespace Eigen;
+
+template <typename T>
+void bench(int nfft)
 {
-  vector<complex<TYPE> > inbuf(NFFT);
-  vector<complex<TYPE> > outbuf(NFFT);
-  Eigen::FFT<TYPE> fft;
+    typedef typename NumTraits<T>::Real Scalar;
+    typedef typename std::complex<Scalar> Complex;
+    int nits = NDATA/nfft;
+    vector<T> inbuf(nfft);
+    vector<Complex > outbuf(nfft);
+    FFT< Scalar > fft;
+
+    fft.fwd( outbuf , inbuf);
+
+    BenchTimer timer;
+    timer.reset();
+    for (int k=0;k<8;++k) {
+        timer.start();
+        for(int i = 0; i < nits; i++)
+            fft.fwd( outbuf , inbuf);
+        timer.stop();
+    }
 
-  fft.fwd( outbuf , inbuf);
+    cout << nameof<Scalar>() << " ";
+    double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits );
+    if ( NumTraits<T>::IsComplex ) {
+        cout << "complex";
+    }else{
+        cout << "real   ";
+        mflops /= 2;
+    }
 
-  BenchTimer timer;
-  timer.reset();
-  for (int k=0;k<8;++k) {
-      timer.start();
-      for(int i = 0; i < NITS; i++)
-          fft.fwd( outbuf , inbuf);
-      timer.stop();
-  }
-  double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS );
-  cout << "NFFT=" << NFFT << "  " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
+    cout << " NFFT=" << nfft << "  " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
+}
+
+int main(int argc,char ** argv)
+{
+    bench<complex<float> >(NFFT);
+    bench<float>(NFFT);
+    bench<complex<double> >(NFFT);
+    bench<double>(NFFT);
+    bench<complex<long double> >(NFFT);
+    bench<long double>(NFFT);
+    return 0;
 }
diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h
index f7dd2b9cf..1e2be8f79 100644
--- a/unsupported/Eigen/src/FFT/simple_fft_traits.h
+++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h
@@ -54,28 +54,14 @@ namespace Eigen {
             work(0, dst, src, 1,1);
         }else{
             int ncfft = nfft>>1;
+            int ncfft2 = nfft>>2;
             // use optimized mode for even real
             fwd( dst, reinterpret_cast<const Complex*> (src),ncfft);
             make_real_twiddles(nfft);
-            std::cerr << "dst[0] = " << dst[0] << "\n";
             Complex dc = dst[0].real() +  dst[0].imag();
             Complex nyquist = dst[0].real() -  dst[0].imag();
             int k;
-#if 0
-            using namespace std;
-            cerr << "desired:\n";
-            for ( k=1;k <= (ncfft>>1) ; ++k ) {
-                Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
-                cerr << k << " " << x << "angle(x):" << arg(x) << "\n";
-            }
-            dc=0;
-            cerr << "twiddles:\n";
-            for (k=0;k<ncfft;++k) {
-                Complex x = m_twiddles[k];
-                cerr << k << " " << x << "angle(-x):" << arg(-x) << "\n";
-            }
-#endif
-            for ( k=1;k <= (ncfft>>1) ; ++k ) {
+            for ( k=1;k <= ncfft2 ; ++k ) {
                 Complex fpk = dst[k];
                 Complex fpnk = conj(dst[ncfft-k]);
                 Complex f1k = fpk + fpnk;
@@ -87,7 +73,6 @@ namespace Eigen {
                 dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
             }
  
-
             // place conjugate-symmetric half at the end for completeness
             // TODO: make this configurable ( opt-out )
             for ( k=1;k < ncfft ; ++k )
@@ -98,6 +83,16 @@ namespace Eigen {
         }
     }
 
+    // half-complex to scalar
+    void inv( Scalar * dst,const Complex * src,int nfft) 
+    {
+        // TODO add optimized version for even numbers
+        std::vector<Complex> tmp(nfft);
+        inv(&tmp[0],src,nfft);
+        for (int k=0;k<nfft;++k)
+            dst[k] = tmp[k].real();
+    }
+
     void inv(Complex * dst,const Complex  *src,int nfft)
     {
         prepare(nfft,true);
@@ -156,7 +151,7 @@ namespace Eigen {
                         default: p += 2; break;
                     }
                     if (p*p>n)
-                        p=n;// no more factors
+                        p=n;// impossible to have a factor > sqrt(n)
                 }
                 n /= p;
                 m_stageRadix.push_back(p);
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index 41c7fed7b..75c33277d 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -28,6 +28,10 @@
 
 using namespace std;
 
+float norm(float x) {return x*x;}
+double norm(double x) {return x*x;}
+long double norm(long double x) {return x*x;}
+
 template < typename T>
 complex<long double>  promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
 
@@ -83,7 +87,11 @@ void test_scalar(int nfft)
     for (int k=0;k<nfft;++k)
         inbuf[k]= (T)(rand()/(double)RAND_MAX - .5);
     fft.fwd( outbuf,inbuf);
-    VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check
+    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
+
+    vector<Scalar> buf3;
+    fft.inv( buf3 , outbuf);
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
 template <class T>
@@ -100,18 +108,18 @@ void test_complex(int nfft)
         inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
     fft.fwd( outbuf , inbuf);
 
-    VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check
+    VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>()  );// gross check
 
     fft.inv( buf3 , outbuf);
 
-    VERIFY( dif_rmse(inbuf,buf3) < 1e-5 );// gross check
+    VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>()  );// gross check
 }
 
 void test_FFT()
 {
-#if 0
+#if 1
   CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) ); CALL_SUBTEST( test_complex<long double>(32) );
-  CALL_SUBTEST( test_complex<float>(1024) ); CALL_SUBTEST( test_complex<double>(1024) ); CALL_SUBTEST( test_complex<long double>(1024) );
+  CALL_SUBTEST( test_complex<float>(256) ); CALL_SUBTEST( test_complex<double>(256) ); CALL_SUBTEST( test_complex<long double>(256) );
   CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) ); CALL_SUBTEST( test_complex<long double>(3*8) );
   CALL_SUBTEST( test_complex<float>(5*32) ); CALL_SUBTEST( test_complex<double>(5*32) ); CALL_SUBTEST( test_complex<long double>(5*32) );
   CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) ); CALL_SUBTEST( test_complex<long double>(2*3*4) );
@@ -120,8 +128,9 @@ void test_FFT()
 #endif
 
 #if 1
+  CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) ); CALL_SUBTEST( test_scalar<long double>(45) );
   CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) ); CALL_SUBTEST( test_scalar<long double>(32) );
-  CALL_SUBTEST( test_scalar<float>(1024) ); CALL_SUBTEST( test_scalar<double>(1024) ); CALL_SUBTEST( test_scalar<long double>(1024) );
+  CALL_SUBTEST( test_scalar<float>(256) ); CALL_SUBTEST( test_scalar<double>(256) ); CALL_SUBTEST( test_scalar<long double>(256) );
   CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
 #endif
 }
-- 
cgit v1.2.3


From 210092d16c57ec2fd2f8f515151de284e8a737e3 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Mon, 25 May 2009 20:35:24 -0400
Subject: changed name from simple_fft_traits to ei_kissfft_impl

---
 bench/benchFFT.cpp                            |   2 +-
 unsupported/Eigen/FFT                         |  95 ++++++
 unsupported/Eigen/FFT.h                       |  95 ------
 unsupported/Eigen/src/FFT/ei_kissfft_impl.h   | 397 ++++++++++++++++++++++++++
 unsupported/Eigen/src/FFT/simple_fft_traits.h | 374 ------------------------
 unsupported/test/FFT.cpp                      |   3 +-
 6 files changed, 494 insertions(+), 472 deletions(-)
 create mode 100644 unsupported/Eigen/FFT
 delete mode 100644 unsupported/Eigen/FFT.h
 create mode 100644 unsupported/Eigen/src/FFT/ei_kissfft_impl.h
 delete mode 100644 unsupported/Eigen/src/FFT/simple_fft_traits.h

(limited to 'unsupported/Eigen/src')

diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp
index 84cc49fe3..ffa4ffffc 100644
--- a/bench/benchFFT.cpp
+++ b/bench/benchFFT.cpp
@@ -26,7 +26,7 @@
 #include <vector>
 #include <Eigen/Core>
 #include <bench/BenchTimer.h>
-#include <unsupported/Eigen/FFT.h>
+#include <unsupported/Eigen/FFT>
 
 using namespace Eigen;
 using namespace std;
diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
new file mode 100644
index 000000000..3d852f5a2
--- /dev/null
+++ b/unsupported/Eigen/FFT
@@ -0,0 +1,95 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_FFT_H
+#define EIGEN_FFT_H
+
+// ei_kissfft_impl:  small, free, reasonably efficient default, derived from kissfft
+#include "src/FFT/ei_kissfft_impl.h"
+#define DEFAULT_FFT_IMPL ei_kissfft_impl
+
+// FFTW: faster, GPL-not LGPL, bigger code size
+#ifdef FFTW_PATIENT  // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines
+// TODO 
+// #include "src/FFT/ei_fftw_impl.h"
+// #define DEFAULT_FFT_IMPL ei_fftw_impl
+#endif
+
+// Intel Math Kernel Library: fastest, commercial
+#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines
+// TODO 
+// #include "src/FFT/ei_imkl_impl.h"
+// #define DEFAULT_FFT_IMPL ei_imkl_impl
+#endif
+
+namespace Eigen {
+
+template <typename _Scalar,
+         typename _Traits=DEFAULT_FFT_IMPL<_Scalar> 
+         >
+class FFT // front end: each transform call is forwarded to the stored _Traits implementation object
+{
+  public:
+    typedef _Traits traits_type;
+    typedef typename traits_type::Scalar Scalar;
+    typedef typename traits_type::Complex Complex;
+    // Keeps its own copy of the given implementation object (default-constructed when omitted).
+    FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { }
+    // Forward transform on raw buffers: dst, src and nfft are passed unchanged to traits_type::fwd.
+    template <typename _Input>
+    void fwd( Complex * dst, const _Input * src, int nfft)
+    {
+        m_traits.fwd(dst,src,nfft);
+    }
+    // Forward transform on vectors: dst is resized to src.size(), then the pointer overload is used.
+    template <typename _Input>
+    void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src) 
+    {
+        dst.resize( src.size() ); // output always has as many elements as the input
+        fwd( &dst[0],&src[0],src.size() ); // NOTE(review): &v[0] on an empty vector is invalid -- confirm callers never pass one
+    }
+    // Inverse transform on raw buffers: dst, src and nfft are passed unchanged to traits_type::inv.
+    template <typename _Output>
+    void inv( _Output * dst, const Complex * src, int nfft)
+    {
+        m_traits.inv( dst,src,nfft );
+    }
+    // Inverse transform on vectors: dst is resized to src.size(), then the pointer overload is used.
+    template <typename _Output>
+    void inv( std::vector<_Output> & dst, const std::vector<Complex> & src) 
+    {
+        dst.resize( src.size() ); // output always has as many elements as the input
+        inv( &dst[0],&src[0],src.size() ); // NOTE(review): same empty-vector caveat as in fwd -- confirm
+    }
+
+    // TODO: multi-dimensional FFTs
+    // TODO: handle Eigen MatrixBase
+
+    traits_type & traits() {return m_traits;} // mutable access to the implementation object held by this FFT
+  private:
+    traits_type m_traits; // the implementation every fwd/inv call is delegated to
+};
+#undef DEFAULT_FFT_IMPL
+}
+#endif
diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h
deleted file mode 100644
index c466423b7..000000000
--- a/unsupported/Eigen/FFT.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
-//
-// Copyright (C) 2009 Mark Borgerding mark a borgerding net
-//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
-
-#ifndef EIGEN_FFT_H
-#define EIGEN_FFT_H
-
-// simple_fft_traits:  small, free, reasonably efficient default, derived from kissfft
-#include "src/FFT/simple_fft_traits.h"
-#define DEFAULT_FFT_TRAITS simple_fft_traits
-
-// FFTW: faster, GPL-not LGPL, bigger code size
-#ifdef FFTW_PATIENT  // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines
-// TODO 
-// #include "src/FFT/fftw_traits.h"
-// #define DEFAULT_FFT_TRAITS fftw_traits
-#endif
-
-// intel Math Kernel Library: fastest, commerical
-#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines
-// TODO 
-// #include "src/FFT/imkl_traits.h"
-// #define DEFAULT_FFT_TRAITS imkl_traits
-#endif
-
-namespace Eigen {
-
-template <typename _Scalar,
-         typename _Traits=DEFAULT_FFT_TRAITS<_Scalar> 
-         >
-class FFT
-{
-  public:
-    typedef _Traits traits_type;
-    typedef typename traits_type::Scalar Scalar;
-    typedef typename traits_type::Complex Complex;
-
-    FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { }
-
-    template <typename _Input>
-    void fwd( Complex * dst, const _Input * src, int nfft)
-    {
-        m_traits.fwd(dst,src,nfft);
-    }
-
-    template <typename _Input>
-    void fwd( std::vector<Complex> & dst, const std::vector<_Input> & src) 
-    {
-        dst.resize( src.size() );
-        fwd( &dst[0],&src[0],src.size() );
-    }
-
-    template <typename _Output>
-    void inv( _Output * dst, const Complex * src, int nfft)
-    {
-        m_traits.inv( dst,src,nfft );
-    }
-
-    template <typename _Output>
-    void inv( std::vector<_Output> & dst, const std::vector<Complex> & src) 
-    {
-        dst.resize( src.size() );
-        inv( &dst[0],&src[0],src.size() );
-    }
-
-    // TODO: multi-dimensional FFTs
-    // TODO: handle Eigen MatrixBase
-
-    traits_type & traits() {return m_traits;}
-  private:
-    traits_type m_traits;
-};
-#undef DEFAULT_FFT_TRAITS
-}
-#endif
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
new file mode 100644
index 000000000..ce2c9f16e
--- /dev/null
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -0,0 +1,397 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. Eigen itself is part of the KDE project.
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#include <complex>
+#include <vector>
+
+namespace Eigen {
+
+  template <typename _Scalar>
+  struct ei_kissfft_impl
+  {
+    typedef _Scalar Scalar;
+    typedef std::complex<Scalar> Complex;
+    ei_kissfft_impl() : m_nfft(0) {} 
+
+    template <typename _Src>
+    void fwd( Complex * dst,const _Src *src,int nfft)
+    {
+        prepare(nfft,false);
+        work(0, dst, src, 1,1);
+    }
+
+    // real-to-complex forward FFT
+    // perform two FFTs of src even and src odd
+    // then twiddle to recombine them into the half-spectrum format
+    // then fill in the conjugate symmetric half
+    void fwd( Complex * dst,const Scalar * src,int nfft) 
+    {
+        if ( nfft&1 ) {
+            // use generic mode for odd
+            prepare(nfft,false);
+            work(0, dst, src, 1,1);
+        }else{
+            int ncfft = nfft>>1;
+            int ncfft2 = nfft>>2;
+            // use optimized mode for even real
+            fwd( dst, reinterpret_cast<const Complex*> (src),ncfft);
+            make_real_twiddles(nfft);
+            Complex dc = dst[0].real() +  dst[0].imag();
+            Complex nyquist = dst[0].real() -  dst[0].imag();
+            int k;
+            for ( k=1;k <= ncfft2 ; ++k ) {
+                Complex fpk = dst[k];
+                Complex fpnk = conj(dst[ncfft-k]);
+                Complex f1k = fpk + fpnk;
+                Complex f2k = fpk - fpnk;
+                //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
+                Complex tw= f2k * m_realTwiddles[k-1];
+
+                dst[k] =  (f1k + tw) * Scalar(.5);
+                dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
+            }
+ 
+            // place conjugate-symmetric half at the end for completeness
+            // TODO: make this configurable ( opt-out )
+            for ( k=1;k < ncfft ; ++k )
+                dst[nfft-k] = conj(dst[k]);
+
+            dst[0] = dc;
+            dst[ncfft] = nyquist;
+        }
+    }
+
+    // half-complex to scalar
+    void inv( Scalar * dst,const Complex * src,int nfft) 
+    {
+        // TODO add optimized version for even numbers
+        std::vector<Complex> tmp(nfft);
+        inv(&tmp[0],src,nfft);
+        for (int k=0;k<nfft;++k)
+            dst[k] = tmp[k].real();
+    }
+
+    void inv(Complex * dst,const Complex  *src,int nfft)
+    {
+        prepare(nfft,true);
+        work(0, dst, src, 1,1);
+        scale(dst, Scalar(1)/m_nfft );
+    }
+
+    void prepare(int nfft,bool inverse)
+    {
+        make_twiddles(nfft,inverse);
+        factorize(nfft);
+    }
+
+    void make_real_twiddles(int nfft)
+    {
+        int ncfft2 = nfft>>2;
+        if ( m_realTwiddles.size() != ncfft2) {
+            m_realTwiddles.resize(ncfft2);
+            int ncfft= nfft>>1;
+            for (int k=1;k<=ncfft2;++k) 
+                m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
+        }
+    }
+
+    void make_twiddles(int nfft,bool inverse)
+    {
+        if ( m_twiddles.size() == nfft) {
+            // reuse the twiddles, conjugate if necessary
+            if (inverse != m_inverse)
+                for (int i=0;i<nfft;++i)
+                    m_twiddles[i] = conj( m_twiddles[i] );
+        }else{
+            m_twiddles.resize(nfft);
+            Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+            for (int i=0;i<nfft;++i)
+                m_twiddles[i] = exp( Complex(0,i*phinc) );
+        }
+        m_inverse = inverse;
+    }
+
+    void factorize(int nfft)
+    {
+        if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft)
+        {
+            m_stageRadix.resize(0);
+            m_stageRemainder.resize(0);
+            //factorize
+            //start factoring out 4's, then 2's, then 3,5,7,9,...
+            int n= nfft;
+            int p=4;
+            do {
+                while (n % p) {
+                    switch (p) {
+                        case 4: p = 2; break;
+                        case 2: p = 3; break;
+                        default: p += 2; break;
+                    }
+                    if (p*p>n)
+                        p=n;// impossible to have a factor > sqrt(n)
+                }
+                n /= p;
+                m_stageRadix.push_back(p);
+                m_stageRemainder.push_back(n);
+            }while(n>1);
+        }
+        m_nfft = nfft;
+    }
+
+    void scale(Complex *dst,Scalar s) 
+    {
+        for (int k=0;k<m_nfft;++k)
+            dst[k] *= s;
+    }
+
+    private:
+
+    template <typename _Src>
+    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
+    {
+      int p = m_stageRadix[stage];
+      int m = m_stageRemainder[stage];
+      Complex * Fout_beg = xout;
+      Complex * Fout_end = xout + p*m;
+
+      if (m>1) {
+        do{
+          // recursive call:
+          // DFT of size m*p performed by doing
+          // p instances of smaller DFTs of size m, 
+          // each one takes a decimated version of the input
+          work(stage+1, xout , xin, fstride*p,in_stride);
+          xin += fstride*in_stride;
+        }while( (xout += m) != Fout_end );
+      }else{
+          do{
+              *xout = *xin;
+              xin += fstride*in_stride;
+          }while(++xout != Fout_end );
+      }
+      xout=Fout_beg;
+
+      // recombine the p smaller DFTs 
+      switch (p) {
+        case 2: bfly2(xout,fstride,m); break;
+        case 3: bfly3(xout,fstride,m); break;
+        case 4: bfly4(xout,fstride,m); break;
+        case 5: bfly5(xout,fstride,m); break;
+        default: bfly_generic(xout,fstride,m,p); break;
+      }
+    }
+
+    void bfly2( Complex * Fout, const size_t fstride, int m)
+    {
+      for (int k=0;k<m;++k) {
+        Complex t = Fout[m+k] * m_twiddles[k*fstride];
+        Fout[m+k] = Fout[k] - t;
+        Fout[k] += t;
+      }
+    }
+
+    void bfly4( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex scratch[6];
+      int negative_if_inverse = m_inverse * -2 +1;
+      for (size_t k=0;k<m;++k) {
+        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+        scratch[5] = Fout[k] - scratch[1];
+
+        Fout[k] += scratch[1];
+        scratch[3] = scratch[0] + scratch[2];
+        scratch[4] = scratch[0] - scratch[2];
+        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+        Fout[k+2*m]  = Fout[k] - scratch[3];
+        Fout[k] += scratch[3];
+        Fout[k+m] = scratch[5] + scratch[4];
+        Fout[k+3*m] = scratch[5] - scratch[4];
+      }
+    }
+
+    void bfly3( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      size_t k=m;
+      const size_t m2 = 2*m;
+      Complex *tw1,*tw2;
+      Complex scratch[5];
+      Complex epi3;
+      epi3 = m_twiddles[fstride*m];
+
+      tw1=tw2=&m_twiddles[0];
+
+      do{
+        scratch[1]=Fout[m] * *tw1;
+        scratch[2]=Fout[m2] * *tw2;
+
+        scratch[3]=scratch[1]+scratch[2];
+        scratch[0]=scratch[1]-scratch[2];
+        tw1 += fstride;
+        tw2 += fstride*2;
+        Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
+        scratch[0] *= epi3.imag();
+        *Fout += scratch[3];
+        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+        ++Fout;
+      }while(--k);
+    }
+
+    void bfly5( Complex * Fout, const size_t fstride, const size_t m)
+    {
+      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+      size_t u;
+      Complex scratch[13];
+      Complex * twiddles = &m_twiddles[0];
+      Complex *tw;
+      Complex ya,yb;
+      ya = twiddles[fstride*m];
+      yb = twiddles[fstride*2*m];
+
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      tw=twiddles;
+      for ( u=0; u<m; ++u ) {
+        scratch[0] = *Fout0;
+
+        scratch[1]  = *Fout1 * tw[u*fstride];
+        scratch[2]  = *Fout2 * tw[2*u*fstride];
+        scratch[3]  = *Fout3 * tw[3*u*fstride];
+        scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+        scratch[7] = scratch[1] + scratch[4];
+        scratch[10] = scratch[1] - scratch[4];
+        scratch[8] = scratch[2] + scratch[3];
+        scratch[9] = scratch[2] - scratch[3];
+
+        *Fout0 +=  scratch[7];
+        *Fout0 +=  scratch[8];
+
+        scratch[5] = scratch[0] + Complex(
+            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+            );
+
+        scratch[6] = Complex(
+            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+            );
+
+        *Fout1 = scratch[5] - scratch[6];
+        *Fout4 = scratch[5] + scratch[6];
+
+        scratch[11] = scratch[0] +
+          Complex(
+              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+              );
+
+        scratch[12] = Complex(
+            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+            );
+
+        *Fout2=scratch[11]+scratch[12];
+        *Fout3=scratch[11]-scratch[12];
+
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+    }
+
+    /* perform the butterfly for one stage of a mixed radix FFT */
+    void bfly_generic(
+        Complex * Fout,
+        const size_t fstride,
+        int m,
+        int p
+        )
+    {
+      int u,k,q1,q;
+      Complex * twiddles = &m_twiddles[0];
+      Complex t;
+      int Norig = m_nfft;
+      Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
+
+      for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          scratchbuf[q1] = Fout[ k  ];
+          k += m;
+        }
+
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          int twidx=0;
+          Fout[ k ] = scratchbuf[0];
+          for (q=1;q<p;++q ) {
+            twidx += fstride * k;
+            if (twidx>=Norig) twidx-=Norig;
+            t=scratchbuf[q] * twiddles[twidx];
+            Fout[ k ] += t;
+          }
+          k += m;
+        }
+      }
+    }
+
+    int m_nfft;
+    bool m_inverse;
+    std::vector<Complex> m_twiddles;
+    std::vector<Complex> m_realTwiddles;
+    std::vector<int> m_stageRadix;
+    std::vector<int> m_stageRemainder;
+/*
+    enum {FORWARD,INVERSE,REAL,COMPLEX};
+
+    struct PlanKey
+    {
+        PlanKey(int nfft,bool isinverse,bool iscomplex)
+        {
+            _key = (nfft<<2) | (isinverse<<1) | iscomplex;
+        }
+
+        bool operator<(const PlanKey & other) const
+        {
+            return this->_key < other._key;
+        }
+        int _key;
+    };
+
+    struct PlanData
+    {
+        std::vector<Complex> m_twiddles;
+    };
+
+    std::map<PlanKey,
+*/
+  };
+}
diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h
deleted file mode 100644
index 1e2be8f79..000000000
--- a/unsupported/Eigen/src/FFT/simple_fft_traits.h
+++ /dev/null
@@ -1,374 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
-//
-// Copyright (C) 2009 Mark Borgerding mark a borgerding net
-//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
-
-#include <complex>
-#include <vector>
-#include <iostream>
-
-namespace Eigen {
-
-  template <typename _Scalar>
-  struct simple_fft_traits
-  {
-    typedef _Scalar Scalar;
-    typedef std::complex<Scalar> Complex;
-    simple_fft_traits() : m_nfft(0) {} 
-
-    template <typename _Src>
-    void fwd( Complex * dst,const _Src *src,int nfft)
-    {
-        prepare(nfft,false);
-        work(0, dst, src, 1,1);
-    }
-
-    // real-to-complex forward FFT
-    // perform two FFTs of src even and src odd
-    // then twiddle to recombine them into the half-spectrum format
-    // then fill in the conjugate symmetric half
-    void fwd( Complex * dst,const Scalar * src,int nfft) 
-    {
-        if ( nfft&1 ) {
-            // use generic mode for odd
-            prepare(nfft,false);
-            work(0, dst, src, 1,1);
-        }else{
-            int ncfft = nfft>>1;
-            int ncfft2 = nfft>>2;
-            // use optimized mode for even real
-            fwd( dst, reinterpret_cast<const Complex*> (src),ncfft);
-            make_real_twiddles(nfft);
-            Complex dc = dst[0].real() +  dst[0].imag();
-            Complex nyquist = dst[0].real() -  dst[0].imag();
-            int k;
-            for ( k=1;k <= ncfft2 ; ++k ) {
-                Complex fpk = dst[k];
-                Complex fpnk = conj(dst[ncfft-k]);
-                Complex f1k = fpk + fpnk;
-                Complex f2k = fpk - fpnk;
-                //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
-                Complex tw= f2k * m_realTwiddles[k-1];
-
-                dst[k] =  (f1k + tw) * Scalar(.5);
-                dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
-            }
- 
-            // place conjugate-symmetric half at the end for completeness
-            // TODO: make this configurable ( opt-out )
-            for ( k=1;k < ncfft ; ++k )
-                dst[nfft-k] = conj(dst[k]);
-
-            dst[0] = dc;
-            dst[ncfft] = nyquist;
-        }
-    }
-
-    // half-complex to scalar
-    void inv( Scalar * dst,const Complex * src,int nfft) 
-    {
-        // TODO add optimized version for even numbers
-        std::vector<Complex> tmp(nfft);
-        inv(&tmp[0],src,nfft);
-        for (int k=0;k<nfft;++k)
-            dst[k] = tmp[k].real();
-    }
-
-    void inv(Complex * dst,const Complex  *src,int nfft)
-    {
-        prepare(nfft,true);
-        work(0, dst, src, 1,1);
-        scale(dst, Scalar(1)/m_nfft );
-    }
-
-    void prepare(int nfft,bool inverse)
-    {
-        make_twiddles(nfft,inverse);
-        factorize(nfft);
-    }
-
-    void make_real_twiddles(int nfft)
-    {
-        int ncfft2 = nfft>>2;
-        if ( m_realTwiddles.size() != ncfft2) {
-            m_realTwiddles.resize(ncfft2);
-            int ncfft= nfft>>1;
-            for (int k=1;k<=ncfft2;++k) 
-                m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
-        }
-    }
-
-    void make_twiddles(int nfft,bool inverse)
-    {
-        if ( m_twiddles.size() == nfft) {
-            // reuse the twiddles, conjugate if necessary
-            if (inverse != m_inverse)
-                for (int i=0;i<nfft;++i)
-                    m_twiddles[i] = conj( m_twiddles[i] );
-        }else{
-            m_twiddles.resize(nfft);
-            Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
-            for (int i=0;i<nfft;++i)
-                m_twiddles[i] = exp( Complex(0,i*phinc) );
-        }
-        m_inverse = inverse;
-    }
-
-    void factorize(int nfft)
-    {
-        if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft)
-        {
-            m_stageRadix.resize(0);
-            m_stageRemainder.resize(0);
-            //factorize
-            //start factoring out 4's, then 2's, then 3,5,7,9,...
-            int n= nfft;
-            int p=4;
-            do {
-                while (n % p) {
-                    switch (p) {
-                        case 4: p = 2; break;
-                        case 2: p = 3; break;
-                        default: p += 2; break;
-                    }
-                    if (p*p>n)
-                        p=n;// impossible to have a factor > sqrt(n)
-                }
-                n /= p;
-                m_stageRadix.push_back(p);
-                m_stageRemainder.push_back(n);
-            }while(n>1);
-        }
-        m_nfft = nfft;
-    }
-
-    void scale(Complex *dst,Scalar s) 
-    {
-        for (int k=0;k<m_nfft;++k)
-            dst[k] *= s;
-    }
-
-    private:
-
-    template <typename _Src>
-    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
-    {
-      int p = m_stageRadix[stage];
-      int m = m_stageRemainder[stage];
-      Complex * Fout_beg = xout;
-      Complex * Fout_end = xout + p*m;
-
-      if (m>1) {
-        do{
-          // recursive call:
-          // DFT of size m*p performed by doing
-          // p instances of smaller DFTs of size m, 
-          // each one takes a decimated version of the input
-          work(stage+1, xout , xin, fstride*p,in_stride);
-          xin += fstride*in_stride;
-        }while( (xout += m) != Fout_end );
-      }else{
-          do{
-              *xout = *xin;
-              xin += fstride*in_stride;
-          }while(++xout != Fout_end );
-      }
-      xout=Fout_beg;
-
-      // recombine the p smaller DFTs 
-      switch (p) {
-        case 2: bfly2(xout,fstride,m); break;
-        case 3: bfly3(xout,fstride,m); break;
-        case 4: bfly4(xout,fstride,m); break;
-        case 5: bfly5(xout,fstride,m); break;
-        default: bfly_generic(xout,fstride,m,p); break;
-      }
-    }
-
-    void bfly2( Complex * Fout, const size_t fstride, int m)
-    {
-      for (int k=0;k<m;++k) {
-        Complex t = Fout[m+k] * m_twiddles[k*fstride];
-        Fout[m+k] = Fout[k] - t;
-        Fout[k] += t;
-      }
-    }
-
-    void bfly4( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      Complex scratch[6];
-      int negative_if_inverse = m_inverse * -2 +1;
-      for (size_t k=0;k<m;++k) {
-        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
-        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
-        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
-        scratch[5] = Fout[k] - scratch[1];
-
-        Fout[k] += scratch[1];
-        scratch[3] = scratch[0] + scratch[2];
-        scratch[4] = scratch[0] - scratch[2];
-        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
-
-        Fout[k+2*m]  = Fout[k] - scratch[3];
-        Fout[k] += scratch[3];
-        Fout[k+m] = scratch[5] + scratch[4];
-        Fout[k+3*m] = scratch[5] - scratch[4];
-      }
-    }
-
-    void bfly3( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      size_t k=m;
-      const size_t m2 = 2*m;
-      Complex *tw1,*tw2;
-      Complex scratch[5];
-      Complex epi3;
-      epi3 = m_twiddles[fstride*m];
-
-      tw1=tw2=&m_twiddles[0];
-
-      do{
-        scratch[1]=Fout[m] * *tw1;
-        scratch[2]=Fout[m2] * *tw2;
-
-        scratch[3]=scratch[1]+scratch[2];
-        scratch[0]=scratch[1]-scratch[2];
-        tw1 += fstride;
-        tw2 += fstride*2;
-        Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
-        scratch[0] *= epi3.imag();
-        *Fout += scratch[3];
-        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
-        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
-        ++Fout;
-      }while(--k);
-    }
-
-    void bfly5( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-      size_t u;
-      Complex scratch[13];
-      Complex * twiddles = &m_twiddles[0];
-      Complex *tw;
-      Complex ya,yb;
-      ya = twiddles[fstride*m];
-      yb = twiddles[fstride*2*m];
-
-      Fout0=Fout;
-      Fout1=Fout0+m;
-      Fout2=Fout0+2*m;
-      Fout3=Fout0+3*m;
-      Fout4=Fout0+4*m;
-
-      tw=twiddles;
-      for ( u=0; u<m; ++u ) {
-        scratch[0] = *Fout0;
-
-        scratch[1]  = *Fout1 * tw[u*fstride];
-        scratch[2]  = *Fout2 * tw[2*u*fstride];
-        scratch[3]  = *Fout3 * tw[3*u*fstride];
-        scratch[4]  = *Fout4 * tw[4*u*fstride];
-
-        scratch[7] = scratch[1] + scratch[4];
-        scratch[10] = scratch[1] - scratch[4];
-        scratch[8] = scratch[2] + scratch[3];
-        scratch[9] = scratch[2] - scratch[3];
-
-        *Fout0 +=  scratch[7];
-        *Fout0 +=  scratch[8];
-
-        scratch[5] = scratch[0] + Complex(
-            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
-            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
-            );
-
-        scratch[6] = Complex(
-            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
-            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
-            );
-
-        *Fout1 = scratch[5] - scratch[6];
-        *Fout4 = scratch[5] + scratch[6];
-
-        scratch[11] = scratch[0] +
-          Complex(
-              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
-              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
-              );
-
-        scratch[12] = Complex(
-            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
-            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
-            );
-
-        *Fout2=scratch[11]+scratch[12];
-        *Fout3=scratch[11]-scratch[12];
-
-        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-      }
-    }
-
-    /* perform the butterfly for one stage of a mixed radix FFT */
-    void bfly_generic(
-        Complex * Fout,
-        const size_t fstride,
-        int m,
-        int p
-        )
-    {
-      int u,k,q1,q;
-      Complex * twiddles = &m_twiddles[0];
-      Complex t;
-      int Norig = m_nfft;
-      Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
-
-      for ( u=0; u<m; ++u ) {
-        k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {
-          scratchbuf[q1] = Fout[ k  ];
-          k += m;
-        }
-
-        k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {
-          int twidx=0;
-          Fout[ k ] = scratchbuf[0];
-          for (q=1;q<p;++q ) {
-            twidx += fstride * k;
-            if (twidx>=Norig) twidx-=Norig;
-            t=scratchbuf[q] * twiddles[twidx];
-            Fout[ k ] += t;
-          }
-          k += m;
-        }
-      }
-    }
-
-    int m_nfft;
-    bool m_inverse;
-    std::vector<Complex> m_twiddles;
-    std::vector<Complex> m_realTwiddles;
-    std::vector<int> m_stageRadix;
-    std::vector<int> m_stageRemainder;
-  };
-}
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index 75c33277d..daf397790 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -23,8 +23,7 @@
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
 #include "main.h"
-#include <unsupported/Eigen/FFT.h>
-
+#include <unsupported/Eigen/FFT>
 
 using namespace std;
 
-- 
cgit v1.2.3


From 03ed6f9bfb63879d475f5bb8ea46cff96063d010 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Mon, 25 May 2009 23:06:49 -0400
Subject: refactored ei_kissfft_impl to maintain a cache of cpx fft plans

---
 unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 606 ++++++++++++++--------------
 unsupported/test/FFT.cpp                    |   7 +-
 2 files changed, 317 insertions(+), 296 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index ce2c9f16e..3580e6c61 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -24,21 +24,279 @@
 
 #include <complex>
 #include <vector>
+#include <map>
 
 namespace Eigen {
 
+    template <typename _Scalar>
+    struct ei_kiss_cpx_fft
+    {
+        typedef  _Scalar Scalar;
+        typedef  std::complex<Scalar> Complex;
+        std::vector<Complex> m_twiddles;
+        std::vector<int> m_stageRadix;
+        std::vector<int> m_stageRemainder;
+        bool m_inverse;
+
+        ei_kiss_cpx_fft() { }
+
+        void make_twiddles(int nfft,bool inverse)
+        {
+            m_inverse = inverse;
+            m_twiddles.resize(nfft);
+            Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+            for (int i=0;i<nfft;++i)
+                m_twiddles[i] = exp( Complex(0,i*phinc) );
+        }
+
+        void invert()
+        {
+            m_inverse = !m_inverse;
+            for ( size_t i=0;i<m_twiddles.size() ;++i)
+                m_twiddles[i] = conj( m_twiddles[i] );
+        }
+
+        void factorize(int nfft)
+        {
+            if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft)
+            {
+                m_stageRadix.resize(0);
+                m_stageRemainder.resize(0);
+                //factorize
+                //start factoring out 4's, then 2's, then 3,5,7,9,...
+                int n= nfft;
+                int p=4;
+                do {
+                    while (n % p) {
+                        switch (p) {
+                            case 4: p = 2; break;
+                            case 2: p = 3; break;
+                            default: p += 2; break;
+                        }
+                        if (p*p>n)
+                            p=n;// impossible to have a factor > sqrt(n)
+                    }
+                    n /= p;
+                    m_stageRadix.push_back(p);
+                    m_stageRemainder.push_back(n);
+                }while(n>1);
+            }
+        }
+
+        template <typename _Src>
+            void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
+            {
+                int p = m_stageRadix[stage];
+                int m = m_stageRemainder[stage];
+                Complex * Fout_beg = xout;
+                Complex * Fout_end = xout + p*m;
+
+                if (m>1) {
+                    do{
+                        // recursive call:
+                        // DFT of size m*p performed by doing
+                        // p instances of smaller DFTs of size m, 
+                        // each one takes a decimated version of the input
+                        work(stage+1, xout , xin, fstride*p,in_stride);
+                        xin += fstride*in_stride;
+                    }while( (xout += m) != Fout_end );
+                }else{
+                    do{
+                        *xout = *xin;
+                        xin += fstride*in_stride;
+                    }while(++xout != Fout_end );
+                }
+                xout=Fout_beg;
+
+                // recombine the p smaller DFTs 
+                switch (p) {
+                    case 2: bfly2(xout,fstride,m); break;
+                    case 3: bfly3(xout,fstride,m); break;
+                    case 4: bfly4(xout,fstride,m); break;
+                    case 5: bfly5(xout,fstride,m); break;
+                    default: bfly_generic(xout,fstride,m,p); break;
+                }
+            }
+
+        void bfly2( Complex * Fout, const size_t fstride, int m)
+        {
+            for (int k=0;k<m;++k) {
+                Complex t = Fout[m+k] * m_twiddles[k*fstride];
+                Fout[m+k] = Fout[k] - t;
+                Fout[k] += t;
+            }
+        }
+
+        void bfly4( Complex * Fout, const size_t fstride, const size_t m)
+        {
+            Complex scratch[6];
+            int negative_if_inverse = m_inverse * -2 +1;
+            for (size_t k=0;k<m;++k) {
+                scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+                scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+                scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+                scratch[5] = Fout[k] - scratch[1];
+
+                Fout[k] += scratch[1];
+                scratch[3] = scratch[0] + scratch[2];
+                scratch[4] = scratch[0] - scratch[2];
+                scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+                Fout[k+2*m]  = Fout[k] - scratch[3];
+                Fout[k] += scratch[3];
+                Fout[k+m] = scratch[5] + scratch[4];
+                Fout[k+3*m] = scratch[5] - scratch[4];
+            }
+        }
+
+        void bfly3( Complex * Fout, const size_t fstride, const size_t m)
+        {
+            size_t k=m;
+            const size_t m2 = 2*m;
+            Complex *tw1,*tw2;
+            Complex scratch[5];
+            Complex epi3;
+            epi3 = m_twiddles[fstride*m];
+
+            tw1=tw2=&m_twiddles[0];
+
+            do{
+                scratch[1]=Fout[m] * *tw1;
+                scratch[2]=Fout[m2] * *tw2;
+
+                scratch[3]=scratch[1]+scratch[2];
+                scratch[0]=scratch[1]-scratch[2];
+                tw1 += fstride;
+                tw2 += fstride*2;
+                Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
+                scratch[0] *= epi3.imag();
+                *Fout += scratch[3];
+                Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+                Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+                ++Fout;
+            }while(--k);
+        }
+
+        void bfly5( Complex * Fout, const size_t fstride, const size_t m)
+        {
+            Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+            size_t u;
+            Complex scratch[13];
+            Complex * twiddles = &m_twiddles[0];
+            Complex *tw;
+            Complex ya,yb;
+            ya = twiddles[fstride*m];
+            yb = twiddles[fstride*2*m];
+
+            Fout0=Fout;
+            Fout1=Fout0+m;
+            Fout2=Fout0+2*m;
+            Fout3=Fout0+3*m;
+            Fout4=Fout0+4*m;
+
+            tw=twiddles;
+            for ( u=0; u<m; ++u ) {
+                scratch[0] = *Fout0;
+
+                scratch[1]  = *Fout1 * tw[u*fstride];
+                scratch[2]  = *Fout2 * tw[2*u*fstride];
+                scratch[3]  = *Fout3 * tw[3*u*fstride];
+                scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+                scratch[7] = scratch[1] + scratch[4];
+                scratch[10] = scratch[1] - scratch[4];
+                scratch[8] = scratch[2] + scratch[3];
+                scratch[9] = scratch[2] - scratch[3];
+
+                *Fout0 +=  scratch[7];
+                *Fout0 +=  scratch[8];
+
+                scratch[5] = scratch[0] + Complex(
+                        (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+                        (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+                        );
+
+                scratch[6] = Complex(
+                        (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+                        -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+                        );
+
+                *Fout1 = scratch[5] - scratch[6];
+                *Fout4 = scratch[5] + scratch[6];
+
+                scratch[11] = scratch[0] +
+                    Complex(
+                            (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+                            (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+                           );
+
+                scratch[12] = Complex(
+                        -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+                        (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+                        );
+
+                *Fout2=scratch[11]+scratch[12];
+                *Fout3=scratch[11]-scratch[12];
+
+                ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+            }
+        }
+
+        /* perform the butterfly for one stage of a mixed radix FFT */
+        void bfly_generic(
+                Complex * Fout,
+                const size_t fstride,
+                int m,
+                int p
+                )
+        {
+            int u,k,q1,q;
+            Complex * twiddles = &m_twiddles[0];
+            Complex t;
+            int Norig = m_twiddles.size();
+            Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
+
+            for ( u=0; u<m; ++u ) {
+                k=u;
+                for ( q1=0 ; q1<p ; ++q1 ) {
+                    scratchbuf[q1] = Fout[ k  ];
+                    k += m;
+                }
+
+                k=u;
+                for ( q1=0 ; q1<p ; ++q1 ) {
+                    int twidx=0;
+                    Fout[ k ] = scratchbuf[0];
+                    for (q=1;q<p;++q ) {
+                        twidx += fstride * k;
+                        if (twidx>=Norig) twidx-=Norig;
+                        t=scratchbuf[q] * twiddles[twidx];
+                        Fout[ k ] += t;
+                    }
+                    k += m;
+                }
+            }
+        }
+    };
+
+
   template <typename _Scalar>
   struct ei_kissfft_impl
   {
     typedef _Scalar Scalar;
     typedef std::complex<Scalar> Complex;
-    ei_kissfft_impl() : m_nfft(0) {} 
+    ei_kissfft_impl() {} 
+
+    void clear() 
+    {
+        m_plans.clear();
+        m_realTwiddles.clear();
+    }
 
     template <typename _Src>
     void fwd( Complex * dst,const _Src *src,int nfft)
     {
-        prepare(nfft,false);
-        work(0, dst, src, 1,1);
+        get_plan(nfft,false).work(0, dst, src, 1,1);
     }
 
     // real-to-complex forward FFT
@@ -47,16 +305,16 @@ namespace Eigen {
     // then fill in the conjugate symmetric half
     void fwd( Complex * dst,const Scalar * src,int nfft) 
     {
-        if ( nfft&1 ) {
+        if ( nfft&3  ) {
             // use generic mode for odd
-            prepare(nfft,false);
-            work(0, dst, src, 1,1);
+            get_plan(nfft,false).work(0, dst, src, 1,1);
         }else{
             int ncfft = nfft>>1;
             int ncfft2 = nfft>>2;
+            Complex * rtw = real_twiddles(ncfft2);
+
             // use optimized mode for even real
-            fwd( dst, reinterpret_cast<const Complex*> (src),ncfft);
-            make_real_twiddles(nfft);
+            fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
             Complex dc = dst[0].real() +  dst[0].imag();
             Complex nyquist = dst[0].real() -  dst[0].imag();
             int k;
@@ -65,8 +323,7 @@ namespace Eigen {
                 Complex fpnk = conj(dst[ncfft-k]);
                 Complex f1k = fpk + fpnk;
                 Complex f2k = fpk - fpnk;
-                //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
-                Complex tw= f2k * m_realTwiddles[k-1];
+                Complex tw= f2k * rtw[k-1];
 
                 dst[k] =  (f1k + tw) * Scalar(.5);
                 dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
@@ -94,304 +351,67 @@ namespace Eigen {
 
     void inv(Complex * dst,const Complex  *src,int nfft)
     {
-        prepare(nfft,true);
-        work(0, dst, src, 1,1);
-        scale(dst, Scalar(1)/m_nfft );
-    }
-
-    void prepare(int nfft,bool inverse)
-    {
-        make_twiddles(nfft,inverse);
-        factorize(nfft);
-    }
-
-    void make_real_twiddles(int nfft)
-    {
-        int ncfft2 = nfft>>2;
-        if ( m_realTwiddles.size() != ncfft2) {
-            m_realTwiddles.resize(ncfft2);
-            int ncfft= nfft>>1;
-            for (int k=1;k<=ncfft2;++k) 
-                m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) );
-        }
-    }
-
-    void make_twiddles(int nfft,bool inverse)
-    {
-        if ( m_twiddles.size() == nfft) {
-            // reuse the twiddles, conjugate if necessary
-            if (inverse != m_inverse)
-                for (int i=0;i<nfft;++i)
-                    m_twiddles[i] = conj( m_twiddles[i] );
-        }else{
-            m_twiddles.resize(nfft);
-            Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
-            for (int i=0;i<nfft;++i)
-                m_twiddles[i] = exp( Complex(0,i*phinc) );
-        }
-        m_inverse = inverse;
-    }
-
-    void factorize(int nfft)
-    {
-        if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft)
-        {
-            m_stageRadix.resize(0);
-            m_stageRemainder.resize(0);
-            //factorize
-            //start factoring out 4's, then 2's, then 3,5,7,9,...
-            int n= nfft;
-            int p=4;
-            do {
-                while (n % p) {
-                    switch (p) {
-                        case 4: p = 2; break;
-                        case 2: p = 3; break;
-                        default: p += 2; break;
-                    }
-                    if (p*p>n)
-                        p=n;// impossible to have a factor > sqrt(n)
-                }
-                n /= p;
-                m_stageRadix.push_back(p);
-                m_stageRemainder.push_back(n);
-            }while(n>1);
-        }
-        m_nfft = nfft;
-    }
-
-    void scale(Complex *dst,Scalar s) 
-    {
-        for (int k=0;k<m_nfft;++k)
-            dst[k] *= s;
+        get_plan(nfft,true).work(0, dst, src, 1,1);
+        scale(dst, nfft, Scalar(1)/nfft );
     }
 
     private:
 
-    template <typename _Src>
-    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
-    {
-      int p = m_stageRadix[stage];
-      int m = m_stageRemainder[stage];
-      Complex * Fout_beg = xout;
-      Complex * Fout_end = xout + p*m;
-
-      if (m>1) {
-        do{
-          // recursive call:
-          // DFT of size m*p performed by doing
-          // p instances of smaller DFTs of size m, 
-          // each one takes a decimated version of the input
-          work(stage+1, xout , xin, fstride*p,in_stride);
-          xin += fstride*in_stride;
-        }while( (xout += m) != Fout_end );
-      }else{
-          do{
-              *xout = *xin;
-              xin += fstride*in_stride;
-          }while(++xout != Fout_end );
-      }
-      xout=Fout_beg;
-
-      // recombine the p smaller DFTs 
-      switch (p) {
-        case 2: bfly2(xout,fstride,m); break;
-        case 3: bfly3(xout,fstride,m); break;
-        case 4: bfly4(xout,fstride,m); break;
-        case 5: bfly5(xout,fstride,m); break;
-        default: bfly_generic(xout,fstride,m,p); break;
-      }
-    }
-
-    void bfly2( Complex * Fout, const size_t fstride, int m)
-    {
-      for (int k=0;k<m;++k) {
-        Complex t = Fout[m+k] * m_twiddles[k*fstride];
-        Fout[m+k] = Fout[k] - t;
-        Fout[k] += t;
-      }
-    }
-
-    void bfly4( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      Complex scratch[6];
-      int negative_if_inverse = m_inverse * -2 +1;
-      for (size_t k=0;k<m;++k) {
-        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
-        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
-        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
-        scratch[5] = Fout[k] - scratch[1];
-
-        Fout[k] += scratch[1];
-        scratch[3] = scratch[0] + scratch[2];
-        scratch[4] = scratch[0] - scratch[2];
-        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
-
-        Fout[k+2*m]  = Fout[k] - scratch[3];
-        Fout[k] += scratch[3];
-        Fout[k+m] = scratch[5] + scratch[4];
-        Fout[k+3*m] = scratch[5] - scratch[4];
-      }
-    }
+    typedef ei_kiss_cpx_fft<Scalar> PlanData;
 
-    void bfly3( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      size_t k=m;
-      const size_t m2 = 2*m;
-      Complex *tw1,*tw2;
-      Complex scratch[5];
-      Complex epi3;
-      epi3 = m_twiddles[fstride*m];
-
-      tw1=tw2=&m_twiddles[0];
-
-      do{
-        scratch[1]=Fout[m] * *tw1;
-        scratch[2]=Fout[m2] * *tw2;
-
-        scratch[3]=scratch[1]+scratch[2];
-        scratch[0]=scratch[1]-scratch[2];
-        tw1 += fstride;
-        tw2 += fstride*2;
-        Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
-        scratch[0] *= epi3.imag();
-        *Fout += scratch[3];
-        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
-        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
-        ++Fout;
-      }while(--k);
-    }
+    typedef std::map<int,PlanData> PlanMap;
+    PlanMap m_plans;
+    std::map<int, std::vector<Complex> > m_realTwiddles;
 
-    void bfly5( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-      size_t u;
-      Complex scratch[13];
-      Complex * twiddles = &m_twiddles[0];
-      Complex *tw;
-      Complex ya,yb;
-      ya = twiddles[fstride*m];
-      yb = twiddles[fstride*2*m];
-
-      Fout0=Fout;
-      Fout1=Fout0+m;
-      Fout2=Fout0+2*m;
-      Fout3=Fout0+3*m;
-      Fout4=Fout0+4*m;
-
-      tw=twiddles;
-      for ( u=0; u<m; ++u ) {
-        scratch[0] = *Fout0;
-
-        scratch[1]  = *Fout1 * tw[u*fstride];
-        scratch[2]  = *Fout2 * tw[2*u*fstride];
-        scratch[3]  = *Fout3 * tw[3*u*fstride];
-        scratch[4]  = *Fout4 * tw[4*u*fstride];
-
-        scratch[7] = scratch[1] + scratch[4];
-        scratch[10] = scratch[1] - scratch[4];
-        scratch[8] = scratch[2] + scratch[3];
-        scratch[9] = scratch[2] - scratch[3];
-
-        *Fout0 +=  scratch[7];
-        *Fout0 +=  scratch[8];
-
-        scratch[5] = scratch[0] + Complex(
-            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
-            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
-            );
-
-        scratch[6] = Complex(
-            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
-            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
-            );
-
-        *Fout1 = scratch[5] - scratch[6];
-        *Fout4 = scratch[5] + scratch[6];
-
-        scratch[11] = scratch[0] +
-          Complex(
-              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
-              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
-              );
-
-        scratch[12] = Complex(
-            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
-            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
-            );
-
-        *Fout2=scratch[11]+scratch[12];
-        *Fout3=scratch[11]-scratch[12];
-
-        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-      }
-    }
+    int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } // map key: nfft shifted into the upper bits, direction flag in bit 0
 
-    /* perform the butterfly for one stage of a mixed radix FFT */
-    void bfly_generic(
-        Complex * Fout,
-        const size_t fstride,
-        int m,
-        int p
-        )
+    PlanData & get_plan(int nfft,bool inverse) // return the cached plan for this size/direction, setting it up on first use
     {
-      int u,k,q1,q;
-      Complex * twiddles = &m_twiddles[0];
-      Complex t;
-      int Norig = m_nfft;
-      Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
-
-      for ( u=0; u<m; ++u ) {
-        k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {
-          scratchbuf[q1] = Fout[ k  ];
-          k += m;
+        /* 
+         * disabled draft: map::insert(value) returns pair<iterator,bool>, so assigning
+         * it to 'it' is ill-formed; 'it2.second' would also have to be 'it2->second'
+        typedef typename std::map<int,PlanData>::iterator MapIt;
+        MapIt it;
+        it = m_plans.find( PlanKey(nfft,inverse) );
+        if (it == m_plans.end() ) {
+            // create new entry
+            it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) );
+            MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) );
+            if (it2 != m_plans.end() ) {
+                it->second = it2.second;
+                it->second.invert();
+            }else{
+                it->second.make_twiddles(nfft,inverse);
+                it->second.factorize(nfft);
+            }
         }
-
-        k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {
-          int twidx=0;
-          Fout[ k ] = scratchbuf[0];
-          for (q=1;q<p;++q ) {
-            twidx += fstride * k;
-            if (twidx>=Norig) twidx-=Norig;
-            t=scratchbuf[q] * twiddles[twidx];
-            Fout[ k ] += t;
-          }
-          k += m;
+        return it->second;
+        */
+        PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; // operator[] inserts a default-constructed plan when the key is missing
+        if ( pd.m_twiddles.size() == 0 ) { // an empty twiddle table marks a plan that has not been set up yet
+            pd.make_twiddles(nfft,inverse);
+            pd.factorize(nfft);
         }
-      }
+        return pd;
     }
 
-    int m_nfft;
-    bool m_inverse;
-    std::vector<Complex> m_twiddles;
-    std::vector<Complex> m_realTwiddles;
-    std::vector<int> m_stageRadix;
-    std::vector<int> m_stageRemainder;
-/*
-    enum {FORWARD,INVERSE,REAL,COMPLEX};
-
-    struct PlanKey
+    Complex * real_twiddles(int ncfft2) // cached table of ncfft2 extra twiddles used by the real-input fwd(); returns its first element
     {
-        PlanKey(int nfft,bool isinverse,bool iscomplex)
-        {
-            _key = (nfft<<2) | (isinverse<<1) | iscomplex;
-        }
-
-        bool operator<(const PlanKey & other) const
-        {
-            return this->_key < other._key;
+        std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
+        if ( (int)twidref.size() != ncfft2 ) { // table not built yet for this size
+            twidref.resize(ncfft2);
+            int ncfft= ncfft2<<1;
+            Scalar pi =  acos( Scalar(-1) );
+            for (int k=1;k<=ncfft2;++k) 
+                twidref[k-1] = exp( Complex(0,-pi * (Scalar(k) / ncfft + Scalar(.5)) ) ); // entry k-1 = exp(-j*pi*(k/ncfft + 1/2)); kept in Scalar so long double is not truncated to double
         }
-        int _key;
-    };
+        return &twidref[0];
+    }
 
-    struct PlanData
+    void scale(Complex *dst,int n,Scalar s) // multiply the first n entries of dst by s, in place
     {
-        std::vector<Complex> m_twiddles;
-    };
-
-    std::map<PlanKey,
-*/
+        for (int k=0;k<n;++k)
+            dst[k] *= s;
+    }
   };
 }
diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp
index daf397790..32d1393d0 100644
--- a/unsupported/test/FFT.cpp
+++ b/unsupported/test/FFT.cpp
@@ -44,7 +44,7 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
     {
         long double totalpower=0;
         long double difpower=0;
-        cerr <<"idx\ttruth\t\tvalue\n";
+        cerr <<"idx\ttruth\t\tvalue\t|dif|=\n";
         for (size_t k0=0;k0<fftbuf.size();++k0) {
             complex<long double> acc = 0;
             long double phinc = -2.*k0* M_PIl / timebuf.size();
@@ -55,7 +55,7 @@ complex<long double>  promote(long double x) { return complex<long double>( x);
             complex<long double> x = promote(fftbuf[k0]); 
             complex<long double> dif = acc - x;
             difpower += norm(dif);
-            cerr << k0 << "\t" << acc << "\t" <<  x << endl;
+            cerr << k0 << "\t" << acc << "\t" <<  x << "\t" << sqrt(norm(dif)) << endl;
         }
         cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
         return sqrt(difpower/totalpower);
@@ -127,8 +127,9 @@ void test_FFT()
 #endif
 
 #if 1
-  CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) ); CALL_SUBTEST( test_scalar<long double>(45) );
   CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) ); CALL_SUBTEST( test_scalar<long double>(32) );
+  CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) ); CALL_SUBTEST( test_scalar<long double>(45) );
+  CALL_SUBTEST( test_scalar<float>(50) ); CALL_SUBTEST( test_scalar<double>(50) ); CALL_SUBTEST( test_scalar<long double>(50) );
   CALL_SUBTEST( test_scalar<float>(256) ); CALL_SUBTEST( test_scalar<double>(256) ); CALL_SUBTEST( test_scalar<long double>(256) );
   CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
 #endif
-- 
cgit v1.2.3


From 09b47332553a79dab30516e6b1d410dea90cf9b7 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Mon, 25 May 2009 23:52:21 -0400
Subject: added real-optimized inverse FFT (NFFT must be multiple of 4)

---
 bench/benchFFT.cpp                          |  30 +-
 unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 684 ++++++++++++++--------------
 2 files changed, 368 insertions(+), 346 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp
index ffa4ffffc..14f5063fb 100644
--- a/bench/benchFFT.cpp
+++ b/bench/benchFFT.cpp
@@ -53,7 +53,7 @@ template <> string nameof<long double>() {return "long double";}
 using namespace Eigen;
 
 template <typename T>
-void bench(int nfft)
+void bench(int nfft,bool fwd)
 {
     typedef typename NumTraits<T>::Real Scalar;
     typedef typename std::complex<Scalar> Complex;
@@ -69,7 +69,10 @@ void bench(int nfft)
     for (int k=0;k<8;++k) {
         timer.start();
         for(int i = 0; i < nits; i++)
-            fft.fwd( outbuf , inbuf);
+            if (fwd)
+                fft.fwd( outbuf , inbuf);
+            else
+                fft.inv(inbuf,outbuf);
         timer.stop();
     }
 
@@ -82,16 +85,27 @@ void bench(int nfft)
         mflops /= 2;
     }
 
+    if (fwd)
+        cout << " fwd";
+    else
+        cout << " inv";
+
     cout << " NFFT=" << nfft << "  " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s  " << mflops << "MFLOPS\n";
 }
 
 int main(int argc,char ** argv)
 {
-    bench<complex<float> >(NFFT);
-    bench<float>(NFFT);
-    bench<complex<double> >(NFFT);
-    bench<double>(NFFT);
-    bench<complex<long double> >(NFFT);
-    bench<long double>(NFFT);
+    bench<complex<float> >(NFFT,true); // each scalar type is benchmarked twice: forward (true), then inverse (false)
+    bench<complex<float> >(NFFT,false);
+    bench<float>(NFFT,true);
+    bench<float>(NFFT,false);
+    bench<complex<double> >(NFFT,true);
+    bench<complex<double> >(NFFT,false);
+    bench<double>(NFFT,true);
+    bench<double>(NFFT,false);
+    bench<complex<long double> >(NFFT,true);
+    bench<complex<long double> >(NFFT,false);
+    bench<long double>(NFFT,true);
+    bench<long double>(NFFT,false);
     return 0;
 }
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index 3580e6c61..453c7f6da 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -28,390 +28,398 @@
 
 namespace Eigen {
 
-    template <typename _Scalar>
-    struct ei_kiss_cpx_fft
+  template <typename _Scalar>
+  struct ei_kiss_cpx_fft
+  {
+    typedef  _Scalar Scalar;
+    typedef  std::complex<Scalar> Complex;
+    std::vector<Complex> m_twiddles;
+    std::vector<int> m_stageRadix;
+    std::vector<int> m_stageRemainder;
+    bool m_inverse;
+
+    void make_twiddles(int nfft,bool inverse) // fill m_twiddles with nfft unit-magnitude factors and record the direction
     {
-        typedef  _Scalar Scalar;
-        typedef  std::complex<Scalar> Complex;
-        std::vector<Complex> m_twiddles;
-        std::vector<int> m_stageRadix;
-        std::vector<int> m_stageRemainder;
-        bool m_inverse;
-
-        ei_kiss_cpx_fft() { }
-
-        void make_twiddles(int nfft,bool inverse)
-        {
-            m_inverse = inverse;
-            m_twiddles.resize(nfft);
-            Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
-            for (int i=0;i<nfft;++i)
-                m_twiddles[i] = exp( Complex(0,i*phinc) );
-        }
+      m_inverse = inverse;
+      m_twiddles.resize(nfft);
+      Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft; // acos(-1) is pi: phase step is +2*pi/nfft for inverse, -2*pi/nfft for forward
+      for (int i=0;i<nfft;++i)
+        m_twiddles[i] = exp( Complex(0,i*phinc) ); // twiddle i = exp(j*i*phinc)
+    }
 
-        void invert()
-        {
-            m_inverse = !m_inverse;
-            for ( size_t i=0;i<m_twiddles.size() ;++i)
-                m_twiddles[i] = conj( m_twiddles[i] );
-        }
+    void conjugate() // toggle the direction flag and conjugate every stored twiddle
+    {
+      m_inverse = !m_inverse;
+      for ( size_t i=0;i<m_twiddles.size() ;++i)
+        m_twiddles[i] = conj( m_twiddles[i] );
+    }
 
-        void factorize(int nfft)
-        {
-            if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft)
-            {
-                m_stageRadix.resize(0);
-                m_stageRemainder.resize(0);
-                //factorize
-                //start factoring out 4's, then 2's, then 3,5,7,9,...
-                int n= nfft;
-                int p=4;
-                do {
-                    while (n % p) {
-                        switch (p) {
-                            case 4: p = 2; break;
-                            case 2: p = 3; break;
-                            default: p += 2; break;
-                        }
-                        if (p*p>n)
-                            p=n;// impossible to have a factor > sqrt(n)
-                    }
-                    n /= p;
-                    m_stageRadix.push_back(p);
-                    m_stageRemainder.push_back(n);
-                }while(n>1);
-            }
+    void factorize(int nfft) // split nfft into per-stage radices, recording each radix and the length left after it
+    { // NOTE(review): entries are appended without clearing the tables first -- presumably called once per plan; confirm
+      //start factoring out 4's, then 2's, then 3,5,7,9,...
+      int n= nfft;
+      int p=4;
+      do {
+        while (n % p) { // p does not divide n: move on to the next candidate radix
+          switch (p) {
+            case 4: p = 2; break;
+            case 2: p = 3; break;
+            default: p += 2; break;
+          }
+          if (p*p>n)
+            p=n;// impossible to have a factor > sqrt(n)
         }
+        n /= p;
+        m_stageRadix.push_back(p); // radix used by this stage
+        m_stageRemainder.push_back(n); // length that remains to be factored after this stage
+      }while(n>1);
+    }
 
-        template <typename _Src>
-            void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
-            {
-                int p = m_stageRadix[stage];
-                int m = m_stageRemainder[stage];
-                Complex * Fout_beg = xout;
-                Complex * Fout_end = xout + p*m;
-
-                if (m>1) {
-                    do{
-                        // recursive call:
-                        // DFT of size m*p performed by doing
-                        // p instances of smaller DFTs of size m, 
-                        // each one takes a decimated version of the input
-                        work(stage+1, xout , xin, fstride*p,in_stride);
-                        xin += fstride*in_stride;
-                    }while( (xout += m) != Fout_end );
-                }else{
-                    do{
-                        *xout = *xin;
-                        xin += fstride*in_stride;
-                    }while(++xout != Fout_end );
-                }
-                xout=Fout_beg;
-
-                // recombine the p smaller DFTs 
-                switch (p) {
-                    case 2: bfly2(xout,fstride,m); break;
-                    case 3: bfly3(xout,fstride,m); break;
-                    case 4: bfly4(xout,fstride,m); break;
-                    case 5: bfly5(xout,fstride,m); break;
-                    default: bfly_generic(xout,fstride,m,p); break;
-                }
-            }
-
-        void bfly2( Complex * Fout, const size_t fstride, int m)
-        {
-            for (int k=0;k<m;++k) {
-                Complex t = Fout[m+k] * m_twiddles[k*fstride];
-                Fout[m+k] = Fout[k] - t;
-                Fout[k] += t;
-            }
-        }
+    template <typename _Src>
+    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) // recursive step for stage 'stage': fills p*m outputs at xout from strided input xin
+    {
+      int p = m_stageRadix[stage]; // radix of this stage
+      int m = m_stageRemainder[stage]; // length of each of the p sub-DFTs
+      Complex * Fout_beg = xout;
+      Complex * Fout_end = xout + p*m;
+
+      if (m>1) {
+        do{
+          // recursive call:
+          // DFT of size m*p performed by doing
+          // p instances of smaller DFTs of size m, 
+          // each one takes a decimated version of the input
+          work(stage+1, xout , xin, fstride*p,in_stride);
+          xin += fstride*in_stride;
+        }while( (xout += m) != Fout_end );
+      }else{
+        do{
+          *xout = *xin; // m==1: each sub-DFT is a single sample, so just gather the strided input
+          xin += fstride*in_stride;
+        }while(++xout != Fout_end );
+      }
+      xout=Fout_beg; // rewind to the start of this stage's output before the butterflies
+
+      // recombine the p smaller DFTs 
+      switch (p) {
+        case 2: bfly2(xout,fstride,m); break;
+        case 3: bfly3(xout,fstride,m); break;
+        case 4: bfly4(xout,fstride,m); break;
+        case 5: bfly5(xout,fstride,m); break;
+        default: bfly_generic(xout,fstride,m,p); break; // any other radix uses the general butterfly
+      }
+    }
 
-        void bfly4( Complex * Fout, const size_t fstride, const size_t m)
-        {
-            Complex scratch[6];
-            int negative_if_inverse = m_inverse * -2 +1;
-            for (size_t k=0;k<m;++k) {
-                scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
-                scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
-                scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
-                scratch[5] = Fout[k] - scratch[1];
-
-                Fout[k] += scratch[1];
-                scratch[3] = scratch[0] + scratch[2];
-                scratch[4] = scratch[0] - scratch[2];
-                scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
-
-                Fout[k+2*m]  = Fout[k] - scratch[3];
-                Fout[k] += scratch[3];
-                Fout[k+m] = scratch[5] + scratch[4];
-                Fout[k+3*m] = scratch[5] - scratch[4];
-            }
-        }
+    void bfly2( Complex * Fout, const size_t fstride, int m) // radix-2 butterflies combining Fout[0..m) with Fout[m..2m), in place
+    {
+      for (int k=0;k<m;++k) {
+        Complex t = Fout[m+k] * m_twiddles[k*fstride]; // second-half element times its twiddle
+        Fout[m+k] = Fout[k] - t;
+        Fout[k] += t;
+      }
+    }
 
-        void bfly3( Complex * Fout, const size_t fstride, const size_t m)
-        {
-            size_t k=m;
-            const size_t m2 = 2*m;
-            Complex *tw1,*tw2;
-            Complex scratch[5];
-            Complex epi3;
-            epi3 = m_twiddles[fstride*m];
-
-            tw1=tw2=&m_twiddles[0];
-
-            do{
-                scratch[1]=Fout[m] * *tw1;
-                scratch[2]=Fout[m2] * *tw2;
-
-                scratch[3]=scratch[1]+scratch[2];
-                scratch[0]=scratch[1]-scratch[2];
-                tw1 += fstride;
-                tw2 += fstride*2;
-                Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
-                scratch[0] *= epi3.imag();
-                *Fout += scratch[3];
-                Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
-                Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
-                ++Fout;
-            }while(--k);
-        }
+    void bfly4( Complex * Fout, const size_t fstride, const size_t m) // radix-4 butterflies over the four length-m quarters of Fout, in place
+    {
+      Complex scratch[6];
+      int negative_if_inverse = m_inverse * -2 +1; // +1 when m_inverse is false, -1 when it is true
+      for (size_t k=0;k<m;++k) {
+        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+        scratch[5] = Fout[k] - scratch[1];
+
+        Fout[k] += scratch[1];
+        scratch[3] = scratch[0] + scratch[2];
+        scratch[4] = scratch[0] - scratch[2];
+        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse ); // multiply by -j (forward) or +j (inverse)
+
+        Fout[k+2*m]  = Fout[k] - scratch[3];
+        Fout[k] += scratch[3];
+        Fout[k+m] = scratch[5] + scratch[4];
+        Fout[k+3*m] = scratch[5] - scratch[4];
+      }
+    }
 
-        void bfly5( Complex * Fout, const size_t fstride, const size_t m)
-        {
-            Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-            size_t u;
-            Complex scratch[13];
-            Complex * twiddles = &m_twiddles[0];
-            Complex *tw;
-            Complex ya,yb;
-            ya = twiddles[fstride*m];
-            yb = twiddles[fstride*2*m];
-
-            Fout0=Fout;
-            Fout1=Fout0+m;
-            Fout2=Fout0+2*m;
-            Fout3=Fout0+3*m;
-            Fout4=Fout0+4*m;
-
-            tw=twiddles;
-            for ( u=0; u<m; ++u ) {
-                scratch[0] = *Fout0;
-
-                scratch[1]  = *Fout1 * tw[u*fstride];
-                scratch[2]  = *Fout2 * tw[2*u*fstride];
-                scratch[3]  = *Fout3 * tw[3*u*fstride];
-                scratch[4]  = *Fout4 * tw[4*u*fstride];
-
-                scratch[7] = scratch[1] + scratch[4];
-                scratch[10] = scratch[1] - scratch[4];
-                scratch[8] = scratch[2] + scratch[3];
-                scratch[9] = scratch[2] - scratch[3];
-
-                *Fout0 +=  scratch[7];
-                *Fout0 +=  scratch[8];
-
-                scratch[5] = scratch[0] + Complex(
-                        (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
-                        (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
-                        );
-
-                scratch[6] = Complex(
-                        (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
-                        -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
-                        );
-
-                *Fout1 = scratch[5] - scratch[6];
-                *Fout4 = scratch[5] + scratch[6];
-
-                scratch[11] = scratch[0] +
-                    Complex(
-                            (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
-                            (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
-                           );
-
-                scratch[12] = Complex(
-                        -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
-                        (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
-                        );
-
-                *Fout2=scratch[11]+scratch[12];
-                *Fout3=scratch[11]-scratch[12];
-
-                ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-            }
-        }
+    void bfly3( Complex * Fout, const size_t fstride, const size_t m) // radix-3 butterflies over the three length-m thirds of Fout, in place
+    {
+      size_t k=m;
+      const size_t m2 = 2*m;
+      Complex *tw1,*tw2;
+      Complex scratch[5];
+      Complex epi3;
+      epi3 = m_twiddles[fstride*m]; // only its imaginary part is used below
+
+      tw1=tw2=&m_twiddles[0];
+
+      do{
+        scratch[1]=Fout[m] * *tw1;
+        scratch[2]=Fout[m2] * *tw2;
+
+        scratch[3]=scratch[1]+scratch[2];
+        scratch[0]=scratch[1]-scratch[2];
+        tw1 += fstride; // second input advances by fstride twiddles per output
+        tw2 += fstride*2; // third input advances twice as fast
+        Fout[m] = Complex( Fout->real() - Scalar(.5)*scratch[3].real() , Fout->imag() - Scalar(.5)*scratch[3].imag() ); // Scalar(.5) keeps float math out of double
+        scratch[0] *= epi3.imag();
+        *Fout += scratch[3];
+        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+        ++Fout;
+      }while(--k);
+    }
+
+    void bfly5( Complex * Fout, const size_t fstride, const size_t m) // radix-5 butterflies over the five length-m fifths of Fout, in place
+    {
+      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+      size_t u;
+      Complex scratch[13];
+      Complex * twiddles = &m_twiddles[0];
+      Complex *tw;
+      Complex ya,yb;
+      ya = twiddles[fstride*m]; // fixed rotation factors read from the twiddle table at fstride*m ...
+      yb = twiddles[fstride*2*m]; // ... and at twice that index
+
+      Fout0=Fout;
+      Fout1=Fout0+m;
+      Fout2=Fout0+2*m;
+      Fout3=Fout0+3*m;
+      Fout4=Fout0+4*m;
+
+      tw=twiddles;
+      for ( u=0; u<m; ++u ) {
+        scratch[0] = *Fout0; // keep the first input, which is overwritten below
+
+        scratch[1]  = *Fout1 * tw[u*fstride]; // remaining four inputs are twiddled first
+        scratch[2]  = *Fout2 * tw[2*u*fstride];
+        scratch[3]  = *Fout3 * tw[3*u*fstride];
+        scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+        scratch[7] = scratch[1] + scratch[4];
+        scratch[10] = scratch[1] - scratch[4];
+        scratch[8] = scratch[2] + scratch[3];
+        scratch[9] = scratch[2] - scratch[3];
+
+        *Fout0 +=  scratch[7];
+        *Fout0 +=  scratch[8];
+
+        scratch[5] = scratch[0] + Complex(
+            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+            );
+
+        scratch[6] = Complex(
+            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
+            );
+
+        *Fout1 = scratch[5] - scratch[6];
+        *Fout4 = scratch[5] + scratch[6];
+
+        scratch[11] = scratch[0] +
+          Complex(
+              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+              );
+
+        scratch[12] = Complex(
+            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+            );
+
+        *Fout2=scratch[11]+scratch[12];
+        *Fout3=scratch[11]-scratch[12];
+
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+      }
+    }
 
-        /* perform the butterfly for one stage of a mixed radix FFT */
-        void bfly_generic(
-                Complex * Fout,
-                const size_t fstride,
-                int m,
-                int p
-                )
-        {
-            int u,k,q1,q;
-            Complex * twiddles = &m_twiddles[0];
-            Complex t;
-            int Norig = m_twiddles.size();
-            Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
-
-            for ( u=0; u<m; ++u ) {
-                k=u;
-                for ( q1=0 ; q1<p ; ++q1 ) {
-                    scratchbuf[q1] = Fout[ k  ];
-                    k += m;
-                }
-
-                k=u;
-                for ( q1=0 ; q1<p ; ++q1 ) {
-                    int twidx=0;
-                    Fout[ k ] = scratchbuf[0];
-                    for (q=1;q<p;++q ) {
-                        twidx += fstride * k;
-                        if (twidx>=Norig) twidx-=Norig;
-                        t=scratchbuf[q] * twiddles[twidx];
-                        Fout[ k ] += t;
-                    }
-                    k += m;
-                }
-            }
+    /* perform the butterfly for one stage of a mixed radix FFT */
+    void bfly_generic(
+        Complex * Fout,
+        const size_t fstride,
+        int m,
+        int p
+        )
+    {
+      int u,k,q1,q;
+      Complex * twiddles = &m_twiddles[0];
+      Complex t;
+      int Norig = m_twiddles.size();
+      Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
+
+      for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          scratchbuf[q1] = Fout[ k  ];
+          k += m;
         }
-    };
 
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) {
+          int twidx=0;
+          Fout[ k ] = scratchbuf[0];
+          for (q=1;q<p;++q ) {
+            twidx += fstride * k;
+            if (twidx>=Norig) twidx-=Norig;
+            t=scratchbuf[q] * twiddles[twidx];
+            Fout[ k ] += t;
+          }
+          k += m;
+        }
+      }
+    }
+  };
 
   template <typename _Scalar>
   struct ei_kissfft_impl
   {
-    typedef _Scalar Scalar;
-    typedef std::complex<Scalar> Complex;
-    ei_kissfft_impl() {} 
+      typedef _Scalar Scalar;
+      typedef std::complex<Scalar> Complex;
 
-    void clear() 
-    {
+      void clear() 
+      {
         m_plans.clear();
         m_realTwiddles.clear();
-    }
+      }
 
-    template <typename _Src>
-    void fwd( Complex * dst,const _Src *src,int nfft)
-    {
+      template <typename _Src>
+      void fwd( Complex * dst,const _Src *src,int nfft)
+      {
         get_plan(nfft,false).work(0, dst, src, 1,1);
-    }
-
-    // real-to-complex forward FFT
-    // perform two FFTs of src even and src odd
-    // then twiddle to recombine them into the half-spectrum format
-    // then fill in the conjugate symmetric half
-    void fwd( Complex * dst,const Scalar * src,int nfft) 
-    {
+      }
+
+      // real-to-complex forward FFT
+      // perform two FFTs of src even and src odd
+      // then twiddle to recombine them into the half-spectrum format
+      // then fill in the conjugate symmetric half
+      void fwd( Complex * dst,const Scalar * src,int nfft) 
+      {
         if ( nfft&3  ) {
-            // use generic mode for odd
-            get_plan(nfft,false).work(0, dst, src, 1,1);
+          // use generic mode when nfft is not a multiple of 4
+          get_plan(nfft,false).work(0, dst, src, 1,1);
         }else{
-            int ncfft = nfft>>1;
-            int ncfft2 = nfft>>2;
-            Complex * rtw = real_twiddles(ncfft2);
-
-            // use optimized mode for even real
-            fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
-            Complex dc = dst[0].real() +  dst[0].imag();
-            Complex nyquist = dst[0].real() -  dst[0].imag();
-            int k;
-            for ( k=1;k <= ncfft2 ; ++k ) {
-                Complex fpk = dst[k];
-                Complex fpnk = conj(dst[ncfft-k]);
-                Complex f1k = fpk + fpnk;
-                Complex f2k = fpk - fpnk;
-                Complex tw= f2k * rtw[k-1];
-
-                dst[k] =  (f1k + tw) * Scalar(.5);
-                dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
-            }
- 
-            // place conjugate-symmetric half at the end for completeness
-            // TODO: make this configurable ( opt-out )
-            for ( k=1;k < ncfft ; ++k )
-                dst[nfft-k] = conj(dst[k]);
-
-            dst[0] = dc;
-            dst[ncfft] = nyquist;
+          int ncfft = nfft>>1;
+          int ncfft2 = nfft>>2;
+          Complex * rtw = real_twiddles(ncfft2);
+
+          // use optimized mode for even real
+          fwd( dst, reinterpret_cast<const Complex*> (src), ncfft);
+          Complex dc = dst[0].real() +  dst[0].imag();
+          Complex nyquist = dst[0].real() -  dst[0].imag();
+          int k;
+          for ( k=1;k <= ncfft2 ; ++k ) {
+            Complex fpk = dst[k];
+            Complex fpnk = conj(dst[ncfft-k]);
+            Complex f1k = fpk + fpnk;
+            Complex f2k = fpk - fpnk;
+            Complex tw= f2k * rtw[k-1];
+            dst[k] =  (f1k + tw) * Scalar(.5);
+            dst[ncfft-k] =  conj(f1k -tw)*Scalar(.5);
+          }
+
+          // place conjugate-symmetric half at the end for completeness
+          // TODO: make this configurable ( opt-out )
+          for ( k=1;k < ncfft ; ++k )
+            dst[nfft-k] = conj(dst[k]);
+          dst[0] = dc;
+          dst[ncfft] = nyquist;
         }
-    }
-
-    // half-complex to scalar
-    void inv( Scalar * dst,const Complex * src,int nfft) 
-    {
-        // TODO add optimized version for even numbers
-        std::vector<Complex> tmp(nfft);
-        inv(&tmp[0],src,nfft);
-        for (int k=0;k<nfft;++k)
-            dst[k] = tmp[k].real();
-    }
+      }
 
-    void inv(Complex * dst,const Complex  *src,int nfft)
-    {
+      // inverse complex-to-complex
+      void inv(Complex * dst,const Complex  *src,int nfft)
+      {
         get_plan(nfft,true).work(0, dst, src, 1,1);
         scale(dst, nfft, Scalar(1)/nfft );
-    }
+      }
+
+      // half-complex to scalar: inverse FFT that writes nfft real samples to dst
+      void inv( Scalar * dst,const Complex * src,int nfft) 
+      {
+        if (nfft&3) { // nfft is not a multiple of 4: run the complex inverse, then keep only the real parts
+          m_scratchBuf.resize(nfft);
+          inv(&m_scratchBuf[0],src,nfft);
+          for (int k=0;k<nfft;++k)
+            dst[k] = m_scratchBuf[k].real();
+        }else{
+          // optimized version for multiple of 4: one complex inverse of half the size (ncfft)
+          int ncfft = nfft>>1;
+          int ncfft2 = nfft>>2;
+          Complex * rtw = real_twiddles(ncfft2);
+          m_scratchBuf.resize(ncfft);
+          m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); // bin 0 packs the real parts of src[0] and src[ncfft]
+          for (int k = 1; k <= ncfft / 2; ++k) { // on the last pass k == ncfft-k, so the second store below overwrites the first
+            Complex fk = src[k];
+            Complex fnkc = conj(src[ncfft-k]);
+            Complex fek = fk + fnkc;
+            Complex tmp = fk - fnkc;
+            Complex fok = tmp * conj(rtw[k-1]);
+            m_scratchBuf[k] = fek + fok;
+            m_scratchBuf[ncfft-k] = conj(fek - fok);
+          }
+          scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); // 1/nfft normalization is applied before the transform
+          get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_scratchBuf[0], 1,1); // dst is filled as ncfft Complex values, i.e. nfft Scalars
+        }
+      }
 
-    private:
+  private:
 
-    typedef ei_kiss_cpx_fft<Scalar> PlanData;
+      typedef ei_kiss_cpx_fft<Scalar> PlanData;
 
-    typedef std::map<int,PlanData> PlanMap;
-    PlanMap m_plans;
-    std::map<int, std::vector<Complex> > m_realTwiddles;
+      typedef std::map<int,PlanData> PlanMap;
+      PlanMap m_plans;
+      std::map<int, std::vector<Complex> > m_realTwiddles;
+      std::vector<Complex> m_scratchBuf;
 
-    int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
+      int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
 
-    PlanData & get_plan(int nfft,bool inverse)
-    {
-        /* 
+      PlanData & get_plan(int nfft,bool inverse)
+      {
+        /*  TODO: figure out why this does not work (g++ 4.3.2)
          * for some reason this does not work
          *
-        typedef typename std::map<int,PlanData>::iterator MapIt;
-        MapIt it;
-        it = m_plans.find( PlanKey(nfft,inverse) );
-        if (it == m_plans.end() ) {
-            // create new entry
-            it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) );
-            MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) );
-            if (it2 != m_plans.end() ) {
-                it->second = it2.second;
-                it->second.invert();
-            }else{
-                it->second.make_twiddles(nfft,inverse);
-                it->second.factorize(nfft);
-            }
+         PlanMap::iterator it;
+         it = m_plans.find( PlanKey(nfft,inverse) );
+         if (it == m_plans.end() ) {
+        // create new entry
+        it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) );
+        MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) );
+        if (it2 != m_plans.end() ) {
+        it->second = it2.second;
+        it->second.conjugate();
+        }else{
+        it->second.make_twiddles(nfft,inverse);
+        it->second.factorize(nfft);
+        }
         }
         return it->second;
         */
         PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
         if ( pd.m_twiddles.size() == 0 ) {
-            pd.make_twiddles(nfft,inverse);
-            pd.factorize(nfft);
+          pd.make_twiddles(nfft,inverse);
+          pd.factorize(nfft);
         }
         return pd;
-    }
+      }
 
-    Complex * real_twiddles(int ncfft2)
-    {
+      Complex * real_twiddles(int ncfft2)
+      {
         std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
         if ( (int)twidref.size() != ncfft2 ) {
-            twidref.resize(ncfft2);
-            int ncfft= ncfft2<<1;
-            Scalar pi =  acos( Scalar(-1) );
-            for (int k=1;k<=ncfft2;++k) 
-                twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) );
+          twidref.resize(ncfft2);
+          int ncfft= ncfft2<<1;
+          Scalar pi =  acos( Scalar(-1) );
+          for (int k=1;k<=ncfft2;++k) 
+            twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) );
         }
         return &twidref[0];
-    }
+      }
 
-    void scale(Complex *dst,int n,Scalar s) 
-    {
+      void scale(Complex *dst,int n,Scalar s) 
+      {
         for (int k=0;k<n;++k)
-            dst[k] *= s;
-    }
+          dst[k] *= s;
+      }
   };
 }
-- 
cgit v1.2.3


From f13e000b454fed0a9b0d71ca1481d6894bacd8eb Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Wed, 27 May 2009 21:32:42 -0400
Subject: various comment changes

---
 unsupported/Eigen/FFT                       |   6 +-
 unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 484 ++++++++++++++--------------
 2 files changed, 237 insertions(+), 253 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 3d852f5a2..31d8c74c5 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -1,5 +1,5 @@
 // This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
+// for linear algebra. 
 //
 // Copyright (C) 2009 Mark Borgerding mark a borgerding net
 //
@@ -29,14 +29,14 @@
 #include "src/FFT/ei_kissfft_impl.h"
 #define DEFAULT_FFT_IMPL ei_kissfft_impl
 
-// FFTW: faster, GPL-not LGPL, bigger code size
+// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
 #ifdef FFTW_PATIENT  // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines
 // TODO 
 // #include "src/FFT/ei_fftw_impl.h"
 // #define DEFAULT_FFT_IMPL ei_fftw_impl
 #endif
 
-// intel Math Kernel Library: fastest, commerical
+// intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form
 #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines
 // TODO 
 // #include "src/FFT/ei_imkl_impl.h"
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index 453c7f6da..91fa5ca18 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -1,5 +1,5 @@
 // This file is part of Eigen, a lightweight C++ template library
-// for linear algebra. Eigen itself is part of the KDE project.
+// for linear algebra.
 //
 // Copyright (C) 2009 Mark Borgerding mark a borgerding net
 //
@@ -28,252 +28,255 @@
 
 namespace Eigen {
 
+  // This FFT implementation was derived from kissfft http://sourceforge.net/projects/kissfft
+  // Copyright 2003-2009 Mark Borgerding
+
   template <typename _Scalar>
-  struct ei_kiss_cpx_fft
-  {
-    typedef  _Scalar Scalar;
-    typedef  std::complex<Scalar> Complex;
-    std::vector<Complex> m_twiddles;
-    std::vector<int> m_stageRadix;
-    std::vector<int> m_stageRemainder;
-    bool m_inverse;
-
-    void make_twiddles(int nfft,bool inverse)
-    {
-      m_inverse = inverse;
-      m_twiddles.resize(nfft);
-      Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
-      for (int i=0;i<nfft;++i)
-        m_twiddles[i] = exp( Complex(0,i*phinc) );
-    }
-
-    void conjugate()
+    struct ei_kiss_cpx_fft
     {
-      m_inverse = !m_inverse;
-      for ( size_t i=0;i<m_twiddles.size() ;++i)
-        m_twiddles[i] = conj( m_twiddles[i] );
-    }
+      typedef _Scalar Scalar;
+      typedef std::complex<Scalar> Complex;
+      std::vector<Complex> m_twiddles;
+      std::vector<int> m_stageRadix;
+      std::vector<int> m_stageRemainder;
+      bool m_inverse;
 
-    void factorize(int nfft)
-    {
-      //start factoring out 4's, then 2's, then 3,5,7,9,...
-      int n= nfft;
-      int p=4;
-      do {
-        while (n % p) {
+      void make_twiddles(int nfft,bool inverse)
+      {
+        m_inverse = inverse;
+        m_twiddles.resize(nfft);
+        Scalar phinc =  (inverse?2:-2)* acos( (Scalar) -1)  / nfft;
+        for (int i=0;i<nfft;++i)
+          m_twiddles[i] = exp( Complex(0,i*phinc) );
+      }
+
+      void conjugate()
+      {
+        m_inverse = !m_inverse;
+        for ( size_t i=0;i<m_twiddles.size() ;++i)
+          m_twiddles[i] = conj( m_twiddles[i] );
+      }
+
+      void factorize(int nfft)
+      {
+        //start factoring out 4's, then 2's, then 3,5,7,9,...
+        int n= nfft;
+        int p=4;
+        do {
+          while (n % p) {
+            switch (p) {
+              case 4: p = 2; break;
+              case 2: p = 3; break;
+              default: p += 2; break;
+            }
+            if (p*p>n)
+              p=n;// impossible to have a factor > sqrt(n)
+          }
+          n /= p;
+          m_stageRadix.push_back(p);
+          m_stageRemainder.push_back(n);
+        }while(n>1);
+      }
+
+      template <typename _Src>
+        void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
+        {
+          int p = m_stageRadix[stage];
+          int m = m_stageRemainder[stage];
+          Complex * Fout_beg = xout;
+          Complex * Fout_end = xout + p*m;
+
+          if (m>1) {
+            do{
+              // recursive call:
+              // DFT of size m*p performed by doing
+              // p instances of smaller DFTs of size m, 
+              // each one takes a decimated version of the input
+              work(stage+1, xout , xin, fstride*p,in_stride);
+              xin += fstride*in_stride;
+            }while( (xout += m) != Fout_end );
+          }else{
+            do{
+              *xout = *xin;
+              xin += fstride*in_stride;
+            }while(++xout != Fout_end );
+          }
+          xout=Fout_beg;
+
+          // recombine the p smaller DFTs 
           switch (p) {
-            case 4: p = 2; break;
-            case 2: p = 3; break;
-            default: p += 2; break;
+            case 2: bfly2(xout,fstride,m); break;
+            case 3: bfly3(xout,fstride,m); break;
+            case 4: bfly4(xout,fstride,m); break;
+            case 5: bfly5(xout,fstride,m); break;
+            default: bfly_generic(xout,fstride,m,p); break;
           }
-          if (p*p>n)
-            p=n;// impossible to have a factor > sqrt(n)
         }
-        n /= p;
-        m_stageRadix.push_back(p);
-        m_stageRemainder.push_back(n);
-      }while(n>1);
-    }
-
-    template <typename _Src>
-    void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride)
-    {
-      int p = m_stageRadix[stage];
-      int m = m_stageRemainder[stage];
-      Complex * Fout_beg = xout;
-      Complex * Fout_end = xout + p*m;
 
-      if (m>1) {
-        do{
-          // recursive call:
-          // DFT of size m*p performed by doing
-          // p instances of smaller DFTs of size m, 
-          // each one takes a decimated version of the input
-          work(stage+1, xout , xin, fstride*p,in_stride);
-          xin += fstride*in_stride;
-        }while( (xout += m) != Fout_end );
-      }else{
-        do{
-          *xout = *xin;
-          xin += fstride*in_stride;
-        }while(++xout != Fout_end );
-      }
-      xout=Fout_beg;
-
-      // recombine the p smaller DFTs 
-      switch (p) {
-        case 2: bfly2(xout,fstride,m); break;
-        case 3: bfly3(xout,fstride,m); break;
-        case 4: bfly4(xout,fstride,m); break;
-        case 5: bfly5(xout,fstride,m); break;
-        default: bfly_generic(xout,fstride,m,p); break;
+      void bfly2( Complex * Fout, const size_t fstride, int m)
+      {
+        for (int k=0;k<m;++k) {
+          Complex t = Fout[m+k] * m_twiddles[k*fstride];
+          Fout[m+k] = Fout[k] - t;
+          Fout[k] += t;
+        }
       }
-    }
 
-    void bfly2( Complex * Fout, const size_t fstride, int m)
-    {
-      for (int k=0;k<m;++k) {
-        Complex t = Fout[m+k] * m_twiddles[k*fstride];
-        Fout[m+k] = Fout[k] - t;
-        Fout[k] += t;
+      void bfly4( Complex * Fout, const size_t fstride, const size_t m)
+      {
+        Complex scratch[6];
+        int negative_if_inverse = m_inverse * -2 +1;
+        for (size_t k=0;k<m;++k) {
+          scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
+          scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
+          scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
+          scratch[5] = Fout[k] - scratch[1];
+
+          Fout[k] += scratch[1];
+          scratch[3] = scratch[0] + scratch[2];
+          scratch[4] = scratch[0] - scratch[2];
+          scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
+
+          Fout[k+2*m]  = Fout[k] - scratch[3];
+          Fout[k] += scratch[3];
+          Fout[k+m] = scratch[5] + scratch[4];
+          Fout[k+3*m] = scratch[5] - scratch[4];
+        }
       }
-    }
 
-    void bfly4( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      Complex scratch[6];
-      int negative_if_inverse = m_inverse * -2 +1;
-      for (size_t k=0;k<m;++k) {
-        scratch[0] = Fout[k+m] * m_twiddles[k*fstride];
-        scratch[1] = Fout[k+2*m] * m_twiddles[k*fstride*2];
-        scratch[2] = Fout[k+3*m] * m_twiddles[k*fstride*3];
-        scratch[5] = Fout[k] - scratch[1];
-
-        Fout[k] += scratch[1];
-        scratch[3] = scratch[0] + scratch[2];
-        scratch[4] = scratch[0] - scratch[2];
-        scratch[4] = Complex( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse );
-
-        Fout[k+2*m]  = Fout[k] - scratch[3];
-        Fout[k] += scratch[3];
-        Fout[k+m] = scratch[5] + scratch[4];
-        Fout[k+3*m] = scratch[5] - scratch[4];
+      void bfly3( Complex * Fout, const size_t fstride, const size_t m)
+      {
+        size_t k=m;
+        const size_t m2 = 2*m;
+        Complex *tw1,*tw2;
+        Complex scratch[5];
+        Complex epi3;
+        epi3 = m_twiddles[fstride*m];
+
+        tw1=tw2=&m_twiddles[0];
+
+        do{
+          scratch[1]=Fout[m] * *tw1;
+          scratch[2]=Fout[m2] * *tw2;
+
+          scratch[3]=scratch[1]+scratch[2];
+          scratch[0]=scratch[1]-scratch[2];
+          tw1 += fstride;
+          tw2 += fstride*2;
+          Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
+          scratch[0] *= epi3.imag();
+          *Fout += scratch[3];
+          Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
+          Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
+          ++Fout;
+        }while(--k);
       }
-    }
 
-    void bfly3( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      size_t k=m;
-      const size_t m2 = 2*m;
-      Complex *tw1,*tw2;
-      Complex scratch[5];
-      Complex epi3;
-      epi3 = m_twiddles[fstride*m];
-
-      tw1=tw2=&m_twiddles[0];
-
-      do{
-        scratch[1]=Fout[m] * *tw1;
-        scratch[2]=Fout[m2] * *tw2;
-
-        scratch[3]=scratch[1]+scratch[2];
-        scratch[0]=scratch[1]-scratch[2];
-        tw1 += fstride;
-        tw2 += fstride*2;
-        Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() );
-        scratch[0] *= epi3.imag();
-        *Fout += scratch[3];
-        Fout[m2] = Complex(  Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() );
-        Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() );
-        ++Fout;
-      }while(--k);
-    }
-
-    void bfly5( Complex * Fout, const size_t fstride, const size_t m)
-    {
-      Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
-      size_t u;
-      Complex scratch[13];
-      Complex * twiddles = &m_twiddles[0];
-      Complex *tw;
-      Complex ya,yb;
-      ya = twiddles[fstride*m];
-      yb = twiddles[fstride*2*m];
-
-      Fout0=Fout;
-      Fout1=Fout0+m;
-      Fout2=Fout0+2*m;
-      Fout3=Fout0+3*m;
-      Fout4=Fout0+4*m;
-
-      tw=twiddles;
-      for ( u=0; u<m; ++u ) {
-        scratch[0] = *Fout0;
-
-        scratch[1]  = *Fout1 * tw[u*fstride];
-        scratch[2]  = *Fout2 * tw[2*u*fstride];
-        scratch[3]  = *Fout3 * tw[3*u*fstride];
-        scratch[4]  = *Fout4 * tw[4*u*fstride];
-
-        scratch[7] = scratch[1] + scratch[4];
-        scratch[10] = scratch[1] - scratch[4];
-        scratch[8] = scratch[2] + scratch[3];
-        scratch[9] = scratch[2] - scratch[3];
-
-        *Fout0 +=  scratch[7];
-        *Fout0 +=  scratch[8];
-
-        scratch[5] = scratch[0] + Complex(
-            (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
-            (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
-            );
-
-        scratch[6] = Complex(
-            (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
-            -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
-            );
-
-        *Fout1 = scratch[5] - scratch[6];
-        *Fout4 = scratch[5] + scratch[6];
-
-        scratch[11] = scratch[0] +
-          Complex(
-              (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
-              (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+      void bfly5( Complex * Fout, const size_t fstride, const size_t m)
+      {
+        Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+        size_t u;
+        Complex scratch[13];
+        Complex * twiddles = &m_twiddles[0];
+        Complex *tw;
+        Complex ya,yb;
+        ya = twiddles[fstride*m];
+        yb = twiddles[fstride*2*m];
+
+        Fout0=Fout;
+        Fout1=Fout0+m;
+        Fout2=Fout0+2*m;
+        Fout3=Fout0+3*m;
+        Fout4=Fout0+4*m;
+
+        tw=twiddles;
+        for ( u=0; u<m; ++u ) {
+          scratch[0] = *Fout0;
+
+          scratch[1]  = *Fout1 * tw[u*fstride];
+          scratch[2]  = *Fout2 * tw[2*u*fstride];
+          scratch[3]  = *Fout3 * tw[3*u*fstride];
+          scratch[4]  = *Fout4 * tw[4*u*fstride];
+
+          scratch[7] = scratch[1] + scratch[4];
+          scratch[10] = scratch[1] - scratch[4];
+          scratch[8] = scratch[2] + scratch[3];
+          scratch[9] = scratch[2] - scratch[3];
+
+          *Fout0 +=  scratch[7];
+          *Fout0 +=  scratch[8];
+
+          scratch[5] = scratch[0] + Complex(
+              (scratch[7].real()*ya.real() ) + (scratch[8].real() *yb.real() ),
+              (scratch[7].imag()*ya.real()) + (scratch[8].imag()*yb.real())
+              );
+
+          scratch[6] = Complex(
+              (scratch[10].imag()*ya.imag()) + (scratch[9].imag()*yb.imag()),
+              -(scratch[10].real()*ya.imag()) - (scratch[9].real()*yb.imag())
               );
 
-        scratch[12] = Complex(
-            -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
-            (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
-            );
+          *Fout1 = scratch[5] - scratch[6];
+          *Fout4 = scratch[5] + scratch[6];
 
-        *Fout2=scratch[11]+scratch[12];
-        *Fout3=scratch[11]-scratch[12];
+          scratch[11] = scratch[0] +
+            Complex(
+                (scratch[7].real()*yb.real()) + (scratch[8].real()*ya.real()),
+                (scratch[7].imag()*yb.real()) + (scratch[8].imag()*ya.real())
+                );
 
-        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
-      }
-    }
-
-    /* perform the butterfly for one stage of a mixed radix FFT */
-    void bfly_generic(
-        Complex * Fout,
-        const size_t fstride,
-        int m,
-        int p
-        )
-    {
-      int u,k,q1,q;
-      Complex * twiddles = &m_twiddles[0];
-      Complex t;
-      int Norig = m_twiddles.size();
-      Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
-
-      for ( u=0; u<m; ++u ) {
-        k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {
-          scratchbuf[q1] = Fout[ k  ];
-          k += m;
+          scratch[12] = Complex(
+              -(scratch[10].imag()*yb.imag()) + (scratch[9].imag()*ya.imag()),
+              (scratch[10].real()*yb.imag()) - (scratch[9].real()*ya.imag())
+              );
+
+          *Fout2=scratch[11]+scratch[12];
+          *Fout3=scratch[11]-scratch[12];
+
+          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
         }
+      }
 
-        k=u;
-        for ( q1=0 ; q1<p ; ++q1 ) {
-          int twidx=0;
-          Fout[ k ] = scratchbuf[0];
-          for (q=1;q<p;++q ) {
-            twidx += fstride * k;
-            if (twidx>=Norig) twidx-=Norig;
-            t=scratchbuf[q] * twiddles[twidx];
-            Fout[ k ] += t;
+      /* perform the butterfly for one stage of a mixed radix FFT */
+      void bfly_generic(
+          Complex * Fout,
+          const size_t fstride,
+          int m,
+          int p
+          )
+      {
+        int u,k,q1,q;
+        Complex * twiddles = &m_twiddles[0];
+        Complex t;
+        int Norig = m_twiddles.size();
+        Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
+
+        for ( u=0; u<m; ++u ) {
+          k=u;
+          for ( q1=0 ; q1<p ; ++q1 ) {
+            scratchbuf[q1] = Fout[ k  ];
+            k += m;
+          }
+
+          k=u;
+          for ( q1=0 ; q1<p ; ++q1 ) {
+            int twidx=0;
+            Fout[ k ] = scratchbuf[0];
+            for (q=1;q<p;++q ) {
+              twidx += fstride * k;
+              if (twidx>=Norig) twidx-=Norig;
+              t=scratchbuf[q] * twiddles[twidx];
+              Fout[ k ] += t;
+            }
+            k += m;
           }
-          k += m;
         }
       }
-    }
-  };
+    };
 
   template <typename _Scalar>
-  struct ei_kissfft_impl
-  {
+    struct ei_kissfft_impl
+    {
       typedef _Scalar Scalar;
       typedef std::complex<Scalar> Complex;
 
@@ -284,10 +287,10 @@ namespace Eigen {
       }
 
       template <typename _Src>
-      void fwd( Complex * dst,const _Src *src,int nfft)
-      {
-        get_plan(nfft,false).work(0, dst, src, 1,1);
-      }
+        void fwd( Complex * dst,const _Src *src,int nfft)
+        {
+          get_plan(nfft,false).work(0, dst, src, 1,1);
+        }
 
       // real-to-complex forward FFT
       // perform two FFTs of src even and src odd
@@ -363,11 +366,10 @@ namespace Eigen {
         }
       }
 
-  private:
-
+      private:
       typedef ei_kiss_cpx_fft<Scalar> PlanData;
-
       typedef std::map<int,PlanData> PlanMap;
+
       PlanMap m_plans;
       std::map<int, std::vector<Complex> > m_realTwiddles;
       std::vector<Complex> m_scratchBuf;
@@ -376,25 +378,7 @@ namespace Eigen {
 
       PlanData & get_plan(int nfft,bool inverse)
       {
-        /*  TODO: figure out why this does not work (g++ 4.3.2)
-         * for some reason this does not work
-         *
-         PlanMap::iterator it;
-         it = m_plans.find( PlanKey(nfft,inverse) );
-         if (it == m_plans.end() ) {
-        // create new entry
-        it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) );
-        MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) );
-        if (it2 != m_plans.end() ) {
-        it->second = it2.second;
-        it->second.conjugate();
-        }else{
-        it->second.make_twiddles(nfft,inverse);
-        it->second.factorize(nfft);
-        }
-        }
-        return it->second;
-        */
+        // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
         PlanData & pd = m_plans[ PlanKey(nfft,inverse) ];
         if ( pd.m_twiddles.size() == 0 ) {
           pd.make_twiddles(nfft,inverse);
@@ -421,5 +405,5 @@ namespace Eigen {
         for (int k=0;k<n;++k)
           dst[k] *= s;
       }
-  };
+    };
 }
-- 
cgit v1.2.3


From 1fd6dfe428e6b65383b84f8dfe8f67c70ec370b4 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Sat, 30 May 2009 17:55:47 -0400
Subject: added ei_fftw_impl

---
 unsupported/Eigen/FFT                       |   5 +-
 unsupported/Eigen/src/FFT/ei_fftw_impl.h    | 198 ++++++++++++++++++++++++++++
 unsupported/Eigen/src/FFT/ei_kissfft_impl.h |  25 ++--
 3 files changed, 214 insertions(+), 14 deletions(-)
 create mode 100644 unsupported/Eigen/src/FFT/ei_fftw_impl.h

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 31d8c74c5..03f8504a4 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -30,9 +30,8 @@
 #define DEFAULT_FFT_IMPL ei_kissfft_impl
 
 // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
-#ifdef FFTW_PATIENT  // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines
-// TODO 
-// #include "src/FFT/ei_fftw_impl.h"
+#ifdef FFTW_ESTIMATE  // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines
+#include "src/FFT/ei_fftw_impl.h"
 // #define DEFAULT_FFT_IMPL ei_fftw_impl
 #endif
 
diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
new file mode 100644
index 000000000..d592bbb20
--- /dev/null
+++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
@@ -0,0 +1,198 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra. 
+//
+// Copyright (C) 2009 Mark Borgerding mark a borgerding net
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+namespace Eigen {
+  // FFTW uses non-const arguments
+  // so we must use ugly const_cast calls for all the args it uses
+  //
+  // This should be safe as long as 
+  // 1. we use FFTW_ESTIMATE for all our planning
+  //       see the FFTW docs section 4.3.2 "Planner Flags"
+  // 2. fftw_complex is compatible with std::complex
+  //    This assumes std::complex<T> layout is array of size 2 with real,imag
+  template <typename T> 
+  T * ei_fftw_cast(const T* p) 
+  { 
+      return const_cast<T*>( p); 
+  }
+
+  fftw_complex * ei_fftw_cast( const std::complex<double> * p) 
+  { 
+      return const_cast<fftw_complex*>( reinterpret_cast<const fftw_complex*>(p) ); 
+  }
+
+  fftwf_complex * ei_fftw_cast( const std::complex<float> * p) 
+  { 
+      return const_cast<fftwf_complex*>( reinterpret_cast<const fftwf_complex*>(p) ); 
+  }
+
+  fftwl_complex * ei_fftw_cast( const std::complex<long double> * p) 
+  { 
+      return const_cast<fftwl_complex*>( reinterpret_cast<const fftwl_complex*>(p) ); 
+  }
+
+  template <typename T> 
+  struct ei_fftw_plan {};
+
+  template <> 
+  struct ei_fftw_plan<float>
+  {
+      typedef float scalar_type;
+      typedef fftwf_complex complex_type;
+      fftwf_plan m_plan;
+      ei_fftw_plan() :m_plan(NULL) {}
+      ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);}
+
+      void fwd(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
+          fftwf_execute_dft( m_plan, src,dst);
+      }
+      void inv(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
+          fftwf_execute_dft( m_plan, src,dst);
+      }
+      void fwd(complex_type * dst,scalar_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwf_execute_dft_r2c( m_plan,src,dst);
+      }
+      void inv(scalar_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL)
+              m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwf_execute_dft_c2r( m_plan, src,dst);
+      }
+  };
+  template <> 
+  struct ei_fftw_plan<double>
+  {
+      typedef double scalar_type;
+      typedef fftw_complex complex_type;
+      fftw_plan m_plan;
+      ei_fftw_plan() :m_plan(NULL) {}
+      ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);}
+
+      void fwd(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
+          fftw_execute_dft( m_plan, src,dst);
+      }
+      void inv(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
+          fftw_execute_dft( m_plan, src,dst);
+      }
+      void fwd(complex_type * dst,scalar_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftw_execute_dft_r2c( m_plan,src,dst);
+      }
+      void inv(scalar_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL)
+              m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftw_execute_dft_c2r( m_plan, src,dst);
+      }
+  };
+  template <> 
+  struct ei_fftw_plan<long double>
+  {
+      typedef long double scalar_type;
+      typedef fftwl_complex complex_type;
+      fftwl_plan m_plan;
+      ei_fftw_plan() :m_plan(NULL) {}
+      ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);}
+
+      void fwd(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
+          fftwl_execute_dft( m_plan, src,dst);
+      }
+      void inv(complex_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
+          fftwl_execute_dft( m_plan, src,dst);
+      }
+      void fwd(complex_type * dst,scalar_type * src,int nfft) {
+          if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwl_execute_dft_r2c( m_plan,src,dst);
+      }
+      void inv(scalar_type * dst,complex_type * src,int nfft) {
+          if (m_plan==NULL)
+              m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
+          fftwl_execute_dft_c2r( m_plan, src,dst);
+      }
+  };
+
+  template <typename _Scalar>
+  struct ei_fftw_impl
+  {
+      typedef _Scalar Scalar;
+      typedef std::complex<Scalar> Complex;
+
+      void clear() 
+      {
+        m_plans.clear();
+      }
+
+      void fwd( Complex * dst,const Complex *src,int nfft)
+      {
+        get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
+      }
+
+      // real-to-complex forward FFT
+      void fwd( Complex * dst,const Scalar * src,int nfft) 
+      {
+          get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft);
+          int nhbins=(nfft>>1)+1;
+          for (int k=nhbins;k < nfft; ++k )
+              dst[k] = conj(dst[nfft-k]);
+      }
+
+      // inverse complex-to-complex
+      void inv(Complex * dst,const Complex  *src,int nfft)
+      {
+        get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
+        // scaling
+        Scalar s = 1./nfft;
+        for (int k=0;k<nfft;++k)
+          dst[k] *= s;
+      }
+
+      // half-complex to scalar
+      void inv( Scalar * dst,const Complex * src,int nfft) 
+      {
+        get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
+        Scalar s = 1./nfft;
+        for (int k=0;k<nfft;++k)
+          dst[k] *= s;
+      }
+
+  private:
+      typedef ei_fftw_plan<Scalar> PlanData;
+      typedef std::map<int,PlanData> PlanMap;
+
+      PlanMap m_plans;
+
+      PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src)
+      {
+          bool inplace = (dst==src);
+          bool aligned = ( (reinterpret_cast<size_t>(src)&15) | (reinterpret_cast<size_t>(dst)&15) ) == 0;
+          int key = (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned;
+          return m_plans[key];
+      }
+  };
+}
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index 91fa5ca18..a84ac68a0 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -39,6 +39,7 @@ namespace Eigen {
       std::vector<Complex> m_twiddles;
       std::vector<int> m_stageRadix;
       std::vector<int> m_stageRemainder;
+      std::vector<Complex> m_scratchBuf;
       bool m_inverse;
 
       void make_twiddles(int nfft,bool inverse)
@@ -75,6 +76,8 @@ namespace Eigen {
           n /= p;
           m_stageRadix.push_back(p);
           m_stageRemainder.push_back(n);
+          if ( p > 5 )
+              m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic
         }while(n>1);
       }
 
@@ -249,7 +252,7 @@ namespace Eigen {
         Complex * twiddles = &m_twiddles[0];
         Complex t;
         int Norig = m_twiddles.size();
-        Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) );
+        Complex * scratchbuf = &m_scratchBuf[0];
 
         for ( u=0; u<m; ++u ) {
           k=u;
@@ -341,28 +344,28 @@ namespace Eigen {
       void inv( Scalar * dst,const Complex * src,int nfft) 
       {
         if (nfft&3) {
-          m_scratchBuf.resize(nfft);
-          inv(&m_scratchBuf[0],src,nfft);
+          m_tmpBuf.resize(nfft);
+          inv(&m_tmpBuf[0],src,nfft);
           for (int k=0;k<nfft;++k)
-            dst[k] = m_scratchBuf[k].real();
+            dst[k] = m_tmpBuf[k].real();
         }else{
           // optimized version for multiple of 4
           int ncfft = nfft>>1;
           int ncfft2 = nfft>>2;
           Complex * rtw = real_twiddles(ncfft2);
-          m_scratchBuf.resize(ncfft);
-          m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
+          m_tmpBuf.resize(ncfft);
+          m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() );
           for (int k = 1; k <= ncfft / 2; ++k) {
             Complex fk = src[k];
             Complex fnkc = conj(src[ncfft-k]);
             Complex fek = fk + fnkc;
             Complex tmp = fk - fnkc;
             Complex fok = tmp * conj(rtw[k-1]);
-            m_scratchBuf[k] = fek + fok;
-            m_scratchBuf[ncfft-k] = conj(fek - fok);
+            m_tmpBuf[k] = fek + fok;
+            m_tmpBuf[ncfft-k] = conj(fek - fok);
           }
-          scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft );
-          get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_scratchBuf[0], 1,1);
+          scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft );
+          get_plan(ncfft,true).work(0, reinterpret_cast<Complex*>(dst), &m_tmpBuf[0], 1,1);
         }
       }
 
@@ -372,7 +375,7 @@ namespace Eigen {
 
       PlanMap m_plans;
       std::map<int, std::vector<Complex> > m_realTwiddles;
-      std::vector<Complex> m_scratchBuf;
+      std::vector<Complex> m_tmpBuf;
 
       int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
 
-- 
cgit v1.2.3


From 44ba4b1d6d5cd39d824bb83876175d0dc39a9cc3 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 16 Oct 2009 11:27:04 +0200
Subject: add operator+ scalar to AutoDiffScalar

---
 unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 888aa5c8c..fc5e237ab 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -108,6 +108,22 @@ class AutoDiffScalar
     inline const DerType& derivatives() const { return m_derivatives; }
     inline DerType& derivatives() { return m_derivatives; }
 
+    inline const AutoDiffScalar<DerType> operator+(const Scalar& other) const
+    {
+      return AutoDiffScalar<DerType>(m_value + other, m_derivatives);
+    }
+
+    friend inline const AutoDiffScalar<DerType> operator+(const Scalar& a, const AutoDiffScalar& b)
+    {
+      return AutoDiffScalar<DerType>(a + b.value(), b.derivatives());
+    }
+
+    inline AutoDiffScalar& operator+=(const Scalar& other)
+    {
+      value() += other;
+      return *this;
+    }
+
     template<typename OtherDerType>
     inline const AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,OtherDerType> >
     operator+(const AutoDiffScalar<OtherDerType>& other) const
-- 
cgit v1.2.3


From 7b0c4102facc9b5f6ca99ef76febb05a9499b8b0 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 16 Oct 2009 13:22:38 +0200
Subject: * Add a Make* expression type builder to allow the construction of
 generic expressions working for both dense and sparse matrices. A nicer
 solution would be to use CwiseBinaryOp for any kind of matrix. To this
 end we either need to change the overall design so that the base class(es)
 depend on the kind of matrix, or we could add a template parameter to each
 expression type (e.g., int Kind = ei_traits<MatrixType>::Kind) allowing
 each expression to be specialized for each kind of matrix. * Extend
 AutoDiffScalar to work with sparse vector expressions for the derivatives.

---
 Eigen/Core                                        |  1 +
 Eigen/Sparse                                      |  1 +
 Eigen/src/Core/ExpressionMaker.h                  | 61 ++++++++++++++++
 Eigen/src/Sparse/SparseExpressionMaker.h          | 48 +++++++++++++
 unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h |  4 +-
 unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h   | 85 +++++++++++++----------
 6 files changed, 161 insertions(+), 39 deletions(-)
 create mode 100644 Eigen/src/Core/ExpressionMaker.h
 create mode 100644 Eigen/src/Sparse/SparseExpressionMaker.h

(limited to 'unsupported/Eigen/src')

diff --git a/Eigen/Core b/Eigen/Core
index c8fcb1c09..3dce6422f 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -200,6 +200,7 @@ namespace Eigen {
 #include "src/Core/products/TriangularMatrixMatrix.h"
 #include "src/Core/products/TriangularSolverMatrix.h"
 #include "src/Core/BandMatrix.h"
+#include "src/Core/ExpressionMaker.h"
 
 } // namespace Eigen
 
diff --git a/Eigen/Sparse b/Eigen/Sparse
index a8888daa3..96bd61419 100644
--- a/Eigen/Sparse
+++ b/Eigen/Sparse
@@ -110,6 +110,7 @@ namespace Eigen {
 #include "src/Sparse/SparseLLT.h"
 #include "src/Sparse/SparseLDLT.h"
 #include "src/Sparse/SparseLU.h"
+#include "src/Sparse/SparseExpressionMaker.h"
 
 #ifdef EIGEN_CHOLMOD_SUPPORT
 # include "src/Sparse/CholmodSupport.h"
diff --git a/Eigen/src/Core/ExpressionMaker.h b/Eigen/src/Core/ExpressionMaker.h
new file mode 100644
index 000000000..1d265b63c
--- /dev/null
+++ b/Eigen/src/Core/ExpressionMaker.h
@@ -0,0 +1,61 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_EXPRESSIONMAKER_H
+#define EIGEN_EXPRESSIONMAKER_H
+
+// computes the shape of a matrix from its traits flag
+template<typename XprType> struct ei_shape_of
+{
+  enum { ret = ei_traits<XprType>::Flags&SparseBit ? IsSparse : IsDense };
+};
+
+
+// Since the Sparse module is completely separated from the Core module, there is
+// no way to write the type of a generic expression working for both dense and sparse
+// matrix. Unless we change the overall design, here is a workaround.
+// There is an example in unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h.
+
+template<typename XprType, int Shape = ei_shape_of<XprType>::ret>
+struct MakeNestByValue
+{
+  typedef NestByValue<XprType> Type;
+};
+
+template<typename Func, typename XprType, int Shape = ei_shape_of<XprType>::ret>
+struct MakeCwiseUnaryOp
+{
+  typedef CwiseUnaryOp<Func,XprType> Type;
+};
+
+template<typename Func, typename A, typename B, int Shape = ei_shape_of<A>::ret>
+struct MakeCwiseBinaryOp
+{
+  typedef CwiseBinaryOp<Func,A,B> Type;
+};
+
+// TODO complete the list
+
+
+#endif // EIGEN_EXPRESSIONMAKER_H
diff --git a/Eigen/src/Sparse/SparseExpressionMaker.h b/Eigen/src/Sparse/SparseExpressionMaker.h
new file mode 100644
index 000000000..1fdcbb1f2
--- /dev/null
+++ b/Eigen/src/Sparse/SparseExpressionMaker.h
@@ -0,0 +1,48 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
+//
+// Eigen is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 3 of the License, or (at your option) any later version.
+//
+// Alternatively, you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of
+// the License, or (at your option) any later version.
+//
+// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License and a copy of the GNU General Public License along with
+// Eigen. If not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EIGEN_SPARSE_EXPRESSIONMAKER_H
+#define EIGEN_SPARSE_EXPRESSIONMAKER_H
+
+template<typename XprType>
+struct MakeNestByValue<XprType,IsSparse>
+{
+  typedef SparseNestByValue<XprType> Type;
+};
+
+template<typename Func, typename XprType>
+struct MakeCwiseUnaryOp<Func,XprType,IsSparse>
+{
+  typedef SparseCwiseUnaryOp<Func,XprType> Type;
+};
+
+template<typename Func, typename A, typename B>
+struct MakeCwiseBinaryOp<Func,A,B,IsSparse>
+{
+  typedef SparseCwiseBinaryOp<Func,A,B> Type;
+};
+
+// TODO complete the list
+
+#endif // EIGEN_SPARSE_EXPRESSIONMAKER_H
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
index a5e881487..b3983f8a6 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h
@@ -50,10 +50,12 @@ public:
   typedef typename Functor::InputType InputType;
   typedef typename Functor::ValueType ValueType;
   typedef typename Functor::JacobianType JacobianType;
+  typedef typename JacobianType::Scalar Scalar;
 
-  typedef Matrix<double,InputsAtCompileTime,1> DerivativeType;
+  typedef Matrix<Scalar,InputsAtCompileTime,1> DerivativeType;
   typedef AutoDiffScalar<DerivativeType> ActiveScalar;
 
+
   typedef Matrix<ActiveScalar, InputsAtCompileTime, 1> ActiveInput;
   typedef Matrix<ActiveScalar, ValuesAtCompileTime, 1> ActiveValue;
 
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index fc5e237ab..2fb733a99 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -42,9 +42,17 @@ void ei_make_coherent(const A& a, const B&b)
 /** \class AutoDiffScalar
   * \brief A scalar type replacement with automatic differentation capability
   *
-  * \param DerType the vector type used to store/represent the derivatives (e.g. Vector3f)
+  * \param _DerType the vector type used to store/represent the derivatives. The base scalar type
+  *                 as well as the number of derivatives to compute are determined from this type.
+  *                 Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf
+  *                 if the number of derivatives is not known at compile time, and/or, the number
+  *                 of derivatives is large.
+  *                 Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap an
+  *                 existing vector into an AutoDiffScalar.
+  *                 Finally, _DerType can also be any Eigen compatible expression.
   *
-  * This class represents a scalar value while tracking its respective derivatives.
+  * This class represents a scalar value while tracking its respective derivatives using Eigen's expression
+  * template mechanism.
   *
   * It supports the following list of global math function:
   *  - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos,
@@ -56,10 +64,11 @@ void ei_make_coherent(const A& a, const B&b)
   * while derivatives are computed right away.
   *
   */
-template<typename DerType>
+template<typename _DerType>
 class AutoDiffScalar
 {
   public:
+    typedef typename ei_cleantype<_DerType>::type DerType;
     typedef typename ei_traits<DerType>::Scalar Scalar;
 
     inline AutoDiffScalar() {}
@@ -108,12 +117,12 @@ class AutoDiffScalar
     inline const DerType& derivatives() const { return m_derivatives; }
     inline DerType& derivatives() { return m_derivatives; }
 
-    inline const AutoDiffScalar<DerType> operator+(const Scalar& other) const
+    inline const AutoDiffScalar<DerType&> operator+(const Scalar& other) const
     {
       return AutoDiffScalar<DerType>(m_value + other, m_derivatives);
     }
 
-    friend inline const AutoDiffScalar<DerType> operator+(const Scalar& a, const AutoDiffScalar& b)
+    friend inline const AutoDiffScalar<DerType&> operator+(const Scalar& a, const AutoDiffScalar& b)
     {
       return AutoDiffScalar<DerType>(a + b.value(), b.derivatives());
     }
@@ -125,11 +134,11 @@ class AutoDiffScalar
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,OtherDerType> >
+    inline const AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,typename ei_cleantype<OtherDerType>::type>::Type >
     operator+(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,OtherDerType> >(
+      return AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,DerType,typename ei_cleantype<OtherDerType>::type>::Type >(
         m_value + other.value(),
         m_derivatives + other.derivatives());
     }
@@ -143,11 +152,11 @@ class AutoDiffScalar
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,OtherDerType> >
+    inline const AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,typename ei_cleantype<OtherDerType>::type>::Type >
     operator-(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,OtherDerType> >(
+      return AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>, DerType,typename ei_cleantype<OtherDerType>::type>::Type >(
         m_value - other.value(),
         m_derivatives - other.derivatives());
     }
@@ -161,73 +170,73 @@ class AutoDiffScalar
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType> >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType>::Type >
     operator-() const
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_opposite_op<Scalar>, DerType>::Type >(
         -m_value,
         -m_derivatives);
     }
 
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator*(const Scalar& other) const
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         m_value * other,
         (m_derivatives * other));
     }
 
-    friend inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    friend inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator*(const Scalar& other, const AutoDiffScalar& a)
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         a.value() * other,
         a.derivatives() * other);
     }
 
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator/(const Scalar& other) const
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         m_value / other,
         (m_derivatives * (Scalar(1)/other)));
     }
 
-    friend inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >
+    friend inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >
     operator/(const Scalar& other, const AutoDiffScalar& a)
     {
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
         other / a.value(),
         a.derivatives() * (-Scalar(1)/other));
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
-        NestByValue<CwiseBinaryOp<ei_scalar_difference_op<Scalar>,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > > > >
+    inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >::Type >::Type >
     operator/(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
-        NestByValue<CwiseBinaryOp<ei_scalar_difference_op<Scalar>,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-          NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > > > >(
+      return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseBinaryOp<ei_scalar_difference_op<Scalar>,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+          typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >::Type >::Type >(
         m_value / other.value(),
           ((m_derivatives * other.value()).nestByValue() - (m_value * other.derivatives()).nestByValue()).nestByValue()
         * (Scalar(1)/(other.value()*other.value())));
     }
 
     template<typename OtherDerType>
-    inline const AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > >
+    inline const AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >
     operator*(const AutoDiffScalar<OtherDerType>& other) const
     {
       ei_make_coherent(m_derivatives, other.derivatives());
-      return AutoDiffScalar<CwiseBinaryOp<ei_scalar_sum_op<Scalar>,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >,
-        NestByValue<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, OtherDerType> > > >(
+      return AutoDiffScalar<typename MakeCwiseBinaryOp<ei_scalar_sum_op<Scalar>,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type>::Type,
+        typename MakeNestByValue<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, typename ei_cleantype<OtherDerType>::type>::Type>::Type >::Type >(
         m_value * other.value(),
         (m_derivatives * other.value()).nestByValue() + (m_value * other.derivatives()).nestByValue());
     }
@@ -299,11 +308,11 @@ struct ei_make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRo
 
 #define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \
   template<typename DerType> \
-  inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType> > \
+  inline const Eigen::AutoDiffScalar<typename Eigen::MakeCwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType>::Type > \
   FUNC(const Eigen::AutoDiffScalar<DerType>& x) { \
     using namespace Eigen; \
     typedef typename ei_traits<DerType>::Scalar Scalar; \
-    typedef AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> > ReturnType; \
+    typedef AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type > ReturnType; \
     CODE; \
   }
 
@@ -330,12 +339,12 @@ namespace std
     return ReturnType(std::log(x.value),x.derivatives() * (Scalar(1).x.value()));)
 
   template<typename DerType>
-  inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType> >
+  inline const Eigen::AutoDiffScalar<typename Eigen::MakeCwiseUnaryOp<Eigen::ei_scalar_multiple_op<typename Eigen::ei_traits<DerType>::Scalar>, DerType>::Type >
   pow(const Eigen::AutoDiffScalar<DerType>& x, typename Eigen::ei_traits<DerType>::Scalar y)
   {
     using namespace Eigen;
     typedef typename ei_traits<DerType>::Scalar Scalar;
-    return AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType> >(
+    return AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<Scalar>, DerType>::Type >(
       std::pow(x.value(),y),
       x.derivatives() * (y * std::pow(x.value(),y-1)));
   }
@@ -375,7 +384,7 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(ei_log,
   return ReturnType(ei_log(x.value),x.derivatives() * (Scalar(1).x.value()));)
 
 template<typename DerType>
-inline const AutoDiffScalar<CwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<DerType>::Scalar>, DerType> >
+inline const AutoDiffScalar<typename MakeCwiseUnaryOp<ei_scalar_multiple_op<typename ei_traits<DerType>::Scalar>, DerType>::Type >  // forwards to the std::pow overload that takes an AutoDiffScalar
 ei_pow(const AutoDiffScalar<DerType>& x, typename ei_traits<DerType>::Scalar y)
 { return std::pow(x,y);}
 
-- 
cgit v1.2.3


From e3d08443dc272f740447de0147efc69cf7de1c93 Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Wed, 21 Oct 2009 20:53:05 -0400
Subject: inlining,all namespace declaration moved to FFT, removed preprocessor
 definitions,

---
 unsupported/Eigen/FFT                       | 41 ++++++++++++++++++-----------
 unsupported/Eigen/src/FFT/ei_fftw_impl.h    | 38 +++++++++++++++++++++-----
 unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 28 +++++++++++---------
 unsupported/test/CMakeLists.txt             |  2 +-
 4 files changed, 73 insertions(+), 36 deletions(-)

(limited to 'unsupported/Eigen/src')

diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index 97ec8e49b..36afdde8d 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -25,29 +25,39 @@
 #ifndef EIGEN_FFT_H
 #define EIGEN_FFT_H
 
-// ei_kissfft_impl:  small, free, reasonably efficient default, derived from kissfft
-#include "src/FFT/ei_kissfft_impl.h"
-#define DEFAULT_FFT_IMPL ei_kissfft_impl
+#include <complex>
+#include <vector>
+#include <map>
 
+#ifdef EIGEN_FFTW_DEFAULT
 // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size
-#ifdef FFTW_ESTIMATE  // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines
-#include "src/FFT/ei_fftw_impl.h"
-#undef DEFAULT_FFT_IMPL
-#define DEFAULT_FFT_IMPL ei_fftw_impl
-#endif
-
-// intel Math Kernel Library: fastest, commercial -- incompatible with Eigen in GPL form
-#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines
+#  include <fftw3.h>
+   namespace Eigen {
+#    include "src/FFT/ei_fftw_impl.h"
+     // A template typedef ("template <typename T> typedef struct ei_fftw_impl  default_fft_impl;") does not work, so derive from the backend instead.
+     template <typename T> struct default_fft_impl : public ei_fftw_impl<T> {};
+   }
+#elif defined EIGEN_MKL_DEFAULT
 // TODO 
-// #include "src/FFT/ei_imkl_impl.h"
-// #define DEFAULT_FFT_IMPL ei_imkl_impl
+// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form
+   namespace Eigen {
+#    include "src/FFT/ei_imklfft_impl.h"
+     template <typename T> struct default_fft_impl : public ei_imklfft_impl<T> {}; // forward T to the backend, as the FFTW and kissfft branches do
+   }
+#else
+// ei_kissfft_impl:  small, free, reasonably efficient default, derived from kissfft
+//
+  namespace Eigen {
+#   include "src/FFT/ei_kissfft_impl.h"
+     template <typename T> 
+       struct default_fft_impl : public ei_kissfft_impl<T> {}; // fallback backend when neither EIGEN_FFTW_DEFAULT nor EIGEN_MKL_DEFAULT is defined
+  }
 #endif
 
 namespace Eigen {
 
 template <typename _Scalar,
-         typename _Impl=DEFAULT_FFT_IMPL<_Scalar> 
-         >
+         typename _Impl=default_fft_impl<_Scalar> >
 class FFT
 {
   public:
@@ -120,7 +130,6 @@ class FFT
   private:
     impl_type m_impl;
 };
-#undef DEFAULT_FFT_IMPL
 }
 #endif
 /* vim: set filetype=cpp et sw=2 ts=2 ai: */
diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
index d592bbb20..e1f67f334 100644
--- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h
@@ -22,7 +22,8 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
-namespace Eigen {
+
+
   // FFTW uses non-const arguments
   // so we must use ugly const_cast calls for all the args it uses
   //
@@ -32,21 +33,25 @@ namespace Eigen {
   // 2. fftw_complex is compatible with std::complex
   //    This assumes std::complex<T> layout is array of size 2 with real,imag
   template <typename T> 
+  inline  // generic overload: only removes const, because FFTW takes non-const pointers
   T * ei_fftw_cast(const T* p) 
   { 
       return const_cast<T*>( p); 
   }
 
+  inline  // non-template function defined in a header, so inline lets several translation units include it
   fftw_complex * ei_fftw_cast( const std::complex<double> * p) 
-  { 
+  {  // relies on std::complex<double> being laid out as a {real,imag} pair, like fftw_complex
       return const_cast<fftw_complex*>( reinterpret_cast<const fftw_complex*>(p) ); 
   }
 
+  inline  // std::complex<float> -> fftwf_complex; drops const and relies on the {real,imag} pair layout
   fftwf_complex * ei_fftw_cast( const std::complex<float> * p) 
   { 
       return const_cast<fftwf_complex*>( reinterpret_cast<const fftwf_complex*>(p) ); 
   }
 
+  inline 
   fftwl_complex * ei_fftw_cast( const std::complex<long double> * p) 
   { 
       return const_cast<fftwl_complex*>( reinterpret_cast<const fftwl_complex*>(p) ); 
@@ -64,18 +69,22 @@ namespace Eigen {
       ei_fftw_plan() :m_plan(NULL) {}
       ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);}
 
+      inline  // forward complex-to-complex DFT (fftwf_ API); nfft is only consulted when the plan is first created
       void fwd(complex_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
           fftwf_execute_dft( m_plan, src,dst);
       }
+      inline  // backward complex-to-complex DFT (fftwf_ API), no scaling applied here; plan is built on the first call only
       void inv(complex_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
           fftwf_execute_dft( m_plan, src,dst);
       }
+      inline  // forward real-to-complex DFT (fftwf_ API); plan is built on the first call only
       void fwd(complex_type * dst,scalar_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
           fftwf_execute_dft_r2c( m_plan,src,dst);
       }
+      inline
       void inv(scalar_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL)
               m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
@@ -91,18 +100,22 @@ namespace Eigen {
       ei_fftw_plan() :m_plan(NULL) {}
       ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);}
 
+      inline  // forward complex-to-complex DFT (fftw_ API); nfft is only consulted when the plan is first created
       void fwd(complex_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
           fftw_execute_dft( m_plan, src,dst);
       }
+      inline  // backward complex-to-complex DFT (fftw_ API), no scaling applied here; plan is built on the first call only
       void inv(complex_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
           fftw_execute_dft( m_plan, src,dst);
       }
+      inline  // forward real-to-complex DFT (fftw_ API); plan is built on the first call only
       void fwd(complex_type * dst,scalar_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
           fftw_execute_dft_r2c( m_plan,src,dst);
       }
+      inline
       void inv(scalar_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL)
               m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
@@ -118,18 +131,22 @@ namespace Eigen {
       ei_fftw_plan() :m_plan(NULL) {}
       ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);}
 
+      inline  // forward complex-to-complex DFT (fftwl_ API); nfft is only consulted when the plan is first created
       void fwd(complex_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE);
           fftwl_execute_dft( m_plan, src,dst);
       }
+      inline  // backward complex-to-complex DFT (fftwl_ API), no scaling applied here; plan is built on the first call only
       void inv(complex_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE);
           fftwl_execute_dft( m_plan, src,dst);
       }
+      inline  // forward real-to-complex DFT (fftwl_ API); plan is built on the first call only
       void fwd(complex_type * dst,scalar_type * src,int nfft) {
           if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE);
           fftwl_execute_dft_r2c( m_plan,src,dst);
       }
+      inline
       void inv(scalar_type * dst,complex_type * src,int nfft) {
           if (m_plan==NULL)
               m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE);
@@ -143,17 +160,20 @@ namespace Eigen {
       typedef _Scalar Scalar;
       typedef std::complex<Scalar> Complex;
 
+      inline  // empties the plan cache held in m_plans
       void clear() 
       {
         m_plans.clear();
       }
 
+      inline  // complex-to-complex forward FFT: looks up the plan via get_plan() and runs it on FFTW-typed pointers
       void fwd( Complex * dst,const Complex *src,int nfft)
       {
         get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
       }
 
       // real-to-complex forward FFT
+      inline
       void fwd( Complex * dst,const Scalar * src,int nfft) 
       {
           get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft);
@@ -163,30 +183,37 @@ namespace Eigen {
       }
 
       // inverse complex-to-complex
+      inline  // runs the backward plan, then multiplies all nfft outputs by 1/nfft
       void inv(Complex * dst,const Complex  *src,int nfft)
       {
         get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
+
+        // TODO: move scaling to Eigen::FFT
         // scaling
-        Scalar s = 1./nfft;
+        Scalar s = Scalar(1.)/nfft;  // divide in Scalar precision instead of going through double
         for (int k=0;k<nfft;++k)
           dst[k] *= s;
       }
 
       // half-complex to scalar
+      inline  // complex-to-real inverse: runs the plan, then multiplies the nfft real outputs by 1/nfft
       void inv( Scalar * dst,const Complex * src,int nfft) 
       {
         get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft );
-        Scalar s = 1./nfft;
+
+        // TODO: move scaling to Eigen::FFT
+        Scalar s = Scalar(1.)/nfft;  // divide in Scalar precision instead of going through double
         for (int k=0;k<nfft;++k)
           dst[k] *= s;
       }
 
-  private:
+  protected:
       typedef ei_fftw_plan<Scalar> PlanData;
       typedef std::map<int,PlanData> PlanMap;
 
       PlanMap m_plans;
 
+      inline
       PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src)
       {
           bool inplace = (dst==src);
@@ -195,4 +222,3 @@ namespace Eigen {
           return m_plans[key];
       }
   };
-}
diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
index a84ac68a0..c068d8765 100644
--- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
+++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h
@@ -22,11 +22,7 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
-#include <complex>
-#include <vector>
-#include <map>
 
-namespace Eigen {
 
   // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft
   // Copyright 2003-2009 Mark Borgerding
@@ -51,13 +47,6 @@ namespace Eigen {
           m_twiddles[i] = exp( Complex(0,i*phinc) );
       }
 
-      void conjugate()
-      {
-        m_inverse = !m_inverse;
-        for ( size_t i=0;i<m_twiddles.size() ;++i)
-          m_twiddles[i] = conj( m_twiddles[i] );
-      }
-
       void factorize(int nfft)
       {
         //start factoring out 4's, then 2's, then 3,5,7,9,...
@@ -116,6 +105,7 @@ namespace Eigen {
           }
         }
 
+      inline
       void bfly2( Complex * Fout, const size_t fstride, int m)
       {
         for (int k=0;k<m;++k) {
@@ -125,6 +115,7 @@ namespace Eigen {
         }
       }
 
+      inline
       void bfly4( Complex * Fout, const size_t fstride, const size_t m)
       {
         Complex scratch[6];
@@ -147,6 +138,7 @@ namespace Eigen {
         }
       }
 
+      inline
       void bfly3( Complex * Fout, const size_t fstride, const size_t m)
       {
         size_t k=m;
@@ -175,6 +167,7 @@ namespace Eigen {
         }while(--k);
       }
 
+      inline
       void bfly5( Complex * Fout, const size_t fstride, const size_t m)
       {
         Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
@@ -241,6 +234,7 @@ namespace Eigen {
       }
 
       /* perform the butterfly for one stage of a mixed radix FFT */
+      inline
       void bfly_generic(
           Complex * Fout,
           const size_t fstride,
@@ -290,6 +284,7 @@ namespace Eigen {
       }
 
       template <typename _Src>
+      inline
         void fwd( Complex * dst,const _Src *src,int nfft)
         {
           get_plan(nfft,false).work(0, dst, src, 1,1);
@@ -299,6 +294,7 @@ namespace Eigen {
       // perform two FFTs of src even and src odd
       // then twiddle to recombine them into the half-spectrum format
       // then fill in the conjugate symmetric half
+      inline
       void fwd( Complex * dst,const Scalar * src,int nfft) 
       {
         if ( nfft&3  ) {
@@ -334,6 +330,7 @@ namespace Eigen {
       }
 
       // inverse complex-to-complex
+      inline
       void inv(Complex * dst,const Complex  *src,int nfft)
       {
         get_plan(nfft,true).work(0, dst, src, 1,1);
@@ -341,6 +338,7 @@ namespace Eigen {
       }
 
       // half-complex to scalar
+      inline
       void inv( Scalar * dst,const Complex * src,int nfft) 
       {
         if (nfft&3) {
@@ -369,7 +367,7 @@ namespace Eigen {
         }
       }
 
-      private:
+      protected:
       typedef ei_kiss_cpx_fft<Scalar> PlanData;
       typedef std::map<int,PlanData> PlanMap;
 
@@ -377,8 +375,10 @@ namespace Eigen {
       std::map<int, std::vector<Complex> > m_realTwiddles;
       std::vector<Complex> m_tmpBuf;
 
+      inline  // packs (nfft, direction) into one map key: bit 0 is the inverse flag, the remaining bits hold nfft
       int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; }
 
+      inline
       PlanData & get_plan(int nfft,bool inverse)
       {
         // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles
@@ -390,6 +390,7 @@ namespace Eigen {
         return pd;
       }
 
+      inline
       Complex * real_twiddles(int ncfft2)
       {
         std::vector<Complex> & twidref = m_realTwiddles[ncfft2];// creates new if not there
@@ -403,10 +404,11 @@ namespace Eigen {
         return &twidref[0];
       }
 
+      // TODO move scaling up into Eigen::FFT
+      inline  // multiplies the first n elements of dst by s, in place
       void scale(Complex *dst,int n,Scalar s) 
       {
         for (int k=0;k<n;++k)
           dst[k] *= s;
       }
     };
-}
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index f42077bdc..d182c9abf 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -23,6 +23,6 @@ ei_add_test(FFT)
 
 find_package(FFTW)
 if(FFTW_FOUND)
-  ei_add_test(FFTW  " " "-lfftw3 -lfftw3f -lfftw3l" )
+  ei_add_test(FFTW  "-DEIGEN_FFTW_DEFAULT " "-lfftw3 -lfftw3f -lfftw3l" )
 endif(FFTW_FOUND)
 
-- 
cgit v1.2.3