From 92ca9fc032ecaa7f9595f5c5f7d9d8df6c972bbe Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:21:04 -0400 Subject: initial pass of FFT module -- includes complex 1-d case only --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 296 ++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) create mode 100644 unsupported/Eigen/src/FFT/simple_fft_traits.h (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h new file mode 100644 index 000000000..fe9d24b84 --- /dev/null +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -0,0 +1,296 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include + +namespace Eigen { + + template + struct simple_fft_traits + { + typedef _Scalar Scalar; + typedef std::complex Complex; + simple_fft_traits() : m_nfft(0) {} + + void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + { + if (m_nfft == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + void exec(Complex * dst, const Complex * src) + { + work(0, dst, src, 1,1); + } + + void postprocess(Complex *dst) + { + if (m_inverse) { + Scalar scale = 1./m_nfft; + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + + scratch[0] *= epi3.imag(); + + *Fout += scratch[3]; + + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + + int m_nfft; + bool m_inverse; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + }; +} -- cgit v1.2.3 From 68cad98bc935e53102a9432560085b81c5766743 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Tue, 19 May 2009 00:34:38 -0400 Subject: indent level change --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index fe9d24b84..6fbbeac2e 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -80,13 +80,15 @@ namespace Eigen { void postprocess(Complex *dst) { - if (m_inverse) { - Scalar scale = 1./m_nfft; - for (int k=0;k Date: Fri, 22 May 2009 22:37:59 -0400 Subject: added non-optimized real forward fft (no inverse yet) --- unsupported/Eigen/FFT.h | 27 ++++-- unsupported/Eigen/src/FFT/simple_fft_traits.h | 10 ++- unsupported/test/FFT.cpp | 117 ++++++++++++++++++-------- 3 files changed, 108 insertions(+), 46 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index 03490d2c5..a1f87a609 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -57,21 +57,36 @@ class FFT FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } - void fwd( Complex * dst, const Complex * src, int nfft) + template + void fwd( Complex * dst, const _Input * src, int nfft) { m_traits.prepare(nfft,false,dst,src); m_traits.exec(dst,src); m_traits.postprocess(dst); } - void inv( Complex * dst, const Complex * src, int nfft) + template + void fwd( std::vector & dst, const std::vector<_Input> & src) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + dst.resize( src.size() ); + fwd( &dst[0],&src[0],src.size() ); + } + + template + void inv( _Output * dst, const Complex * src, int nfft) + { + m_traits.prepare(nfft,true,dst,src); + m_traits.exec(dst,src); + m_traits.postprocess(dst); + } + + template + void inv( std::vector<_Output> & dst, const std::vector & src) + { + dst.resize( src.size() ); + inv( &dst[0],&src[0],src.size() ); } - // TODO: fwd,inv for Scalar // TODO: multi-dimensional FFTs // TODO: handle Eigen MatrixBase diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 6fbbeac2e..5a910dd1f 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -34,7 +34,8 @@ namespace Eigen { typedef std::complex Complex; simple_fft_traits() : m_nfft(0) {} - void prepare(int nfft,bool inverse,Complex * dst,const Complex *src) + template + void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -73,7 +74,8 @@ namespace Eigen { }while(n>1); } - void exec(Complex * dst, const Complex * src) + template + void exec(Complex * dst, const _Src * src) { work(0, dst, src, 1,1); } @@ -89,7 +91,9 @@ namespace Eigen { private: - void work( int stage,Complex * Fout, const Complex * f, size_t fstride,size_t in_stride) + + template + void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 8347bb76b..ef03359e2 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -25,55 +25,98 @@ #include "main.h" #include + using namespace std; +template < typename T> +complex promote(complex x) { return complex(x.real(),x.imag()); } + +complex promote(float x) { return complex( x); } +complex promote(double x) { return complex( x); } +complex promote(long double x) { return complex( x); } + + + template + long double fft_rmse( const vector & fftbuf,const vector & timebuf) + { + long double totalpower=0; + long double difpower=0; + for (size_t k0=0;k0 acc = 0; + long double phinc = -2.*k0* M_PIl / timebuf.size(); + for (size_t k1=0;k1(0,k1*phinc) ); + } + totalpower += norm(acc); + complex x = promote(fftbuf[k0]); + complex dif = acc - x; + difpower += norm(dif); + cerr << k0 << ":" << acc << " " << x << endl; + } + cerr << "rmse:" << sqrt(difpower/totalpower) << endl; + return sqrt(difpower/totalpower); + } + + template + long double dif_rmse( const vector buf1,const vector buf2) + { + long double totalpower=0; + long double difpower=0; + size_t n = min( buf1.size(),buf2.size() ); + for (size_t k=0;k +void test_scalar(int nfft) +{ + typedef typename Eigen::FFT::Complex Complex; + typedef typename Eigen::FFT::Scalar Scalar; + + FFT fft; + vector inbuf(nfft); + vector outbuf; + for (int k=0;k -void test_fft(int nfft) +void test_complex(int nfft) { typedef typename Eigen::FFT::Complex Complex; FFT fft; vector inbuf(nfft); - vector buf3(nfft); - vector outbuf(nfft); + vector outbuf; + vector buf3; for (int k=0;k acc = 0; - long double phinc = 2*k0* M_PIl / nfft; - for (int k1=0;k1 x(inbuf[k1].real(),inbuf[k1].imag()); - acc += x * exp( complex(0,-k1*phinc) ); - } - totalpower += norm(acc); - complex x(outbuf[k0].real(),outbuf[k0].imag()); - complex dif = acc - x; - difpower += norm(dif); - } - long double rmse = sqrt(difpower/totalpower); - VERIFY( rmse < 1e-5 );// gross check - - totalpower=0; - difpower=0; - for (int k=0;k(32) )); CALL_SUBTEST(( test_fft(32) )); CALL_SUBTEST(( test_fft(32) )); - CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); CALL_SUBTEST(( test_fft(1024) )); - CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); CALL_SUBTEST(( test_fft(2*3*4*5*7) )); + CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); + CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); + CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); + CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); + CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); + CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); + + CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); } -- cgit v1.2.3 From 9c0fcd0f6213143216710a5b215aa2bb4a857ce5 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 10:09:48 -0400 Subject: started real optimization, added benchmark for FFT --- bench/benchFFT.cpp | 64 +++++++++++++ unsupported/Eigen/FFT.h | 8 +- unsupported/Eigen/src/FFT/simple_fft_traits.h | 125 ++++++++++++++++++-------- unsupported/test/FFT.cpp | 3 +- 4 files changed, 157 insertions(+), 43 deletions(-) create mode 100644 bench/benchFFT.cpp (limited to 'unsupported/Eigen/src') diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp new file mode 100644 index 000000000..041576b75 --- /dev/null +++ b/bench/benchFFT.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include +#include +#include +#include + +using namespace Eigen; +using namespace std; + +#ifndef NFFT +#define NFFT 1024 +#endif + +#ifndef TYPE +#define TYPE float +#endif + +#ifndef NITS +#define NITS (10000000/NFFT) +#endif + +int main() +{ + vector > inbuf(NFFT); + vector > outbuf(NFFT); + Eigen::FFT fft; + + fft.fwd( outbuf , inbuf); + + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < NITS; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } + double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); + cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h index a1f87a609..c466423b7 100644 --- a/unsupported/Eigen/FFT.h +++ b/unsupported/Eigen/FFT.h @@ -60,9 +60,7 @@ class FFT template void fwd( Complex * dst, const _Input * src, int nfft) { - m_traits.prepare(nfft,false,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.fwd(dst,src,nfft); } template @@ -75,9 +73,7 @@ class FFT template void inv( _Output * dst, const Complex * src, int nfft) { - m_traits.prepare(nfft,true,dst,src); - m_traits.exec(dst,src); - m_traits.postprocess(dst); + m_traits.inv( dst,src,nfft ); } template diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 5a910dd1f..33433ae03 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -35,7 +35,73 @@ namespace Eigen { simple_fft_traits() : m_nfft(0) {} template - void prepare(int nfft,bool inverse,Complex * dst,const _Src *src) + void fwd( Complex * dst,const _Src *src,int nfft) + { + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + } + + void fwd( Complex * dst,const Scalar * src,int nfft) + { + if ( nfft&1 ) { + // use generic mode for odd + prepare(nfft,false); + work(0, dst, src, 1,1); + scale(dst); + }else{ + int ncfft = nfft>>1; + // use optimized mode for even real + prepare(nfft,false); + work(0,dst, reinterpret_cast (src),2,1); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + + int k; + for ( k=1;k <= ncfft/2 ; ++k ) { +/** + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + */ + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + + Complex f1k = fpk + fpnk; + Complex f2k = fpnk - fpk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles + Complex tw = f2k * m_twiddles[2*k];; + + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + + dst[0] = dc; + dst[ncfft] = nyquist; + } + } + + void inv(Complex * dst,const Complex *src,int nfft) + { + prepare(nfft,true); + work(0, dst, src, 1,1); + scale(dst); + } + + void prepare(int nfft,bool inverse) { if (m_nfft == nfft) { // reuse the twiddles, conjugate if necessary @@ -74,57 +140,49 @@ namespace Eigen { }while(n>1); } - template - void exec(Complex * dst, const _Src * src) - { - work(0, dst, src, 1,1); - } - - void postprocess(Complex *dst) + void scale(Complex *dst) { if (m_inverse) { - Scalar scale = 1./m_nfft; + Scalar s = 1./m_nfft; for (int k=0;k - void work( int stage,Complex * Fout, const _Src * f, size_t fstride,size_t in_stride) + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) { int p = m_stageRadix[stage]; int m = m_stageRemainder[stage]; - Complex * Fout_beg = Fout; - Complex * Fout_end = Fout + p*m; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; - if (m==1) { - do{ - *Fout = *f; - f += fstride*in_stride; - }while(++Fout != Fout_end ); - }else{ + if (m>1) { do{ // recursive call: // DFT of size m*p performed by doing // p instances of smaller DFTs of size m, // each one takes a decimated version of the input - work(stage+1, Fout , f, fstride*p,in_stride); - f += fstride*in_stride; - }while( (Fout += m) != Fout_end ); + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); } - - Fout=Fout_beg; + xout=Fout_beg; // recombine the p smaller DFTs switch (p) { - case 2: bfly2(Fout,fstride,m); break; - case 3: bfly3(Fout,fstride,m); break; - case 4: bfly4(Fout,fstride,m); break; - case 5: bfly5(Fout,fstride,m); break; - default: bfly_generic(Fout,fstride,m,p); break; + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } } @@ -139,7 +197,7 @@ namespace Eigen { void bfly4( Complex * Fout, const size_t fstride, const size_t m) { - Complex scratch[7]; + Complex scratch[6]; int negative_if_inverse = m_inverse * -2 +1; for (size_t k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); ++Fout; }while(--k); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index ef03359e2..13e98ba77 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -115,8 +115,9 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); - +/* CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); + */ } -- cgit v1.2.3 From 304798817268706463f3ff645c8c8b2c887c090a Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 12:50:07 -0400 Subject: scalar forward FFT optimized for even size, converts to cpx for odd --- unsupported/Eigen/src/FFT/simple_fft_traits.h | 152 +++++++++++++++----------- unsupported/test/FFT.cpp | 10 +- 2 files changed, 95 insertions(+), 67 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index 33433ae03..f7dd2b9cf 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -24,6 +24,7 @@ #include #include +#include namespace Eigen { @@ -39,51 +40,54 @@ namespace Eigen { { prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); } + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { if ( nfft&1 ) { // use generic mode for odd prepare(nfft,false); work(0, dst, src, 1,1); - scale(dst); }else{ int ncfft = nfft>>1; // use optimized mode for even real - prepare(nfft,false); - work(0,dst, reinterpret_cast (src),2,1); + fwd( dst, reinterpret_cast (src),ncfft); + make_real_twiddles(nfft); + std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft/2 ; ++k ) { -/** - fpk = st->tmpbuf[k]; - fpnk.r = st->tmpbuf[ncfft-k].r; - fpnk.i = - st->tmpbuf[ncfft-k].i; - - C_ADD( f1k, fpk , fpnk ); - C_SUB( f2k, fpk , fpnk ); - C_MUL( tw , f2k , st->super_twiddles[k-1]); - - freqdata[k].r = HALF_OF(f1k.r + tw.r); - freqdata[k].i = HALF_OF(f1k.i + tw.i); - freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); - freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); - */ +#if 0 + using namespace std; + cerr << "desired:\n"; + for ( k=1;k <= (ncfft>>1) ; ++k ) { + Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; + } + dc=0; + cerr << "twiddles:\n"; + for (k=0;k>1) ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpnk - fpk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double)k / ncfft + 1) ) ); // TODO repalce this with index into twiddles - Complex tw = f2k * m_twiddles[2*k];; - + Complex f2k = fpk - fpnk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + Complex tw= f2k * m_realTwiddles[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } + + // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,55 +102,74 @@ namespace Eigen { { prepare(nfft,true); work(0, dst, src, 1,1); - scale(dst); + scale(dst, Scalar(1)/m_nfft ); } void prepare(int nfft,bool inverse) { - if (m_nfft == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;i>2; + if ( m_realTwiddles.size() != ncfft2) { + m_realTwiddles.resize(ncfft2); + int ncfft= nfft>>1; + for (int k=1;k<=ncfft2;++k) + m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + } + } + + void make_twiddles(int nfft,bool inverse) + { + if ( m_twiddles.size() == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) - p=n;// no more factors + } + + void factorize(int nfft) + { + if (m_stageRadix.size()==0 || m_stageRadix[0] * m_stageRemainder[0] != nfft) + { + m_stageRadix.resize(0); + m_stageRemainder.resize(0); + //factorize + //start factoring out 4's, then 2's, then 3,5,7,9,... + int n= nfft; + int p=4; + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p*p>n) + p=n;// no more factors + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); + m_nfft = nfft; } - void scale(Complex *dst) + void scale(Complex *dst,Scalar s) { - if (m_inverse) { - Scalar s = 1./m_nfft; for (int k=0;k m_twiddles; + std::vector m_realTwiddles; std::vector m_stageRadix; std::vector m_stageRemainder; }; diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 13e98ba77..41c7fed7b 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -41,6 +41,7 @@ complex promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; + cerr <<"idx\ttruth\t\tvalue\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -51,7 +52,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << ":" << acc << " " << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -108,6 +109,7 @@ void test_complex(int nfft) void test_FFT() { +#if 0 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); @@ -115,9 +117,11 @@ void test_FFT() CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); CALL_SUBTEST( test_complex(2*3*4*5*7) ); -/* +#endif + +#if 1 CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); - */ +#endif } -- cgit v1.2.3 From 326ea773908c2d7e46101085af8f72d20b3f8cbc Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Sat, 23 May 2009 22:50:07 -0400 Subject: added FFT inverse complex-to-scalar interface (not yet optimized) --- bench/benchFFT.cpp | 73 +++++++++++++++++++-------- unsupported/Eigen/src/FFT/simple_fft_traits.h | 31 +++++------- unsupported/test/FFT.cpp | 21 +++++--- 3 files changed, 81 insertions(+), 44 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 041576b75..84cc49fe3 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -31,34 +31,67 @@ using namespace Eigen; using namespace std; -#ifndef NFFT -#define NFFT 1024 -#endif + +template +string nameof(); + +template <> string nameof() {return "float";} +template <> string nameof() {return "double";} +template <> string nameof() {return "long double";} #ifndef TYPE #define TYPE float #endif -#ifndef NITS -#define NITS (10000000/NFFT) +#ifndef NFFT +#define NFFT 1024 +#endif +#ifndef NDATA +#define NDATA 1000000 #endif -int main() +using namespace Eigen; + +template +void bench(int nfft) { - vector > inbuf(NFFT); - vector > outbuf(NFFT); - Eigen::FFT fft; + typedef typename NumTraits::Real Scalar; + typedef typename std::complex Complex; + int nits = NDATA/nfft; + vector inbuf(nfft); + vector outbuf(nfft); + FFT< Scalar > fft; + + fft.fwd( outbuf , inbuf); + + BenchTimer timer; + timer.reset(); + for (int k=0;k<8;++k) { + timer.start(); + for(int i = 0; i < nits; i++) + fft.fwd( outbuf , inbuf); + timer.stop(); + } - fft.fwd( outbuf , inbuf); + cout << nameof() << " "; + double mflops = 5.*nfft*log2((double)nfft) / (1e6 * timer.value() / (double)nits ); + if ( NumTraits::IsComplex ) { + cout << "complex"; + }else{ + cout << "real "; + mflops /= 2; + } - BenchTimer timer; - timer.reset(); - for (int k=0;k<8;++k) { - timer.start(); - for(int i = 0; i < NITS; i++) - fft.fwd( outbuf , inbuf); - timer.stop(); - } - double mflops = 5.*NFFT*log2((double)NFFT) / (1e6 * timer.value() / (double)NITS ); - cout << "NFFT=" << NFFT << " " << (double(1e-6*NFFT*NITS)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; +} + +int main(int argc,char ** argv) +{ + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + bench >(NFFT); + bench(NFFT); + return 0; } diff --git a/unsupported/Eigen/src/FFT/simple_fft_traits.h b/unsupported/Eigen/src/FFT/simple_fft_traits.h index f7dd2b9cf..1e2be8f79 100644 --- a/unsupported/Eigen/src/FFT/simple_fft_traits.h +++ b/unsupported/Eigen/src/FFT/simple_fft_traits.h @@ -54,28 +54,14 @@ namespace Eigen { work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; + int ncfft2 = nfft>>2; // use optimized mode for even real fwd( dst, reinterpret_cast (src),ncfft); make_real_twiddles(nfft); - std::cerr << "dst[0] = " << dst[0] << "\n"; Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; -#if 0 - using namespace std; - cerr << "desired:\n"; - for ( k=1;k <= (ncfft>>1) ; ++k ) { - Complex x = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - cerr << k << " " << x << "angle(x):" << arg(x) << "\n"; - } - dc=0; - cerr << "twiddles:\n"; - for (k=0;k>1) ; ++k ) { + for ( k=1;k <= ncfft2 ; ++k ) { Complex fpk = dst[k]; Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; @@ -87,7 +73,6 @@ namespace Eigen { dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); } - // place conjugate-symmetric half at the end for completeness // TODO: make this configurable ( opt-out ) for ( k=1;k < ncfft ; ++k ) @@ -98,6 +83,16 @@ namespace Eigen { } } + // half-complex to scalar + void inv( Scalar * dst,const Complex * src,int nfft) + { + // TODO add optimized version for even numbers + std::vector tmp(nfft); + inv(&tmp[0],src,nfft); + for (int k=0;kn) - p=n;// no more factors + p=n;// impossible to have a factor > sqrt(n) } n /= p; m_stageRadix.push_back(p); diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 41c7fed7b..75c33277d 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -28,6 +28,10 @@ using namespace std; +float norm(float x) {return x*x;} +double norm(double x) {return x*x;} +long double norm(long double x) {return x*x;} + template < typename T> complex promote(complex x) { return complex(x.real(),x.imag()); } @@ -83,7 +87,11 @@ void test_scalar(int nfft) for (int k=0;k() );// gross check + + vector buf3; + fft.inv( buf3 , outbuf); + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } template @@ -100,18 +108,18 @@ void test_complex(int nfft) inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) ); fft.fwd( outbuf , inbuf); - VERIFY( fft_rmse(outbuf,inbuf) < 1e-5 );// gross check + VERIFY( fft_rmse(outbuf,inbuf) < test_precision() );// gross check fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < 1e-5 );// gross check + VERIFY( dif_rmse(inbuf,buf3) < test_precision() );// gross check } void test_FFT() { -#if 0 +#if 1 CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); CALL_SUBTEST( test_complex(32) ); - CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); CALL_SUBTEST( test_complex(1024) ); + CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(256) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(3*8) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(5*32) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); CALL_SUBTEST( test_complex(2*3*4) ); @@ -120,8 +128,9 @@ void test_FFT() #endif #if 1 + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); - CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); CALL_SUBTEST( test_scalar(1024) ); + CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif } -- cgit v1.2.3 From 210092d16c57ec2fd2f8f515151de284e8a737e3 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 20:35:24 -0400 Subject: changed name from simple_fft_traits to ei_kissfft_impl --- bench/benchFFT.cpp | 2 +- unsupported/Eigen/FFT | 95 ++++++ unsupported/Eigen/FFT.h | 95 ------ unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 397 ++++++++++++++++++++++++++ unsupported/Eigen/src/FFT/simple_fft_traits.h | 374 ------------------------ unsupported/test/FFT.cpp | 3 +- 6 files changed, 494 insertions(+), 472 deletions(-) create mode 100644 unsupported/Eigen/FFT delete mode 100644 unsupported/Eigen/FFT.h create mode 100644 unsupported/Eigen/src/FFT/ei_kissfft_impl.h delete mode 100644 unsupported/Eigen/src/FFT/simple_fft_traits.h (limited to 'unsupported/Eigen/src') diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index 84cc49fe3..ffa4ffffc 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include using namespace Eigen; using namespace std; diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT new file mode 100644 index 000000000..3d852f5a2 --- /dev/null +++ b/unsupported/Eigen/FFT @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_FFT_H +#define EIGEN_FFT_H + +// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft +#include "src/FFT/ei_kissfft_impl.h" +#define DEFAULT_FFT_IMPL ei_kissfft_impl + +// FFTW: faster, GPL-not LGPL, bigger code size +#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines +// TODO +// #include "src/FFT/ei_fftw_impl.h" +// #define DEFAULT_FFT_IMPL ei_fftw_impl +#endif + +// intel Math Kernel Library: fastest, commerical +#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines +// TODO +// #include "src/FFT/ei_imkl_impl.h" +// #define DEFAULT_FFT_IMPL ei_imkl_impl +#endif + +namespace Eigen { + +template + > +class FFT +{ + public: + typedef _Traits traits_type; + typedef typename traits_type::Scalar Scalar; + typedef typename traits_type::Complex Complex; + + FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } + + template + void fwd( Complex * dst, const _Input * src, int nfft) + { + m_traits.fwd(dst,src,nfft); + } + + template + void fwd( std::vector & dst, const std::vector<_Input> & src) + { + dst.resize( src.size() ); + fwd( &dst[0],&src[0],src.size() ); + } + + template + void inv( _Output * dst, const Complex * src, int nfft) + { + m_traits.inv( dst,src,nfft ); + } + + template + void inv( std::vector<_Output> & dst, const std::vector & src) + { + dst.resize( src.size() ); + inv( &dst[0],&src[0],src.size() ); + } + + // TODO: multi-dimensional FFTs + // TODO: handle Eigen MatrixBase + + traits_type & traits() {return m_traits;} + private: + traits_type m_traits; +}; +#undef DEFAULT_FFT_IMPL +} +#endif diff --git a/unsupported/Eigen/FFT.h b/unsupported/Eigen/FFT.h deleted file mode 100644 index c466423b7..000000000 --- a/unsupported/Eigen/FFT.h +++ /dev/null @@ -1,95 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. -// -// Copyright (C) 2009 Mark Borgerding mark a borgerding net -// -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - -#ifndef EIGEN_FFT_H -#define EIGEN_FFT_H - -// simple_fft_traits: small, free, reasonably efficient default, derived from kissfft -#include "src/FFT/simple_fft_traits.h" -#define DEFAULT_FFT_TRAITS simple_fft_traits - -// FFTW: faster, GPL-not LGPL, bigger code size -#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines -// TODO -// #include "src/FFT/fftw_traits.h" -// #define DEFAULT_FFT_TRAITS fftw_traits -#endif - -// intel Math Kernel Library: fastest, commerical -#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines -// TODO -// #include "src/FFT/imkl_traits.h" -// #define DEFAULT_FFT_TRAITS imkl_traits -#endif - -namespace Eigen { - -template - > -class FFT -{ - public: - typedef _Traits traits_type; - typedef typename traits_type::Scalar Scalar; - typedef typename traits_type::Complex Complex; - - FFT(const traits_type & traits=traits_type() ) :m_traits(traits) { } - - template - void fwd( Complex * dst, const _Input * src, int nfft) - { - m_traits.fwd(dst,src,nfft); - } - - template - void fwd( std::vector & dst, const std::vector<_Input> & src) - { - dst.resize( src.size() ); - fwd( &dst[0],&src[0],src.size() ); - } - - template - void inv( _Output * dst, const Complex * src, int nfft) - { - m_traits.inv( dst,src,nfft ); - } - - template - void inv( std::vector<_Output> & dst, const std::vector & src) - { - dst.resize( src.size() ); - inv( &dst[0],&src[0],src.size() ); - } - - // TODO: multi-dimensional FFTs - // TODO: handle Eigen MatrixBase - - traits_type & traits() {return m_traits;} - private: - traits_type m_traits; -}; -#undef DEFAULT_FFT_TRAITS -} -#endif diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h new file mode 100644 index 000000000..ce2c9f16e --- /dev/null +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -0,0 +1,397 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. Eigen itself is part of the KDE project. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include +#include + +namespace Eigen { + + template + struct ei_kissfft_impl + { + typedef _Scalar Scalar; + typedef std::complex Complex; + ei_kissfft_impl() : m_nfft(0) {} + + template + void fwd( Complex * dst,const _Src *src,int nfft) + { + prepare(nfft,false); + work(0, dst, src, 1,1); + } + + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half + void fwd( Complex * dst,const Scalar * src,int nfft) + { + if ( nfft&1 ) { + // use generic mode for odd + prepare(nfft,false); + work(0, dst, src, 1,1); + }else{ + int ncfft = nfft>>1; + int ncfft2 = nfft>>2; + // use optimized mode for even real + fwd( dst, reinterpret_cast (src),ncfft); + make_real_twiddles(nfft); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + int k; + for ( k=1;k <= ncfft2 ; ++k ) { + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + Complex f1k = fpk + fpnk; + Complex f2k = fpk - fpnk; + //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + Complex tw= f2k * m_realTwiddles[k-1]; + + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } + + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + + dst[0] = dc; + dst[ncfft] = nyquist; + } + } + + // half-complex to scalar + void inv( Scalar * dst,const Complex * src,int nfft) + { + // TODO add optimized version for even numbers + std::vector tmp(nfft); + inv(&tmp[0],src,nfft); + for (int k=0;k>2; + if ( m_realTwiddles.size() != ncfft2) { + m_realTwiddles.resize(ncfft2); + int ncfft= nfft>>1; + for (int k=1;k<=ncfft2;++k) + m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); + } + } + + void make_twiddles(int nfft,bool inverse) + { + if ( m_twiddles.size() == nfft) { + // reuse the twiddles, conjugate if necessary + if (inverse != m_inverse) + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + m_nfft = nfft; + } + + void scale(Complex *dst,Scalar s) + { + for (int k=0;k + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + + int m_nfft; + bool m_inverse; + std::vector m_twiddles; + std::vector m_realTwiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; +/* + enum {FORWARD,INVERSE,REAL,COMPLEX}; + + struct PlanKey + { + PlanKey(int nfft,bool isinverse,bool iscomplex) + { + _key = (nfft<<2) | (isinverse<<1) | iscomplex; + } + + bool operator<(const PlanKey & other) const + { + return this->_key < other._key; + } + int _key; + }; + + struct PlanData + { + std::vector m_twiddles; + }; + + std::map. - -#include -#include -#include - -namespace Eigen { - - template - struct simple_fft_traits - { - typedef _Scalar Scalar; - typedef std::complex Complex; - simple_fft_traits() : m_nfft(0) {} - - template - void fwd( Complex * dst,const _Src *src,int nfft) - { - prepare(nfft,false); - work(0, dst, src, 1,1); - } - - // real-to-complex forward FFT - // perform two FFTs of src even and src odd - // then twiddle to recombine them into the half-spectrum format - // then fill in the conjugate symmetric half - void fwd( Complex * dst,const Scalar * src,int nfft) - { - if ( nfft&1 ) { - // use generic mode for odd - prepare(nfft,false); - work(0, dst, src, 1,1); - }else{ - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - // use optimized mode for even real - fwd( dst, reinterpret_cast (src),ncfft); - make_real_twiddles(nfft); - Complex dc = dst[0].real() + dst[0].imag(); - Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft2 ; ++k ) { - Complex fpk = dst[k]; - Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpk - fpnk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - Complex tw= f2k * m_realTwiddles[k-1]; - - dst[k] = (f1k + tw) * Scalar(.5); - dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); - } - - // place conjugate-symmetric half at the end for completeness - // TODO: make this configurable ( opt-out ) - for ( k=1;k < ncfft ; ++k ) - dst[nfft-k] = conj(dst[k]); - - dst[0] = dc; - dst[ncfft] = nyquist; - } - } - - // half-complex to scalar - void inv( Scalar * dst,const Complex * src,int nfft) - { - // TODO add optimized version for even numbers - std::vector tmp(nfft); - inv(&tmp[0],src,nfft); - for (int k=0;k>2; - if ( m_realTwiddles.size() != ncfft2) { - m_realTwiddles.resize(ncfft2); - int ncfft= nfft>>1; - for (int k=1;k<=ncfft2;++k) - m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - } - } - - void make_twiddles(int nfft,bool inverse) - { - if ( m_twiddles.size() == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - m_nfft = nfft; - } - - void scale(Complex *dst,Scalar s) - { - for (int k=0;k - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; - } - } - } - - int m_nfft; - bool m_inverse; - std::vector m_twiddles; - std::vector m_realTwiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - }; -} diff --git a/unsupported/test/FFT.cpp b/unsupported/test/FFT.cpp index 75c33277d..daf397790 100644 --- a/unsupported/test/FFT.cpp +++ b/unsupported/test/FFT.cpp @@ -23,8 +23,7 @@ // Eigen. If not, see . #include "main.h" -#include - +#include using namespace std; -- cgit v1.2.3 From 03ed6f9bfb63879d475f5bb8ea46cff96063d010 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:06:49 -0400 Subject: refactored ei_kissfft_impl to maintain a cache of cpx fft plans --- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 606 ++++++++++++++-------------- unsupported/test/FFT.cpp | 7 +- 2 files changed, 317 insertions(+), 296 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index ce2c9f16e..3580e6c61 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -24,21 +24,279 @@ #include #include +#include namespace Eigen { + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + ei_kiss_cpx_fft() { } + + void make_twiddles(int nfft,bool inverse) + { + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } + + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + }; + + template struct ei_kissfft_impl { typedef _Scalar Scalar; typedef std::complex Complex; - ei_kissfft_impl() : m_nfft(0) {} + ei_kissfft_impl() {} + + void clear() + { + m_plans.clear(); + m_realTwiddles.clear(); + } template void fwd( Complex * dst,const _Src *src,int nfft) { - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); } // real-to-complex forward FFT @@ -47,16 +305,16 @@ namespace Eigen { // then fill in the conjugate symmetric half void fwd( Complex * dst,const Scalar * src,int nfft) { - if ( nfft&1 ) { + if ( nfft&3 ) { // use generic mode for odd - prepare(nfft,false); - work(0, dst, src, 1,1); + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ int ncfft = nfft>>1; int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + // use optimized mode for even real - fwd( dst, reinterpret_cast (src),ncfft); - make_real_twiddles(nfft); + fwd( dst, reinterpret_cast (src), ncfft); Complex dc = dst[0].real() + dst[0].imag(); Complex nyquist = dst[0].real() - dst[0].imag(); int k; @@ -65,8 +323,7 @@ namespace Eigen { Complex fpnk = conj(dst[ncfft-k]); Complex f1k = fpk + fpnk; Complex f2k = fpk - fpnk; - //Complex tw = f2k * exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - Complex tw= f2k * m_realTwiddles[k-1]; + Complex tw= f2k * rtw[k-1]; dst[k] = (f1k + tw) * Scalar(.5); dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); @@ -94,304 +351,67 @@ namespace Eigen { void inv(Complex * dst,const Complex *src,int nfft) { - prepare(nfft,true); - work(0, dst, src, 1,1); - scale(dst, Scalar(1)/m_nfft ); - } - - void prepare(int nfft,bool inverse) - { - make_twiddles(nfft,inverse); - factorize(nfft); - } - - void make_real_twiddles(int nfft) - { - int ncfft2 = nfft>>2; - if ( m_realTwiddles.size() != ncfft2) { - m_realTwiddles.resize(ncfft2); - int ncfft= nfft>>1; - for (int k=1;k<=ncfft2;++k) - m_realTwiddles[k-1] = exp( Complex(0,-3.14159265358979323846264338327 * ((double) (k) / ncfft + .5) ) ); - } - } - - void make_twiddles(int nfft,bool inverse) - { - if ( m_twiddles.size() == nfft) { - // reuse the twiddles, conjugate if necessary - if (inverse != m_inverse) - for (int i=0;in) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - m_nfft = nfft; - } - - void scale(Complex *dst,Scalar s) - { - for (int k=0;k - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;k PlanData; - void bfly3( Complex * Fout, const size_t fstride, const size_t m) - { - size_t k=m; - const size_t m2 = 2*m; - Complex *tw1,*tw2; - Complex scratch[5]; - Complex epi3; - epi3 = m_twiddles[fstride*m]; - - tw1=tw2=&m_twiddles[0]; - - do{ - scratch[1]=Fout[m] * *tw1; - scratch[2]=Fout[m2] * *tw2; - - scratch[3]=scratch[1]+scratch[2]; - scratch[0]=scratch[1]-scratch[2]; - tw1 += fstride; - tw2 += fstride*2; - Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u::iterator MapIt; + MapIt it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.invert(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } - - k=u; - for ( q1=0 ; q1

=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; + return it->second; + */ + PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; + if ( pd.m_twiddles.size() == 0 ) { + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } - } + return pd; } - int m_nfft; - bool m_inverse; - std::vector m_twiddles; - std::vector m_realTwiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; -/* - enum {FORWARD,INVERSE,REAL,COMPLEX}; - - struct PlanKey + Complex * real_twiddles(int ncfft2) { - PlanKey(int nfft,bool isinverse,bool iscomplex) - { - _key = (nfft<<2) | (isinverse<<1) | iscomplex; - } - - bool operator<(const PlanKey & other) const - { - return this->_key < other._key; + std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there + if ( (int)twidref.size() != ncfft2 ) { + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } - int _key; - }; + return &twidref[0]; + } - struct PlanData + void scale(Complex *dst,int n,Scalar s) { - std::vector m_twiddles; - }; - - std::map promote(long double x) { return complex( x); { long double totalpower=0; long double difpower=0; - cerr <<"idx\ttruth\t\tvalue\n"; + cerr <<"idx\ttruth\t\tvalue\t|dif|=\n"; for (size_t k0=0;k0 acc = 0; long double phinc = -2.*k0* M_PIl / timebuf.size(); @@ -55,7 +55,7 @@ complex promote(long double x) { return complex( x); complex x = promote(fftbuf[k0]); complex dif = acc - x; difpower += norm(dif); - cerr << k0 << "\t" << acc << "\t" << x << endl; + cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl; } cerr << "rmse:" << sqrt(difpower/totalpower) << endl; return sqrt(difpower/totalpower); @@ -127,8 +127,9 @@ void test_FFT() #endif #if 1 - CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); CALL_SUBTEST( test_scalar(32) ); + CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); CALL_SUBTEST( test_scalar(45) ); + CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(50) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(256) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); CALL_SUBTEST( test_scalar(2*3*4*5*7) ); #endif -- cgit v1.2.3 From 09b47332553a79dab30516e6b1d410dea90cf9b7 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Mon, 25 May 2009 23:52:21 -0400 Subject: added real-optimized inverse FFT (NFFT must be multiple of 4) --- bench/benchFFT.cpp | 30 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 684 ++++++++++++++-------------- 2 files changed, 368 insertions(+), 346 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/bench/benchFFT.cpp b/bench/benchFFT.cpp index ffa4ffffc..14f5063fb 100644 --- a/bench/benchFFT.cpp +++ b/bench/benchFFT.cpp @@ -53,7 +53,7 @@ template <> string nameof() {return "long double";} using namespace Eigen; template -void bench(int nfft) +void bench(int nfft,bool fwd) { typedef typename NumTraits::Real Scalar; typedef typename std::complex Complex; @@ -69,7 +69,10 @@ void bench(int nfft) for (int k=0;k<8;++k) { timer.start(); for(int i = 0; i < nits; i++) - fft.fwd( outbuf , inbuf); + if (fwd) + fft.fwd( outbuf , inbuf); + else + fft.inv(inbuf,outbuf); timer.stop(); } @@ -82,16 +85,27 @@ void bench(int nfft) mflops /= 2; } + if (fwd) + cout << " fwd"; + else + cout << " inv"; + cout << " NFFT=" << nfft << " " << (double(1e-6*nfft*nits)/timer.value()) << " MS/s " << mflops << "MFLOPS\n"; } int main(int argc,char ** argv) { - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); - bench >(NFFT); - bench(NFFT); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); + bench >(NFFT,true); + bench >(NFFT,false); + bench(NFFT,true); + bench(NFFT,false); return 0; } diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 3580e6c61..453c7f6da 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -28,390 +28,398 @@ namespace Eigen { - template - struct ei_kiss_cpx_fft + template + struct ei_kiss_cpx_fft + { + typedef _Scalar Scalar; + typedef std::complex Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; + + void make_twiddles(int nfft,bool inverse) { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; - - ei_kiss_cpx_fft() { } - - void make_twiddles(int nfft,bool inverse) - { - m_inverse = inverse; - m_twiddles.resize(nfft); - Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; - for (int i=0;in) - p=n;// impossible to have a factor > sqrt(n) - } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } + void factorize(int nfft) + { + //start factoring out 4's, then 2's, then 3,5,7,9,... + int n= nfft; + int p=4; + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p*p>n) + p=n;// impossible to have a factor > sqrt(n) } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; - } - } - - void bfly2( Complex * Fout, const size_t fstride, int m) - { - for (int k=0;k + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; + } + } - void bfly4( Complex * Fout, const size_t fstride, const size_t m) - { - Complex scratch[6]; - int negative_if_inverse = m_inverse * -2 +1; - for (size_t k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } + void bfly4( Complex * Fout, const size_t fstride, const size_t m) + { + Complex scratch[6]; + int negative_if_inverse = m_inverse * -2 +1; + for (size_t k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); + } + + void bfly5( Complex * Fout, const size_t fstride, const size_t m) + { + Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + size_t u; + Complex scratch[13]; + Complex * twiddles = &m_twiddles[0]; + Complex *tw; + Complex ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=twiddles; + for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; - } - k += m; - } - } + /* perform the butterfly for one stage of a mixed radix FFT */ + void bfly_generic( + Complex * Fout, + const size_t fstride, + int m, + int p + ) + { + int u,k,q1,q; + Complex * twiddles = &m_twiddles[0]; + Complex t; + int Norig = m_twiddles.size(); + Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; + } + } + } + }; template struct ei_kissfft_impl { - typedef _Scalar Scalar; - typedef std::complex Complex; - ei_kissfft_impl() {} + typedef _Scalar Scalar; + typedef std::complex Complex; - void clear() - { + void clear() + { m_plans.clear(); m_realTwiddles.clear(); - } + } - template - void fwd( Complex * dst,const _Src *src,int nfft) - { + template + void fwd( Complex * dst,const _Src *src,int nfft) + { get_plan(nfft,false).work(0, dst, src, 1,1); - } - - // real-to-complex forward FFT - // perform two FFTs of src even and src odd - // then twiddle to recombine them into the half-spectrum format - // then fill in the conjugate symmetric half - void fwd( Complex * dst,const Scalar * src,int nfft) - { + } + + // real-to-complex forward FFT + // perform two FFTs of src even and src odd + // then twiddle to recombine them into the half-spectrum format + // then fill in the conjugate symmetric half + void fwd( Complex * dst,const Scalar * src,int nfft) + { if ( nfft&3 ) { - // use generic mode for odd - get_plan(nfft,false).work(0, dst, src, 1,1); + // use generic mode for odd + get_plan(nfft,false).work(0, dst, src, 1,1); }else{ - int ncfft = nfft>>1; - int ncfft2 = nfft>>2; - Complex * rtw = real_twiddles(ncfft2); - - // use optimized mode for even real - fwd( dst, reinterpret_cast (src), ncfft); - Complex dc = dst[0].real() + dst[0].imag(); - Complex nyquist = dst[0].real() - dst[0].imag(); - int k; - for ( k=1;k <= ncfft2 ; ++k ) { - Complex fpk = dst[k]; - Complex fpnk = conj(dst[ncfft-k]); - Complex f1k = fpk + fpnk; - Complex f2k = fpk - fpnk; - Complex tw= f2k * rtw[k-1]; - - dst[k] = (f1k + tw) * Scalar(.5); - dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); - } - - // place conjugate-symmetric half at the end for completeness - // TODO: make this configurable ( opt-out ) - for ( k=1;k < ncfft ; ++k ) - dst[nfft-k] = conj(dst[k]); - - dst[0] = dc; - dst[ncfft] = nyquist; + int ncfft = nfft>>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + + // use optimized mode for even real + fwd( dst, reinterpret_cast (src), ncfft); + Complex dc = dst[0].real() + dst[0].imag(); + Complex nyquist = dst[0].real() - dst[0].imag(); + int k; + for ( k=1;k <= ncfft2 ; ++k ) { + Complex fpk = dst[k]; + Complex fpnk = conj(dst[ncfft-k]); + Complex f1k = fpk + fpnk; + Complex f2k = fpk - fpnk; + Complex tw= f2k * rtw[k-1]; + dst[k] = (f1k + tw) * Scalar(.5); + dst[ncfft-k] = conj(f1k -tw)*Scalar(.5); + } + + // place conjugate-symmetric half at the end for completeness + // TODO: make this configurable ( opt-out ) + for ( k=1;k < ncfft ; ++k ) + dst[nfft-k] = conj(dst[k]); + dst[0] = dc; + dst[ncfft] = nyquist; } - } - - // half-complex to scalar - void inv( Scalar * dst,const Complex * src,int nfft) - { - // TODO add optimized version for even numbers - std::vector tmp(nfft); - inv(&tmp[0],src,nfft); - for (int k=0;k>1; + int ncfft2 = nfft>>2; + Complex * rtw = real_twiddles(ncfft2); + m_scratchBuf.resize(ncfft); + m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + for (int k = 1; k <= ncfft / 2; ++k) { + Complex fk = src[k]; + Complex fnkc = conj(src[ncfft-k]); + Complex fek = fk + fnkc; + Complex tmp = fk - fnkc; + Complex fok = tmp * conj(rtw[k-1]); + m_scratchBuf[k] = fek + fok; + m_scratchBuf[ncfft-k] = conj(fek - fok); + } + scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + } + } - private: + private: - typedef ei_kiss_cpx_fft PlanData; + typedef ei_kiss_cpx_fft PlanData; - typedef std::map PlanMap; - PlanMap m_plans; - std::map > m_realTwiddles; + typedef std::map PlanMap; + PlanMap m_plans; + std::map > m_realTwiddles; + std::vector m_scratchBuf; - int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } - PlanData & get_plan(int nfft,bool inverse) - { - /* + PlanData & get_plan(int nfft,bool inverse) + { + /* TODO: figure out why this does not work (g++ 4.3.2) * for some reason this does not work * - typedef typename std::map::iterator MapIt; - MapIt it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.invert(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } + PlanMap::iterator it; + it = m_plans.find( PlanKey(nfft,inverse) ); + if (it == m_plans.end() ) { + // create new entry + it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); + MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); + if (it2 != m_plans.end() ) { + it->second = it2.second; + it->second.conjugate(); + }else{ + it->second.make_twiddles(nfft,inverse); + it->second.factorize(nfft); + } } return it->second; */ PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { - pd.make_twiddles(nfft,inverse); - pd.factorize(nfft); + pd.make_twiddles(nfft,inverse); + pd.factorize(nfft); } return pd; - } + } - Complex * real_twiddles(int ncfft2) - { + Complex * real_twiddles(int ncfft2) + { std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there if ( (int)twidref.size() != ncfft2 ) { - twidref.resize(ncfft2); - int ncfft= ncfft2<<1; - Scalar pi = acos( Scalar(-1) ); - for (int k=1;k<=ncfft2;++k) - twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); + twidref.resize(ncfft2); + int ncfft= ncfft2<<1; + Scalar pi = acos( Scalar(-1) ); + for (int k=1;k<=ncfft2;++k) + twidref[k-1] = exp( Complex(0,-pi * ((double) (k) / ncfft + .5) ) ); } return &twidref[0]; - } + } - void scale(Complex *dst,int n,Scalar s) - { + void scale(Complex *dst,int n,Scalar s) + { for (int k=0;k Date: Wed, 27 May 2009 21:32:42 -0400 Subject: various comment changes --- unsupported/Eigen/FFT | 6 +- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 484 ++++++++++++++-------------- 2 files changed, 237 insertions(+), 253 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 3d852f5a2..31d8c74c5 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -29,14 +29,14 @@ #include "src/FFT/ei_kissfft_impl.h" #define DEFAULT_FFT_IMPL ei_kissfft_impl -// FFTW: faster, GPL-not LGPL, bigger code size +// FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size #ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines // TODO // #include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif -// intel Math Kernel Library: fastest, commerical +// intel Math Kernel Library: fastest, commerical -- incompatible with Eigen in GPL form #ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines // TODO // #include "src/FFT/ei_imkl_impl.h" diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 453c7f6da..91fa5ca18 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -1,5 +1,5 @@ // This file is part of Eigen, a lightweight C++ template library -// for linear algebra. Eigen itself is part of the KDE project. +// for linear algebra. // // Copyright (C) 2009 Mark Borgerding mark a borgerding net // @@ -28,252 +28,255 @@ namespace Eigen { + // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft + // Copyright 2003-2009 Mark Borgerding + template - struct ei_kiss_cpx_fft - { - typedef _Scalar Scalar; - typedef std::complex Complex; - std::vector m_twiddles; - std::vector m_stageRadix; - std::vector m_stageRemainder; - bool m_inverse; - - void make_twiddles(int nfft,bool inverse) - { - m_inverse = inverse; - m_twiddles.resize(nfft); - Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; - for (int i=0;i Complex; + std::vector m_twiddles; + std::vector m_stageRadix; + std::vector m_stageRemainder; + bool m_inverse; - void factorize(int nfft) - { - //start factoring out 4's, then 2's, then 3,5,7,9,... - int n= nfft; - int p=4; - do { - while (n % p) { + void make_twiddles(int nfft,bool inverse) + { + m_inverse = inverse; + m_twiddles.resize(nfft); + Scalar phinc = (inverse?2:-2)* acos( (Scalar) -1) / nfft; + for (int i=0;in) + p=n;// impossible to have a factor > sqrt(n) + } + n /= p; + m_stageRadix.push_back(p); + m_stageRemainder.push_back(n); + }while(n>1); + } + + template + void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) + { + int p = m_stageRadix[stage]; + int m = m_stageRemainder[stage]; + Complex * Fout_beg = xout; + Complex * Fout_end = xout + p*m; + + if (m>1) { + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + work(stage+1, xout , xin, fstride*p,in_stride); + xin += fstride*in_stride; + }while( (xout += m) != Fout_end ); + }else{ + do{ + *xout = *xin; + xin += fstride*in_stride; + }while(++xout != Fout_end ); + } + xout=Fout_beg; + + // recombine the p smaller DFTs switch (p) { - case 4: p = 2; break; - case 2: p = 3; break; - default: p += 2; break; + case 2: bfly2(xout,fstride,m); break; + case 3: bfly3(xout,fstride,m); break; + case 4: bfly4(xout,fstride,m); break; + case 5: bfly5(xout,fstride,m); break; + default: bfly_generic(xout,fstride,m,p); break; } - if (p*p>n) - p=n;// impossible to have a factor > sqrt(n) } - n /= p; - m_stageRadix.push_back(p); - m_stageRemainder.push_back(n); - }while(n>1); - } - - template - void work( int stage,Complex * xout, const _Src * xin, size_t fstride,size_t in_stride) - { - int p = m_stageRadix[stage]; - int m = m_stageRemainder[stage]; - Complex * Fout_beg = xout; - Complex * Fout_end = xout + p*m; - if (m>1) { - do{ - // recursive call: - // DFT of size m*p performed by doing - // p instances of smaller DFTs of size m, - // each one takes a decimated version of the input - work(stage+1, xout , xin, fstride*p,in_stride); - xin += fstride*in_stride; - }while( (xout += m) != Fout_end ); - }else{ - do{ - *xout = *xin; - xin += fstride*in_stride; - }while(++xout != Fout_end ); - } - xout=Fout_beg; - - // recombine the p smaller DFTs - switch (p) { - case 2: bfly2(xout,fstride,m); break; - case 3: bfly3(xout,fstride,m); break; - case 4: bfly4(xout,fstride,m); break; - case 5: bfly5(xout,fstride,m); break; - default: bfly_generic(xout,fstride,m,p); break; + void bfly2( Complex * Fout, const size_t fstride, int m) + { + for (int k=0;kreal() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); + scratch[0] *= epi3.imag(); + *Fout += scratch[3]; + Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); + Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); + ++Fout; + }while(--k); } - } - void bfly3( Complex * Fout, const size_t fstride, const size_t m) - { - size_t k=m; - const size_t m2 = 2*m; - Complex *tw1,*tw2; - Complex scratch[5]; - Complex epi3; - epi3 = m_twiddles[fstride*m]; - - tw1=tw2=&m_twiddles[0]; - - do{ - scratch[1]=Fout[m] * *tw1; - scratch[2]=Fout[m2] * *tw2; - - scratch[3]=scratch[1]+scratch[2]; - scratch[0]=scratch[1]-scratch[2]; - tw1 += fstride; - tw2 += fstride*2; - Fout[m] = Complex( Fout->real() - .5*scratch[3].real() , Fout->imag() - .5*scratch[3].imag() ); - scratch[0] *= epi3.imag(); - *Fout += scratch[3]; - Fout[m2] = Complex( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); - Fout[m] += Complex( -scratch[0].imag(),scratch[0].real() ); - ++Fout; - }while(--k); - } - - void bfly5( Complex * Fout, const size_t fstride, const size_t m) - { - Complex *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - size_t u; - Complex scratch[13]; - Complex * twiddles = &m_twiddles[0]; - Complex *tw; - Complex ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=twiddles; - for ( u=0; u=Norig) twidx-=Norig; - t=scratchbuf[q] * twiddles[twidx]; - Fout[ k ] += t; + /* perform the butterfly for one stage of a mixed radix FFT */ + void bfly_generic( + Complex * Fout, + const size_t fstride, + int m, + int p + ) + { + int u,k,q1,q; + Complex * twiddles = &m_twiddles[0]; + Complex t; + int Norig = m_twiddles.size(); + Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + + for ( u=0; u=Norig) twidx-=Norig; + t=scratchbuf[q] * twiddles[twidx]; + Fout[ k ] += t; + } + k += m; } - k += m; } } - } - }; + }; template - struct ei_kissfft_impl - { + struct ei_kissfft_impl + { typedef _Scalar Scalar; typedef std::complex Complex; @@ -284,10 +287,10 @@ namespace Eigen { } template - void fwd( Complex * dst,const _Src *src,int nfft) - { - get_plan(nfft,false).work(0, dst, src, 1,1); - } + void fwd( Complex * dst,const _Src *src,int nfft) + { + get_plan(nfft,false).work(0, dst, src, 1,1); + } // real-to-complex forward FFT // perform two FFTs of src even and src odd @@ -363,11 +366,10 @@ namespace Eigen { } } - private: - + private: typedef ei_kiss_cpx_fft PlanData; - typedef std::map PlanMap; + PlanMap m_plans; std::map > m_realTwiddles; std::vector m_scratchBuf; @@ -376,25 +378,7 @@ namespace Eigen { PlanData & get_plan(int nfft,bool inverse) { - /* TODO: figure out why this does not work (g++ 4.3.2) - * for some reason this does not work - * - PlanMap::iterator it; - it = m_plans.find( PlanKey(nfft,inverse) ); - if (it == m_plans.end() ) { - // create new entry - it = m_plans.insert( make_pair( PlanKey(nfft,inverse) , PlanData() ) ); - MapIt it2 = m_plans.find( PlanKey(nfft,!inverse) ); - if (it2 != m_plans.end() ) { - it->second = it2.second; - it->second.conjugate(); - }else{ - it->second.make_twiddles(nfft,inverse); - it->second.factorize(nfft); - } - } - return it->second; - */ + // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles PlanData & pd = m_plans[ PlanKey(nfft,inverse) ]; if ( pd.m_twiddles.size() == 0 ) { pd.make_twiddles(nfft,inverse); @@ -421,5 +405,5 @@ namespace Eigen { for (int k=0;k Date: Sat, 30 May 2009 17:55:47 -0400 Subject: added ei_fftw_impl --- unsupported/Eigen/FFT | 5 +- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 198 ++++++++++++++++++++++++++++ unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 25 ++-- 3 files changed, 214 insertions(+), 14 deletions(-) create mode 100644 unsupported/Eigen/src/FFT/ei_fftw_impl.h (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 31d8c74c5..03f8504a4 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -30,9 +30,8 @@ #define DEFAULT_FFT_IMPL ei_kissfft_impl // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -#ifdef FFTW_PATIENT // definition of FFTW_PATIENT indicates the caller has included fftw3.h, we can use FFTW routines -// TODO -// #include "src/FFT/ei_fftw_impl.h" +#ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines +#include "src/FFT/ei_fftw_impl.h" // #define DEFAULT_FFT_IMPL ei_fftw_impl #endif diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h new file mode 100644 index 000000000..d592bbb20 --- /dev/null +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -0,0 +1,198 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Mark Borgerding mark a borgerding net +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +namespace Eigen { + // FFTW uses non-const arguments + // so we must use ugly const_cast calls for all the args it uses + // + // This should be safe as long as + // 1. we use FFTW_ESTIMATE for all our planning + // see the FFTW docs section 4.3.2 "Planner Flags" + // 2. fftw_complex is compatible with std::complex + // This assumes std::complex layout is array of size 2 with real,imag + template + T * ei_fftw_cast(const T* p) + { + return const_cast( p); + } + + fftw_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwf_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + fftwl_complex * ei_fftw_cast( const std::complex * p) + { + return const_cast( reinterpret_cast(p) ); + } + + template + struct ei_fftw_plan {}; + + template <> + struct ei_fftw_plan + { + typedef float scalar_type; + typedef fftwf_complex complex_type; + fftwf_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwf_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwf_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef double scalar_type; + typedef fftw_complex complex_type; + fftw_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftw_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftw_execute_dft_c2r( m_plan, src,dst); + } + }; + template <> + struct ei_fftw_plan + { + typedef long double scalar_type; + typedef fftwl_complex complex_type; + fftwl_plan m_plan; + ei_fftw_plan() :m_plan(NULL) {} + ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + + void fwd(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void inv(complex_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); + fftwl_execute_dft( m_plan, src,dst); + } + void fwd(complex_type * dst,scalar_type * src,int nfft) { + if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_r2c( m_plan,src,dst); + } + void inv(scalar_type * dst,complex_type * src,int nfft) { + if (m_plan==NULL) + m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); + fftwl_execute_dft_c2r( m_plan, src,dst); + } + }; + + template + struct ei_fftw_impl + { + typedef _Scalar Scalar; + typedef std::complex Complex; + + void clear() + { + m_plans.clear(); + } + + void fwd( Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + } + + // real-to-complex forward FFT + void fwd( Complex * dst,const Scalar * src,int nfft) + { + get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); + int nhbins=(nfft>>1)+1; + for (int k=nhbins;k < nfft; ++k ) + dst[k] = conj(dst[nfft-k]); + } + + // inverse complex-to-complex + void inv(Complex * dst,const Complex *src,int nfft) + { + get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + // scaling + Scalar s = 1./nfft; + for (int k=0;k PlanData; + typedef std::map PlanMap; + + PlanMap m_plans; + + PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) + { + bool inplace = (dst==src); + bool aligned = ( (reinterpret_cast(src)&15) | (reinterpret_cast(dst)&15) ) == 0; + int key = (nfft<<3 ) | (inverse<<2) | (inplace<<1) | aligned; + return m_plans[key]; + } + }; +} diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index 91fa5ca18..a84ac68a0 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -39,6 +39,7 @@ namespace Eigen { std::vector m_twiddles; std::vector m_stageRadix; std::vector m_stageRemainder; + std::vector m_scratchBuf; bool m_inverse; void make_twiddles(int nfft,bool inverse) @@ -75,6 +76,8 @@ namespace Eigen { n /= p; m_stageRadix.push_back(p); m_stageRemainder.push_back(n); + if ( p > 5 ) + m_scratchBuf.resize(p); // scratchbuf will be needed in bfly_generic }while(n>1); } @@ -249,7 +252,7 @@ namespace Eigen { Complex * twiddles = &m_twiddles[0]; Complex t; int Norig = m_twiddles.size(); - Complex * scratchbuf = (Complex*)alloca(p*sizeof(Complex) ); + Complex * scratchbuf = &m_scratchBuf[0]; for ( u=0; u>1; int ncfft2 = nfft>>2; Complex * rtw = real_twiddles(ncfft2); - m_scratchBuf.resize(ncfft); - m_scratchBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); + m_tmpBuf.resize(ncfft); + m_tmpBuf[0] = Complex( src[0].real() + src[ncfft].real(), src[0].real() - src[ncfft].real() ); for (int k = 1; k <= ncfft / 2; ++k) { Complex fk = src[k]; Complex fnkc = conj(src[ncfft-k]); Complex fek = fk + fnkc; Complex tmp = fk - fnkc; Complex fok = tmp * conj(rtw[k-1]); - m_scratchBuf[k] = fek + fok; - m_scratchBuf[ncfft-k] = conj(fek - fok); + m_tmpBuf[k] = fek + fok; + m_tmpBuf[ncfft-k] = conj(fek - fok); } - scale(&m_scratchBuf[0], ncfft, Scalar(1)/nfft ); - get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_scratchBuf[0], 1,1); + scale(&m_tmpBuf[0], ncfft, Scalar(1)/nfft ); + get_plan(ncfft,true).work(0, reinterpret_cast(dst), &m_tmpBuf[0], 1,1); } } @@ -372,7 +375,7 @@ namespace Eigen { PlanMap m_plans; std::map > m_realTwiddles; - std::vector m_scratchBuf; + std::vector m_tmpBuf; int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } -- cgit v1.2.3 From 44ba4b1d6d5cd39d824bb83876175d0dc39a9cc3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 16 Oct 2009 11:27:04 +0200 Subject: add operator+ scalar to AutoDiffScalar --- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 888aa5c8c..fc5e237ab 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -108,6 +108,22 @@ class AutoDiffScalar inline const DerType& derivatives() const { return m_derivatives; } inline DerType& derivatives() { return m_derivatives; } + inline const AutoDiffScalar operator+(const Scalar& other) const + { + return AutoDiffScalar(m_value + other, m_derivatives); + } + + friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) + { + return AutoDiffScalar(a + b.value(), b.derivatives()); + } + + inline AutoDiffScalar& operator+=(const Scalar& other) + { + value() += other; + return *this; + } + template inline const AutoDiffScalar,DerType,OtherDerType> > operator+(const AutoDiffScalar& other) const -- cgit v1.2.3 From 7b0c4102facc9b5f6ca99ef76febb05a9499b8b0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 16 Oct 2009 13:22:38 +0200 Subject: * add a Make* expression type builder to allow the construction of generic expressions working for both dense and sparse matrix. A nicer solution would be to use CwiseBinaryOp for any kind of matrix. To this end we either need to change the overall design so that the base class(es) depends on the kind of matrix, or we could add a template parameter to each expression type (e.g., int Kind = ei_traits::Kind) allowing to specialize each expression for each kind of matrix. * Extend AutoDiffScalar to work with sparse vector expression for the derivatives. --- Eigen/Core | 1 + Eigen/Sparse | 1 + Eigen/src/Core/ExpressionMaker.h | 61 ++++++++++++++++ Eigen/src/Sparse/SparseExpressionMaker.h | 48 +++++++++++++ unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h | 4 +- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 85 +++++++++++++---------- 6 files changed, 161 insertions(+), 39 deletions(-) create mode 100644 Eigen/src/Core/ExpressionMaker.h create mode 100644 Eigen/src/Sparse/SparseExpressionMaker.h (limited to 'unsupported/Eigen/src') diff --git a/Eigen/Core b/Eigen/Core index c8fcb1c09..3dce6422f 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -200,6 +200,7 @@ namespace Eigen { #include "src/Core/products/TriangularMatrixMatrix.h" #include "src/Core/products/TriangularSolverMatrix.h" #include "src/Core/BandMatrix.h" +#include "src/Core/ExpressionMaker.h" } // namespace Eigen diff --git a/Eigen/Sparse b/Eigen/Sparse index a8888daa3..96bd61419 100644 --- a/Eigen/Sparse +++ b/Eigen/Sparse @@ -110,6 +110,7 @@ namespace Eigen { #include "src/Sparse/SparseLLT.h" #include "src/Sparse/SparseLDLT.h" #include "src/Sparse/SparseLU.h" +#include "src/Sparse/SparseExpressionMaker.h" #ifdef EIGEN_CHOLMOD_SUPPORT # include "src/Sparse/CholmodSupport.h" diff --git a/Eigen/src/Core/ExpressionMaker.h b/Eigen/src/Core/ExpressionMaker.h new file mode 100644 index 000000000..1d265b63c --- /dev/null +++ b/Eigen/src/Core/ExpressionMaker.h @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_EXPRESSIONMAKER_H +#define EIGEN_EXPRESSIONMAKER_H + +// computes the shape of a matrix from its traits flag +template struct ei_shape_of +{ + enum { ret = ei_traits::Flags&SparseBit ? IsSparse : IsDense }; +}; + + +// Since the Sparse module is completely separated from the Core module, there is +// no way to write the type of a generic expression working for both dense and sparse +// matrix. Unless we change the overall design, here is a workaround. +// There is an example in unsuported/Eigen/src/AutoDiff/AutoDiffScalar. + +template::ret> +struct MakeNestByValue +{ + typedef NestByValue Type; +}; + +template::ret> +struct MakeCwiseUnaryOp +{ + typedef CwiseUnaryOp Type; +}; + +template::ret> +struct MakeCwiseBinaryOp +{ + typedef CwiseBinaryOp Type; +}; + +// TODO complete the list + + +#endif // EIGEN_EXPRESSIONMAKER_H diff --git a/Eigen/src/Sparse/SparseExpressionMaker.h b/Eigen/src/Sparse/SparseExpressionMaker.h new file mode 100644 index 000000000..1fdcbb1f2 --- /dev/null +++ b/Eigen/src/Sparse/SparseExpressionMaker.h @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_SPARSE_EXPRESSIONMAKER_H +#define EIGEN_SPARSE_EXPRESSIONMAKER_H + +template +struct MakeNestByValue +{ + typedef SparseNestByValue Type; +}; + +template +struct MakeCwiseUnaryOp +{ + typedef SparseCwiseUnaryOp Type; +}; + +template +struct MakeCwiseBinaryOp +{ + typedef SparseCwiseBinaryOp Type; +}; + +// TODO complete the list + +#endif // EIGEN_SPARSE_EXPRESSIONMAKER_H diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h index a5e881487..b3983f8a6 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffJacobian.h @@ -50,10 +50,12 @@ public: typedef typename Functor::InputType InputType; typedef typename Functor::ValueType ValueType; typedef typename Functor::JacobianType JacobianType; + typedef typename JacobianType::Scalar Scalar; - typedef Matrix DerivativeType; + typedef Matrix DerivativeType; typedef AutoDiffScalar ActiveScalar; + typedef Matrix ActiveInput; typedef Matrix ActiveValue; diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index fc5e237ab..2fb733a99 100644 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -42,9 +42,17 @@ void ei_make_coherent(const A& a, const B&b) /** \class AutoDiffScalar * \brief A scalar type replacement with automatic differentation capability * - * \param DerType the vector type used to store/represent the derivatives (e.g. Vector3f) + * \param _DerType the vector type used to store/represent the derivatives. The base scalar type + * as well as the number of derivatives to compute are determined from this type. + * Typical choices include, e.g., \c Vector4f for 4 derivatives, or \c VectorXf + * if the number of derivatives is not known at compile time, and/or, the number + * of derivatives is large. + * Note that _DerType can also be a reference (e.g., \c VectorXf&) to wrap a + * existing vector into an AutoDiffScalar. + * Finally, _DerType can also be any Eigen compatible expression. * - * This class represents a scalar value while tracking its respective derivatives. + * This class represents a scalar value while tracking its respective derivatives using Eigen's expression + * template mechanism. * * It supports the following list of global math function: * - std::abs, std::sqrt, std::pow, std::exp, std::log, std::sin, std::cos, @@ -56,10 +64,11 @@ void ei_make_coherent(const A& a, const B&b) * while derivatives are computed right away. * */ -template +template class AutoDiffScalar { public: + typedef typename ei_cleantype<_DerType>::type DerType; typedef typename ei_traits::Scalar Scalar; inline AutoDiffScalar() {} @@ -108,12 +117,12 @@ class AutoDiffScalar inline const DerType& derivatives() const { return m_derivatives; } inline DerType& derivatives() { return m_derivatives; } - inline const AutoDiffScalar operator+(const Scalar& other) const + inline const AutoDiffScalar operator+(const Scalar& other) const { return AutoDiffScalar(m_value + other, m_derivatives); } - friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) + friend inline const AutoDiffScalar operator+(const Scalar& a, const AutoDiffScalar& b) { return AutoDiffScalar(a + b.value(), b.derivatives()); } @@ -125,11 +134,11 @@ class AutoDiffScalar } template - inline const AutoDiffScalar,DerType,OtherDerType> > + inline const AutoDiffScalar,DerType,typename ei_cleantype::type>::Type > operator+(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar,DerType,OtherDerType> >( + return AutoDiffScalar,DerType,typename ei_cleantype::type>::Type >( m_value + other.value(), m_derivatives + other.derivatives()); } @@ -143,11 +152,11 @@ class AutoDiffScalar } template - inline const AutoDiffScalar, DerType,OtherDerType> > + inline const AutoDiffScalar, DerType,typename ei_cleantype::type>::Type > operator-(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, DerType,OtherDerType> >( + return AutoDiffScalar, DerType,typename ei_cleantype::type>::Type >( m_value - other.value(), m_derivatives - other.derivatives()); } @@ -161,73 +170,73 @@ class AutoDiffScalar } template - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator-() const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( -m_value, -m_derivatives); } - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator*(const Scalar& other) const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( m_value * other, (m_derivatives * other)); } - friend inline const AutoDiffScalar, DerType> > + friend inline const AutoDiffScalar, DerType>::Type > operator*(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( a.value() * other, a.derivatives() * other); } - inline const AutoDiffScalar, DerType> > + inline const AutoDiffScalar, DerType>::Type > operator/(const Scalar& other) const { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( m_value / other, (m_derivatives * (Scalar(1)/other))); } - friend inline const AutoDiffScalar, DerType> > + friend inline const AutoDiffScalar, DerType>::Type > operator/(const Scalar& other, const AutoDiffScalar& a) { - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( other / a.value(), a.derivatives() * (-Scalar(1)/other)); } template - inline const AutoDiffScalar, - NestByValue, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > > > + inline const AutoDiffScalar, + typename MakeNestByValue, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >::Type >::Type > operator/(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - NestByValue, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > > >( + return AutoDiffScalar, + typename MakeNestByValue, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >::Type >::Type >( m_value / other.value(), ((m_derivatives * other.value()).nestByValue() - (m_value * other.derivatives()).nestByValue()).nestByValue() * (Scalar(1)/(other.value()*other.value()))); } template - inline const AutoDiffScalar, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > > + inline const AutoDiffScalar, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type > operator*(const AutoDiffScalar& other) const { ei_make_coherent(m_derivatives, other.derivatives()); - return AutoDiffScalar, - NestByValue, DerType> >, - NestByValue, OtherDerType> > > >( + return AutoDiffScalar, + typename MakeNestByValue, DerType>::Type>::Type, + typename MakeNestByValue, typename ei_cleantype::type>::Type>::Type >::Type >( m_value * other.value(), (m_derivatives * other.value()).nestByValue() + (m_value * other.derivatives()).nestByValue()); } @@ -299,11 +308,11 @@ struct ei_make_coherent_impl \ - inline const Eigen::AutoDiffScalar::Scalar>, DerType> > \ + inline const Eigen::AutoDiffScalar::Scalar>, DerType>::Type > \ FUNC(const Eigen::AutoDiffScalar& x) { \ using namespace Eigen; \ typedef typename ei_traits::Scalar Scalar; \ - typedef AutoDiffScalar, DerType> > ReturnType; \ + typedef AutoDiffScalar, DerType>::Type > ReturnType; \ CODE; \ } @@ -330,12 +339,12 @@ namespace std return ReturnType(std::log(x.value),x.derivatives() * (Scalar(1).x.value()));) template - inline const Eigen::AutoDiffScalar::Scalar>, DerType> > + inline const Eigen::AutoDiffScalar::Scalar>, DerType>::Type > pow(const Eigen::AutoDiffScalar& x, typename Eigen::ei_traits::Scalar y) { using namespace Eigen; typedef typename ei_traits::Scalar Scalar; - return AutoDiffScalar, DerType> >( + return AutoDiffScalar, DerType>::Type >( std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1))); } @@ -375,7 +384,7 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(ei_log, return ReturnType(ei_log(x.value),x.derivatives() * (Scalar(1).x.value()));) template -inline const AutoDiffScalar::Scalar>, DerType> > +inline const AutoDiffScalar::Scalar>, DerType>::Type > ei_pow(const AutoDiffScalar& x, typename ei_traits::Scalar y) { return std::pow(x,y);} -- cgit v1.2.3 From e3d08443dc272f740447de0147efc69cf7de1c93 Mon Sep 17 00:00:00 2001 From: Mark Borgerding Date: Wed, 21 Oct 2009 20:53:05 -0400 Subject: inlining,all namespace declaration moved to FFT, removed preprocessor definitions, --- unsupported/Eigen/FFT | 41 ++++++++++++++++++----------- unsupported/Eigen/src/FFT/ei_fftw_impl.h | 38 +++++++++++++++++++++----- unsupported/Eigen/src/FFT/ei_kissfft_impl.h | 28 +++++++++++--------- unsupported/test/CMakeLists.txt | 2 +- 4 files changed, 73 insertions(+), 36 deletions(-) (limited to 'unsupported/Eigen/src') diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT index 97ec8e49b..36afdde8d 100644 --- a/unsupported/Eigen/FFT +++ b/unsupported/Eigen/FFT @@ -25,29 +25,39 @@ #ifndef EIGEN_FFT_H #define EIGEN_FFT_H -// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft -#include "src/FFT/ei_kissfft_impl.h" -#define DEFAULT_FFT_IMPL ei_kissfft_impl +#include +#include +#include +#ifdef EIGEN_FFTW_DEFAULT // FFTW: faster, GPL -- incompatible with Eigen in LGPL form, bigger code size -#ifdef FFTW_ESTIMATE // definition of FFTW_ESTIMATE indicates the caller has included fftw3.h, we can use FFTW routines -#include "src/FFT/ei_fftw_impl.h" -#undef DEFAULT_FFT_IMPL -#define DEFAULT_FFT_IMPL ei_fftw_impl -#endif - -// intel Math Kernel Library: fastest, commercial -- incompatible with Eigen in GPL form -#ifdef _MKL_DFTI_H_ // mkl_dfti.h has been included, we can use MKL FFT routines +# include + namespace Eigen { +# include "src/FFT/ei_fftw_impl.h" + //template typedef struct ei_fftw_impl default_fft_impl; this does not work + template struct default_fft_impl : public ei_fftw_impl {}; + } +#elif defined EIGEN_MKL_DEFAULT // TODO -// #include "src/FFT/ei_imkl_impl.h" -// #define DEFAULT_FFT_IMPL ei_imkl_impl +// intel Math Kernel Library: fastest, commercial -- may be incompatible with Eigen in GPL form + namespace Eigen { +# include "src/FFT/ei_imklfft_impl.h" + template struct default_fft_impl : public ei_imklfft_impl {}; + } +#else +// ei_kissfft_impl: small, free, reasonably efficient default, derived from kissfft +// + namespace Eigen { +# include "src/FFT/ei_kissfft_impl.h" + template + struct default_fft_impl : public ei_kissfft_impl {}; + } #endif namespace Eigen { template - > + typename _Impl=default_fft_impl<_Scalar> > class FFT { public: @@ -120,7 +130,6 @@ class FFT private: impl_type m_impl; }; -#undef DEFAULT_FFT_IMPL } #endif /* vim: set filetype=cpp et sw=2 ts=2 ai: */ diff --git a/unsupported/Eigen/src/FFT/ei_fftw_impl.h b/unsupported/Eigen/src/FFT/ei_fftw_impl.h index d592bbb20..e1f67f334 100644 --- a/unsupported/Eigen/src/FFT/ei_fftw_impl.h +++ b/unsupported/Eigen/src/FFT/ei_fftw_impl.h @@ -22,7 +22,8 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -namespace Eigen { + + // FFTW uses non-const arguments // so we must use ugly const_cast calls for all the args it uses // @@ -32,21 +33,25 @@ namespace Eigen { // 2. fftw_complex is compatible with std::complex // This assumes std::complex layout is array of size 2 with real,imag template + inline T * ei_fftw_cast(const T* p) { return const_cast( p); } + inline fftw_complex * ei_fftw_cast( const std::complex * p) - { + { return const_cast( reinterpret_cast(p) ); } + inline fftwf_complex * ei_fftw_cast( const std::complex * p) { return const_cast( reinterpret_cast(p) ); } + inline fftwl_complex * ei_fftw_cast( const std::complex * p) { return const_cast( reinterpret_cast(p) ); @@ -64,18 +69,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftwf_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftwf_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftwf_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftwf_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwf_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -91,18 +100,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftw_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftw_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftw_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftw_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftw_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -118,18 +131,22 @@ namespace Eigen { ei_fftw_plan() :m_plan(NULL) {} ~ei_fftw_plan() {if (m_plan) fftwl_destroy_plan(m_plan);} + inline void fwd(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_FORWARD, FFTW_ESTIMATE); fftwl_execute_dft( m_plan, src,dst); } + inline void inv(complex_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_1d(nfft,src,dst, FFTW_BACKWARD , FFTW_ESTIMATE); fftwl_execute_dft( m_plan, src,dst); } + inline void fwd(complex_type * dst,scalar_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_r2c_1d(nfft,src,dst,FFTW_ESTIMATE); fftwl_execute_dft_r2c( m_plan,src,dst); } + inline void inv(scalar_type * dst,complex_type * src,int nfft) { if (m_plan==NULL) m_plan = fftwl_plan_dft_c2r_1d(nfft,src,dst,FFTW_ESTIMATE); @@ -143,17 +160,20 @@ namespace Eigen { typedef _Scalar Scalar; typedef std::complex Complex; + inline void clear() { m_plans.clear(); } + inline void fwd( Complex * dst,const Complex *src,int nfft) { get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); } // real-to-complex forward FFT + inline void fwd( Complex * dst,const Scalar * src,int nfft) { get_plan(nfft,false,dst,src).fwd(ei_fftw_cast(dst), ei_fftw_cast(src) ,nfft); @@ -163,30 +183,37 @@ namespace Eigen { } // inverse complex-to-complex + inline void inv(Complex * dst,const Complex *src,int nfft) { get_plan(nfft,true,dst,src).inv(ei_fftw_cast(dst), ei_fftw_cast(src),nfft ); + + //TODO move scaling to Eigen::FFT // scaling - Scalar s = 1./nfft; + Scalar s = Scalar(1.)/nfft; for (int k=0;k PlanData; typedef std::map PlanMap; PlanMap m_plans; + inline PlanData & get_plan(int nfft,bool inverse,void * dst,const void * src) { bool inplace = (dst==src); @@ -195,4 +222,3 @@ namespace Eigen { return m_plans[key]; } }; -} diff --git a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h index a84ac68a0..c068d8765 100644 --- a/unsupported/Eigen/src/FFT/ei_kissfft_impl.h +++ b/unsupported/Eigen/src/FFT/ei_kissfft_impl.h @@ -22,11 +22,7 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -#include -#include -#include -namespace Eigen { // This FFT implementation was derived from kissfft http:sourceforge.net/projects/kissfft // Copyright 2003-2009 Mark Borgerding @@ -51,13 +47,6 @@ namespace Eigen { m_twiddles[i] = exp( Complex(0,i*phinc) ); } - void conjugate() - { - m_inverse = !m_inverse; - for ( size_t i=0;i + inline void fwd( Complex * dst,const _Src *src,int nfft) { get_plan(nfft,false).work(0, dst, src, 1,1); @@ -299,6 +294,7 @@ namespace Eigen { // perform two FFTs of src even and src odd // then twiddle to recombine them into the half-spectrum format // then fill in the conjugate symmetric half + inline void fwd( Complex * dst,const Scalar * src,int nfft) { if ( nfft&3 ) { @@ -334,6 +330,7 @@ namespace Eigen { } // inverse complex-to-complex + inline void inv(Complex * dst,const Complex *src,int nfft) { get_plan(nfft,true).work(0, dst, src, 1,1); @@ -341,6 +338,7 @@ namespace Eigen { } // half-complex to scalar + inline void inv( Scalar * dst,const Complex * src,int nfft) { if (nfft&3) { @@ -369,7 +367,7 @@ namespace Eigen { } } - private: + protected: typedef ei_kiss_cpx_fft PlanData; typedef std::map PlanMap; @@ -377,8 +375,10 @@ namespace Eigen { std::map > m_realTwiddles; std::vector m_tmpBuf; + inline int PlanKey(int nfft,bool isinverse) const { return (nfft<<1) | isinverse; } + inline PlanData & get_plan(int nfft,bool inverse) { // TODO look for PlanKey(nfft, ! inverse) and conjugate the twiddles @@ -390,6 +390,7 @@ namespace Eigen { return pd; } + inline Complex * real_twiddles(int ncfft2) { std::vector & twidref = m_realTwiddles[ncfft2];// creates new if not there @@ -403,10 +404,11 @@ namespace Eigen { return &twidref[0]; } + // TODO move scaling up into Eigen::FFT + inline void scale(Complex *dst,int n,Scalar s) { for (int k=0;k