renamed soundtouch and uade2 plugin folders and output .so

author: waker <wakeroid@gmail.com> 2011-03-23 21:26:26 +0100
committer: waker <wakeroid@gmail.com> 2011-03-23 21:26:26 +0100
commit: fd302d7abc36942e7ff14b22fae1e72b4495bef1 (patch)
tree: 2a36f8361c907a5bea91a9d905957a709f31ea64 /plugins/soundtouch/soundtouch/source
parent: 11e63b53b8c91da89592c373bb32fc2b656c6024 (diff)
19 files changed, 5702 insertions, 0 deletions
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/3dnow_win.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/3dnow_win.cpp
new file mode 100644
index 00000000..f0a9d7ec
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/3dnow_win.cpp
@@ -0,0 +1,349 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Win32 version of the AMD 3DNow! optimized routines for AMD K6-2/Athlon 
+/// processors. All 3DNow! optimized functions have been gathered into this
+/// single source code file, regardless to their class or original source code 
+/// file, in order to ease porting the library to other compiler and processor 
+/// platforms.
+///
+/// By the way; the performance gain depends heavily on the CPU generation: On 
+/// K6-2 these routines provided speed-up of even 2.4 times, while on Athlon the 
+/// difference to the original routines stayed at unremarkable 8%! Such a small 
+/// improvement on Athlon is due to 3DNow can perform only two operations in 
+/// parallel, and obviously also the Athlon FPU is doing a very good job with
+/// the standard C floating point routines! Here these routines are anyway, 
+/// although it might not be worth the effort to convert these to GCC platform, 
+/// for Athlon CPU at least. The situation is different regarding the SSE 
+/// optimizations though, thanks to the four parallel operations of SSE that 
+/// already make a difference.
+/// 
+/// This file is to be compiled in Windows platform with Microsoft Visual C++ 
+/// Compiler. Please see '3dnow_gcc.cpp' for the gcc compiler version for all
+/// GNU platforms (if file supplied).
+///
+/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++ 
+/// 6.0 processor pack" update to support 3DNow! instruction set. The update is 
+/// available for download at Microsoft Developers Network, see here:
+/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
+///
+/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and 
+/// perform a search with keywords "processor pack".
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: 3dnow_win.cpp 63 2009-02-21 16:00:14Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "cpu_detect.h"
+#include "STTypes.h"
+
+#ifndef WIN32
+#error "wrong platform - this source code file is exclusively for Win32 platform"
+#endif
+
+using namespace soundtouch;
+
+#ifdef ALLOW_3DNOW
+// 3DNow! routines available only with float sample type    
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of 3DNow! optimized functions of class 'TDStretch3DNow'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "TDStretch.h"
+
+
+// Calculates cross correlation of two buffers
+//
+// 3DNow! implementation: multiplies the float vectors 'pV1' and 'pV2' element
+// by element and returns the sum of the products. Each loop round consumes
+// 8 floats from both vectors and the loop runs 'overlapLength' / 4 rounds.
+// The sum is collected into a float and widened to double on return.
+//
+// NOTE(review): the loop is bottom-tested ('dec ecx' / 'jnz'), so it presumably
+// relies on 'overlapLength' being at least 4 -- confirm against the callers.
+double TDStretch3DNow::calcCrossCorrStereo(const float *pV1, const float *pV2) const
+{
+    int overlapLengthLocal = overlapLength;
+    float corr = 0;
+
+    // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
+    /*
+    c-pseudocode:
+
+        corr = 0;
+        for (i = 0; i < overlapLength / 4; i ++)
+        {
+            corr += pV1[0] * pV2[0] +
+                    pV1[1] * pV2[1] +
+                    pV1[2] * pV2[2] +
+                    pV1[3] * pV2[3] +
+                    pV1[4] * pV2[4] +
+                    pV1[5] * pV2[5] +
+                    pV1[6] * pV2[6] +
+                    pV1[7] * pV2[7];
+
+            pV1 += 8;
+            pV2 += 8;
+        }
+    */
+
+    _asm 
+    {
+        // give prefetch hints to CPU of what data are to be needed soonish.
+        // give more aggressive hints on pV1 as that changes more between different calls 
+        // while pV2 stays the same.
+        prefetch [pV1]
+        prefetch [pV2]
+        prefetch [pV1 + 32]
+
+        mov     eax, dword ptr pV2
+        mov     ebx, dword ptr pV1
+
+        pxor    mm0, mm0        // mm0 = running pair of partial sums, starts at zero
+
+        mov     ecx, overlapLengthLocal
+        shr     ecx, 2  // div by four
+
+    loop1:
+        movq    mm1, [eax]
+        prefetch [eax + 32]     // give a prefetch hint to CPU what data are to be needed soonish
+        pfmul   mm1, [ebx]
+        prefetch [ebx + 64]     // give a prefetch hint to CPU what data are to be needed soonish
+
+        movq    mm2, [eax + 8]
+        pfadd   mm0, mm1
+        pfmul   mm2, [ebx + 8]
+
+        movq    mm3, [eax + 16]
+        pfadd   mm0, mm2
+        pfmul   mm3, [ebx + 16]
+
+        movq    mm4, [eax + 24]
+        pfadd   mm0, mm3
+        pfmul   mm4, [ebx + 24]
+
+        add     eax, 32         // advance both pointers by the 32 bytes (8 floats) just consumed
+        pfadd   mm0, mm4
+        add     ebx, 32
+
+        dec     ecx
+        jnz     loop1
+
+        // add halves of mm0 together and return the result. 
+        // note: mm1 is used as a dummy parameter only, we actually don't care about its value
+        pfacc   mm0, mm1
+        movd    corr, mm0
+        femms
+    }
+
+    return corr;
+}
+
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of 3DNow! optimized functions of class 'FIRFilter'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "FIRFilter.h"
+
+FIRFilter3DNow::FIRFilter3DNow() : FIRFilter()
+{
+    // No coefficient storage exists until setCoefficients() is called, so both
+    // the aligned view and the pointer that owns the allocation start out empty.
+    filterCoeffsAlign = NULL;
+    filterCoeffsUnalign = NULL;
+}
+
+
+FIRFilter3DNow::~FIRFilter3DNow()
+{
+    // 'filterCoeffsAlign' only points into the 'filterCoeffsUnalign' allocation,
+    // so drop that alias and release the buffer through the owning pointer.
+    filterCoeffsAlign = NULL;
+    delete[] filterCoeffsUnalign;
+    filterCoeffsUnalign = NULL;
+}
+
+
+// (overloaded) Prepares the filter coefficients for the 3DNow! routine.
+//
+// After the base class has stored the coefficients, a second copy is built
+// in which every coefficient is pre-divided by 'resultDivider' and written
+// twice in a row, so that the filtering result needs no scaling afterwards.
+void FIRFilter3DNow::setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor)
+{
+    FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
+
+    // Replace the previous buffer. Four extra floats (16 bytes) are allocated
+    // so that the start can be rounded up to the next 16-byte boundary.
+    delete[] filterCoeffsUnalign;
+    filterCoeffsUnalign = new float[2 * newLength + 4];
+    filterCoeffsAlign = (float *)(((uint)filterCoeffsUnalign + 15) & (uint)-16);
+
+    const float scale = (float)resultDivider;
+    float *out = filterCoeffsAlign;
+
+    for (uint tap = 0; tap < newLength; tap ++)
+    {
+        // same scaled value for both slots of the pair
+        const float scaled = coeffs[tap] / scale;
+        *out++ = scaled;
+        *out++ = scaled;
+    }
+}
+
+
+// 3DNow!-optimized version of the filter routine for stereo sound
+//
+// Filters the interleaved stereo data in 'src' with the coefficients prepared
+// by setCoefficients() and writes the interleaved result to 'dest'.
+//
+// Returns the number of output samples produced, i.e. 'numSamples' - 'length'
+// rounded down to an even number, or 0 if the input is too short to produce
+// any output.
+uint FIRFilter3DNow::evaluateFilterStereo(float *dest, const float *src, uint numSamples) const
+{
+    float *filterCoeffsLocal = filterCoeffsAlign;
+    uint lengthLocal = length / 4;
+    uint count;
+
+    assert(length != 0);
+
+    // Both assembler loops below are bottom-tested ('dec' + 'jnz'): entering
+    // one of them with a zero counter would wrap the counter around and run
+    // ~2^32 rounds over unrelated memory. Return early when there is nothing
+    // to do; the second test also keeps the unsigned subtraction below from
+    // underflowing.
+    if (lengthLocal == 0 || numSamples < length) return 0;
+
+    // two output samples are produced per outer loop round => force even count
+    count = (numSamples - length) & (uint)-2;
+    if (count == 0) return 0;
+
+    assert(count % 2 == 0);
+
+    /* original code:
+
+    double suml1, suml2;
+    double sumr1, sumr2;
+    uint i, j;
+
+    for (j = 0; j < count; j += 2)
+    {
+        const float *ptr;
+
+        suml1 = sumr1 = 0.0;
+        suml2 = sumr2 = 0.0;
+        ptr = src;
+        filterCoeffsLocal = filterCoeffs;
+        for (i = 0; i < lengthLocal; i ++) 
+        {
+            // unroll loop for efficiency.
+
+            suml1 += ptr[0] * filterCoeffsLocal[0] + 
+                     ptr[2] * filterCoeffsLocal[2] +
+                     ptr[4] * filterCoeffsLocal[4] +
+                     ptr[6] * filterCoeffsLocal[6];
+
+            sumr1 += ptr[1] * filterCoeffsLocal[1] + 
+                     ptr[3] * filterCoeffsLocal[3] +
+                     ptr[5] * filterCoeffsLocal[5] +
+                     ptr[7] * filterCoeffsLocal[7];
+
+            suml2 += ptr[8] * filterCoeffsLocal[0] + 
+                     ptr[10] * filterCoeffsLocal[2] +
+                     ptr[12] * filterCoeffsLocal[4] +
+                     ptr[14] * filterCoeffsLocal[6];
+
+            sumr2 += ptr[9] * filterCoeffsLocal[1] + 
+                     ptr[11] * filterCoeffsLocal[3] +
+                     ptr[13] * filterCoeffsLocal[5] +
+                     ptr[15] * filterCoeffsLocal[7];
+
+            ptr += 16;
+            filterCoeffsLocal += 8;
+        }
+        dest[0] = (float)suml1;
+        dest[1] = (float)sumr1;
+        dest[2] = (float)suml2;
+        dest[3] = (float)sumr2;
+
+        src += 4;
+        dest += 4;
+    }
+
+    */
+    _asm
+    {
+        mov     eax, dword ptr dest
+        mov     ebx, dword ptr src
+        mov     edx, count
+        shr     edx, 1
+
+    loop1:
+        // "outer loop" : during each round 2*2 output samples are calculated
+        prefetch  [ebx]                 // give a prefetch hint to CPU what data are to be needed soonish
+        prefetch  [filterCoeffsLocal]   // give a prefetch hint to CPU what data are to be needed soonish
+
+        mov     esi, ebx
+        mov     edi, filterCoeffsLocal
+        pxor    mm0, mm0
+        pxor    mm1, mm1
+        mov     ecx, lengthLocal
+
+    loop2:
+        // "inner loop" : during each round four FIR filter taps are evaluated for 2*2 output samples
+        movq    mm2, [edi]
+        movq    mm3, mm2
+        prefetch  [edi + 32]     // give a prefetch hint to CPU what data are to be needed soonish
+        pfmul   mm2, [esi]
+        prefetch  [esi + 32]     // give a prefetch hint to CPU what data are to be needed soonish
+        pfmul   mm3, [esi + 8]
+
+        movq    mm4, [edi + 8]
+        movq    mm5, mm4
+        pfadd   mm0, mm2
+        pfmul   mm4, [esi + 8]
+        pfadd   mm1, mm3
+        pfmul   mm5, [esi + 16]
+
+        movq    mm2, [edi + 16]
+        movq    mm6, mm2
+        pfadd   mm0, mm4
+        pfmul   mm2, [esi + 16]
+        pfadd   mm1, mm5
+        pfmul   mm6, [esi + 24]
+
+        movq    mm3, [edi + 24]
+        movq    mm7, mm3
+        pfadd   mm0, mm2
+        pfmul   mm3, [esi + 24]
+        pfadd   mm1, mm6
+        pfmul   mm7, [esi + 32]
+        add     esi, 32
+        pfadd   mm0, mm3
+        add     edi, 32
+        pfadd   mm1, mm7
+
+        dec     ecx
+        jnz     loop2
+
+        movq    [eax], mm0
+        add     ebx, 16
+        movq    [eax + 8], mm1
+        add     eax, 16
+
+        dec     edx
+        jnz     loop1
+
+        femms
+    }
+
+    return count;
+}
+
+
+#endif  // ALLOW_3DNOW
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/AAFilter.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/AAFilter.cpp
new file mode 100644
index 00000000..96abda49
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/AAFilter.cpp
@@ -0,0 +1,184 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// FIR low-pass (anti-alias) filter with filter coefficient design routine and
+/// MMX optimization. 
+/// 
+/// Anti-alias filter is used to prevent folding of high frequencies when 
+/// transposing the sample rate with interpolation.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-01-11 13:34:24 +0200 (Sun, 11 Jan 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: AAFilter.cpp 45 2009-01-11 11:34:24Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <memory.h>
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include "AAFilter.h"
+#include "FIRFilter.h"
+
+using namespace soundtouch;
+
+#define PI        3.14159265358979323846    // pi; the former literal was wrong from the 9th decimal on
+#define TWOPI    (2 * PI)
+
+/*****************************************************************************
+ *
+ * Implementation of the class 'AAFilter'
+ *
+ *****************************************************************************/
+
+AAFilter::AAFilter(uint len)
+{
+    // Default cut-off is the nyquist frequency (0.5).
+    cutoffFreq = 0.5;
+    pFIR = FIRFilter::newInstance();
+
+    // setLength() stores the tap count and then runs calculateCoeffs(), which
+    // uses both members initialized above -- so this call has to come last.
+    setLength(len);
+}
+
+
+
+AAFilter::~AAFilter()
+{
+    // release the FIR filter object obtained from FIRFilter::newInstance() in the constructor
+    delete pFIR;
+}
+
+
+
+// Changes the cut-off edge frequency of the anti-alias filter. The value is
+// relative to the sampling frequency (nyquist frequency = 0.5); frequencies
+// above it are cut. The filter coefficients are recalculated right away.
+void AAFilter::setCutoffFreq(double newCutoffFreq)
+{
+    this->cutoffFreq = newCutoffFreq;
+    this->calculateCoeffs();
+}
+
+
+
+// Changes the number of FIR filter taps and recalculates the coefficients
+// for the new length.
+void AAFilter::setLength(uint newLength)
+{
+    this->length = newLength;
+    this->calculateCoeffs();
+}
+
+
+
+// Calculates coefficients for a low-pass FIR filter using Hamming window
+//
+// Designs 'length' taps as a Hamming-windowed sinc for the current
+// 'cutoffFreq', normalizes them so that they sum up to 16384 (= 2^14), and 
+// passes them to the FIR filter object with a matching divide factor of 14.
+void AAFilter::calculateCoeffs()
+{
+    uint i;
+    double cntTemp, temp, tempCoeff, h, w;
+    double wc;
+    double scaleCoeff, sum;
+    double *work;
+    SAMPLETYPE *coeffs;
+
+    assert(length >= 2);
+    assert(length % 4 == 0);
+    assert(cutoffFreq >= 0);
+    assert(cutoffFreq <= 0.5);
+
+    work = new double[length];
+    coeffs = new SAMPLETYPE[length];
+
+    wc = PI * (2.0 * cutoffFreq);           // cut-off as angular frequency
+    tempCoeff = TWOPI / (double)length;
+
+    sum = 0;
+    for (i = 0; i < length; i ++) 
+    {
+        // tap position relative to the filter centre
+        cntTemp = (double)i - (double)(length / 2);
+
+        // sinc function. No extra gain factor is applied here: all taps are 
+        // normalized by their sum below, so only the shape matters, and the
+        // centre tap has to be the true limit value of sin(x)/x (= 1) to be
+        // consistent with the other taps at every cut-off frequency.
+        temp = cntTemp * wc;
+        if (temp != 0) 
+        {
+            h = sin(temp) / temp;
+        } 
+        else 
+        {
+            h = 1.0;
+        }
+        w = 0.54 + 0.46 * cos(tempCoeff * cntTemp);       // hamming window
+
+        temp = w * h;
+        work[i] = temp;
+
+        // calc net sum of coefficients 
+        sum += temp;
+    }
+
+    // ensure the sum of coefficients is larger than zero
+    assert(sum > 0);
+
+    // ensure we've really designed a lowpass filter...
+    assert(work[length/2] > 0);
+    assert(work[length/2 + 1] > -1e-6);
+    assert(work[length/2 - 1] > -1e-6);
+
+    // Calculate a scaling coefficient in such a way that the result can be
+    // divided by 16384
+    scaleCoeff = 16384.0f / sum;
+
+    for (i = 0; i < length; i ++) 
+    {
+        // scale & round to nearest integer
+        temp = work[i] * scaleCoeff;
+        temp += (temp >= 0) ? 0.5 : -0.5;
+        // ensure no overflows
+        assert(temp >= -32768 && temp <= 32767);
+        coeffs[i] = (SAMPLETYPE)temp;
+    }
+
+    // Set coefficients. Use divide factor 14 => divide result by 2^14 = 16384
+    pFIR->setCoefficients(coeffs, length, 14);
+
+    delete[] work;
+    delete[] coeffs;
+}
+
+
+// Runs the given sequence of samples through the filter by forwarding the 
+// call to the FIR filter object, and returns whatever sample count it reports.
+// Note : The amount of outputted samples is by value of 'filter length' 
+// smaller than the amount of input samples.
+uint AAFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
+{
+    const uint produced = pFIR->evaluate(dest, src, numSamples, numChannels);
+
+    return produced;
+}
+
+
+uint AAFilter::getLength() const
+{
+    // report the length as seen by the FIR filter object
+    const uint taps = pFIR->getLength();
+
+    return taps;
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/AAFilter.h b/plugins/soundtouch/soundtouch/source/SoundTouch/AAFilter.h
new file mode 100644
index 00000000..d5c8ce4c
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/AAFilter.h
@@ -0,0 +1,91 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// FIR low-pass (anti-alias) filter with filter coefficient design routine.
+/// This header declares the 'AAFilter' class; see 'AAFilter.cpp' for the 
+/// implementation.
+///
+/// Anti-alias filter is used to prevent folding of high frequencies when 
+/// transposing the sample rate with interpolation.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// File revision : $Revision: 4 $
+//
+// $Id: AAFilter.h 11 2008-02-10 16:26:55Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef AAFilter_H
+#define AAFilter_H
+
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+/// Anti-alias low-pass filter: designs FIR coefficients for a given cut-off
+/// frequency and tap count, and applies them through a 'FIRFilter' object.
+class AAFilter
+{
+protected:
+    /// FIR filter object doing the actual filtering. Created in the constructor
+    /// and deleted in the destructor.
+    /// NOTE(review): no copy constructor / assignment operator is declared, so
+    /// copying an AAFilter would presumably leave two objects deleting the same
+    /// 'pFIR' -- confirm that instances are never copied.
+    class FIRFilter *pFIR;
+
+    /// Low-pass filter cut-off frequency, negative = invalid
+    double cutoffFreq;
+
+    /// num of filter taps
+    uint length;
+
+    /// Calculate the FIR coefficients realizing the given cutoff-frequency
+    void calculateCoeffs();
+public:
+    /// Creates a filter with 'length' taps and a cut-off frequency of 0.5.
+    AAFilter(uint length);
+
+    ~AAFilter();
+
+    /// Sets new anti-alias filter cut-off edge frequency, scaled to sampling 
+    /// frequency (nyquist frequency = 0.5). The filter will cut off the 
+    /// frequencies higher than that.
+    void setCutoffFreq(double newCutoffFreq);
+
+    /// Sets number of FIR filter taps, i.e. ~filter complexity
+    void setLength(uint newLength);
+
+    /// Returns the filter length as reported by the FIR filter object.
+    uint getLength() const;
+
+    /// Applies the filter to the given sequence of samples. 
+    /// Note : The amount of outputted samples is by value of 'filter length' 
+    /// smaller than the amount of input samples.
+    uint evaluate(SAMPLETYPE *dest, 
+                  const SAMPLETYPE *src, 
+                  uint numSamples, 
+                  uint numChannels) const;
+};
+
+}
+
+#endif
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/BPMDetect.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/BPMDetect.cpp
new file mode 100644
index 00000000..405f514b
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/BPMDetect.cpp
@@ -0,0 +1,308 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Beats-per-minute (BPM) detection routine.
+///
+/// The beat detection algorithm works as follows:
+/// - Use function 'inputSamples' to input chunks of samples to the class for
+///   analysis. It's a good idea to enter a large sound file or stream in smallish
+///   chunks of around a few kilosamples in order not to exhaust too much RAM.
+/// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden,
+///   which is basically ok as low (bass) frequencies mostly determine the beat rate.
+///   Simple averaging is used for anti-alias filtering because the resulting signal
+///   quality isn't of that high importance.
+/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by
+///   taking absolute value that's smoothed by sliding average. Signal levels that
+///   are below a couple of times the general RMS amplitude level are cut away to
+///   leave only notable peaks there.
+/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term 
+///   autocorrelation function of the enveloped signal.
+/// - After whole sound data file has been analyzed as above, the bpm level is 
+///   detected by function 'getBpm' that finds the highest peak of the autocorrelation 
+///   function, calculates its precise location and converts this reading to bpm's.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: BPMDetect.cpp 63 2009-02-21 16:00:14Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <math.h>
+#include <assert.h>
+#include <string.h>
+#include "FIFOSampleBuffer.h"
+#include "PeakFinder.h"
+#include "BPMDetect.h"
+
+using namespace soundtouch;
+
+/// Maximum number of input samples that 'inputSamples' passes to 'decimate' in one go.
+#define INPUT_BLOCK_SAMPLES       2048
+/// Size of the temporary buffer in 'inputSamples' that receives the decimated samples of one block.
+#define DECIMATED_BLOCK_SAMPLES   256
+
+/// decay constant for calculating RMS volume sliding average approximation 
+/// (time constant is about 10 sec)
+const float avgdecay = 0.99986f;
+
+/// Normalization coefficient for calculating RMS sliding average approximation.
+const float avgnorm = (1 - avgdecay);
+
+
+
+BPMDetect::BPMDetect(int numChannels, int aSampleRate)
+{
+    sampleRate = aSampleRate;
+    channels = numChannels;
+
+    // running state of the decimator and of the envelope follower
+    decimateCount = 0;
+    decimateSum = 0;
+    envelopeAccu = 0;
+
+    // Seed the RMS volume accumulator with a level that is typical for song
+    // data (RMS of 3000 out of 32768); it adapts to the real level while 
+    // samples are being processed.
+#ifdef INTEGER_SAMPLES
+    RMSVolumeAccu = (3000 * 3000) / avgnorm;        // integer samples
+#else
+    RMSVolumeAccu = (0.092f * 0.092f) / avgnorm;    // float samples, range [-1..+1[
+#endif
+
+    // decimation factor that brings the sample rate down to approx. 500 Hz
+    decimateBy = sampleRate / 500;
+    assert(decimateBy > 0);
+    assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES);
+
+    // range of autocorrelation lags that corresponds to MAX_BPM .. MIN_BPM
+    windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM);
+    windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM);
+    assert(windowLen > windowStart);
+
+    // autocorrelation accumulator, one zeroed slot per lag
+    xcorr = new float[windowLen];
+    for (int lag = 0; lag < windowLen; lag ++)
+    {
+        xcorr[lag] = 0;
+    }
+
+    // sample history buffer; processing is done in mono
+    buffer = new FIFOSampleBuffer();
+    buffer->setChannels(1);
+    buffer->clear();
+}
+
+
+
+BPMDetect::~BPMDetect()
+{
+    // release the two objects allocated by the constructor
+    delete buffer;
+    delete[] xcorr;
+}
+
+
+
+/// Mixes the input down to mono and decimates it by factor 'decimateBy'.
+/// Returns the number of samples written to 'dest'.
+///
+/// Every output sample is the plain average of 'decimateBy' consecutive input 
+/// sample frames over all channels. The averaging doubles as a crude anti-alias
+/// filter, which is good enough for this purpose. The running sum and counter
+/// are kept in member variables, so a group of frames may span several calls.
+int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples)
+{
+    int written = 0;
+
+    assert(channels > 0);
+    assert(decimateBy > 0);
+
+    for (int frame = 0; frame < numsamples; frame ++)
+    {
+        int ch;
+
+        // add all channels of this frame to the running sum
+        for (ch = 0; ch < channels; ch ++)
+        {
+            decimateSum += src[ch];
+        }
+        src += ch;
+
+        decimateCount ++;
+        if (decimateCount < decimateBy) continue;   // group not complete yet
+
+        // a complete group has been summed: emit its average and start over
+        LONG_SAMPLETYPE avg = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels));
+        decimateSum = 0;
+        decimateCount = 0;
+#ifdef INTEGER_SAMPLES
+        // keep within the 16-bit range (shouldn't actually be necessary)
+        if (avg > 32767) avg = 32767;
+        else if (avg < -32768) avg = -32768;
+#endif // INTEGER_SAMPLES
+        dest[written] = (SAMPLETYPE)avg;
+        written ++;
+    }
+    return written;
+}
+
+
+
+// Calculates autocorrelation function of the sample history buffer
+//
+// For every lag in range [windowStart, windowLen[ the products of the first
+// 'process_samples' samples of the buffer with the samples 'lag' positions 
+// later are summed up, and the sum is added to 'xcorr[lag]'. The buffer must 
+// hold at least 'process_samples' + 'windowLen' samples.
+void BPMDetect::updateXCorr(int process_samples)
+{
+    int offs;
+    SAMPLETYPE *pBuffer;
+    
+    assert(buffer->numSamples() >= (uint)(process_samples + windowLen));
+
+    pBuffer = buffer->ptrBegin();
+    for (offs = windowStart; offs < windowLen; offs ++) 
+    {
+        // Accumulate in double: with integer samples a single product can be
+        // close to 2^30, so an integer accumulator could overflow after only
+        // a few terms.
+        double sum;
+        int i;
+
+        sum = 0;
+        for (i = 0; i < process_samples; i ++) 
+        {
+            sum += pBuffer[i] * pBuffer[i + offs];    // scaling the sub-result shouldn't be necessary
+        }
+//        xcorr[offs] *= xcorr_decay;   // decay 'xcorr' here with suitable coefficients 
+                                        // if it's desired that the system adapts automatically to
+                                        // various bpms, e.g. in processing continuous music stream.
+                                        // The 'xcorr_decay' should be a value that's smaller than but 
+                                        // close to one, and should also depend on 'process_samples' value.
+
+        xcorr[offs] += (float)sum;
+    }
+}
+
+
+
+// Replaces the given samples, in place, with their amplitude envelope: 
+// absolute sample values with everything below two times the running RMS 
+// level cut away, smoothed with a decaying accumulator.
+void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples) 
+{
+    const float decay = 0.7f;               // smoothing constant of the envelope
+    const float norm = (1 - decay);
+
+    int pos = 0;
+    while (pos < numsamples)
+    {
+        float level = (float)fabs((float)samples[pos]);
+
+        // update the running average RMS volume
+        RMSVolumeAccu *= avgdecay;
+        RMSVolumeAccu += level * level;
+
+        // keep only what exceeds 2 times the average RMS volume
+        // (the peaks are of interest, not the silent moments)
+        level -= 2 * (float)sqrt(RMSVolumeAccu * avgnorm);
+        if (!(level > 0))
+        {
+            level = 0;
+        }
+
+        // smooth the amplitude envelope
+        envelopeAccu *= decay;
+        envelopeAccu += level;
+        LONG_SAMPLETYPE smoothed = (LONG_SAMPLETYPE)(envelopeAccu * norm);
+
+#ifdef INTEGER_SAMPLES
+        // cut peaks (shouldn't be necessary though)
+        if (smoothed > 32767) smoothed = 32767;
+#endif // INTEGER_SAMPLES
+        samples[pos] = (SAMPLETYPE)smoothed;
+        pos ++;
+    }
+}
+
+
+
+/// Feeds 'numSamples' sample frames of sound data to the detector. The data is
+/// decimated and enveloped block by block and appended to the history buffer;
+/// once the buffer holds more than 'windowLen' samples, the autocorrelation is
+/// updated for the surplus samples, which are then removed from the buffer.
+void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples)
+{
+    SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES];
+    int maxBlock;
+
+    // Without a valid decimation factor nothing can be processed.
+    assert(decimateBy > 0);
+    if (decimateBy <= 0) return;
+
+    // 'decimate' outputs one sample per 'decimateBy' input frames, so a block
+    // of at most decimateBy * DECIMATED_BLOCK_SAMPLES frames can never produce
+    // more than DECIMATED_BLOCK_SAMPLES samples, even with a partially filled
+    // group carried over from the previous block. Limiting the block size this
+    // way keeps 'decimated' from overflowing at low sample rates.
+    maxBlock = decimateBy * DECIMATED_BLOCK_SAMPLES;
+    if (maxBlock > INPUT_BLOCK_SAMPLES) maxBlock = INPUT_BLOCK_SAMPLES;
+
+    // iterate so that max 'maxBlock' samples are processed per iteration
+    while (numSamples > 0)
+    {
+        int block;
+        int decSamples;
+
+        block = (numSamples > maxBlock) ? maxBlock : numSamples;
+
+        // decimate. note that converts to mono at the same time
+        decSamples = decimate(decimated, samples, block);
+        samples += block * channels;
+        numSamples -= block;
+
+        // envelope new samples and add them to buffer
+        calcEnvelope(decimated, decSamples);
+        buffer->putSamples(decimated, decSamples);
+    }
+
+    // when the buffer has enough samples for processing...
+    if ((int)buffer->numSamples() > windowLen) 
+    {
+        int processLength;
+
+        // how many samples are processed
+        processLength = (int)buffer->numSamples() - windowLen;
+
+        // ... calculate autocorrelations for oldest samples...
+        updateXCorr(processLength);
+        // ... and remove them from the buffer
+        buffer->receiveSamples(processLength);
+    }
+}
+
+
+
+float BPMDetect::getBpm()
+{
+    // locate the peak of the autocorrelation function within the lag window
+    PeakFinder finder;
+    const double peak = finder.detectPeak(xcorr, windowStart, windowLen);
+
+    assert(decimateBy != 0);
+    if (peak < 1e-6)
+    {
+        return 0.0;     // detection failed.
+    }
+
+    // the peak position is a lag in decimated samples: turn it into beats per minute
+    const double decimatedRate = (double)sampleRate / (double)decimateBy;
+    return (float)(60.0 * (decimatedRate / peak));
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/FIFOSampleBuffer.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/FIFOSampleBuffer.cpp
new file mode 100644
index 00000000..01f64b08
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/FIFOSampleBuffer.cpp
@@ -0,0 +1,262 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// A buffer class for temporarily storing sound samples, operates as a 
+/// first-in-first-out pipe.
+///
+/// Samples are added to the end of the sample buffer with the 'putSamples' 
+/// function, and are received from the beginning of the buffer by calling
+/// the 'receiveSamples' function. The class automatically removes the 
+/// outputted samples from the buffer, as well as grows the buffer size 
+/// whenever necessary.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-27 19:24:42 +0200 (Fri, 27 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: FIFOSampleBuffer.cpp 68 2009-02-27 17:24:42Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include <assert.h>
+#include <stdexcept>
+
+#include "FIFOSampleBuffer.h"
+
+using namespace soundtouch;
+
+// Constructor: creates an empty buffer for 'numChannels' channels.
+FIFOSampleBuffer::FIFOSampleBuffer(int numChannels)
+{
+    assert(numChannels > 0);
+    channels = (uint)numChannels;
+    // no storage yet: all pointers and counters start out cleared
+    bufferUnaligned = buffer = NULL;
+    sizeInBytes = 0;
+    bufferPos = samplesInBuffer = 0;
+    // reserve room for 32 samples up front
+    ensureCapacity(32);
+}
+
+
+// Destructor: releases the raw allocation that 'buffer' points into.
+FIFOSampleBuffer::~FIFOSampleBuffer()
+{
+    SAMPLETYPE *rawBlock = bufferUnaligned;
+    buffer = bufferUnaligned = NULL;   // clear both pointers
+    delete[] rawBlock;
+}
+
+
+// Changes the channel count (1 = mono, 2 = stereo). The values already stored
+// are kept and simply re-counted as samples of the new channel width.
+void FIFOSampleBuffer::setChannels(int numChannels)
+{
+    assert(numChannels > 0);
+
+    const uint storedValues = samplesInBuffer * channels;  // total values held
+    channels = (uint)numChannels;
+    samplesInBuffer = storedValues / channels;
+}
+
+
+// Moves the unread samples to the start of the buffer and resets 'bufferPos'
+// to zero. Does nothing when there is no buffer or 'bufferPos' is already
+// zero.
+void FIFOSampleBuffer::rewind()
+{
+    if (!buffer || !bufferPos) return;
+
+    const size_t liveBytes = sizeof(SAMPLETYPE) * channels * samplesInBuffer;
+    memmove(buffer, ptrBegin(), liveBytes);
+    bufferPos = 0;
+}
+
+
+// Appends 'nSamples' samples from 'samples' to the end of the buffer; room
+// for them is obtained through 'ptrEnd', which grows the buffer if needed.
+void FIFOSampleBuffer::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+    memcpy(ptrEnd(nSamples), samples, nSamples * channels * sizeof(SAMPLETYPE));
+    samplesInBuffer += nSamples;
+}
+
+
+// Grows the stored sample count by 'nSamples' without copying any data.
+//
+// Meant to be called after samples have been written directly through the
+// pointer returned by 'ptrEnd', so that the buffer accounts for them. Use
+// with care: the counted samples are whatever the storage already contains.
+void FIFOSampleBuffer::putSamples(uint nSamples)
+{
+    const uint required = samplesInBuffer + nSamples;
+
+    // make sure the storage really covers the enlarged sample count
+    ensureCapacity(required);
+
+    samplesInBuffer = required;
+}
+
+
+// Returns a pointer just past the last stored sample, i.e. the position where
+// new samples are to be written. Callers may use it to insert samples into
+// the buffer directly. Please be careful!
+//
+// 'slackCapacity' is the minimum free capacity (in samples) the caller needs
+// behind the returned pointer in order to successfully insert its samples;
+// the buffer is grown when necessary to satisfy this. Because the buffer may
+// be reallocated or rewound here, previously obtained pointers may no longer
+// be valid after the call.
+//
+// After writing through the pointer, call 'putSamples(numSamples)' to raise
+// the sample count accordingly.
+SAMPLETYPE *FIFOSampleBuffer::ptrEnd(uint slackCapacity) 
+{
+    const uint needed = samplesInBuffer + slackCapacity;
+    ensureCapacity(needed);
+    return &buffer[samplesInBuffer * channels];
+}
+
+
+// Returns a pointer to the first sample that has not been output yet, for
+// reading the output samples directly from the buffer. Please be careful!
+//
+// Samples consumed through this pointer are not removed automatically: call
+// 'receiveSamples(numSamples)' afterwards to drop them from the buffer.
+// The buffer must already be allocated (asserted).
+SAMPLETYPE *FIFOSampleBuffer::ptrBegin()
+{
+    assert(buffer);
+    const uint firstUnread = bufferPos * channels;
+    return &buffer[firstUnread];
+}
+
+
+// Ensures that the buffer has enough capacity, i.e. space for _at least_
+// 'capacityRequirement' number of samples. The buffer is grown in steps of
+// 4 kilobytes to eliminate the need for frequently growing up the buffer,
+// as well as to round the buffer size up to the virtual memory page size.
+// Throws if the allocation fails; the old buffer then stays intact.
+void FIFOSampleBuffer::ensureCapacity(uint capacityRequirement)
+{
+    if (capacityRequirement <= getCapacity()) 
+    {
+        // enough room already: simply rewind the buffer (if necessary)
+        rewind();
+        return;
+    }
+
+    // enlarge the buffer in 4kbyte steps (round up to next 4k boundary).
+    // The size is kept in a local until the allocation has succeeded, so
+    // that a throwing 'new' cannot leave 'sizeInBytes' overstating capacity.
+    const uint newSizeInBytes = (capacityRequirement * channels * sizeof(SAMPLETYPE) + 4095) & (uint)-4096;
+    assert(newSizeInBytes % 2 == 0);
+    // 16 spare bytes leave room for the alignment adjustment below
+    SAMPLETYPE *tempUnaligned = new SAMPLETYPE[newSizeInBytes / sizeof(SAMPLETYPE) + 16 / sizeof(SAMPLETYPE)];
+    // only reachable with a non-throwing 'new' (standard 'new' throws instead)
+    if (tempUnaligned == NULL) throw std::runtime_error("Couldn't allocate memory!\n");
+
+    // Align the buffer to begin at a 16 byte boundary. 'size_t' is used for
+    // the address arithmetic because 'ulong' (presumably unsigned long) is
+    // narrower than a pointer on LLP64 platforms and would truncate it.
+    SAMPLETYPE *temp = (SAMPLETYPE *)(((size_t)tempUnaligned + 15) & ~(size_t)15);
+    if (samplesInBuffer) memcpy(temp, ptrBegin(), samplesInBuffer * channels * sizeof(SAMPLETYPE));
+    delete[] bufferUnaligned;
+    buffer = temp;
+    bufferUnaligned = tempUnaligned;
+    sizeInBytes = newSizeInBytes;
+    bufferPos = 0;
+}
+
+
+// Returns how many samples (of 'channels' values each) the buffer can hold
+uint FIFOSampleBuffer::getCapacity() const
+{
+    return (uint)((sizeInBytes / sizeof(SAMPLETYPE)) / channels);
+}
+
+
+// Returns the number of samples currently stored in the buffer
+uint FIFOSampleBuffer::numSamples() const
+{
+    return samplesInBuffer;   // counted in samples, not in single channel values
+}
+
+
+// Copies up to 'maxSamples' samples from the beginning of the buffer into
+// 'output' and removes them from the buffer. When fewer samples than that
+// are stored, all of the available samples are copied.
+//
+// Returns the number of samples copied.
+uint FIFOSampleBuffer::receiveSamples(SAMPLETYPE *output, uint maxSamples)
+{
+    // never hand out more than what is stored
+    uint count = maxSamples;
+    if (count > samplesInBuffer) count = samplesInBuffer;
+
+    memcpy(output, ptrBegin(), channels * sizeof(SAMPLETYPE) * count);
+    return receiveSamples(count);
+}
+
+
+// Removes up to 'maxSamples' samples from the beginning of the buffer without
+// copying them anywhere; meant for callers that read the samples directly
+// through 'ptrBegin'. Returns the number of samples removed.
+uint FIFOSampleBuffer::receiveSamples(uint maxSamples)
+{
+    if (maxSamples < samplesInBuffer)
+    {
+        // partial removal: advance the read position past the removed samples
+        bufferPos += maxSamples;
+        samplesInBuffer -= maxSamples;
+        return maxSamples;
+    }
+
+    // everything stored is removed; 'bufferPos' is left where it was
+    const uint removed = samplesInBuffer;
+    samplesInBuffer = 0;
+
+    return removed;
+}
+
+
+// Returns 1 when the buffer holds no samples, 0 otherwise
+int FIFOSampleBuffer::isEmpty() const
+{
+    return (samplesInBuffer != 0) ? 0 : 1;
+}
+
+
+// Discards all buffered samples; the allocated storage is kept as it is
+void FIFOSampleBuffer::clear()
+{
+    bufferPos = 0;
+    samplesInBuffer = 0;
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/FIRFilter.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/FIRFilter.cpp
new file mode 100644
index 00000000..231263ad
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/FIRFilter.cpp
@@ -0,0 +1,269 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// General FIR digital filter routines with MMX optimization. 
+///
+/// Note : MMX optimized functions reside in a separate, platform-specific file, 
+/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-25 19:13:51 +0200 (Wed, 25 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: FIRFilter.cpp 67 2009-02-25 17:13:51Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <memory.h>
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdexcept>
+#include "FIRFilter.h"
+#include "cpu_detect.h"
+
+using namespace soundtouch;
+
+/*****************************************************************************
+ *
+ * Implementation of the class 'FIRFilter'
+ *
+ *****************************************************************************/
+
+FIRFilter::FIRFilter()
+{
+    // no coefficients exist until 'setCoefficients' is called
+    filterCoeffs = NULL;
+    length = lengthDiv8 = 0;
+    resultDivFactor = 0;
+    resultDivider = 0;
+}
+
+
+FIRFilter::~FIRFilter()
+{
+    delete[] filterCoeffs;   // may still be NULL if no coefficients were ever set
+}
+
+// Usual C-version of the filter routine for stereo sound (interleaved value pairs)
+uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
+{
+    uint i, j, end;
+    LONG_SAMPLETYPE suml, sumr;
+#ifdef FLOAT_SAMPLES
+    // when using floating point samples, use a scaler instead of a divider
+    // because division is much slower operation than multiplying.
+    double dScaler = 1.0 / (double)resultDivider;
+#endif
+
+    assert(length != 0);
+    assert(src != NULL);
+    assert(dest != NULL);
+    assert(filterCoeffs != NULL);
+    // number of output values, two per sample. Unsigned arithmetic: wraps around
+    end = 2 * (numSamples - length);    // if numSamples < length ('evaluate' rejects that case)
+
+    for (j = 0; j < end; j += 2) 
+    {
+        const SAMPLETYPE *ptr;
+        // even offsets from 'ptr' feed 'suml', odd offsets feed 'sumr'
+        suml = sumr = 0;
+        ptr = src + j;
+
+        for (i = 0; i < length; i += 4) 
+        {
+            // loop is unrolled by factor of 4 here for efficiency
+            suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
+                    ptr[2 * i + 2] * filterCoeffs[i + 1] +
+                    ptr[2 * i + 4] * filterCoeffs[i + 2] +
+                    ptr[2 * i + 6] * filterCoeffs[i + 3];
+            sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
+                    ptr[2 * i + 3] * filterCoeffs[i + 1] +
+                    ptr[2 * i + 5] * filterCoeffs[i + 2] +
+                    ptr[2 * i + 7] * filterCoeffs[i + 3];
+        }
+        // scale the accumulated sums back down before storing them
+#ifdef INTEGER_SAMPLES
+        suml >>= resultDivFactor;
+        sumr >>= resultDivFactor;
+        // saturate to 16 bit integer limits
+        suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
+        // saturate to 16 bit integer limits
+        sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
+#else
+        suml *= dScaler;
+        sumr *= dScaler;
+#endif // INTEGER_SAMPLES
+        dest[j] = (SAMPLETYPE)suml;
+        dest[j + 1] = (SAMPLETYPE)sumr;
+    }
+    return numSamples - length;    // number of stereo samples written to 'dest'
+}
+
+
+
+
+// Usual C-version of the filter routine for mono sound; returns the number of samples written
+uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
+{
+    uint i, j, end;
+    LONG_SAMPLETYPE sum;
+#ifdef FLOAT_SAMPLES
+    // with floating point samples, multiply by a reciprocal: division is slower
+    double dScaler = 1.0 / (double)resultDivider;
+#endif
+    assert(length != 0);
+    assert(src != NULL);
+    assert(dest != NULL);
+    assert(filterCoeffs != NULL);
+
+    end = numSamples - length;    // unsigned: the caller must ensure numSamples >= length
+    for (j = 0; j < end; j ++) 
+    {
+        sum = 0;
+        for (i = 0; i < length; i += 4) 
+        {
+            // loop is unrolled by factor of 4 here for efficiency
+            sum += src[i + 0] * filterCoeffs[i + 0] + 
+                   src[i + 1] * filterCoeffs[i + 1] + 
+                   src[i + 2] * filterCoeffs[i + 2] + 
+                   src[i + 3] * filterCoeffs[i + 3];
+        }
+#ifdef INTEGER_SAMPLES
+        sum >>= resultDivFactor;
+        // saturate to 16 bit integer limits
+        sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
+#else
+        sum *= dScaler;
+#endif // INTEGER_SAMPLES
+        dest[j] = (SAMPLETYPE)sum;
+        src ++;    // slide the filter window forward by one input sample
+    }
+    return end;
+}
+
+
+// Set filter coefficients and length. 'coeffs' must hold 'newLength' values.
+//
+// Throws an exception if filter length isn't divisible by 8
+void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
+{
+    assert(newLength > 0);
+    if (newLength % 8) throw std::runtime_error("FIR filter length not divisible by 8");
+
+    // Copy the coefficients before touching any member: if 'new' throws, the
+    // filter keeps its previous, still valid state (no dangling pointer).
+    SAMPLETYPE *newCoeffs = new SAMPLETYPE[newLength];
+    memcpy(newCoeffs, coeffs, newLength * sizeof(SAMPLETYPE));
+    delete[] filterCoeffs;
+    filterCoeffs = newCoeffs;
+    length = newLength;
+    lengthDiv8 = newLength / 8;
+    resultDivFactor = uResultDivFactor;
+    resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor);
+}
+
+
+uint FIRFilter::getLength() const
+{
+    return length;   // number of filter taps, as stored by 'setCoefficients'
+}
+
+
+
+// Runs the filter over 'numSamples' input samples of a mono or stereo stream.
+//
+// Note : the number of samples written to 'dest' is smaller than the number
+// of input samples by the filter length; shorter inputs produce nothing.
+uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
+{
+    assert(numChannels == 1 || numChannels == 2);
+    assert(length > 0);
+    assert(lengthDiv8 * 8 == length);
+
+    // not even one full filter length of input => no output
+    if (numSamples < length) return 0;
+
+    // dispatch to the (virtual) routine for the channel layout
+    const bool stereo = (numChannels == 2);
+    return stereo ? evaluateFilterStereo(dest, src, numSamples)
+                  : evaluateFilterMono(dest, src, numSamples);
+}
+
+
+
+// Operator 'new' is overloaded only to forbid 'new FIRFilter': it always throws.
+// Instances are created by 'newInstance', which allocates with the global '::new'.
+void * FIRFilter::operator new(size_t s)
+{
+    // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
+    throw std::runtime_error("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
+    return NULL;
+}
+
+
+FIRFilter * FIRFilter::newInstance()
+{
+    uint uExtensions;
+    // value tested against the SUPPORT_* bit masks below
+    uExtensions = detectCPUextensions();
+
+    // Check if MMX/SSE/3DNow! instruction set extensions supported by CPU
+    // Each compiled-in section ends in a dangling 'else' that binds to the next section.
+#ifdef ALLOW_MMX
+    // MMX routines available only with integer sample types
+    if (uExtensions & SUPPORT_MMX)
+    {
+        return ::new FIRFilterMMX;
+    }
+    else
+#endif // ALLOW_MMX
+
+#ifdef ALLOW_SSE
+    if (uExtensions & SUPPORT_SSE)
+    {
+        // SSE support
+        return ::new FIRFilterSSE;
+    }
+    else
+#endif // ALLOW_SSE
+
+#ifdef ALLOW_3DNOW
+    if (uExtensions & SUPPORT_3DNOW)
+    {
+        // 3DNow! support
+        return ::new FIRFilter3DNow;
+    }
+    else
+#endif // ALLOW_3DNOW
+    // '::new' (global) is used throughout because FIRFilter::operator new always throws
+    {
+        // ISA optimizations not supported, use plain C version
+        return ::new FIRFilter;
+    }
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/FIRFilter.h b/plugins/soundtouch/soundtouch/source/SoundTouch/FIRFilter.h
new file mode 100644
index 00000000..5713f7bb
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/FIRFilter.h
@@ -0,0 +1,164 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// General FIR digital filter routines with MMX optimization. 
+///
+/// Note : MMX optimized functions reside in a separate, platform-specific file, 
+/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: FIRFilter.h 63 2009-02-21 16:00:14Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef FIRFilter_H
+#define FIRFilter_H
+
+#include <stddef.h>
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+class FIRFilter 
+{
+protected:
+    // Number of FIR filter taps
+    uint length;    
+    // Number of FIR filter taps divided by 8
+    uint lengthDiv8;
+
+    // Result divider factor in 2^k format (shift count used with integer samples)
+    uint resultDivFactor;
+
+    // Result divider value, i.e. 2^resultDivFactor (used with floating point samples)
+    SAMPLETYPE resultDivider;
+
+    // Memory for filter coefficients ('length' values, released by the destructor)
+    SAMPLETYPE *filterCoeffs;
+    // Plain C++ filter routines; both return the number of samples written to 'dest'.
+    virtual uint evaluateFilterStereo(SAMPLETYPE *dest, 
+                                      const SAMPLETYPE *src, 
+                                      uint numSamples) const;
+    virtual uint evaluateFilterMono(SAMPLETYPE *dest, 
+                                    const SAMPLETYPE *src, 
+                                    uint numSamples) const;
+
+public:
+    FIRFilter();
+    virtual ~FIRFilter();
+
+    /// Operator 'new' is overloaded only to prevent 'new FIRFilter': it always throws
+    /// std::runtime_error. Use 'newInstance' to create filter objects instead.
+    static void * operator new(size_t s);
+    /// Creates a filter object; a CPU-optimized subclass is used when compiled in and supported.
+    static FIRFilter *newInstance();
+
+    /// Applies the filter to the given sequence of samples. 
+    /// Note : The amount of outputted samples is by value of 'filter_length' 
+    /// smaller than the amount of input samples.
+    ///
+    /// \return Number of samples copied to 'dest'.
+    uint evaluate(SAMPLETYPE *dest, 
+                  const SAMPLETYPE *src, 
+                  uint numSamples, 
+                  uint numChannels) const;
+    /// \return Number of filter taps.
+    uint getLength() const;
+    /// Copies 'newLength' coefficients; throws std::runtime_error unless 'newLength' is divisible by 8.
+    virtual void setCoefficients(const SAMPLETYPE *coeffs, 
+                                 uint newLength, 
+                                 uint uResultDivFactor);
+};
+
+
+// Optional subclasses that implement CPU-specific optimizations:
+
+#ifdef ALLOW_MMX
+
+/// Class that implements MMX optimized functions exclusive for 16bit integer samples type.
+    class FIRFilterMMX : public FIRFilter
+    {
+    protected:
+        short *filterCoeffsUnalign;   // NOTE(review): presumably the raw coefficient allocation - confirm
+        short *filterCoeffsAlign;     // NOTE(review): presumably an aligned pointer into it - confirm
+        // NOTE(review): matches the base class virtual only when SAMPLETYPE is 'short'
+        virtual uint evaluateFilterStereo(short *dest, const short *src, uint numSamples) const;
+    public:
+        FIRFilterMMX();
+        ~FIRFilterMMX();
+
+        virtual void setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif // ALLOW_MMX
+
+
+#ifdef ALLOW_3DNOW
+
+    /// Class that implements 3DNow! optimized functions exclusive for floating point samples type.
+    class FIRFilter3DNow : public FIRFilter
+    {
+    protected:
+        float *filterCoeffsUnalign;   // NOTE(review): presumably the raw coefficient allocation - confirm
+        float *filterCoeffsAlign;     // NOTE(review): presumably an aligned pointer into it - confirm
+        // NOTE(review): matches the base class virtual only when SAMPLETYPE is 'float'
+        virtual uint evaluateFilterStereo(float *dest, const float *src, uint numSamples) const;
+    public:
+        FIRFilter3DNow();
+        ~FIRFilter3DNow();
+        virtual void setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif  // ALLOW_3DNOW
+
+
+#ifdef ALLOW_SSE
+    /// Class that implements SSE optimized functions exclusive for floating point samples type.
+    class FIRFilterSSE : public FIRFilter
+    {
+    protected:
+        float *filterCoeffsUnalign;   // NOTE(review): presumably the raw coefficient allocation - confirm
+        float *filterCoeffsAlign;     // NOTE(review): presumably an aligned pointer into it - confirm
+        // NOTE(review): matches the base class virtual only when SAMPLETYPE is 'float'
+        virtual uint evaluateFilterStereo(float *dest, const float *src, uint numSamples) const;
+    public:
+        FIRFilterSSE();
+        ~FIRFilterSSE();
+
+        virtual void setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif // ALLOW_SSE
+
+}
+
+#endif  // FIRFilter_H
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/PeakFinder.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/PeakFinder.cpp
new file mode 100644
index 00000000..03f60bfa
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/PeakFinder.cpp
@@ -0,0 +1,239 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Peak detection routine. 
+///
+/// The routine detects highest value on an array of values and calculates the 
+/// precise peak location as a mass-center of the 'hump' around the peak value.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: PeakFinder.cpp 63 2009-02-21 16:00:14Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <math.h>
+#include <assert.h>
+
+#include "PeakFinder.h"
+
+using namespace soundtouch;
+
+#define max(x, y) (((x) > (y)) ? (x) : (y))  // NOTE: the larger argument is evaluated twice
+
+
+PeakFinder::PeakFinder()
+{
+    minPos = maxPos = 0;    // the real search range is set by each 'detectPeak' call
+}
+
+
+// Finds 'ground level' of a peak hump by starting from 'peakpos' and proceeding
+// to direction defined by 'direction' (-1 or +1) until next 'hump' after minimum
+// value will begin, i.e. until the climb counter exceeds 5 or the edge of the
+// search range is reached.
+//
+// Returns the position of the lowest value met on the way ('peakpos' itself
+// when nothing lower was found).
+int PeakFinder::findGround(const float *data, int peakpos, int direction) const
+{
+    int climb_count = 0;            // uphill steps, reduced again by downhill steps
+    float refvalue = data[peakpos]; // lowest value found so far
+    int lowpos = peakpos;           // position of that lowest value
+    int pos = peakpos;
+
+    while ((pos > minPos) && (pos < maxPos))
+    {
+        const int prevpos = pos;
+        pos += direction;
+
+        // 'maxPos' is scanned as an exclusive bound by 'detectPeak'; treat it
+        // the same way here so that data[maxPos] is never read (presumably it
+        // can lie one past the end of the array - confirm against the caller).
+        if (pos >= maxPos) break;
+
+        // calculate derivate
+        const float delta = data[pos] - data[prevpos];
+        if (delta <= 0)
+        {
+            // going downhill, ok
+            if (climb_count)
+            {
+                climb_count --;  // decrease climb count
+            }
+
+            // check if new minimum found
+            if (data[pos] < refvalue)
+            {
+                // new minimum found
+                lowpos = pos;
+                refvalue = data[pos];
+            }
+        }
+        else
+        {
+            // going uphill, increase climbing counter
+            climb_count ++;
+            if (climb_count > 5) break;    // we've been climbing too long => it's next uphill => quit
+        }
+    }
+    return lowpos;
+}
+
+
+// Walks from 'peakpos' in the direction given by 'direction' and returns the last
+// position before the value drops below 'level', or -1 if the walk leaves the
+// [minPos, maxPos) range first.
+// NOTE(review): the neighbour data[pos + direction] is read before its position
+// is range-checked, so data[minPos - 1] / data[maxPos] can be accessed.
+int PeakFinder::findCrossingLevel(const float *data, float level, int peakpos, int direction) const
+{
+    assert(data[peakpos] >= level);
+
+    for (int pos = peakpos; (pos >= minPos) && (pos < maxPos); pos += direction)
+    {
+        const float next = data[pos + direction];
+        if (next < level) return pos;   // crossing found
+    }
+
+    return -1;  // not found
+}
+
+
+// Calculates the center of mass of the 'data' items in the inclusive index range
+// 'firstPos'..'lastPos', using the item values as weights. Returns 0 when the
+// sum of the weights is below 1e-6.
+double PeakFinder::calcMassCenter(const float *data, int firstPos, int lastPos) const
+{
+    float weightedPos = 0;   // sum of position * value
+    float totalWeight = 0;   // sum of values
+
+    int idx = firstPos;
+    while (idx <= lastPos)
+    {
+        weightedPos += (float)idx * data[idx];
+        totalWeight += data[idx];
+        idx ++;
+    }
+
+    return (totalWeight < 1e-6) ? 0 : weightedPos / totalWeight;
+}
+
+
+
+/// get exact center of peak near given position by calculating local mass of center.
+///
+/// The hump around 'peakpos' is delimited by its ground positions on both sides;
+/// the mass center is taken over the part of the hump above a cut level placed
+/// between the ground level and the peak level. Returns 0 when no acceptable
+/// peak is found.
+double PeakFinder::getPeakCenter(const float *data, int peakpos) const
+{
+    // bottom positions of the hump on either side of the peak
+    const int groundLeft = findGround(data, peakpos, -1);
+    const int groundRight = findGround(data, peakpos, 1);
+
+    // the higher of the two bottoms serves as the ground level
+    const float groundLevel = max(data[groundLeft], data[groundRight]);
+    const float peakLevel = data[peakpos];
+
+    if (groundLevel < 1e-6) return 0;                // ground level too small => detection failed
+    if ((peakLevel / groundLevel) < 1.3) return 0;   // peak below 1.3 x ground level => no good peak detected
+
+    // cut level: 70% of the way from the ground level up to the peak level
+    const float cutLevel = 0.70f * peakLevel + 0.30f * groundLevel;
+
+    // positions where the hump crosses the cut level on either side
+    const int crossLeft = findCrossingLevel(data, cutLevel, peakpos, -1);
+    const int crossRight = findCrossingLevel(data, cutLevel, peakpos, 1);
+    if ((crossLeft < 0) || (crossRight < 0)) return 0;   // no crossing, no peak..
+
+    // calculate mass center of the peak surroundings
+    return calcMassCenter(data, crossLeft, crossRight);
+}
+
+
+
+double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos) 
+{
+    // Returns the refined position of the dominant peak, searched for within
+    // [aminPos, amaxPos), or 0 if no acceptable peak is found.
+    int i, peakpos;
+    double highPeak, peak;
+
+    this->minPos = aminPos;
+    this->maxPos = amaxPos;
+
+    // find absolute peak
+    peakpos = minPos;
+    peak = data[minPos];
+    for (i = minPos + 1; i < maxPos; i ++)
+    {
+        if (data[i] > peak) 
+        {
+            peak = data[i];
+            peakpos = i;
+        }
+    }
+    
+    // Calculate exact location of the highest peak mass center
+    highPeak = getPeakCenter(data, peakpos);
+    peak = highPeak;
+
+    // Now check if the highest peak were in fact harmonic of the true base beat peak 
+    // - sometimes the highest peak can be Nth harmonic of the true base peak yet 
+    // just a slightly higher than the true base
+    for (i = 2; i < 10; i ++)
+    {
+        peakpos = (int)(highPeak / (double)i + 0.5f);
+        if (peakpos < minPos) break;
+
+        // calculate mass-center of possible base peak
+        const double peaktmp = getPeakCenter(data, peakpos);
+
+        // 0 means no peak was found there; skip it, otherwise data[0] would be
+        // compared below and a chance match would discard the detected peak
+        if (peaktmp < 1e-6) continue;
+        // now compare to highest detected peak
+        const int i1 = (int)(highPeak + 0.5);
+        const int i2 = (int)(peaktmp + 0.5);
+        const double tmp = 2 * (data[i2] - data[i1]) / (data[i2] + data[i1]);
+        if (fabs(tmp) < 0.1)
+        {
+            // The highest peak is harmonic of almost as high base peak,
+            // thus use the base peak instead
+            peak = peaktmp;
+        }
+    }
+
+    return peak;
+}
+
+
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/PeakFinder.h b/plugins/soundtouch/soundtouch/source/SoundTouch/PeakFinder.h
new file mode 100644
index 00000000..e3640cc6
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/PeakFinder.h
@@ -0,0 +1,93 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// The routine detects highest value on an array of values and calculates the 
+/// precise peak location as a mass-center of the 'hump' around the peak value.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: PeakFinder.h 63 2009-02-21 16:00:14Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _PeakFinder_H_
+#define _PeakFinder_H_
+
+namespace soundtouch
+{
+
+class PeakFinder
+{
+protected:
+    /// Min, max allowed peak positions within the data vector (stored by detectPeak)
+    int minPos, maxPos;
+
+    /// Calculates the mass center between given vector items.
+    double calcMassCenter(const float *data, ///< Data vector.
+                         int firstPos,      ///< Index of first vector item belonging to the peak.
+                         int lastPos        ///< Index of last vector item belonging to the peak.
+                         ) const;
+
+    /// Finds the data vector index where the monotonically decreasing signal crosses the
+    /// given level.
+    int   findCrossingLevel(const float *data,  ///< Data vector.
+                            float level,        ///< Goal crossing level.
+                            int peakpos,        ///< Peak position index within the data vector.
+                            int direction       ///< Direction where to proceed from the peak: 1 = right, -1 = left.
+                            ) const;
+
+    /// Finds the 'ground' level, i.e. smallest level between two neighbouring peaks, to right- 
+    /// or left-hand side of the given peak position.
+    int   findGround(const float *data,     ///< Data vector.
+                     int peakpos,           ///< Peak position index within the data vector.
+                     int direction          ///< Direction where to proceed from the peak: 1 = right, -1 = left.
+                     ) const;
+
+    /// Get exact center of the peak near the given position by calculating the local center of mass
+    double getPeakCenter(const float *data, int peakpos) const;
+
+public:
+    /// Constructor. 
+    PeakFinder();
+
+    /// Detect exact peak position of the data vector by finding the largest peak 'hump'
+    /// and calculating the mass-center location of the peak hump.
+    ///
+    /// \return The location of the largest base harmonic peak hump.
+    double detectPeak(const float *data, ///< Data vector to be analyzed. The data vector has
+                                        ///< to be at least 'maxPos' items long.
+                     int minPos,        ///< Min allowed peak location within the vector data.
+                     int maxPos         ///< Max allowed peak location within the vector data.
+                     );
+};
+
+}
+
+#endif // _PeakFinder_H_
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/RateTransposer.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/RateTransposer.cpp
new file mode 100644
index 00000000..7e0b277d
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/RateTransposer.cpp
@@ -0,0 +1,628 @@
+////////////////////////////////////////////////////////////////////////////////
+/// 
+/// Sample rate transposer. Changes sample rate by using linear interpolation 
+/// together with anti-alias filtering (first order interpolation with anti-
+/// alias filtering should be quite adequate for this application)
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-10-31 16:37:24 +0200 (Sat, 31 Oct 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: RateTransposer.cpp 74 2009-10-31 14:37:24Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <memory.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdexcept>
+#include "RateTransposer.h"
+#include "AAFilter.h"
+
+using namespace std;
+using namespace soundtouch;
+
+
+/// A linear samplerate transposer class that uses integer (fixed-point) arithmetic
+/// for the transposing.
+class RateTransposerInteger : public RateTransposer
+{
+protected:
+    int iSlopeCount;    ///< Interpolation position carried over between calls, in 1/SCALE units.
+    int iRate;          ///< Transposing rate in fixed point: newRate * SCALE, rounded (see setRate).
+    SAMPLETYPE sPrevSampleL, sPrevSampleR;  ///< Last input sample(s) of the previous transpose call.
+
+    virtual void resetRegisters();
+
+    virtual uint transposeStereo(SAMPLETYPE *dest, 
+                         const SAMPLETYPE *src, 
+                         uint numSamples);
+    virtual uint transposeMono(SAMPLETYPE *dest, 
+                       const SAMPLETYPE *src, 
+                       uint numSamples);
+
+public:
+    RateTransposerInteger();
+    virtual ~RateTransposerInteger();
+
+    /// Sets new target rate. Normal rate = 1.0, smaller values represent slower 
+    /// rate, larger faster rates. Also updates the fixed-point 'iRate'.
+    virtual void setRate(float newRate);
+
+};
+
+
+/// A linear samplerate transposer class that uses floating point arithmetic
+/// for the transposing. It adds no setRate() override; the rate is read from the inherited 'fRate'.
+class RateTransposerFloat : public RateTransposer
+{
+protected:
+    float fSlopeCount;  ///< Interpolation position carried over between calls; 1.0 equals one input sample step.
+    SAMPLETYPE sPrevSampleL, sPrevSampleR;  ///< Last input sample(s) of the previous transpose call.
+
+    virtual void resetRegisters();
+
+    virtual uint transposeStereo(SAMPLETYPE *dest, 
+                         const SAMPLETYPE *src, 
+                         uint numSamples);
+    virtual uint transposeMono(SAMPLETYPE *dest, 
+                       const SAMPLETYPE *src, 
+                       uint numSamples);
+
+public:
+    RateTransposerFloat();
+    virtual ~RateTransposerFloat();
+};
+
+
+
+
+// Operator 'new' is overloaded to block direct "new RateTransposer" allocation: it always
+// throws runtime_error, because instances must be created with RateTransposer::newInstance().
+void * RateTransposer::operator new(size_t s)
+{
+    throw runtime_error("Error in RateTransposer::new: don't use \"new RateTransposer\" directly, use \"newInstance\" to create a new instance instead!");
+    return NULL;    // never reached: the throw above always exits
+}
+
+// Creates the integer or the floating point implementation, chosen at compile time by INTEGER_SAMPLES.
+RateTransposer *RateTransposer::newInstance()
+{
+#ifdef INTEGER_SAMPLES
+    return ::new RateTransposerInteger;   // global '::new' bypasses the class-level operator new, which throws
+#else
+    return ::new RateTransposerFloat;
+#endif
+}
+
+
+// Constructor: defaults to two channels with the anti-alias filter enabled.
+RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer)
+{
+    numChannels = 2;
+    bUseAAFilter = TRUE;
+    fRate = 0;      // placeholder; both derived-class constructors in this file call setRate(1.0f)
+
+    // Instantiates the anti-alias filter with default tap length
+    // of 32
+    pAAFilter = new AAFilter(32);
+}
+
+
+// Destructor: releases the anti-alias filter allocated in the constructor.
+RateTransposer::~RateTransposer()
+{
+    delete pAAFilter;
+}
+
+
+
+/// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
+void RateTransposer::enableAAFilter(BOOL newMode)
+{
+    bUseAAFilter = newMode;     // processSamples() bypasses the filter only when this equals FALSE
+}
+
+
+/// Returns the flag stored by enableAAFilter(): nonzero if the anti-alias filter is enabled.
+BOOL RateTransposer::isAAFilterEnabled() const
+{
+    return bUseAAFilter;
+}
+
+/// Returns the anti-alias filter object; it stays owned by this object (deleted in the destructor).
+AAFilter *RateTransposer::getAAFilter()
+{
+    return pAAFilter;
+}
+
+
+
+// Sets new target rate. Normal rate = 1.0, smaller values represent slower 
+// rate, larger faster rates. Also sets a matching anti-alias filter cutoff.
+void RateTransposer::setRate(float newRate)
+{
+    double fCutoff;
+
+    fRate = newRate;
+
+    // design a new anti-alias filter
+    if (newRate > 1.0f) 
+    {
+        fCutoff = 0.5f / newRate;   // rate above 1.0: cutoff is 0.5 divided by the rate
+    } 
+    else 
+    {
+        fCutoff = 0.5f * newRate;   // rate of 1.0 or below: cutoff is 0.5 times the rate
+    }
+    pAAFilter->setCutoffFreq(fCutoff);
+}
+
+
+// Outputs as many samples of the 'outputBuffer' as possible, and if there's
+// any room left, outputs also as many of the incoming samples as possible.
+// The goal is to drive the outputBuffer empty.
+//
+// It's allowed for 'output' and 'input' parameters to point to the same
+// memory position.
+/*
+void RateTransposer::flushStoreBuffer()
+{
+    if (storeBuffer.isEmpty()) return;
+
+    outputBuffer.moveSamples(storeBuffer);
+}
+*/
+
+
+// Adds 'nSamples' pcs of samples from the 'samples' memory position into
+// the input of the object.
+void RateTransposer::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+    processSamples(samples, nSamples);      // the samples are processed right away
+}
+
+
+
+// Transposes up the sample rate, causing the observed playback 'rate' of the
+// sound to decrease
+void RateTransposer::upsample(const SAMPLETYPE *src, uint nSamples)
+{
+    uint count, sizeTemp, num;
+
+    // Called by processSamples() when the rate is below 1.0: first transpose
+    // the samples and then apply the anti-alias filter to remove aliasing.
+
+    // First check that there's enough room in 'storeBuffer' 
+    // (+16 is to reserve some slack in the destination buffer)
+    sizeTemp = (uint)((float)nSamples / fRate + 16.0f);
+
+    // Transpose the samples, store the result into the end of "storeBuffer"
+    count = transpose(storeBuffer.ptrEnd(sizeTemp), src, nSamples);
+    storeBuffer.putSamples(count);
+
+    // Apply the anti-alias filter to everything held in "storeBuffer" and write
+    // the result to the end of "outputBuffer"
+    num = storeBuffer.numSamples();
+    count = pAAFilter->evaluate(outputBuffer.ptrEnd(num), 
+        storeBuffer.ptrBegin(), num, (uint)numChannels);
+    outputBuffer.putSamples(count);
+
+    // Remove the processed samples from "storeBuffer"
+    storeBuffer.receiveSamples(count);
+}
+
+
+// Transposes down the sample rate, causing the observed playback 'rate' of the
+// sound to increase
+void RateTransposer::downsample(const SAMPLETYPE *src, uint nSamples)
+{
+    uint count, sizeTemp;
+
+    // Called by processSamples() when the rate is 1.0 or above: first apply the
+    // anti-alias filter to remove high frequencies (prevent them from folding
+    // over the lower frequencies), then transpose.
+
+    // Add the new samples to the end of the storeBuffer
+    storeBuffer.putSamples(src, nSamples);
+
+    // Anti-alias filter the samples to prevent folding and output the filtered 
+    // data to tempBuffer. Note : because of the FIR filter length, the
+    // filtering routine takes in 'filter_length' more samples than it outputs.
+    assert(tempBuffer.isEmpty());
+    sizeTemp = storeBuffer.numSamples();
+
+    count = pAAFilter->evaluate(tempBuffer.ptrEnd(sizeTemp), 
+        storeBuffer.ptrBegin(), sizeTemp, (uint)numChannels);
+
+    if (count == 0) return;     // nothing was filtered; the input stays in storeBuffer
+
+    // Remove the filtered samples from 'storeBuffer'
+    storeBuffer.receiveSamples(count);
+
+    // Transpose the filtered samples (+16 is to reserve some slack in the destination buffer)
+    sizeTemp = (uint)((float)count / fRate + 16.0f);    // sized by 'count', the amount actually transposed
+    count = transpose(outputBuffer.ptrEnd(sizeTemp), tempBuffer.ptrBegin(), count);
+    outputBuffer.putSamples(count);
+}
+
+
+// Transposes the sample rate of 'src' and appends the result to 'outputBuffer'.
+// With the anti-alias filter enabled, rates below 1.0 go through upsample() and
+// all other rates through downsample(); with it disabled the samples are only
+// transposed. Does nothing when 'nSamples' is zero.
+void RateTransposer::processSamples(const SAMPLETYPE *src, uint nSamples)
+{
+    uint count;
+    uint sizeReq;
+
+    if (nSamples == 0) return;
+    assert(pAAFilter);
+
+    // If anti-alias filter is turned off, simply transpose without applying
+    // the filter
+    if (bUseAAFilter == FALSE) 
+    {
+        sizeReq = (uint)((float)nSamples / fRate + 1.0f);
+        count = transpose(outputBuffer.ptrEnd(sizeReq), src, nSamples);
+        outputBuffer.putSamples(count);
+        return;
+    }
+
+    // Transpose with anti-alias filter
+    if (fRate < 1.0f) 
+    {
+        upsample(src, nSamples);
+    } 
+    else  
+    {
+        downsample(src, nSamples);
+    }
+}
+
+
+// Transposes the sample rate of the given samples by dispatching to the stereo or mono routine. 
+// Returns the number of samples returned in the "dest" buffer
+inline uint RateTransposer::transpose(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    if (numChannels == 2) 
+    {
+        return transposeStereo(dest, src, nSamples);
+    } 
+    else 
+    {
+        return transposeMono(dest, src, nSamples);     // any channel count other than 2 takes the mono path
+    }
+}
+
+
+// Sets the number of channels, 1 = mono, 2 = stereo. Does nothing if the count is unchanged.
+void RateTransposer::setChannels(int nChannels)
+{
+    assert(nChannels > 0);
+    if (numChannels == nChannels) return;
+
+    assert(nChannels == 1 || nChannels == 2);
+    numChannels = nChannels;
+
+    storeBuffer.setChannels(numChannels);
+    tempBuffer.setChannels(numChannels);
+    outputBuffer.setChannels(numChannels);
+
+    // Inits the linear interpolation registers
+    resetRegisters();
+}
+
+
+// Clears the samples held in 'outputBuffer' and 'storeBuffer' ('tempBuffer' is not touched here)
+void RateTransposer::clear()
+{
+    outputBuffer.clear();
+    storeBuffer.clear();
+}
+
+
+// Returns nonzero only if FIFOProcessor::isEmpty() returns nonzero and storeBuffer.isEmpty() does too.
+int RateTransposer::isEmpty() const
+{
+    int res;
+
+    res = FIFOProcessor::isEmpty();
+    if (res == 0) return 0;     // the base class check already failed; storeBuffer is not consulted
+    return storeBuffer.isEmpty();
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// RateTransposerInteger - integer arithmetic implementation
+// 
+
+/// fixed-point interpolation routine precision
+#define SCALE    65536
+
+// Constructor: clears the interpolation registers and sets the normal rate of 1.0.
+RateTransposerInteger::RateTransposerInteger() : RateTransposer()
+{
+    // Notice: the calls are class-qualified for the sake of clarity, since virtual
+    // dispatch inside a C++ constructor never reaches overrides of more-derived classes.
+    RateTransposerInteger::resetRegisters();
+    RateTransposerInteger::setRate(1.0f);
+}
+
+// Destructor: empty body, nothing is released at this level.
+RateTransposerInteger::~RateTransposerInteger()
+{
+}
+
+// Resets the interpolation position and the remembered previous samples to zero.
+void RateTransposerInteger::resetRegisters()
+{
+    iSlopeCount = 0;
+    sPrevSampleL = 
+    sPrevSampleR = 0;
+}
+
+
+
+// Transposes the sample rate of the given samples using linear interpolation. 
+// 'Mono' version of the routine. Returns the number of samples written to the
+// "dest" buffer; 'nSamples' is the number of input samples available in "src".
+uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int i, used;
+    LONG_SAMPLETYPE temp, vol1;
+
+    if (nSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // Process the last sample saved from the previous call first...
+    while (iSlopeCount <= SCALE) 
+    {
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = vol1 * sPrevSampleL + iSlopeCount * src[0];
+        dest[i] = (SAMPLETYPE)(temp / SCALE);
+        i++;
+        iSlopeCount += iRate;
+    }
+    // now always (iSlopeCount > SCALE)
+    iSlopeCount -= SCALE;
+    // The main loop reads src[used + 1], so it is entered only with two or more input samples.
+    while (nSamples > 1)
+    {
+        while (iSlopeCount > SCALE) 
+        {
+            iSlopeCount -= SCALE;
+            used ++;
+            if (used >= nSamples - 1) goto end;     // src[used + 1] would be past the input
+        }
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = src[used] * vol1 + iSlopeCount * src[used + 1];
+        dest[i] = (SAMPLETYPE)(temp / SCALE);
+
+        i++;
+        iSlopeCount += iRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[nSamples - 1];
+
+    return i;
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation. 
+// 'Stereo' version of the routine: "src" and "dest" hold interleaved left/right
+// values. Returns the number of sample pairs written to the "dest" buffer
+uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int srcPos, i, used;
+    LONG_SAMPLETYPE temp, vol1;
+
+    if (nSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // Process the last sample saved from the previous call first...
+    while (iSlopeCount <= SCALE) 
+    {
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = vol1 * sPrevSampleL + iSlopeCount * src[0];
+        dest[2 * i] = (SAMPLETYPE)(temp / SCALE);
+        temp = vol1 * sPrevSampleR + iSlopeCount * src[1];
+        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);
+        i++;
+        iSlopeCount += iRate;
+    }
+    // now always (iSlopeCount > SCALE)
+    iSlopeCount -= SCALE;
+    // The main loop reads the pair after 'used', so it is entered only with two or more input pairs.
+    while (nSamples > 1)
+    {
+        while (iSlopeCount > SCALE) 
+        {
+            iSlopeCount -= SCALE;
+            used ++;
+            if (used >= nSamples - 1) goto end;     // the following pair would be past the input
+        }
+        srcPos = 2 * used;
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = src[srcPos] * vol1 + iSlopeCount * src[srcPos + 2];
+        dest[2 * i] = (SAMPLETYPE)(temp / SCALE);
+        temp = src[srcPos + 1] * vol1 + iSlopeCount * src[srcPos + 3];
+        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);
+
+        i++;
+        iSlopeCount += iRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[2 * nSamples - 2];
+    sPrevSampleR = src[2 * nSamples - 1];
+
+    return i;
+}
+
+
+// Sets new target rate. Normal rate = 1.0, smaller values represent slower 
+// rate, larger faster rates. The rate is also stored in fixed point in 'iRate'.
+void RateTransposerInteger::setRate(float newRate)
+{
+    iRate = (int)(newRate * SCALE + 0.5f);      // scale by SCALE; +0.5f rounds before the truncating cast
+    RateTransposer::setRate(newRate);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// RateTransposerFloat - floating point arithmetic implementation
+// 
+//////////////////////////////////////////////////////////////////////////////
+
+// Constructor: clears the interpolation registers and sets the normal rate of 1.0.
+RateTransposerFloat::RateTransposerFloat() : RateTransposer()
+{
+    // Notice: the calls are class-qualified for the sake of clarity, since virtual
+    // dispatch inside a C++ constructor never reaches overrides of more-derived classes.
+    RateTransposerFloat::resetRegisters();
+    RateTransposerFloat::setRate(1.0f);     // this class declares no setRate of its own; the inherited one runs
+}
+
+// Destructor: empty body, nothing is released at this level.
+RateTransposerFloat::~RateTransposerFloat()
+{
+}
+
+// Resets the interpolation position and the remembered previous samples to zero.
+void RateTransposerFloat::resetRegisters()
+{
+    fSlopeCount = 0;
+    sPrevSampleL = 
+    sPrevSampleR = 0;
+}
+
+
+
+// Transposes the sample rate of the given samples using linear interpolation. 
+// 'Mono' version of the routine. Returns the number of samples written to the
+// "dest" buffer; 'nSamples' is the number of input samples available in "src".
+uint RateTransposerFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int i, used;
+    if (nSamples == 0) return 0;  // no samples, no work (src[0] and src[nSamples - 1] are read below)
+    used = 0;    
+    i = 0;
+
+    // Process the last sample saved from the previous call first...
+    while (fSlopeCount <= 1.0f) 
+    {
+        dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
+        i++;
+        fSlopeCount += fRate;
+    }
+    fSlopeCount -= 1.0f;
+
+    if (nSamples > 1)       // the loop below reads src[used + 1]
+    {
+        while (1)
+        {
+            while (fSlopeCount > 1.0f) 
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;
+            }
+            dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[used] + fSlopeCount * src[used + 1]);
+            i++;
+            fSlopeCount += fRate;
+        }
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[nSamples - 1];
+
+    return i;
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation. 
+// 'Stereo' version of the routine: "src" and "dest" hold interleaved left/right
+// values. Returns the number of sample pairs written to the "dest" buffer
+uint RateTransposerFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int srcPos, i, used;
+
+    if (nSamples == 0) return 0;  // no samples, no work
+
+    used = 0;    
+    i = 0;
+
+    // Process the last sample saved from the previous call first...
+    while (fSlopeCount <= 1.0f) 
+    {
+        dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
+        dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
+        i++;
+        fSlopeCount += fRate;
+    }
+    // now always (fSlopeCount > 1.0f)
+    fSlopeCount -= 1.0f;
+
+    if (nSamples > 1)       // the loop below reads the pair following 'used'
+    {
+        while (1)
+        {
+            while (fSlopeCount > 1.0f) 
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;     // the following pair would be past the input
+            }
+            srcPos = 2 * used;
+
+            dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos] 
+                + fSlopeCount * src[srcPos + 2]);
+            dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos + 1] 
+                + fSlopeCount * src[srcPos + 3]);
+
+            i++;
+            fSlopeCount += fRate;
+        }
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[2 * nSamples - 2];
+    sPrevSampleR = src[2 * nSamples - 1];
+
+    return i;
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/RateTransposer.h b/plugins/soundtouch/soundtouch/source/SoundTouch/RateTransposer.h
new file mode 100644
index 00000000..f035af2c
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/RateTransposer.h
@@ -0,0 +1,159 @@
+////////////////////////////////////////////////////////////////////////////////
+/// 
+/// Sample rate transposer. Changes sample rate by using linear interpolation 
+/// together with anti-alias filtering (first order interpolation with anti-
+/// alias filtering should be quite adequate for this application).
+///
+/// Use either of the derived classes of 'RateTransposerInteger' or 
+/// 'RateTransposerFloat' for corresponding integer/floating point transposing
+/// algorithm implementation.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: RateTransposer.h 63 2009-02-21 16:00:14Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef RateTransposer_H
+#define RateTransposer_H
+
+#include <stddef.h>
+#include "AAFilter.h"
+#include "FIFOSamplePipe.h"
+#include "FIFOSampleBuffer.h"
+
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+/// A common linear samplerate transposer class.
+///
+/// Note: Use function "RateTransposer::newInstance()" to create a new class 
+/// instance instead of the "new" operator; that function automatically 
+/// chooses a correct implementation depending on whether integer or floating 
+/// point arithmetic is to be used.
+class RateTransposer : public FIFOProcessor
+{
+protected:
+    /// Anti-alias filter object
+    AAFilter *pAAFilter;
+
+    /// Current transposing rate, as given to setRate()
+    float fRate;
+
+    /// Number of channels: 1 = mono, 2 = stereo
+    int numChannels;
+
+    /// Buffer for collecting samples to feed the anti-alias filter between
+    /// two batches
+    FIFOSampleBuffer storeBuffer;
+
+    /// Buffer for keeping samples between transposing & anti-alias filter
+    FIFOSampleBuffer tempBuffer;
+
+    /// Output sample buffer
+    FIFOSampleBuffer outputBuffer;
+
+    /// Nonzero if the anti-alias filter is enabled
+    BOOL bUseAAFilter;
+
+    /// Resets the interpolation state of the implementation class
+    virtual void resetRegisters() = 0;
+
+    virtual uint transposeStereo(SAMPLETYPE *dest, 
+                         const SAMPLETYPE *src, 
+                         uint numSamples) = 0;
+    virtual uint transposeMono(SAMPLETYPE *dest, 
+                       const SAMPLETYPE *src, 
+                       uint numSamples) = 0;
+    inline uint transpose(SAMPLETYPE *dest, 
+                   const SAMPLETYPE *src, 
+                   uint numSamples);
+
+    void downsample(const SAMPLETYPE *src, 
+                    uint numSamples);
+    void upsample(const SAMPLETYPE *src, 
+                 uint numSamples);
+
+    /// Transposes the sample rate of the given samples, applying the anti-alias 
+    /// filter (when it is enabled) to prevent folding, and appends the result
+    /// to the output buffer. Nothing is returned; fetch the result through the
+    /// output buffer.
+    void processSamples(const SAMPLETYPE *src, 
+                        uint numSamples);
+
+
+public:
+    RateTransposer();
+    virtual ~RateTransposer();
+
+    /// Operator 'new' is overloaded so that direct use of "new" on this class fails: the 
+    /// implementation in RateTransposer.cpp always throws. Use newInstance() instead.
+    static void *operator new(size_t s);
+
+    /// Use this function instead of "new" operator to create a new instance of this class. 
+    /// This function automatically chooses a correct implementation, depending on if 
+    /// integer or floating point arithmetics are to be used.
+    static RateTransposer *newInstance();
+
+    /// Returns the output buffer object
+    FIFOSamplePipe *getOutput() { return &outputBuffer; };
+
+    /// Returns the store buffer object
+    FIFOSamplePipe *getStore() { return &storeBuffer; };
+
+    /// Return anti-alias filter object
+    AAFilter *getAAFilter();
+
+    /// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
+    void enableAAFilter(BOOL newMode);
+
+    /// Returns nonzero if anti-alias filter is enabled.
+    BOOL isAAFilterEnabled() const;
+
+    /// Sets new target rate. Normal rate = 1.0, smaller values represent slower 
+    /// rate, larger faster rates.
+    virtual void setRate(float newRate);
+
+    /// Sets the number of channels, 1 = mono, 2 = stereo
+    void setChannels(int channels);
+
+    /// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+    /// the input of the object.
+    void putSamples(const SAMPLETYPE *samples, uint numSamples);
+
+    /// Clears all the samples in the object
+    void clear();
+
+    /// Returns nonzero if there aren't any samples available for outputting.
+    int isEmpty() const;
+};
+
+}
+
+#endif
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/SoundTouch.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/SoundTouch.cpp
new file mode 100644
index 00000000..aa7ac028
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/SoundTouch.cpp
@@ -0,0 +1,480 @@
+//////////////////////////////////////////////////////////////////////////////
+///
+/// SoundTouch - main class for tempo/pitch/rate adjusting routines. 
+///
+/// Notes:
+/// - Initialize the SoundTouch object instance by setting up the sound stream 
+///   parameters with functions 'setSampleRate' and 'setChannels', then set 
+///   desired tempo/pitch/rate settings with the corresponding functions.
+///
+/// - The SoundTouch class behaves like a first-in-first-out pipeline: The 
+///   samples that are to be processed are fed into one end of the pipe by calling
+///   function 'putSamples', while the ready processed samples can be read 
+///   from the other end of the pipeline with function 'receiveSamples'.
+/// 
+/// - The SoundTouch processing classes require certain sized 'batches' of 
+///   samples in order to process the sound. For this reason the classes buffer 
+///   incoming samples until there are enough of samples available for 
+///   processing, then they carry out the processing step and consequently
+///   make the processed samples available for outputting.
+/// 
+/// - For the above reason, the processing routines introduce a certain 
+///   'latency' between the input and output, so that the samples input to
+///   SoundTouch may not be immediately available in the output, and the
+///   amount of outputtable samples may not immediately be in direct
+///   relationship with the amount of previously input samples.
+///
+/// - The tempo/pitch/rate control parameters can be altered during processing.
+///   Please notice though that they aren't currently protected by semaphores,
+///   so in multi-thread application external semaphore protection may be
+///   required.
+///
+/// - This class utilizes classes 'TDStretch' for tempo change (without modifying
+///   pitch) and 'RateTransposer' for changing the playback rate (that is, both 
+///   tempo and pitch in the same ratio) of the sound. The third available control 
+///   'pitch' (change pitch but maintain tempo) is produced by combining
+///   the two other controls.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-05-19 07:57:30 +0300 (Tue, 19 May 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: SoundTouch.cpp 73 2009-05-19 04:57:30Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <assert.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <stdexcept>
+#include <stdio.h>
+
+#include "SoundTouch.h"
+#include "TDStretch.h"
+#include "RateTransposer.h"
+#include "cpu_detect.h"
+
+using namespace soundtouch;
+    
+/// Tests whether two floating point numbers are equal within a tolerance of 1e-10
+#define TEST_FLOAT_EQUAL(a, b)  (fabs((a) - (b)) < 1e-10)
+
+
+/// Prints the library version string to stdout; has C linkage for use by autoconf
+extern "C" void soundtouch_ac_test()
+{
+    printf("SoundTouch Version: %s\n",SOUNDTOUCH_VERSION);
+} 
+
+
+SoundTouch::SoundTouch()
+{
+    // Create the two processing stages: rate transposer and tempo changer
+    pRateTransposer = RateTransposer::newInstance();
+    pTDStretch = TDStretch::newInstance();
+
+    // The tempo changer is the initial output stage of the pipeline
+    setOutPipe(pTDStretch);
+
+    // Effective values start at zero so that the call below updates both stages
+    tempo = 0;
+    rate = 0;
+    virtualTempo = 1.0;
+    virtualRate = virtualTempo;
+    virtualPitch = virtualRate;
+    calcEffectiveRateAndTempo();
+
+    channels = 0;
+    bSrateSet = FALSE;
+}
+
+
+/// Deletes the rate transposer and tempo changer objects created in the constructor.
+SoundTouch::~SoundTouch()
+{
+    delete pRateTransposer;
+    delete pTDStretch;
+}
+
+
+
+/// Returns the SoundTouch library version as a C string
+const char *SoundTouch::getVersionString()
+{
+    // static storage: the returned pointer stays valid after the call
+    static const char *versionText = SOUNDTOUCH_VERSION;
+    return versionText;
+}
+
+
+/// Returns the numeric SoundTouch library version id (SOUNDTOUCH_VERSION_ID)
+uint SoundTouch::getVersionId()
+{
+    return SOUNDTOUCH_VERSION_ID;
+}
+
+
+// Sets the number of channels, 1 = mono, 2 = stereo. Any other value is
+// rejected with std::runtime_error before anything is changed.
+void SoundTouch::setChannels(uint numChannels)
+{
+    const bool monoOrStereo = (numChannels == 1) || (numChannels == 2);
+    if (!monoOrStereo) throw std::runtime_error("Illegal number of channels");
+
+    channels = numChannels;
+    pRateTransposer->setChannels((int)numChannels);
+    pTDStretch->setChannels((int)numChannels);
+}
+
+
+
+// Sets new rate control value (normal rate = 1.0, smaller values are slower,
+// larger values faster), then recalculates the effective rate and tempo.
+void SoundTouch::setRate(float newRate)
+{
+    virtualRate = newRate;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new rate control value as a difference in percents compared to the
+// original rate (-50 .. +100 %); e.g. +25 gives a nominal rate of 1.25.
+void SoundTouch::setRateChange(float newRate)
+{
+    virtualRate = 1.0f + 0.01f * newRate;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new tempo control value (normal tempo = 1.0, smaller values are slower,
+// larger values faster), then recalculates the effective rate and tempo.
+void SoundTouch::setTempo(float newTempo)
+{
+    virtualTempo = newTempo;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new tempo control value as a difference in percents compared to the
+// original tempo (-50 .. +100 %); e.g. -20 gives a nominal tempo of 0.8.
+void SoundTouch::setTempoChange(float newTempo)
+{
+    virtualTempo = 1.0f + 0.01f * newTempo;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new pitch control value (original pitch = 1.0, smaller values are lower,
+// larger values higher), then recalculates the effective rate and tempo.
+void SoundTouch::setPitch(float newPitch)
+{
+    virtualPitch = newPitch;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets pitch change in octaves compared to the original pitch
+// (-1.00 .. +1.00); one octave up doubles the pitch control value.
+void SoundTouch::setPitchOctaves(float newPitch)
+{
+    virtualPitch = (float)exp(0.69314718056f * newPitch);   // 0.693... = ln(2), i.e. 2^newPitch
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets pitch change in semi-tones compared to the original pitch
+// (-12 .. +12); integer convenience form of the floating point overload
+void SoundTouch::setPitchSemiTones(int newPitch)
+{
+    setPitchSemiTones((float)newPitch);
+}
+
+
+
+void SoundTouch::setPitchSemiTones(float newPitch)
+{
+    setPitchOctaves(newPitch / 12.0f);     // twelve semi-tones per octave
+}
+
+
+// Calculates 'effective' rate and tempo values from the nominal control
+// values, and selects which processing stage acts as the pipeline output.
+void SoundTouch::calcEffectiveRateAndTempo()
+{
+    float oldTempo = tempo;
+    float oldRate = rate;
+
+    tempo = virtualTempo / virtualPitch;    // pitch change = rate change ...
+    rate = virtualPitch * virtualRate;      // ... compensated by an inverse tempo change
+
+    if (!TEST_FLOAT_EQUAL(rate,oldRate)) pRateTransposer->setRate(rate);
+    if (!TEST_FLOAT_EQUAL(tempo, oldTempo)) pTDStretch->setTempo(tempo);
+
+#ifndef PREVENT_CLICK_AT_RATE_CROSSOVER
+    if (rate <= 1.0f) 
+    {
+        if (output != pTDStretch) 
+        {
+            FIFOSamplePipe *tempoOut;
+
+            assert(output == pRateTransposer);
+            // move samples in the current output buffer to the output of pTDStretch
+            tempoOut = pTDStretch->getOutput();
+            tempoOut->moveSamples(*output);
+            // move samples in pitch transposer's store buffer to tempo changer's input
+            pTDStretch->moveSamples(*pRateTransposer->getStore());
+
+            output = pTDStretch;
+        }
+    }
+    else
+#endif
+    {
+        if (output != pRateTransposer) 
+        {
+            FIFOSamplePipe *transOut;
+
+            assert(output == pTDStretch);
+            // move samples in the current output buffer to the output of pRateTransposer
+            transOut = pRateTransposer->getOutput();
+            transOut->moveSamples(*output);
+            // move samples in tempo changer's input to pitch transposer's input
+            pRateTransposer->moveSamples(*pTDStretch->getInput());
+
+            output = pRateTransposer;
+        }
+    } 
+}
+
+
+// Sets sample rate and marks it as defined, as required by putSamples().
+void SoundTouch::setSampleRate(uint srate)
+{
+    bSrateSet = TRUE;
+    // set sample rate, leave other tempo changer parameters as they are.
+    pTDStretch->setParameters((int)srate);
+}
+
+
+// Adds 'nSamples' pcs of samples from 'samples' into the input of the object.
+// Throws std::runtime_error if the sample rate or channel count is not yet set.
+void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+    if (bSrateSet == FALSE) 
+    {
+        throw std::runtime_error("SoundTouch : Sample rate not defined");
+    } 
+    else if (channels == 0) 
+    {
+        throw std::runtime_error("SoundTouch : Number of channels not defined");
+    }
+
+    // Transpose the rate of the new samples if necessary
+    /* Bypass the nominal setting - can introduce a click in sound when tempo/pitch control crosses the nominal value...
+    if (rate == 1.0f) 
+    {
+        // The rate value is same as the original, simply evaluate the tempo changer. 
+        assert(output == pTDStretch);
+        if (pRateTransposer->isEmpty() == 0) 
+        {
+            // yet flush the last samples in the pitch transposer buffer
+            // (may happen if 'rate' changes from a non-zero value to zero)
+            pTDStretch->moveSamples(*pRateTransposer);
+        }
+        pTDStretch->putSamples(samples, nSamples);
+    } 
+    */
+#ifndef PREVENT_CLICK_AT_RATE_CROSSOVER
+    else if (rate <= 1.0f)     // continues the if / else-if chain started above
+    {
+        // transpose the rate down, output the transposed sound to tempo changer buffer
+        assert(output == pTDStretch);
+        pRateTransposer->putSamples(samples, nSamples);
+        pTDStretch->moveSamples(*pRateTransposer);
+    } 
+    else 
+#endif
+    {
+        // evaluate the tempo changer, then transpose the rate up, 
+        assert(output == pRateTransposer);
+        pTDStretch->putSamples(samples, nSamples);
+        pRateTransposer->moveSamples(*pTDStretch);
+    }
+}
+
+
+// Flushes the last samples from the processing pipeline to the output.
+// Clears also the internal processing buffers.
+//
+// Note: This function is meant for extracting the last samples of a sound
+// stream. This function may introduce additional blank samples in the end
+// of the sound stream, and thus it's not recommended to call this function
+// in the middle of a sound stream.
+void SoundTouch::flush()
+{
+    SAMPLETYPE silence[128];
+    memset(silence, 0, sizeof(silence));
+
+    // number of output samples available before any padding is fed in
+    const uint readyBefore = numSamples();
+
+    // "Push" the last active samples out of the processing pipeline by
+    // feeding blank samples in, 64 at a time, until new processed samples
+    // appear in the output (limited to 128 rounds, i.e. at most 8192 blank
+    // samples in any case)
+    int round = 0;
+    while (round < 128)
+    {
+        putSamples(silence, 64);
+        if (numSamples() != readyBefore) break;
+        round ++;
+    }
+
+    // Clear the working buffers, but leave the output of the tempo changer
+    // untouched as that is where the flushed samples are
+    pRateTransposer->clear();
+    pTDStretch->clearInput();
+}
+
+
+// Changes a setting controlling the processing system behaviour. See the
+// 'SETTING_...' defines for available setting ID's.
+BOOL SoundTouch::setSetting(int settingId, int value)
+{
+    int srate, seqMs, seekMs, ovlMs;
+    const BOOL enable = (value != 0) ? TRUE : FALSE;    // used by the on/off settings
+    // current tdstretch parameters; each time-stretch case replaces one of them
+    pTDStretch->getParameters(&srate, &seqMs, &seekMs, &ovlMs);
+    switch (settingId)
+    {
+        case SETTING_USE_AA_FILTER :
+            // enables / disables anti-alias filter
+            pRateTransposer->enableAAFilter(enable);
+            break;
+
+        case SETTING_AA_FILTER_LENGTH :
+            // sets anti-alias filter length
+            pRateTransposer->getAAFilter()->setLength(value);
+            break;
+
+        case SETTING_USE_QUICKSEEK :
+            // enables / disables tempo routine quick seeking algorithm
+            pTDStretch->enableQuickSeek(enable);
+            break;
+
+        case SETTING_SEQUENCE_MS:
+            // change time-stretch sequence duration parameter
+            pTDStretch->setParameters(srate, value, seekMs, ovlMs);
+            break;
+
+        case SETTING_SEEKWINDOW_MS:
+            // change time-stretch seek window length parameter
+            pTDStretch->setParameters(srate, seqMs, value, ovlMs);
+            break;
+
+        case SETTING_OVERLAP_MS:
+            // change time-stretch overlap length parameter
+            pTDStretch->setParameters(srate, seqMs, seekMs, value);
+            break;
+
+        default :
+            return FALSE;
+    }
+    return TRUE;
+}
+
+
+// Reads a setting controlling the processing system behaviour. See the
+// 'SETTING_...' defines for available setting ID's.
+//
+// Returns the setting value, or 0 for an unrecognized setting ID.
+int SoundTouch::getSetting(int settingId) const
+{
+    int ms = 0;     // time-stretch value; stays 0 for an unrecognized ID
+    switch (settingId)
+    {
+        case SETTING_USE_AA_FILTER :
+            return (uint)pRateTransposer->isAAFilterEnabled();
+
+        case SETTING_AA_FILTER_LENGTH :
+            return pRateTransposer->getAAFilter()->getLength();
+
+        case SETTING_USE_QUICKSEEK :
+            return (uint)pTDStretch->isQuickSeekEnabled();
+
+        case SETTING_SEQUENCE_MS:
+            pTDStretch->getParameters(NULL, &ms, NULL, NULL);
+            break;
+
+        case SETTING_SEEKWINDOW_MS:
+            pTDStretch->getParameters(NULL, NULL, &ms, NULL);
+            break;
+
+        case SETTING_OVERLAP_MS:
+            pTDStretch->getParameters(NULL, NULL, NULL, &ms);
+            break;
+
+        default :
+            break;
+    }
+    return ms;
+}
+
+
+// Clears all the samples in the object's output and internal processing
+// buffers by clearing both the rate transposer and the tempo changer.
+void SoundTouch::clear()
+{
+    pRateTransposer->clear();
+    pTDStretch->clear();
+}
+
+
+
+/// Returns number of samples currently unprocessed, or zero if the tempo
+/// changer or its input pipe is not available.
+uint SoundTouch::numUnprocessedSamples() const
+{
+    // nothing can be pending without a tempo changer
+    if (pTDStretch == NULL) return 0;
+
+    // the unprocessed samples are those still waiting in the input pipe
+    // of the tempo changer
+    FIFOSamplePipe *pending = pTDStretch->getInput();
+    if (pending == NULL) return 0;
+
+    return pending->numSamples();
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/TDStretch.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/TDStretch.cpp
new file mode 100644
index 00000000..232133b5
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/TDStretch.cpp
@@ -0,0 +1,1045 @@
+////////////////////////////////////////////////////////////////////////////////
+/// 
+/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 
+/// while maintaining the original pitch by using a time domain WSOLA-like 
+/// method with several performance-increasing tweaks.
+///
+/// Note : MMX optimized functions reside in a separate, platform-specific 
+/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-12-28 21:27:04 +0200 (Mon, 28 Dec 2009) $
+// File revision : $Revision: 1.12 $
+//
+// $Id: TDStretch.cpp 77 2009-12-28 19:27:04Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <math.h>
+#include <float.h>
+#include <stdexcept>
+
+#include "STTypes.h"
+#include "cpu_detect.h"
+#include "TDStretch.h"
+
+#include <stdio.h>
+
+using namespace soundtouch;
+
+#define max(x, y) (((x) > (y)) ? (x) : (y))     // note: the selected argument is evaluated twice
+
+
+/*****************************************************************************
+ *
+ * Constant definitions
+ *
+ *****************************************************************************/
+// Table for the hierarchical mixing position seeking algorithm. Each row is a
+// zero-terminated offset list; the seek routines in this file read rows 0..3.
+static const short _scanOffsets[5][24]={
+    { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
+      868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
+    {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
+    { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
+    {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
+    { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117,  111,   // row 4: not read by the seek routines here
+      116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};
+
+/*****************************************************************************
+ *
+ * Implementation of the class 'TDStretch'
+ *
+ *****************************************************************************/
+
+
+TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
+{
+    // no overlap buffers have been allocated yet
+    overlapLength = 0;
+    pRefMidBufferUnaligned = NULL;
+    pMidBuffer = NULL;
+
+    channels = 2;
+    bQuickSeek = FALSE;
+    bAutoSeekSetting = TRUE;
+    bAutoSeqSetting = TRUE;
+
+//    outDebt = 0;
+    skipFract = 0;
+
+    // 'tempo' needs a value before setParameters(), which passes it to setTempo()
+    tempo = 1.0f;
+    setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
+    setTempo(1.0f);
+    clear();
+}
+
+
+/// Releases the mid buffer and the (unaligned) reference mid buffer.
+TDStretch::~TDStretch()
+{
+    delete[] pMidBuffer;
+    delete[] pRefMidBufferUnaligned;
+}
+
+
+
+// Sets routine control parameters. These are time constants defining how
+// the sound is stretched to the desired duration.
+//
+// 'aSampleRate'   = sample rate of the sound; ignored unless positive
+// 'aSequenceMS'   = length of one processing sequence in milliseconds;
+//      zero selects the automatic setting, a negative value keeps the
+//      current setting
+// 'aSeekWindowMS' = length of the seeking window for scanning the best
+//      overlapping position, in milliseconds; zero selects the automatic
+//      setting, a negative value keeps the current setting
+// 'aOverlapMS'    = overlapping length in milliseconds; ignored unless
+//      positive
+
+void TDStretch::setParameters(int aSampleRate, int aSequenceMS, 
+                              int aSeekWindowMS, int aOverlapMS)
+{
+    // sample rate and overlap: only positive values replace the old ones,
+    // zero or negative values leave them as they are
+    if (aOverlapMS > 0)    this->overlapMs = aOverlapMS;
+    if (aSampleRate > 0)   this->sampleRate = aSampleRate;
+
+    // sequence length: an explicit (positive) value turns the automatic
+    // setting off, zero turns it on
+    if (aSequenceMS >= 0)
+    {
+        bAutoSeqSetting = (aSequenceMS == 0) ? TRUE : FALSE;
+        if (aSequenceMS > 0) this->sequenceMs = aSequenceMS;
+    }
+
+    // seek window length: an explicit (positive) value turns the automatic
+    // setting off, zero turns it on
+    if (aSeekWindowMS >= 0)
+    {
+        bAutoSeekSetting = (aSeekWindowMS == 0) ? TRUE : FALSE;
+        if (aSeekWindowMS > 0) this->seekWindowMs = aSeekWindowMS;
+    }
+
+    // derive the sample-based lengths from the millisecond settings
+    calcSeqParameters();
+
+    calculateOverlapLength(overlapMs);
+
+    // set tempo to recalculate 'sampleReq'
+    setTempo(tempo);
+
+}
+
+
+
+/// Get routine control parameters, see setParameters() function.
+/// Any of the parameters to this function can be NULL, in such case corresponding parameter
+/// value isn't returned.
+///
+/// Settings in automatic mode are reported with their USE_AUTO_... marker values.
+void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
+{
+    // sample rate
+    if (pSampleRate != NULL) *pSampleRate = sampleRate;
+
+    // sequence length, or the marker of the automatic setting
+    if (pSequenceMs != NULL)
+    {
+        *pSequenceMs = bAutoSeqSetting ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
+    }
+
+    // seek window length, or the marker of the automatic setting
+    if (pSeekWindowMs != NULL)
+    {
+        *pSeekWindowMs = bAutoSeekSetting ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
+    }
+
+    // overlap length
+    if (pOverlapMs != NULL) *pOverlapMs = overlapMs;
+}
+
+
+// Cross-fades linearly from the samples in 'pMidBuffer' to the samples in
+// 'pInput' over 'overlapLength' samples, writing the mix to 'pOutput'
+void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
+{
+    for (int pos = 0; pos < overlapLength; pos ++)
+    {
+        // the incoming sample is weighted by 'pos', the old one by the rest
+        const int fadeOut = overlapLength - pos;
+        pOutput[pos] = (pInput[pos] * pos + pMidBuffer[pos] * fadeOut) / overlapLength;
+    }
+}
+
+
+// Zeroes 2 * overlapLength samples of the mid buffer. NOTE(review): the factor 2 presumably covers stereo - confirm
+void TDStretch::clearMidBuffer()
+{
+    memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength);
+}
+
+// Clears the input buffer and the mid buffer; the output buffer is not touched.
+void TDStretch::clearInput()
+{
+    inputBuffer.clear();
+    clearMidBuffer();
+}
+
+
+// Clears the sample buffers: the output buffer, then the input side via clearInput()
+void TDStretch::clear()
+{
+    outputBuffer.clear();
+    clearInput();
+}
+
+
+
+// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
+// to enable. The flag is read by seekBestOverlapPosition() to pick the seek routine.
+void TDStretch::enableQuickSeek(BOOL enable)
+{
+    bQuickSeek = enable;
+}
+
+
+// Returns nonzero if the quick seeking algorithm is enabled (the stored flag as is).
+BOOL TDStretch::isQuickSeekEnabled() const
+{
+    return bQuickSeek;
+}
+
+
+// Seeks for the optimal overlap-mixing position using the routine that matches the channel count and seek mode.
+int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
+{
+    if (channels != 2)
+    {
+        // mono sound
+        if (!bQuickSeek)
+        {
+            return seekBestOverlapPositionMono(refPos);
+        }
+        else
+        {
+            return seekBestOverlapPositionMonoQuick(refPos);
+        }
+    }
+    else
+    {
+        // stereo sound
+        if (!bQuickSeek)
+        {
+            return seekBestOverlapPositionStereo(refPos);
+        }
+        else
+        {
+            return seekBestOverlapPositionStereoQuick(refPos);
+        }
+    }
+}
+
+
+
+
+// Overlaps samples in 'midBuffer' with the samples in 'pInput', starting at
+// sample position 'ovlPos' of 'pInput'.
+inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
+{
+    if (channels != 2)
+    {
+        // mono sound: one value per sample position
+        overlapMono(pOutput, pInput + ovlPos);
+        return;
+    }
+    // stereo sound: two values per sample position
+    overlapStereo(pOutput, pInput + 2 * ovlPos);
+}
+
+
+
+
+// Seeks for the optimal overlap-mixing position. The 'stereo' version of the
+// routine, testing every position of the seek range.
+//
+// The best position is determined as the position where the two overlapped
+// sample sequences are 'most alike', in terms of the highest cross-correlation
+// value over the overlapping period
+int TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
+{
+    int bestOffs;
+    double bestCorr, corr;
+    int i;
+
+    // Slopes the amplitudes of the 'midBuffer' samples
+    precalcCorrReferenceStereo();
+    // -FLT_MAX, not FLT_MIN (the smallest positive float): negative values must qualify too
+    bestCorr = -FLT_MAX;
+    bestOffs = 0;
+
+    // Scans for the best correlation value by testing each possible position
+    // over the permitted range.
+    for (i = 0; i < seekLength; i ++)
+    {
+        // Calculates correlation value for the mixing position corresponding
+        // to 'i'
+        corr = (double)calcCrossCorrStereo(refPos + 2 * i, pRefMidBuffer);
+        // heuristic rule to slightly favour values close to mid of the range
+        double tmp = (double)(2 * i - seekLength) / (double)seekLength;
+        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
+
+        // Checks for the highest correlation value
+        if (corr > bestCorr)
+        {
+            bestCorr = corr;
+            bestOffs = i;
+        }
+    }
+    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+    clearCrossCorrState();
+
+    return bestOffs;
+}
+
+
+// Seeks for the optimal overlap-mixing position. The quick 'stereo' version
+// of the routine, testing only the positions of a hierarchical search.
+//
+// The best position is determined as the position where the two overlapped
+// sample sequences are 'most alike', in terms of the highest cross-correlation
+// value over the overlapping period
+int TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
+{
+    int j;
+    int bestOffs;
+    double bestCorr, corr;
+    int scanCount, corrOffset, tempOffset;
+
+    // Slopes the amplitude of the 'midBuffer' samples
+    precalcCorrReferenceStereo();
+    // -FLT_MAX, not FLT_MIN (the smallest positive float): negative values must qualify too
+    bestCorr = -FLT_MAX;
+    bestOffs = _scanOffsets[0][0];
+    corrOffset = 0;
+    tempOffset = 0;
+
+    // Scans for the best correlation value using four-pass hierarchical search.
+    //
+    // The look-up table 'scans' has hierarchical position adjusting steps.
+    // In first pass the routine searches for the highest correlation with
+    // relatively coarse steps, then rescans the neighbourhood of the highest
+    // correlation with better resolution and so on.
+    for (scanCount = 0;scanCount < 4; scanCount ++)
+    {
+        j = 0;
+        while (_scanOffsets[scanCount][j])
+        {
+            tempOffset = corrOffset + _scanOffsets[scanCount][j];
+            if (tempOffset >= seekLength) break;
+
+            // Calculates correlation value for the mixing position corresponding
+            // to 'tempOffset'
+            corr = (double)calcCrossCorrStereo(refPos + 2 * tempOffset, pRefMidBuffer);
+            // heuristic rule to slightly favour values close to mid of the range
+            double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
+            corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
+
+            // Checks for the highest correlation value
+            if (corr > bestCorr)
+            {
+                bestCorr = corr;
+                bestOffs = tempOffset;
+            }
+            j ++;
+        }
+        corrOffset = bestOffs;
+    }
+    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+    clearCrossCorrState();
+
+    return bestOffs;
+}
+
+
+
+// Seeks for the optimal overlap-mixing position. The 'mono' version of the
+// routine, testing every position of the seek range.
+//
+// The best position is determined as the position where the two overlapped
+// sample sequences are 'most alike', in terms of the highest cross-correlation
+// value over the overlapping period
+int TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos)
+{
+    int bestOffs;
+    double bestCorr, corr;
+    int tempOffset;
+    const SAMPLETYPE *compare;
+
+    // Slopes the amplitude of the 'midBuffer' samples
+    precalcCorrReferenceMono();
+    // -FLT_MAX, not FLT_MIN (the smallest positive float): negative values must qualify too
+    bestCorr = -FLT_MAX;
+    bestOffs = 0;
+
+    // Scans for the best correlation value by testing each possible position
+    // over the permitted range.
+    for (tempOffset = 0; tempOffset < seekLength; tempOffset ++)
+    {
+        compare = refPos + tempOffset;
+
+        // Calculates correlation value for the mixing position corresponding
+        // to 'tempOffset'
+        corr = (double)calcCrossCorrMono(pRefMidBuffer, compare);
+        // heuristic rule to slightly favour values close to mid of the range
+        double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
+        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
+
+        // Checks for the highest correlation value
+        if (corr > bestCorr)
+        {
+            bestCorr = corr;
+            bestOffs = tempOffset;
+        }
+    }
+    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+    clearCrossCorrState();
+
+    return bestOffs;
+}
+
+
+// Seeks for the optimal overlap-mixing position. The quick 'mono' version
+// of the routine, testing only the positions of a hierarchical search.
+//
+// The best position is determined as the position where the two overlapped
+// sample sequences are 'most alike', in terms of the highest cross-correlation
+// value over the overlapping period
+int TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos)
+{
+    int j;
+    int bestOffs;
+    double bestCorr, corr;
+    int scanCount, corrOffset, tempOffset;
+
+    // Slopes the amplitude of the 'midBuffer' samples
+    precalcCorrReferenceMono();
+    // -FLT_MAX, not FLT_MIN (the smallest positive float): negative values must qualify too
+    bestCorr = -FLT_MAX;
+    bestOffs = _scanOffsets[0][0];
+    corrOffset = 0;
+    tempOffset = 0;
+
+    // Scans for the best correlation value using four-pass hierarchical search.
+    //
+    // The look-up table 'scans' has hierarchical position adjusting steps.
+    // In first pass the routine searches for the highest correlation with
+    // relatively coarse steps, then rescans the neighbourhood of the highest
+    // correlation with better resolution and so on.
+    for (scanCount = 0;scanCount < 4; scanCount ++)
+    {
+        j = 0;
+        while (_scanOffsets[scanCount][j])
+        {
+            tempOffset = corrOffset + _scanOffsets[scanCount][j];
+            if (tempOffset >= seekLength) break;
+
+            // Calculates correlation value for the mixing position corresponding
+            // to 'tempOffset'
+            corr = (double)calcCrossCorrMono(refPos + tempOffset, pRefMidBuffer);
+            // heuristic rule to slightly favour values close to mid of the range
+            double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
+            corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
+
+            // Checks for the highest correlation value
+            if (corr > bestCorr)
+            {
+                bestCorr = corr;
+                bestOffs = tempOffset;
+            }
+            j ++;
+        }
+        corrOffset = bestOffs;
+    }
+    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+    clearCrossCorrState();
+
+    return bestOffs;
+}
+
+
+/// Clears cross correlation routine state if necessary; called at the end of each seek routine
+void TDStretch::clearCrossCorrState()
+{
+    // default implementation is empty.
+}
+
+
+/// Calculates processing sequence and seek window lengths according to tempo setting
+void TDStretch::calcSeqParameters()
+{
+    // Adjust the sequence parameters according to tempo, so that a varying processing sequence
+    // length is used at various tempo settings, between the given low...top limits
+    #define AUTOSEQ_TEMPO_LOW   0.5     // auto setting low tempo range (-50%)
+    #define AUTOSEQ_TEMPO_TOP   2.0     // auto setting top tempo range (+100%)
+
+    // sequence-ms setting values at above low & top tempo
+    #define AUTOSEQ_AT_MIN      125.0
+    #define AUTOSEQ_AT_MAX      50.0
+    #define AUTOSEQ_K           ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
+    #define AUTOSEQ_C           (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
+
+    // seek-window-ms setting values at above low & top tempo
+    #define AUTOSEEK_AT_MIN     25.0
+    #define AUTOSEEK_AT_MAX     15.0
+    #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
+    #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
+
+    #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
+
+    double seq, seek;
+    
+    if (bAutoSeqSetting)
+    {
+        seq = AUTOSEQ_C + AUTOSEQ_K * tempo;    // linear in tempo: 150 - 50 * tempo
+        seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
+        sequenceMs = (int)(seq + 0.5);
+    }
+
+    if (bAutoSeekSetting)
+    {
+        seek = AUTOSEEK_C + AUTOSEEK_K * tempo; // linear in tempo: 25 at tempo 0.5 down to 15 at tempo 2.0
+        seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
+        seekWindowMs = (int)(seek + 0.5);
+    }
+
+    // Update seek window lengths
+    seekWindowLength = (sampleRate * sequenceMs) / 1000;    // sequence length in samples
+    if (seekWindowLength < 2 * overlapLength) 
+    {
+        seekWindowLength = 2 * overlapLength;
+    }
+    seekLength = (sampleRate * seekWindowMs) / 1000;        // seek window length in samples
+}
+
+
+
+// Sets a new target tempo and updates the processing parameters that depend on it.
+// Smaller values represent slower tempo, larger values faster tempo.
+void TDStretch::setTempo(float newTempo)
+{
+    tempo = newTempo;
+
+    // Sequence & seek window lengths depend on the tempo, so refresh them first
+    calcSeqParameters();
+
+    // Ideal (fractional) number of input samples to skip per processed sequence
+    nominalSkip = tempo * (seekWindowLength - overlapLength);
+
+    // The same skip as whole samples (0.5 added, then truncated)
+    const int roundedSkip = (int)(nominalSkip + 0.5f);
+
+    // Number of samples needed in the 'inputBuffer' to process another batch
+    // of samples: the skip plus one overlap period or a full sequence, whichever
+    // is larger, and on top of that the whole seek range
+    sampleReq = max(roundedSkip + overlapLength, seekWindowLength) + seekLength;
+}
+
+
+
+// Selects the number of channels to process, 1 = mono, 2 = stereo
+void TDStretch::setChannels(int numChannels)
+{
+    assert(numChannels > 0);
+    const bool sameAsBefore = (numChannels == channels);
+    if (sameAsBefore) return;    // nothing to reconfigure
+    assert(numChannels == 1 || numChannels == 2);
+    channels = numChannels;
+    inputBuffer.setChannels(numChannels);
+    outputBuffer.setChannels(numChannels);
+}
+
+
+// nominal tempo, no need for processing, just pass the samples through
+// to outputBuffer
+/*
+void TDStretch::processNominalTempo()
+{
+    assert(tempo == 1.0f);
+
+    if (bMidBufferDirty) 
+    {
+        // If there are samples in pMidBuffer waiting for overlapping,
+        // do a single sliding overlapping with them in order to prevent a 
+        // clicking distortion in the output sound
+        if (inputBuffer.numSamples() < overlapLength) 
+        {
+            // wait until we've got overlapLength input samples
+            return;
+        }
+        // Mix the samples in the beginning of 'inputBuffer' with the 
+        // samples in 'midBuffer' using sliding overlapping 
+        overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
+        outputBuffer.putSamples(overlapLength);
+        inputBuffer.receiveSamples(overlapLength);
+        clearMidBuffer();
+        // now we've caught the nominal sample flow and may switch to
+        // bypass mode
+    }
+
+    // Simply bypass samples from input to output
+    outputBuffer.moveSamples(inputBuffer);
+}
+*/
+
+#include <stdio.h>
+
+// Processes as many processing frames of the samples 'inputBuffer' as are
+// available and stores the result into 'outputBuffer'. Each round seeks the
+// best overlap offset, cross-fades 'midBuffer' with the input at that offset,
+// copies the rest of the sequence to the output, saves the end of the sequence
+// into 'midBuffer' for the next round and finally skips forward in the input.
+void TDStretch::processSamples()
+{
+    int ovlSkip, offset;
+    int temp;
+
+    /* Removed this small optimization - can introduce a click to sound when tempo setting
+       crosses the nominal value
+    if (tempo == 1.0f) 
+    {
+        // tempo not changed from the original, so bypass the processing
+        processNominalTempo();
+        return;
+    }
+    */
+
+    // Process samples as long as there are enough samples in 'inputBuffer'
+    // to form a processing frame.
+    while ((int)inputBuffer.numSamples() >= sampleReq) 
+    {
+        // Scan for the best overlapping position within the seek range
+        offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
+
+        // Length of the sequence part that is copied to the output as such,
+        // i.e. the sequence without the overlap periods at its both ends
+        temp = (seekWindowLength - 2 * overlapLength);
+
+        // Crosscheck that the sequence doesn't reach beyond the buffered input.
+        // This shouldn't really happen; if it does, stop and wait for more
+        // input. The check is done before anything is written to the output,
+        // and the loop is left with 'break' because nothing above consumes
+        // input: retrying with 'continue' would repeat the same round forever
+        // while the output keeps growing.
+        if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2))
+        {
+            break;
+        }
+
+        // Mix the samples in the 'inputBuffer' at position of 'offset' with the 
+        // samples in 'midBuffer' using sliding overlapping
+        // ... first partially overlap with the end of the previous sequence
+        // (that's in 'midBuffer')
+        overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset);
+        outputBuffer.putSamples((uint)overlapLength);
+
+        // ... then copy sequence samples from 'inputBuffer' to output. The
+        // position is counted in samples, so it's multiplied by the number
+        // of channels before it's added to the buffer pointer:
+        outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp);
+
+        // Copies the end of the current sequence from 'inputBuffer' to 
+        // 'midBuffer' for being mixed with the beginning of the next 
+        // processing sequence and so on
+        assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples());
+        memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), 
+            channels * sizeof(SAMPLETYPE) * overlapLength);
+
+        // Remove the processed samples from the input buffer. Update
+        // the difference between integer & nominal skip step to 'skipFract'
+        // in order to prevent the error from accumulating over time.
+        skipFract += nominalSkip;   // real skip size
+        ovlSkip = (int)skipFract;   // rounded to integer skip
+        skipFract -= ovlSkip;       // maintain the fraction part, i.e. real vs. integer skip
+        inputBuffer.receiveSamples((uint)ovlSkip);
+    }
+}
+
+
+// Adds 'nSamples' pcs of samples from the 'samples' memory position into
+// the input of the object, and then processes the input that has been buffered so far.
+void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+    // Add the samples into the input buffer
+    inputBuffer.putSamples(samples, nSamples);
+    // Process the samples in input buffer
+    processSamples();
+}
+
+
+
+/// Set new overlap length parameter & reallocate the mid buffers if the new length
+/// is longer than the previous one (a shorter length keeps the existing buffers).
+void TDStretch::acceptNewOverlapLength(int newOverlapLength)
+{
+    assert(newOverlapLength >= 0);
+    const int prevOvl = overlapLength;
+    overlapLength = newOverlapLength;
+
+    if (overlapLength > prevOvl)
+    {
+        delete[] pMidBuffer;
+        delete[] pRefMidBufferUnaligned;
+
+        pMidBuffer = new SAMPLETYPE[overlapLength * 2];
+        clearMidBuffer();
+
+        // allocate 16 bytes extra and round the address up to a 16 byte boundary for efficiency.
+        // The address is handled as 'size_t': a 32-bit 'ulong' would truncate it on 64-bit Windows
+        pRefMidBufferUnaligned = new SAMPLETYPE[2 * overlapLength + 16 / sizeof(SAMPLETYPE)];
+        pRefMidBuffer = (SAMPLETYPE *)((((size_t)pRefMidBufferUnaligned) + 15) & ~(size_t)15);
+    }
+}
+
+
+// Operator 'new' is overloaded to always throw, so that instances can't be created directly
+// with 'new TDStretch'; 'newInstance' creates the instance suitable for the CPU instead.
+void * TDStretch::operator new(size_t s)
+{
+    // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead!
+    throw std::runtime_error("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!");
+    return NULL;    // never reached, as the statement above always throws
+}
+
+
+TDStretch * TDStretch::newInstance()
+{
+    // Returns a new object of the most capable implementation that has been
+    // compiled in (ALLOW_* defines) and that the CPU supports. The candidates
+    // are tried in the order MMX, SSE, 3DNow!; the plain C version is the
+    // fallback. The global '::new' is used because the 'operator new' of the
+    // class itself always throws.
+    const uint cpuFeatures = detectCPUextensions();
+
+    // Every candidate below returns right away when it's selected, so the
+    // conditionally compiled blocks need no 'else' chaining between them.
+
+#ifdef ALLOW_MMX
+    // MMX routines available only with integer sample types
+    if (cpuFeatures & SUPPORT_MMX)
+    {
+        return ::new TDStretchMMX;
+    }
+#endif // ALLOW_MMX
+
+
+#ifdef ALLOW_SSE
+    // SSE support
+    if (cpuFeatures & SUPPORT_SSE)
+    {
+        return ::new TDStretchSSE;
+    }
+#endif // ALLOW_SSE
+
+
+#ifdef ALLOW_3DNOW
+    // 3DNow! support
+    if (cpuFeatures & SUPPORT_3DNOW)
+    {
+        return ::new TDStretch3DNow;
+    }
+#endif // ALLOW_3DNOW
+
+
+    // ISA optimizations not supported, use plain C version
+    return ::new TDStretch;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Integer arithmetics specific algorithm implementations.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef INTEGER_SAMPLES
+
+// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
+// is faster to calculate. The scaling is calculated in 'double', because
+// sample * weight reaches 2^33 when the overlap length is 1024 samples and
+// that overflows 32-bit integer arithmetic.
+void TDStretch::precalcCorrReferenceStereo()
+{
+    for (int i = 0; i < (int)overlapLength; i ++)
+    {
+        // parabolic weight: zero at i = 0, largest in the middle of the overlap
+        const double weight = (double)(i * (overlapLength - i));
+        const int cnt2 = i * 2;
+
+        // the operands are exact in double, so the truncated quotient equals the
+        // result of the integer division whenever that one doesn't overflow
+        pRefMidBuffer[cnt2] = (short)(pMidBuffer[cnt2] * weight / slopingDivider);
+        pRefMidBuffer[cnt2 + 1] = (short)(pMidBuffer[cnt2 + 1] * weight / slopingDivider);
+    }
+}
+
+
+// Slopes the amplitude of the 'midBuffer' samples so that cross correlation
+// is faster to calculate. The scaling is calculated in 'double', because sample * weight
+// reaches 2^33 with a 1024 sample overlap, which overflows 'long' where it's 32 bits wide.
+void TDStretch::precalcCorrReferenceMono()
+{
+    for (int i = 0; i < (int)overlapLength; i ++)
+    {
+        // parabolic weight: zero at i = 0, largest in the middle of the overlap
+        const double weight = (double)(i * (overlapLength - i));
+
+        // the operands are exact in double, so the truncated quotient equals the
+        // result of the integer division whenever that one doesn't overflow
+        pRefMidBuffer[i] = (short)(pMidBuffer[i] * weight / slopingDivider);
+    }
+}
+
+
+// Cross-fades the stereo samples in 'midBuffer' into the samples in 'input' and
+// writes the resulting 'overlapLength' stereo samples to 'poutput'.
+void TDStretch::overlapStereo(short *poutput, const short *input) const
+{
+    for (int frame = 0; frame < overlapLength; frame ++)
+    {
+        // the weight of 'midBuffer' decreases linearly while the weight of
+        // 'input' increases; the result is divided by 'overlapLength'
+        const short fadeOut = (short)(overlapLength - frame);
+        const int left = 2 * frame;
+        const int right = left + 1;
+
+        poutput[left] = (input[left] * frame + pMidBuffer[left] * fadeOut) / overlapLength;
+        poutput[right] = (input[right] * frame + pMidBuffer[right] * fadeOut) / overlapLength;
+    }
+}
+
+static int _getClosest2Power(double value)   // returns the x having the closest 2^x value for 'value'
+{
+    const double log2OfValue = log(value) / log(2.0);
+    return (int)(log2OfValue + 0.5);    // 0.5 is added so that the truncating cast rounds positive results
+}
+
+
+/// Calculates overlap period length in samples.
+/// Integer version rounds overlap length to closest power of 2
+/// for a divide scaling operation; the result is limited to 16...1024 samples.
+void TDStretch::calculateOverlapLength(int aoverlapMs)
+{
+    assert(aoverlapMs >= 0);
+
+    // calculate overlap length so that it's power of 2 - thus it's easy to do
+    // integer division by right-shifting. Term "-1" at end is to account for 
+    // the extra most significant bit left unused in result by signed multiplication.
+    // A zero length has no logarithm, so it's mapped directly to the lower limit.
+    const double ovlSamples = (sampleRate * aoverlapMs) / 1000.0;
+    overlapDividerBits = (ovlSamples > 0.0) ? _getClosest2Power(ovlSamples) - 1 : 3;
+    if (overlapDividerBits > 9) overlapDividerBits = 9;
+    if (overlapDividerBits < 3) overlapDividerBits = 3;
+
+    const int newOvl = 1 << (overlapDividerBits + 1);    // exact 2^x; +1 => account for -1 above
+    acceptNewOverlapLength(newOvl);
+
+    // calculate sloping divider so that crosscorrelation operation won't 
+    // overflow 32-bit register. Max. sum of the crosscorrelation sum without 
+    // divider would be 2^30*(N^3-N)/3, where N = overlap length
+    slopingDivider = (newOvl * newOvl - 1) / 3;
+}
+
+
+long TDStretch::calcCrossCorrMono(const short *mixingPos, const short *compare) const
+{
+    // Normalized cross correlation between 'mixingPos' and 'compare' over the
+    // overlap period. Sample 0 is left out of both sums.
+    long crossSum = 0;      // sum of the scaled mixingPos * compare products
+    long energySum = 0;     // sum of the scaled squares of the 'mixingPos' samples
+    for (int pos = 1; pos < overlapLength; pos ++)
+    {
+        const int sample = mixingPos[pos];
+        crossSum += (sample * compare[pos]) >> overlapDividerBits;
+        energySum += (sample * sample) >> overlapDividerBits;
+    }
+
+    // Normalize by dividing with sqrt(energy) in floating point; a zero
+    // energy is replaced with 1 to avoid division by zero
+    const double divisor = sqrt((double)((energySum == 0) ? 1 : energySum));
+    return (long)((double)crossSum * SHRT_MAX / divisor);
+}
+
+
+long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare) const
+{
+    long corr = 0;
+    long norm = 0;
+    for (int i = 2; i < 2 * overlapLength; i += 2) 
+    {
+        corr += (mixingPos[i] * compare[i] +
+                 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;
+        // the squares are summed as 'unsigned': two full-scale negative samples give
+        // 2^30 + 2^30 = 2^31, which doesn't fit into a signed 32-bit integer
+        norm += (long)(((unsigned int)(mixingPos[i] * mixingPos[i]) +
+                        (unsigned int)(mixingPos[i + 1] * mixingPos[i + 1])) >> overlapDividerBits);
+    }
+
+    // Normalize result by dividing by sqrt(norm) - this step is easiest 
+    // done using floating point operation
+    if (norm == 0) norm = 1;    // to avoid div by zero
+    return (long)((double)corr * SHRT_MAX / sqrt((double)norm));
+}
+
+#endif // INTEGER_SAMPLES
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Floating point arithmetics specific algorithm implementations.
+//
+
+#ifdef FLOAT_SAMPLES
+
+
+// Weights the 'midBuffer' samples with a parabolic window into 'pRefMidBuffer',
+// giving the reference signal that the cross correlation is calculated against
+void TDStretch::precalcCorrReferenceStereo()
+{
+    const int frames = (int)overlapLength;
+
+    for (int frame = 0; frame < frames; frame ++)
+    {
+        // window value is zero at frame 0 and largest in the middle of the overlap
+        const float weight = (float)frame * (float)(overlapLength - frame);
+        const int left = frame * 2;
+        pRefMidBuffer[left] = (float)(pMidBuffer[left] * weight);
+        pRefMidBuffer[left + 1] = (float)(pMidBuffer[left + 1] * weight);
+    }
+}
+
+
+// Weights the 'midBuffer' samples with a parabolic window into 'pRefMidBuffer',
+// giving the reference signal that the cross correlation is calculated against
+void TDStretch::precalcCorrReferenceMono()
+{
+    const int count = (int)overlapLength;
+
+    for (int pos = 0; pos < count; pos ++)
+    {
+        // window value is zero at pos 0 and largest in the middle of the overlap
+        const float weight = (float)pos * (float)(overlapLength - pos);
+        pRefMidBuffer[pos] = (float)(pMidBuffer[pos] * weight);
+    }
+}
+
+
+// Cross-fades the stereo samples in 'midBuffer' into the samples in 'pInput' and
+// writes the resulting 'overlapLength' stereo samples to 'pOutput'
+void TDStretch::overlapStereo(float *pOutput, const float *pInput) const
+{
+    // the weights are multiplied with the reciprocal of the overlap length
+    const float invLength = 1.0f / (float)overlapLength;
+    const float *pMid = pMidBuffer;
+
+    for (int frame = 0; frame < (int)overlapLength; frame ++)
+    {
+        // weights of the previous sequence ('midBuffer') and of the new input
+        const float midWeight = (float)(overlapLength - frame) * invLength;
+        const float inWeight = (float)frame * invLength;
+        pOutput[0] = pInput[0] * inWeight + pMid[0] * midWeight;
+        pOutput[1] = pInput[1] * inWeight + pMid[1] * midWeight;
+        pOutput += 2;
+        pInput += 2;
+        pMid += 2;
+    }
+}
+
+
+/// Converts the overlap period from milliseconds to samples and takes it into use.
+void TDStretch::calculateOverlapLength(int overlapInMsec)
+{
+    assert(overlapInMsec >= 0);
+
+    // length in samples, at least 16
+    int samples = (sampleRate * overlapInMsec) / 1000;
+    samples = (samples < 16) ? 16 : samples;
+
+    // round down so that the length is divisible by 8
+    const int remainder = samples % 8;
+
+    acceptNewOverlapLength(samples - remainder);
+}
+
+
+
+double TDStretch::calcCrossCorrMono(const float *mixingPos, const float *compare) const
+{
+    // Cross correlation of 'mixingPos' with 'compare', divided by the square root
+    // of the 'mixingPos' energy. Sample 0 is left out of both sums.
+    double crossSum = 0;
+    double energySum = 0;
+    for (int pos = 1; pos < overlapLength; pos ++)
+    {
+        crossSum += mixingPos[pos] * compare[pos];
+        energySum += mixingPos[pos] * mixingPos[pos];
+    }
+
+    // an energy below 1e-9 is replaced with 1 to avoid division by zero
+    const double divisor = sqrt((energySum < 1e-9) ? 1.0 : energySum);
+    return crossSum / divisor;
+}
+
+
+double TDStretch::calcCrossCorrStereo(const float *mixingPos, const float *compare) const
+{
+    // Cross correlation of 'mixingPos' with 'compare', divided by the square root
+    // of the 'mixingPos' energy. The first stereo sample is left out of both sums.
+    double crossSum = 0;
+    double energySum = 0;
+    const int valueCount = 2 * overlapLength;
+    for (int left = 2; left < valueCount; left += 2)
+    {
+        const int right = left + 1;
+        crossSum += mixingPos[left] * compare[left] + mixingPos[right] * compare[right];
+        energySum += mixingPos[left] * mixingPos[left] + mixingPos[right] * mixingPos[right];
+    }
+
+    // an energy below 1e-9 is replaced with 1 to avoid division by zero
+    const double divisor = sqrt((energySum < 1e-9) ? 1.0 : energySum);
+    return crossSum / divisor;
+}
+
+#endif // FLOAT_SAMPLES
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/TDStretch.h b/plugins/soundtouch/soundtouch/source/SoundTouch/TDStretch.h
new file mode 100644
index 00000000..00d1f3e3
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/TDStretch.h
@@ -0,0 +1,275 @@
+////////////////////////////////////////////////////////////////////////////////
+/// 
+/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo 
+/// while maintaining the original pitch by using a time domain WSOLA-like method 
+/// with several performance-increasing tweaks.
+///
+/// Note : MMX/SSE optimized functions reside in separate, platform-specific files 
+/// 'mmx_optimized.cpp' and 'sse_optimized.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-05-17 14:35:13 +0300 (Sun, 17 May 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: TDStretch.h 71 2009-05-17 11:35:13Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef TDStretch_H
+#define TDStretch_H
+
+#include <stddef.h>
+#include "STTypes.h"
+#include "RateTransposer.h"
+#include "FIFOSamplePipe.h"
+
+namespace soundtouch
+{
+
+/// Default values for sound processing parameters:
+/// Notice that the default parameters are tuned for contemporary popular music 
+/// processing. For speech processing applications these parameters suit better:
+///     #define DEFAULT_SEQUENCE_MS     40
+///     #define DEFAULT_SEEKWINDOW_MS   15
+///     #define DEFAULT_OVERLAP_MS      8
+///
+
+/// Default length of a single processing sequence, in milliseconds. This determines to how 
+/// long sequences the original sound is chopped in the time-stretch algorithm.
+///
+/// The larger this value is, the fewer sequences are used in processing. In principle
+/// a bigger value sounds better when slowing down tempo, but worse when increasing tempo
+/// and vice versa.
+///
+/// Increasing this value reduces computational burden & vice versa.
+//#define DEFAULT_SEQUENCE_MS         40
+#define DEFAULT_SEQUENCE_MS         USE_AUTO_SEQUENCE_LEN
+
+/// Giving this value for the sequence length sets automatic parameter value
+/// according to tempo setting (recommended)
+#define USE_AUTO_SEQUENCE_LEN       0
+
+/// Seeking window default length in milliseconds for algorithm that finds the best possible 
+/// overlapping location. This determines from how wide window the algorithm may look for an 
+/// optimal joining location when mixing the sound sequences back together. 
+///
+/// The bigger this window setting is, the more likely it is that a better mixing position
+/// is found, but at the same time large values may cause a "drifting" artifact
+/// because consecutive sequences will be taken at more uneven intervals.
+///
+/// If there's a disturbing artifact that sounds as if a constant frequency was drifting 
+/// around, try reducing this setting.
+///
+/// Increasing this value increases computational burden & vice versa.
+//#define DEFAULT_SEEKWINDOW_MS       15
+#define DEFAULT_SEEKWINDOW_MS       USE_AUTO_SEEKWINDOW_LEN
+
+/// Giving this value for the seek window length sets automatic parameter value
+/// according to tempo setting (recommended)
+#define USE_AUTO_SEEKWINDOW_LEN     0
+
+/// Overlap length in milliseconds. When the chopped sound sequences are mixed back together, 
+/// to form a continuous sound stream, this parameter defines over how long period the two 
+/// consecutive sequences are let to overlap each other. 
+///
+/// This shouldn't be that critical parameter. If you reduce the DEFAULT_SEQUENCE_MS setting 
+/// by a large amount, you might wish to try a smaller value on this.
+///
+/// Increasing this value increases computational burden & vice versa.
+#define DEFAULT_OVERLAP_MS      8
+
+
+/// Class that does the time-stretch (tempo change) effect for the processed
+/// sound. Instances are created with 'newInstance', not with the 'new' operator.
+class TDStretch : public FIFOProcessor
+{
+protected:
+    int channels;                       ///< Number of channels, 1 = mono, 2 = stereo
+    int sampleReq;                      ///< Input samples required for processing one sequence
+    float tempo;                        ///< Tempo setting, see 'setTempo'
+
+    SAMPLETYPE *pMidBuffer;             ///< End of the previous sequence, to be overlapped with the next one
+    SAMPLETYPE *pRefMidBuffer;          ///< 'pMidBuffer' sloped for cross correlation; 16-byte aligned
+    SAMPLETYPE *pRefMidBufferUnaligned; ///< The allocation that 'pRefMidBuffer' points into
+    int overlapLength;                  ///< Overlap period length in samples
+    int seekLength;                     ///< Seek window length in samples ('seekWindowMs' converted)
+    int seekWindowLength;               ///< Processing sequence length in samples ('sequenceMs' converted)
+    int overlapDividerBits;             ///< Integer samples: right-shift used in cross correlation sums
+    int slopingDivider;                 ///< Integer samples: divider used when sloping the reference buffer
+    float nominalSkip;                  ///< Ideal input skip per sequence, fraction included
+    float skipFract;                    ///< Fraction of the skip carried over to the next sequence
+    FIFOSampleBuffer outputBuffer;
+    FIFOSampleBuffer inputBuffer;
+    BOOL bQuickSeek;
+//    int outDebt;
+//    BOOL bMidBufferDirty;
+
+    int sampleRate;
+    int sequenceMs;
+    int seekWindowMs;
+    int overlapMs;
+    BOOL bAutoSeqSetting;               ///< Nonzero: 'sequenceMs' is calculated from the tempo
+    BOOL bAutoSeekSetting;              ///< Nonzero: 'seekWindowMs' is calculated from the tempo
+
+    void acceptNewOverlapLength(int newOverlapLength);
+
+    virtual void clearCrossCorrState();
+    void calculateOverlapLength(int overlapMs);
+
+    virtual LONG_SAMPLETYPE calcCrossCorrStereo(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
+    virtual LONG_SAMPLETYPE calcCrossCorrMono(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const;
+
+    virtual int seekBestOverlapPositionStereo(const SAMPLETYPE *refPos);
+    virtual int seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos);
+    virtual int seekBestOverlapPositionMono(const SAMPLETYPE *refPos);
+    virtual int seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos);
+    int seekBestOverlapPosition(const SAMPLETYPE *refPos);
+
+    virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const;
+    virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const;
+
+    void clearMidBuffer();
+    void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const;
+
+    void precalcCorrReferenceMono();
+    void precalcCorrReferenceStereo();
+
+    void calcSeqParameters();
+
+    /// Processes the samples in 'inputBuffer' into 'outputBuffer' for as long as
+    /// there are enough input samples to form a full processing sequence.
+    /// The function has no return value; the processed samples are
+    /// available through the output buffer.
+    void processSamples();
+    
+public:
+    TDStretch();
+    virtual ~TDStretch();
+
+    /// Operator 'new' is overloaded to throw an exception, so that instances can't be created
+    /// directly with 'new TDStretch'. Use 'newInstance' for creating the instances instead.
+    static void *operator new(size_t s);
+
+    /// Use this function instead of "new" operator to create a new instance of this class. 
+    /// This function automatically chooses a correct feature set depending on if the CPU
+    /// supports MMX/SSE/etc extensions.
+    static TDStretch *newInstance();
+    
+    /// Returns the output buffer object
+    FIFOSamplePipe *getOutput() { return &outputBuffer; };
+
+    /// Returns the input buffer object
+    FIFOSamplePipe *getInput() { return &inputBuffer; };
+
+    /// Sets new target tempo. Smaller values represent slower tempo, larger values
+    /// faster tempo (e.g. 0.5 = -50%, 2.0 = +100%).
+    void setTempo(float newTempo);
+
+    /// NOTE(review): returns nothing; presumably clears the sample buffers - confirm from the implementation.
+    virtual void clear();
+
+    /// Clears the input buffer
+    void clearInput();
+
+    /// Sets the number of channels, 1 = mono, 2 = stereo
+    void setChannels(int numChannels);
+
+    /// Enables/disables the quick position seeking algorithm. Zero to disable, 
+    /// nonzero to enable
+    void enableQuickSeek(BOOL enable);
+
+    /// Returns nonzero if the quick seeking algorithm is enabled.
+    BOOL isQuickSeekEnabled() const;
+
+    /// Sets routine control parameters. These control are certain time constants
+    /// defining how the sound is stretched to the desired duration.
+    //
+    /// 'sampleRate' = sample rate of the sound
+    /// 'sequenceMS' = one processing sequence length in milliseconds
+    /// 'seekwindowMS' = seeking window length for scanning the best overlapping 
+    ///      position
+    /// 'overlapMS' = overlapping length
+    void setParameters(int sampleRate,          ///< Samplerate of sound being processed (Hz)
+                       int sequenceMS = -1,     ///< Single processing sequence length (ms)
+                       int seekwindowMS = -1,   ///< Offset seeking window length (ms)
+                       int overlapMS = -1       ///< Sequence overlapping length (ms)
+                       );
+
+    /// Get routine control parameters, see setParameters() function.
+    /// Any of the parameters to this function can be NULL, in such case corresponding parameter
+    /// value isn't returned.
+    void getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const;
+
+    /// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+    /// the input of the object, and processes the input buffered so far.
+    virtual void putSamples(
+            const SAMPLETYPE *samples,  ///< Input sample data
+            uint numSamples                         ///< Number of samples in 'samples' so that one sample
+                                                    ///< contains both channels if stereo
+            );
+};
+
+
+
+// Implementation-specific class declarations:
+
+#ifdef ALLOW_MMX
+    /// Class that implements MMX optimized versions of the TDStretch routines for 16bit integer samples type.
+    class TDStretchMMX : public TDStretch
+    {
+    protected:
+        long calcCrossCorrStereo(const short *mixingPos, const short *compare) const;   ///< Stereo cross correlation
+        virtual void overlapStereo(short *output, const short *input) const;             ///< Stereo overlap mixing
+        virtual void clearCrossCorrState();                                              ///< Called after the overlap position seek
+    };
+#endif /// ALLOW_MMX
+
+
+#ifdef ALLOW_3DNOW
+    /// Class that implements a 3DNow! optimized stereo cross correlation for floating point samples type.
+    class TDStretch3DNow : public TDStretch
+    {
+    protected:
+        double calcCrossCorrStereo(const float *mixingPos, const float *compare) const;   ///< Stereo cross correlation
+    };
+#endif /// ALLOW_3DNOW
+
+
+#ifdef ALLOW_SSE
+    /// Class that implements an SSE optimized stereo cross correlation for floating point samples type.
+    class TDStretchSSE : public TDStretch
+    {
+    protected:
+        double calcCrossCorrStereo(const float *mixingPos, const float *compare) const;   ///< Stereo cross correlation
+    };
+
+#endif /// ALLOW_SSE
+
+}
+#endif  /// TDStretch_H
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect.h b/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect.h
new file mode 100644
index 00000000..025781da
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect.h
@@ -0,0 +1,62 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// A header file for detecting the instruction set extensions (MMX, 3DNow!, SSE etc.) of the CPU.
+///
+/// Please see 'mmx_win.cpp', 'mmx_cpp.cpp' and 'mmx_non_x86.cpp' for the 
+/// routine implementations for x86 Windows, x86 gnu version and non-x86 
+/// platforms, respectively.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $
+// File revision : $Revision: 4 $
+//
+// $Id: cpu_detect.h 11 2008-02-10 16:26:55Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _CPU_DETECT_H_
+#define _CPU_DETECT_H_
+
+#include "STTypes.h"
+
+#define SUPPORT_MMX         0x0001      ///< Bit flag for the MMX extension
+#define SUPPORT_3DNOW       0x0002      ///< Bit flag for the 3DNow! extension
+#define SUPPORT_ALTIVEC     0x0004      ///< Bit flag for the AltiVec extension
+#define SUPPORT_SSE         0x0008      ///< Bit flag for the SSE extension
+#define SUPPORT_SSE2        0x0010      ///< Bit flag for the SSE2 extension
+
+/// Checks which instruction set extensions are supported by the CPU.
+///
+/// \return A bitmask of supported extensions, see SUPPORT_... defines; test the bits with '&'.
+uint detectCPUextensions(void);
+
+/// Disables given set of instruction extensions. 'wDisableMask' is a bitmask of SUPPORT_... defines.
+void disableExtensions(uint wDisableMask);
+
+#endif  // _CPU_DETECT_H_
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect_x86_gcc.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect_x86_gcc.cpp
new file mode 100644
index 00000000..b0d0a693
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect_x86_gcc.cpp
@@ -0,0 +1,135 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Generic version of the x86 CPU extension detection routine.
+///
+/// This file is for GNU & other non-Windows compilers, see 'cpu_detect_x86_win.cpp' 
+/// for the Microsoft compiler version.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-25 19:13:51 +0200 (Wed, 25 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: cpu_detect_x86_gcc.cpp 67 2009-02-25 17:13:51Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <stdexcept>
+#include <string>
+#include "cpu_detect.h"
+#include "STTypes.h"
+
+using namespace std;
+
+#include <stdio.h>
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// processor instructions extension detection routines
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Flag variable indicating which ISA extensions are disabled (for debugging)
+static uint _dwDisabledISA = 0x00;      // 0xffffffff; //<- use this to disable all extensions
+
+// Disables the given set of instruction extensions (bitmask of SUPPORT_... defines); replaces any previously set mask.
+void disableExtensions(uint dwDisableMask)
+{
+    _dwDisabledISA = dwDisableMask;     // masked out of the result by detectCPUextensions()
+}
+
+
+
+/// Checks which instruction set extensions are supported by the CPU; returns a SUPPORT_... bitmask minus the bits disabled via disableExtensions().
+uint detectCPUextensions(void)
+{
+#if (!(ALLOW_X86_OPTIMIZATIONS) || !(__GNUC__))
+
+    return 0; // always disable extensions on non-x86 platforms.
+
+#else
+    uint res = 0;
+
+    if (_dwDisabledISA == 0xffffffff) return 0;
+    // NOTE(review): the asm below reads the stack through %esp, i.e. it assumes a 32-bit x86 target - confirm before enabling on x86-64.
+    asm volatile(
+        "\n\tmovl    %%ebx, %%edi"       // save ebx: cpuid overwrites it, and it is callee-saved / the PIC register on i386
+        "\n\txor     %%esi, %%esi"       // clear %%esi = result register
+        // check if 'cpuid' instruction is available by toggling eflags bit 21
+        "\n\tpushf"                      // save eflags to stack
+        "\n\tmovl    (%%esp), %%eax"     // load eax from stack (with eflags)
+        "\n\tmovl    %%eax, %%ecx"       // save the original eflags values to ecx
+        "\n\txor     $0x00200000, %%eax" // toggle bit 21
+        "\n\tmovl    %%eax, (%%esp)"     // store toggled eflags to stack
+        "\n\tpopf"                       // load eflags from stack
+        "\n\tpushf"                      // save updated eflags to stack
+        "\n\tmovl    (%%esp), %%eax"     // load eax from stack
+        "\n\tpopf"                       // pop stack to restore esp
+        "\n\txor     %%edx, %%edx"       // clear edx for defaulting no mmx
+        "\n\tcmp     %%ecx, %%eax"       // compare to original eflags values
+        "\n\tjz      end"                // jumps to 'end' if cpuid not present
+        // cpuid instruction available, test for presence of mmx instructions
+
+        "\n\tmovl    $1, %%eax"
+        "\n\tcpuid"
+        "\n\ttest    $0x00800000, %%edx"
+        "\n\tjz      end"                // branch if MMX not available
+
+        "\n\tor      $0x01, %%esi"       // otherwise add MMX support bit (SUPPORT_MMX)
+
+        "\n\ttest    $0x02000000, %%edx"
+        "\n\tjz      test3DNow"          // branch if SSE not available
+
+        "\n\tor      $0x08, %%esi"       // otherwise add SSE support bit (SUPPORT_SSE)
+
+    "\n\ttest3DNow:"
+        // test for presence of AMD extensions
+        "\n\tmov     $0x80000000, %%eax"
+        "\n\tcpuid"
+        "\n\tcmp     $0x80000000, %%eax"
+        "\n\tjbe     end"                 // branch if no AMD extensions detected
+
+        // test for presence of 3DNow! extension
+        "\n\tmov     $0x80000001, %%eax"
+        "\n\tcpuid"
+        "\n\ttest    $0x80000000, %%edx"
+        "\n\tjz      end"                  // branch if 3DNow! not detected
+
+        "\n\tor      $0x02, %%esi"         // otherwise add 3DNow support bit (SUPPORT_3DNOW)
+
+    "\n\tend:"
+        "\n\tmovl    %%edi, %%ebx"       // restore ebx before the result is stored (the output may itself live in ebx)
+        "\n\tmov     %%esi, %0"
+
+      : "=r" (res)
+      : /* no inputs */
+      : "%edx", "%eax", "%ecx", "%esi", "%edi", "cc" );
+      
+    return res & ~_dwDisabledISA;
+#endif
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect_x86_win.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect_x86_win.cpp
new file mode 100644
index 00000000..c6c54246
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/cpu_detect_x86_win.cpp
@@ -0,0 +1,129 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Win32 version of the x86 CPU detect routine.
+///
+/// This file is to be compiled in Windows platform with Microsoft Visual C++ 
+/// Compiler. Please see 'cpu_detect_x86_gcc.cpp' for the gcc compiler version 
+/// for all GNU platforms.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-13 18:22:48 +0200 (Fri, 13 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: cpu_detect_x86_win.cpp 62 2009-02-13 16:22:48Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "cpu_detect.h"
+
+#ifndef WIN32
+#error wrong platform - this source code file is exclusively for Win32 platform
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// processor instructions extension detection routines
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Flag variable indicating which ISA extensions are disabled (for debugging)
+static uint _dwDisabledISA = 0x00;      // 0xffffffff; //<- use this to disable all extensions
+
+
+// Disables the given set of instruction extensions (bitmask of SUPPORT_... defines); replaces any previously set mask.
+void disableExtensions(uint dwDisableMask)
+{
+    _dwDisabledISA = dwDisableMask;     // masked out of the result by detectCPUextensions()
+}
+
+
+
+/// Checks which instruction set extensions are supported by the CPU; returns a SUPPORT_... bitmask minus the bits disabled via disableExtensions().
+uint detectCPUextensions(void)
+{
+    uint res = 0;
+
+    if (_dwDisabledISA == 0xffffffff) return 0;
+
+    _asm 
+    {
+        ; check if 'cpuid' instructions is available by toggling eflags bit 21
+        ;
+        xor     esi, esi            ; clear esi = result register
+
+        pushfd                      ; save eflags to stack
+        mov     eax,dword ptr [esp] ; load eax from stack (with eflags)
+        mov     ecx, eax            ; save the original eflags values to ecx
+        xor     eax, 0x00200000     ; toggle bit 21
+        mov     dword ptr [esp],eax ; store toggled eflags to stack
+        popfd                       ; load eflags from stack
+
+        pushfd                      ; save updated eflags to stack
+        mov     eax,dword ptr [esp] ; load eax from stack
+        popfd                       ; pop stack to restore stack pointer
+
+        xor     edx, edx            ; clear edx for defaulting no mmx
+        cmp     eax, ecx            ; compare to original eflags values
+        jz      end                 ; jumps to 'end' if cpuid not present
+
+        ; cpuid instruction available, test for presence of mmx instructions 
+        mov     eax, 1
+        cpuid
+        test    edx, 0x00800000
+        jz      end                 ; branch if MMX not available
+
+        or      esi, SUPPORT_MMX    ; otherwise add MMX support bit
+
+        test    edx, 0x02000000
+        jz      test3DNow           ; branch if SSE not available
+
+        or      esi, SUPPORT_SSE    ; otherwise add SSE support bit
+
+    test3DNow:
+        ; test for precense of AMD extensions
+        mov     eax, 0x80000000
+        cpuid
+        cmp     eax, 0x80000000
+        jbe     end                ; branch if no AMD extensions detected
+
+        ; test for precense of 3DNow! extension
+        mov     eax, 0x80000001
+        cpuid
+        test    edx, 0x80000000
+        jz      end                 ; branch if 3DNow! not detected
+
+        or      esi, SUPPORT_3DNOW  ; otherwise add 3DNow support bit
+
+    end:
+
+        mov     res, esi
+    }
+
+    return res & ~_dwDisabledISA;
+}
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/mmx_optimized.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/mmx_optimized.cpp
new file mode 100644
index 00000000..539ee57c
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/mmx_optimized.cpp
@@ -0,0 +1,320 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// MMX optimized routines. All MMX optimized functions have been gathered into 
+/// this single source code file, regardless of their class or original source 
+/// code file, in order to ease porting the library to other compiler and 
+/// processor platforms.
+///
+/// The MMX-optimizations are programmed using MMX compiler intrinsics that
+/// are supported both by Microsoft Visual C++ and GCC compilers, so this file
+/// should compile with both toolsets.
+///
+/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++ 
+/// 6.0 processor pack" update to support compiler intrinsic syntax. The update
+/// is available for download at Microsoft Developers Network, see here:
+/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-10-31 16:53:23 +0200 (Sat, 31 Oct 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: mmx_optimized.cpp 75 2009-10-31 14:53:23Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "STTypes.h"
+
+#ifdef ALLOW_MMX
+// MMX routines available only with integer sample type
+
+#if !(WIN32 || __i386__ || __x86_64__)
+#error "wrong platform - this source code file is exclusively for x86 platforms"
+#endif
+
+using namespace soundtouch;
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of MMX optimized functions of class 'TDStretchMMX'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "TDStretch.h"
+#include <mmintrin.h>
+#include <limits.h>
+#include <math.h>
+
+
+// Calculates the cross correlation of 'pV1' and 'pV2', normalized by the square root of the energy of 'pV1'
+long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const
+{
+    const __m64 *pVec1, *pVec2;
+    __m64 shifter;
+    __m64 accu, normaccu;
+    long corr, norm;
+    int i;
+   
+    pVec1 = (__m64*)pV1;
+    pVec2 = (__m64*)pV2;
+
+    shifter = _m_from_int(overlapDividerBits);
+    normaccu = accu = _mm_setzero_si64();
+
+    // Process 4 parallel sets of 2 * stereo samples each during each 
+    // round to improve CPU-level parallelization.
+    for (i = 0; i < overlapLength / 8; i ++)
+    {
+        __m64 temp, temp2;
+
+        // dictionary of instructions:
+        // _mm_madd_pi16 : 4*16bit multiply-add, resulting two 32bits = [a0*b0+a1*b1 ; a2*b2+a3*b3]
+        // _mm_add_pi32  : 2*32bit add
+        // _mm_sra_pi32  : 32bit arithmetic right-shift
+
+        temp = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]),
+                            _mm_madd_pi16(pVec1[1], pVec2[1]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]),
+                             _mm_madd_pi16(pVec1[1], pVec1[1]));
+        accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
+
+        temp = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]),
+                            _mm_madd_pi16(pVec1[3], pVec2[3]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]),
+                             _mm_madd_pi16(pVec1[3], pVec1[3]));
+        accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
+
+        pVec1 += 4;
+        pVec2 += 4;
+    }
+
+    // add the high dword of each accumulator to its low dword, then
+    // store the totals into the variables "corr" and "norm"
+
+    accu = _mm_add_pi32(accu, _mm_srli_si64(accu, 32));
+    corr = _m_to_int(accu);
+
+    normaccu = _mm_add_pi32(normaccu, _mm_srli_si64(normaccu, 32));
+    norm = _m_to_int(normaccu);
+
+    // Clear MMX state
+    _m_empty();
+
+    // Normalize result by dividing by sqrt(norm) - this step is easiest 
+    // done using floating point operation
+    if (norm == 0) norm = 1;    // to avoid div by zero
+    return (long)((double)corr * USHRT_MAX / sqrt((double)norm));
+    // Note: Warning about the missing EMMS instruction is harmless
+    // as it'll be called elsewhere.
+}
+
+
+
+void TDStretchMMX::clearCrossCorrState()
+{
+    // Clear MMX state
+    _m_empty();
+    //_asm EMMS;
+}
+
+
+
+// MMX-optimized version of the function overlapStereo: mixes 'pMidBuffer' and 'input' into 'output' with weights that change linearly over 'overlapLength' samples
+void TDStretchMMX::overlapStereo(short *output, const short *input) const
+{
+    const __m64 *pVinput, *pVMidBuf;
+    __m64 *pVdest;
+    __m64 mix1, mix2, adder, shifter;
+    int i;
+
+    pVinput  = (const __m64*)input;
+    pVMidBuf = (const __m64*)pMidBuffer;
+    pVdest   = (__m64*)output;
+
+    // mix1  = mixer values for 1st stereo sample
+    // mix2  = mixer values for 2nd stereo sample
+    // adder = adder for updating mixer values after each round
+    
+    mix1  = _mm_set_pi16(0, overlapLength,   0, overlapLength);
+    adder = _mm_set_pi16(1, -1, 1, -1);
+    mix2  = _mm_add_pi16(mix1, adder);
+    adder = _mm_add_pi16(adder, adder);
+
+    // Overlaplength-division by shifter. "+1" is to account for "-1" deducted in
+    // overlapDividerBits calculation earlier.
+    shifter = _m_from_int(overlapDividerBits + 1);
+
+    for (i = 0; i < overlapLength / 4; i ++)
+    {
+        __m64 temp1, temp2;
+                
+        // load & shuffle data so that input & mixbuffer data samples are paired
+        temp1 = _mm_unpacklo_pi16(pVMidBuf[0], pVinput[0]);     // = i0l m0l i0r m0r
+        temp2 = _mm_unpackhi_pi16(pVMidBuf[0], pVinput[0]);     // = i1l m1l i1r m1r
+
+        // temp = (temp .* mix) >> shifter
+        temp1 = _mm_sra_pi32(_mm_madd_pi16(temp1, mix1), shifter);
+        temp2 = _mm_sra_pi32(_mm_madd_pi16(temp2, mix2), shifter);
+        pVdest[0] = _mm_packs_pi32(temp1, temp2); // pack 2*2*32bit => 4*16bit
+
+        // update mix += adder
+        mix1 = _mm_add_pi16(mix1, adder);
+        mix2 = _mm_add_pi16(mix2, adder);
+
+        // --- second round begins here ---
+
+        // load & shuffle data so that input & mixbuffer data samples are paired
+        temp1 = _mm_unpacklo_pi16(pVMidBuf[1], pVinput[1]);       // = i2l m2l i2r m2r
+        temp2 = _mm_unpackhi_pi16(pVMidBuf[1], pVinput[1]);       // = i3l m3l i3r m3r
+
+        // temp = (temp .* mix) >> shifter
+        temp1 = _mm_sra_pi32(_mm_madd_pi16(temp1, mix1), shifter);
+        temp2 = _mm_sra_pi32(_mm_madd_pi16(temp2, mix2), shifter);
+        pVdest[1] = _mm_packs_pi32(temp1, temp2); // pack 2*2*32bit => 4*16bit
+
+        // update mix += adder
+        mix1 = _mm_add_pi16(mix1, adder);
+        mix2 = _mm_add_pi16(mix2, adder);
+
+        pVinput  += 2;
+        pVMidBuf += 2;
+        pVdest   += 2;
+    }
+
+    _m_empty(); // clear MMX state
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of MMX optimized functions of class 'FIRFilter'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "FIRFilter.h"
+
+
+FIRFilterMMX::FIRFilterMMX() : FIRFilter()
+{
+    filterCoeffsAlign = filterCoeffsUnalign = NULL;   // no coefficient buffer until setCoefficients() allocates one
+}
+
+
+FIRFilterMMX::~FIRFilterMMX()
+{
+    delete[] filterCoeffsUnalign;   // the allocation made in setCoefficients(); filterCoeffsAlign points inside it
+}
+
+
+// (overloaded) Stores the filter coefficients in the base class and builds the rearranged, aligned copy used by the MMX routine
+void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor)
+{
+    uint i;
+    FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
+
+    // Ensure that filter coeffs array is aligned to 16-byte boundary (8 spare shorts = 16 bytes of slack)
+    delete[] filterCoeffsUnalign;
+    filterCoeffsUnalign = new short[2 * newLength + 8];
+    filterCoeffsAlign = (short *)(((ulong)filterCoeffsUnalign + 15) & -16);
+
+    // rearrange the filter coefficients for mmx routines: each group c0..c3 is stored as c0 c2 c0 c2 c1 c3 c1 c3
+    for (i = 0;i < length; i += 4) 
+    {
+        filterCoeffsAlign[2 * i + 0] = coeffs[i + 0];
+        filterCoeffsAlign[2 * i + 1] = coeffs[i + 2];
+        filterCoeffsAlign[2 * i + 2] = coeffs[i + 0];
+        filterCoeffsAlign[2 * i + 3] = coeffs[i + 2];
+
+        filterCoeffsAlign[2 * i + 4] = coeffs[i + 1];
+        filterCoeffsAlign[2 * i + 5] = coeffs[i + 3];
+        filterCoeffsAlign[2 * i + 6] = coeffs[i + 1];
+        filterCoeffsAlign[2 * i + 7] = coeffs[i + 3];
+    }
+}
+
+
+
+// mmx-optimized version of the filter routine for stereo sound; returns ('numSamples' rounded down to even) - 'length', or 0 if there is too little input
+uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numSamples) const
+{
+    // Each outer round writes one __m64 (two stereo samples) to 'dest' and advances 'src' by four shorts.
+    uint i, j;
+    __m64 *pVdest = (__m64*)dest;
+
+    if (length < 2 || numSamples < length) return 0;   // second test keeps the unsigned 'numSamples - length' below from wrapping
+
+    for (i = 0; i < (numSamples - length) / 2; i ++)
+    {
+        __m64 accu1;
+        __m64 accu2;
+        const __m64 *pVsrc = (const __m64*)src;
+        const __m64 *pVfilter = (const __m64*)filterCoeffsAlign;
+
+        accu1 = accu2 = _mm_setzero_si64();
+        for (j = 0; j < lengthDiv8 * 2; j ++)
+        {
+            __m64 temp1, temp2;
+
+            temp1 = _mm_unpacklo_pi16(pVsrc[0], pVsrc[1]);  // = l2 l0 r2 r0
+            temp2 = _mm_unpackhi_pi16(pVsrc[0], pVsrc[1]);  // = l3 l1 r3 r1
+
+            accu1 = _mm_add_pi32(accu1, _mm_madd_pi16(temp1, pVfilter[0]));  // += l2*f2+l0*f0 r2*f2+r0*f0
+            accu1 = _mm_add_pi32(accu1, _mm_madd_pi16(temp2, pVfilter[1]));  // += l3*f3+l1*f1 r3*f3+r1*f1
+
+            temp1 = _mm_unpacklo_pi16(pVsrc[1], pVsrc[2]);  // = l4 l2 r4 r2
+
+            accu2 = _mm_add_pi32(accu2, _mm_madd_pi16(temp2, pVfilter[0]));  // += l3*f2+l1*f0 r3*f2+r1*f0
+            accu2 = _mm_add_pi32(accu2, _mm_madd_pi16(temp1, pVfilter[1]));  // += l4*f3+l2*f1 r4*f3+r2*f1
+
+            // accu1 += l2*f2+l0*f0 r2*f2+r0*f0
+            //       += l3*f3+l1*f1 r3*f3+r1*f1
+
+            // accu2 += l3*f2+l1*f0 r3*f2+r1*f0
+            //          l4*f3+l2*f1 r4*f3+r2*f1
+
+            pVfilter += 2;
+            pVsrc += 2;
+        }
+        // accu >>= resultDivFactor
+        accu1 = _mm_srai_pi32(accu1, resultDivFactor);
+        accu2 = _mm_srai_pi32(accu2, resultDivFactor);
+
+        // pack 2*2*32bits => 4*16 bits
+        pVdest[0] = _mm_packs_pi32(accu1, accu2);
+        src += 4;
+        pVdest ++;
+    }
+
+   _m_empty();  // clear MMX state
+
+    return (numSamples & 0xfffffffe) - length;
+}
+
+#endif  // ALLOW_MMX
diff --git a/plugins/soundtouch/soundtouch/source/SoundTouch/sse_optimized.cpp b/plugins/soundtouch/soundtouch/source/SoundTouch/sse_optimized.cpp
new file mode 100644
index 00000000..7659be68
--- /dev/null
+++ b/plugins/soundtouch/soundtouch/source/SoundTouch/sse_optimized.cpp
@@ -0,0 +1,510 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// SSE optimized routines for Pentium-III, Athlon-XP and later CPUs. All SSE 
+/// optimized functions have been gathered into this single source 
+/// code file, regardless of their class or original source code file, in order 
+/// to ease porting the library to other compiler and processor platforms.
+///
+/// The SSE-optimizations are programmed using SSE compiler intrinsics that
+/// are supported both by Microsoft Visual C++ and GCC compilers, so this file
+/// should compile with both toolsets.
+///
+/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++ 
+/// 6.0 processor pack" update to support SSE instruction set. The update is 
+/// available for download at Microsoft Developers Network, see here:
+/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
+///
+/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and 
+/// perform a search with keywords "processor pack".
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-12-28 22:32:57 +0200 (Mon, 28 Dec 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: sse_optimized.cpp 80 2009-12-28 20:32:57Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "cpu_detect.h"
+#include "STTypes.h"
+
+using namespace soundtouch;
+
+#ifdef ALLOW_SSE
+
+// SSE routines available only with float sample type    
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of SSE optimized functions of class 'TDStretchSSE'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "TDStretch.h"
+#include <xmmintrin.h>
+#include <math.h>
+
+// Calculates the cross correlation of 'pV1' and 'pV2', normalized by the square root of the energy of 'pV1'
+double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) const
+{
+    int i;
+    const float *pVec1;
+    const __m128 *pVec2;
+    __m128 vSum, vNorm;
+
+    // Note. It means a major slow-down if the routine needs to tolerate 
+    // unaligned __m128 memory accesses. It's way faster if we can skip 
+    // unaligned slots and use _mm_load_ps instruction instead of _mm_loadu_ps.
+    // This can mean up to ~ 10-fold difference (incl. part of which is
+    // due to skipping every second round for stereo sound though).
+    //
+    // Compile-time define ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
+    // for choosing if this little cheating is allowed.
+
+#ifdef ALLOW_NONEXACT_SIMD_OPTIMIZATION
+    // Little cheating allowed, return valid correlation only for 
+    // aligned locations, meaning every second round for stereo sound.
+
+    #define _MM_LOAD    _mm_load_ps
+
+    if (((ulong)pV1) & 15) return -1e50;    // skip unaligned locations
+
+#else
+    // No cheating allowed, use unaligned load & take the resulting
+    // performance hit.
+    #define _MM_LOAD    _mm_loadu_ps
+#endif 
+
+    // ensure overlapLength is divisible by 8
+    assert((overlapLength % 8) == 0);
+
+    // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
+    // Note: pV2 _must_ be aligned to 16-byte boundary, pV1 need not.
+    pVec1 = (const float*)pV1;
+    pVec2 = (const __m128*)pV2;
+    vSum = vNorm = _mm_setzero_ps();
+
+    // Unroll the loop by factor of 4 * 4 operations
+    for (i = 0; i < overlapLength / 8; i ++) 
+    {
+        __m128 vTemp;
+        // vSum += pV1[0..3] * pV2[0..3]
+        vTemp = _MM_LOAD(pVec1);
+        vSum  = _mm_add_ps(vSum,  _mm_mul_ps(vTemp ,pVec2[0]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
+
+        // vSum += pV1[4..7] * pV2[4..7]
+        vTemp = _MM_LOAD(pVec1 + 4);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[1]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
+
+        // vSum += pV1[8..11] * pV2[8..11]
+        vTemp = _MM_LOAD(pVec1 + 8);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[2]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
+
+        // vSum += pV1[12..15] * pV2[12..15]
+        vTemp = _MM_LOAD(pVec1 + 12);
+        vSum  = _mm_add_ps(vSum, _mm_mul_ps(vTemp, pVec2[3]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp ,vTemp));
+
+        pVec1 += 16;
+        pVec2 += 4;
+    }
+
+    // return value = (vSum[0] + vSum[1] + vSum[2] + vSum[3]) / sqrt(vNorm[0] + vNorm[1] + vNorm[2] + vNorm[3])
+    float *pvNorm = (float*)&vNorm;
+    double norm = sqrt(pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+    if (norm < 1e-9) norm = 1.0;    // to avoid div by zero
+
+    float *pvSum = (float*)&vSum;
+    return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / norm;
+
+    /* This is approximately the corresponding routine in C-language (without the zero-norm guard used above):
+    double corr, norm;
+    uint i, j;
+
+    // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
+    corr = norm = 0.0;
+    for (i = 0; i < overlapLength / 8; i ++) 
+    {
+        corr += pV1[0] * pV2[0] +
+                pV1[1] * pV2[1] +
+                pV1[2] * pV2[2] +
+                pV1[3] * pV2[3] +
+                pV1[4] * pV2[4] +
+                pV1[5] * pV2[5] +
+                pV1[6] * pV2[6] +
+                pV1[7] * pV2[7] +
+                pV1[8] * pV2[8] +
+                pV1[9] * pV2[9] +
+                pV1[10] * pV2[10] +
+                pV1[11] * pV2[11] +
+                pV1[12] * pV2[12] +
+                pV1[13] * pV2[13] +
+                pV1[14] * pV2[14] +
+                pV1[15] * pV2[15];
+
+        for (j = 0; j < 16; j ++) norm += pV1[j] * pV1[j];
+
+        pV1 += 16;
+        pV2 += 16;
+    }
+    return corr / sqrt(norm);
+    */
+
+    /* This is a bit outdated, corresponding routine in assembler. This may be teeny-weeny bit
+       faster than intrinsic version, but more difficult to maintain & get compiled on multiple
+       platforms.
+
+    uint overlapLengthLocal = overlapLength;
+    float corr;
+
+    _asm 
+    {
+        // Very important note: data in 'pV2' _must_ be aligned to 
+        // 16-byte boundary!
+
+        // give prefetch hints to CPU of what data are to be needed soonish
+        // give more aggressive hints on pV1 as that changes while pV2 stays
+        // same between runs
+        prefetcht0 [pV1]
+        prefetcht0 [pV2]
+        prefetcht0 [pV1 + 32]
+
+        mov     eax, dword ptr pV1
+        mov     ebx, dword ptr pV2
+
+        xorps   xmm0, xmm0
+
+        mov     ecx, overlapLengthLocal
+        shr     ecx, 3  // div by eight
+
+    loop1:
+        prefetcht0 [eax + 64]     // give a prefetch hint to CPU what data are to be needed soonish
+        prefetcht0 [ebx + 32]     // give a prefetch hint to CPU what data are to be needed soonish
+        movups  xmm1, [eax]
+        mulps   xmm1, [ebx]
+        addps   xmm0, xmm1
+
+        movups  xmm2, [eax + 16]
+        mulps   xmm2, [ebx + 16]
+        addps   xmm0, xmm2
+
+        prefetcht0 [eax + 96]     // give a prefetch hint to CPU what data are to be needed soonish
+        prefetcht0 [ebx + 64]     // give a prefetch hint to CPU what data are to be needed soonish
+
+        movups  xmm3, [eax + 32]
+        mulps   xmm3, [ebx + 32]
+        addps   xmm0, xmm3
+
+        movups  xmm4, [eax + 48]
+        mulps   xmm4, [ebx + 48]
+        addps   xmm0, xmm4
+
+        add     eax, 64
+        add     ebx, 64
+
+        dec     ecx
+        jnz     loop1
+
+        // add the four floats of xmm0 together and return the result. 
+
+        movhlps xmm1, xmm0          // move 3 & 4 of xmm0 to 1 & 2 of xmm1
+        addps   xmm1, xmm0
+        movaps  xmm2, xmm1
+        shufps  xmm2, xmm2, 0x01    // move 2 of xmm2 as 1 of xmm2
+        addss   xmm2, xmm1
+        movss   corr, xmm2
+    }
+
+    return (double)corr;
+    */
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of SSE optimized functions of class 'FIRFilter'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "FIRFilter.h"
+
+FIRFilterSSE::FIRFilterSSE() : FIRFilter()
+{
+    filterCoeffsAlign = NULL;       // both stay NULL until setCoefficients() allocates the buffer
+    filterCoeffsUnalign = NULL;
+}
+
+
+FIRFilterSSE::~FIRFilterSSE()
+{
+    delete[] filterCoeffsUnalign;   // the allocation made in setCoefficients(); filterCoeffsAlign points inside it
+    filterCoeffsAlign = NULL;
+    filterCoeffsUnalign = NULL;
+}
+
+
+// (overloaded) Stores the filter coefficients in the base class and builds the scaled, aligned copy used by the SSE routine
+void FIRFilterSSE::setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor)
+{
+    uint i;
+    float fDivider;
+
+    FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
+
+    // Scale the filter coefficients so that it won't be necessary to scale the filtering result
+    // also rearrange coefficients suitably for SSE
+    // Ensure that filter coeffs array is aligned to 16-byte boundary (4 spare floats = 16 bytes of slack)
+    delete[] filterCoeffsUnalign;
+    filterCoeffsUnalign = new float[2 * newLength + 4];
+    filterCoeffsAlign = (float *)(((unsigned long)filterCoeffsUnalign + 15) & (ulong)-16);
+
+    fDivider = (float)resultDivider;
+
+    // store each scaled coefficient twice in adjacent slots (presumably one per stereo channel)
+    for (i = 0; i < newLength; i ++)
+    {
+        filterCoeffsAlign[2 * i + 0] =
+        filterCoeffsAlign[2 * i + 1] = coeffs[i + 0] / fDivider;
+    }
+}
+
+
+
+// SSE-optimized version of the filter routine for stereo sound
+//
+// Filters interleaved stereo data from 'source' with the coefficients prepared by
+// setCoefficients() and writes the result to 'dest'. Each outer iteration produces two
+// stereo samples (four floats).
+//
+// dest        output buffer; four floats are stored per outer iteration
+// source      interleaved stereo input; read with unaligned loads
+// numSamples  number of input samples available in 'source'
+//
+// Returns the number of output samples written, i.e. (numSamples - length) rounded down
+// to an even number, or 0 when fewer than two output samples can be produced.
+uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint numSamples) const
+{
+    // Fewer input samples than filter taps: nothing can be produced. Checked explicitly so
+    // that the unsigned subtraction below can never wrap around.
+    if (numSamples < length) return 0;
+
+    int count = (int)((numSamples - length) & (uint)-2);
+    int j;
+
+    assert(count % 2 == 0);
+
+    if (count < 2) return 0;
+
+    assert(source != NULL);
+    assert(dest != NULL);
+    assert((length % 8) == 0);
+    assert(filterCoeffsAlign != NULL);
+    // 'size_t' instead of 'ulong': the latter is narrower than a pointer on 64-bit Windows.
+    assert(((size_t)filterCoeffsAlign) % 16 == 0);
+
+    // filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2'
+    for (j = 0; j < count; j += 2)
+    {
+        const float *pSrc;
+        const __m128 *pFil;
+        __m128 sum1, sum2;
+        uint i;
+
+        pSrc = (const float*)source;              // source audio data
+        pFil = (const __m128*)filterCoeffsAlign;  // filter coefficients. NOTE: Assumes coefficients 
+                                                  // are aligned to 16-byte boundary
+        sum1 = sum2 = _mm_setzero_ps();
+
+        // each pass consumes 16 source floats and 4 coefficient vectors, i.e. 8 filter taps
+        for (i = 0; i < length / 8; i ++) 
+        {
+            // Unroll loop for efficiency & calculate filter for 2*2 stereo samples 
+            // at each pass
+
+            // sum1 is accu for 2*2 filtered stereo sound data at the primary sound data offset
+            // sum2 is accu for 2*2 filtered stereo sound data for the next sound sample offset.
+
+            sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc)    , pFil[0]));
+            sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 2), pFil[0]));
+
+            sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 4), pFil[1]));
+            sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 6), pFil[1]));
+
+            sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 8) ,  pFil[2]));
+            sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 10), pFil[2]));
+
+            sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 12), pFil[3]));
+            sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 14), pFil[3]));
+
+            pSrc += 16;
+            pFil += 4;
+        }
+
+        // Now sum1 and sum2 both have a filtered 2-channel sample each, but we still need
+        // to sum the two hi- and lo-floats of these registers together.
+
+        // post-shuffle & add the filtered values and store to dest.
+        // The stored vector is (s1_0+s1_2, s1_1+s1_3, s2_0+s2_2, s2_1+s2_3).
+        _mm_storeu_ps(dest, _mm_add_ps(
+                    _mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(1,0,3,2)),   // s2_1 s2_0 s1_3 s1_2
+                    _mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(3,2,1,0))    // s2_3 s2_2 s1_1 s1_0
+                    ));
+        source += 4;
+        dest += 4;
+    }
+
+    // Ideas for further improvement:
+    // 1. If it could be guaranteed that 'source' were always aligned to 16-byte 
+    //    boundary, a faster aligned '_mm_load_ps' instruction could be used.
+    // 2. If it could be guaranteed that 'dest' were always aligned to 16-byte 
+    //    boundary, a faster '_mm_store_ps' instruction could be used.
+
+    return (uint)count;
+
+    /* original routine in C-language. please notice the C-version has differently 
+       organized coefficients though.
+    double suml1, suml2;
+    double sumr1, sumr2;
+    uint i, j;
+
+    for (j = 0; j < count; j += 2)
+    {
+        const float *ptr;
+        const float *pFil;
+
+        suml1 = sumr1 = 0.0;
+        suml2 = sumr2 = 0.0;
+        ptr = src;
+        pFil = filterCoeffs;
+        for (i = 0; i < lengthLocal; i ++) 
+        {
+            // unroll loop for efficiency.
+
+            suml1 += ptr[0] * pFil[0] + 
+                     ptr[2] * pFil[2] +
+                     ptr[4] * pFil[4] +
+                     ptr[6] * pFil[6];
+
+            sumr1 += ptr[1] * pFil[1] + 
+                     ptr[3] * pFil[3] +
+                     ptr[5] * pFil[5] +
+                     ptr[7] * pFil[7];
+
+            suml2 += ptr[8] * pFil[0] + 
+                     ptr[10] * pFil[2] +
+                     ptr[12] * pFil[4] +
+                     ptr[14] * pFil[6];
+
+            sumr2 += ptr[9] * pFil[1] + 
+                     ptr[11] * pFil[3] +
+                     ptr[13] * pFil[5] +
+                     ptr[15] * pFil[7];
+
+            ptr += 16;
+            pFil += 8;
+        }
+        dest[0] = (float)suml1;
+        dest[1] = (float)sumr1;
+        dest[2] = (float)suml2;
+        dest[3] = (float)sumr2;
+
+        src += 4;
+        dest += 4;
+    }
+    */
+
+
+    /* Similar routine in assembly, again obsoleted due to maintainability
+    _asm
+    {
+        // Very important note: data in 'src' _must_ be aligned to 
+        // 16-byte boundary!
+        mov     edx, count
+        mov     ebx, dword ptr src
+        mov     eax, dword ptr dest
+        shr     edx, 1
+
+    loop1:
+        // "outer loop" : during each round 2*2 output samples are calculated
+
+        // give prefetch hints to CPU of what data are to be needed soonish
+        prefetcht0 [ebx]
+        prefetcht0 [filterCoeffsLocal]
+
+        mov     esi, ebx
+        mov     edi, filterCoeffsLocal
+        xorps   xmm0, xmm0
+        xorps   xmm1, xmm1
+        mov     ecx, lengthLocal
+
+    loop2:
+        // "inner loop" : during each round eight FIR filter taps are evaluated for 2*2 samples
+        prefetcht0 [esi + 32]     // give a prefetch hint to CPU what data are to be needed soonish
+        prefetcht0 [edi + 32]     // give a prefetch hint to CPU what data are to be needed soonish
+
+        movups  xmm2, [esi]         // possibly unaligned load
+        movups  xmm3, [esi + 8]     // possibly unaligned load
+        mulps   xmm2, [edi]
+        mulps   xmm3, [edi]
+        addps   xmm0, xmm2
+        addps   xmm1, xmm3
+
+        movups  xmm4, [esi + 16]    // possibly unaligned load
+        movups  xmm5, [esi + 24]    // possibly unaligned load
+        mulps   xmm4, [edi + 16]
+        mulps   xmm5, [edi + 16]
+        addps   xmm0, xmm4
+        addps   xmm1, xmm5
+
+        prefetcht0 [esi + 64]     // give a prefetch hint to CPU what data are to be needed soonish
+        prefetcht0 [edi + 64]     // give a prefetch hint to CPU what data are to be needed soonish
+
+        movups  xmm6, [esi + 32]    // possibly unaligned load
+        movups  xmm7, [esi + 40]    // possibly unaligned load
+        mulps   xmm6, [edi + 32]
+        mulps   xmm7, [edi + 32]
+        addps   xmm0, xmm6
+        addps   xmm1, xmm7
+
+        movups  xmm4, [esi + 48]    // possibly unaligned load
+        movups  xmm5, [esi + 56]    // possibly unaligned load
+        mulps   xmm4, [edi + 48]
+        mulps   xmm5, [edi + 48]
+        addps   xmm0, xmm4
+        addps   xmm1, xmm5
+
+        add     esi, 64
+        add     edi, 64
+        dec     ecx
+        jnz     loop2
+
+        // Now xmm0 and xmm1 both have a filtered 2-channel sample each, but we still need
+        // to sum the two hi- and lo-floats of these registers together.
+
+        movhlps xmm2, xmm0          // xmm2 = xmm2_3 xmm2_2 xmm0_3 xmm0_2
+        movlhps xmm2, xmm1          // xmm2 = xmm1_1 xmm1_0 xmm0_3 xmm0_2
+        shufps  xmm0, xmm1, 0xe4    // xmm0 = xmm1_3 xmm1_2 xmm0_1 xmm0_0
+        addps   xmm0, xmm2
+
+        movaps  [eax], xmm0
+        add     ebx, 16
+        add     eax, 16
+
+        dec     edx
+        jnz     loop1
+    }
+    */
+}
+
+#endif  // ALLOW_SSE
author	waker <wakeroid@gmail.com>	2011-03-23 21:26:26 +0100
committer	waker <wakeroid@gmail.com>	2011-03-23 21:26:26 +0100
commit	fd302d7abc36942e7ff14b22fae1e72b4495bef1 (patch)
tree	2a36f8361c907a5bea91a9d905957a709f31ea64 /plugins/soundtouch/soundtouch/source
parent	11e63b53b8c91da89592c373bb32fc2b656c6024 (diff)