13 files changed, 2496 insertions, 0 deletions
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/asm.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/asm.S
new file mode 100644
index 00000000..6860f1cf
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/asm.S
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#ifdef __ELF__                 /* ELF target: the ELF line prefix expands to nothing */
+#   define ELF
+#else                          /* non-ELF: the prefix expands to '@', commenting out the rest of the line */
+#   define ELF @
+#endif
+
+.macro  require8 val=1         /* records EABI build attribute 24 (Tag_ABI_align_needed) = val; ELF only */
+ELF     .eabi_attribute 24, \val
+.endm
+
+.macro  preserve8 val=1        /* records EABI build attribute 25 (Tag_ABI_align_preserved) = val; ELF only */
+ELF     .eabi_attribute 25, \val
+.endm
+
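+/* Disabled variant of the "function" macro defined below; it differs only in lacking the .hidden directive.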
+.macro  function name, export=0
+    .macro endfunc
+ELF     .size   \name, . - \name
+        .endfunc
+        .purgem endfunc
+    .endm
+        .text
+    .if \export
+        .global EXTERN_ASM\name
+EXTERN_ASM\name:
+    .endif
+ELF     .type   \name, %function
+        .func   \name
+\name:
+.endm
+*/
+
+.macro  function name, export=0 /* opens function name; export=1 also emits the EXTERN_ASM-prefixed global label */
+    .macro endfunc              /* defined per function so that .size can refer to this function's name */
+ELF     .size   \name, . - \name
+        .endfunc
+        .purgem endfunc         /* drop this definition so the next function can define its own */
+    .endm
+        .text
+    .if \export
+ELF     .hidden EXTERN_ASM\name /* ELF-only visibility directive, so it is guarded like .size and .type */
+        .global EXTERN_ASM\name
+EXTERN_ASM\name:
+    .endif
+ELF     .type   \name, %function
+        .func   \name
+\name:
+.endm
+
+.macro  mov32   rd, val        /* rd = 32-bit constant val */
+#if HAVE_ARMV6T2               /* build the constant from 16-bit immediates with movw/movt */
+        movw            \rd, #(\val) & 0xffff
+    .if (\val) >> 16            /* movt is emitted only when the upper 16 bits are non-zero */
+        movt            \rd, #(\val) >> 16
+    .endif
+#else                          /* otherwise use the ldr= pseudo-instruction */
+        ldr             \rd, =\val
+#endif
+.endm
+
+.macro  movrel rd, val         /* rd = address of symbol val */
+#if HAVE_ARMV6T2 && !CONFIG_PIC /* movw/movt with :lower16:/:upper16:; skipped when CONFIG_PIC is set */
+        movw            \rd, #:lower16:\val
+        movt            \rd, #:upper16:\val
+#else                          /* otherwise use the ldr= pseudo-instruction */
+        ldr             \rd, =\val
+#endif
+.endm
+
+#if HAVE_VFP_ARGS              /* VFP and NOVFP are line prefixes: one expands to nothing, the other to the '@' comment character */
+        .eabi_attribute 28, 1   /* EABI build attribute 28 (Tag_ABI_VFP_args) = 1 */
+#   define VFP
+#   define NOVFP @
+#else
+#   define VFP   @
+#   define NOVFP
+#endif
+
+#define GLUE(a, b) a ## b         /* raw token paste */
+#define JOIN(a, b) GLUE(a, b)     /* extra level so a and b are macro-expanded before pasting */
+#define X(s) JOIN(EXTERN_ASM, s)  /* s with the EXTERN_ASM prefix pasted in front */
+
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_init_arm.c b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_init_arm.c
new file mode 100644
index 00000000..28148e92
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_init_arm.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/fft.h"
+#if CONFIG_DCA_DECODER
+#include "libavcodec/synth_filter.h"
+#endif
+
+void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); /* defined in fft_neon.S */
+void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);    /* defined in fft_neon.S */
+
+#if 0 /* IMDCT/MDCT NEON entry points are disabled, as is their use in ff_fft_init_arm() */
+void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+#endif
+
+void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z); /* defined in rdft_neon.S */
+
+void ff_synth_filter_float_neon(FFTContext *imdct, /* only referenced under CONFIG_DCA_DECODER in this file */
+                                float *synth_buf_ptr, int *synth_buf_offset,
+                                float synth_buf2[32], const float window[512],
+                                float out[32], const float in[32],
+                                float scale, float bias);
+
+av_cold void ff_fft_init_arm(FFTContext *s) /* point the FFT callbacks of s at the NEON routines */
+{
+    if (HAVE_NEON) { /* s is left unchanged when HAVE_NEON is 0 */
+        s->fft_permute  = ff_fft_permute_neon;
+        s->fft_calc     = ff_fft_calc_neon;
+#if 0 /* MDCT/IMDCT callbacks are not replaced; the matching NEON prototypes are disabled too */
+        s->imdct_calc   = ff_imdct_calc_neon;
+        s->imdct_half   = ff_imdct_half_neon;
+        s->mdct_calc    = ff_mdct_calc_neon;
+        s->permutation  = FF_MDCT_PERM_INTERLEAVE;
+#endif
+    }
+}
+
+#if CONFIG_RDFT
+av_cold void ff_rdft_init_arm(RDFTContext *s) /* install the NEON rdft_calc into s when HAVE_NEON is set */
+{
+    if (HAVE_NEON)
+        s->rdft_calc    = ff_rdft_calc_neon;
+}
+#endif
+
+#if CONFIG_DCA_DECODER
+av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) /* install the NEON synth filter into s when HAVE_NEON is set */
+{
+    if (HAVE_NEON)
+        s->synth_filter_float = ff_synth_filter_float_neon;
+}
+#endif
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_neon.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_neon.S
new file mode 100644
index 00000000..117f4fee
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_neon.S
@@ -0,0 +1,372 @@
+/*
+ * ARM NEON optimised FFT
+ *
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2009 Naotoshi Nojiri
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+#define M_SQRT1_2 0.70710678118654752440
+
+        .text
+
+function fft4_neon                              @ r0 = z: four complex floats, transformed in place
+        vld1.32         {d0-d3}, [r0,:128]
+
+        vext.32         q8,  q1,  q1,  #1       @ d16=i2,r3 d17=i3,r2
+        vsub.f32        d6,  d0,  d1            @ r0-r1,i0-i1
+        vsub.f32        d7,  d16, d17           @ i2-i3,r3-r2
+        vadd.f32        d4,  d0,  d1            @ r0+r1,i0+i1
+        vadd.f32        d5,  d2,  d3            @ r2+r3,i2+i3
+        vadd.f32        d1,  d6,  d7
+        vsub.f32        d3,  d6,  d7
+        vadd.f32        d0,  d4,  d5
+        vsub.f32        d2,  d4,  d5
+
+        vst1.32         {d0-d3}, [r0,:128]
+
+        bx              lr
+endfunc
+
+function fft8_neon                              @ r0 = z: eight complex floats, transformed in place
+        mov             r1,  r0
+        vld1.32         {d0-d3},   [r1,:128]!
+        vld1.32         {d16-d19}, [r1,:128]
+
+        movw            r2,  #0x04f3            @ sqrt(1/2)
+        movt            r2,  #0x3f35
+        eor             r3,  r2,  #1<<31        @ r3 = -sqrt(1/2) (sign bit flipped)
+        vdup.32         d31, r2
+
+        vext.32         q11, q1,  q1,  #1       @ i2,r3,i3,r2
+        vadd.f32        d4,  d16, d17           @ r4+r5,i4+i5
+        vmov            d28, r3,  r2            @ d28 = -w,w
+        vadd.f32        d5,  d18, d19           @ r6+r7,i6+i7
+        vsub.f32        d17, d16, d17           @ r4-r5,i4-i5
+        vsub.f32        d19, d18, d19           @ r6-r7,i6-i7
+        vrev64.32       d29, d28                @ d29 = w,-w
+        vadd.f32        d20, d0,  d1            @ r0+r1,i0+i1
+        vadd.f32        d21, d2,  d3            @ r2+r3,i2+i3
+        vmul.f32        d26, d17, d28           @ -a2r*w,a2i*w
+        vext.32         q3,  q2,  q2,  #1
+        vmul.f32        d27, d19, d29           @ a3r*w,-a3i*w
+        vsub.f32        d23, d22, d23           @ i2-i3,r3-r2
+        vsub.f32        d22, d0,  d1            @ r0-r1,i0-i1
+        vmul.f32        d24, d17, d31           @ a2r*w,a2i*w
+        vmul.f32        d25, d19, d31           @ a3r*w,a3i*w
+        vadd.f32        d0,  d20, d21
+        vsub.f32        d2,  d20, d21
+        vadd.f32        d1,  d22, d23
+        vrev64.32       q13, q13
+        vsub.f32        d3,  d22, d23
+        vsub.f32        d6,  d6,  d7
+        vadd.f32        d24, d24, d26           @ a2r+a2i,a2i-a2r   t1,t2
+        vadd.f32        d25, d25, d27           @ a3r-a3i,a3i+a3r   t5,t6
+        vadd.f32        d7,  d4,  d5
+        vsub.f32        d18, d2,  d6
+        vext.32         q13, q12, q12, #1
+        vadd.f32        d2,  d2,  d6
+        vsub.f32        d16, d0,  d7
+        vadd.f32        d5,  d25, d24
+        vsub.f32        d4,  d26, d27
+        vadd.f32        d0,  d0,  d7
+        vsub.f32        d17, d1,  d5
+        vsub.f32        d19, d3,  d4
+        vadd.f32        d3,  d3,  d4
+        vadd.f32        d1,  d1,  d5
+
+        vst1.32         {d16-d19}, [r1,:128]
+        vst1.32         {d0-d3},   [r0,:128]
+
+        bx              lr
+endfunc
+
+function fft16_neon                             @ r0 = z: sixteen complex floats, transformed in place
+        movrel          r1, mppm
+        vld1.32         {d16-d19}, [r0,:128]!   @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3}
+        pld             [r0, #32]
+        vld1.32         {d2-d3}, [r1,:128]
+        vext.32         q13, q9,  q9,  #1
+        vld1.32         {d22-d25}, [r0,:128]!   @ q11{r4,i4,r5,i5} q12{r6,i6,r7,i7}
+        vadd.f32        d4,  d16, d17
+        vsub.f32        d5,  d16, d17
+        vadd.f32        d18, d18, d19
+        vsub.f32        d19, d26, d27
+
+        vadd.f32        d20, d22, d23
+        vsub.f32        d22, d22, d23
+        vsub.f32        d23, d24, d25
+        vadd.f32        q8,  q2,  q9            @ {r0,i0,r1,i1}
+        vadd.f32        d21, d24, d25
+        vmul.f32        d24, d22, d2
+        vsub.f32        q9,  q2,  q9            @ {r2,i2,r3,i3}
+        vmul.f32        d25, d23, d3
+        vuzp.32         d16, d17                @ {r0,r1,i0,i1}
+        vmul.f32        q1,  q11, d2[1]
+        vuzp.32         d18, d19                @ {r2,r3,i2,i3}
+        vrev64.32       q12, q12
+        vadd.f32        q11, q12, q1            @ {t1a,t2a,t5,t6}
+        vld1.32         {d24-d27}, [r0,:128]!   @ q12{r8,i8,r9,i9} q13{r10,i10,r11,i11}
+        vzip.32         q10, q11
+        vld1.32         {d28-d31}, [r0,:128]    @ q14{r12,i12,r13,i13} q15{r14,i14,r15,i15}
+        vadd.f32        d0,  d22, d20
+        vadd.f32        d1,  d21, d23
+        vsub.f32        d2,  d21, d23
+        vsub.f32        d3,  d22, d20
+        sub             r0,  r0,  #96           @ rewind r0 to z after three 32-byte post-incremented loads
+        vext.32         q13, q13, q13, #1
+        vsub.f32        q10, q8,  q0            @ {r4,r5,i4,i5}
+        vadd.f32        q8,  q8,  q0            @ {r0,r1,i0,i1}
+        vext.32         q15, q15, q15, #1
+        vsub.f32        q11, q9,  q1            @ {r6,r7,i6,i7}
+        vswp            d25, d26                @ q12{r8,i8,i10,r11} q13{r9,i9,i11,r10}
+        vadd.f32        q9,  q9,  q1            @ {r2,r3,i2,i3}
+        vswp            d29, d30                @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14}
+        vadd.f32        q0,  q12, q13           @ {t1,t2,t5,t6}
+        vadd.f32        q1,  q14, q15           @ {t1a,t2a,t5a,t6a}
+        movrel          r2,  X(ff_cos_16)
+        vsub.f32        q13, q12, q13           @ {t3,t4,t7,t8}
+        vrev64.32       d1,  d1
+        vsub.f32        q15, q14, q15           @ {t3a,t4a,t7a,t8a}
+        vrev64.32       d3,  d3
+        movrel          r3,  pmmp
+        vswp            d1,  d26                @ q0{t1,t2,t3,t4} q13{t6,t5,t7,t8}
+        vswp            d3,  d30                @ q1{t1a,t2a,t3a,t4a} q15{t6a,t5a,t7a,t8a}
+        vadd.f32        q12, q0,  q13           @ {r8,i8,r9,i9}
+        vadd.f32        q14, q1,  q15           @ {r12,i12,r13,i13}
+        vld1.32         {d4-d5},  [r2,:64]
+        vsub.f32        q13, q0,  q13           @ {r10,i10,r11,i11}
+        vsub.f32        q15, q1,  q15           @ {r14,i14,r15,i15}
+        vswp            d25, d28                @ q12{r8,i8,r12,i12} q14{r9,i9,r13,i13}
+        vld1.32         {d6-d7},  [r3,:128]
+        vrev64.32       q1,  q14
+        vmul.f32        q14, q14, d4[1]
+        vmul.f32        q1,  q1,  q3
+        vmla.f32        q14, q1,  d5[1]         @ {t1a,t2a,t5a,t6a}
+        vswp            d27, d30                @ q13{r10,i10,r14,i14} q15{r11,i11,r15,i15}
+        vzip.32         q12, q14
+        vadd.f32        d0,  d28, d24
+        vadd.f32        d1,  d25, d29
+        vsub.f32        d2,  d25, d29
+        vsub.f32        d3,  d28, d24
+        vsub.f32        q12, q8,  q0            @ {r8,r9,i8,i9}
+        vadd.f32        q8,  q8,  q0            @ {r0,r1,i0,i1}
+        vsub.f32        q14, q10, q1            @ {r12,r13,i12,i13}
+        mov             r1,  #32
+        vadd.f32        q10, q10, q1            @ {r4,r5,i4,i5}
+        vrev64.32       q0,  q13
+        vmul.f32        q13, q13, d5[0]
+        vrev64.32       q1,  q15
+        vmul.f32        q15, q15, d5[1]
+        vst2.32         {d16-d17},[r0,:128], r1
+        vmul.f32        q0,  q0,  q3
+        vst2.32         {d20-d21},[r0,:128], r1
+        vmul.f32        q1,  q1,  q3
+        vmla.f32        q13, q0,  d5[0]         @ {t1,t2,t5,t6}
+        vmla.f32        q15, q1,  d4[1]         @ {t1a,t2a,t5a,t6a}
+        vst2.32         {d24-d25},[r0,:128], r1
+        vst2.32         {d28-d29},[r0,:128]
+        vzip.32         q13, q15
+        sub             r0, r0, #80             @ r0 was advanced by 3*32; this leaves it at z + 16 bytes
+        vadd.f32        d0,  d30, d26
+        vadd.f32        d1,  d27, d31
+        vsub.f32        d2,  d27, d31
+        vsub.f32        d3,  d30, d26
+        vsub.f32        q13, q9,  q0            @ {r10,r11,i10,i11}
+        vadd.f32        q9,  q9,  q0            @ {r2,r3,i2,i3}
+        vsub.f32        q15, q11, q1            @ {r14,r15,i14,i15}
+        vadd.f32        q11, q11, q1            @ {r6,r7,i6,i7}
+        vst2.32         {d18-d19},[r0,:128], r1
+        vst2.32         {d22-d23},[r0,:128], r1
+        vst2.32         {d26-d27},[r0,:128], r1
+        vst2.32         {d30-d31},[r0,:128]
+        bx              lr
+endfunc
+
+function fft_pass_neon                          @ r0 = z, r1 = wre table, r2 = n (loop count)
+        push            {r4-r6,lr}
+        mov             r6,  r2                 @ n
+        lsl             r5,  r2,  #3            @ 2 * n * sizeof FFTSample
+        lsl             r4,  r2,  #4            @ 2 * n * sizeof FFTComplex
+        lsl             r2,  r2,  #5            @ 4 * n * sizeof FFTComplex
+        add             r3,  r2,  r4
+        add             r4,  r4,  r0            @ &z[o1]
+        add             r2,  r2,  r0            @ &z[o2]
+        add             r3,  r3,  r0            @ &z[o3]
+        vld1.32         {d20-d21},[r2,:128]     @ {z[o2],z[o2+1]}
+        movrel          r12, pmmp
+        vld1.32         {d22-d23},[r3,:128]     @ {z[o3],z[o3+1]}
+        add             r5,  r5,  r1            @ wim
+        vld1.32         {d6-d7},  [r12,:128]    @ pmmp
+        vswp            d21, d22
+        vld1.32         {d4},     [r1,:64]!     @ {wre[0],wre[1]}
+        sub             r5,  r5,  #4            @ wim--
+        vrev64.32       q1,  q11
+        vmul.f32        q11, q11, d4[1]
+        vmul.f32        q1,  q1,  q3
+        vld1.32         {d5[0]},  [r5,:32]      @ d5[0] = wim[-1]
+        vmla.f32        q11, q1,  d5[0]         @ {t1a,t2a,t5a,t6a}
+        vld2.32         {d16-d17},[r0,:128]     @ {z[0],z[1]}
+        sub             r6, r6, #1              @ n--
+        vld2.32         {d18-d19},[r4,:128]     @ {z[o1],z[o1+1]}
+        vzip.32         q10, q11
+        vadd.f32        d0,  d22, d20
+        vadd.f32        d1,  d21, d23
+        vsub.f32        d2,  d21, d23
+        vsub.f32        d3,  d22, d20
+        vsub.f32        q10, q8,  q0
+        vadd.f32        q8,  q8,  q0
+        vsub.f32        q11, q9,  q1
+        vadd.f32        q9,  q9,  q1
+        vst2.32         {d20-d21},[r2,:128]!    @ {z[o2],z[o2+1]}
+        vst2.32         {d16-d17},[r0,:128]!    @ {z[0],z[1]}
+        vst2.32         {d22-d23},[r3,:128]!    @ {z[o3],z[o3+1]}
+        vst2.32         {d18-d19},[r4,:128]!    @ {z[o1],z[o1+1]}
+        sub             r5,  r5,  #8            @ wim -= 2
+1:
+        vld1.32         {d20-d21},[r2,:128]     @ {z[o2],z[o2+1]}
+        vld1.32         {d22-d23},[r3,:128]     @ {z[o3],z[o3+1]}
+        vswp            d21, d22
+        vld1.32         {d4}, [r1]!             @ {wre[0],wre[1]}
+        vrev64.32       q0,  q10
+        vmul.f32        q10, q10, d4[0]
+        vrev64.32       q1,  q11
+        vmul.f32        q11, q11, d4[1]
+        vld1.32         {d5}, [r5]              @ {wim[-1],wim[0]}
+        vmul.f32        q0,  q0,  q3
+        sub             r5,  r5,  #8            @ wim -= 2
+        vmul.f32        q1,  q1,  q3
+        vmla.f32        q10, q0,  d5[1]         @ {t1,t2,t5,t6}
+        vmla.f32        q11, q1,  d5[0]         @ {t1a,t2a,t5a,t6a}
+        vld2.32         {d16-d17},[r0,:128]     @ {z[0],z[1]}
+        subs            r6,  r6,  #1            @ n--
+        vld2.32         {d18-d19},[r4,:128]     @ {z[o1],z[o1+1]}
+        vzip.32         q10, q11
+        vadd.f32        d0,  d22, d20
+        vadd.f32        d1,  d21, d23
+        vsub.f32        d2,  d21, d23
+        vsub.f32        d3,  d22, d20
+        vsub.f32        q10, q8,  q0
+        vadd.f32        q8,  q8,  q0
+        vsub.f32        q11, q9,  q1
+        vadd.f32        q9,  q9,  q1
+        vst2.32         {d20-d21}, [r2,:128]!   @ {z[o2],z[o2+1]}
+        vst2.32         {d16-d17}, [r0,:128]!   @ {z[0],z[1]}
+        vst2.32         {d22-d23}, [r3,:128]!   @ {z[o3],z[o3+1]}
+        vst2.32         {d18-d19}, [r4,:128]!   @ {z[o1],z[o1+1]}
+        bne             1b                      @ repeat until the n counter in r6 reaches 0
+
+        pop             {r4-r6,pc}
+endfunc
+
+.macro  def_fft n, n2, n4                       /* defines the n-point routine in terms of the n2- and n4-point ones */
+        .align 6
+function fft\n\()_neon
+        push            {r4, lr}
+        mov             r4,  r0
+        bl              fft\n2\()_neon          /* n2-point transform starting at z */
+        add             r0,  r4,  #\n4*2*8      /* n4-point transform starting 2*n4 complex values (8 bytes each) into z */
+        bl              fft\n4\()_neon
+        add             r0,  r4,  #\n4*3*8      /* n4-point transform starting 3*n4 complex values into z */
+        bl              fft\n4\()_neon
+        mov             r0,  r4
+        pop             {r4, lr}
+        movrel          r1,  X(ff_cos_\n)
+        mov             r2,  #\n4/2
+        b               fft_pass_neon           /* tail call: lr was restored above, so fft_pass_neon returns to our caller */
+endfunc
+.endm
+
+        def_fft    32,    16,     8            @ arguments: n, n2, n4
+        def_fft    64,    32,    16
+        def_fft   128,    64,    32
+        def_fft   256,   128,    64
+        def_fft   512,   256,   128
+        def_fft  1024,   512,   256
+        def_fft  2048,  1024,   512
+        def_fft  4096,  2048,  1024
+        def_fft  8192,  4096,  2048
+        def_fft 16384,  8192,  4096
+        def_fft 32768, 16384,  8192
+        def_fft 65536, 32768, 16384
+
+function ff_fft_calc_neon, export=1             @ r0 = FFTContext *s, r1 = FFTComplex *z
+        ldr             r2,  [r0]               @ first word of the context (nbits)
+        sub             r2,  r2,  #2            @ fft_tab_neon starts with fft4_neon, so index = nbits - 2
+        movrel          r3,  fft_tab_neon
+        ldr             r3,  [r3, r2, lsl #2]   @ routine for this size; the index is not range-checked
+        mov             r0,  r1                 @ the per-size routines take z in r0
+        bx              r3                      @ tail call
+endfunc
+
+function ff_fft_permute_neon, export=1          @ r0 = FFTContext *s, r1 = FFTComplex *z
+        push            {r4,lr}
+        mov             r12, #1
+        ldr             r2,  [r0]       @ nbits
+        ldr             r3,  [r0, #12]  @ tmp_buf
+        ldr             r0,  [r0, #8]   @ revtab
+        lsl             r12, r12, r2            @ r12 = 1 << nbits
+        mov             r2,  r12                @ second copy of the count for the copy-back loop
+1:
+        vld1.32         {d0-d1}, [r1,:128]!     @ next two complex values of z
+        ldr             r4,  [r0], #4           @ two 16-bit revtab entries in one word
+        uxth            lr,  r4                 @ low halfword: index for the first value
+        uxth            r4,  r4,  ror #16       @ high halfword: index for the second value
+        add             lr,  r3,  lr,  lsl #3   @ tmp_buf + index * 8
+        add             r4,  r3,  r4,  lsl #3
+        vst1.32         {d0}, [lr,:64]
+        vst1.32         {d1}, [r4,:64]
+        subs            r12, r12, #2
+        bgt             1b
+
+        sub             r1,  r1,  r2,  lsl #3   @ rewind r1 to the start of z
+1:
+        vld1.32         {d0-d3}, [r3,:128]!     @ copy tmp_buf back to z, four complex values per pass
+        vst1.32         {d0-d3}, [r1,:128]!
+        subs            r2,  r2,  #4
+        bgt             1b
+
+        pop             {r4,pc}
+endfunc
+
+        .section .rodata
+        .align 4
+fft_tab_neon:                                   @ dispatch table read by ff_fft_calc_neon, index = nbits - 2
+        .word fft4_neon
+        .word fft8_neon
+        .word fft16_neon
+        .word fft32_neon
+        .word fft64_neon
+        .word fft128_neon
+        .word fft256_neon
+        .word fft512_neon
+        .word fft1024_neon
+        .word fft2048_neon
+        .word fft4096_neon
+        .word fft8192_neon
+        .word fft16384_neon
+        .word fft32768_neon
+        .word fft65536_neon
+ELF     .size fft_tab_neon, . - fft_tab_neon
+
+        .align 4                                @ both constants below are loaded with :128 alignment hints
+pmmp:   .float  +1.0, -1.0, -1.0, +1.0         @ sign mask: plus, minus, minus, plus
+mppm:   .float  -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2   @ sqrt(1/2) with signs minus, plus, plus, minus
+
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/rdft_neon.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/rdft_neon.S
new file mode 100644
index 00000000..4f8a1032
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/rdft_neon.S
@@ -0,0 +1,151 @@
+/*
+ * ARM NEON optimised RDFT
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+        preserve8
+
+function ff_rdft_calc_neon, export=1            @ r0 = RDFTContext *s, r1 = FFTSample *z
+        push            {r4-r8,lr}
+
+        ldr             r6,  [r0, #4]           @ inverse
+        mov             r4,  r0
+        mov             r5,  r1
+
+        lsls            r6,  r6,  #31           @ bit 0 of inverse moved to bit 31; Z is set when it was clear
+        bne             1f                      @ inverse: skip the FFT here, it runs at the end instead
+        add             r0,  r4,  #20           @ context offset 20 is passed as the FFTContext argument
+        bl              X(ff_fft_permute_neon)
+        add             r0,  r4,  #20
+        mov             r1,  r5
+        bl              X(ff_fft_calc_neon)
+1:
+        ldr             r12, [r4, #0]           @ nbits
+        mov             r2,  #1
+        lsl             r12, r2,  r12
+        add             r0,  r5,  #8
+        add             r1,  r5,  r12, lsl #2
+        lsr             r12, r12, #2
+        ldr             r2,  [r4, #12]          @ tcos
+        sub             r12, r12, #2
+        ldr             r3,  [r4, #16]          @ tsin
+        mov             r7,  r0
+        sub             r1,  r1,  #8
+        mov             lr,  r1
+        mov             r8,  #-8
+        vld1.32         {d0},     [r0,:64]!     @ d1[0,1]
+        vld1.32         {d1},     [r1,:64], r8  @ d2[0,1]
+        vld1.32         {d4},     [r2,:64]!     @ tcos[i]
+        vld1.32         {d5},     [r3,:64]!     @ tsin[i]
+        vmov.f32        d18, #0.5               @ k1
+        vdup.32         d19, r6
+        pld             [r0, #32]
+        veor            d19, d18, d19           @ k2
+        vmov.i32        d16, #0
+        vmov.i32        d17, #1<<31
+        pld             [r1, #-32]
+        vtrn.32         d16, d17
+        pld             [r2, #32]
+        vrev64.32       d16, d16                @ d16=1,0 d17=0,1
+        pld             [r3, #32]
+2:
+        veor            q1,  q0,  q8            @ -d1[0],d1[1], d2[0],-d2[1]
+        vld1.32         {d24},    [r0,:64]!     @  d1[0,1]
+        vadd.f32        d0,  d0,  d3            @  d1[0]+d2[0], d1[1]-d2[1]
+        vld1.32         {d25},    [r1,:64], r8  @  d2[0,1]
+        vadd.f32        d1,  d2,  d1            @ -d1[0]+d2[0], d1[1]+d2[1]
+        veor            q3,  q12, q8            @ -d1[0],d1[1], d2[0],-d2[1]
+        pld             [r0, #32]
+        vmul.f32        q10, q0,  q9            @  ev.re, ev.im, od.im, od.re
+        pld             [r1, #-32]
+        vadd.f32        d0,  d24, d7            @  d1[0]+d2[0], d1[1]-d2[1]
+        vadd.f32        d1,  d6,  d25           @ -d1[0]+d2[0], d1[1]+d2[1]
+        vmul.f32        q11, q0,  q9            @  ev.re, ev.im, od.im, od.re
+        veor            d7,  d21, d16           @ -od.im, od.re
+        vrev64.32       d3,  d21                @  od.re, od.im
+        veor            d6,  d20, d17           @  ev.re,-ev.im
+        veor            d2,  d3,  d16           @ -od.re, od.im
+        vmla.f32        d20, d3,  d4[1]
+        vmla.f32        d20, d7,  d5[1]
+        vmla.f32        d6,  d2,  d4[1]
+        vmla.f32        d6,  d21, d5[1]
+        vld1.32         {d4},     [r2,:64]!     @  tcos[i]
+        veor            d7,  d23, d16           @ -od.im, od.re
+        vld1.32         {d5},     [r3,:64]!     @  tsin[i]
+        veor            d24, d22, d17           @  ev.re,-ev.im
+        vrev64.32       d3,  d23                @  od.re, od.im
+        pld             [r2, #32]
+        veor            d2,  d3,  d16           @ -od.re, od.im
+        pld             [r3, #32]
+        vmla.f32        d22, d3,  d4[0]
+        vmla.f32        d22, d7,  d5[0]
+        vmla.f32        d24, d2,  d4[0]
+        vmla.f32        d24, d23, d5[0]
+        vld1.32         {d0},     [r0,:64]!     @  d1[0,1]
+        vld1.32         {d1},     [r1,:64], r8  @  d2[0,1]
+        vst1.32         {d20},    [r7,:64]!
+        vst1.32         {d6},     [lr,:64], r8
+        vst1.32         {d22},    [r7,:64]!
+        vst1.32         {d24},    [lr,:64], r8
+        subs            r12, r12, #2
+        bgt             2b
+
+        veor            q1,  q0,  q8            @ -d1[0],d1[1], d2[0],-d2[1]
+        vadd.f32        d0,  d0,  d3            @  d1[0]+d2[0], d1[1]-d2[1]
+        vadd.f32        d1,  d2,  d1            @ -d1[0]+d2[0], d1[1]+d2[1]
+        ldr             r2,  [r4, #8]           @  sign_convention
+        vmul.f32        q10, q0,  q9            @  ev.re, ev.im, od.im, od.re
+        add             r0,  r0,  #4
+        bfc             r2,  #0,  #31           @  keep only bit 31 of sign_convention
+        vld1.32         {d0[0]},  [r0,:32]
+        veor            d7,  d21, d16           @ -od.im, od.re
+        vrev64.32       d3,  d21                @  od.re, od.im
+        veor            d6,  d20, d17           @  ev.re,-ev.im
+        vld1.32         {d22},    [r5,:64]
+        vdup.32         d1,  r2
+        vmov            d23, d22
+        veor            d2,  d3,  d16           @ -od.re, od.im
+        vtrn.32         d22, d23
+        veor            d0,  d0,  d1
+        veor            d23, d23, d17
+        vmla.f32        d20, d3,  d4[1]
+        vmla.f32        d20, d7,  d5[1]
+        vmla.f32        d6,  d2,  d4[1]
+        vmla.f32        d6,  d21, d5[1]
+        vadd.f32        d22, d22, d23
+        vst1.32         {d20},    [r7,:64]
+        vst1.32         {d6},     [lr,:64]
+        vst1.32         {d0[0]},  [r0,:32]
+        vst1.32         {d22},    [r5,:64]
+
+        cmp             r6,  #0                 @ r6 is zero for the forward transform
+        popeq           {r4-r8,pc}              @ forward: done
+
+        vmul.f32        d22, d22, d18           @ inverse only: scale the first pair by k1 = 0.5
+        vst1.32         {d22},    [r5,:64]
+        add             r0,  r4,  #20
+        mov             r1,  r5
+        bl              X(ff_fft_permute_neon)
+        add             r0,  r4,  #20
+        mov             r1,  r5
+        pop             {r4-r8,lr}
+        b               X(ff_fft_calc_neon)     @ tail call
+endfunc
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/simple_idct_neon.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/simple_idct_neon.S
new file mode 100644
index 00000000..17cde583
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/simple_idct_neon.S
@@ -0,0 +1,372 @@
+/*
+ * ARM NEON IDCT
+ *
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * Based on Simple IDCT
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+#define W1  22725  //cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2  21407  //cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3  19266  //cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4  16383  //cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 gives 16384; NOTE(review): 16383 looks deliberate, confirm
+#define W5  12873  //cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6  8867   //cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7  4520   //cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4c ((1<<(COL_SHIFT-1))/W4)    /* column rounding bias divided by W4; idct_col4_neon adds it to col[0] before multiplying by w4 */
+#define ROW_SHIFT 11    /* narrowing shift used by idct_row4_neon */
+#define COL_SHIFT 20    /* column shift: 16 from vaddhn/vsubhn plus COL_SHIFT-16 in the store helpers */
+/* w1..w7 and w4c name the eight 16-bit lanes of d0/d1, loaded from idct_coeff_neon by idct_start */
+#define w1 d0[0]
+#define w2 d0[1]
+#define w3 d0[2]
+#define w4 d0[3]
+#define w5 d1[0]
+#define w6 d1[1]
+#define w7 d1[2]
+#define w4c d1[3]
+
+        .macro idct_col4_top    /* in: q15, d4, d6, d8; out: q11-q14 (from q15 and d6) and q9, q10, q5, q6 (from d4 and d8); q7, q8 are scratch */
+        vmull.s16       q7,  d6,  w2    /* q7   = W2 * col[2] */
+        vmull.s16       q8,  d6,  w6    /* q8   = W6 * col[2] */
+        vmull.s16       q9,  d4,  w1    /* q9   = W1 * col[1] */
+        vadd.i32        q11, q15, q7    /* q11  = q15 + W2 * col[2] */
+        vmull.s16       q10, d4,  w3    /* q10  = W3 * col[1] */
+        vadd.i32        q12, q15, q8    /* q12  = q15 + W6 * col[2] */
+        vmull.s16       q5,  d4,  w5    /* q5   = W5 * col[1] */
+        vsub.i32        q13, q15, q8    /* q13  = q15 - W6 * col[2] */
+        vmull.s16       q6,  d4,  w7    /* q6   = W7 * col[1] */
+        vsub.i32        q14, q15, q7    /* q14  = q15 - W2 * col[2] */
+        /* fold col[3] (d8) into the four accumulators started from col[1] */
+        vmlal.s16       q9,  d8,  w3    /* q9  += W3 * col[3] */
+        vmlsl.s16       q10, d8,  w7    /* q10 -= W7 * col[3] */
+        vmlsl.s16       q5,  d8,  w1    /* q5  -= W1 * col[3] */
+        vmlsl.s16       q6,  d8,  w5    /* q6  -= W5 * col[3] */
+        .endm
+
+        .text
+        .align 6
+
+function idct_row4_pld_neon    /* prefetches r0 + k*r1 for k = 0..7; has no return instruction, so execution continues into the code placed after it */
+        pld             [r0]
+        add             r3,  r0,  r1,  lsl #2    /* r3 = r0 + 4*r1 */
+        pld             [r0, r1]
+        pld             [r0, r1, lsl #1]
+        pld             [r3, -r1]    /* r0 + 3*r1 */
+        pld             [r3]
+        pld             [r3, r1]
+        add             r3,  r3,  r1,  lsl #1    /* r3 = r0 + 6*r1 */
+        pld             [r3]
+        pld             [r3, r1]
+endfunc
+
+function idct_row4_neon    /* transforms the 64 bytes at r2 in place and leaves r2 advanced by 64; clobbers r3, r4 */
+        vmov.i32        q15, #(1<<(ROW_SHIFT-1))    /* rounding term for the final >> ROW_SHIFT */
+        vld1.64         {d2-d5},  [r2,:128]!
+        vmlal.s16       q15, d2,  w4    /* q15  += W4 * col[0] */
+        vld1.64         {d6,d7},  [r2,:128]!
+        vorr            d10, d3,  d5
+        vld1.64         {d8,d9},  [r2,:128]!
+        add             r2,  r2,  #-64    /* rewind r2 to the start of the 64 bytes just loaded */
+
+        vorr            d11, d7,  d9
+        vorr            d10, d10, d11    /* d10 = d3 | d5 | d7 | d9 */
+        vmov            r3,  r4,  d10
+
+        idct_col4_top
+
+        orrs            r3,  r3,  r4    /* Z is set when d3, d5, d7 and d9 are all zero */
+        beq             1f    /* in that case skip the col[4]..col[7] terms */
+
+        vmull.s16       q7,  d3,  w4    /* q7   = W4 * col[4] */
+        vmlal.s16       q9,  d5,  w5    /* q9  += W5 * col[5] */
+        vmlsl.s16       q10, d5,  w1    /* q10 -= W1 * col[5] */
+        vmull.s16       q8,  d7,  w2    /* q8   = W2 * col[6] */
+        vmlal.s16       q5,  d5,  w7    /* q5  += W7 * col[5] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q12, q12, q7
+        vsub.i32        q13, q13, q7
+        vadd.i32        q14, q14, q7
+        vmlal.s16       q6,  d5,  w3    /* q6  += W3 * col[5] */
+        vmull.s16       q7,  d7,  w6    /* q7   = W6 * col[6] */
+        vmlal.s16       q9,  d9,  w7    /* q9  += W7 * col[7] */
+        vmlsl.s16       q10, d9,  w5    /* q10 -= W5 * col[7] */
+        vmlal.s16       q5,  d9,  w3    /* q5  += W3 * col[7] */
+        vmlsl.s16       q6,  d9,  w1    /* q6  -= W1 * col[7] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q12, q12, q8
+        vadd.i32        q13, q13, q8
+        vsub.i32        q14, q14, q7
+        /* sums q11..q14 + q9,q10,q5,q6 give d2,d4,d6,d8; the differences give d9,d7,d5,d3 */
+1:      vadd.i32        q3,  q11, q9
+        vadd.i32        q4,  q12, q10
+        vshrn.i32       d2,  q3,  #ROW_SHIFT
+        vshrn.i32       d4,  q4,  #ROW_SHIFT
+        vadd.i32        q7,  q13, q5
+        vadd.i32        q8,  q14, q6
+        vtrn.16         d2,  d4
+        vshrn.i32       d6,  q7,  #ROW_SHIFT
+        vshrn.i32       d8,  q8,  #ROW_SHIFT
+        vsub.i32        q14, q14, q6
+        vsub.i32        q11, q11, q9
+        vtrn.16         d6,  d8
+        vsub.i32        q13, q13, q5
+        vshrn.i32       d3,  q14, #ROW_SHIFT
+        vtrn.32         d2,  d6    /* with the vtrn.16 pairs: 4x4 transpose of d2, d4, d6, d8 */
+        vsub.i32        q12, q12, q10
+        vtrn.32         d4,  d8
+        vshrn.i32       d5,  q13, #ROW_SHIFT
+        vshrn.i32       d7,  q12, #ROW_SHIFT
+        vshrn.i32       d9,  q11, #ROW_SHIFT
+        /* same 4x4 transpose for d3, d5, d7, d9 */
+        vtrn.16         d3,  d5
+        vtrn.16         d7,  d9
+        vtrn.32         d3,  d7
+        vtrn.32         d5,  d9
+        /* write the 64 result bytes back over the input */
+        vst1.64         {d2-d5},  [r2,:128]!
+        vst1.64         {d6-d9},  [r2,:128]!
+
+        bx              lr
+endfunc
+
+function idct_col4_neon    /* reads eight 8-byte groups from r2 at a 16-byte stride; results in d2-d9; leaves r2 advanced by 128; clobbers ip, r4-r7 */
+        mov             ip,  #16
+        vld1.64         {d2}, [r2,:64], ip /* d2 = col[0] */
+        vdup.16         d30, w4c
+        vld1.64         {d4}, [r2,:64], ip /* d4 = col[1] */
+        vadd.i16        d30, d30, d2
+        vld1.64         {d6}, [r2,:64], ip /* d6 = col[2] */
+        vmull.s16       q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
+        vld1.64         {d8}, [r2,:64], ip /* d8 = col[3] */
+
+        ldrd            r4,  [r2]    /* r4:r5 = raw bits of col[4], only tested for zero */
+        ldrd            r6,  [r2, #16]    /* r6:r7 = raw bits of col[5] */
+        orrs            r4,  r4,  r5
+
+        idct_col4_top    /* only NEON instructions, so the flags set by orrs are still valid below */
+        addeq           r2,  r2,  #16    /* col[4] is all zero: step over it without loading */
+        beq             1f
+
+        vld1.64         {d3}, [r2,:64], ip /* d3 = col[4] */
+        vmull.s16       q7,  d3,  w4    /* q7   = W4 * col[4] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q12, q12, q7
+        vsub.i32        q13, q13, q7
+        vadd.i32        q14, q14, q7
+
+1:      orrs            r6,  r6,  r7    /* is col[5] all zero? */
+        ldrd            r4,  [r2, #16]    /* r2 is at col[5] here, so r4:r5 = col[6] */
+        addeq           r2,  r2,  #16
+        beq             2f
+
+        vld1.64         {d5}, [r2,:64], ip /* d5 = col[5] */
+        vmlal.s16       q9,  d5,  w5    /* q9  += W5 * col[5] */
+        vmlsl.s16       q10, d5,  w1    /* q10 -= W1 * col[5] */
+        vmlal.s16       q5,  d5,  w7    /* q5  += W7 * col[5] */
+        vmlal.s16       q6,  d5,  w3    /* q6  += W3 * col[5] */
+
+2:      orrs            r4,  r4,  r5    /* is col[6] all zero? */
+        ldrd            r4,  [r2, #16]    /* r2 is at col[6] here, so r4:r5 = col[7] */
+        addeq           r2,  r2,  #16
+        beq             3f
+
+        vld1.64         {d7}, [r2,:64], ip /* d7 = col[6] */
+        vmull.s16       q7,  d7,  w6    /* q7   = W6 * col[6] */
+        vmull.s16       q8,  d7,  w2    /* q8   = W2 * col[6] */
+        vadd.i32        q11, q11, q7
+        vsub.i32        q14, q14, q7
+        vsub.i32        q12, q12, q8
+        vadd.i32        q13, q13, q8
+
+3:      orrs            r4,  r4,  r5    /* is col[7] all zero? */
+        addeq           r2,  r2,  #16
+        beq             4f
+
+        vld1.64         {d9}, [r2,:64], ip /* d9 = col[7] */
+        vmlal.s16       q9,  d9,  w7    /* q9  += W7 * col[7] */
+        vmlsl.s16       q10, d9,  w5    /* q10 -= W5 * col[7] */
+        vmlal.s16       q5,  d9,  w3    /* q5  += W3 * col[7] */
+        vmlsl.s16       q6,  d9,  w1    /* q6  -= W1 * col[7] */
+
+4:      vaddhn.i32      d2,  q11, q9    /* d2..d5 = high 16 bits of the sums */
+        vaddhn.i32      d3,  q12, q10
+        vaddhn.i32      d4,  q13, q5
+        vaddhn.i32      d5,  q14, q6
+        vsubhn.i32      d9,  q11, q9    /* d9..d6 = high 16 bits of the differences */
+        vsubhn.i32      d8,  q12, q10
+        vsubhn.i32      d7,  q13, q5
+        vsubhn.i32      d6,  q14, q6
+
+        bx              lr
+endfunc
+
+        .align 6
+
+function idct_col4_st8_neon    /* narrows q1-q4 to unsigned bytes and stores 4 bytes to each of 8 rows at r0, stride r1; leaves r0 advanced by 8*r1 */
+        vqshrun.s16     d2,  q1,  #COL_SHIFT-16    /* >> (COL_SHIFT-16), saturated to 0..255 */
+        vqshrun.s16     d3,  q2,  #COL_SHIFT-16
+        vqshrun.s16     d4,  q3,  #COL_SHIFT-16
+        vqshrun.s16     d5,  q4,  #COL_SHIFT-16
+        vst1.32         {d2[0]}, [r0,:32], r1    /* each 32-bit lane holds the 4 bytes of one row */
+        vst1.32         {d2[1]}, [r0,:32], r1
+        vst1.32         {d3[0]}, [r0,:32], r1
+        vst1.32         {d3[1]}, [r0,:32], r1
+        vst1.32         {d4[0]}, [r0,:32], r1
+        vst1.32         {d4[1]}, [r0,:32], r1
+        vst1.32         {d5[0]}, [r0,:32], r1
+        vst1.32         {d5[1]}, [r0,:32], r1
+
+        bx              lr
+endfunc
+
+        .section .rodata    /* the function macro switches back to .text for the code that follows */
+        .align 4
+idct_coeff_neon:    /* eight 16-bit coefficients, loaded into d0/d1 by idct_start */
+        .short W1, W2, W3, W4, W5, W6, W7, W4c    /* order must match the w1..w7, w4c lane aliases */
+
+        .macro idct_start data    /* shared prologue: save registers, prefetch \data, load the coefficients into d0/d1 */
+        push            {r4-r7, lr}    /* r4-r7 are used by the row/column helpers; lr because the body uses bl */
+        pld             [\data]
+        pld             [\data, #64]
+        vpush           {d8-d15}    /* the helpers write q4-q7 (d8-d15) */
+        movrel          r3,  idct_coeff_neon    /* movrel is not defined in this file; presumably provided by asm.S */
+        vld1.64         {d0,d1}, [r3,:128]
+        .endm
+
+        .macro idct_end    /* shared epilogue: undoes idct_start and returns to the caller */
+        vpop            {d8-d15}
+        pop             {r4-r7, pc}    /* loads the saved lr into pc */
+        .endm
+
+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); r0 = dst, r1 = line_size, r2 = data */
+function ff_simple_idct_put_neon, export=1
+        idct_start      r2
+
+        bl              idct_row4_pld_neon    /* prefetches dst, then runs on into idct_row4_neon for the first 64 bytes */
+        bl              idct_row4_neon    /* second 64 bytes of data */
+        add             r2,  r2,  #-128    /* back to the start of data */
+        bl              idct_col4_neon
+        bl              idct_col4_st8_neon
+        sub             r0,  r0,  r1, lsl #3    /* undo the eight row steps made by the store helper */
+        add             r0,  r0,  #4    /* next four bytes of every output row */
+        add             r2,  r2,  #-120    /* rewind 128, advance 8: the other four columns */
+        bl              idct_col4_neon
+        bl              idct_col4_st8_neon
+
+        idct_end
+endfunc
+
+        .align 6
+
+function idct_col4_add8_neon    /* adds q1-q4, shifted, to 4 bytes in each of 8 rows at r0 (stride r1) and stores them back saturated; leaves r0 advanced by 8*r1 */
+        mov             ip,  r0    /* ip walks the same rows for the stores */
+
+        vld1.32         {d10[0]}, [r0,:32], r1    /* loads and arithmetic are interleaved; d10-d13 collect the existing bytes */
+        vshr.s16        q1,  q1,  #COL_SHIFT-16
+        vld1.32         {d10[1]}, [r0,:32], r1
+        vshr.s16        q2,  q2,  #COL_SHIFT-16
+        vld1.32         {d11[0]}, [r0,:32], r1
+        vshr.s16        q3,  q3,  #COL_SHIFT-16
+        vld1.32         {d11[1]}, [r0,:32], r1
+        vshr.s16        q4,  q4,  #COL_SHIFT-16
+        vld1.32         {d12[0]}, [r0,:32], r1
+        vaddw.u8        q1,  q1,  d10    /* widen the bytes and add them to the 16-bit results */
+        vld1.32         {d12[1]}, [r0,:32], r1
+        vaddw.u8        q2,  q2,  d11
+        vld1.32         {d13[0]}, [r0,:32], r1
+        vqmovun.s16     d2,  q1    /* saturate to 0..255 */
+        vld1.32         {d13[1]}, [r0,:32], r1
+        vaddw.u8        q3,  q3,  d12
+        vst1.32         {d2[0]},  [ip,:32], r1
+        vqmovun.s16     d3,  q2
+        vst1.32         {d2[1]},  [ip,:32], r1
+        vaddw.u8        q4,  q4,  d13
+        vst1.32         {d3[0]},  [ip,:32], r1
+        vqmovun.s16     d4,  q3
+        vst1.32         {d3[1]},  [ip,:32], r1
+        vqmovun.s16     d5,  q4
+        vst1.32         {d4[0]},  [ip,:32], r1
+        vst1.32         {d4[1]},  [ip,:32], r1
+        vst1.32         {d5[0]},  [ip,:32], r1
+        vst1.32         {d5[1]},  [ip,:32], r1
+
+        bx              lr
+endfunc
+
+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); r0 = dst, r1 = line_size, r2 = data */
+function ff_simple_idct_add_neon, export=1
+        idct_start      r2
+
+        bl              idct_row4_pld_neon    /* prefetches dst, then runs on into idct_row4_neon for the first 64 bytes */
+        bl              idct_row4_neon    /* second 64 bytes of data */
+        add             r2,  r2,  #-128    /* back to the start of data */
+        bl              idct_col4_neon
+        bl              idct_col4_add8_neon
+        sub             r0,  r0,  r1, lsl #3    /* undo the eight row steps made by the add helper */
+        add             r0,  r0,  #4    /* next four bytes of every output row */
+        add             r2,  r2,  #-120    /* rewind 128, advance 8: the other four columns */
+        bl              idct_col4_neon
+        bl              idct_col4_add8_neon
+
+        idct_end
+endfunc
+
+        .align 6
+
+function idct_col4_st16_neon    /* shifts q1-q4 and stores d2-d9 as eight 8-byte groups at r2, stride 16; leaves r2 advanced by 128; clobbers ip */
+        mov             ip,  #16
+
+        vshr.s16        q1,  q1,  #COL_SHIFT-16    /* remaining part of the column shift */
+        vshr.s16        q2,  q2,  #COL_SHIFT-16
+        vst1.64         {d2}, [r2,:64], ip
+        vshr.s16        q3,  q3,  #COL_SHIFT-16
+        vst1.64         {d3}, [r2,:64], ip
+        vshr.s16        q4,  q4,  #COL_SHIFT-16
+        vst1.64         {d4}, [r2,:64], ip
+        vst1.64         {d5}, [r2,:64], ip
+        vst1.64         {d6}, [r2,:64], ip
+        vst1.64         {d7}, [r2,:64], ip
+        vst1.64         {d8}, [r2,:64], ip
+        vst1.64         {d9}, [r2,:64], ip
+
+        bx              lr
+endfunc
+
+/* void ff_simple_idct_neon(DCTELEM *data); r0 = data, transformed in place */
+function ff_simple_idct_neon, export=1
+        idct_start      r0
+
+        mov             r2,  r0    /* the helpers address the coefficients through r2 */
+        bl              idct_row4_neon    /* first 64 bytes */
+        bl              idct_row4_neon    /* second 64 bytes */
+        add             r2,  r2,  #-128    /* back to the start of data */
+        bl              idct_col4_neon
+        add             r2,  r2,  #-128    /* rewind what idct_col4_neon consumed so the store overwrites the same columns */
+        bl              idct_col4_st16_neon
+        add             r2,  r2,  #-120    /* rewind 128, advance 8: the other four columns */
+        bl              idct_col4_neon
+        add             r2,  r2,  #-128
+        bl              idct_col4_st16_neon
+
+        idct_end
+endfunc
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/avfft.c b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.c
new file mode 100644
index 00000000..25fc4e09
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.c
@@ -0,0 +1,142 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "avfft.h"
+#include "fft.h"
+
+/* FFT */
+
+FFTContext *av_fft_init(int nbits, int inverse)
+{
+    FFTContext *s = av_malloc(sizeof(*s));
+
+    /* Returns NULL on allocation failure or when ff_fft_init() reports an error; the context is freed in that case. */
+    if (s && ff_fft_init(s, nbits, inverse) < 0)
+        av_freep(&s);
+    return s;
+}
+
+void av_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    (*s->fft_permute)(s, z); /* forward to the permute hook stored in the context */
+}
+
+void av_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    (*s->fft_calc)(s, z); /* forward to the transform hook stored in the context */
+}
+
+void av_fft_end(FFTContext *s)
+{
+    if (!s)
+        return; /* a NULL context is accepted and ignored */
+    ff_fft_end(s);
+    av_free(s);
+}
+
+#if CONFIG_MDCT
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale)
+{
+    FFTContext *s = av_malloc(sizeof(*s));
+
+    /* Returns NULL on allocation failure or when ff_mdct_init() reports an error; the context is freed in that case. */
+    if (s && ff_mdct_init(s, nbits, inverse, scale) < 0)
+        av_freep(&s);
+    return s;
+}
+
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    (*s->imdct_calc)(s, output, input); /* forward to the imdct_calc hook stored in the context */
+}
+
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    (*s->imdct_half)(s, output, input); /* forward to the imdct_half hook stored in the context */
+}
+
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    (*s->mdct_calc)(s, output, input); /* forward to the mdct_calc hook stored in the context */
+}
+
+void av_mdct_end(FFTContext *s)
+{
+    if (!s)
+        return; /* a NULL context is accepted and ignored */
+    ff_mdct_end(s);
+    av_free(s);
+}
+
+#endif /* CONFIG_MDCT */
+
+#if CONFIG_RDFT
+
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
+{
+    RDFTContext *s = av_malloc(sizeof(*s));
+
+    /* Returns NULL on allocation failure or when ff_rdft_init() reports an error; the context is freed in that case. */
+    if (s && ff_rdft_init(s, nbits, trans) < 0)
+        av_freep(&s);
+    return s;
+}
+
+void av_rdft_calc(RDFTContext *s, FFTSample *data)
+{
+    ff_rdft_calc(s, data); /* public wrapper: forwards unchanged to the internal entry point */
+}
+
+void av_rdft_end(RDFTContext *s)
+{
+    if (!s)
+        return; /* a NULL context is accepted and ignored */
+    ff_rdft_end(s);
+    av_free(s);
+}
+
+#endif /* CONFIG_RDFT */
+
+#if CONFIG_DCT
+
+DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)
+{
+    DCTContext *s = av_malloc(sizeof(*s));
+
+    /* Returns NULL on allocation failure or when ff_dct_init() reports an error; the context is freed in that case. */
+    if (s && ff_dct_init(s, nbits, inverse) < 0)
+        av_freep(&s);
+    return s;
+}
+
+void av_dct_calc(DCTContext *s, FFTSample *data)
+{
+    ff_dct_calc(s, data); /* public wrapper: forwards unchanged to the internal entry point */
+}
+
+void av_dct_end(DCTContext *s)
+{
+    if (!s)
+        return; /* a NULL context is accepted and ignored */
+    ff_dct_end(s);
+    av_free(s);
+}
+
+#endif /* CONFIG_DCT */
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/avfft.h b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.h
new file mode 100644
index 00000000..fdf30237
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.h
@@ -0,0 +1,103 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVFFT_H
+#define AVCODEC_AVFFT_H
+
+#include "publik.h"
+
+typedef float FFTSample; /* scalar sample type used throughout this API */
+
+typedef struct FFTComplex {
+    FFTSample re, im; /* real and imaginary part */
+} FFTComplex;
+
+typedef struct FFTContext FFTContext; /* opaque: the struct is not defined in this header */
+
+/**
+ * Set up a complex FFT. Returns NULL if the context cannot be allocated.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+PUBLIK FFTContext *av_fft_init(int nbits, int inverse);
+
+/**
+ * Do the permutation needed BEFORE calling av_fft_calc().
+ */
+PUBLIK void av_fft_permute(FFTContext *s, FFTComplex *z);
+
+/**
+ * Do a complex FFT with the parameters defined in av_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+PUBLIK void av_fft_calc(FFTContext *s, FFTComplex *z);
+/** Release a context obtained from av_fft_init(); a NULL s is ignored. */
+PUBLIK void av_fft_end(FFTContext *s);
+
+#if 0 /* MDCT entry points are not declared by this header; avfft.c defines them only under CONFIG_MDCT */
+FFTContext *av_mdct_init(int nbits, int inverse, double scale);
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_end(FFTContext *s);
+#endif
+
+/* Real Discrete Fourier Transform */
+
+enum RDFTransformType { /* selects the transform set up by av_rdft_init() */
+    DFT_R2C,
+    IDFT_C2R,
+    IDFT_R2C,
+    DFT_C2R,
+};
+
+typedef struct RDFTContext RDFTContext; /* opaque: the struct is not defined in this header */
+
+/**
+ * Set up a real FFT. Returns NULL if the context cannot be allocated.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+PUBLIK RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+PUBLIK void av_rdft_calc(RDFTContext *s, FFTSample *data); /* runs the transform on data */
+PUBLIK void av_rdft_end(RDFTContext *s); /* releases the context; a NULL s is ignored */
+
+/* Discrete Cosine Transform */
+
+typedef struct DCTContext DCTContext; /* opaque: the struct is not defined in this header */
+
+enum DCTTransformType { /* selects the transform set up by av_dct_init() */
+    DCT_II = 0,
+    DCT_III,
+    DCT_I,
+    DST_I,
+};
+
+/**
+ * Set up DCT. Returns NULL if the context cannot be allocated.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ * @param type            the type of transform
+ * @note the first element of the input of DST-I is ignored
+ */
+PUBLIK DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+PUBLIK void av_dct_calc(DCTContext *s, FFTSample *data); /* transforms data in place */
+PUBLIK void av_dct_end (DCTContext *s); /* releases the context; a NULL s is ignored */
+
+#endif /* AVCODEC_AVFFT_H */
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/dct.c b/plugins/supereq/ffmpeg_fft/libavcodec/dct.c
new file mode 100644
index 00000000..6ea1936e
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/dct.c
@@ -0,0 +1,228 @@
+/*
+ * (I)DCT Transforms
+ * Copyright (c) 2009 Peter Ross <pross@xvid.org>
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/**
+ * @file
+ * (Inverse) Discrete Cosine Transforms. These are also known as the
+ * type II and type III DCTs respectively.
+ */
+
+#include <math.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+#ifndef ARCH_ARM
+#include "x86/fft.h"
+#endif
+
+#define DCT32_FLOAT
+#include "dct32.h"
+
+/* sin(M_PI * x / (2*n)), read from the cosine table at the mirrored index n - x */
+#define SIN(s,n,x) (s->costab[(n) - (x)])
+
+/* cos(M_PI * x / (2*n)); s->costab is the table for size 4*n (see ff_dct_init) */
+#define COS(s,n,x) (s->costab[x])
+
+static void ff_dst_calc_I_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits; /* transform length */
+    int i;
+    /* DST-I computed in place: fold the input, run the real FFT, then unpack. */
+    data[0] = 0; /* the first input element is ignored */
+    for(i = 1; i < n/2; i++) {
+        float tmp1 = data[i    ];
+        float tmp2 = data[n - i];
+        float s = SIN(ctx, n, 2*i);
+
+        s *= tmp1 + tmp2;
+        tmp1 = (tmp1 - tmp2) * 0.5f;
+        data[i    ] = s + tmp1;
+        data[n - i] = s - tmp1;
+    }
+
+    data[n/2] *= 2; /* the middle element has no mirror partner in the loop above */
+    ff_rdft_calc(&ctx->rdft, data);
+
+    data[0] *= 0.5f;
+    /* walk the FFT output pairwise; data[i + 2] is read before a later iteration overwrites it */
+    for(i = 1; i < n-2; i += 2) {
+        data[i + 1] += data[i - 1];
+        data[i    ] = -data[i + 2];
+    }
+
+    data[n-1] = 0;
+}
+
+static void ff_dct_calc_I_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits; /* data[] holds n + 1 samples: index n is read and written below */
+    int i;
+    float next = -0.5f * (data[0] - data[n]);
+    /* fold mirrored pairs in place while accumulating the value that ends up in data[1] */
+    for(i = 0; i < n/2; i++) {
+        float tmp1 = data[i    ];
+        float tmp2 = data[n - i];
+        float s = SIN(ctx, n, 2*i);
+        float c = COS(ctx, n, 2*i);
+
+        c *= tmp1 - tmp2;
+        s *= tmp1 - tmp2;
+
+        next += c;
+
+        tmp1 = (tmp1 + tmp2) * 0.5f;
+        data[i    ] = tmp1 - s;
+        data[n - i] = tmp1 + s;
+    }
+
+    ff_rdft_calc(&ctx->rdft, data);
+    data[n] = data[1]; /* move the value the FFT left in slot 1 to the extra last slot */
+    data[1] = next;
+    /* odd outputs are built as a running difference from the previous odd output */
+    for(i = 3; i <= n; i += 2)
+        data[i] = data[i - 2] - data[i];
+}
+
+static void ff_dct_calc_III_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits; /* transform length */
+    int i;
+    /* DCT-III computed in place: rotate the input pairs, run the real FFT, then recombine with csc2[]. */
+    float next = data[n - 1]; /* saved now because the loop below overwrites data[n - 1] */
+    float inv_n = 1.0f / n;
+
+    for (i = n - 2; i >= 2; i -= 2) {
+        float val1 = data[i    ];
+        float val2 = data[i - 1] - data[i + 1];
+        float c = COS(ctx, n, i);
+        float s = SIN(ctx, n, i);
+
+        data[i    ] = c * val1 + s * val2;
+        data[i + 1] = s * val1 - c * val2;
+    }
+
+    data[1] = 2 * next;
+
+    ff_rdft_calc(&ctx->rdft, data);
+    /* scale by 1/n and combine mirrored outputs */
+    for (i = 0; i < n / 2; i++) {
+        float tmp1 = data[i        ] * inv_n;
+        float tmp2 = data[n - i - 1] * inv_n;
+        float csc = ctx->csc2[i] * (tmp1 - tmp2);
+
+        tmp1 += tmp2;
+        data[i        ] = tmp1 + csc;
+        data[n - i - 1] = tmp1 - csc;
+    }
+}
+
+static void ff_dct_calc_II_c(DCTContext *ctx, FFTSample *data)
+{
+    int n = 1 << ctx->nbits; /* transform length */
+    int i;
+    float next; /* running value written to the odd outputs in the last loop */
+    /* fold mirrored input pairs in place before the real FFT */
+    for (i=0; i < n/2; i++) {
+        float tmp1 = data[i        ];
+        float tmp2 = data[n - i - 1];
+        float s = SIN(ctx, n, 2*i + 1);
+
+        s *= tmp1 - tmp2;
+        tmp1 = (tmp1 + tmp2) * 0.5f;
+
+        data[i    ] = tmp1 + s;
+        data[n-i-1] = tmp1 - s;
+    }
+
+    ff_rdft_calc(&ctx->rdft, data);
+
+    next = data[1] * 0.5;
+    data[1] *= -1;
+    /* walk the FFT output pairs from the top; each step stores the current next, then updates it */
+    for (i = n - 2; i >= 0; i -= 2) {
+        float inr = data[i    ];
+        float ini = data[i + 1];
+        float c = COS(ctx, n, i);
+        float s = SIN(ctx, n, i);
+
+        data[i  ] = c * inr + s * ini;
+
+        data[i+1] = next;
+
+        next +=     s * inr - c * ini;
+    }
+}
+
+static void dct32_func(DCTContext *ctx, FFTSample *data)
+{
+    ctx->dct32(data, data); /* in place: the same buffer is passed as output and input */
+}
+
+void ff_dct_calc(DCTContext *s, FFTSample *data)
+{
+    (*s->dct_calc)(s, data); /* run whichever kernel ff_dct_init() selected */
+}
+
+av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
+{
+    int n = 1 << nbits;
+    int i;
+    /* Returns 0 on success, -1 when the csc2 allocation or the RDFT setup fails. */
+    s->nbits    = nbits;
+    s->inverse  = inverse;
+
+    /* costab: cosine table for size 4*n, the one the SIN()/COS() macros index */
+    ff_init_ff_cos_tabs(nbits+2);
+    s->costab = ff_cos_tabs[nbits+2];
+
+    s->csc2 = av_malloc(n/2 * sizeof(FFTSample));
+    if (!s->csc2) /* was unchecked: the fill loop below would write through NULL */
+        return -1;
+    if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) {
+        av_free(s->csc2);
+        s->csc2 = NULL; /* so that a later ff_dct_end() cannot free it a second time */
+        return -1;
+    }
+
+    /* csc2[i] = 0.5 / sin(pi * (2*i + 1) / (2*n)); read by ff_dct_calc_III_c */
+    for (i = 0; i < n/2; i++)
+        s->csc2[i] = 0.5 / sin((M_PI / (2*n) * (2*i + 1)));
+
+    switch(inverse) {
+    case DCT_I  : s->dct_calc = ff_dct_calc_I_c; break;
+    case DCT_II : s->dct_calc = ff_dct_calc_II_c ; break;
+    case DCT_III: s->dct_calc = ff_dct_calc_III_c; break;
+    case DST_I  : s->dct_calc = ff_dst_calc_I_c; break;
+    }
+    if (inverse == DCT_II && nbits == 5) /* the 32-point DCT-II goes through the dedicated dct32 kernel */
+        s->dct_calc = dct32_func;
+    s->dct32 = dct32;
+    if (HAVE_MMX)     ff_dct_init_mmx(s);
+    return 0;
+}
+
+av_cold void ff_dct_end(DCTContext *s)
+{
+    ff_rdft_end(&s->rdft); /* tear down the embedded RDFT context */
+    av_free(s->csc2); /* table allocated by ff_dct_init(); s itself is not freed here */
+}
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/dct32.c b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.c
new file mode 100644
index 00000000..3e6ad78d
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.c
@@ -0,0 +1,262 @@
+/*
+ * Template for the Discrete Cosine Transform for 32 samples
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dct32.h"
+
+/* COSj_k = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))), stored divided by a
+ * power of two; every BF()/BF0() use passes the shift s that multiplies the
+ * value back through MULH3(tmp1, c, 1<<(s)). */
+
+#define COS0_0  FIXHR(0.50060299823519630134/2)
+#define COS0_1  FIXHR(0.50547095989754365998/2)
+#define COS0_2  FIXHR(0.51544730992262454697/2)
+#define COS0_3  FIXHR(0.53104259108978417447/2)
+#define COS0_4  FIXHR(0.55310389603444452782/2)
+#define COS0_5  FIXHR(0.58293496820613387367/2)
+#define COS0_6  FIXHR(0.62250412303566481615/2)
+#define COS0_7  FIXHR(0.67480834145500574602/2)
+#define COS0_8  FIXHR(0.74453627100229844977/2)
+#define COS0_9  FIXHR(0.83934964541552703873/2)
+#define COS0_10 FIXHR(0.97256823786196069369/2)
+#define COS0_11 FIXHR(1.16943993343288495515/4)
+#define COS0_12 FIXHR(1.48416461631416627724/4)
+#define COS0_13 FIXHR(2.05778100995341155085/8)
+#define COS0_14 FIXHR(3.40760841846871878570/8)
+#define COS0_15 FIXHR(10.19000812354805681150/32)
+
+#define COS1_0 FIXHR(0.50241928618815570551/2)
+#define COS1_1 FIXHR(0.52249861493968888062/2)
+#define COS1_2 FIXHR(0.56694403481635770368/2)
+#define COS1_3 FIXHR(0.64682178335999012954/2)
+#define COS1_4 FIXHR(0.78815462345125022473/2)
+#define COS1_5 FIXHR(1.06067768599034747134/4)
+#define COS1_6 FIXHR(1.72244709823833392782/4)
+#define COS1_7 FIXHR(5.10114861868916385802/16)
+
+#define COS2_0 FIXHR(0.50979557910415916894/2)
+#define COS2_1 FIXHR(0.60134488693504528054/2)
+#define COS2_2 FIXHR(0.89997622313641570463/2)
+#define COS2_3 FIXHR(2.56291544774150617881/8)
+
+#define COS3_0 FIXHR(0.54119610014619698439/2)
+#define COS3_1 FIXHR(1.30656296487637652785/4)
+
+#define COS4_0 FIXHR(0.70710678118654752439/2)
+
+/* butterfly operator: val##a = a + b, val##b = (a - b) * c * (1<<s); needs tmp0/tmp1 in the caller's scope */
+#define BF(a, b, c, s)\
+{\
+    tmp0 = val##a + val##b;\
+    tmp1 = val##a - val##b;\
+    val##a = tmp0;\
+    val##b = MULH3(tmp1, c, 1<<(s));\
+}
+/* same butterfly, with the operands read from tab[a] and tab[b] instead of val##a and val##b */
+#define BF0(a, b, c, s)\
+{\
+    tmp0 = tab[a] + tab[b];\
+    tmp1 = tab[a] - tab[b];\
+    val##a = tmp0;\
+    val##b = MULH3(tmp1, c, 1<<(s));\
+}
+/* two butterflies with +COS4_0 and -COS4_0, then val##c += val##d */
+#define BF1(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    val##c += val##d;\
+}
+/* BF1 followed by three more accumulations: a += c, c += b, b += d */
+#define BF2(a, b, c, d)\
+{\
+    BF(a, b, COS4_0, 1);\
+    BF(c, d,-COS4_0, 1);\
+    val##c += val##d;\
+    val##a += val##c;\
+    val##c += val##b;\
+    val##b += val##d;\
+}
+/* val##a += val##b */
+#define ADD(a, b) val##a += val##b
+
+/* DCT32 without 1/sqrt(2) coef zero scaling. Reads 32 values from tab and writes 32 values to out. */
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
+{
+    INTFLOAT tmp0, tmp1;
+    /* tmp0/tmp1 are scratch for the BF macros; valN carries element N between passes */
+    INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
+             val8 , val9 , val10, val11, val12, val13, val14, val15,
+             val16, val17, val18, val19, val20, val21, val22, val23,
+             val24, val25, val26, val27, val28, val29, val30, val31;
+    /* Only the pass 1 steps (BF0) read tab[], and all of them run before the first out[] store, so out may alias tab. */
+    /* pass 1 */
+    BF0( 0, 31, COS0_0 , 1);
+    BF0(15, 16, COS0_15, 5);
+    /* pass 2 */
+    BF( 0, 15, COS1_0 , 1);
+    BF(16, 31,-COS1_0 , 1);
+    /* pass 1 */
+    BF0( 7, 24, COS0_7 , 1);
+    BF0( 8, 23, COS0_8 , 1);
+    /* pass 2 */
+    BF( 7,  8, COS1_7 , 4);
+    BF(23, 24,-COS1_7 , 4);
+    /* pass 3 */
+    BF( 0,  7, COS2_0 , 1);
+    BF( 8, 15,-COS2_0 , 1);
+    BF(16, 23, COS2_0 , 1);
+    BF(24, 31,-COS2_0 , 1);
+    /* pass 1 */
+    BF0( 3, 28, COS0_3 , 1);
+    BF0(12, 19, COS0_12, 2);
+    /* pass 2 */
+    BF( 3, 12, COS1_3 , 1);
+    BF(19, 28,-COS1_3 , 1);
+    /* pass 1 */
+    BF0( 4, 27, COS0_4 , 1);
+    BF0(11, 20, COS0_11, 2);
+    /* pass 2 */
+    BF( 4, 11, COS1_4 , 1);
+    BF(20, 27,-COS1_4 , 1);
+    /* pass 3 */
+    BF( 3,  4, COS2_3 , 3);
+    BF(11, 12,-COS2_3 , 3);
+    BF(19, 20, COS2_3 , 3);
+    BF(27, 28,-COS2_3 , 3);
+    /* pass 4 */
+    BF( 0,  3, COS3_0 , 1);
+    BF( 4,  7,-COS3_0 , 1);
+    BF( 8, 11, COS3_0 , 1);
+    BF(12, 15,-COS3_0 , 1);
+    BF(16, 19, COS3_0 , 1);
+    BF(20, 23,-COS3_0 , 1);
+    BF(24, 27, COS3_0 , 1);
+    BF(28, 31,-COS3_0 , 1);
+
+    /* passes 1 to 4 again for the remaining sixteen inputs */
+
+    /* pass 1 */
+    BF0( 1, 30, COS0_1 , 1);
+    BF0(14, 17, COS0_14, 3);
+    /* pass 2 */
+    BF( 1, 14, COS1_1 , 1);
+    BF(17, 30,-COS1_1 , 1);
+    /* pass 1 */
+    BF0( 6, 25, COS0_6 , 1);
+    BF0( 9, 22, COS0_9 , 1);
+    /* pass 2 */
+    BF( 6,  9, COS1_6 , 2);
+    BF(22, 25,-COS1_6 , 2);
+    /* pass 3 */
+    BF( 1,  6, COS2_1 , 1);
+    BF( 9, 14,-COS2_1 , 1);
+    BF(17, 22, COS2_1 , 1);
+    BF(25, 30,-COS2_1 , 1);
+
+    /* pass 1 */
+    BF0( 2, 29, COS0_2 , 1);
+    BF0(13, 18, COS0_13, 3);
+    /* pass 2 */
+    BF( 2, 13, COS1_2 , 1);
+    BF(18, 29,-COS1_2 , 1);
+    /* pass 1 */
+    BF0( 5, 26, COS0_5 , 1);
+    BF0(10, 21, COS0_10, 1);
+    /* pass 2 */
+    BF( 5, 10, COS1_5 , 2);
+    BF(21, 26,-COS1_5 , 2);
+    /* pass 3 */
+    BF( 2,  5, COS2_2 , 1);
+    BF(10, 13,-COS2_2 , 1);
+    BF(18, 21, COS2_2 , 1);
+    BF(26, 29,-COS2_2 , 1);
+    /* pass 4 */
+    BF( 1,  2, COS3_1 , 2);
+    BF( 5,  6,-COS3_1 , 2);
+    BF( 9, 10, COS3_1 , 2);
+    BF(13, 14,-COS3_1 , 2);
+    BF(17, 18, COS3_1 , 2);
+    BF(21, 22,-COS3_1 , 2);
+    BF(25, 26, COS3_1 , 2);
+    BF(29, 30,-COS3_1 , 2);
+
+    /* pass 5 */
+    BF1( 0,  1,  2,  3);
+    BF2( 4,  5,  6,  7);
+    BF1( 8,  9, 10, 11);
+    BF2(12, 13, 14, 15);
+    BF1(16, 17, 18, 19);
+    BF2(20, 21, 22, 23);
+    BF1(24, 25, 26, 27);
+    BF2(28, 29, 30, 31);
+
+    /* pass 6 */
+    /* chained accumulation: each ADD uses the value updated by the one before it */
+    ADD( 8, 12);
+    ADD(12, 10);
+    ADD(10, 14);
+    ADD(14,  9);
+    ADD( 9, 13);
+    ADD(13, 11);
+    ADD(11, 15);
+    /* val0..val15 go to the even output slots */
+    out[ 0] = val0;
+    out[16] = val1;
+    out[ 8] = val2;
+    out[24] = val3;
+    out[ 4] = val4;
+    out[20] = val5;
+    out[12] = val6;
+    out[28] = val7;
+    out[ 2] = val8;
+    out[18] = val9;
+    out[10] = val10;
+    out[26] = val11;
+    out[ 6] = val12;
+    out[22] = val13;
+    out[14] = val14;
+    out[30] = val15;
+    /* same chained accumulation for val24..val31 */
+    ADD(24, 28);
+    ADD(28, 26);
+    ADD(26, 30);
+    ADD(30, 25);
+    ADD(25, 29);
+    ADD(29, 27);
+    ADD(27, 31);
+    /* the odd output slots are sums of pairs from val16..val31 */
+    out[ 1] = val16 + val24;
+    out[17] = val17 + val25;
+    out[ 9] = val18 + val26;
+    out[25] = val19 + val27;
+    out[ 5] = val20 + val28;
+    out[21] = val21 + val29;
+    out[13] = val22 + val30;
+    out[29] = val23 + val31;
+    out[ 3] = val24 + val20;
+    out[19] = val25 + val21;
+    out[11] = val26 + val22;
+    out[27] = val27 + val23;
+    out[ 7] = val28 + val18;
+    out[23] = val29 + val19;
+    out[15] = val30 + val17;
+    out[31] = val31;
+}
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/dct32.h b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.h
new file mode 100644
index 00000000..dc2d847a
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.h
@@ -0,0 +1,10 @@
+#ifndef DCT_32_H
+#define DCT_32_H
+/* Floating-point definitions of the sample type and arithmetic helpers that accompany dct32(). */
+#define FIXHR(x)       ((float)(x))    /* cast x to float */
+#define MULH3(x, y, s) ((s)*(y)*(x))   /* plain product of the three arguments */
+#define INTFLOAT float
+/* Writes out[0]..out[31]; tab is read-only input (presumably 32 samples -- confirm in dct32.c). */
+void dct32(INTFLOAT *out, const INTFLOAT *tab);
+
+#endif /* DCT_32_H */
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/fft.c b/plugins/supereq/ffmpeg_fft/libavcodec/fft.c
new file mode 100644
index 00000000..04082bf4
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/fft.c
@@ -0,0 +1,300 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FFT/IFFT transforms.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+
+/* Each ff_cos_N holds N/2 values: cos(2*pi*x/N) for 0<=x<=N/4, followed by its reverse */
+#if !CONFIG_HARDCODED_TABLES
+COSTABLE(16);
+COSTABLE(32);
+COSTABLE(64);
+COSTABLE(128);
+COSTABLE(256);
+COSTABLE(512);
+COSTABLE(1024);
+COSTABLE(2048);
+COSTABLE(4096);
+COSTABLE(8192);
+COSTABLE(16384);
+COSTABLE(32768);
+COSTABLE(65536);
+#endif /* !CONFIG_HARDCODED_TABLES */
+COSTABLE_CONST FFTSample * const ff_cos_tabs[] = { /* indexed by log2(N) */
+    NULL, NULL, NULL, NULL, /* no tables below N = 16 */
+    ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
+    ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
+};
+
+/* Recursively compute the split-radix ordering index of element i in an n-point transform. */
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    const int half = n >> 1, quarter = n >> 2;
+    if (n <= 2)
+        return i & 1;
+    if ((i & half) == 0)    /* bit n/2 clear: recurse on the half-size transform */
+        return 2 * split_radix_permutation(i, half, inverse);
+    return 4 * split_radix_permutation(i, quarter, inverse) + ((inverse == !(i & quarter)) ? 1 : -1);
+}
+
+/* Fill ff_cos_tabs[index] with cos(2*pi*k/size), size = 1 << index; no-op with hardcoded tables. */
+av_cold void ff_init_ff_cos_tabs(int index)
+{
+#if !CONFIG_HARDCODED_TABLES
+    int k, size = 1 << index, quarter = size / 4;
+    const double step = 2*M_PI/size;
+    FFTSample *const table = ff_cos_tabs[index];
+    for (k = 0; k <= quarter; k++)
+        table[k] = cos(k*step);
+    for (k = 1; k < quarter; k++)
+        table[size/2 - k] = table[k];   /* second quarter mirrors the first */
+#endif
+}
+
+/* Set up s for a complex FFT of 1 << nbits points (2 <= nbits <= 16); returns 0, or -1 on failure. */
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, n;
+    /* Clear both buffers up front so the fail path never passes av_freep() an uninitialized pointer. */
+    s->revtab  = NULL;
+    s->tmp_buf = NULL;
+    if (nbits < 2 || nbits > 16)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+    if (!s->tmp_buf)
+        goto fail;
+    s->inverse = inverse;
+
+    s->fft_permute = ff_fft_permute_c;
+    s->fft_calc    = ff_fft_calc_c;
+#if CONFIG_MDCT
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+    s->mdct_calc   = ff_mdct_calc_c;
+#endif
+#if ARCH_ARM
+    ff_fft_init_arm(s);
+#elif HAVE_ALTIVEC
+    if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
+#elif HAVE_MMX
+    if (HAVE_MMX)     ff_fft_init_mmx(s);
+#endif
+    /* Cosine tables exist only from index 4 (16 points) upward. */
+    for (j = 4; j <= nbits; j++)
+        ff_init_ff_cos_tabs(j);
+    for (i = 0; i < n; i++)
+        s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->tmp_buf);
+    return -1;
+}
+
+/* Scatter z into s->tmp_buf at the positions listed in s->revtab, then copy the result back into z. */
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    int k, count = 1 << s->nbits;
+    FFTComplex *const scratch = s->tmp_buf;
+    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+    for (k = 0; k != count; ++k) scratch[s->revtab[k]] = z[k];
+    memcpy(z, scratch, count * sizeof(FFTComplex));
+}
+
+av_cold void ff_fft_end(FFTContext *s)
+{   /* Release the two buffers that ff_fft_init() allocates with av_malloc(). */
+    av_freep(&s->revtab);
+    av_freep(&s->tmp_buf);
+}
+
+#define sqrthalf (float)M_SQRT1_2 /* 1/sqrt(2) as a float */
+/* BF: x = a - b, then y = a + b. x is stored first, so y may alias a or b but x must not. */
+#define BF(x,y,a,b) {\
+    x = a - b;\
+    y = a + b;\
+}
+/* BUTTERFLIES: combine a0,a1 with the terms held in t1,t2,t5,t6; updates a0..a3 in place, clobbers t3..t6. */
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, a0.re, t5);\
+    BF(a3.im, a1.im, a1.im, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, a1.re, t4);\
+    BF(a2.im, a0.im, a0.im, t6);\
+}
+
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, r0, t5);\
+    BF(a3.im, a1.im, i1, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, r1, t4);\
+    BF(a2.im, a0.im, i0, t6);\
+}
+/* TRANSFORM: t1,t2 = a2*(wre - i*wim) and t5,t6 = a3*(wre + i*wim), then BUTTERFLIES on a0..a3. */
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
+    t1 = a2.re * wre + a2.im * wim;\
+    t2 = a2.im * wre - a2.re * wim;\
+    t5 = a3.re * wre - a3.im * wim;\
+    t6 = a3.im * wre + a3.re * wim;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+/* TRANSFORM_ZERO: TRANSFORM for wre = 1, wim = 0, so a2 and a3 are used without multiplication. */
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
+    t1 = a2.re;\
+    t2 = a2.im;\
+    t5 = a3.re;\
+    t6 = a3.im;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+/* PASS(name): define a combining pass over z[0...8n-1] with twiddles w[1...2n-1]; the do-while needs n >= 2 */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+    FFTSample t1, t2, t3, t4, t5, t6;\
+    int o1 = 2*n;\
+    int o2 = 4*n;\
+    int o3 = 6*n;\
+    const FFTSample *wim = wre+o1;\
+    n--;\
+\
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    do {\
+        z += 2;\
+        wre += 2;\
+        wim -= 2;\
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    } while(--n);\
+}
+/* Instantiated twice: pass() uses BUTTERFLIES, pass_big() the load-before-store BUTTERFLIES_BIG. */
+PASS(pass)
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+/* DECL_FFT: fft<n> runs fft<n2> on z, fft<n4> at z+2*n4 and z+3*n4, then one pass with ff_cos_<n>. */
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z);\
+    fft##n4(z+n4*2);\
+    fft##n4(z+n4*3);\
+    pass(z,ff_cos_##n,n4/2);\
+}
+
+static void fft4(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    /* In-place 4-point transform of z[0..3]; each BF stores a difference (1st arg) and a sum (2nd arg). */
+    BF(t3, t1, z[0].re, z[1].re);
+    BF(t8, t6, z[3].re, z[2].re);
+    BF(z[2].re, z[0].re, t1, t6);
+    BF(t4, t2, z[0].im, z[1].im);
+    BF(t7, t5, z[2].im, z[3].im);
+    BF(z[3].im, z[1].im, t4, t8);
+    BF(z[3].re, z[1].re, t3, t7);
+    BF(z[2].im, z[0].im, t2, t5);
+}
+
+static void fft8(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+    /* In-place 8-point transform of z[0..7]: fft4 on z[0..3] first, then z[4..7] are folded in. */
+    fft4(z);
+
+    BF(t1, z[5].re, z[4].re, -z[5].re);
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t3, z[7].re, z[6].re, -z[7].re);
+    BF(t4, z[7].im, z[6].im, -z[7].im);
+    BF(t8, t1, t3, t1);
+    BF(t7, t2, t2, t4);
+    BF(z[4].re, z[0].re, z[0].re, t1);
+    BF(z[4].im, z[0].im, z[0].im, t2);
+    BF(z[6].re, z[2].re, z[2].re, t7);
+    BF(z[6].im, z[2].im, z[2].im, t8);
+    /* z[1], z[3], z[5], z[7] are combined with wre = wim = sqrthalf. */
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
+}
+
+#if !CONFIG_SMALL
+static void fft16(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6;
+    /* Hand-written 16-point transform: same sub-transforms as DECL_FFT(16,8,4), combining step unrolled. */
+    fft8(z);
+    fft4(z+8);
+    fft4(z+12);
+
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+    TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
+    TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
+}
+#else /* CONFIG_SMALL: use the macro-generated version */
+DECL_FFT(16,8,4)
+#endif /* !CONFIG_SMALL */
+DECL_FFT(32,16,8) /* fft32 .. fft512 are generated while `pass` still names pass() */
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+#if !CONFIG_SMALL
+#define pass pass_big /* fft1024 and larger call pass_big() unless CONFIG_SMALL is set */
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+
+static void (* const fft_dispatch[])(FFTComplex*) = {
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
+};
+/* fft_dispatch[0] is the 4-point transform (nbits == 2), so the table is indexed by nbits - 2. */
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
+{
+    fft_dispatch[s->nbits-2](z);
+}
+
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/fft.h b/plugins/supereq/ffmpeg_fft/libavcodec/fft.h
new file mode 100644
index 00000000..b2e0f540
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/fft.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFT_H
+#define AVCODEC_FFT_H
+
+#include <stdint.h>
+#include "../config.h"
+#include "libavutil/mem.h"
+#include "avfft.h"
+
+/* FFT computation */
+
+struct FFTContext {
+    int nbits;              /* log2 of the number of complex points, as given to ff_fft_init() */
+    int inverse;            /* the inverse flag given to ff_fft_init() */
+    uint16_t *revtab;       /* 1 << nbits reordering indices built by ff_fft_init() */
+    FFTComplex *tmp_buf;    /* 1 << nbits scratch elements used by ff_fft_permute_c() */
+    int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
+    int mdct_bits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    void (*fft_permute)(struct FFTContext *s, FFTComplex *z); /* ff_fft_init() installs ff_fft_permute_c */
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);    /* ff_fft_init() installs ff_fft_calc_c */
+    void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    int permutation;        /* NOTE(review): presumably one of FF_MDCT_PERM_* below -- confirm */
+#define FF_MDCT_PERM_NONE       0
+#define FF_MDCT_PERM_INTERLEAVE 1
+};
+
+#if CONFIG_HARDCODED_TABLES
+#define COSTABLE_CONST const
+#define SINTABLE_CONST const
+#define SINETABLE_CONST const
+#else
+#define COSTABLE_CONST
+#define SINTABLE_CONST
+#define SINETABLE_CONST
+#endif /* CONFIG_HARDCODED_TABLES */
+/* COSTABLE/SINTABLE name an array of size/2 FFTSample, SINETABLE one of size floats, via DECLARE_ALIGNED(16, ...). */
+#define COSTABLE(size) \
+    COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2]
+#define SINTABLE(size) \
+    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
+#define SINETABLE(size) \
+    SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
+extern COSTABLE(16);
+extern COSTABLE(32);
+extern COSTABLE(64);
+extern COSTABLE(128);
+extern COSTABLE(256);
+extern COSTABLE(512);
+extern COSTABLE(1024);
+extern COSTABLE(2048);
+extern COSTABLE(4096);
+extern COSTABLE(8192);
+extern COSTABLE(16384);
+extern COSTABLE(32768);
+extern COSTABLE(65536);
+extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17]; /* indexed by log2 of the table size */
+
+/**
+ * Initialize the cosine table in ff_cos_tabs[index] (the table for size 1 << index).
+ * @param index index in ff_cos_tabs array of the table to initialize; entries 0-3 are NULL
+ */
+void ff_init_ff_cos_tabs(int index);
+/* The sine tables are defined in rdft.c and filled by ff_rdft_init(). */
+extern SINTABLE(16);
+extern SINTABLE(32);
+extern SINTABLE(64);
+extern SINTABLE(128);
+extern SINTABLE(256);
+extern SINTABLE(512);
+extern SINTABLE(1024);
+extern SINTABLE(2048);
+extern SINTABLE(4096);
+extern SINTABLE(8192);
+extern SINTABLE(16384);
+extern SINTABLE(32768);
+extern SINTABLE(65536);
+
+/**
+ * Set up a complex FFT; returns 0 on success, -1 if nbits is outside 2..16 or an allocation fails.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse);
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+/* Architecture-specific setup hooks; ff_fft_init() calls the FFT one selected at build time. */
+void ff_fft_init_altivec(FFTContext *s);
+void ff_fft_init_mmx(FFTContext *s);
+void ff_fft_init_arm(FFTContext *s);
+void ff_dct_init_mmx(DCTContext *s);
+
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc().
+ */
+static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    s->fft_permute(s, z);
+}
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    s->fft_calc(s, z);
+}
+void ff_fft_end(FFTContext *s); /* frees the revtab and tmp_buf buffers of s */
+
+/* MDCT computation */
+/* Wrappers that call through the context's function pointers; ff_fft_init() sets those only if CONFIG_MDCT. */
+static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_calc(s, output, input);
+}
+static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_half(s, output, input);
+}
+
+static inline void ff_mdct_calc(FFTContext *s, FFTSample *output,
+                                const FFTSample *input)
+{
+    s->mdct_calc(s, output, input);
+}
+
+/**
+ * Maximum window size for ff_kbd_window_init.
+ */
+#define FF_KBD_WINDOW_MAX 1024
+
+/**
+ * Generate a Kaiser-Bessel Derived Window.
+ * @param   window  pointer to half window
+ * @param   alpha   determines window shape
+ * @param   n       size of half window, max FF_KBD_WINDOW_MAX
+ */
+void ff_kbd_window_init(float *window, float alpha, int n);
+
+/**
+ * Generate a sine window.
+ * @param   window  pointer to half window
+ * @param   n       size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+
+/**
+ * Initialize the specified entry of ff_sine_windows.
+ */
+void ff_init_ff_sine_windows(int index);
+extern SINETABLE(  32);
+extern SINETABLE(  64);
+extern SINETABLE( 128);
+extern SINETABLE( 256);
+extern SINETABLE( 512);
+extern SINETABLE(1024);
+extern SINETABLE(2048);
+extern SINETABLE(4096);
+extern SINETABLE_CONST float * const ff_sine_windows[13];
+/* MDCT entry points; the *_c functions are what ff_fft_init() installs in the context under CONFIG_MDCT. */
+int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+struct RDFTContext {
+    int nbits;              /* log2 of the real input length, as given to ff_rdft_init() */
+    int inverse;            /* 1 for IDFT_C2R and DFT_C2R, else 0 */
+    int sign_convention;    /* 1 for IDFT_R2C and DFT_C2R, else -1 */
+
+    /* pre/post rotation tables */
+    const FFTSample *tcos;          /* ff_rdft_init() points this at ff_cos_tabs[nbits] */
+    SINTABLE_CONST FFTSample *tsin; /* ff_rdft_init() points this into ff_sin_tabs[nbits] */
+    FFTContext fft;                 /* complex FFT set up with nbits-1 */
+    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
+};
+
+/**
+ * Set up a real FFT; returns 0 on success, -1 on failure (nbits must be within 4..16).
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_end(RDFTContext *s);
+
+void ff_rdft_init_arm(RDFTContext *s);
+/* Run the transform installed in s->rdft_calc in place on data. */
+static av_always_inline void ff_rdft_calc(RDFTContext *s, FFTSample *data)
+{
+    s->rdft_calc(s, data);
+}
+
+/* Discrete Cosine Transform */
+
+struct DCTContext { /* state shared by the ff_dct_* functions declared below */
+    int nbits;
+    int inverse;
+    RDFTContext rdft;   /* embedded real-FFT context */
+    const float *costab;
+    FFTSample *csc2;
+    void (*dct_calc)(struct DCTContext *s, FFTSample *data);
+    void (*dct32)(FFTSample *out, const FFTSample *in);
+};
+
+/**
+ * Set up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ * @param type            which transform variant to set up
+ * @note the first element of the input of DST-I is ignored
+ */
+int  ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_calc(DCTContext *s, FFTSample *data);
+void ff_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_FFT_H */
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/rdft.c b/plugins/supereq/ffmpeg_fft/libavcodec/rdft.c
new file mode 100644
index 00000000..fe6014fb
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/rdft.c
@@ -0,0 +1,137 @@
+/*
+ * (I)RDFT transforms
+ * Copyright (c) 2009 Alex Converse <alex dot converse at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdlib.h>
+#include <math.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+
+/**
+ * @file
+ * (Inverse) Real Discrete Fourier Transforms.
+ */
+
+/* sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4 */
+#if !CONFIG_HARDCODED_TABLES
+SINTABLE(16);
+SINTABLE(32);
+SINTABLE(64);
+SINTABLE(128);
+SINTABLE(256);
+SINTABLE(512);
+SINTABLE(1024);
+SINTABLE(2048);
+SINTABLE(4096);
+SINTABLE(8192);
+SINTABLE(16384);
+SINTABLE(32768);
+SINTABLE(65536);
+#endif /* !CONFIG_HARDCODED_TABLES */
+SINTABLE_CONST FFTSample * const ff_sin_tabs[] = { /* indexed by log2(n); filled by ff_rdft_init() */
+    NULL, NULL, NULL, NULL, /* no tables below n = 16 */
+    ff_sin_16, ff_sin_32, ff_sin_64, ff_sin_128, ff_sin_256, ff_sin_512, ff_sin_1024,
+    ff_sin_2048, ff_sin_4096, ff_sin_8192, ff_sin_16384, ff_sin_32768, ff_sin_65536,
+};
+
+/** Map one real FFT into two parallel real even and odd FFTs. Then interleave
+ * the two real FFTs into one complex FFT. Unmangle the results.
+ * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
+ */
+static void ff_rdft_calc_c(RDFTContext* s, FFTSample* data)
+{
+    int i, i1, i2;
+    FFTComplex ev, od;
+    const int n = 1 << s->nbits;
+    const float k1 = 0.5;
+    const float k2 = 0.5 - s->inverse;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    /* Forward: the half-length complex FFT runs first; inverse: it runs after the unmangling below. */
+    if (!s->inverse) {
+        ff_fft_permute(&s->fft, (FFTComplex*)data);
+        ff_fft_calc(&s->fft, (FFTComplex*)data);
+    }
+    /* i=0 is a special case because of packing, the DC term is real, so we
+       are going to throw the N/2 term (also real) in with it. */
+    ev.re = data[0];
+    data[0] = ev.re+data[1];
+    data[1] = ev.re-data[1];
+    for (i = 1; i < (n>>2); i++) {
+        i1 = 2*i;
+        i2 = n-i1;
+        /* Separate even and odd FFTs */
+        ev.re =  k1*(data[i1  ]+data[i2  ]);
+        od.im = -k2*(data[i1  ]-data[i2  ]);
+        ev.im =  k1*(data[i1+1]-data[i2+1]);
+        od.re =  k2*(data[i1+1]+data[i2+1]);
+        /* Apply twiddle factors to the odd FFT and add to the even FFT */
+        data[i1  ] =  ev.re + od.re*tcos[i] - od.im*tsin[i];
+        data[i1+1] =  ev.im + od.im*tcos[i] + od.re*tsin[i];
+        data[i2  ] =  ev.re - od.re*tcos[i] + od.im*tsin[i];
+        data[i2+1] = -ev.im + od.im*tcos[i] + od.re*tsin[i];
+    }
+    data[2*i+1]=s->sign_convention*data[2*i+1]; /* i == n/4 here, so this scales data[n/2+1] */
+    if (s->inverse) {
+        data[0] *= k1;
+        data[1] *= k1;
+        ff_fft_permute(&s->fft, (FFTComplex*)data);
+        ff_fft_calc(&s->fft, (FFTComplex*)data);
+    }
+}
+
+/* Set up s for a real FFT of 1 << nbits samples (4 <= nbits <= 16); returns 0, or -1 on failure. */
+av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
+{
+    int n, i;
+    double theta;
+
+    s->nbits           = nbits;
+    s->inverse         = trans == IDFT_C2R || trans == DFT_C2R;
+    s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
+
+    /* Check the range before shifting: 1 << nbits is undefined for a negative or too-large nbits. */
+    if (nbits < 4 || nbits > 16)
+        return -1;
+    n     = 1 << nbits;
+    theta = (trans == DFT_R2C || trans == DFT_C2R ? -1 : 1)*2*M_PI/n;
+
+    /* The real transform is built on a complex FFT of half the length. */
+    if (ff_fft_init(&s->fft, nbits-1, trans == IDFT_C2R || trans == IDFT_R2C) < 0)
+        return -1;
+
+    ff_init_ff_cos_tabs(nbits);
+    s->tcos = ff_cos_tabs[nbits];
+    s->tsin = ff_sin_tabs[nbits]+(trans == DFT_R2C || trans == DFT_C2R)*(n>>2);
+#if !CONFIG_HARDCODED_TABLES
+    for (i = 0; i < (n>>2); i++)
+        s->tsin[i] = sin(i*theta);
+#endif
+    s->rdft_calc   = ff_rdft_calc_c;
+#if ARCH_ARM
+    ff_rdft_init_arm(s);
+#endif
+
+    return 0;
+}
+
+av_cold void ff_rdft_end(RDFTContext *s)
+{   /* Only the embedded FFT context is released; tcos/tsin point into the static tables. */
+    ff_fft_end(&s->fft);
+}