summaryrefslogtreecommitdiff
path: root/plugins/supereq/ffmpeg_fft/libavcodec
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/supereq/ffmpeg_fft/libavcodec')
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/arm/asm.S104
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_init_arm.c71
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_neon.S372
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/arm/rdft_neon.S151
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/arm/simple_idct_neon.S372
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/avfft.c142
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/avfft.h103
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/dct.c228
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/dct32.c262
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/dct32.h10
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/fft.c300
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/fft.h244
-rw-r--r--plugins/supereq/ffmpeg_fft/libavcodec/rdft.c137
13 files changed, 2496 insertions, 0 deletions
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/asm.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/asm.S
new file mode 100644
index 00000000..6860f1cf
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/asm.S
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#ifdef __ELF__
+# define ELF /* ELF targets: the prefix expands to nothing, so the directive after it is assembled */
+#else
+# define ELF @ /* other targets: the prefix expands to the comment character, disabling the directive */
+#endif
+
+.macro require8 val=1 /* emit EABI build attribute 24 (Tag_ABI_align_needed) on ELF targets */
+ELF .eabi_attribute 24, \val
+.endm
+
+.macro preserve8 val=1 /* emit EABI build attribute 25 (Tag_ABI_align_preserved) on ELF targets */
+ELF .eabi_attribute 25, \val
+.endm
+
+/*
+.macro function name, export=0
+ .macro endfunc
+ELF .size \name, . - \name
+ .endfunc
+ .purgem endfunc
+ .endm
+ .text
+ .if \export
+ .global EXTERN_ASM\name
+EXTERN_ASM\name:
+ .endif
+ELF .type \name, %function
+ .func \name
+\name:
+.endm
+*/
+
+.macro function name, export=0 /* open a function in .text; also defines a one-shot endfunc macro to close it */
+ .macro endfunc
+ELF .size \name, . - \name
+ .endfunc
+ .purgem endfunc /* endfunc deletes itself so the next function invocation can define it again */
+ .endm
+ .text
+ .if \export
+ .hidden EXTERN_ASM\name /* unlike the commented-out variant above, exported symbols are marked hidden */
+ .global EXTERN_ASM\name
+EXTERN_ASM\name:
+ .endif
+ELF .type \name, %function
+ .func \name
+\name:
+.endm
+
+.macro mov32 rd, val /* load the 32-bit constant val into register rd */
+#if HAVE_ARMV6T2
+ movw \rd, #(\val) & 0xffff /* low halfword */
+ .if (\val) >> 16
+ movt \rd, #(\val) >> 16 /* high halfword, emitted only when it is non-zero */
+ .endif
+#else
+ ldr \rd, =\val /* without ARMv6T2: use the ldr= pseudo-instruction */
+#endif
+.endm
+
+.macro movrel rd, val /* load the address of symbol val into register rd */
+#if HAVE_ARMV6T2 && !CONFIG_PIC
+ movw \rd, #:lower16:\val /* low 16 bits of the address */
+ movt \rd, #:upper16:\val /* high 16 bits of the address */
+#else
+ ldr \rd, =\val /* PIC builds or no ARMv6T2: use the ldr= pseudo-instruction */
+#endif
+.endm
+
+#if HAVE_VFP_ARGS
+ .eabi_attribute 28, 1 /* build attribute 28 is Tag_ABI_VFP_args */
+# define VFP
+# define NOVFP @ /* lines carrying the NOVFP prefix are commented out in this configuration */
+#else
+# define VFP @ /* lines carrying the VFP prefix are commented out in this configuration */
+# define NOVFP
+#endif
+
+#define GLUE(a, b) a ## b /* raw token paste */
+#define JOIN(a, b) GLUE(a, b) /* extra level so a and b are macro-expanded before pasting */
+#define X(s) JOIN(EXTERN_ASM, s) /* name s with the EXTERN_ASM prefix, matching labels made by function ..., export=1 */
+
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_init_arm.c b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_init_arm.c
new file mode 100644
index 00000000..28148e92
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_init_arm.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/fft.h"
+#if CONFIG_DCA_DECODER
+#include "libavcodec/synth_filter.h"
+#endif
+
+void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
+
+#if 0
+void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
+#endif
+
+void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
+
+void ff_synth_filter_float_neon(FFTContext *imdct,
+ float *synth_buf_ptr, int *synth_buf_offset,
+ float synth_buf2[32], const float window[512],
+ float out[32], const float in[32],
+ float scale, float bias);
+
+av_cold void ff_fft_init_arm(FFTContext *s)
+{   /* install the NEON permute/calc routines into s when HAVE_NEON is set */
+    if (!HAVE_NEON)
+        return; /* leave the hooks already present in s untouched */
+    s->fft_permute = ff_fft_permute_neon;
+    s->fft_calc    = ff_fft_calc_neon;
+#if 0
+    s->imdct_calc  = ff_imdct_calc_neon;
+    s->imdct_half  = ff_imdct_half_neon;
+    s->mdct_calc   = ff_mdct_calc_neon;
+    s->permutation = FF_MDCT_PERM_INTERLEAVE;
+#endif
+}
+
+#if CONFIG_RDFT
+av_cold void ff_rdft_init_arm(RDFTContext *s)
+{   /* point s->rdft_calc at the NEON routine when HAVE_NEON is set */
+    if (!HAVE_NEON) return;
+    s->rdft_calc = ff_rdft_calc_neon;
+}
+#endif
+
+#if CONFIG_DCA_DECODER
+av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
+{   /* point s->synth_filter_float at the NEON routine when HAVE_NEON is set */
+    if (!HAVE_NEON) return;
+    s->synth_filter_float = ff_synth_filter_float_neon;
+}
+#endif
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_neon.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_neon.S
new file mode 100644
index 00000000..117f4fee
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/fft_neon.S
@@ -0,0 +1,372 @@
+/*
+ * ARM NEON optimised FFT
+ *
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2009 Naotoshi Nojiri
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+#define M_SQRT1_2 0.70710678118654752440
+
+ .text
+
+function fft4_neon @ in-place 4-point transform of the FFTComplex array at r0
+ vld1.32 {d0-d3}, [r0,:128] @ load z[0..3] (32 bytes, 128-bit aligned address)
+
+ vext.32 q8, q1, q1, #1 @ i2,r3 d3=i3,r2
+ vsub.f32 d6, d0, d1 @ r0-r1,i0-i1
+ vsub.f32 d7, d16, d17 @ r3-r2,i2-i3
+ vadd.f32 d4, d0, d1 @ r0+r1,i0+i1
+ vadd.f32 d5, d2, d3 @ i2+i3,r2+r3
+ vadd.f32 d1, d6, d7
+ vsub.f32 d3, d6, d7
+ vadd.f32 d0, d4, d5
+ vsub.f32 d2, d4, d5
+
+ vst1.32 {d0-d3}, [r0,:128] @ write the four results back over the input
+
+ bx lr
+endfunc
+
+function fft8_neon @ in-place 8-point transform of the FFTComplex array at r0
+ mov r1, r0 @ r1 walks the input, r0 keeps the base for the final store
+ vld1.32 {d0-d3}, [r1,:128]! @ z[0..3], r1 advances to z[4]
+ vld1.32 {d16-d19}, [r1,:128] @ z[4..7]
+
+ movw r2, #0x04f3 @ sqrt(1/2)
+ movt r2, #0x3f35 @ r2 = 0x3f3504f3, the single-precision bit pattern
+ eor r3, r2, #1<<31 @ r3 = same value with the sign bit flipped
+ vdup.32 d31, r2 @ d31 = {w, w}
+
+ vext.32 q11, q1, q1, #1 @ i2,r3,i3,r2
+ vadd.f32 d4, d16, d17 @ r4+r5,i4+i5
+ vmov d28, r3, r2 @ d28 = {-w, w}
+ vadd.f32 d5, d18, d19 @ r6+r7,i6+i7
+ vsub.f32 d17, d16, d17 @ r4-r5,i4-i5
+ vsub.f32 d19, d18, d19 @ r6-r7,i6-i7
+ vrev64.32 d29, d28 @ d29 = {w, -w}
+ vadd.f32 d20, d0, d1 @ r0+r1,i0+i1
+ vadd.f32 d21, d2, d3 @ r2+r3,i2+i3
+ vmul.f32 d26, d17, d28 @ -a2r*w,a2i*w
+ vext.32 q3, q2, q2, #1
+ vmul.f32 d27, d19, d29 @ a3r*w,-a3i*w
+ vsub.f32 d23, d22, d23 @ i2-i3,r3-r2
+ vsub.f32 d22, d0, d1 @ r0-r1,i0-i1
+ vmul.f32 d24, d17, d31 @ a2r*w,a2i*w
+ vmul.f32 d25, d19, d31 @ a3r*w,a3i*w
+ vadd.f32 d0, d20, d21
+ vsub.f32 d2, d20, d21
+ vadd.f32 d1, d22, d23
+ vrev64.32 q13, q13
+ vsub.f32 d3, d22, d23
+ vsub.f32 d6, d6, d7
+ vadd.f32 d24, d24, d26 @ a2r+a2i,a2i-a2r t1,t2
+ vadd.f32 d25, d25, d27 @ a3r-a3i,a3i+a3r t5,t6
+ vadd.f32 d7, d4, d5
+ vsub.f32 d18, d2, d6
+ vext.32 q13, q12, q12, #1
+ vadd.f32 d2, d2, d6
+ vsub.f32 d16, d0, d7
+ vadd.f32 d5, d25, d24
+ vsub.f32 d4, d26, d27
+ vadd.f32 d0, d0, d7
+ vsub.f32 d17, d1, d5
+ vsub.f32 d19, d3, d4
+ vadd.f32 d3, d3, d4
+ vadd.f32 d1, d1, d5
+
+ vst1.32 {d16-d19}, [r1,:128] @ store z[4..7]
+ vst1.32 {d0-d3}, [r0,:128] @ store z[0..3]
+
+ bx lr
+endfunc
+
+function fft16_neon @ in-place 16-point transform of the FFTComplex array at r0
+ movrel r1, mppm
+ vld1.32 {d16-d19}, [r0,:128]! @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3}
+ pld [r0, #32]
+ vld1.32 {d2-d3}, [r1,:128] @ q1 = the mppm constants
+ vext.32 q13, q9, q9, #1
+ vld1.32 {d22-d25}, [r0,:128]! @ q11{r4,i4,r5,i5} q12{r6,i6,r7,i7}
+ vadd.f32 d4, d16, d17
+ vsub.f32 d5, d16, d17
+ vadd.f32 d18, d18, d19
+ vsub.f32 d19, d26, d27
+
+ vadd.f32 d20, d22, d23
+ vsub.f32 d22, d22, d23
+ vsub.f32 d23, d24, d25
+ vadd.f32 q8, q2, q9 @ {r0,i0,r1,i1}
+ vadd.f32 d21, d24, d25
+ vmul.f32 d24, d22, d2
+ vsub.f32 q9, q2, q9 @ {r2,i2,r3,i3}
+ vmul.f32 d25, d23, d3
+ vuzp.32 d16, d17 @ {r0,r1,i0,i1}
+ vmul.f32 q1, q11, d2[1]
+ vuzp.32 d18, d19 @ {r2,r3,i2,i3}
+ vrev64.32 q12, q12
+ vadd.f32 q11, q12, q1 @ {t1a,t2a,t5,t6}
+ vld1.32 {d24-d27}, [r0,:128]! @ q12{r8,i8,r9,i9} q13{r10,i10,r11,i11}
+ vzip.32 q10, q11
+ vld1.32 {d28-d31}, [r0,:128] @ q14{r12,i12,r13,i13} q15{r14,i14,r15,i15}
+ vadd.f32 d0, d22, d20
+ vadd.f32 d1, d21, d23
+ vsub.f32 d2, d21, d23
+ vsub.f32 d3, d22, d20
+ sub r0, r0, #96 @ rewind r0 to z[0] (three post-incremented 32-byte loads)
+ vext.32 q13, q13, q13, #1
+ vsub.f32 q10, q8, q0 @ {r4,r5,i4,i5}
+ vadd.f32 q8, q8, q0 @ {r0,r1,i0,i1}
+ vext.32 q15, q15, q15, #1
+ vsub.f32 q11, q9, q1 @ {r6,r7,i6,i7}
+ vswp d25, d26 @ q12{r8,i8,i10,r11} q13{r9,i9,i11,r10}
+ vadd.f32 q9, q9, q1 @ {r2,r3,i2,i3}
+ vswp d29, d30 @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14}
+ vadd.f32 q0, q12, q13 @ {t1,t2,t5,t6}
+ vadd.f32 q1, q14, q15 @ {t1a,t2a,t5a,t6a}
+ movrel r2, X(ff_cos_16)
+ vsub.f32 q13, q12, q13 @ {t3,t4,t7,t8}
+ vrev64.32 d1, d1
+ vsub.f32 q15, q14, q15 @ {t3a,t4a,t7a,t8a}
+ vrev64.32 d3, d3
+ movrel r3, pmmp
+ vswp d1, d26 @ q0{t1,t2,t3,t4} q13{t6,t5,t7,t8}
+ vswp d3, d30 @ q1{t1a,t2a,t3a,t4a} q15{t6a,t5a,t7a,t8a}
+ vadd.f32 q12, q0, q13 @ {r8,i8,r9,i9}
+ vadd.f32 q14, q1, q15 @ {r12,i12,r13,i13}
+ vld1.32 {d4-d5}, [r2,:64] @ q2 = first four entries of ff_cos_16
+ vsub.f32 q13, q0, q13 @ {r10,i10,r11,i11}
+ vsub.f32 q15, q1, q15 @ {r14,i14,r15,i15}
+ vswp d25, d28 @ q12{r8,i8,r12,i12} q14{r9,i9,r13,i13}
+ vld1.32 {d6-d7}, [r3,:128] @ q3 = the pmmp sign pattern
+ vrev64.32 q1, q14
+ vmul.f32 q14, q14, d4[1]
+ vmul.f32 q1, q1, q3
+ vmla.f32 q14, q1, d5[1] @ {t1a,t2a,t5a,t6a}
+ vswp d27, d30 @ q13{r10,i10,r14,i14} q15{r11,i11,r15,i15}
+ vzip.32 q12, q14
+ vadd.f32 d0, d28, d24
+ vadd.f32 d1, d25, d29
+ vsub.f32 d2, d25, d29
+ vsub.f32 d3, d28, d24
+ vsub.f32 q12, q8, q0 @ {r8,r9,i8,i9}
+ vadd.f32 q8, q8, q0 @ {r0,r1,i0,i1}
+ vsub.f32 q14, q10, q1 @ {r12,r13,i12,i13}
+ mov r1, #32 @ byte stride between the interleaving stores below
+ vadd.f32 q10, q10, q1 @ {r4,r5,i4,i5}
+ vrev64.32 q0, q13
+ vmul.f32 q13, q13, d5[0]
+ vrev64.32 q1, q15
+ vmul.f32 q15, q15, d5[1]
+ vst2.32 {d16-d17},[r0,:128], r1
+ vmul.f32 q0, q0, q3
+ vst2.32 {d20-d21},[r0,:128], r1
+ vmul.f32 q1, q1, q3
+ vmla.f32 q13, q0, d5[0] @ {t1,t2,t5,t6}
+ vmla.f32 q15, q1, d4[1] @ {t1a,t2a,t5a,t6a}
+ vst2.32 {d24-d25},[r0,:128], r1
+ vst2.32 {d28-d29},[r0,:128]
+ vzip.32 q13, q15
+ sub r0, r0, #80 @ r0 = z + 16 bytes: the next four stores fill the gaps left above
+ vadd.f32 d0, d30, d26
+ vadd.f32 d1, d27, d31
+ vsub.f32 d2, d27, d31
+ vsub.f32 d3, d30, d26
+ vsub.f32 q13, q9, q0 @ {r10,r11,i10,i11}
+ vadd.f32 q9, q9, q0 @ {r2,r3,i2,i3}
+ vsub.f32 q15, q11, q1 @ {r14,r15,i14,i15}
+ vadd.f32 q11, q11, q1 @ {r6,r7,i6,i7}
+ vst2.32 {d18-d19},[r0,:128], r1
+ vst2.32 {d22-d23},[r0,:128], r1
+ vst2.32 {d26-d27},[r0,:128], r1
+ vst2.32 {d30-d31},[r0,:128]
+ bx lr
+endfunc
+
+function fft_pass_neon @ combine pass: r0 = z, r1 = wre table, r2 = n (see the register notes below)
+ push {r4-r6,lr}
+ mov r6, r2 @ n
+ lsl r5, r2, #3 @ 2 * n * sizeof FFTSample
+ lsl r4, r2, #4 @ 2 * n * sizeof FFTComplex
+ lsl r2, r2, #5 @ 4 * n * sizeof FFTComplex
+ add r3, r2, r4
+ add r4, r4, r0 @ &z[o1]
+ add r2, r2, r0 @ &z[o2]
+ add r3, r3, r0 @ &z[o3]
+ vld1.32 {d20-d21},[r2,:128] @ {z[o2],z[o2+1]}
+ movrel r12, pmmp
+ vld1.32 {d22-d23},[r3,:128] @ {z[o3],z[o3+1]}
+ add r5, r5, r1 @ wim
+ vld1.32 {d6-d7}, [r12,:128] @ pmmp
+ vswp d21, d22
+ vld1.32 {d4}, [r1,:64]! @ {wre[0],wre[1]}
+ sub r5, r5, #4 @ wim--
+ vrev64.32 q1, q11
+ vmul.f32 q11, q11, d4[1]
+ vmul.f32 q1, q1, q3
+ vld1.32 {d5[0]}, [r5,:32] @ d5[0] = wim[-1]
+ vmla.f32 q11, q1, d5[0] @ {t1a,t2a,t5a,t6a}
+ vld2.32 {d16-d17},[r0,:128] @ {z[0],z[1]}
+ sub r6, r6, #1 @ n--
+ vld2.32 {d18-d19},[r4,:128] @ {z[o1],z[o1+1]}
+ vzip.32 q10, q11
+ vadd.f32 d0, d22, d20
+ vadd.f32 d1, d21, d23
+ vsub.f32 d2, d21, d23
+ vsub.f32 d3, d22, d20
+ vsub.f32 q10, q8, q0
+ vadd.f32 q8, q8, q0
+ vsub.f32 q11, q9, q1
+ vadd.f32 q9, q9, q1
+ vst2.32 {d20-d21},[r2,:128]! @ {z[o2],z[o2+1]}
+ vst2.32 {d16-d17},[r0,:128]! @ {z[0],z[1]}
+ vst2.32 {d22-d23},[r3,:128]! @ {z[o3],z[o3+1]}
+ vst2.32 {d18-d19},[r4,:128]! @ {z[o1],z[o1+1]}
+ sub r5, r5, #8 @ wim -= 2
+1: @ remaining iterations; the first one above skips the multiply of q10 by the twiddles
+ vld1.32 {d20-d21},[r2,:128] @ {z[o2],z[o2+1]}
+ vld1.32 {d22-d23},[r3,:128] @ {z[o3],z[o3+1]}
+ vswp d21, d22
+ vld1.32 {d4}, [r1]! @ {wre[0],wre[1]}
+ vrev64.32 q0, q10
+ vmul.f32 q10, q10, d4[0]
+ vrev64.32 q1, q11
+ vmul.f32 q11, q11, d4[1]
+ vld1.32 {d5}, [r5] @ {wim[-1],wim[0]}
+ vmul.f32 q0, q0, q3
+ sub r5, r5, #8 @ wim -= 2
+ vmul.f32 q1, q1, q3
+ vmla.f32 q10, q0, d5[1] @ {t1,t2,t5,t6}
+ vmla.f32 q11, q1, d5[0] @ {t1a,t2a,t5a,t6a}
+ vld2.32 {d16-d17},[r0,:128] @ {z[0],z[1]}
+ subs r6, r6, #1 @ n--
+ vld2.32 {d18-d19},[r4,:128] @ {z[o1],z[o1+1]}
+ vzip.32 q10, q11
+ vadd.f32 d0, d22, d20
+ vadd.f32 d1, d21, d23
+ vsub.f32 d2, d21, d23
+ vsub.f32 d3, d22, d20
+ vsub.f32 q10, q8, q0
+ vadd.f32 q8, q8, q0
+ vsub.f32 q11, q9, q1
+ vadd.f32 q9, q9, q1
+ vst2.32 {d20-d21}, [r2,:128]! @ {z[o2],z[o2+1]}
+ vst2.32 {d16-d17}, [r0,:128]! @ {z[0],z[1]}
+ vst2.32 {d22-d23}, [r3,:128]! @ {z[o3],z[o3+1]}
+ vst2.32 {d18-d19}, [r4,:128]! @ {z[o1],z[o1+1]}
+ bne 1b @ loop until the counter in r6 reaches zero
+
+ pop {r4-r6,pc}
+endfunc
+
+.macro def_fft n, n2, n4 @ build the size-n routine from the size-n2 and size-n4 routines plus fft_pass_neon
+ .align 6
+function fft\n\()_neon
+ push {r4, lr}
+ mov r4, r0 @ keep z across the calls
+ bl fft\n2\()_neon @ size-n2 transform at z
+ add r0, r4, #\n4*2*8 @ z advanced by 2*n4 FFTComplex (8 bytes each)
+ bl fft\n4\()_neon
+ add r0, r4, #\n4*3*8 @ z advanced by 3*n4 FFTComplex
+ bl fft\n4\()_neon
+ mov r0, r4
+ pop {r4, lr} @ restore lr so the tail call below returns to our caller
+ movrel r1, X(ff_cos_\n)
+ mov r2, #\n4/2
+ b fft_pass_neon @ tail call with r0 = z, r1 = cosine table, r2 = n4/2
+endfunc
+.endm
+
+ def_fft 32, 16, 8 @ arguments are n, n/2, n/4
+ def_fft 64, 32, 16
+ def_fft 128, 64, 32
+ def_fft 256, 128, 64
+ def_fft 512, 256, 128
+ def_fft 1024, 512, 256
+ def_fft 2048, 1024, 512
+ def_fft 4096, 2048, 1024
+ def_fft 8192, 4096, 2048
+ def_fft 16384, 8192, 4096
+ def_fft 32768, 16384, 8192
+ def_fft 65536, 32768, 16384
+
+function ff_fft_calc_neon, export=1 @ r0 = FFTContext, r1 = z
+ ldr r2, [r0] @ first word of the context (labelled nbits in ff_fft_permute_neon)
+ sub r2, r2, #2 @ table index: entry 0 of fft_tab_neon is fft4_neon
+ movrel r3, fft_tab_neon
+ ldr r3, [r3, r2, lsl #2] @ fetch the routine address (4-byte entries)
+ mov r0, r1 @ the fftN routines take z in r0
+ bx r3 @ tail call into the selected routine
+endfunc
+
+function ff_fft_permute_neon, export=1 @ r0 = FFTContext, r1 = z
+ push {r4,lr}
+ mov r12, #1
+ ldr r2, [r0] @ nbits
+ ldr r3, [r0, #12] @ tmp_buf
+ ldr r0, [r0, #8] @ revtab
+ lsl r12, r12, r2 @ r12 = 1 << nbits, the element count
+ mov r2, r12 @ second copy of the count for the copy-back loop
+1: @ scatter z into tmp_buf, two elements per iteration
+ vld1.32 {d0-d1}, [r1,:128]!
+ ldr r4, [r0], #4 @ one word holds two 16-bit revtab entries
+ uxth lr, r4 @ low halfword: destination index for d0
+ uxth r4, r4, ror #16 @ high halfword: destination index for d1
+ add lr, r3, lr, lsl #3 @ tmp_buf + index * 8
+ add r4, r3, r4, lsl #3
+ vst1.32 {d0}, [lr,:64]
+ vst1.32 {d1}, [r4,:64]
+ subs r12, r12, #2
+ bgt 1b
+
+ sub r1, r1, r2, lsl #3 @ rewind r1 to z[0] (count * 8 bytes)
+1: @ copy tmp_buf back over z, four elements per iteration
+ vld1.32 {d0-d3}, [r3,:128]!
+ vst1.32 {d0-d3}, [r1,:128]!
+ subs r2, r2, #4
+ bgt 1b
+
+ pop {r4,pc}
+endfunc
+
+ .section .rodata
+ .align 4
+fft_tab_neon: @ routine addresses in size order; ff_fft_calc_neon indexes this with nbits - 2
+ .word fft4_neon
+ .word fft8_neon
+ .word fft16_neon
+ .word fft32_neon
+ .word fft64_neon
+ .word fft128_neon
+ .word fft256_neon
+ .word fft512_neon
+ .word fft1024_neon
+ .word fft2048_neon
+ .word fft4096_neon
+ .word fft8192_neon
+ .word fft16384_neon
+ .word fft32768_neon
+ .word fft65536_neon
+ELF .size fft_tab_neon, . - fft_tab_neon
+
+ .align 4
+pmmp: .float +1.0, -1.0, -1.0, +1.0 @ sign pattern loaded by fft16_neon and fft_pass_neon
+mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 @ signed sqrt(1/2) factors loaded by fft16_neon
+
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/rdft_neon.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/rdft_neon.S
new file mode 100644
index 00000000..4f8a1032
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/rdft_neon.S
@@ -0,0 +1,151 @@
+/*
+ * ARM NEON optimised RDFT
+ * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+ preserve8
+
+function ff_rdft_calc_neon, export=1 @ r0 = RDFTContext, r1 = data
+ push {r4-r8,lr}
+
+ ldr r6, [r0, #4] @ inverse
+ mov r4, r0 @ r4 = context for the rest of the routine
+ mov r5, r1 @ r5 = data for the rest of the routine
+
+ lsls r6, r6, #31 @ move bit 0 of inverse into bit 31 and set the flags
+ bne 1f @ inverse: skip the leading FFT
+ add r0, r4, #20 @ the FFT routines get the address 20 bytes into this context
+ bl X(ff_fft_permute_neon)
+ add r0, r4, #20
+ mov r1, r5
+ bl X(ff_fft_calc_neon)
+1:
+ ldr r12, [r4, #0] @ nbits
+ mov r2, #1
+ lsl r12, r2, r12 @ r12 = 1 << nbits
+ add r0, r5, #8 @ forward pointer, starts 8 bytes into data
+ add r1, r5, r12, lsl #2 @ data + 4 * (1 << nbits)
+ lsr r12, r12, #2
+ ldr r2, [r4, #12] @ tcos
+ sub r12, r12, #2 @ loop counter for the main loop below
+ ldr r3, [r4, #16] @ tsin
+ mov r7, r0 @ forward store pointer
+ sub r1, r1, #8 @ backward pointer, starts at the last 8 bytes
+ mov lr, r1 @ backward store pointer
+ mov r8, #-8 @ post-index step for the pointers that walk backwards
+ vld1.32 {d0}, [r0,:64]! @ d1[0,1]
+ vld1.32 {d1}, [r1,:64], r8 @ d2[0,1]
+ vld1.32 {d4}, [r2,:64]! @ tcos[i]
+ vld1.32 {d5}, [r3,:64]! @ tsin[i]
+ vmov.f32 d18, #0.5 @ k1
+ vdup.32 d19, r6
+ pld [r0, #32]
+ veor d19, d18, d19 @ k2
+ vmov.i32 d16, #0
+ vmov.i32 d17, #1<<31
+ pld [r1, #-32]
+ vtrn.32 d16, d17
+ pld [r2, #32]
+ vrev64.32 d16, d16 @ d16=1,0 d17=0,1
+ pld [r3, #32]
+2:
+ veor q1, q0, q8 @ -d1[0],d1[1], d2[0],-d2[1]
+ vld1.32 {d24}, [r0,:64]! @ d1[0,1]
+ vadd.f32 d0, d0, d3 @ d1[0]+d2[0], d1[1]-d2[1]
+ vld1.32 {d25}, [r1,:64], r8 @ d2[0,1]
+ vadd.f32 d1, d2, d1 @ -d1[0]+d2[0], d1[1]+d2[1]
+ veor q3, q12, q8 @ -d1[0],d1[1], d2[0],-d2[1]
+ pld [r0, #32]
+ vmul.f32 q10, q0, q9 @ ev.re, ev.im, od.im, od.re
+ pld [r1, #-32]
+ vadd.f32 d0, d24, d7 @ d1[0]+d2[0], d1[1]-d2[1]
+ vadd.f32 d1, d6, d25 @ -d1[0]+d2[0], d1[1]+d2[1]
+ vmul.f32 q11, q0, q9 @ ev.re, ev.im, od.im, od.re
+ veor d7, d21, d16 @ -od.im, od.re
+ vrev64.32 d3, d21 @ od.re, od.im
+ veor d6, d20, d17 @ ev.re,-ev.im
+ veor d2, d3, d16 @ -od.re, od.im
+ vmla.f32 d20, d3, d4[1]
+ vmla.f32 d20, d7, d5[1]
+ vmla.f32 d6, d2, d4[1]
+ vmla.f32 d6, d21, d5[1]
+ vld1.32 {d4}, [r2,:64]! @ tcos[i]
+ veor d7, d23, d16 @ -od.im, od.re
+ vld1.32 {d5}, [r3,:64]! @ tsin[i]
+ veor d24, d22, d17 @ ev.re,-ev.im
+ vrev64.32 d3, d23 @ od.re, od.im
+ pld [r2, #32]
+ veor d2, d3, d16 @ -od.re, od.im
+ pld [r3, #32]
+ vmla.f32 d22, d3, d4[0]
+ vmla.f32 d22, d7, d5[0]
+ vmla.f32 d24, d2, d4[0]
+ vmla.f32 d24, d23, d5[0]
+ vld1.32 {d0}, [r0,:64]! @ d1[0,1]
+ vld1.32 {d1}, [r1,:64], r8 @ d2[0,1]
+ vst1.32 {d20}, [r7,:64]!
+ vst1.32 {d6}, [lr,:64], r8
+ vst1.32 {d22}, [r7,:64]!
+ vst1.32 {d24}, [lr,:64], r8
+ subs r12, r12, #2 @ two pairs handled per iteration
+ bgt 2b
+
+ veor q1, q0, q8 @ -d1[0],d1[1], d2[0],-d2[1]
+ vadd.f32 d0, d0, d3 @ d1[0]+d2[0], d1[1]-d2[1]
+ vadd.f32 d1, d2, d1 @ -d1[0]+d2[0], d1[1]+d2[1]
+ ldr r2, [r4, #8] @ sign_convention
+ vmul.f32 q10, q0, q9 @ ev.re, ev.im, od.im, od.re
+ add r0, r0, #4
+ bfc r2, #0, #31 @ keep only bit 31 of sign_convention
+ vld1.32 {d0[0]}, [r0,:32]
+ veor d7, d21, d16 @ -od.im, od.re
+ vrev64.32 d3, d21 @ od.re, od.im
+ veor d6, d20, d17 @ ev.re,-ev.im
+ vld1.32 {d22}, [r5,:64]
+ vdup.32 d1, r2
+ vmov d23, d22
+ veor d2, d3, d16 @ -od.re, od.im
+ vtrn.32 d22, d23
+ veor d0, d0, d1
+ veor d23, d23, d17
+ vmla.f32 d20, d3, d4[1]
+ vmla.f32 d20, d7, d5[1]
+ vmla.f32 d6, d2, d4[1]
+ vmla.f32 d6, d21, d5[1]
+ vadd.f32 d22, d22, d23
+ vst1.32 {d20}, [r7,:64]
+ vst1.32 {d6}, [lr,:64]
+ vst1.32 {d0[0]}, [r0,:32]
+ vst1.32 {d22}, [r5,:64]
+
+ cmp r6, #0 @ r6 is zero when bit 0 of inverse was clear
+ popeq {r4-r8,pc} @ forward transform: finished
+
+ vmul.f32 d22, d22, d18 @ inverse only: scale the first pair by k1
+ vst1.32 {d22}, [r5,:64]
+ add r0, r4, #20
+ mov r1, r5
+ bl X(ff_fft_permute_neon)
+ add r0, r4, #20
+ mov r1, r5
+ pop {r4-r8,lr}
+ b X(ff_fft_calc_neon) @ tail call: the FFT returns straight to our caller
+endfunc
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/arm/simple_idct_neon.S b/plugins/supereq/ffmpeg_fft/libavcodec/arm/simple_idct_neon.S
new file mode 100644
index 00000000..17cde583
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/arm/simple_idct_neon.S
@@ -0,0 +1,372 @@
+/*
+ * ARM NEON IDCT
+ *
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * Based on Simple IDCT
+ * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4c ((1<<(COL_SHIFT-1))/W4) /* column rounding term divided by W4; idct_col4_neon adds it to col[0] before the multiply by W4 */
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+
+#define w1 d0[0] /* w1..w4c name the eight 16-bit lanes of d0/d1, which idct_start loads from idct_coeff_neon */
+#define w2 d0[1]
+#define w3 d0[2]
+#define w4 d0[3]
+#define w5 d1[0]
+#define w6 d1[1]
+#define w7 d1[2]
+#define w4c d1[3]
+
+ .macro idct_col4_top /* reads d4,d6,d8 and q15; writes q7-q14, q5 and q6 */
+ vmull.s16 q7, d6, w2 /* q7 = W2 * col[2] */
+ vmull.s16 q8, d6, w6 /* q8 = W6 * col[2] */
+ vmull.s16 q9, d4, w1 /* q9 = W1 * col[1] */
+ vadd.i32 q11, q15, q7
+ vmull.s16 q10, d4, w3 /* q10 = W3 * col[1] */
+ vadd.i32 q12, q15, q8
+ vmull.s16 q5, d4, w5 /* q5 = W5 * col[1] */
+ vsub.i32 q13, q15, q8
+ vmull.s16 q6, d4, w7 /* q6 = W7 * col[1] */
+ vsub.i32 q14, q15, q7
+
+ vmlal.s16 q9, d8, w3 /* q9 += W3 * col[3] */
+ vmlsl.s16 q10, d8, w7 /* q10 -= W7 * col[3] */
+ vmlsl.s16 q5, d8, w1 /* q5 -= W1 * col[3] */
+ vmlsl.s16 q6, d8, w5 /* q6 -= W5 * col[3] */
+ .endm
+
+ .text
+ .align 6
+
+function idct_row4_pld_neon /* prefetch eight lines at r0 with stride r1, then run idct_row4_neon */
+ pld [r0]
+ add r3, r0, r1, lsl #2 /* r3 = r0 + 4 * r1 */
+ pld [r0, r1]
+ pld [r0, r1, lsl #1]
+ pld [r3, -r1]
+ pld [r3]
+ pld [r3, r1]
+ add r3, r3, r1, lsl #1 /* r3 = r0 + 6 * r1 */
+ pld [r3]
+ pld [r3, r1] /* no return instruction: execution falls through into idct_row4_neon */
+endfunc
+
+function idct_row4_neon /* row pass over the 64 bytes at r2, in place; r2 ends 64 bytes further on; clobbers r3, r4 */
+ vmov.i32 q15, #(1<<(ROW_SHIFT-1))
+ vld1.64 {d2-d5}, [r2,:128]!
+ vmlal.s16 q15, d2, w4 /* q15 += W4 * col[0] */
+ vld1.64 {d6,d7}, [r2,:128]!
+ vorr d10, d3, d5
+ vld1.64 {d8,d9}, [r2,:128]!
+ add r2, r2, #-64 /* rewind r2 to the start of the 64 bytes just loaded */
+
+ vorr d11, d7, d9
+ vorr d10, d10, d11
+ vmov r3, r4, d10 /* r3:r4 = d3 | d5 | d7 | d9 */
+
+ idct_col4_top
+
+ orrs r3, r3, r4
+ beq 1f /* d3, d5, d7 and d9 are all zero: skip their terms */
+
+ vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
+ vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
+ vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
+ vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
+ vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
+ vadd.i32 q11, q11, q7
+ vsub.i32 q12, q12, q7
+ vsub.i32 q13, q13, q7
+ vadd.i32 q14, q14, q7
+ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
+ vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
+ vmlal.s16 q9, d9, w7
+ vmlsl.s16 q10, d9, w5
+ vmlal.s16 q5, d9, w3
+ vmlsl.s16 q6, d9, w1
+ vadd.i32 q11, q11, q7
+ vsub.i32 q12, q12, q8
+ vadd.i32 q13, q13, q8
+ vsub.i32 q14, q14, q7
+
+1: vadd.i32 q3, q11, q9
+ vadd.i32 q4, q12, q10
+ vshrn.i32 d2, q3, #ROW_SHIFT
+ vshrn.i32 d4, q4, #ROW_SHIFT
+ vadd.i32 q7, q13, q5
+ vadd.i32 q8, q14, q6
+ vtrn.16 d2, d4
+ vshrn.i32 d6, q7, #ROW_SHIFT
+ vshrn.i32 d8, q8, #ROW_SHIFT
+ vsub.i32 q14, q14, q6
+ vsub.i32 q11, q11, q9
+ vtrn.16 d6, d8
+ vsub.i32 q13, q13, q5
+ vshrn.i32 d3, q14, #ROW_SHIFT
+ vtrn.32 d2, d6
+ vsub.i32 q12, q12, q10
+ vtrn.32 d4, d8
+ vshrn.i32 d5, q13, #ROW_SHIFT
+ vshrn.i32 d7, q12, #ROW_SHIFT
+ vshrn.i32 d9, q11, #ROW_SHIFT
+
+ vtrn.16 d3, d5
+ vtrn.16 d7, d9
+ vtrn.32 d3, d7
+ vtrn.32 d5, d9
+
+ vst1.64 {d2-d5}, [r2,:128]! /* write the results back over the input */
+ vst1.64 {d6-d9}, [r2,:128]!
+
+ bx lr
+endfunc
+
+function idct_col4_neon /* column pass for 4 columns at r2 (16-byte row stride); results in d2-d9; r2 ends 128 bytes on */
+ mov ip, #16
+ vld1.64 {d2}, [r2,:64], ip /* d2 = col[0] */
+ vdup.16 d30, w4c
+ vld1.64 {d4}, [r2,:64], ip /* d4 = col[1] */
+ vadd.i16 d30, d30, d2
+ vld1.64 {d6}, [r2,:64], ip /* d6 = col[2] */
+ vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
+ vld1.64 {d8}, [r2,:64], ip /* d8 = col[3] */
+
+ ldrd r4, [r2] /* peek at col[4] */
+ ldrd r6, [r2, #16] /* peek at col[5] */
+ orrs r4, r4, r5
+
+ idct_col4_top
+ addeq r2, r2, #16 /* flags still come from the orrs above; the macro holds only NEON instructions */
+ beq 1f
+
+ vld1.64 {d3}, [r2,:64], ip /* d3 = col[4] */
+ vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
+ vadd.i32 q11, q11, q7
+ vsub.i32 q12, q12, q7
+ vsub.i32 q13, q13, q7
+ vadd.i32 q14, q14, q7
+
+1: orrs r6, r6, r7
+ ldrd r4, [r2, #16] /* peek at col[6] */
+ addeq r2, r2, #16
+ beq 2f
+
+ vld1.64 {d5}, [r2,:64], ip /* d5 = col[5] */
+ vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
+ vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
+ vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
+ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
+
+2: orrs r4, r4, r5
+ ldrd r4, [r2, #16] /* peek at col[7] */
+ addeq r2, r2, #16
+ beq 3f
+
+ vld1.64 {d7}, [r2,:64], ip /* d7 = col[6] */
+ vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
+ vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
+ vadd.i32 q11, q11, q7
+ vsub.i32 q14, q14, q7
+ vsub.i32 q12, q12, q8
+ vadd.i32 q13, q13, q8
+
+3: orrs r4, r4, r5
+ addeq r2, r2, #16
+ beq 4f
+
+ vld1.64 {d9}, [r2,:64], ip /* d9 = col[7] */
+ vmlal.s16 q9, d9, w7
+ vmlsl.s16 q10, d9, w5
+ vmlal.s16 q5, d9, w3
+ vmlsl.s16 q6, d9, w1
+
+4: vaddhn.i32 d2, q11, q9
+ vaddhn.i32 d3, q12, q10
+ vaddhn.i32 d4, q13, q5
+ vaddhn.i32 d5, q14, q6
+ vsubhn.i32 d9, q11, q9
+ vsubhn.i32 d8, q12, q10
+ vsubhn.i32 d7, q13, q5
+ vsubhn.i32 d6, q14, q6
+
+ bx lr
+endfunc
+
+ .align 6
+
+function idct_col4_st8_neon /* narrow q1-q4 to unsigned bytes with saturation and store 8 lines of 4 bytes at r0, stride r1 */
+ vqshrun.s16 d2, q1, #COL_SHIFT-16
+ vqshrun.s16 d3, q2, #COL_SHIFT-16
+ vqshrun.s16 d4, q3, #COL_SHIFT-16
+ vqshrun.s16 d5, q4, #COL_SHIFT-16
+ vst1.32 {d2[0]}, [r0,:32], r1
+ vst1.32 {d2[1]}, [r0,:32], r1
+ vst1.32 {d3[0]}, [r0,:32], r1
+ vst1.32 {d3[1]}, [r0,:32], r1
+ vst1.32 {d4[0]}, [r0,:32], r1
+ vst1.32 {d4[1]}, [r0,:32], r1
+ vst1.32 {d5[0]}, [r0,:32], r1
+ vst1.32 {d5[1]}, [r0,:32], r1 /* r0 is left 8 lines past its starting value */
+
+ bx lr
+endfunc
+
+ .section .rodata
+ .align 4
+idct_coeff_neon: /* lane order must match the w1..w4c aliases defined near the top of the file */
+ .short W1, W2, W3, W4, W5, W6, W7, W4c
+
+ .macro idct_start data /* common prologue of the exported entry points; data names the register holding the coefficient block */
+ push {r4-r7, lr}
+ pld [\data]
+ pld [\data, #64]
+ vpush {d8-d15}
+ movrel r3, idct_coeff_neon
+ vld1.64 {d0,d1}, [r3,:128] /* d0,d1 = the eight constants above */
+ .endm
+
+ .macro idct_end /* matching epilogue: restores d8-d15 and r4-r7, then returns through the saved lr */
+ vpop {d8-d15}
+ pop {r4-r7, pc}
+ .endm
+
+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+function ff_simple_idct_put_neon, export=1
+ idct_start r2
+
+ bl idct_row4_pld_neon /* prefetch dst, then (by fall-through) row pass on the first 64 bytes of data */
+ bl idct_row4_neon /* row pass on the second 64 bytes */
+ add r2, r2, #-128 /* back to the start of data */
+ bl idct_col4_neon /* first 4 columns */
+ bl idct_col4_st8_neon
+ sub r0, r0, r1, lsl #3 /* rewind dst by the 8 lines just stored */
+ add r0, r0, #4 /* move 4 bytes to the right */
+ add r2, r2, #-120 /* data + 8 bytes: the other 4 columns */
+ bl idct_col4_neon
+ bl idct_col4_st8_neon
+
+ idct_end
+endfunc
+
+ .align 6
+
+function idct_col4_add8_neon /* add the shifted q1-q4 to 8 lines of 4 bytes at r0 (stride r1), saturate, store back */
+ mov ip, r0 /* ip is the store pointer; r0 is used for the loads */
+
+ vld1.32 {d10[0]}, [r0,:32], r1
+ vshr.s16 q1, q1, #COL_SHIFT-16
+ vld1.32 {d10[1]}, [r0,:32], r1
+ vshr.s16 q2, q2, #COL_SHIFT-16
+ vld1.32 {d11[0]}, [r0,:32], r1
+ vshr.s16 q3, q3, #COL_SHIFT-16
+ vld1.32 {d11[1]}, [r0,:32], r1
+ vshr.s16 q4, q4, #COL_SHIFT-16
+ vld1.32 {d12[0]}, [r0,:32], r1
+ vaddw.u8 q1, q1, d10
+ vld1.32 {d12[1]}, [r0,:32], r1
+ vaddw.u8 q2, q2, d11
+ vld1.32 {d13[0]}, [r0,:32], r1
+ vqmovun.s16 d2, q1
+ vld1.32 {d13[1]}, [r0,:32], r1
+ vaddw.u8 q3, q3, d12
+ vst1.32 {d2[0]}, [ip,:32], r1
+ vqmovun.s16 d3, q2
+ vst1.32 {d2[1]}, [ip,:32], r1
+ vaddw.u8 q4, q4, d13
+ vst1.32 {d3[0]}, [ip,:32], r1
+ vqmovun.s16 d4, q3
+ vst1.32 {d3[1]}, [ip,:32], r1
+ vqmovun.s16 d5, q4
+ vst1.32 {d4[0]}, [ip,:32], r1
+ vst1.32 {d4[1]}, [ip,:32], r1
+ vst1.32 {d5[0]}, [ip,:32], r1
+ vst1.32 {d5[1]}, [ip,:32], r1
+
+ bx lr
+endfunc
+
+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+function ff_simple_idct_add_neon, export=1
+ idct_start r2
+
+ bl idct_row4_pld_neon /* prefetch dst, then (by fall-through) row pass on the first 64 bytes of data */
+ bl idct_row4_neon /* row pass on the second 64 bytes */
+ add r2, r2, #-128 /* back to the start of data */
+ bl idct_col4_neon /* first 4 columns */
+ bl idct_col4_add8_neon
+ sub r0, r0, r1, lsl #3 /* rewind dst by the 8 lines just processed */
+ add r0, r0, #4 /* move 4 bytes to the right */
+ add r2, r2, #-120 /* data + 8 bytes: the other 4 columns */
+ bl idct_col4_neon
+ bl idct_col4_add8_neon
+
+ idct_end
+endfunc
+
+ .align 6
+
+function idct_col4_st16_neon /* shift q1-q4 right by COL_SHIFT-16 and store d2-d9 at r2, one 8-byte group every 16 bytes */
+ mov ip, #16
+
+ vshr.s16 q1, q1, #COL_SHIFT-16
+ vshr.s16 q2, q2, #COL_SHIFT-16
+ vst1.64 {d2}, [r2,:64], ip
+ vshr.s16 q3, q3, #COL_SHIFT-16
+ vst1.64 {d3}, [r2,:64], ip
+ vshr.s16 q4, q4, #COL_SHIFT-16
+ vst1.64 {d4}, [r2,:64], ip
+ vst1.64 {d5}, [r2,:64], ip
+ vst1.64 {d6}, [r2,:64], ip
+ vst1.64 {d7}, [r2,:64], ip
+ vst1.64 {d8}, [r2,:64], ip
+ vst1.64 {d9}, [r2,:64], ip /* r2 is left 128 bytes past its starting value */
+
+ bx lr
+endfunc
+
+/* void ff_simple_idct_neon(DCTELEM *data); */
+function ff_simple_idct_neon, export=1
+ idct_start r0
+
+ mov r2, r0 /* the helper routines take the data pointer in r2 */
+ bl idct_row4_neon /* row pass on the first 64 bytes */
+ bl idct_row4_neon /* row pass on the second 64 bytes */
+ add r2, r2, #-128 /* back to the start of data */
+ bl idct_col4_neon /* first 4 columns */
+ add r2, r2, #-128 /* rewind so the store overwrites the columns just read */
+ bl idct_col4_st16_neon
+ add r2, r2, #-120 /* data + 8 bytes: the other 4 columns */
+ bl idct_col4_neon
+ add r2, r2, #-128
+ bl idct_col4_st16_neon
+
+ idct_end
+endfunc
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/avfft.c b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.c
new file mode 100644
index 00000000..25fc4e09
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.c
@@ -0,0 +1,142 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "avfft.h"
+#include "fft.h"
+
+/* FFT */
+
+FFTContext *av_fft_init(int nbits, int inverse)
+{   /* returns a ready context, or NULL when allocation or ff_fft_init() fails */
+    FFTContext *s = av_malloc(sizeof(*s));
+
+    if (s && ff_fft_init(s, nbits, inverse)) /* NOTE(review): assumes nonzero means failure; confirm in fft.h */
+        av_freep(&s);                        /* do not hand out a half-initialised context */
+
+    return s;
+}
+
+void av_fft_permute(FFTContext *s, FFTComplex *z)
+{   /* dispatch through the permute hook stored in the context */
+    (*s->fft_permute)(s, z);
+}
+
+void av_fft_calc(FFTContext *s, FFTComplex *z)
+{   /* dispatch through the transform hook stored in the context */
+    (*s->fft_calc)(s, z);
+}
+
+void av_fft_end(FFTContext *s)
+{   /* tear down and free a context obtained from av_fft_init(); NULL is ignored */
+    if (!s)
+        return;
+    ff_fft_end(s);
+    av_free(s);
+}
+
+#if CONFIG_MDCT
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale)
+{   /* returns a ready context, or NULL when allocation or ff_mdct_init() fails */
+    FFTContext *s = av_malloc(sizeof(*s));
+
+    if (s && ff_mdct_init(s, nbits, inverse, scale)) /* NOTE(review): assumes nonzero means failure; confirm in fft.h */
+        av_freep(&s);                                /* do not hand out a half-initialised context */
+
+    return s;
+}
+
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{   /* dispatch through the imdct_calc hook stored in the context */
+    (*s->imdct_calc)(s, output, input);
+}
+
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{   /* dispatch through the imdct_half hook stored in the context */
+    (*s->imdct_half)(s, output, input);
+}
+
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{   /* dispatch through the mdct_calc hook stored in the context */
+    (*s->mdct_calc)(s, output, input);
+}
+
+void av_mdct_end(FFTContext *s)
+{   /* tear down and free a context obtained from av_mdct_init(); NULL is ignored */
+    if (!s)
+        return;
+    ff_mdct_end(s);
+    av_free(s);
+}
+
+#endif /* CONFIG_MDCT */
+
+#if CONFIG_RDFT
+
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
+{   /* returns a ready context, or NULL when allocation or ff_rdft_init() fails */
+    RDFTContext *s = av_malloc(sizeof(*s));
+
+    if (s && ff_rdft_init(s, nbits, trans)) /* NOTE(review): assumes nonzero means failure; confirm in fft.h */
+        av_freep(&s);                       /* do not hand out a half-initialised context */
+
+    return s;
+}
+
+void av_rdft_calc(RDFTContext *s, FFTSample *data)
+{ /* public wrapper: forwards both arguments unchanged to ff_rdft_calc() */
+ ff_rdft_calc(s, data);
+}
+
+void av_rdft_end(RDFTContext *s)
+{   /* tear down and free a context obtained from av_rdft_init(); NULL is ignored */
+    if (!s)
+        return;
+    ff_rdft_end(s);
+    av_free(s);
+}
+
+#endif /* CONFIG_RDFT */
+
+#if CONFIG_DCT
+
+DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)
+{   /* returns a ready context, or NULL when allocation or ff_dct_init() fails */
+    DCTContext *s = av_malloc(sizeof(*s));
+
+    if (s && ff_dct_init(s, nbits, inverse)) /* NOTE(review): assumes nonzero means failure; confirm in fft.h */
+        av_freep(&s);                        /* do not hand out a half-initialised context */
+
+    return s;
+}
+
+void av_dct_calc(DCTContext *s, FFTSample *data)
+{ /* public wrapper: forwards both arguments unchanged to ff_dct_calc() */
+ ff_dct_calc(s, data);
+}
+
+void av_dct_end(DCTContext *s)
+{   /* tear down and free a context obtained from av_dct_init(); NULL is ignored */
+    if (!s)
+        return;
+    ff_dct_end(s);
+    av_free(s);
+}
+
+#endif /* CONFIG_DCT */
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/avfft.h b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.h
new file mode 100644
index 00000000..fdf30237
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/avfft.h
@@ -0,0 +1,103 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVFFT_H
+#define AVCODEC_AVFFT_H
+
+#include "publik.h"
+
+typedef float FFTSample;
+
+typedef struct FFTComplex {
+ FFTSample re, im;
+} FFTComplex;
+
+typedef struct FFTContext FFTContext;
+
+/**
+ * Set up a complex FFT.
+ * @param nbits log2 of the length of the input array
+ * @param inverse if 0 perform the forward transform, if 1 perform the inverse
+ */
+PUBLIK FFTContext *av_fft_init(int nbits, int inverse);
+
+/**
+ * Do the permutation needed BEFORE calling av_fft_calc().
+ */
+PUBLIK void av_fft_permute(FFTContext *s, FFTComplex *z);
+
+/**
+ * Do a complex FFT with the parameters defined in av_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+PUBLIK void av_fft_calc(FFTContext *s, FFTComplex *z);
+
+PUBLIK void av_fft_end(FFTContext *s);
+
+#if 0
+FFTContext *av_mdct_init(int nbits, int inverse, double scale);
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_end(FFTContext *s);
+#endif
+
+/* Real Discrete Fourier Transform */
+
+enum RDFTransformType {
+ DFT_R2C,
+ IDFT_C2R,
+ IDFT_R2C,
+ DFT_C2R,
+};
+
+typedef struct RDFTContext RDFTContext;
+
+/**
+ * Set up a real FFT.
+ * @param nbits log2 of the length of the input array
+ * @param trans the type of transform
+ */
+PUBLIK RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+PUBLIK void av_rdft_calc(RDFTContext *s, FFTSample *data);
+PUBLIK void av_rdft_end(RDFTContext *s);
+
+/* Discrete Cosine Transform */
+
+typedef struct DCTContext DCTContext;
+
+enum DCTTransformType {
+ DCT_II = 0,
+ DCT_III,
+ DCT_I,
+ DST_I,
+};
+
+/**
+ * Set up DCT.
+ * @param nbits size of the input array:
+ * (1 << nbits) for DCT-II, DCT-III and DST-I
+ * (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+PUBLIK DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+PUBLIK void av_dct_calc(DCTContext *s, FFTSample *data);
+PUBLIK void av_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_AVFFT_H */
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/dct.c b/plugins/supereq/ffmpeg_fft/libavcodec/dct.c
new file mode 100644
index 00000000..6ea1936e
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/dct.c
@@ -0,0 +1,228 @@
+/*
+ * (I)DCT Transforms
+ * Copyright (c) 2009 Peter Ross <pross@xvid.org>
+ * Copyright (c) 2010 Alex Converse <alex.converse@gmail.com>
+ * Copyright (c) 2010 Vitor Sessak
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * (Inverse) Discrete Cosine Transforms. These are also known as the
+ * type II and type III DCTs respectively.
+ */
+
+#include <math.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+#ifndef ARCH_ARM
+#include "x86/fft.h"
+#endif
+
+#define DCT32_FLOAT
+#include "dct32.h"
+
+/* sin(M_PI * x / (2*n)), read from the cosine table as cos(pi/2 - angle) */
+#define SIN(s,n,x) (s->costab[(n) - (x)])
+
+/* cos(M_PI * x / (2*n)) */
+#define COS(s,n,x) (s->costab[x])
+
+/**
+ * In-place transform routine that ff_dct_init() installs for DST_I.
+ * Works on data[0..n-1] with n = 1 << ctx->nbits; the value found in
+ * data[0] on entry is overwritten before it is read.
+ */
+static void ff_dst_calc_I_c(DCTContext *ctx, FFTSample *data)
+{
+ int n = 1 << ctx->nbits;
+ int i;
+
+ data[0] = 0;
+ /* pre-processing: combine each pair (i, n-i) using the sine table */
+ for(i = 1; i < n/2; i++) {
+ float tmp1 = data[i ];
+ float tmp2 = data[n - i];
+ float s = SIN(ctx, n, 2*i);
+
+ s *= tmp1 + tmp2;
+ tmp1 = (tmp1 - tmp2) * 0.5f;
+ data[i ] = s + tmp1;
+ data[n - i] = s - tmp1;
+ }
+
+ data[n/2] *= 2;
+ ff_rdft_calc(&ctx->rdft, data);
+
+ data[0] *= 0.5f;
+
+ /* post-processing: even slots accumulate a running sum, each odd slot
+  * becomes the negated value of the next odd slot */
+ for(i = 1; i < n-2; i += 2) {
+ data[i + 1] += data[i - 1];
+ data[i ] = -data[i + 2];
+ }
+
+ data[n-1] = 0;
+}
+
+/**
+ * In-place transform routine that ff_dct_init() installs for DCT_I.
+ * With n = 1 << ctx->nbits it reads and writes data[0..n], i.e. n + 1
+ * samples, so the buffer must hold one element more than n.
+ */
+static void ff_dct_calc_I_c(DCTContext *ctx, FFTSample *data)
+{
+ int n = 1 << ctx->nbits;
+ int i;
+ float next = -0.5f * (data[0] - data[n]);
+
+ /* pre-processing: fold pairs (i, n-i); 'next' accumulates the
+  * cosine-weighted differences and later becomes output element 1 */
+ for(i = 0; i < n/2; i++) {
+ float tmp1 = data[i ];
+ float tmp2 = data[n - i];
+ float s = SIN(ctx, n, 2*i);
+ float c = COS(ctx, n, 2*i);
+
+ c *= tmp1 - tmp2;
+ s *= tmp1 - tmp2;
+
+ next += c;
+
+ tmp1 = (tmp1 + tmp2) * 0.5f;
+ data[i ] = tmp1 - s;
+ data[n - i] = tmp1 + s;
+ }
+
+ ff_rdft_calc(&ctx->rdft, data);
+ /* move the value the RDFT left in slot 1 to slot n, then seed slot 1 */
+ data[n] = data[1];
+ data[1] = next;
+
+ /* odd outputs are built as a recurrence over the previous odd output */
+ for(i = 3; i <= n; i += 2)
+ data[i] = data[i - 2] - data[i];
+}
+
+/**
+ * In-place transform routine that ff_dct_init() installs for DCT_III.
+ * Works on data[0..n-1] with n = 1 << ctx->nbits; the values coming out of
+ * the RDFT are multiplied by 1/n before the final combination.
+ */
+static void ff_dct_calc_III_c(DCTContext *ctx, FFTSample *data)
+{
+ int n = 1 << ctx->nbits;
+ int i;
+
+ /* saved first: the loop below overwrites data[n - 1] on its first pass */
+ float next = data[n - 1];
+ float inv_n = 1.0f / n;
+
+ /* pre-rotation, walking downwards so data[i + 1] is read before it is
+  * replaced */
+ for (i = n - 2; i >= 2; i -= 2) {
+ float val1 = data[i ];
+ float val2 = data[i - 1] - data[i + 1];
+ float c = COS(ctx, n, i);
+ float s = SIN(ctx, n, i);
+
+ data[i ] = c * val1 + s * val2;
+ data[i + 1] = s * val1 - c * val2;
+ }
+
+ data[1] = 2 * next;
+
+ ff_rdft_calc(&ctx->rdft, data);
+
+ /* post-processing: combine mirrored pairs (i, n-1-i) with the csc2 table */
+ for (i = 0; i < n / 2; i++) {
+ float tmp1 = data[i ] * inv_n;
+ float tmp2 = data[n - i - 1] * inv_n;
+ float csc = ctx->csc2[i] * (tmp1 - tmp2);
+
+ tmp1 += tmp2;
+ data[i ] = tmp1 + csc;
+ data[n - i - 1] = tmp1 - csc;
+ }
+}
+
+/**
+ * In-place transform routine that ff_dct_init() installs for DCT_II
+ * (ff_dct_init() replaces it with dct32_func when nbits == 5).
+ * Works on data[0..n-1] with n = 1 << ctx->nbits.
+ */
+static void ff_dct_calc_II_c(DCTContext *ctx, FFTSample *data)
+{
+ int n = 1 << ctx->nbits;
+ int i;
+ float next;
+
+ /* pre-processing: combine mirrored pairs (i, n-1-i) */
+ for (i=0; i < n/2; i++) {
+ float tmp1 = data[i ];
+ float tmp2 = data[n - i - 1];
+ float s = SIN(ctx, n, 2*i + 1);
+
+ s *= tmp1 - tmp2;
+ tmp1 = (tmp1 + tmp2) * 0.5f;
+
+ data[i ] = tmp1 + s;
+ data[n-i-1] = tmp1 - s;
+ }
+
+ ff_rdft_calc(&ctx->rdft, data);
+
+ next = data[1] * 0.5;
+ data[1] *= -1;
+
+ /* post-rotation, walking downwards; each odd slot receives the running
+  * value of 'next' as it stood before this iteration's update */
+ for (i = n - 2; i >= 0; i -= 2) {
+ float inr = data[i ];
+ float ini = data[i + 1];
+ float c = COS(ctx, n, i);
+ float s = SIN(ctx, n, i);
+
+ data[i ] = c * inr + s * ini;
+
+ data[i+1] = next;
+
+ next += s * inr - c * ini;
+ }
+}
+
+/* Adapter with the dct_calc signature: runs the context's dct32 routine in place. */
+static void dct32_func(DCTContext *ctx, FFTSample *data)
+{
+    (*ctx->dct32)(data, data);
+}
+
+/* Run the transform routine that ff_dct_init() stored in the context. */
+void ff_dct_calc(DCTContext *s, FFTSample *data)
+{
+    (*s->dct_calc)(s, data);
+}
+
+/**
+ * Initialise a DCT/DST context.
+ *
+ * @param s       context to fill in
+ * @param nbits   log2 of the transform size; must be in 2..14 because the
+ *                cosine table used is ff_cos_tabs[nbits + 2], which only has
+ *                usable entries at indices 4..16
+ * @param inverse transform type; selects the routine stored in s->dct_calc
+ * @return 0 on success, -1 on an unsupported size, an allocation failure or
+ *         a failed ff_rdft_init(); on failure s->csc2 is NULL
+ */
+av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
+{
+    int n;
+    int i;
+
+    /* never leave an indeterminate pointer behind on the early-out paths */
+    s->csc2 = NULL;
+
+    /* reject sizes that would index outside ff_cos_tabs[] or hit one of its
+     * NULL placeholder entries */
+    if (nbits < 2 || nbits > 14)
+        return -1;
+    n = 1 << nbits;
+
+    s->nbits   = nbits;
+    s->inverse = inverse;
+
+    ff_init_ff_cos_tabs(nbits+2);
+
+    s->costab = ff_cos_tabs[nbits+2];
+
+    s->csc2 = av_malloc(n/2 * sizeof(FFTSample));
+    if (!s->csc2)
+        return -1;
+
+    /* the RDFT runs in mode 1 for DCT_III and in mode 0 for every other type */
+    if (ff_rdft_init(&s->rdft, nbits, inverse == DCT_III) < 0) {
+        av_free(s->csc2);
+        s->csc2 = NULL;     /* do not leave a dangling pointer in the context */
+        return -1;
+    }
+
+    for (i = 0; i < n/2; i++)
+        s->csc2[i] = 0.5 / sin((M_PI / (2*n) * (2*i + 1)));
+
+    switch(inverse) {
+    case DCT_I  : s->dct_calc = ff_dct_calc_I_c;   break;
+    case DCT_II : s->dct_calc = ff_dct_calc_II_c;  break;
+    case DCT_III: s->dct_calc = ff_dct_calc_III_c; break;
+    case DST_I  : s->dct_calc = ff_dst_calc_I_c;   break;
+    }
+
+    /* 32-point DCT-II has a dedicated unrolled implementation */
+    if (inverse == DCT_II && nbits == 5)
+        s->dct_calc = dct32_func;
+
+    s->dct32 = dct32;
+    if (HAVE_MMX) ff_dct_init_mmx(s);
+
+    return 0;
+}
+
+/* Free the csc2 table and shut down the embedded RDFT context. */
+av_cold void ff_dct_end(DCTContext *s)
+{
+    FFTSample *const table = s->csc2;
+
+    ff_rdft_end(&s->rdft);
+    av_free(table);
+}
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/dct32.c b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.c
new file mode 100644
index 00000000..3e6ad78d
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.c
@@ -0,0 +1,262 @@
+/*
+ * Template for the Discrete Cosine Transform for 32 samples
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dct32.h"
+
+/* COSj_k = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))), pre-divided by the power of two that the matching BF/BF0 shift argument multiplies back in */
+
+/* cos(i*pi/64) */
+
+#define COS0_0 FIXHR(0.50060299823519630134/2)
+#define COS0_1 FIXHR(0.50547095989754365998/2)
+#define COS0_2 FIXHR(0.51544730992262454697/2)
+#define COS0_3 FIXHR(0.53104259108978417447/2)
+#define COS0_4 FIXHR(0.55310389603444452782/2)
+#define COS0_5 FIXHR(0.58293496820613387367/2)
+#define COS0_6 FIXHR(0.62250412303566481615/2)
+#define COS0_7 FIXHR(0.67480834145500574602/2)
+#define COS0_8 FIXHR(0.74453627100229844977/2)
+#define COS0_9 FIXHR(0.83934964541552703873/2)
+#define COS0_10 FIXHR(0.97256823786196069369/2)
+#define COS0_11 FIXHR(1.16943993343288495515/4)
+#define COS0_12 FIXHR(1.48416461631416627724/4)
+#define COS0_13 FIXHR(2.05778100995341155085/8)
+#define COS0_14 FIXHR(3.40760841846871878570/8)
+#define COS0_15 FIXHR(10.19000812354805681150/32)
+
+#define COS1_0 FIXHR(0.50241928618815570551/2)
+#define COS1_1 FIXHR(0.52249861493968888062/2)
+#define COS1_2 FIXHR(0.56694403481635770368/2)
+#define COS1_3 FIXHR(0.64682178335999012954/2)
+#define COS1_4 FIXHR(0.78815462345125022473/2)
+#define COS1_5 FIXHR(1.06067768599034747134/4)
+#define COS1_6 FIXHR(1.72244709823833392782/4)
+#define COS1_7 FIXHR(5.10114861868916385802/16)
+
+#define COS2_0 FIXHR(0.50979557910415916894/2)
+#define COS2_1 FIXHR(0.60134488693504528054/2)
+#define COS2_2 FIXHR(0.89997622313641570463/2)
+#define COS2_3 FIXHR(2.56291544774150617881/8)
+
+#define COS3_0 FIXHR(0.54119610014619698439/2)
+#define COS3_1 FIXHR(1.30656296487637652785/4)
+
+#define COS4_0 FIXHR(0.70710678118654752439/2)
+
+/* butterfly operator: val<a> becomes the sum, val<b> becomes the difference
+ * multiplied by the coefficient c and by 1 << s (via MULH3) */
+#define BF(a, b, c, s)\
+{\
+ tmp0 = val##a + val##b;\
+ tmp1 = val##a - val##b;\
+ val##a = tmp0;\
+ val##b = MULH3(tmp1, c, 1<<(s));\
+}
+
+/* same butterfly, but the two operands are loaded from the input array tab[] */
+#define BF0(a, b, c, s)\
+{\
+ tmp0 = tab[a] + tab[b];\
+ tmp1 = tab[a] - tab[b];\
+ val##a = tmp0;\
+ val##b = MULH3(tmp1, c, 1<<(s));\
+}
+
+/* two COS4_0 butterflies on (a,b) and (c,d), then fold d into c */
+#define BF1(a, b, c, d)\
+{\
+ BF(a, b, COS4_0, 1);\
+ BF(c, d,-COS4_0, 1);\
+ val##c += val##d;\
+}
+
+/* as BF1, followed by three more accumulations across the four values */
+#define BF2(a, b, c, d)\
+{\
+ BF(a, b, COS4_0, 1);\
+ BF(c, d,-COS4_0, 1);\
+ val##c += val##d;\
+ val##a += val##c;\
+ val##c += val##b;\
+ val##b += val##d;\
+}
+
+/* accumulate val<b> into val<a> */
+#define ADD(a, b) val##a += val##b
+
+/**
+ * DCT32 without 1/sqrt(2) coef zero scaling.
+ *
+ * Fully unrolled 32-point transform: reads tab[0..31] and writes out[0..31].
+ * Every input is loaded exactly once by a BF0() step, and all BF0() steps
+ * come before the first store to out[], so out and tab may be the same
+ * buffer (dct32_func() in dct.c calls it that way).
+ *
+ * @param out destination for the 32 results
+ * @param tab the 32 input samples
+ */
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
+{
+ INTFLOAT tmp0, tmp1;
+
+ INTFLOAT val0 , val1 , val2 , val3 , val4 , val5 , val6 , val7 ,
+ val8 , val9 , val10, val11, val12, val13, val14, val15,
+ val16, val17, val18, val19, val20, val21, val22, val23,
+ val24, val25, val26, val27, val28, val29, val30, val31;
+
+ /* pass 1 */
+ BF0( 0, 31, COS0_0 , 1);
+ BF0(15, 16, COS0_15, 5);
+ /* pass 2 */
+ BF( 0, 15, COS1_0 , 1);
+ BF(16, 31,-COS1_0 , 1);
+ /* pass 1 */
+ BF0( 7, 24, COS0_7 , 1);
+ BF0( 8, 23, COS0_8 , 1);
+ /* pass 2 */
+ BF( 7, 8, COS1_7 , 4);
+ BF(23, 24,-COS1_7 , 4);
+ /* pass 3 */
+ BF( 0, 7, COS2_0 , 1);
+ BF( 8, 15,-COS2_0 , 1);
+ BF(16, 23, COS2_0 , 1);
+ BF(24, 31,-COS2_0 , 1);
+ /* pass 1 */
+ BF0( 3, 28, COS0_3 , 1);
+ BF0(12, 19, COS0_12, 2);
+ /* pass 2 */
+ BF( 3, 12, COS1_3 , 1);
+ BF(19, 28,-COS1_3 , 1);
+ /* pass 1 */
+ BF0( 4, 27, COS0_4 , 1);
+ BF0(11, 20, COS0_11, 2);
+ /* pass 2 */
+ BF( 4, 11, COS1_4 , 1);
+ BF(20, 27,-COS1_4 , 1);
+ /* pass 3 */
+ BF( 3, 4, COS2_3 , 3);
+ BF(11, 12,-COS2_3 , 3);
+ BF(19, 20, COS2_3 , 3);
+ BF(27, 28,-COS2_3 , 3);
+ /* pass 4 */
+ BF( 0, 3, COS3_0 , 1);
+ BF( 4, 7,-COS3_0 , 1);
+ BF( 8, 11, COS3_0 , 1);
+ BF(12, 15,-COS3_0 , 1);
+ BF(16, 19, COS3_0 , 1);
+ BF(20, 23,-COS3_0 , 1);
+ BF(24, 27, COS3_0 , 1);
+ BF(28, 31,-COS3_0 , 1);
+
+
+
+ /* pass 1 */
+ BF0( 1, 30, COS0_1 , 1);
+ BF0(14, 17, COS0_14, 3);
+ /* pass 2 */
+ BF( 1, 14, COS1_1 , 1);
+ BF(17, 30,-COS1_1 , 1);
+ /* pass 1 */
+ BF0( 6, 25, COS0_6 , 1);
+ BF0( 9, 22, COS0_9 , 1);
+ /* pass 2 */
+ BF( 6, 9, COS1_6 , 2);
+ BF(22, 25,-COS1_6 , 2);
+ /* pass 3 */
+ BF( 1, 6, COS2_1 , 1);
+ BF( 9, 14,-COS2_1 , 1);
+ BF(17, 22, COS2_1 , 1);
+ BF(25, 30,-COS2_1 , 1);
+
+ /* pass 1 */
+ BF0( 2, 29, COS0_2 , 1);
+ BF0(13, 18, COS0_13, 3);
+ /* pass 2 */
+ BF( 2, 13, COS1_2 , 1);
+ BF(18, 29,-COS1_2 , 1);
+ /* pass 1 */
+ BF0( 5, 26, COS0_5 , 1);
+ BF0(10, 21, COS0_10, 1);
+ /* pass 2 */
+ BF( 5, 10, COS1_5 , 2);
+ BF(21, 26,-COS1_5 , 2);
+ /* pass 3 */
+ BF( 2, 5, COS2_2 , 1);
+ BF(10, 13,-COS2_2 , 1);
+ BF(18, 21, COS2_2 , 1);
+ BF(26, 29,-COS2_2 , 1);
+ /* pass 4 */
+ BF( 1, 2, COS3_1 , 2);
+ BF( 5, 6,-COS3_1 , 2);
+ BF( 9, 10, COS3_1 , 2);
+ BF(13, 14,-COS3_1 , 2);
+ BF(17, 18, COS3_1 , 2);
+ BF(21, 22,-COS3_1 , 2);
+ BF(25, 26, COS3_1 , 2);
+ BF(29, 30,-COS3_1 , 2);
+
+ /* pass 5 */
+ BF1( 0, 1, 2, 3);
+ BF2( 4, 5, 6, 7);
+ BF1( 8, 9, 10, 11);
+ BF2(12, 13, 14, 15);
+ BF1(16, 17, 18, 19);
+ BF2(20, 21, 22, 23);
+ BF1(24, 25, 26, 27);
+ BF2(28, 29, 30, 31);
+
+ /* pass 6 */
+
+ ADD( 8, 12);
+ ADD(12, 10);
+ ADD(10, 14);
+ ADD(14, 9);
+ ADD( 9, 13);
+ ADD(13, 11);
+ ADD(11, 15);
+
+ /* even-numbered outputs come straight from val0..val15 */
+ out[ 0] = val0;
+ out[16] = val1;
+ out[ 8] = val2;
+ out[24] = val3;
+ out[ 4] = val4;
+ out[20] = val5;
+ out[12] = val6;
+ out[28] = val7;
+ out[ 2] = val8;
+ out[18] = val9;
+ out[10] = val10;
+ out[26] = val11;
+ out[ 6] = val12;
+ out[22] = val13;
+ out[14] = val14;
+ out[30] = val15;
+
+ ADD(24, 28);
+ ADD(28, 26);
+ ADD(26, 30);
+ ADD(30, 25);
+ ADD(25, 29);
+ ADD(29, 27);
+ ADD(27, 31);
+
+ /* odd-numbered outputs are sums of two of val16..val31 (out[31] is val31 alone) */
+ out[ 1] = val16 + val24;
+ out[17] = val17 + val25;
+ out[ 9] = val18 + val26;
+ out[25] = val19 + val27;
+ out[ 5] = val20 + val28;
+ out[21] = val21 + val29;
+ out[13] = val22 + val30;
+ out[29] = val23 + val31;
+ out[ 3] = val24 + val20;
+ out[19] = val25 + val21;
+ out[11] = val26 + val22;
+ out[27] = val27 + val23;
+ out[ 7] = val28 + val18;
+ out[23] = val29 + val19;
+ out[15] = val30 + val17;
+ out[31] = val31;
+}
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/dct32.h b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.h
new file mode 100644
index 00000000..dc2d847a
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/dct32.h
@@ -0,0 +1,10 @@
+#ifndef DCT_32_H
+#define DCT_32_H
+
+/* float-only configuration of the dct32 template helpers */
+#define FIXHR(x) ((float)(x)) /* coefficient constant, converted to float */
+#define MULH3(x, y, s) ((s)*(y)*(x)) /* plain product of the three operands */
+#define INTFLOAT float /* sample type used by dct32() */
+
+void dct32(INTFLOAT *out, const INTFLOAT *tab);
+
+#endif
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/fft.c b/plugins/supereq/ffmpeg_fft/libavcodec/fft.c
new file mode 100644
index 00000000..04082bf4
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/fft.c
@@ -0,0 +1,300 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * FFT/IFFT transforms.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+
+/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+#if !CONFIG_HARDCODED_TABLES
+COSTABLE(16);
+COSTABLE(32);
+COSTABLE(64);
+COSTABLE(128);
+COSTABLE(256);
+COSTABLE(512);
+COSTABLE(1024);
+COSTABLE(2048);
+COSTABLE(4096);
+COSTABLE(8192);
+COSTABLE(16384);
+COSTABLE(32768);
+COSTABLE(65536);
+#endif
+COSTABLE_CONST FFTSample * const ff_cos_tabs[] = {
+ NULL, NULL, NULL, NULL,
+ ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
+ ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
+};
+
+/*
+ * Recursive index mapping used by ff_fft_init() to build the revtab table.
+ * The result may be negative; the caller reduces it with "& (n-1)".
+ */
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    int half, quarter, scaled;
+
+    if (n <= 2)
+        return i & 1;
+
+    half = n >> 1;
+    if ((i & half) == 0)
+        return 2 * split_radix_permutation(i, half, inverse);
+
+    quarter = half >> 1;
+    scaled  = 4 * split_radix_permutation(i, quarter, inverse);
+
+    return (inverse == !(i & quarter)) ? scaled + 1 : scaled - 1;
+}
+
+/*
+ * Fill ff_cos_tabs[index] at run time (compiled out when the tables are
+ * hardcoded): entries 0..m/4 hold cos(2*pi*k/m) for m = 1 << index, and the
+ * entries up to m/2 - 1 mirror them.
+ */
+av_cold void ff_init_ff_cos_tabs(int index)
+{
+#if !CONFIG_HARDCODED_TABLES
+    const int size    = 1<<index;
+    const int quarter = size/4;
+    const double step = 2*M_PI/size;
+    FFTSample *const table = ff_cos_tabs[index];
+    int k;
+
+    for (k = 0; k <= quarter; k++)
+        table[k] = cos(k*step);
+    for (k = 1; k < quarter; k++)
+        table[size/2-k] = table[k];
+#endif
+}
+
+/**
+ * Initialise an FFT context for 1 << nbits complex points.
+ *
+ * Allocates the permutation table and the scratch buffer, installs the C
+ * implementations (optionally replaced by an architecture-specific init),
+ * fills the cosine tables for sizes 16 .. 1 << nbits and builds revtab.
+ *
+ * @param s       context to fill in; it does not need to be zeroed beforehand
+ * @param nbits   log2 of the transform size, 2..16
+ * @param inverse stored in the context and used when building revtab
+ * @return 0 on success, -1 on an unsupported size or allocation failure; on
+ *         failure both buffers are released and their pointers are NULL
+ */
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, n;
+
+    /* The fail path releases both buffers. Clear them first so it never
+     * passes an indeterminate pointer to av_freep() when the context was
+     * obtained from a non-zeroing allocator. */
+    s->revtab  = NULL;
+    s->tmp_buf = NULL;
+
+    if (nbits < 2 || nbits > 16)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+    if (!s->tmp_buf)
+        goto fail;
+    s->inverse = inverse;
+
+    s->fft_permute = ff_fft_permute_c;
+    s->fft_calc    = ff_fft_calc_c;
+#if CONFIG_MDCT
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+    s->mdct_calc   = ff_mdct_calc_c;
+#endif
+
+    /* give the platform-specific code a chance to override the C routines */
+#if ARCH_ARM
+    ff_fft_init_arm(s);
+#elif HAVE_ALTIVEC
+    if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
+#elif HAVE_MMX
+    if (HAVE_MMX)     ff_fft_init_mmx(s);
+#endif
+
+    /* tables below index 4 do not exist (NULL entries in ff_cos_tabs) */
+    for(j=4; j<=nbits; j++) {
+        ff_init_ff_cos_tabs(j);
+    }
+    /* the permutation value may be negative; "& (n-1)" wraps it into 0..n-1 */
+    for(i=0; i<n; i++)
+        s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
+
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->tmp_buf);
+    return -1;
+}
+
+/*
+ * Reorder z according to revtab: element j moves to position revtab[j].
+ * The reordered data is assembled in the context's scratch buffer and then
+ * copied back over z.
+ */
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    const int count      = 1 << s->nbits;
+    const uint16_t *perm = s->revtab;
+    int k;
+
+    /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+    for (k = 0; k < count; k++)
+        s->tmp_buf[perm[k]] = z[k];
+    memcpy(z, s->tmp_buf, count * sizeof(FFTComplex));
+}
+
+/* Free the scratch buffer and the permutation table; av_freep() also resets
+ * the two pointers. */
+av_cold void ff_fft_end(FFTContext *s)
+{
+    av_freep(&s->tmp_buf);
+    av_freep(&s->revtab);
+}
+
+/* 1/sqrt(2) as a float */
+#define sqrthalf (float)M_SQRT1_2
+
+/* x = a - b, y = a + b. x is assigned first, so x must not alias an operand
+ * that is still needed for y. */
+#define BF(x,y,a,b) {\
+ x = a - b;\
+ y = a + b;\
+}
+
+/* Combine the rotated values held in t1,t2,t5,t6 with a0/a1 and store the
+ * four results into a0..a3; uses t3 and t4 as scratch. */
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+ BF(t3, t5, t5, t1);\
+ BF(a2.re, a0.re, a0.re, t5);\
+ BF(a3.im, a1.im, a1.im, t3);\
+ BF(t4, t6, t2, t6);\
+ BF(a3.re, a1.re, a1.re, t4);\
+ BF(a2.im, a0.im, a0.im, t6);\
+}
+
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+ FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+ BF(t3, t5, t5, t1);\
+ BF(a2.re, a0.re, r0, t5);\
+ BF(a3.im, a1.im, i1, t3);\
+ BF(t4, t6, t2, t6);\
+ BF(a3.re, a1.re, r1, t4);\
+ BF(a2.im, a0.im, i0, t6);\
+}
+
+/* Rotate a2 by (wre, -wim) and a3 by (wre, +wim) into t1,t2,t5,t6, then run
+ * whichever BUTTERFLIES variant is currently defined. */
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
+ t1 = a2.re * wre + a2.im * wim;\
+ t2 = a2.im * wre - a2.re * wim;\
+ t5 = a3.re * wre - a3.im * wim;\
+ t6 = a3.im * wre + a3.re * wim;\
+ BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+/* TRANSFORM without the rotation: a2 and a3 are used as they are */
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
+ t1 = a2.re;\
+ t2 = a2.im;\
+ t5 = a3.re;\
+ t6 = a3.im;\
+ BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+/* z[0...8n-1], w[1...2n-1] */
+/* Generates one combining pass named 'name'. The four inputs of each
+ * TRANSFORM are 2n elements apart (offsets 0, o1, o2, o3). wre walks the
+ * cosine table forwards while wim starts 2n entries later and walks
+ * backwards. The body is expanded with whichever BUTTERFLIES macro is
+ * defined at the point of instantiation. */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+ FFTSample t1, t2, t3, t4, t5, t6;\
+ int o1 = 2*n;\
+ int o2 = 4*n;\
+ int o3 = 6*n;\
+ const FFTSample *wim = wre+o1;\
+ n--;\
+\
+ TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+ TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+ do {\
+ z += 2;\
+ wre += 2;\
+ wim -= 2;\
+ TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+ TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+ } while(--n);\
+}
+
+/* pass() uses the plain butterflies; pass_big() is the same code built with
+ * BUTTERFLIES_BIG, which loads all inputs before storing */
+PASS(pass)
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+
+/* Defines fft<n>(): one half-size transform on the first n/2 elements, two
+ * quarter-size transforms on the third and fourth quarters, then a
+ * combining pass over the whole array using the ff_cos_<n> table.
+ * Expects n2 == n/2 and n4 == n/4. */
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z)\
+{\
+ fft##n2(z);\
+ fft##n4(z+n4*2);\
+ fft##n4(z+n4*3);\
+ pass(z,ff_cos_##n,n4/2);\
+}
+
+/* In-place 4-point transform of z[0..3]; built only from BF() add/subtract
+ * pairs, no multiplications. */
+static void fft4(FFTComplex *z)
+{
+ FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+
+ BF(t3, t1, z[0].re, z[1].re);
+ BF(t8, t6, z[3].re, z[2].re);
+ BF(z[2].re, z[0].re, t1, t6);
+ BF(t4, t2, z[0].im, z[1].im);
+ BF(t7, t5, z[2].im, z[3].im);
+ BF(z[3].im, z[1].im, t4, t8);
+ BF(z[3].re, z[1].re, t3, t7);
+ BF(z[2].im, z[0].im, t2, t5);
+}
+
+/* In-place 8-point transform of z[0..7]: fft4() on z[0..3], BF() combinations
+ * of z[4..7] with the even outputs, then one TRANSFORM on the odd elements
+ * whose two twiddle components are both sqrthalf. */
+static void fft8(FFTComplex *z)
+{
+ FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+
+ fft4(z);
+
+ BF(t1, z[5].re, z[4].re, -z[5].re);
+ BF(t2, z[5].im, z[4].im, -z[5].im);
+ BF(t3, z[7].re, z[6].re, -z[7].re);
+ BF(t4, z[7].im, z[6].im, -z[7].im);
+ BF(t8, t1, t3, t1);
+ BF(t7, t2, t2, t4);
+ BF(z[4].re, z[0].re, z[0].re, t1);
+ BF(z[4].im, z[0].im, z[0].im, t2);
+ BF(z[6].re, z[2].re, z[2].re, t7);
+ BF(z[6].im, z[2].im, z[2].im, t8);
+
+ TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
+}
+
+/* 16-point transform: hand-unrolled unless CONFIG_SMALL is set, in which
+ * case the generic DECL_FFT body is used instead. */
+#if !CONFIG_SMALL
+static void fft16(FFTComplex *z)
+{
+ FFTSample t1, t2, t3, t4, t5, t6;
+
+ fft8(z);
+ fft4(z+8);
+ fft4(z+12);
+
+ /* the four combining steps, with twiddles taken from sqrthalf and ff_cos_16 */
+ TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+ TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+ TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
+ TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+/* transforms of 32..512 points use pass() */
+DECL_FFT(32,16,8)
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+/* from 1024 points on, pass_big() is substituted unless CONFIG_SMALL is set */
+#if !CONFIG_SMALL
+#define pass pass_big
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+
+/* entry k handles 1 << (k + 2) points; indexed with nbits - 2 */
+static void (* const fft_dispatch[])(FFTComplex*) = {
+ fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+ fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
+};
+
+/* C implementation of fft_calc: pick the routine for 1 << nbits points from
+ * fft_dispatch (whose first entry is the 4-point transform) and run it on z. */
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
+{
+    void (*const transform)(FFTComplex*) = fft_dispatch[s->nbits-2];
+
+    transform(z);
+}
+
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/fft.h b/plugins/supereq/ffmpeg_fft/libavcodec/fft.h
new file mode 100644
index 00000000..b2e0f540
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/fft.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFT_H
+#define AVCODEC_FFT_H
+
+#include <stdint.h>
+#include "../config.h"
+#include "libavutil/mem.h"
+#include "avfft.h"
+
+/* FFT computation */
+
+/* State shared by the complex FFT and the MDCT built on top of it. */
+struct FFTContext {
+ int nbits; /* log2 of the number of complex points */
+ int inverse; /* direction flag as passed to ff_fft_init() */
+ uint16_t *revtab; /* permutation table built by ff_fft_init(), 1 << nbits entries */
+ FFTComplex *tmp_buf; /* scratch buffer used by ff_fft_permute_c(), 1 << nbits entries */
+ int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
+ int mdct_bits; /* n = 2^nbits */
+ /* pre/post rotation tables */
+ FFTSample *tcos;
+ FFTSample *tsin;
+ /* implementation hooks; ff_fft_init() installs the C versions and may let
+  * architecture-specific init code replace them */
+ void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
+ void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+ void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+ void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+ void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+ int permutation; /* presumably one of the FF_MDCT_PERM_* values below - TODO confirm */
+#define FF_MDCT_PERM_NONE 0
+#define FF_MDCT_PERM_INTERLEAVE 1
+};
+
+#if CONFIG_HARDCODED_TABLES
+#define COSTABLE_CONST const
+#define SINTABLE_CONST const
+#define SINETABLE_CONST const
+#else
+#define COSTABLE_CONST
+#define SINTABLE_CONST
+#define SINETABLE_CONST
+#endif
+
+#define COSTABLE(size) \
+ COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2]
+#define SINTABLE(size) \
+ SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
+#define SINETABLE(size) \
+ SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
+extern COSTABLE(16);
+extern COSTABLE(32);
+extern COSTABLE(64);
+extern COSTABLE(128);
+extern COSTABLE(256);
+extern COSTABLE(512);
+extern COSTABLE(1024);
+extern COSTABLE(2048);
+extern COSTABLE(4096);
+extern COSTABLE(8192);
+extern COSTABLE(16384);
+extern COSTABLE(32768);
+extern COSTABLE(65536);
+extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17];
+
+/**
+ * Initialize the cosine table in ff_cos_tabs[index]
+ * \param index index in ff_cos_tabs array of the table to initialize
+ */
+void ff_init_ff_cos_tabs(int index);
+
+extern SINTABLE(16);
+extern SINTABLE(32);
+extern SINTABLE(64);
+extern SINTABLE(128);
+extern SINTABLE(256);
+extern SINTABLE(512);
+extern SINTABLE(1024);
+extern SINTABLE(2048);
+extern SINTABLE(4096);
+extern SINTABLE(8192);
+extern SINTABLE(16384);
+extern SINTABLE(32768);
+extern SINTABLE(65536);
+
+/**
+ * Set up a complex FFT.
+ * @param nbits log2 of the length of the input array
+ * @param inverse if 0 perform the forward transform, if 1 perform the inverse
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse);
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+
+void ff_fft_init_altivec(FFTContext *s);
+void ff_fft_init_mmx(FFTContext *s);
+void ff_fft_init_arm(FFTContext *s);
+void ff_dct_init_mmx(DCTContext *s);
+
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc().
+ */
+static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    /* dispatch through the routine installed in the context */
+    (*s->fft_permute)(s, z);
+}
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    /* dispatch through the routine installed in the context */
+    (*s->fft_calc)(s, z);
+}
+void ff_fft_end(FFTContext *s);
+
+/* MDCT computation */
+
+/* Run the imdct_calc routine installed in the context. */
+static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    (*s->imdct_calc)(s, output, input);
+}
+/* Run the imdct_half routine installed in the context. */
+static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    (*s->imdct_half)(s, output, input);
+}
+
+/* Run the mdct_calc routine installed in the context. */
+static inline void ff_mdct_calc(FFTContext *s, FFTSample *output,
+                                const FFTSample *input)
+{
+    (*s->mdct_calc)(s, output, input);
+}
+
+/**
+ * Maximum window size for ff_kbd_window_init.
+ */
+#define FF_KBD_WINDOW_MAX 1024
+
+/**
+ * Generate a Kaiser-Bessel Derived Window.
+ * @param window pointer to half window
+ * @param alpha determines window shape
+ * @param n size of half window, max FF_KBD_WINDOW_MAX
+ */
+void ff_kbd_window_init(float *window, float alpha, int n);
+
+/**
+ * Generate a sine window.
+ * @param window pointer to half window
+ * @param n size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+
+/**
+ * initialize the specified entry of ff_sine_windows
+ */
+void ff_init_ff_sine_windows(int index);
+extern SINETABLE( 32);
+extern SINETABLE( 64);
+extern SINETABLE( 128);
+extern SINETABLE( 256);
+extern SINETABLE( 512);
+extern SINETABLE(1024);
+extern SINETABLE(2048);
+extern SINETABLE(4096);
+extern SINETABLE_CONST float * const ff_sine_windows[13];
+
+int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+/* State of a real-input FFT; embeds the complex FFT context it drives. */
+struct RDFTContext {
+ int nbits;
+ int inverse;
+ int sign_convention;
+
+ /* pre/post rotation tables */
+ const FFTSample *tcos;
+ SINTABLE_CONST FFTSample *tsin;
+ FFTContext fft; /* embedded by value, not a pointer */
+ void (*rdft_calc)(struct RDFTContext *s, FFTSample *z); /* routine called by ff_rdft_calc() */
+};
+
+/**
+ * Set up a real FFT.
+ * @param nbits log2 of the length of the input array
+ * @param trans the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_end(RDFTContext *s);
+
+void ff_rdft_init_arm(RDFTContext *s);
+
+/* Run the rdft_calc routine installed in the context on data. */
+static av_always_inline void ff_rdft_calc(RDFTContext *s, FFTSample *data)
+{
+    (*s->rdft_calc)(s, data);
+}
+
+/* Discrete Cosine Transform */
+
+/* State of a DCT/DST; embeds the real FFT context used by the C routines. */
+struct DCTContext {
+ int nbits; /* log2 of the transform size, as passed to ff_dct_init() */
+ int inverse; /* holds the enum DCTTransformType value passed to ff_dct_init() */
+ RDFTContext rdft; /* embedded by value, not a pointer */
+ const float *costab; /* ff_cos_tabs[nbits + 2], shared, not owned */
+ FFTSample *csc2; /* 0.5 / sin(...) table of (1 << nbits) / 2 entries, allocated by ff_dct_init() */
+ void (*dct_calc)(struct DCTContext *s, FFTSample *data); /* routine called by ff_dct_calc() */
+ void (*dct32)(FFTSample *out, const FFTSample *in); /* 32-point routine used by dct32_func() */
+};
+
+/**
+ * Set up DCT.
+ * @param nbits size of the input array:
+ * (1 << nbits) for DCT-II, DCT-III and DST-I
+ * (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_calc(DCTContext *s, FFTSample *data);
+void ff_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_FFT_H */
diff --git a/plugins/supereq/ffmpeg_fft/libavcodec/rdft.c b/plugins/supereq/ffmpeg_fft/libavcodec/rdft.c
new file mode 100644
index 00000000..fe6014fb
--- /dev/null
+++ b/plugins/supereq/ffmpeg_fft/libavcodec/rdft.c
@@ -0,0 +1,137 @@
+/*
+ * (I)RDFT transforms
+ * Copyright (c) 2009 Alex Converse <alex dot converse at gmail dot com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <stdlib.h>
+#include <math.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+
+/**
+ * @file
+ * (Inverse) Real Discrete Fourier Transforms.
+ */
+
+/* Sine tables, one per supported transform size n:
+ * sin(2*pi*x/n) for 0<=x<n/4, followed by n/2<=x<3n/4.
+ * When tables are not hardcoded, ff_rdft_init() fills the part it uses. */
+#if !CONFIG_HARDCODED_TABLES
+SINTABLE(16);
+SINTABLE(32);
+SINTABLE(64);
+SINTABLE(128);
+SINTABLE(256);
+SINTABLE(512);
+SINTABLE(1024);
+SINTABLE(2048);
+SINTABLE(4096);
+SINTABLE(8192);
+SINTABLE(16384);
+SINTABLE(32768);
+SINTABLE(65536);
+#endif
+
+/* Indexed by log2 of the transform size. Entries 0..3 are left out on
+ * purpose and are therefore null pointers: no table exists below size 16. */
+SINTABLE_CONST FFTSample * const ff_sin_tabs[] = {
+    [4]  = ff_sin_16,
+    [5]  = ff_sin_32,
+    [6]  = ff_sin_64,
+    [7]  = ff_sin_128,
+    [8]  = ff_sin_256,
+    [9]  = ff_sin_512,
+    [10] = ff_sin_1024,
+    [11] = ff_sin_2048,
+    [12] = ff_sin_4096,
+    [13] = ff_sin_8192,
+    [14] = ff_sin_16384,
+    [15] = ff_sin_32768,
+    [16] = ff_sin_65536,
+};
+
+/**
+ * C implementation of the (inverse) real DFT; data is overwritten with the
+ * result.
+ *
+ * Map one real FFT into two parallel real even and odd FFTs, interleave the
+ * two real FFTs into one complex FFT and unmangle the results. Forward
+ * transforms run the complex FFT first and the even/odd split afterwards;
+ * inverse transforms do the split first and the complex FFT last.
+ * ref: http://www.engineeringproductivitytools.com/stuff/T0001/PT10.HTM
+ */
+static void ff_rdft_calc_c(RDFTContext* s, FFTSample* data)
+{
+    int k, lo, hi;
+    FFTComplex even, odd;
+    const int n = 1 << s->nbits;
+    const int quarter = n >> 2;
+    const float half = 0.5;
+    const float odd_scale = 0.5 - s->inverse; /* +0.5 when inverse is 0, -0.5 when it is 1 */
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+
+    if (!s->inverse) {
+        ff_fft_permute(&s->fft, (FFTComplex*)data);
+        ff_fft_calc(&s->fft, (FFTComplex*)data);
+    }
+
+    /* Index 0 is a special case because of packing: the DC term is real, so
+       the N/2 term (also real) is stored alongside it. */
+    even.re = data[0];
+    data[0] = even.re + data[1];
+    data[1] = even.re - data[1];
+
+    for (k = 1; k < quarter; k++) {
+        const FFTSample c  = tcos[k];
+        const FFTSample sn = tsin[k];
+
+        lo = 2*k;
+        hi = n - lo;
+
+        /* Separate even and odd FFTs */
+        even.re =  half*(data[lo  ] + data[hi  ]);
+        odd.im  = -odd_scale*(data[lo  ] - data[hi  ]);
+        even.im =  half*(data[lo+1] - data[hi+1]);
+        odd.re  =  odd_scale*(data[lo+1] + data[hi+1]);
+
+        /* Apply twiddle factors to the odd FFT and add to the even FFT */
+        data[lo  ] =  even.re + odd.re*c - odd.im*sn;
+        data[lo+1] =  even.im + odd.im*c + odd.re*sn;
+        data[hi  ] =  even.re - odd.re*c + odd.im*sn;
+        data[hi+1] = -even.im + odd.im*c + odd.re*sn;
+    }
+
+    /* k is left at the loop's final value here; apply the sign convention to
+       the element at index 2*k+1. */
+    data[2*k+1] = s->sign_convention*data[2*k+1];
+
+    if (s->inverse) {
+        data[0] *= half;
+        data[1] *= half;
+        ff_fft_permute(&s->fft, (FFTComplex*)data);
+        ff_fft_calc(&s->fft, (FFTComplex*)data);
+    }
+}
+
+/**
+ * Initialise an RDFT context.
+ *
+ * @param s     context to fill in
+ * @param nbits log2 of the transform length; must be in the range 4..16
+ * @param trans transform type; selects the direction, the sign of the twiddle
+ *              angle and the sign convention used by ff_rdft_calc_c()
+ * @return 0 on success, -1 if nbits is out of range or ff_fft_init() fails
+ */
+av_cold int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans)
+{
+    /* DFT_R2C and DFT_C2R use a negated twiddle angle and the second
+     * quarter-length section of the sine table. */
+    const int neg_theta = trans == DFT_R2C || trans == DFT_C2R;
+    int n;
+
+    /* These fields are written before validation, as before, so the context
+     * looks the same to callers on the failure path. */
+    s->nbits           = nbits;
+    s->inverse         = trans == IDFT_C2R || trans == DFT_C2R;
+    s->sign_convention = trans == IDFT_R2C || trans == DFT_C2R ? 1 : -1;
+
+    if (nbits < 4 || nbits > 16) {
+        return -1;
+    }
+
+    /* Shift only once nbits is known to be in range: 1 << nbits is undefined
+     * for negative or over-wide shift counts. */
+    n = 1 << nbits;
+
+    if (ff_fft_init(&s->fft, nbits-1, trans == IDFT_C2R || trans == IDFT_R2C) < 0) {
+        return -1;
+    }
+
+    ff_init_ff_cos_tabs(nbits);
+    s->tcos = ff_cos_tabs[nbits];
+    s->tsin = ff_sin_tabs[nbits] + neg_theta*(n>>2);
+#if !CONFIG_HARDCODED_TABLES
+    {
+        /* Scoped here so neither variable is unused with hardcoded tables. */
+        const double theta = (neg_theta ? -1 : 1)*2*M_PI/n;
+        int i;
+
+        for (i = 0; i < (n>>2); i++) {
+            s->tsin[i] = sin(i*theta);
+        }
+    }
+#endif
+    s->rdft_calc = ff_rdft_calc_c;
+
+#if ARCH_ARM
+    ff_rdft_init_arm(s);
+#endif
+
+    return 0;
+}
+
+/**
+ * Tear down an RDFT context by ending the complex FFT context embedded in it.
+ */
+av_cold void ff_rdft_end(RDFTContext *s)
+{
+    FFTContext *fft = &s->fft;
+
+    ff_fft_end(fft);
+}