about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2010-05-09 14:45:29 +0000
committer diego <diego@b3059339-0415-0410-9bf9-f77b7e298cf2> 2010-05-09 14:45:29 +0000
commit 12f67b8372d8f7146a2983f24727ea6306aa70c0 (patch)
tree 105e0b1ada4c20ad88fc08e4ccb39830369f36aa
parent 3921b7170673525a45bbfbc0abc93b401e29f794 (diff)
Remove internal liba52 copy.

Nowadays FFmpeg is faster than liba52 and external liba52 is well supported.

git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@31147 b3059339-0415-0410-9bf9-f77b7e298cf2
-rw-r--r--  Copyright                         12
-rw-r--r--  DOCS/tech/MAINTAINERS              1
-rw-r--r--  DOCS/tech/binary-packaging.txt     2
-rw-r--r--  DOCS/tech/general.txt              2
-rw-r--r--  Makefile                          16
-rwxr-xr-x  configure                         20
-rw-r--r--  liba52/a52.h                      74
-rw-r--r--  liba52/a52_internal.h            140
-rw-r--r--  liba52/bit_allocate.c            265
-rw-r--r--  liba52/bitstream.c               106
-rw-r--r--  liba52/bitstream.h               152
-rw-r--r--  liba52/crc.c                      73
-rw-r--r--  liba52/downmix.c                1788
-rw-r--r--  liba52/imdct.c                  1304
-rw-r--r--  liba52/imdct_3dnow.h             581
-rw-r--r--  liba52/liba52.txt                208
-rw-r--r--  liba52/liba52_changes.diff      2473
-rw-r--r--  liba52/mm_accel.h                 46
-rw-r--r--  liba52/parse.c                   919
-rw-r--r--  liba52/resample.c                 82
-rw-r--r--  liba52/resample_altivec.c        110
-rw-r--r--  liba52/resample_c.c              205
-rw-r--r--  liba52/resample_mmx.c            541
-rw-r--r--  liba52/srfftp.h                  303
-rw-r--r--  liba52/srfftp_3dnow.h            187
-rw-r--r--  liba52/tables.h                  246
-rw-r--r--  liba52/test.c                    154
-rw-r--r--  libmpcodecs/ad_liba52.c           16
-rw-r--r--  libmpdemux/muxer_mpeg.c            2
29 files changed, 6 insertions, 10022 deletions
diff --git a/Copyright b/Copyright
index 24261f58d2..46fdb03248 100644
--- a/Copyright
+++ b/Copyright
@@ -37,18 +37,6 @@ Copyright: 1992 by Jutta Degener and Carsten Bormann, TU Berlin
License: permissive, see libmpcodecs/native/xa_gsm.c
-Name: liba52
-Version: 0.7.4 + patches
-URL: http://liba52.sourceforge.net/
-Directory: liba52
-Copyright: 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- 2000-2001 Michel Lespinasse <walken@zoy.org>
- 2000 Yuqing Deng <Yuqing_Deng@brown.edu>
- 2002 Nick Kurshev
- 2004 Romain Dolbeau <romain@dolbeau.org>
-License: GNU General Public License
-
-
Name: libdvdcss
Version: 1.2.10
URL: http://developers.videolan.org/libdvdcss/
diff --git a/DOCS/tech/MAINTAINERS b/DOCS/tech/MAINTAINERS
index 7b7b1c710c..b9b84d8fc2 100644
--- a/DOCS/tech/MAINTAINERS
+++ b/DOCS/tech/MAINTAINERS
@@ -93,7 +93,6 @@ Imported libs/projects:
* VIDIX core: Benjamin Zores
* mp3lib: None
* loader: None
- * liba52: None
* libmpeg2: None
* libdvdcss: Diego Biurrun
* libdvdread: Diego Biurrun
diff --git a/DOCS/tech/binary-packaging.txt b/DOCS/tech/binary-packaging.txt
index 9d852372db..3aec8b2312 100644
--- a/DOCS/tech/binary-packaging.txt
+++ b/DOCS/tech/binary-packaging.txt
@@ -45,7 +45,7 @@ the following features MUST be included in any official binary package:
* codecs
- FAAD(internal)
- libavcodec(internal)
- - native codecs (libmpeg2/liba52/mp3lib)
+ - native codecs (libmpeg2/mp3lib)
- Vorbis Tremor codec(internal)
- RealPlayer codecs support (*)
- Win32/VfW/DShow/QT codecs support (*)
diff --git a/DOCS/tech/general.txt b/DOCS/tech/general.txt
index b9ac61ff10..36a584b746 100644
--- a/DOCS/tech/general.txt
+++ b/DOCS/tech/general.txt
@@ -196,7 +196,7 @@ Now, go on:
Only used if none of the above works.
4. Codecs. Consists of libmpcodecs/* and separate files or libs,
- for example liba52, libmpeg2, loader, mp3lib.
+ for example libmpeg2, loader, mp3lib.
mplayer.c doesn't call them directly, but through the dec_audio.c and
dec_video.c files, so the mplayer.c doesn't have to know anything about
diff --git a/Makefile b/Makefile
index e1d2ad9578..1b2c99c0e9 100644
--- a/Makefile
+++ b/Makefile
@@ -108,16 +108,6 @@ SRCS_COMMON-$(HAVE_SYS_MMAN_H) += libaf/af_export.c osdep/mmap_anon.c
SRCS_COMMON-$(JPEG) += libmpcodecs/vd_ijpg.c
SRCS_COMMON-$(LADSPA) += libaf/af_ladspa.c
SRCS_COMMON-$(LIBA52) += libmpcodecs/ad_liba52.c
-SRCS_LIBA52_INTERNAL += liba52/crc.c \
- liba52/resample.c \
- liba52/bit_allocate.c \
- liba52/bitstream.c \
- liba52/downmix.c \
- liba52/imdct.c \
- liba52/parse.c \
-
-SRCS_COMMON-$(LIBA52_INTERNAL) += $(SRCS_LIBA52_INTERNAL)
-
SRCS_COMMON-$(LIBASS) += libmpcodecs/vf_ass.c \
libass/ass_mp.c \
@@ -758,7 +748,6 @@ DIRS = . \
gui/wm \
gui/win32 \
input \
- liba52 \
libaf \
libao2 \
libass \
@@ -1020,8 +1009,6 @@ codec-cfg-test$(EXESUF): codec-cfg.c codecs.conf.h help_mp.h $(TEST_OBJS)
codecs2html$(EXESUF): codec-cfg.c help_mp.h $(TEST_OBJS)
$(CC) -I. -DCODECS2HTML -o $@ $^
-liba52/test$(EXESUF): cpudetect.o $(SRCS_LIBA52_INTERNAL:.c=.o) -lm
-
libvo/aspecttest$(EXESUF): libvo/aspect.o libvo/geometry.o $(TEST_OBJS)
LOADER_TEST_OBJS = $(SRCS_WIN32_EMULATION:.c=.o) $(SRCS_QTX_EMULATION:.S=.o) libavutil/libavutil.a osdep/mmap_anon.o cpudetect.o $(TEST_OBJS)
@@ -1031,8 +1018,7 @@ loader/qtx/list$(EXESUF) loader/qtx/qtxload$(EXESUF): $(LOADER_TEST_OBJS)
mp3lib/test$(EXESUF) mp3lib/test2$(EXESUF): $(SRCS_MP3LIB:.c=.o) libvo/aclib.o cpudetect.o $(TEST_OBJS)
-TESTS = codecs2html codec-cfg-test liba52/test libvo/aspecttest \
- mp3lib/test mp3lib/test2
+TESTS = codecs2html codec-cfg-test libvo/aspecttest mp3lib/test mp3lib/test2
ifdef ARCH_X86
TESTS += loader/qtx/list loader/qtx/qtxload
diff --git a/configure b/configure
index 415ae52bc4..1f0e1a29fb 100755
--- a/configure
+++ b/configure
@@ -335,7 +335,6 @@ Codecs:
--enable-libdca enable libdca support [autodetect]
--disable-mp3lib disable builtin mp3lib [autodetect]
--disable-liba52 disable liba52 [autodetect]
- --enable-liba52-internal enable builtin liba52 [disabled]
--disable-libmpeg2 disable builtin libmpeg2 [autodetect]
--disable-musepack disable musepack support [autodetect]
--disable-libopencore_amrnb disable libopencore_amr narrowband [autodetect]
@@ -631,7 +630,6 @@ _speex=auto
_theora=auto
_mp3lib=auto
_liba52=auto
-_liba52_internal=no
_libdca=auto
_libmpeg2=auto
_faad=auto
@@ -1028,8 +1026,6 @@ for ac_option do
--disable-theora) _theora=no ;;
--enable-mp3lib) _mp3lib=yes ;;
--disable-mp3lib) _mp3lib=no ;;
- --enable-liba52-internal) _liba52_internal=yes ;;
- --disable-liba52-internal) _liba52_internal=no ;;
--enable-liba52) _liba52=yes ;;
--disable-liba52) _liba52=no ;;
--enable-libdca) _libdca=yes ;;
@@ -6796,27 +6792,19 @@ fi
echores "$_mp3lib"
echocheck "liba52 support"
-if test "$_liba52_internal" = auto ; then
- test "$cc_vendor" = intel && test "$_cc_major" -le 10 -o "$_cc_major" -eq 11 -a "$_cc_minor" -eq 0 && _liba52_internal=no || _liba52_internal=yes
-fi
def_liba52='#undef CONFIG_LIBA52'
-def_liba52_internal="#undef CONFIG_LIBA52_INTERNAL"
-if test "$_liba52_internal" = yes ; then
- _liba52=yes
- def_liba52_internal="#define CONFIG_LIBA52_INTERNAL 1"
- res_comment="internal"
-elif test "$_liba52_internal" = no && test "$_liba52" = auto ; then
+if test "$_liba52" = auto ; then
_liba52=no
cat > $TMPC << EOF
#include <inttypes.h>
#include <a52dec/a52.h>
int main(void) { a52_state_t *testHand; testHand=a52_init(0); return 0; }
EOF
- cc_check -la52 && _liba52=yes && res_comment="external" && extra_ldflags="$extra_ldflags -la52"
+ cc_check -la52 && _liba52=yes && extra_ldflags="$extra_ldflags -la52"
fi
if test "$_liba52" = yes ; then
def_liba52='#define CONFIG_LIBA52 1'
- codecmodules="liba52($res_comment) $codecmodules"
+ codecmodules="liba52 $codecmodules"
else
nocodecmodules="liba52 $nocodecmodules"
fi
@@ -8582,7 +8570,6 @@ KAI = $_kai
KVA = $_kva
LADSPA = $_ladspa
LIBA52 = $_liba52
-LIBA52_INTERNAL = $_liba52_internal
LIBASS = $_ass
LIBASS_INTERNAL = $ass_internal
LIBBS2B = $_libbs2b
@@ -8965,7 +8952,6 @@ $def_faac
$def_faad
$def_faad_internal
$def_liba52
-$def_liba52_internal
$def_libdca
$def_libdv
$def_liblzo
diff --git a/liba52/a52.h b/liba52/a52.h
deleted file mode 100644
index 3547ad1d36..0000000000
--- a/liba52/a52.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * a52.h
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef A52_H
-#define A52_H
-
-#include <stdint.h>
-#include "mm_accel.h"
-
-#ifndef LIBA52_DOUBLE
-typedef float sample_t;
-#else
-typedef double sample_t;
-#endif
-
-typedef struct a52_state_s a52_state_t;
-
-#define A52_CHANNEL 0
-#define A52_MONO 1
-#define A52_STEREO 2
-#define A52_3F 3
-#define A52_2F1R 4
-#define A52_3F1R 5
-#define A52_2F2R 6
-#define A52_3F2R 7
-#define A52_CHANNEL1 8
-#define A52_CHANNEL2 9
-#define A52_DOLBY 10
-#define A52_CHANNEL_MASK 15
-
-#define A52_LFE 16
-#define A52_ADJUST_LEVEL 32
-
-a52_state_t * a52_init (uint32_t mm_accel);
-sample_t * a52_samples (a52_state_t * state);
-int a52_syncinfo (uint8_t * buf, int * flags,
- int * sample_rate, int * bit_rate);
-int a52_frame (a52_state_t * state, uint8_t * buf, int * flags,
- sample_t * level, sample_t bias);
-void a52_dynrng (a52_state_t * state,
- sample_t (* call) (sample_t, void *), void * data);
-int a52_block (a52_state_t * state);
-void a52_free (a52_state_t * state);
-
-void* a52_resample_init(uint32_t mm_accel,int flags,int chans);
-extern int (* a52_resample) (float * _f, int16_t * s16);
-
-uint16_t crc16_block(uint8_t *data,uint32_t num_bytes);
-
-#endif /* A52_H */
diff --git a/liba52/a52_internal.h b/liba52/a52_internal.h
deleted file mode 100644
index 1f6d205931..0000000000
--- a/liba52/a52_internal.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * a52_internal.h
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-typedef struct {
- uint8_t bai; /* fine SNR offset, fast gain */
- uint8_t deltbae; /* delta bit allocation exists */
- int8_t deltba[50]; /* per-band delta bit allocation */
-} ba_t;
-
-typedef struct {
- uint8_t exp[256]; /* decoded channel exponents */
- int8_t bap[256]; /* derived channel bit allocation */
-} expbap_t;
-
-struct a52_state_s {
- uint8_t fscod; /* sample rate */
- uint8_t halfrate; /* halfrate factor */
- uint8_t acmod; /* coded channels */
- uint8_t lfeon; /* coded lfe channel */
- sample_t clev; /* centre channel mix level */
- sample_t slev; /* surround channels mix level */
-
- int output; /* type of output */
- sample_t level; /* output level */
- sample_t bias; /* output bias */
-
- int dynrnge; /* apply dynamic range */
- sample_t dynrng; /* dynamic range */
- void * dynrngdata; /* dynamic range callback funtion and data */
- sample_t (* dynrngcall) (sample_t range, void * dynrngdata);
-
- uint8_t chincpl; /* channel coupled */
- uint8_t phsflginu; /* phase flags in use (stereo only) */
- uint8_t cplstrtmant; /* coupling channel start mantissa */
- uint8_t cplendmant; /* coupling channel end mantissa */
- uint32_t cplbndstrc; /* coupling band structure */
- sample_t cplco[5][18]; /* coupling coordinates */
-
- /* derived information */
- uint8_t cplstrtbnd; /* coupling start band (for bit allocation) */
- uint8_t ncplbnd; /* number of coupling bands */
-
- uint8_t rematflg; /* stereo rematrixing */
-
- uint8_t endmant[5]; /* channel end mantissa */
-
- uint16_t bai; /* bit allocation information */
-
- uint32_t * buffer_start;
- uint16_t lfsr_state; /* dither state */
- uint32_t bits_left;
- uint32_t current_word;
-
- uint8_t csnroffst; /* coarse SNR offset */
- ba_t cplba; /* coupling bit allocation parameters */
- ba_t ba[5]; /* channel bit allocation parameters */
- ba_t lfeba; /* lfe bit allocation parameters */
-
- uint8_t cplfleak; /* coupling fast leak init */
- uint8_t cplsleak; /* coupling slow leak init */
-
- expbap_t cpl_expbap;
- expbap_t fbw_expbap[5];
- expbap_t lfe_expbap;
-
- sample_t * samples;
- int downmixed;
-};
-
-#define LEVEL_PLUS6DB 2.0
-#define LEVEL_PLUS3DB 1.4142135623730951
-#define LEVEL_3DB 0.7071067811865476
-#define LEVEL_45DB 0.5946035575013605
-#define LEVEL_6DB 0.5
-
-#define EXP_REUSE (0)
-#define EXP_D15 (1)
-#define EXP_D25 (2)
-#define EXP_D45 (3)
-
-#define DELTA_BIT_REUSE (0)
-#define DELTA_BIT_NEW (1)
-#define DELTA_BIT_NONE (2)
-#define DELTA_BIT_RESERVED (3)
-
-#if ARCH_X86_64
-# define REG_a "rax"
-# define REG_d "rdx"
-# define REG_S "rsi"
-# define REG_D "rdi"
-# define REG_BP "rbp"
-#else
-# define REG_a "eax"
-# define REG_d "edx"
-# define REG_S "esi"
-# define REG_D "edi"
-# define REG_BP "ebp"
-#endif
-
-void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
- int start, int end, int fastleak, int slowleak,
- expbap_t * expbap);
-
-int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev);
-void downmix_accel_init(uint32_t mm_accel);
-int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
- sample_t clev, sample_t slev);
-extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
-extern void (*a52_upmix) (sample_t * samples, int acmod, int output);
-
-void a52_imdct_init (uint32_t mm_accel);
-void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
-extern void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
-void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias);
diff --git a/liba52/bit_allocate.c b/liba52/bit_allocate.c
deleted file mode 100644
index 0567b22852..0000000000
--- a/liba52/bit_allocate.c
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
- * bit_allocate.c
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "config.h"
-
-#include <inttypes.h>
-
-#include "a52.h"
-#include "a52_internal.h"
-
-static int hthtab[3][50] = {
- {0x730, 0x730, 0x7c0, 0x800, 0x820, 0x840, 0x850, 0x850, 0x860, 0x860,
- 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x890, 0x890,
- 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900,
- 0x910, 0x910, 0x910, 0x910, 0x900, 0x8f0, 0x8c0, 0x870, 0x820, 0x7e0,
- 0x7a0, 0x770, 0x760, 0x7a0, 0x7c0, 0x7c0, 0x6e0, 0x400, 0x3c0, 0x3c0},
- {0x710, 0x710, 0x7a0, 0x7f0, 0x820, 0x830, 0x840, 0x850, 0x850, 0x860,
- 0x860, 0x860, 0x860, 0x860, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880,
- 0x890, 0x890, 0x8a0, 0x8a0, 0x8b0, 0x8b0, 0x8c0, 0x8c0, 0x8e0, 0x8f0,
- 0x900, 0x910, 0x910, 0x910, 0x910, 0x900, 0x8e0, 0x8b0, 0x870, 0x820,
- 0x7e0, 0x7b0, 0x760, 0x770, 0x7a0, 0x7c0, 0x780, 0x5d0, 0x3c0, 0x3c0},
- {0x680, 0x680, 0x750, 0x7b0, 0x7e0, 0x810, 0x820, 0x830, 0x840, 0x850,
- 0x850, 0x850, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860, 0x860,
- 0x870, 0x870, 0x870, 0x870, 0x880, 0x880, 0x880, 0x890, 0x8a0, 0x8b0,
- 0x8c0, 0x8d0, 0x8e0, 0x8f0, 0x900, 0x910, 0x910, 0x910, 0x900, 0x8f0,
- 0x8d0, 0x8b0, 0x840, 0x7f0, 0x790, 0x760, 0x7a0, 0x7c0, 0x7b0, 0x720}
-};
-
-static int8_t baptab[305] = {
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, /* 93 padding elems */
-
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 14, 14, 14, 14, 14, 14, 14,
- 14, 12, 12, 12, 12, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9, 9,
- 9, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5,
- 5, 4, 4, -3, -3, 3, 3, 3, -2, -2, -1, -1, -1, -1, -1, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0 /* 148 padding elems */
-};
-
-static int bndtab[30] = {21, 22, 23, 24, 25, 26, 27, 28, 31, 34,
- 37, 40, 43, 46, 49, 55, 61, 67, 73, 79,
- 85, 97, 109, 121, 133, 157, 181, 205, 229, 253};
-
-static int8_t latab[256] = {
- -64, -63, -62, -61, -60, -59, -58, -57, -56, -55, -54, -53,
- -52, -52, -51, -50, -49, -48, -47, -47, -46, -45, -44, -44,
- -43, -42, -41, -41, -40, -39, -38, -38, -37, -36, -36, -35,
- -35, -34, -33, -33, -32, -32, -31, -30, -30, -29, -29, -28,
- -28, -27, -27, -26, -26, -25, -25, -24, -24, -23, -23, -22,
- -22, -21, -21, -21, -20, -20, -19, -19, -19, -18, -18, -18,
- -17, -17, -17, -16, -16, -16, -15, -15, -15, -14, -14, -14,
- -13, -13, -13, -13, -12, -12, -12, -12, -11, -11, -11, -11,
- -10, -10, -10, -10, -10, -9, -9, -9, -9, -9, -8, -8,
- -8, -8, -8, -8, -7, -7, -7, -7, -7, -7, -6, -6,
- -6, -6, -6, -6, -6, -6, -5, -5, -5, -5, -5, -5,
- -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
- -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
- -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2,
- -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-
-#define UPDATE_LEAK() \
-do { \
- fastleak += fdecay; \
- if (fastleak > psd + fgain) \
- fastleak = psd + fgain; \
- slowleak += sdecay; \
- if (slowleak > psd + sgain) \
- slowleak = psd + sgain; \
-} while (0)
-
-#define COMPUTE_MASK() \
-do { \
- if (psd > dbknee) \
- mask -= (psd - dbknee) >> 2; \
- if (mask > hth [i >> halfrate]) \
- mask = hth [i >> halfrate]; \
- mask -= snroffset + 128 * deltba[i]; \
- mask = (mask > 0) ? 0 : ((-mask) >> 5); \
- mask -= floor; \
-} while (0)
-
-void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
- int start, int end, int fastleak, int slowleak,
- expbap_t * expbap)
-{
- static int slowgain[4] = {0x540, 0x4d8, 0x478, 0x410};
- static int dbpbtab[4] = {0xc00, 0x500, 0x300, 0x100};
- static int floortab[8] = {0x910, 0x950, 0x990, 0x9d0,
- 0xa10, 0xa90, 0xb10, 0x1400};
-
- int i, j;
- uint8_t * exp;
- int8_t * bap;
- int fdecay, fgain, sdecay, sgain, dbknee, floor, snroffset;
- int psd, mask;
- int8_t * deltba;
- int * hth;
- int halfrate;
-
- halfrate = state->halfrate;
- fdecay = (63 + 20 * ((state->bai >> 7) & 3)) >> halfrate; /* fdcycod */
- fgain = 128 + 128 * (ba->bai & 7); /* fgaincod */
- sdecay = (15 + 2 * (state->bai >> 9)) >> halfrate; /* sdcycod */
- sgain = slowgain[(state->bai >> 5) & 3]; /* sgaincod */
- dbknee = dbpbtab[(state->bai >> 3) & 3]; /* dbpbcod */
- hth = hthtab[state->fscod];
- /*
- * if there is no delta bit allocation, make deltba point to an area
- * known to contain zeroes. baptab+156 here.
- */
- deltba = (ba->deltbae == DELTA_BIT_NONE) ? baptab + 156 : ba->deltba;
- floor = floortab[state->bai & 7]; /* floorcod */
- snroffset = 960 - 64 * state->csnroffst - 4 * (ba->bai >> 3) + floor;
- floor >>= 5;
-
- exp = expbap->exp;
- bap = expbap->bap;
-
- i = bndstart;
- j = start;
- if (start == 0) { /* not the coupling channel */
- int lowcomp;
-
- lowcomp = 0;
- j = end - 1;
- do {
- if (i < j) {
- if (exp[i+1] == exp[i] - 2)
- lowcomp = 384;
- else if (lowcomp && (exp[i+1] > exp[i]))
- lowcomp -= 64;
- }
- psd = 128 * exp[i];
- mask = psd + fgain + lowcomp;
- COMPUTE_MASK ();
- bap[i] = (baptab+156)[mask + 4 * exp[i]];
- i++;
- } while ((i < 3) || ((i < 7) && (exp[i] > exp[i-1])));
- fastleak = psd + fgain;
- slowleak = psd + sgain;
-
- while (i < 7) {
- if (i < j) {
- if (exp[i+1] == exp[i] - 2)
- lowcomp = 384;
- else if (lowcomp && (exp[i+1] > exp[i]))
- lowcomp -= 64;
- }
- psd = 128 * exp[i];
- UPDATE_LEAK ();
- mask = ((fastleak + lowcomp < slowleak) ?
- fastleak + lowcomp : slowleak);
- COMPUTE_MASK ();
- bap[i] = (baptab+156)[mask + 4 * exp[i]];
- i++;
- }
-
- if (end == 7) /* lfe channel */
- return;
-
- do {
- if (exp[i+1] == exp[i] - 2)
- lowcomp = 320;
- else if (lowcomp && (exp[i+1] > exp[i]))
- lowcomp -= 64;
- psd = 128 * exp[i];
- UPDATE_LEAK ();
- mask = ((fastleak + lowcomp < slowleak) ?
- fastleak + lowcomp : slowleak);
- COMPUTE_MASK ();
- bap[i] = (baptab+156)[mask + 4 * exp[i]];
- i++;
- } while (i < 20);
-
- while (lowcomp > 128) { /* two iterations maximum */
- lowcomp -= 128;
- psd = 128 * exp[i];
- UPDATE_LEAK ();
- mask = ((fastleak + lowcomp < slowleak) ?
- fastleak + lowcomp : slowleak);
- COMPUTE_MASK ();
- bap[i] = (baptab+156)[mask + 4 * exp[i]];
- i++;
- }
- j = i;
- }
-
- do {
- int startband, endband;
-
- startband = j;
- endband = ((bndtab-20)[i] < end) ? (bndtab-20)[i] : end;
- psd = 128 * exp[j++];
- while (j < endband) {
- int next, delta;
-
- next = 128 * exp[j++];
- delta = next - psd;
- switch (delta >> 9) {
- case -6: case -5: case -4: case -3: case -2:
- psd = next;
- break;
- case -1:
- psd = next + latab[(-delta) >> 1];
- break;
- case 0:
- psd += latab[delta >> 1];
- break;
- }
- }
- /* minpsd = -289 */
- UPDATE_LEAK ();
- mask = (fastleak < slowleak) ? fastleak : slowleak;
- COMPUTE_MASK ();
- i++;
- j = startband;
- do {
- /* max(mask+4*exp)=147=-(minpsd+fgain-deltba-snroffset)>>5+4*exp */
- /* min(mask+4*exp)=-156=-(sgain-deltba-snroffset)>>5 */
- bap[j] = (baptab+156)[mask + 4 * exp[j]];
- } while (++j < endband);
- } while (j < end);
-}
diff --git a/liba52/bitstream.c b/liba52/bitstream.c
deleted file mode 100644
index 7307527194..0000000000
--- a/liba52/bitstream.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * bitstream.c
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "config.h"
-
-#include <inttypes.h>
-
-#include "a52.h"
-#include "a52_internal.h"
-#include "bitstream.h"
-
-#define BUFFER_SIZE 4096
-
-#ifdef ALT_BITSTREAM_READER
-int indx=0;
-#endif
-
-void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
-{
- int align;
-
- align = (long)buf & 3;
- state->buffer_start = (uint32_t *) (buf - align);
- state->bits_left = 0;
-#ifdef ALT_BITSTREAM_READER
- indx=0;
-#endif
- bitstream_get (state, align * 8);
-}
-
-static inline void bitstream_fill_current (a52_state_t * state)
-{
- uint32_t tmp;
-
- tmp = *(state->buffer_start++);
- state->current_word = swab32 (tmp);
-}
-
-/*
- * The fast paths for _get is in the
- * bitstream.h header file so it can be inlined.
- *
- * The "bottom half" of this routine is suffixed _bh
- *
- * -ah
- */
-
-uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits)
-{
- uint32_t result;
-
- num_bits -= state->bits_left;
- result = ((state->current_word << (32 - state->bits_left)) >>
- (32 - state->bits_left));
-
- bitstream_fill_current (state);
-
- if (num_bits != 0)
- result = (result << num_bits) | (state->current_word >> (32 - num_bits));
-
- state->bits_left = 32 - num_bits;
-
- return result;
-}
-
-int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits)
-{
- int32_t result;
-
- num_bits -= state->bits_left;
- result = ((((int32_t)state->current_word) << (32 - state->bits_left)) >>
- (32 - state->bits_left));
-
- bitstream_fill_current(state);
-
- if (num_bits != 0)
- result = (result << num_bits) | (state->current_word >> (32 - num_bits));
-
- state->bits_left = 32 - num_bits;
-
- return result;
-}
diff --git a/liba52/bitstream.h b/liba52/bitstream.h
deleted file mode 100644
index 76f5556ea9..0000000000
--- a/liba52/bitstream.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * bitstream.h
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* code from ffmpeg/libavcodec */
-#if defined(__sparc__) || defined(hpux)
-/*
- * the alt bitstream reader performs unaligned memory accesses; that doesn't work
- * on sparc/hpux. For now, disable ALT_BITSTREAM_READER.
- */
-#undef ALT_BITSTREAM_READER
-#else
-// alternative (faster) bitstram reader (reades upto 3 bytes over the end of the input)
-#define ALT_BITSTREAM_READER
-
-/* used to avoid misaligned exceptions on some archs (alpha, ...) */
-#if ARCH_X86 || HAVE_ARMV6
-# define unaligned32(a) (*(uint32_t*)(a))
-#else
-# ifdef __GNUC__
-static inline uint32_t unaligned32(const void *v) {
- struct Unaligned {
- uint32_t i;
- } __attribute__((packed));
-
- return ((const struct Unaligned *) v)->i;
-}
-# elif defined(__DECC)
-static inline uint32_t unaligned32(const void *v) {
- return *(const __unaligned uint32_t *) v;
-}
-# else
-static inline uint32_t unaligned32(const void *v) {
- return *(const uint32_t *) v;
-}
-# endif
-#endif //!ARCH_X86
-
-#endif
-
-/* (stolen from the kernel) */
-#if HAVE_BIGENDIAN
-
-# define swab32(x) (x)
-
-#else
-
-# if defined (__i386__)
-
-# define swab32(x) __i386_swab32(x)
- static inline const uint32_t __i386_swab32(uint32_t x)
- {
- __asm__("bswap %0" : "=r" (x) : "0" (x));
- return x;
- }
-
-# else
-
-# define swab32(x) __generic_swab32(x)
- static inline const uint32_t __generic_swab32(uint32_t x)
- {
- return ((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) |
- (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3]));
- }
-# endif
-#endif
-
-#ifdef ALT_BITSTREAM_READER
-extern int indx;
-#endif
-
-void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf);
-uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits);
-int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits);
-
-static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
-{
-#ifdef ALT_BITSTREAM_READER
- uint32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
-
- result<<= (indx&0x07);
- result>>= 32 - num_bits;
- indx+= num_bits;
-
- return result;
-#else
- uint32_t result;
-
- if (num_bits < state->bits_left) {
- result = (state->current_word << (32 - state->bits_left)) >> (32 - num_bits);
- state->bits_left -= num_bits;
- return result;
- }
-
- return a52_bitstream_get_bh (state, num_bits);
-#endif
-}
-
-static inline void bitstream_skip(a52_state_t * state, int num_bits)
-{
-#ifdef ALT_BITSTREAM_READER
- indx+= num_bits;
-#else
- bitstream_get(state, num_bits);
-#endif
-}
-
-static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
-{
-#ifdef ALT_BITSTREAM_READER
- int32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
-
- result<<= (indx&0x07);
- result>>= 32 - num_bits;
- indx+= num_bits;
-
- return result;
-#else
- int32_t result;
-
- if (num_bits < state->bits_left) {
- result = (((int32_t)state->current_word) << (32 - state->bits_left)) >> (32 - num_bits);
- state->bits_left -= num_bits;
- return result;
- }
-
- return a52_bitstream_get_bh_2 (state, num_bits);
-#endif
-}
diff --git a/liba52/crc.c b/liba52/crc.c
deleted file mode 100644
index aa0a19c005..0000000000
--- a/liba52/crc.c
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * crc.c
- *
- * Copyright (C) Aaron Holtzman - May 1999
- *
- * This file is part of ac3dec, a free Dolby AC-3 stream decoder.
- *
- * ac3dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * ac3dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <inttypes.h>
-
-static const uint16_t crc_lut[256] =
-{
- 0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011,
- 0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022,
- 0x8063,0x0066,0x006c,0x8069,0x0078,0x807d,0x8077,0x0072,
- 0x0050,0x8055,0x805f,0x005a,0x804b,0x004e,0x0044,0x8041,
- 0x80c3,0x00c6,0x00cc,0x80c9,0x00d8,0x80dd,0x80d7,0x00d2,
- 0x00f0,0x80f5,0x80ff,0x00fa,0x80eb,0x00ee,0x00e4,0x80e1,
- 0x00a0,0x80a5,0x80af,0x00aa,0x80bb,0x00be,0x00b4,0x80b1,
- 0x8093,0x0096,0x009c,0x8099,0x0088,0x808d,0x8087,0x0082,
- 0x8183,0x0186,0x018c,0x8189,0x0198,0x819d,0x8197,0x0192,
- 0x01b0,0x81b5,0x81bf,0x01ba,0x81ab,0x01ae,0x01a4,0x81a1,
- 0x01e0,0x81e5,0x81ef,0x01ea,0x81fb,0x01fe,0x01f4,0x81f1,
- 0x81d3,0x01d6,0x01dc,0x81d9,0x01c8,0x81cd,0x81c7,0x01c2,
- 0x0140,0x8145,0x814f,0x014a,0x815b,0x015e,0x0154,0x8151,
- 0x8173,0x0176,0x017c,0x8179,0x0168,0x816d,0x8167,0x0162,
- 0x8123,0x0126,0x012c,0x8129,0x0138,0x813d,0x8137,0x0132,
- 0x0110,0x8115,0x811f,0x011a,0x810b,0x010e,0x0104,0x8101,
- 0x8303,0x0306,0x030c,0x8309,0x0318,0x831d,0x8317,0x0312,
- 0x0330,0x8335,0x833f,0x033a,0x832b,0x032e,0x0324,0x8321,
- 0x0360,0x8365,0x836f,0x036a,0x837b,0x037e,0x0374,0x8371,
- 0x8353,0x0356,0x035c,0x8359,0x0348,0x834d,0x8347,0x0342,
- 0x03c0,0x83c5,0x83cf,0x03ca,0x83db,0x03de,0x03d4,0x83d1,
- 0x83f3,0x03f6,0x03fc,0x83f9,0x03e8,0x83ed,0x83e7,0x03e2,
- 0x83a3,0x03a6,0x03ac,0x83a9,0x03b8,0x83bd,0x83b7,0x03b2,
- 0x0390,0x8395,0x839f,0x039a,0x838b,0x038e,0x0384,0x8381,
- 0x0280,0x8285,0x828f,0x028a,0x829b,0x029e,0x0294,0x8291,
- 0x82b3,0x02b6,0x02bc,0x82b9,0x02a8,0x82ad,0x82a7,0x02a2,
- 0x82e3,0x02e6,0x02ec,0x82e9,0x02f8,0x82fd,0x82f7,0x02f2,
- 0x02d0,0x82d5,0x82df,0x02da,0x82cb,0x02ce,0x02c4,0x82c1,
- 0x8243,0x0246,0x024c,0x8249,0x0258,0x825d,0x8257,0x0252,
- 0x0270,0x8275,0x827f,0x027a,0x826b,0x026e,0x0264,0x8261,
- 0x0220,0x8225,0x822f,0x022a,0x823b,0x023e,0x0234,0x8231,
- 0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202
-};
-
-uint16_t crc16_block(uint8_t *data,uint32_t num_bytes)
-{
- uint32_t i;
- uint16_t state=0;
-
- for(i=0;i<num_bytes;i++)
- state = crc_lut[data[i] ^ (state>>8)] ^ (state<<8);
-
- return state;
-}
diff --git a/liba52/downmix.c b/liba52/downmix.c
deleted file mode 100644
index 212c87d921..0000000000
--- a/liba52/downmix.c
+++ /dev/null
@@ -1,1788 +0,0 @@
-/*
- * downmix.c
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
- */
-
-#include "config.h"
-
-#include <string.h>
-#include <inttypes.h>
-
-#include "a52.h"
-#include "a52_internal.h"
-#include "mm_accel.h"
-
-#define CONVERT(acmod,output) (((output) << 3) + (acmod))
-
-
-void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)= NULL;
-void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL;
-
-static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
-static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
-static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
-static void upmix_MMX (sample_t * samples, int acmod, int output);
-static void upmix_C (sample_t * samples, int acmod, int output);
-
-void downmix_accel_init(uint32_t mm_accel)
-{
- a52_upmix= upmix_C;
- a52_downmix= downmix_C;
-#if ARCH_X86 || ARCH_X86_64
- if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX;
- if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE;
- if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
-#endif
-}
-
-int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev)
-{
- static uint8_t table[11][8] = {
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
- A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
- {A52_MONO, A52_MONO, A52_MONO, A52_MONO,
- A52_MONO, A52_MONO, A52_MONO, A52_MONO},
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
- A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
- A52_STEREO, A52_3F, A52_STEREO, A52_3F},
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
- A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R},
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
- A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R},
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
- A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R},
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
- A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R},
- {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO,
- A52_MONO, A52_MONO, A52_MONO, A52_MONO},
- {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO,
- A52_MONO, A52_MONO, A52_MONO, A52_MONO},
- {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY,
- A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY}
- };
- int output;
-
- output = flags & A52_CHANNEL_MASK;
- if (output > A52_DOLBY)
- return -1;
-
- output = table[output][input & 7];
-
- if ((output == A52_STEREO) &&
- ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB))))
- output = A52_DOLBY;
-
- if (flags & A52_ADJUST_LEVEL)
- switch (CONVERT (input & 7, output)) {
-
- case CONVERT (A52_3F, A52_MONO):
- *level *= LEVEL_3DB / (1 + clev);
- break;
-
- case CONVERT (A52_STEREO, A52_MONO):
- case CONVERT (A52_2F2R, A52_2F1R):
- case CONVERT (A52_3F2R, A52_3F1R):
- level_3db:
- *level *= LEVEL_3DB;
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
- if (clev < LEVEL_PLUS3DB - 1)
- goto level_3db;
- /* break thru */
- case CONVERT (A52_3F, A52_STEREO):
- case CONVERT (A52_3F1R, A52_2F1R):
- case CONVERT (A52_3F1R, A52_2F2R):
- case CONVERT (A52_3F2R, A52_2F2R):
- *level /= 1 + clev;
- break;
-
- case CONVERT (A52_2F1R, A52_MONO):
- *level *= LEVEL_PLUS3DB / (2 + slev);
- break;
-
- case CONVERT (A52_2F1R, A52_STEREO):
- case CONVERT (A52_3F1R, A52_3F):
- *level /= 1 + slev * LEVEL_3DB;
- break;
-
- case CONVERT (A52_3F1R, A52_MONO):
- *level *= LEVEL_3DB / (1 + clev + 0.5 * slev);
- break;
-
- case CONVERT (A52_3F1R, A52_STEREO):
- *level /= 1 + clev + slev * LEVEL_3DB;
- break;
-
- case CONVERT (A52_2F2R, A52_MONO):
- *level *= LEVEL_3DB / (1 + slev);
- break;
-
- case CONVERT (A52_2F2R, A52_STEREO):
- case CONVERT (A52_3F2R, A52_3F):
- *level /= 1 + slev;
- break;
-
- case CONVERT (A52_3F2R, A52_MONO):
- *level *= LEVEL_3DB / (1 + clev + slev);
- break;
-
- case CONVERT (A52_3F2R, A52_STEREO):
- *level /= 1 + clev + slev;
- break;
-
- case CONVERT (A52_MONO, A52_DOLBY):
- *level *= LEVEL_PLUS3DB;
- break;
-
- case CONVERT (A52_3F, A52_DOLBY):
- case CONVERT (A52_2F1R, A52_DOLBY):
- *level *= 1 / (1 + LEVEL_3DB);
- break;
-
- case CONVERT (A52_3F1R, A52_DOLBY):
- case CONVERT (A52_2F2R, A52_DOLBY):
- *level *= 1 / (1 + 2 * LEVEL_3DB);
- break;
-
- case CONVERT (A52_3F2R, A52_DOLBY):
- *level *= 1 / (1 + 3 * LEVEL_3DB);
- break;
- }
-
- return output;
-}
-
-int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
- sample_t clev, sample_t slev)
-{
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL):
- case CONVERT (A52_MONO, A52_MONO):
- case CONVERT (A52_STEREO, A52_STEREO):
- case CONVERT (A52_3F, A52_3F):
- case CONVERT (A52_2F1R, A52_2F1R):
- case CONVERT (A52_3F1R, A52_3F1R):
- case CONVERT (A52_2F2R, A52_2F2R):
- case CONVERT (A52_3F2R, A52_3F2R):
- case CONVERT (A52_STEREO, A52_DOLBY):
- coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
- return 0;
-
- case CONVERT (A52_CHANNEL, A52_MONO):
- coeff[0] = coeff[1] = level * LEVEL_6DB;
- return 3;
-
- case CONVERT (A52_STEREO, A52_MONO):
- coeff[0] = coeff[1] = level * LEVEL_3DB;
- return 3;
-
- case CONVERT (A52_3F, A52_MONO):
- coeff[0] = coeff[2] = level * LEVEL_3DB;
- coeff[1] = level * clev * LEVEL_PLUS3DB;
- return 7;
-
- case CONVERT (A52_2F1R, A52_MONO):
- coeff[0] = coeff[1] = level * LEVEL_3DB;
- coeff[2] = level * slev * LEVEL_3DB;
- return 7;
-
- case CONVERT (A52_2F2R, A52_MONO):
- coeff[0] = coeff[1] = level * LEVEL_3DB;
- coeff[2] = coeff[3] = level * slev * LEVEL_3DB;
- return 15;
-
- case CONVERT (A52_3F1R, A52_MONO):
- coeff[0] = coeff[2] = level * LEVEL_3DB;
- coeff[1] = level * clev * LEVEL_PLUS3DB;
- coeff[3] = level * slev * LEVEL_3DB;
- return 15;
-
- case CONVERT (A52_3F2R, A52_MONO):
- coeff[0] = coeff[2] = level * LEVEL_3DB;
- coeff[1] = level * clev * LEVEL_PLUS3DB;
- coeff[3] = coeff[4] = level * slev * LEVEL_3DB;
- return 31;
-
- case CONVERT (A52_MONO, A52_DOLBY):
- coeff[0] = level * LEVEL_3DB;
- return 0;
-
- case CONVERT (A52_3F, A52_DOLBY):
- clev = LEVEL_3DB;
- case CONVERT (A52_3F, A52_STEREO):
- case CONVERT (A52_3F1R, A52_2F1R):
- case CONVERT (A52_3F2R, A52_2F2R):
- coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
- coeff[1] = level * clev;
- return 7;
-
- case CONVERT (A52_2F1R, A52_DOLBY):
- slev = 1;
- case CONVERT (A52_2F1R, A52_STEREO):
- coeff[0] = coeff[1] = level;
- coeff[2] = level * slev * LEVEL_3DB;
- return 7;
-
- case CONVERT (A52_3F1R, A52_DOLBY):
- clev = LEVEL_3DB;
- slev = 1;
- case CONVERT (A52_3F1R, A52_STEREO):
- coeff[0] = coeff[2] = level;
- coeff[1] = level * clev;
- coeff[3] = level * slev * LEVEL_3DB;
- return 15;
-
- case CONVERT (A52_2F2R, A52_DOLBY):
- slev = LEVEL_3DB;
- case CONVERT (A52_2F2R, A52_STEREO):
- coeff[0] = coeff[1] = level;
- coeff[2] = coeff[3] = level * slev;
- return 15;
-
- case CONVERT (A52_3F2R, A52_DOLBY):
- clev = LEVEL_3DB;
- case CONVERT (A52_3F2R, A52_2F1R):
- slev = LEVEL_3DB;
- case CONVERT (A52_3F2R, A52_STEREO):
- coeff[0] = coeff[2] = level;
- coeff[1] = level * clev;
- coeff[3] = coeff[4] = level * slev;
- return 31;
-
- case CONVERT (A52_3F1R, A52_3F):
- coeff[0] = coeff[1] = coeff[2] = level;
- coeff[3] = level * slev * LEVEL_3DB;
- return 13;
-
- case CONVERT (A52_3F2R, A52_3F):
- coeff[0] = coeff[1] = coeff[2] = level;
- coeff[3] = coeff[4] = level * slev;
- return 29;
-
- case CONVERT (A52_2F2R, A52_2F1R):
- coeff[0] = coeff[1] = level;
- coeff[2] = coeff[3] = level * LEVEL_3DB;
- return 12;
-
- case CONVERT (A52_3F2R, A52_3F1R):
- coeff[0] = coeff[1] = coeff[2] = level;
- coeff[3] = coeff[4] = level * LEVEL_3DB;
- return 24;
-
- case CONVERT (A52_2F1R, A52_2F2R):
- coeff[0] = coeff[1] = level;
- coeff[2] = level * LEVEL_3DB;
- return 0;
-
- case CONVERT (A52_3F1R, A52_2F2R):
- coeff[0] = coeff[2] = level;
- coeff[1] = level * clev;
- coeff[3] = level * LEVEL_3DB;
- return 7;
-
- case CONVERT (A52_3F1R, A52_3F2R):
- coeff[0] = coeff[1] = coeff[2] = level;
- coeff[3] = level * LEVEL_3DB;
- return 0;
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL1):
- coeff[0] = level;
- coeff[1] = 0;
- return 0;
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL2):
- coeff[0] = 0;
- coeff[1] = level;
- return 0;
- }
-
- return -1; /* NOTREACHED */
-}
-
-static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias)
-{
- int i;
-
- for (i = 0; i < 256; i++)
- dest[i] += src[i] + bias;
-}
-
-static void mix3to1 (sample_t * samples, sample_t bias)
-{
- int i;
-
- for (i = 0; i < 256; i++)
- samples[i] += samples[i + 256] + samples[i + 512] + bias;
-}
-
-static void mix4to1 (sample_t * samples, sample_t bias)
-{
- int i;
-
- for (i = 0; i < 256; i++)
- samples[i] += (samples[i + 256] + samples[i + 512] +
- samples[i + 768] + bias);
-}
-
-static void mix5to1 (sample_t * samples, sample_t bias)
-{
- int i;
-
- for (i = 0; i < 256; i++)
- samples[i] += (samples[i + 256] + samples[i + 512] +
- samples[i + 768] + samples[i + 1024] + bias);
-}
-
-static void mix3to2 (sample_t * samples, sample_t bias)
-{
- int i;
- sample_t common;
-
- for (i = 0; i < 256; i++) {
- common = samples[i + 256] + bias;
- samples[i] += common;
- samples[i + 256] = samples[i + 512] + common;
- }
-}
-
-static void mix21to2 (sample_t * left, sample_t * right, sample_t bias)
-{
- int i;
- sample_t common;
-
- for (i = 0; i < 256; i++) {
- common = right[i + 256] + bias;
- left[i] += common;
- right[i] += common;
- }
-}
-
-static void mix21toS (sample_t * samples, sample_t bias)
-{
- int i;
- sample_t surround;
-
- for (i = 0; i < 256; i++) {
- surround = samples[i + 512];
- samples[i] += bias - surround;
- samples[i + 256] += bias + surround;
- }
-}
-
-static void mix31to2 (sample_t * samples, sample_t bias)
-{
- int i;
- sample_t common;
-
- for (i = 0; i < 256; i++) {
- common = samples[i + 256] + samples[i + 768] + bias;
- samples[i] += common;
- samples[i + 256] = samples[i + 512] + common;
- }
-}
-
-static void mix31toS (sample_t * samples, sample_t bias)
-{
- int i;
- sample_t common, surround;
-
- for (i = 0; i < 256; i++) {
- common = samples[i + 256] + bias;
- surround = samples[i + 768];
- samples[i] += common - surround;
- samples[i + 256] = samples[i + 512] + common + surround;
- }
-}
-
-static void mix22toS (sample_t * samples, sample_t bias)
-{
- int i;
- sample_t surround;
-
- for (i = 0; i < 256; i++) {
- surround = samples[i + 512] + samples[i + 768];
- samples[i] += bias - surround;
- samples[i + 256] += bias + surround;
- }
-}
-
-static void mix32to2 (sample_t * samples, sample_t bias)
-{
- int i;
- sample_t common;
-
- for (i = 0; i < 256; i++) {
- common = samples[i + 256] + bias;
- samples[i] += common + samples[i + 768];
- samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
- }
-}
-
-static void mix32toS (sample_t * samples, sample_t bias)
-{
- int i;
- sample_t common, surround;
-
- for (i = 0; i < 256; i++) {
- common = samples[i + 256] + bias;
- surround = samples[i + 768] + samples[i + 1024];
- samples[i] += common - surround;
- samples[i + 256] = samples[i + 512] + common + surround;
- }
-}
-
-static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
-{
- int i;
-
- for (i = 0; i < 256; i++)
- dest[i] = src[i] + src[i + 256] + bias;
-}
-
-static void zero (sample_t * samples)
-{
- int i;
-
- for (i = 0; i < 256; i++)
- samples[i] = 0;
-}
-
-void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)
-{
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL2):
- memcpy (samples, samples + 256, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_CHANNEL, A52_MONO):
- case CONVERT (A52_STEREO, A52_MONO):
- mix_2to1:
- mix2to1 (samples, samples + 256, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_MONO):
- if (slev == 0)
- goto mix_2to1;
- case CONVERT (A52_3F, A52_MONO):
- mix_3to1:
- mix3to1 (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_MONO):
- if (slev == 0)
- goto mix_3to1;
- case CONVERT (A52_2F2R, A52_MONO):
- if (slev == 0)
- goto mix_2to1;
- mix4to1 (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_MONO):
- if (slev == 0)
- goto mix_3to1;
- mix5to1 (samples, bias);
- break;
-
- case CONVERT (A52_MONO, A52_DOLBY):
- memcpy (samples + 256, samples, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F, A52_STEREO):
- case CONVERT (A52_3F, A52_DOLBY):
- mix_3to2:
- mix3to2 (samples, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_STEREO):
- if (slev == 0)
- break;
- mix21to2 (samples, samples + 256, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_DOLBY):
- mix21toS (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_STEREO):
- if (slev == 0)
- goto mix_3to2;
- mix31to2 (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_DOLBY):
- mix31toS (samples, bias);
- break;
-
- case CONVERT (A52_2F2R, A52_STEREO):
- if (slev == 0)
- break;
- mix2to1 (samples, samples + 512, bias);
- mix2to1 (samples + 256, samples + 768, bias);
- break;
-
- case CONVERT (A52_2F2R, A52_DOLBY):
- mix22toS (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_STEREO):
- if (slev == 0)
- goto mix_3to2;
- mix32to2 (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_DOLBY):
- mix32toS (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_3F):
- if (slev == 0)
- break;
- mix21to2 (samples, samples + 512, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_3F):
- if (slev == 0)
- break;
- mix2to1 (samples, samples + 768, bias);
- mix2to1 (samples + 512, samples + 1024, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_2F1R):
- mix3to2 (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_2F2R, A52_2F1R):
- mix2to1 (samples + 512, samples + 768, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
- mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
- move2to1 (samples + 768, samples + 512, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_3F1R):
- mix2to1 (samples + 768, samples + 1024, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_2F2R):
- memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F1R, A52_2F2R):
- mix3to2 (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F2R, A52_2F2R):
- mix3to2 (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F1R, A52_3F2R):
- memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
- break;
- }
-}
-
-void upmix_C (sample_t * samples, int acmod, int output)
-{
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL2):
- memcpy (samples + 256, samples, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F2R, A52_MONO):
- zero (samples + 1024);
- case CONVERT (A52_3F1R, A52_MONO):
- case CONVERT (A52_2F2R, A52_MONO):
- zero (samples + 768);
- case CONVERT (A52_3F, A52_MONO):
- case CONVERT (A52_2F1R, A52_MONO):
- zero (samples + 512);
- case CONVERT (A52_CHANNEL, A52_MONO):
- case CONVERT (A52_STEREO, A52_MONO):
- zero (samples + 256);
- break;
-
- case CONVERT (A52_3F2R, A52_STEREO):
- case CONVERT (A52_3F2R, A52_DOLBY):
- zero (samples + 1024);
- case CONVERT (A52_3F1R, A52_STEREO):
- case CONVERT (A52_3F1R, A52_DOLBY):
- zero (samples + 768);
- case CONVERT (A52_3F, A52_STEREO):
- case CONVERT (A52_3F, A52_DOLBY):
- mix_3to2:
- memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
- zero (samples + 256);
- break;
-
- case CONVERT (A52_2F2R, A52_STEREO):
- case CONVERT (A52_2F2R, A52_DOLBY):
- zero (samples + 768);
- case CONVERT (A52_2F1R, A52_STEREO):
- case CONVERT (A52_2F1R, A52_DOLBY):
- zero (samples + 512);
- break;
-
- case CONVERT (A52_3F2R, A52_3F):
- zero (samples + 1024);
- case CONVERT (A52_3F1R, A52_3F):
- case CONVERT (A52_2F2R, A52_2F1R):
- zero (samples + 768);
- break;
-
- case CONVERT (A52_3F2R, A52_3F1R):
- zero (samples + 1024);
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
- zero (samples + 1024);
- case CONVERT (A52_3F1R, A52_2F1R):
- mix_31to21:
- memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
- goto mix_3to2;
-
- case CONVERT (A52_3F2R, A52_2F2R):
- memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
- goto mix_31to21;
- }
-}
-
-#if ARCH_X86 || ARCH_X86_64
-static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
-{
- __asm__ volatile(
- "movlps %2, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
- "addps (%1, %%"REG_S"), %%xmm0 \n\t"
- "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
- "addps %%xmm7, %%xmm0 \n\t"
- "addps %%xmm7, %%xmm1 \n\t"
- "movaps %%xmm0, (%1, %%"REG_S") \n\t"
- "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
- "add $32, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (src+256), "r" (dest+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix3to1_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
- "addps %%xmm7, %%xmm1 \n\t"
- "addps %%xmm1, %%xmm0 \n\t"
- "movaps %%xmm0, (%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix4to1_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
- "addps %%xmm7, %%xmm0 \n\t"
- "addps %%xmm1, %%xmm0 \n\t"
- "movaps %%xmm0, (%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix5to1_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
- "addps %%xmm7, %%xmm0 \n\t"
- "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
- "addps %%xmm1, %%xmm0 \n\t"
- "movaps %%xmm0, (%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix3to2_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "addps %%xmm7, %%xmm0 \n\t" //common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
- "addps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm2 \n\t"
- "movaps %%xmm1, (%0, %%"REG_S") \n\t"
- "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
-{
- __asm__ volatile(
- "movlps %2, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
- "addps %%xmm7, %%xmm0 \n\t" //common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps (%1, %%"REG_S"), %%xmm2 \n\t"
- "addps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm2 \n\t"
- "movaps %%xmm1, (%0, %%"REG_S") \n\t"
- "movaps %%xmm2, (%1, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (left+256), "r" (right+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix21toS_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
- "addps %%xmm7, %%xmm1 \n\t"
- "addps %%xmm7, %%xmm2 \n\t"
- "subps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm2 \n\t"
- "movaps %%xmm1, (%0, %%"REG_S") \n\t"
- "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix31to2_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
- "addps %%xmm7, %%xmm0 \n\t" // common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
- "addps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm2 \n\t"
- "movaps %%xmm1, (%0, %%"REG_S") \n\t"
- "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix31toS_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
- "addps %%xmm7, %%xmm0 \n\t" // common
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
- "addps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm2 \n\t"
- "subps %%xmm3, %%xmm1 \n\t"
- "addps %%xmm3, %%xmm2 \n\t"
- "movaps %%xmm1, (%0, %%"REG_S") \n\t"
- "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix22toS_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
- "addps %%xmm7, %%xmm1 \n\t"
- "addps %%xmm7, %%xmm2 \n\t"
- "subps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm2 \n\t"
- "movaps %%xmm1, (%0, %%"REG_S") \n\t"
- "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix32to2_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "addps %%xmm7, %%xmm0 \n\t" // common
- "movaps %%xmm0, %%xmm1 \n\t" // common
- "addps (%0, %%"REG_S"), %%xmm0 \n\t"
- "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
- "movaps %%xmm0, (%0, %%"REG_S") \n\t"
- "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix32toS_SSE (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movlps %1, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
- "addps %%xmm7, %%xmm0 \n\t" // common
- "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
- "subps %%xmm2, %%xmm1 \n\t"
- "addps %%xmm2, %%xmm3 \n\t"
- "addps %%xmm0, %%xmm1 \n\t"
- "addps %%xmm0, %%xmm3 \n\t"
- "movaps %%xmm1, (%0, %%"REG_S") \n\t"
- "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
-{
- __asm__ volatile(
- "movlps %2, %%xmm7 \n\t"
- "shufps $0x00, %%xmm7, %%xmm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
- "addps 1024(%0, %%"REG_S"), %%xmm0\n\t"
- "addps 1040(%0, %%"REG_S"), %%xmm1\n\t"
- "addps %%xmm7, %%xmm0 \n\t"
- "addps %%xmm7, %%xmm1 \n\t"
- "movaps %%xmm0, (%1, %%"REG_S") \n\t"
- "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
- "add $32, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (src+256), "r" (dest+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void zero_MMX(sample_t * samples)
-{
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "pxor %%mm0, %%mm0 \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq %%mm0, (%0, %%"REG_S") \n\t"
- "movq %%mm0, 8(%0, %%"REG_S") \n\t"
- "movq %%mm0, 16(%0, %%"REG_S") \n\t"
- "movq %%mm0, 24(%0, %%"REG_S") \n\t"
- "add $32, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms"
- :: "r" (samples+256)
- : "%"REG_S
- );
-}
-
-static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)
-{
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL2):
- memcpy (samples, samples + 256, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_CHANNEL, A52_MONO):
- case CONVERT (A52_STEREO, A52_MONO):
- mix_2to1_SSE:
- mix2to1_SSE (samples, samples + 256, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_MONO):
- if (slev == 0)
- goto mix_2to1_SSE;
- case CONVERT (A52_3F, A52_MONO):
- mix_3to1_SSE:
- mix3to1_SSE (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_MONO):
- if (slev == 0)
- goto mix_3to1_SSE;
- case CONVERT (A52_2F2R, A52_MONO):
- if (slev == 0)
- goto mix_2to1_SSE;
- mix4to1_SSE (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_MONO):
- if (slev == 0)
- goto mix_3to1_SSE;
- mix5to1_SSE (samples, bias);
- break;
-
- case CONVERT (A52_MONO, A52_DOLBY):
- memcpy (samples + 256, samples, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F, A52_STEREO):
- case CONVERT (A52_3F, A52_DOLBY):
- mix_3to2_SSE:
- mix3to2_SSE (samples, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_STEREO):
- if (slev == 0)
- break;
- mix21to2_SSE (samples, samples + 256, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_DOLBY):
- mix21toS_SSE (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_STEREO):
- if (slev == 0)
- goto mix_3to2_SSE;
- mix31to2_SSE (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_DOLBY):
- mix31toS_SSE (samples, bias);
- break;
-
- case CONVERT (A52_2F2R, A52_STEREO):
- if (slev == 0)
- break;
- mix2to1_SSE (samples, samples + 512, bias);
- mix2to1_SSE (samples + 256, samples + 768, bias);
- break;
-
- case CONVERT (A52_2F2R, A52_DOLBY):
- mix22toS_SSE (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_STEREO):
- if (slev == 0)
- goto mix_3to2_SSE;
- mix32to2_SSE (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_DOLBY):
- mix32toS_SSE (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_3F):
- if (slev == 0)
- break;
- mix21to2_SSE (samples, samples + 512, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_3F):
- if (slev == 0)
- break;
- mix2to1_SSE (samples, samples + 768, bias);
- mix2to1_SSE (samples + 512, samples + 1024, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_2F1R):
- mix3to2_SSE (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_2F2R, A52_2F1R):
- mix2to1_SSE (samples + 512, samples + 768, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
- mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
- move2to1_SSE (samples + 768, samples + 512, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_3F1R):
- mix2to1_SSE (samples + 768, samples + 1024, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_2F2R):
- memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F1R, A52_2F2R):
- mix3to2_SSE (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F2R, A52_2F2R):
- mix3to2_SSE (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F1R, A52_3F2R):
- memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
- break;
- }
-}
-
-static void upmix_MMX (sample_t * samples, int acmod, int output)
-{
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL2):
- memcpy (samples + 256, samples, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F2R, A52_MONO):
- zero_MMX (samples + 1024);
- case CONVERT (A52_3F1R, A52_MONO):
- case CONVERT (A52_2F2R, A52_MONO):
- zero_MMX (samples + 768);
- case CONVERT (A52_3F, A52_MONO):
- case CONVERT (A52_2F1R, A52_MONO):
- zero_MMX (samples + 512);
- case CONVERT (A52_CHANNEL, A52_MONO):
- case CONVERT (A52_STEREO, A52_MONO):
- zero_MMX (samples + 256);
- break;
-
- case CONVERT (A52_3F2R, A52_STEREO):
- case CONVERT (A52_3F2R, A52_DOLBY):
- zero_MMX (samples + 1024);
- case CONVERT (A52_3F1R, A52_STEREO):
- case CONVERT (A52_3F1R, A52_DOLBY):
- zero_MMX (samples + 768);
- case CONVERT (A52_3F, A52_STEREO):
- case CONVERT (A52_3F, A52_DOLBY):
- mix_3to2_MMX:
- memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
- zero_MMX (samples + 256);
- break;
-
- case CONVERT (A52_2F2R, A52_STEREO):
- case CONVERT (A52_2F2R, A52_DOLBY):
- zero_MMX (samples + 768);
- case CONVERT (A52_2F1R, A52_STEREO):
- case CONVERT (A52_2F1R, A52_DOLBY):
- zero_MMX (samples + 512);
- break;
-
- case CONVERT (A52_3F2R, A52_3F):
- zero_MMX (samples + 1024);
- case CONVERT (A52_3F1R, A52_3F):
- case CONVERT (A52_2F2R, A52_2F1R):
- zero_MMX (samples + 768);
- break;
-
- case CONVERT (A52_3F2R, A52_3F1R):
- zero_MMX (samples + 1024);
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
- zero_MMX (samples + 1024);
- case CONVERT (A52_3F1R, A52_2F1R):
- mix_31to21_MMX:
- memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
- goto mix_3to2_MMX;
-
- case CONVERT (A52_3F2R, A52_2F2R):
- memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
- goto mix_31to21_MMX;
- }
-}
-
-static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
-{
- __asm__ volatile(
- "movd %2, %%mm7 \n\t"
- "punpckldq %2, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
- "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
- "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
- "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t"
- "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
- "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t"
- "pfadd %%mm7, %%mm0 \n\t"
- "pfadd %%mm7, %%mm1 \n\t"
- "pfadd %%mm7, %%mm2 \n\t"
- "pfadd %%mm7, %%mm3 \n\t"
- "movq %%mm0, (%1, %%"REG_S") \n\t"
- "movq %%mm1, 8(%1, %%"REG_S") \n\t"
- "movq %%mm2, 16(%1, %%"REG_S") \n\t"
- "movq %%mm3, 24(%1, %%"REG_S") \n\t"
- "add $32, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (src+256), "r" (dest+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix3to1_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
- "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd %%mm7, %%mm0 \n\t"
- "pfadd %%mm7, %%mm1 \n\t"
- "pfadd %%mm2, %%mm0 \n\t"
- "pfadd %%mm3, %%mm1 \n\t"
- "movq %%mm0, (%0, %%"REG_S") \n\t"
- "movq %%mm1, 8(%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix4to1_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
- "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
- "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd %%mm7, %%mm0 \n\t"
- "pfadd %%mm7, %%mm1 \n\t"
- "pfadd %%mm2, %%mm0 \n\t"
- "pfadd %%mm3, %%mm1 \n\t"
- "movq %%mm0, (%0, %%"REG_S") \n\t"
- "movq %%mm1, 8(%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix5to1_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
- "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
- "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd %%mm7, %%mm0 \n\t"
- "pfadd %%mm7, %%mm1 \n\t"
- "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
- "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd %%mm2, %%mm0 \n\t"
- "pfadd %%mm3, %%mm1 \n\t"
- "movq %%mm0, (%0, %%"REG_S") \n\t"
- "movq %%mm1, 8(%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix3to2_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd %%mm7, %%mm0 \n\t" //common
- "pfadd %%mm7, %%mm1 \n\t" //common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
- "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
- "pfadd %%mm0, %%mm2 \n\t"
- "pfadd %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm4 \n\t"
- "pfadd %%mm1, %%mm5 \n\t"
- "movq %%mm2, (%0, %%"REG_S") \n\t"
- "movq %%mm3, 8(%0, %%"REG_S") \n\t"
- "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
- "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
-{
- __asm__ volatile(
- "movd %2, %%mm7 \n\t"
- "punpckldq %2, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
- "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
- "pfadd %%mm7, %%mm0 \n\t" //common
- "pfadd %%mm7, %%mm1 \n\t" //common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq (%1, %%"REG_S"), %%mm4 \n\t"
- "movq 8(%1, %%"REG_S"), %%mm5 \n\t"
- "pfadd %%mm0, %%mm2 \n\t"
- "pfadd %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm4 \n\t"
- "pfadd %%mm1, %%mm5 \n\t"
- "movq %%mm2, (%0, %%"REG_S") \n\t"
- "movq %%mm3, 8(%0, %%"REG_S") \n\t"
- "movq %%mm4, (%1, %%"REG_S") \n\t"
- "movq %%mm5, 8(%1, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (left+256), "r" (right+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix21toS_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
- "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
- "pfadd %%mm7, %%mm2 \n\t"
- "pfadd %%mm7, %%mm3 \n\t"
- "pfadd %%mm7, %%mm4 \n\t"
- "pfadd %%mm7, %%mm5 \n\t"
- "pfsub %%mm0, %%mm2 \n\t"
- "pfsub %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm4 \n\t"
- "pfadd %%mm1, %%mm5 \n\t"
- "movq %%mm2, (%0, %%"REG_S") \n\t"
- "movq %%mm3, 8(%0, %%"REG_S") \n\t"
- "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
- "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix31to2_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
- "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd %%mm7, %%mm0 \n\t" // common
- "pfadd %%mm7, %%mm1 \n\t" // common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
- "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
- "pfadd %%mm0, %%mm2 \n\t"
- "pfadd %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm4 \n\t"
- "pfadd %%mm1, %%mm5 \n\t"
- "movq %%mm2, (%0, %%"REG_S") \n\t"
- "movq %%mm3, 8(%0, %%"REG_S") \n\t"
- "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
- "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix31toS_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd %%mm7, %%mm0 \n\t" // common
- "pfadd %%mm7, %%mm1 \n\t" // common
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
- "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
- "pfadd %%mm0, %%mm2 \n\t"
- "pfadd %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm4 \n\t"
- "pfadd %%mm1, %%mm5 \n\t"
- "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
- "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
- "pfsub %%mm0, %%mm2 \n\t"
- "pfsub %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm4 \n\t"
- "pfadd %%mm1, %%mm5 \n\t"
- "movq %%mm2, (%0, %%"REG_S") \n\t"
- "movq %%mm3, 8(%0, %%"REG_S") \n\t"
- "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
- "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix22toS_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm0\n\t"
- "movq 2056(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
- "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
- "pfadd %%mm7, %%mm2 \n\t"
- "pfadd %%mm7, %%mm3 \n\t"
- "pfadd %%mm7, %%mm4 \n\t"
- "pfadd %%mm7, %%mm5 \n\t"
- "pfsub %%mm0, %%mm2 \n\t"
- "pfsub %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm4 \n\t"
- "pfadd %%mm1, %%mm5 \n\t"
- "movq %%mm2, (%0, %%"REG_S") \n\t"
- "movq %%mm3, 8(%0, %%"REG_S") \n\t"
- "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
- "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void mix32to2_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd %%mm7, %%mm0 \n\t" // common
- "pfadd %%mm7, %%mm1 \n\t" // common
- "movq %%mm0, %%mm2 \n\t" // common
- "movq %%mm1, %%mm3 \n\t" // common
- "pfadd (%0, %%"REG_S"), %%mm0 \n\t"
- "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t"
- "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t"
- "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
- "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
- "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
- "movq %%mm0, (%0, %%"REG_S") \n\t"
- "movq %%mm1, 8(%0, %%"REG_S") \n\t"
- "movq %%mm2, 1024(%0, %%"REG_S")\n\t"
- "movq %%mm3, 1032(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-/* todo: should be optimized better */
-static void mix32toS_3dnow (sample_t * samples, sample_t bias)
-{
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movd %1, %%mm7 \n\t"
- "punpckldq %1, %%mm7 \n\t"
- "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
- "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
- "movq 3072(%0, %%"REG_S"), %%mm4\n\t"
- "movq 3080(%0, %%"REG_S"), %%mm5\n\t"
- "pfadd %%mm7, %%mm0 \n\t" // common
- "pfadd %%mm7, %%mm1 \n\t" // common
- "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround
- "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround
- "movq (%0, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
- "movq 2048(%0, %%"REG_S"), %%mm6\n\t"
- "movq 2056(%0, %%"REG_S"), %%mm7\n\t"
- "pfsub %%mm4, %%mm2 \n\t"
- "pfsub %%mm5, %%mm3 \n\t"
- "pfadd %%mm4, %%mm6 \n\t"
- "pfadd %%mm5, %%mm7 \n\t"
- "pfadd %%mm0, %%mm2 \n\t"
- "pfadd %%mm1, %%mm3 \n\t"
- "pfadd %%mm0, %%mm6 \n\t"
- "pfadd %%mm1, %%mm7 \n\t"
- "movq %%mm2, (%0, %%"REG_S") \n\t"
- "movq %%mm3, 8(%0, %%"REG_S") \n\t"
- "movq %%mm6, 1024(%0, %%"REG_S")\n\t"
- "movq %%mm7, 1032(%0, %%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (samples+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
-{
- __asm__ volatile(
- "movd %2, %%mm7 \n\t"
- "punpckldq %2, %%mm7 \n\t"
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%0, %%"REG_S"), %%mm0 \n\t"
- "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
- "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
- "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
- "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t"
- "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t"
- "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t"
- "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t"
- "pfadd %%mm7, %%mm0 \n\t"
- "pfadd %%mm7, %%mm1 \n\t"
- "pfadd %%mm7, %%mm2 \n\t"
- "pfadd %%mm7, %%mm3 \n\t"
- "movq %%mm0, (%1, %%"REG_S") \n\t"
- "movq %%mm1, 8(%1, %%"REG_S") \n\t"
- "movq %%mm2, 16(%1, %%"REG_S") \n\t"
- "movq %%mm3, 24(%1, %%"REG_S") \n\t"
- "add $32, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (src+256), "r" (dest+256), "m" (bias)
- : "%"REG_S
- );
-}
-
-static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)
-{
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
- case CONVERT (A52_CHANNEL, A52_CHANNEL2):
- memcpy (samples, samples + 256, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_CHANNEL, A52_MONO):
- case CONVERT (A52_STEREO, A52_MONO):
- mix_2to1_3dnow:
- mix2to1_3dnow (samples, samples + 256, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_MONO):
- if (slev == 0)
- goto mix_2to1_3dnow;
- case CONVERT (A52_3F, A52_MONO):
- mix_3to1_3dnow:
- mix3to1_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_MONO):
- if (slev == 0)
- goto mix_3to1_3dnow;
- case CONVERT (A52_2F2R, A52_MONO):
- if (slev == 0)
- goto mix_2to1_3dnow;
- mix4to1_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_MONO):
- if (slev == 0)
- goto mix_3to1_3dnow;
- mix5to1_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_MONO, A52_DOLBY):
- memcpy (samples + 256, samples, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F, A52_STEREO):
- case CONVERT (A52_3F, A52_DOLBY):
- mix_3to2_3dnow:
- mix3to2_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_STEREO):
- if (slev == 0)
- break;
- mix21to2_3dnow (samples, samples + 256, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_DOLBY):
- mix21toS_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_STEREO):
- if (slev == 0)
- goto mix_3to2_3dnow;
- mix31to2_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_DOLBY):
- mix31toS_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_2F2R, A52_STEREO):
- if (slev == 0)
- break;
- mix2to1_3dnow (samples, samples + 512, bias);
- mix2to1_3dnow (samples + 256, samples + 768, bias);
- break;
-
- case CONVERT (A52_2F2R, A52_DOLBY):
- mix22toS_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_STEREO):
- if (slev == 0)
- goto mix_3to2_3dnow;
- mix32to2_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_DOLBY):
- mix32toS_3dnow (samples, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_3F):
- if (slev == 0)
- break;
- mix21to2_3dnow (samples, samples + 512, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_3F):
- if (slev == 0)
- break;
- mix2to1_3dnow (samples, samples + 768, bias);
- mix2to1_3dnow (samples + 512, samples + 1024, bias);
- break;
-
- case CONVERT (A52_3F1R, A52_2F1R):
- mix3to2_3dnow (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_2F2R, A52_2F1R):
- mix2to1_3dnow (samples + 512, samples + 768, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
- mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
- move2to1_3dnow (samples + 768, samples + 512, bias);
- break;
-
- case CONVERT (A52_3F2R, A52_3F1R):
- mix2to1_3dnow (samples + 768, samples + 1024, bias);
- break;
-
- case CONVERT (A52_2F1R, A52_2F2R):
- memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F1R, A52_2F2R):
- mix3to2_3dnow (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F2R, A52_2F2R):
- mix3to2_3dnow (samples, bias);
- memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
- memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
- break;
-
- case CONVERT (A52_3F1R, A52_3F2R):
- memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
- break;
- }
- __asm__ volatile("femms":::"memory");
-}
-
-#endif // ARCH_X86 || ARCH_X86_64
diff --git a/liba52/imdct.c b/liba52/imdct.c
deleted file mode 100644
index 8220cbfb7b..0000000000
--- a/liba52/imdct.c
+++ /dev/null
@@ -1,1304 +0,0 @@
-/*
- * imdct.c
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * The ifft algorithms in this file have been largely inspired by Dan
- * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
- * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru>
- * michael did port them from libac3 (untested, perhaps totally broken)
- * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org)
- */
-
-#include "config.h"
-
-#include <math.h>
-#include <stdio.h>
-#ifdef LIBA52_DJBFFT
-#include <fftc4.h>
-#endif
-#ifndef M_PI
-#define M_PI 3.1415926535897932384626433832795029
-#endif
-#include <inttypes.h>
-
-#include "a52.h"
-#include "a52_internal.h"
-#include "mm_accel.h"
-#include "mangle.h"
-
-void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
-
-#if CONFIG_RUNTIME_CPUDETECT
-#undef HAVE_AMD3DNOWEXT
-#define HAVE_AMD3DNOWEXT 0
-#endif
-
-typedef struct complex_s {
- sample_t real;
- sample_t imag;
-} complex_t;
-
-static const int pm128[128] attribute_used __attribute__((aligned(16))) =
-{
- 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
- 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
- 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
- 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
- 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
- 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
- 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
- 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
-};
-
-static uint8_t attribute_used bit_reverse_512[] = {
- 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
- 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
- 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
- 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
- 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
- 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
- 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
- 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
- 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
- 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
- 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
- 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
- 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
- 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
- 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
- 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
-
-static uint8_t fftorder[] = {
- 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
- 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
- 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
- 252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
- 2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
- 10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
- 254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
- 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
-};
-
-static complex_t __attribute__((aligned(16))) buf[128];
-
-/* Twiddle factor LUT */
-static complex_t __attribute__((aligned(16))) w_1[1];
-static complex_t __attribute__((aligned(16))) w_2[2];
-static complex_t __attribute__((aligned(16))) w_4[4];
-static complex_t __attribute__((aligned(16))) w_8[8];
-static complex_t __attribute__((aligned(16))) w_16[16];
-static complex_t __attribute__((aligned(16))) w_32[32];
-static complex_t __attribute__((aligned(16))) w_64[64];
-static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
-
-/* Twiddle factors for IMDCT */
-static sample_t __attribute__((aligned(16))) xcos1[128];
-static sample_t __attribute__((aligned(16))) xsin1[128];
-
-#if ARCH_X86 || ARCH_X86_64
-// NOTE: SSE needs 16byte alignment or it will segfault
-//
-static float __attribute__((aligned(16))) sseSinCos1c[256];
-static float __attribute__((aligned(16))) sseSinCos1d[256];
-static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
-//static float __attribute__((aligned(16))) sseW0[4];
-static float __attribute__((aligned(16))) sseW1[8];
-static float __attribute__((aligned(16))) sseW2[16];
-static float __attribute__((aligned(16))) sseW3[32];
-static float __attribute__((aligned(16))) sseW4[64];
-static float __attribute__((aligned(16))) sseW5[128];
-static float __attribute__((aligned(16))) sseW6[256];
-static float __attribute__((aligned(16))) *sseW[7]=
- {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
-static float __attribute__((aligned(16))) sseWindow[512];
-#endif
-
-/* Root values for IFFT */
-static sample_t roots16[3];
-static sample_t roots32[7];
-static sample_t roots64[15];
-static sample_t roots128[31];
-
-/* Twiddle factors for IMDCT */
-static complex_t pre1[128];
-static complex_t post1[64];
-static complex_t pre2[64];
-static complex_t post2[32];
-
-static sample_t a52_imdct_window[256];
-
-static void (* ifft128) (complex_t * buf);
-static void (* ifft64) (complex_t * buf);
-
-static inline void ifft2 (complex_t * buf)
-{
- double r, i;
-
- r = buf[0].real;
- i = buf[0].imag;
- buf[0].real += buf[1].real;
- buf[0].imag += buf[1].imag;
- buf[1].real = r - buf[1].real;
- buf[1].imag = i - buf[1].imag;
-}
-
-static inline void ifft4 (complex_t * buf)
-{
- double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
-
- tmp1 = buf[0].real + buf[1].real;
- tmp2 = buf[3].real + buf[2].real;
- tmp3 = buf[0].imag + buf[1].imag;
- tmp4 = buf[2].imag + buf[3].imag;
- tmp5 = buf[0].real - buf[1].real;
- tmp6 = buf[0].imag - buf[1].imag;
- tmp7 = buf[2].imag - buf[3].imag;
- tmp8 = buf[3].real - buf[2].real;
-
- buf[0].real = tmp1 + tmp2;
- buf[0].imag = tmp3 + tmp4;
- buf[2].real = tmp1 - tmp2;
- buf[2].imag = tmp3 - tmp4;
- buf[1].real = tmp5 + tmp7;
- buf[1].imag = tmp6 + tmp8;
- buf[3].real = tmp5 - tmp7;
- buf[3].imag = tmp6 - tmp8;
-}
-
-/* the basic split-radix ifft butterfly */
-
-#define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \
- tmp5 = a2.real * wr + a2.imag * wi; \
- tmp6 = a2.imag * wr - a2.real * wi; \
- tmp7 = a3.real * wr - a3.imag * wi; \
- tmp8 = a3.imag * wr + a3.real * wi; \
- tmp1 = tmp5 + tmp7; \
- tmp2 = tmp6 + tmp8; \
- tmp3 = tmp6 - tmp8; \
- tmp4 = tmp7 - tmp5; \
- a2.real = a0.real - tmp1; \
- a2.imag = a0.imag - tmp2; \
- a3.real = a1.real - tmp3; \
- a3.imag = a1.imag - tmp4; \
- a0.real += tmp1; \
- a0.imag += tmp2; \
- a1.real += tmp3; \
- a1.imag += tmp4; \
-} while (0)
-
-/* split-radix ifft butterfly, specialized for wr=1 wi=0 */
-
-#define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \
- tmp1 = a2.real + a3.real; \
- tmp2 = a2.imag + a3.imag; \
- tmp3 = a2.imag - a3.imag; \
- tmp4 = a3.real - a2.real; \
- a2.real = a0.real - tmp1; \
- a2.imag = a0.imag - tmp2; \
- a3.real = a1.real - tmp3; \
- a3.imag = a1.imag - tmp4; \
- a0.real += tmp1; \
- a0.imag += tmp2; \
- a1.real += tmp3; \
- a1.imag += tmp4; \
-} while (0)
-
-/* split-radix ifft butterfly, specialized for wr=wi */
-
-#define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \
- tmp5 = (a2.real + a2.imag) * w; \
- tmp6 = (a2.imag - a2.real) * w; \
- tmp7 = (a3.real - a3.imag) * w; \
- tmp8 = (a3.imag + a3.real) * w; \
- tmp1 = tmp5 + tmp7; \
- tmp2 = tmp6 + tmp8; \
- tmp3 = tmp6 - tmp8; \
- tmp4 = tmp7 - tmp5; \
- a2.real = a0.real - tmp1; \
- a2.imag = a0.imag - tmp2; \
- a3.real = a1.real - tmp3; \
- a3.imag = a1.imag - tmp4; \
- a0.real += tmp1; \
- a0.imag += tmp2; \
- a1.real += tmp3; \
- a1.imag += tmp4; \
-} while (0)
-
-static inline void ifft8 (complex_t * buf)
-{
- double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
-
- ifft4 (buf);
- ifft2 (buf + 4);
- ifft2 (buf + 6);
- BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]);
- BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]);
-}
-
-static void ifft_pass (complex_t * buf, sample_t * weight, int n)
-{
- complex_t * buf1;
- complex_t * buf2;
- complex_t * buf3;
- double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
- int i;
-
- buf++;
- buf1 = buf + n;
- buf2 = buf + 2 * n;
- buf3 = buf + 3 * n;
-
- BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]);
-
- i = n - 1;
-
- do {
- BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], weight[n], weight[2*i]);
- buf++;
- buf1++;
- buf2++;
- buf3++;
- weight++;
- } while (--i);
-}
-
-static void ifft16 (complex_t * buf)
-{
- ifft8 (buf);
- ifft4 (buf + 8);
- ifft4 (buf + 12);
- ifft_pass (buf, roots16 - 4, 4);
-}
-
-static void ifft32 (complex_t * buf)
-{
- ifft16 (buf);
- ifft8 (buf + 16);
- ifft8 (buf + 24);
- ifft_pass (buf, roots32 - 8, 8);
-}
-
-static void ifft64_c (complex_t * buf)
-{
- ifft32 (buf);
- ifft16 (buf + 32);
- ifft16 (buf + 48);
- ifft_pass (buf, roots64 - 16, 16);
-}
-
-static void ifft128_c (complex_t * buf)
-{
- ifft32 (buf);
- ifft16 (buf + 32);
- ifft16 (buf + 48);
- ifft_pass (buf, roots64 - 16, 16);
-
- ifft32 (buf + 64);
- ifft32 (buf + 96);
- ifft_pass (buf, roots128 - 32, 32);
-}
-
-void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
-{
- int i, k;
- sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
- const sample_t * window = a52_imdct_window;
- complex_t buf[128];
-
- for (i = 0; i < 128; i++) {
- k = fftorder[i];
- t_r = pre1[i].real;
- t_i = pre1[i].imag;
-
- buf[i].real = t_i * data[255-k] + t_r * data[k];
- buf[i].imag = t_r * data[255-k] - t_i * data[k];
- }
-
- ifft128 (buf);
-
- /* Post IFFT complex multiply plus IFFT complex conjugate*/
- /* Window and convert to real valued signal */
- for (i = 0; i < 64; i++) {
- /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
- t_r = post1[i].real;
- t_i = post1[i].imag;
-
- a_r = t_r * buf[i].real + t_i * buf[i].imag;
- a_i = t_i * buf[i].real - t_r * buf[i].imag;
- b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag;
- b_i = t_r * buf[127-i].real - t_i * buf[127-i].imag;
-
- w_1 = window[2*i];
- w_2 = window[255-2*i];
- data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias;
- data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
- delay[2*i] = a_i;
-
- w_1 = window[2*i+1];
- w_2 = window[254-2*i];
- data[2*i+1] = delay[2*i+1] * w_2 + b_r * w_1 + bias;
- data[254-2*i] = delay[2*i+1] * w_1 - b_r * w_2 + bias;
- delay[2*i+1] = b_i;
- }
-}
-
-#if HAVE_ALTIVEC
-
-#ifdef HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
-// used to build registers permutation vectors (vcprm)
-// the 's' are for words in the _s_econd vector
-#define WORD_0 0x00,0x01,0x02,0x03
-#define WORD_1 0x04,0x05,0x06,0x07
-#define WORD_2 0x08,0x09,0x0a,0x0b
-#define WORD_3 0x0c,0x0d,0x0e,0x0f
-#define WORD_s0 0x10,0x11,0x12,0x13
-#define WORD_s1 0x14,0x15,0x16,0x17
-#define WORD_s2 0x18,0x19,0x1a,0x1b
-#define WORD_s3 0x1c,0x1d,0x1e,0x1f
-
-#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
-#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
-
-#define FOUROF(a) {a,a,a,a}
-
-// vcprmle is used to keep the same index as in the SSE version.
-// it's the same as vcprm, with the index inversed
-// ('le' is Little Endian)
-#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
-
-// used to build inverse/identity vectors (vcii)
-// n is _n_egative, p is _p_ositive
-#define FLOAT_n -1.
-#define FLOAT_p 1.
-
-
-void
-imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
-{
- int i;
- int k;
- int p,q;
- int m;
- long two_m;
- long two_m_plus_one;
-
- sample_t tmp_b_i;
- sample_t tmp_b_r;
- sample_t tmp_a_i;
- sample_t tmp_a_r;
-
- sample_t *data_ptr;
- sample_t *delay_ptr;
- sample_t *window_ptr;
-
- /* 512 IMDCT with source and dest data in 'data' */
-
- /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/
- for( i=0; i < 128; i++) {
- /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
- int j= bit_reverse_512[i];
- buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
- buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j]));
- }
-
- /* 1. iteration */
- for(i = 0; i < 128; i += 2) {
-#if 0
- tmp_a_r = buf[i].real;
- tmp_a_i = buf[i].imag;
- tmp_b_r = buf[i+1].real;
- tmp_b_i = buf[i+1].imag;
- buf[i].real = tmp_a_r + tmp_b_r;
- buf[i].imag = tmp_a_i + tmp_b_i;
- buf[i+1].real = tmp_a_r - tmp_b_r;
- buf[i+1].imag = tmp_a_i - tmp_b_i;
-#else
- vector float temp, bufv;
-
- bufv = vec_ld(i << 3, (float*)buf);
- temp = vec_perm(bufv, bufv, vcprm(2,3,0,1));
- bufv = vec_madd(bufv, vcii(p,p,n,n), temp);
- vec_st(bufv, i << 3, (float*)buf);
-#endif
- }
-
- /* 2. iteration */
- // Note w[1]={{1,0}, {0,-1}}
- for(i = 0; i < 128; i += 4) {
-#if 0
- tmp_a_r = buf[i].real;
- tmp_a_i = buf[i].imag;
- tmp_b_r = buf[i+2].real;
- tmp_b_i = buf[i+2].imag;
- buf[i].real = tmp_a_r + tmp_b_r;
- buf[i].imag = tmp_a_i + tmp_b_i;
- buf[i+2].real = tmp_a_r - tmp_b_r;
- buf[i+2].imag = tmp_a_i - tmp_b_i;
- tmp_a_r = buf[i+1].real;
- tmp_a_i = buf[i+1].imag;
- /* WARNING: im <-> re here ! */
- tmp_b_r = buf[i+3].imag;
- tmp_b_i = buf[i+3].real;
- buf[i+1].real = tmp_a_r + tmp_b_r;
- buf[i+1].imag = tmp_a_i - tmp_b_i;
- buf[i+3].real = tmp_a_r - tmp_b_r;
- buf[i+3].imag = tmp_a_i + tmp_b_i;
-#else
- vector float buf01, buf23, temp1, temp2;
-
- buf01 = vec_ld((i + 0) << 3, (float*)buf);
- buf23 = vec_ld((i + 2) << 3, (float*)buf);
- buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2));
-
- temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01);
- temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01);
-
- vec_st(temp1, (i + 0) << 3, (float*)buf);
- vec_st(temp2, (i + 2) << 3, (float*)buf);
-#endif
- }
-
- /* 3. iteration */
- for(i = 0; i < 128; i += 8) {
-#if 0
- tmp_a_r = buf[i].real;
- tmp_a_i = buf[i].imag;
- tmp_b_r = buf[i+4].real;
- tmp_b_i = buf[i+4].imag;
- buf[i].real = tmp_a_r + tmp_b_r;
- buf[i].imag = tmp_a_i + tmp_b_i;
- buf[i+4].real = tmp_a_r - tmp_b_r;
- buf[i+4].imag = tmp_a_i - tmp_b_i;
- tmp_a_r = buf[1+i].real;
- tmp_a_i = buf[1+i].imag;
- tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;
- tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;
- buf[1+i].real = tmp_a_r + tmp_b_r;
- buf[1+i].imag = tmp_a_i + tmp_b_i;
- buf[i+5].real = tmp_a_r - tmp_b_r;
- buf[i+5].imag = tmp_a_i - tmp_b_i;
- tmp_a_r = buf[i+2].real;
- tmp_a_i = buf[i+2].imag;
- /* WARNING re <-> im & sign */
- tmp_b_r = buf[i+6].imag;
- tmp_b_i = - buf[i+6].real;
- buf[i+2].real = tmp_a_r + tmp_b_r;
- buf[i+2].imag = tmp_a_i + tmp_b_i;
- buf[i+6].real = tmp_a_r - tmp_b_r;
- buf[i+6].imag = tmp_a_i - tmp_b_i;
- tmp_a_r = buf[i+3].real;
- tmp_a_i = buf[i+3].imag;
- tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;
- tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;
- buf[i+3].real = tmp_a_r + tmp_b_r;
- buf[i+3].imag = tmp_a_i + tmp_b_i;
- buf[i+7].real = tmp_a_r - tmp_b_r;
- buf[i+7].imag = tmp_a_i - tmp_b_i;
-#else
- vector float buf01, buf23, buf45, buf67;
-
- buf01 = vec_ld((i + 0) << 3, (float*)buf);
- buf23 = vec_ld((i + 2) << 3, (float*)buf);
-
- tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;
- tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;
- buf[i+5].real = tmp_b_r;
- buf[i+5].imag = tmp_b_i;
- tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;
- tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;
- buf[i+7].real = tmp_b_r;
- buf[i+7].imag = tmp_b_i;
-
- buf23 = vec_ld((i + 2) << 3, (float*)buf);
- buf45 = vec_ld((i + 4) << 3, (float*)buf);
- buf67 = vec_ld((i + 6) << 3, (float*)buf);
- buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3));
-
- vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf);
- vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf);
- vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf);
- vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf);
-#endif
- }
-
- /* 4-7. iterations */
- for (m=3; m < 7; m++) {
- two_m = (1 << m);
-
- two_m_plus_one = two_m<<1;
-
- for(i = 0; i < 128; i += two_m_plus_one) {
- for(k = 0; k < two_m; k+=2) {
-#if 0
- int p = k + i;
- int q = p + two_m;
- tmp_a_r = buf[p].real;
- tmp_a_i = buf[p].imag;
- tmp_b_r =
- buf[q].real * w[m][k].real -
- buf[q].imag * w[m][k].imag;
- tmp_b_i =
- buf[q].imag * w[m][k].real +
- buf[q].real * w[m][k].imag;
- buf[p].real = tmp_a_r + tmp_b_r;
- buf[p].imag = tmp_a_i + tmp_b_i;
- buf[q].real = tmp_a_r - tmp_b_r;
- buf[q].imag = tmp_a_i - tmp_b_i;
-
- tmp_a_r = buf[(p + 1)].real;
- tmp_a_i = buf[(p + 1)].imag;
- tmp_b_r =
- buf[(q + 1)].real * w[m][(k + 1)].real -
- buf[(q + 1)].imag * w[m][(k + 1)].imag;
- tmp_b_i =
- buf[(q + 1)].imag * w[m][(k + 1)].real +
- buf[(q + 1)].real * w[m][(k + 1)].imag;
- buf[(p + 1)].real = tmp_a_r + tmp_b_r;
- buf[(p + 1)].imag = tmp_a_i + tmp_b_i;
- buf[(q + 1)].real = tmp_a_r - tmp_b_r;
- buf[(q + 1)].imag = tmp_a_i - tmp_b_i;
-#else
- int p = k + i;
- int q = p + two_m;
- vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4;
- const vector float vczero = (const vector float)FOUROF(0.);
- // first compute buf[q] and buf[q+1]
- vecq = vec_ld(q << 3, (float*)buf);
- vecw = vec_ld(0, (float*)&(w[m][k]));
- temp1 = vec_madd(vecq, vecw, vczero);
- temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2));
- temp2 = vec_madd(temp2, vecw, vczero);
- temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2));
- temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3));
- vecq = vec_madd(temp4, vcii(n,p,n,p), temp3);
- // then butterfly with buf[p] and buf[p+1]
- vecp = vec_ld(p << 3, (float*)buf);
-
- temp1 = vec_add(vecp, vecq);
- temp2 = vec_sub(vecp, vecq);
-
- vec_st(temp1, p << 3, (float*)buf);
- vec_st(temp2, q << 3, (float*)buf);
-#endif
- }
- }
- }
-
- /* Post IFFT complex multiply plus IFFT complex conjugate*/
- for( i=0; i < 128; i+=4) {
- /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
-#if 0
- tmp_a_r = buf[(i + 0)].real;
- tmp_a_i = -1.0 * buf[(i + 0)].imag;
- buf[(i + 0)].real =
- (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]);
- buf[(i + 0)].imag =
- (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]);
-
- tmp_a_r = buf[(i + 1)].real;
- tmp_a_i = -1.0 * buf[(i + 1)].imag;
- buf[(i + 1)].real =
- (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]);
- buf[(i + 1)].imag =
- (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]);
-
- tmp_a_r = buf[(i + 2)].real;
- tmp_a_i = -1.0 * buf[(i + 2)].imag;
- buf[(i + 2)].real =
- (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]);
- buf[(i + 2)].imag =
- (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]);
-
- tmp_a_r = buf[(i + 3)].real;
- tmp_a_i = -1.0 * buf[(i + 3)].imag;
- buf[(i + 3)].real =
- (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]);
- buf[(i + 3)].imag =
- (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]);
-#else
- vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2;
- vector float temp0022, temp1133, tempCS01;
- const vector float vczero = (const vector float)FOUROF(0.);
-
- bufv_0 = vec_ld((i + 0) << 3, (float*)buf);
- bufv_2 = vec_ld((i + 2) << 3, (float*)buf);
-
- cosv = vec_ld(i << 2, xcos1);
- sinv = vec_ld(i << 2, xsin1);
-
- temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2));
- temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3));
- tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1));
- temp1 = vec_madd(temp0022, tempCS01, vczero);
- tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1));
- temp2 = vec_madd(temp1133, tempCS01, vczero);
- bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1);
-
- vec_st(bufv_0, (i + 0) << 3, (float*)buf);
-
- /* idem with bufv_2 and high-order cosv/sinv */
-
- temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2));
- temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3));
- tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3));
- temp1 = vec_madd(temp0022, tempCS01, vczero);
- tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3));
- temp2 = vec_madd(temp1133, tempCS01, vczero);
- bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1);
-
- vec_st(bufv_2, (i + 2) << 3, (float*)buf);
-
-#endif
- }
-
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = a52_imdct_window;
-
- /* Window and convert to real valued signal */
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
- *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
- }
-
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
- *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
- }
-
- /* The trailing edge of the window goes into the delay line */
- delay_ptr = delay;
-
- for(i=0; i< 64; i++) {
- *delay_ptr++ = -buf[64+i].real * *--window_ptr;
- *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
- }
-
- for(i=0; i<64; i++) {
- *delay_ptr++ = buf[i].imag * *--window_ptr;
- *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
- }
-}
-#endif
-
-
-// Stuff below this line is borrowed from libac3
-#include "srfftp.h"
-#if ARCH_X86 || ARCH_X86_64
-#undef HAVE_AMD3DNOW
-#define HAVE_AMD3DNOW 1
-#include "srfftp_3dnow.h"
-
-const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }};
-const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }};
-const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
-
-#undef HAVE_AMD3DNOWEXT
-#define HAVE_AMD3DNOWEXT 0
-#include "imdct_3dnow.h"
-#undef HAVE_AMD3DNOWEXT
-#define HAVE_AMD3DNOWEXT 1
-#include "imdct_3dnow.h"
-
-#if !ARCH_X86_64 || !defined(PIC)
-void
-imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
-{
-/* int i,k;
- int p,q;*/
- int m;
- long two_m;
- long two_m_plus_one;
- long two_m_plus_one_shl3;
- complex_t *buf_offset;
-
-/* sample_t tmp_a_i;
- sample_t tmp_a_r;
- sample_t tmp_b_i;
- sample_t tmp_b_r;*/
-
- sample_t *data_ptr;
- sample_t *delay_ptr;
- sample_t *window_ptr;
-
- /* 512 IMDCT with source and dest data in 'data' */
- /* see the c version (dct_do_512()), its allmost identical, just in C */
-
- /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
- /* Bit reversed shuffling */
- __asm__ volatile(
- "xor %%"REG_S", %%"REG_S" \n\t"
- "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
- "mov $1008, %%"REG_D" \n\t"
- "push %%"REG_BP" \n\t" //use ebp without telling gcc
- ASMALIGN(4)
- "1: \n\t"
- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI
- "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI
- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi
- "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi
- "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR
- "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t"
- "mulps %%xmm0, %%xmm2 \n\t"
- "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI
- "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t"
- "subps %%xmm0, %%xmm2 \n\t"
- "movzb (%%"REG_a"), %%"REG_d" \n\t"
- "movzb 1(%%"REG_a"), %%"REG_BP" \n\t"
- "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t"
- "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t"
- "add $16, %%"REG_S" \n\t"
- "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap
- "sub $16, %%"REG_D" \n\t"
- "jnc 1b \n\t"
- "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g*
- :: "b" (data), "c" (buf)
- : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d
- );
-
-
- /* FFT Merge */
-/* unoptimized variant
- for (m=1; m < 7; m++) {
- if(m)
- two_m = (1 << m);
- else
- two_m = 1;
-
- two_m_plus_one = (1 << (m+1));
-
- for(i = 0; i < 128; i += two_m_plus_one) {
- for(k = 0; k < two_m; k++) {
- p = k + i;
- q = p + two_m;
- tmp_a_r = buf[p].real;
- tmp_a_i = buf[p].imag;
- tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
- tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
- buf[p].real = tmp_a_r + tmp_b_r;
- buf[p].imag = tmp_a_i + tmp_b_i;
- buf[q].real = tmp_a_r - tmp_b_r;
- buf[q].imag = tmp_a_i - tmp_b_i;
- }
- }
- }
-*/
-
- /* 1. iteration */
- // Note w[0][0]={1,0}
- __asm__ volatile(
- "xorps %%xmm1, %%xmm1 \n\t"
- "xorps %%xmm2, %%xmm2 \n\t"
- "mov %0, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p]
- "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q]
- "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p]
- "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q]
- "addps %%xmm1, %%xmm0 \n\t"
- "subps %%xmm2, %%xmm0 \n\t"
- "movaps %%xmm0, (%%"REG_S")\n\t"
- "add $16, %%"REG_S" \n\t"
- "cmp %1, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "g" (buf), "r" (buf + 128)
- : "%"REG_S
- );
-
- /* 2. iteration */
- // Note w[1]={{1,0}, {0,-1}}
- __asm__ volatile(
- "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
- "mov %0, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3
- "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3
- "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3
- "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1
- "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1
- "addps %%xmm2, %%xmm0 \n\t"
- "subps %%xmm2, %%xmm1 \n\t"
- "movaps %%xmm0, (%%"REG_S") \n\t"
- "movaps %%xmm1, 16(%%"REG_S") \n\t"
- "add $32, %%"REG_S" \n\t"
- "cmp %1, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "g" (buf), "r" (buf + 128)
- : "%"REG_S
- );
-
- /* 3. iteration */
-/*
- Note sseW2+0={1,1,sqrt(2),sqrt(2))
- Note sseW2+16={0,0,sqrt(2),-sqrt(2))
- Note sseW2+32={0,0,-sqrt(2),-sqrt(2))
- Note sseW2+48={1,-1,sqrt(2),-sqrt(2))
-*/
- __asm__ volatile(
- "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
- "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
- "xorps %%xmm5, %%xmm5 \n\t"
- "xorps %%xmm2, %%xmm2 \n\t"
- "mov %0, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5
- "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7
- "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5
- "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7
- "mulps %%xmm2, %%xmm4 \n\t"
- "mulps %%xmm3, %%xmm5 \n\t"
- "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5
- "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7
- "mulps %%xmm6, %%xmm3 \n\t"
- "mulps %%xmm7, %%xmm2 \n\t"
- "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1
- "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3
- "addps %%xmm4, %%xmm2 \n\t"
- "addps %%xmm5, %%xmm3 \n\t"
- "movaps %%xmm2, %%xmm4 \n\t"
- "movaps %%xmm3, %%xmm5 \n\t"
- "addps %%xmm0, %%xmm2 \n\t"
- "addps %%xmm1, %%xmm3 \n\t"
- "subps %%xmm4, %%xmm0 \n\t"
- "subps %%xmm5, %%xmm1 \n\t"
- "movaps %%xmm2, (%%"REG_S") \n\t"
- "movaps %%xmm3, 16(%%"REG_S") \n\t"
- "movaps %%xmm0, 32(%%"REG_S") \n\t"
- "movaps %%xmm1, 48(%%"REG_S") \n\t"
- "add $64, %%"REG_S" \n\t"
- "cmp %1, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "g" (buf), "r" (buf + 128)
- : "%"REG_S
- );
-
- /* 4-7. iterations */
- for (m=3; m < 7; m++) {
- two_m = (1 << m);
- two_m_plus_one = two_m<<1;
- two_m_plus_one_shl3 = (two_m_plus_one<<3);
- buf_offset = buf+128;
- __asm__ volatile(
- "mov %0, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "xor %%"REG_D", %%"REG_D" \n\t" // k
- "lea (%%"REG_S", %3), %%"REG_d" \n\t"
- "2: \n\t"
- "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t"
- "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t"
- "mulps %%xmm1, %%xmm2 \n\t"
- "shufps $0xB1, %%xmm1, %%xmm1 \n\t"
- "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t"
- "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t"
- "addps %%xmm2, %%xmm1 \n\t"
- "movaps %%xmm1, %%xmm2 \n\t"
- "addps %%xmm0, %%xmm1 \n\t"
- "subps %%xmm2, %%xmm0 \n\t"
- "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t"
- "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t"
- "add $16, %%"REG_D" \n\t"
- "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0
- "jb 2b \n\t"
- "add %2, %%"REG_S" \n\t"
- "cmp %1, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3),
- "r" (sseW[m])
- : "%"REG_S, "%"REG_D, "%"REG_d
- );
- }
-
- /* Post IFFT complex multiply plus IFFT complex conjugate*/
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
- "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
- "shufps $0xB1, %%xmm0, %%xmm0 \n\t"
- "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t"
- "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t"
- "addps %%xmm1, %%xmm0 \n\t"
- "movaps %%xmm0, (%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- :: "r" (buf+128)
- : "%"REG_S
- );
-
-
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = a52_imdct_window;
-
- /* Window and convert to real valued signal */
- __asm__ volatile(
- "xor %%"REG_D", %%"REG_D" \n\t" // 0
- "xor %%"REG_S", %%"REG_S" \n\t" // 0
- "movss %3, %%xmm2 \n\t" // bias
- "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
- ASMALIGN(4)
- "1: \n\t"
- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
- "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
- "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
- "addps (%2, %%"REG_S"), %%xmm0 \n\t"
- "addps %%xmm2, %%xmm0 \n\t"
- "movaps %%xmm0, (%1, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- "sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
- : "%"REG_S, "%"REG_D
- );
- data_ptr+=128;
- delay_ptr+=128;
-// window_ptr+=128;
-
- __asm__ volatile(
- "mov $1024, %%"REG_D" \n\t" // 512
- "xor %%"REG_S", %%"REG_S" \n\t" // 0
- "movss %3, %%xmm2 \n\t" // bias
- "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
- ASMALIGN(4)
- "1: \n\t"
- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
- "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
- "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
- "addps (%2, %%"REG_S"), %%xmm0 \n\t"
- "addps %%xmm2, %%xmm0 \n\t"
- "movaps %%xmm0, (%1, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- "sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
- : "%"REG_S, "%"REG_D
- );
- data_ptr+=128;
-// window_ptr+=128;
-
- /* The trailing edge of the window goes into the delay line */
- delay_ptr = delay;
-
- __asm__ volatile(
- "xor %%"REG_D", %%"REG_D" \n\t" // 0
- "xor %%"REG_S", %%"REG_S" \n\t" // 0
- ASMALIGN(4)
- "1: \n\t"
- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
- "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
- "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
- "movaps %%xmm0, (%1, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- "sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "r" (buf+64), "r" (delay_ptr)
- : "%"REG_S, "%"REG_D
- );
- delay_ptr+=128;
-// window_ptr-=128;
-
- __asm__ volatile(
- "mov $1024, %%"REG_D" \n\t" // 1024
- "xor %%"REG_S", %%"REG_S" \n\t" // 0
- ASMALIGN(4)
- "1: \n\t"
- "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
- "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
- "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
- "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
- "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
- "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
- "movaps %%xmm0, (%1, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- "sub $16, %%"REG_D" \n\t"
- "cmp $512, %%"REG_S" \n\t"
- " jb 1b \n\t"
- :: "r" (buf), "r" (delay_ptr)
- : "%"REG_S, "%"REG_D
- );
-}
-#endif
-#endif // ARCH_X86 || ARCH_X86_64
-
-void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
-{
- int i, k;
- sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2;
- const sample_t * window = a52_imdct_window;
- complex_t buf1[64], buf2[64];
-
- /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
- for (i = 0; i < 64; i++) {
- k = fftorder[i];
- t_r = pre2[i].real;
- t_i = pre2[i].imag;
-
- buf1[i].real = t_i * data[254-k] + t_r * data[k];
- buf1[i].imag = t_r * data[254-k] - t_i * data[k];
-
- buf2[i].real = t_i * data[255-k] + t_r * data[k+1];
- buf2[i].imag = t_r * data[255-k] - t_i * data[k+1];
- }
-
- ifft64 (buf1);
- ifft64 (buf2);
-
- /* Post IFFT complex multiply */
- /* Window and convert to real valued signal */
- for (i = 0; i < 32; i++) {
- /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
- t_r = post2[i].real;
- t_i = post2[i].imag;
-
- a_r = t_r * buf1[i].real + t_i * buf1[i].imag;
- a_i = t_i * buf1[i].real - t_r * buf1[i].imag;
- b_r = t_i * buf1[63-i].real + t_r * buf1[63-i].imag;
- b_i = t_r * buf1[63-i].real - t_i * buf1[63-i].imag;
-
- c_r = t_r * buf2[i].real + t_i * buf2[i].imag;
- c_i = t_i * buf2[i].real - t_r * buf2[i].imag;
- d_r = t_i * buf2[63-i].real + t_r * buf2[63-i].imag;
- d_i = t_r * buf2[63-i].real - t_i * buf2[63-i].imag;
-
- w_1 = window[2*i];
- w_2 = window[255-2*i];
- data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias;
- data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias;
- delay[2*i] = c_i;
-
- w_1 = window[128+2*i];
- w_2 = window[127-2*i];
- data[128+2*i] = delay[127-2*i] * w_2 + a_i * w_1 + bias;
- data[127-2*i] = delay[127-2*i] * w_1 - a_i * w_2 + bias;
- delay[127-2*i] = c_r;
-
- w_1 = window[2*i+1];
- w_2 = window[254-2*i];
- data[2*i+1] = delay[2*i+1] * w_2 - b_i * w_1 + bias;
- data[254-2*i] = delay[2*i+1] * w_1 + b_i * w_2 + bias;
- delay[2*i+1] = d_r;
-
- w_1 = window[129+2*i];
- w_2 = window[126-2*i];
- data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias;
- data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias;
- delay[126-2*i] = d_i;
- }
-}
-
-static double besselI0 (double x)
-{
- double bessel = 1;
- int i = 100;
-
- do
- bessel = bessel * x / (i * i) + 1;
- while (--i);
- return bessel;
-}
-
-void a52_imdct_init (uint32_t mm_accel)
-{
- int i, j, k;
- double sum;
-
- /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
- sum = 0;
- for (i = 0; i < 256; i++) {
- sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256));
- a52_imdct_window[i] = sum;
- }
- sum++;
- for (i = 0; i < 256; i++)
- a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum);
-
- for (i = 0; i < 3; i++)
- roots16[i] = cos ((M_PI / 8) * (i + 1));
-
- for (i = 0; i < 7; i++)
- roots32[i] = cos ((M_PI / 16) * (i + 1));
-
- for (i = 0; i < 15; i++)
- roots64[i] = cos ((M_PI / 32) * (i + 1));
-
- for (i = 0; i < 31; i++)
- roots128[i] = cos ((M_PI / 64) * (i + 1));
-
- for (i = 0; i < 64; i++) {
- k = fftorder[i] / 2 + 64;
- pre1[i].real = cos ((M_PI / 256) * (k - 0.25));
- pre1[i].imag = sin ((M_PI / 256) * (k - 0.25));
- }
-
- for (i = 64; i < 128; i++) {
- k = fftorder[i] / 2 + 64;
- pre1[i].real = -cos ((M_PI / 256) * (k - 0.25));
- pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25));
- }
-
- for (i = 0; i < 64; i++) {
- post1[i].real = cos ((M_PI / 256) * (i + 0.5));
- post1[i].imag = sin ((M_PI / 256) * (i + 0.5));
- }
-
- for (i = 0; i < 64; i++) {
- k = fftorder[i] / 4;
- pre2[i].real = cos ((M_PI / 128) * (k - 0.25));
- pre2[i].imag = sin ((M_PI / 128) * (k - 0.25));
- }
-
- for (i = 0; i < 32; i++) {
- post2[i].real = cos ((M_PI / 128) * (i + 0.5));
- post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
- }
- for (i = 0; i < 128; i++) {
- xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
- xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
- }
- for (i = 0; i < 7; i++) {
- j = 1 << i;
- for (k = 0; k < j; k++) {
- w[i][k].real = cos (-M_PI * k / j);
- w[i][k].imag = sin (-M_PI * k / j);
- }
- }
-#if ARCH_X86 || ARCH_X86_64
- for (i = 0; i < 128; i++) {
- sseSinCos1c[2*i+0]= xcos1[i];
- sseSinCos1c[2*i+1]= -xcos1[i];
- sseSinCos1d[2*i+0]= xsin1[i];
- sseSinCos1d[2*i+1]= xsin1[i];
- }
- for (i = 1; i < 7; i++) {
- j = 1 << i;
- for (k = 0; k < j; k+=2) {
-
- sseW[i][4*k + 0] = w[i][k+0].real;
- sseW[i][4*k + 1] = w[i][k+0].real;
- sseW[i][4*k + 2] = w[i][k+1].real;
- sseW[i][4*k + 3] = w[i][k+1].real;
-
- sseW[i][4*k + 4] = -w[i][k+0].imag;
- sseW[i][4*k + 5] = w[i][k+0].imag;
- sseW[i][4*k + 6] = -w[i][k+1].imag;
- sseW[i][4*k + 7] = w[i][k+1].imag;
-
- //we multiply more or less uninitalized numbers so we need to use exactly 0.0
- if(k==0)
- {
-// sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0;
- sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0;
- }
-
- if(2*k == j)
- {
- sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0;
-// sseW[i][4*k + 4]= -(sseW[i][4*k + 5]= -1.0);
- }
- }
- }
-
- for(i=0; i<128; i++)
- {
- sseWindow[2*i+0]= -a52_imdct_window[2*i+0];
- sseWindow[2*i+1]= a52_imdct_window[2*i+1];
- }
-
- for(i=0; i<64; i++)
- {
- sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1];
- sseWindow[256 + 2*i+1]= a52_imdct_window[254 - 2*i+0];
- sseWindow[384 + 2*i+0]= a52_imdct_window[126 - 2*i+1];
- sseWindow[384 + 2*i+1]= -a52_imdct_window[126 - 2*i+0];
- }
-#endif
- a52_imdct_512 = imdct_do_512;
- ifft128 = ifft128_c;
- ifft64 = ifft64_c;
-
-#if ARCH_X86 || ARCH_X86_64
-#if !ARCH_X86_64 || !defined(PIC)
- if(mm_accel & MM_ACCEL_X86_SSE)
- {
- fprintf (stderr, "Using SSE optimized IMDCT transform\n");
- a52_imdct_512 = imdct_do_512_sse;
- }
- else
-#endif
- if(mm_accel & MM_ACCEL_X86_3DNOWEXT)
- {
- fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n");
- a52_imdct_512 = imdct_do_512_3dnowex;
- }
- else
- if(mm_accel & MM_ACCEL_X86_3DNOW)
- {
- fprintf (stderr, "Using 3DNow optimized IMDCT transform\n");
- a52_imdct_512 = imdct_do_512_3dnow;
- }
- else
-#endif // ARCH_X86 || ARCH_X86_64
-#if HAVE_ALTIVEC
- if (mm_accel & MM_ACCEL_PPC_ALTIVEC)
- {
- fprintf(stderr, "Using AltiVec optimized IMDCT transform\n");
- a52_imdct_512 = imdct_do_512_altivec;
- }
- else
-#endif
-
-#ifdef LIBA52_DJBFFT
- if (mm_accel & MM_ACCEL_DJBFFT) {
- fprintf (stderr, "Using djbfft for IMDCT transform\n");
- ifft128 = (void (*) (complex_t *)) fftc4_un128;
- ifft64 = (void (*) (complex_t *)) fftc4_un64;
- } else
-#endif
- {
- fprintf (stderr, "No accelerated IMDCT transform found\n");
- }
-}
diff --git a/liba52/imdct_3dnow.h b/liba52/imdct_3dnow.h
deleted file mode 100644
index e8a91d11a4..0000000000
--- a/liba52/imdct_3dnow.h
+++ /dev/null
@@ -1,581 +0,0 @@
-/*
- * 3DNOW and 3DNOWEX optimized IMDCT
- * Copyright (C) 2002 Nick Kurshev
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#undef FFT_4_3DNOW
-#undef FFT_8_3DNOW
-#undef FFT_ASMB_3DNOW
-#undef FFT_ASMB16_3DNOW
-#undef FFT_128P_3DNOW
-
-#if HAVE_AMD3DNOWEXT
-#define FFT_4_3DNOW fft_4_3dnowex
-#define FFT_8_3DNOW fft_8_3dnowex
-#define FFT_ASMB_3DNOW fft_asmb_3dnowex
-#define FFT_ASMB16_3DNOW fft_asmb16_3dnowex
-#define FFT_128P_3DNOW fft_128p_3dnowex
-#else
-#define FFT_4_3DNOW fft_4_3dnow
-#define FFT_8_3DNOW fft_8_3dnow
-#define FFT_ASMB_3DNOW fft_asmb_3dnow
-#define FFT_ASMB16_3DNOW fft_asmb16_3dnow
-#define FFT_128P_3DNOW fft_128p_3dnow
-#endif
-
-static void FFT_4_3DNOW(complex_t *x)
-{
- /* delta_p = 1 here */
- /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4}
- */
- __asm__ volatile(
- "movq 24(%1), %%mm3\n\t"
- "movq 8(%1), %%mm1\n\t"
- "pxor %2, %%mm3\n\t" /* mm3.re | -mm3.im */
- "pxor %3, %%mm1\n\t" /* -mm1.re | mm1.im */
- "pfadd %%mm1, %%mm3\n\t" /* vi.im = x[3].re - x[1].re; */
- "movq %%mm3, %%mm4\n\t" /* vi.re =-x[3].im + x[1].im; mm4 = vi */
-#if HAVE_AMD3DNOWEXT
- "pswapd %%mm4, %%mm4\n\t"
-#else
- "punpckldq %%mm4, %%mm5\n\t"
- "punpckhdq %%mm5, %%mm4\n\t"
-#endif
- "movq (%1), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */
- "movq (%1), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */
- "movq 24(%1), %%mm7\n\t" /* u.re = x[3].re + x[1].re; */
- "pfsub 16(%1), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */
- "pfadd 16(%1), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */
- "pfadd 8(%1), %%mm7\n\t" /* u.im = x[3].im + x[1].im; mm7 = u */
-
- "movq %%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */
- "movq %%mm5, %%mm1\n\t" /* x[1].re = yb.re + vi.re; */
- "pfadd %%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */
- "pfadd %%mm4, %%mm1\n\t" /* x[1].im = yb.im + vi.im; */
- "movq %%mm0, (%0)\n\t"
- "movq %%mm1, 8(%0)\n\t"
-
- "pfsub %%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */
- "pfsub %%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */
- "movq %%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */
- "movq %%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */
- :"=r"(x)
- :"0"(x),
- "m"(x_plus_minus_3dnow),
- "m"(x_minus_plus_3dnow)
- :"memory");
-}
-
-static void FFT_8_3DNOW(complex_t *x)
-{
- /* delta_p = diag{1, sqrt(i)} here */
- /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8}
- */
- complex_t wT1, wB1, wB2;
-
- __asm__ volatile(
- "movq 8(%2), %%mm0\n\t"
- "movq 24(%2), %%mm1\n\t"
- "movq %%mm0, %0\n\t" /* wT1 = x[1]; */
- "movq %%mm1, %1\n\t" /* wB1 = x[3]; */
- :"=m"(wT1), "=m"(wB1)
- :"r"(x)
- :"memory");
-
- __asm__ volatile(
- "movq 16(%0), %%mm2\n\t"
- "movq 32(%0), %%mm3\n\t"
- "movq %%mm2, 8(%0)\n\t" /* x[1] = x[2]; */
- "movq 48(%0), %%mm4\n\t"
- "movq %%mm3, 16(%0)\n\t" /* x[2] = x[4]; */
- "movq %%mm4, 24(%0)\n\t" /* x[3] = x[6]; */
- :"=r"(x)
- :"0"(x)
- :"memory");
-
- fft_4_3dnow(&x[0]);
-
- /* x[0] x[4] x[2] x[6] */
-
- __asm__ volatile(
- "movq 40(%1), %%mm0\n\t"
- "movq %%mm0, %%mm3\n\t"
- "movq 56(%1), %%mm1\n\t"
- "pfadd %%mm1, %%mm0\n\t"
- "pfsub %%mm1, %%mm3\n\t"
- "movq (%2), %%mm2\n\t"
- "pfadd %%mm2, %%mm0\n\t"
- "pfadd %%mm2, %%mm3\n\t"
- "movq (%3), %%mm1\n\t"
- "pfadd %%mm1, %%mm0\n\t"
- "pfsub %%mm1, %%mm3\n\t"
- "movq (%1), %%mm1\n\t"
- "movq 16(%1), %%mm4\n\t"
- "movq %%mm1, %%mm2\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd %%mm3, %%mm3\n\t"
-#else
- "punpckldq %%mm3, %%mm6\n\t"
- "punpckhdq %%mm6, %%mm3\n\t"
-#endif
- "pfadd %%mm0, %%mm1\n\t"
- "movq %%mm4, %%mm5\n\t"
- "pfsub %%mm0, %%mm2\n\t"
- "pfadd %%mm3, %%mm4\n\t"
- "movq %%mm1, (%0)\n\t"
- "pfsub %%mm3, %%mm5\n\t"
- "movq %%mm2, 32(%0)\n\t"
- "movd %%mm4, 16(%0)\n\t"
- "movd %%mm5, 48(%0)\n\t"
- "psrlq $32, %%mm4\n\t"
- "psrlq $32, %%mm5\n\t"
- "movd %%mm4, 52(%0)\n\t"
- "movd %%mm5, 20(%0)"
- :"=r"(x)
- :"0"(x), "r"(&wT1), "r"(&wB1)
- :"memory");
-
- /* x[1] x[5] */
- __asm__ volatile (
- "movq %6, %%mm6\n\t"
- "movq %5, %%mm7\n\t"
- "movq %1, %%mm0\n\t"
- "movq %2, %%mm1\n\t"
- "movq 56(%3), %%mm3\n\t"
- "pfsub 40(%3), %%mm0\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd %%mm1, %%mm1\n\t"
-#else
- "punpckldq %%mm1, %%mm2\n\t"
- "punpckhdq %%mm2, %%mm1\n\t"
-#endif
- "pxor %%mm7, %%mm1\n\t"
- "pfadd %%mm1, %%mm0\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd %%mm3, %%mm3\n\t"
-#else
- "punpckldq %%mm3, %%mm2\n\t"
- "punpckhdq %%mm2, %%mm3\n\t"
-#endif
- "pxor %%mm6, %%mm3\n\t"
- "pfadd %%mm3, %%mm0\n\t"
- "movq %%mm0, %%mm1\n\t"
- "pxor %%mm6, %%mm1\n\t"
- "pfacc %%mm1, %%mm0\n\t"
- "pfmul %4, %%mm0\n\t"
-
- "movq 40(%3), %%mm5\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd %%mm5, %%mm5\n\t"
-#else
- "punpckldq %%mm5, %%mm1\n\t"
- "punpckhdq %%mm1, %%mm5\n\t"
-#endif
- "movq %%mm5, %0\n\t"
-
- "movq 8(%3), %%mm1\n\t"
- "movq %%mm1, %%mm2\n\t"
- "pfsub %%mm0, %%mm1\n\t"
- "pfadd %%mm0, %%mm2\n\t"
- "movq %%mm1, 40(%3)\n\t"
- "movq %%mm2, 8(%3)\n\t"
- :"=m"(wB2)
- :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW),
- "m"(x_plus_minus_3dnow), "m"(x_minus_plus_3dnow)
- :"memory");
-
-
- /* x[3] x[7] */
- __asm__ volatile(
- "movq %1, %%mm0\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd %3, %%mm1\n\t"
-#else
- "movq %3, %%mm1\n\t"
- "punpckldq %%mm1, %%mm2\n\t"
- "punpckhdq %%mm2, %%mm1\n\t"
-#endif
- "pxor %%mm6, %%mm1\n\t"
- "pfadd %%mm1, %%mm0\n\t"
- "movq %2, %%mm2\n\t"
- "movq 56(%4), %%mm3\n\t"
- "pxor %%mm7, %%mm3\n\t"
- "pfadd %%mm3, %%mm2\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd %%mm2, %%mm2\n\t"
-#else
- "punpckldq %%mm2, %%mm5\n\t"
- "punpckhdq %%mm5, %%mm2\n\t"
-#endif
- "movq 24(%4), %%mm3\n\t"
- "pfsub %%mm2, %%mm0\n\t"
- "movq %%mm3, %%mm4\n\t"
- "movq %%mm0, %%mm1\n\t"
- "pxor %%mm6, %%mm0\n\t"
- "pfacc %%mm1, %%mm0\n\t"
- "pfmul %5, %%mm0\n\t"
- "movq %%mm0, %%mm1\n\t"
- "pxor %%mm6, %%mm1\n\t"
- "pxor %%mm7, %%mm0\n\t"
- "pfadd %%mm1, %%mm3\n\t"
- "pfadd %%mm0, %%mm4\n\t"
- "movq %%mm4, 24(%0)\n\t"
- "movq %%mm3, 56(%0)\n\t"
- :"=r"(x)
- :"m"(wT1), "m"(wB2), "m"(wB1), "0"(x), "m"(HSQRT2_3DNOW)
- :"memory");
-}
-/* Merge pass over four equally sized sub-blocks of x.
- * x2k, x3k and x4k are placed 2*k, 4*k and 6*k elements after x, and wB
- * 2*k elements after wTB.  Elements 0..2*k-1 of each sub-block are
- * processed: indices 0 and 1 up front, then two more per loop iteration.
- * d and d_3 are walked in step with x and handed to TRANS_3DNOW.
- * NOTE(review): the loop exits only when the pre-decremented k hits zero,
- * so k must be >= 2 (callers in this file pass 4, 8 and 16).
- * The TRANS*_3DNOW macros are defined outside this view. */
-static void FFT_ASMB_3DNOW(int k, complex_t *x, complex_t *wTB,
- const complex_t *d, const complex_t *d_3)
-{
- register complex_t *x2k, *x3k, *x4k, *wB;
-/* presumably loads the mm6/mm7 constants the TRANS* macros rely on -- confirm against the macro definition */
- TRANS_FILL_MM6_MM7_3DNOW();
- x2k = x + 2 * k;
- x3k = x2k + 2 * k;
- x4k = x3k + 2 * k;
- wB = wTB + 2 * k;
-/* indices 0 and 1 are handled outside the loop; index 0 uses the dedicated TRANSZERO form */
- TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]);
- TRANS_3DNOW(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
-/* k-1 further iterations, each covering two more indices of every sub-block */
- --k;
- for(;;) {
- TRANS_3DNOW(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
- TRANS_3DNOW(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
- if (!--k) break; /* leave before advancing once the last pair is done */
- x += 2;
- x2k += 2;
- x3k += 2;
- x4k += 2;
- d += 2;
- d_3 += 2;
- wTB += 2;
- wB += 2;
- }
-
-}
-
-/* 16-point merge pass: combines the four 4-element groups x[0..3],
- * x[4..7], x[8..11] and x[12..15] one index at a time.  wTB is the work
- * area handed to TRANS_3DNOW; delta16 and delta16_3 are file-scope tables
- * defined outside this view, as are the TRANS*_3DNOW macros. */
-void FFT_ASMB16_3DNOW(complex_t *x, complex_t *wTB)
-{
- /* presumably loads the mm6/mm7 constants the macros below rely on */
- TRANS_FILL_MM6_MM7_3DNOW();
- /* transform x[0], x[8], x[4], x[12] */
- TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]);
-
- /* transform x[1], x[9], x[5], x[13] */
- TRANS_3DNOW(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
-
- /* transform x[2], x[10], x[6], x[14] */
- TRANSHALF_16_3DNOW(x[2],x[6],x[10],x[14]);
-
- /* transform x[3], x[11], x[7], x[15] */
- TRANS_3DNOW(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
-
-}
-
-/* In-place 128-point transform of a[0..127], built bottom-up from the
- * 4- and 8-point kernels and the FFT_ASMB*_3DNOW merge passes.  The call
- * order is significant and matches the original exactly. */
-static void FFT_128P_3DNOW(complex_t *a)
-{
-    /* a[0..15]: 16-point stage */
-    FFT_8_3DNOW(&a[0]);
-    FFT_4_3DNOW(&a[8]);
-    FFT_4_3DNOW(&a[12]);
-    FFT_ASMB16_3DNOW(&a[0], &a[8]);
-
-    /* a[16..31]: two 8-point stages, then merge a[0..31] */
-    FFT_8_3DNOW(&a[16]);
-    FFT_8_3DNOW(&a[24]);
-    FFT_ASMB_3DNOW(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
-
-    /* a[32..47]: 16-point stage */
-    FFT_8_3DNOW(&a[32]);
-    FFT_4_3DNOW(&a[40]);
-    FFT_4_3DNOW(&a[44]);
-    FFT_ASMB16_3DNOW(&a[32], &a[40]);
-
-    /* a[48..63]: 16-point stage */
-    FFT_8_3DNOW(&a[48]);
-    FFT_4_3DNOW(&a[56]);
-    FFT_4_3DNOW(&a[60]);
-    FFT_ASMB16_3DNOW(&a[48], &a[56]);
-
-    /* merge a[0..63] */
-    FFT_ASMB_3DNOW(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
-
-    /* a[64..79]: 16-point stage, i.e. FFT_16(&a[64]) */
-    FFT_8_3DNOW(&a[64]);
-    FFT_4_3DNOW(&a[72]);
-    FFT_4_3DNOW(&a[76]);
-    FFT_ASMB16_3DNOW(&a[64], &a[72]);
-
-    /* a[80..95]: two 8-point stages, then FFT_32(&a[64]) */
-    FFT_8_3DNOW(&a[80]);
-    FFT_8_3DNOW(&a[88]);
-    FFT_ASMB_3DNOW(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
-
-    /* a[96..111]: 16-point stage, i.e. FFT_16(&a[96]) */
-    FFT_8_3DNOW(&a[96]);
-    FFT_4_3DNOW(&a[104]);
-    FFT_4_3DNOW(&a[108]);
-    FFT_ASMB16_3DNOW(&a[96], &a[104]);
-
-    /* a[112..127]: two 8-point stages, then FFT_32(&a[96]) */
-    FFT_8_3DNOW(&a[112]);
-    FFT_8_3DNOW(&a[120]);
-    FFT_ASMB_3DNOW(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
-
-    /* final merge, i.e. FFT_128(&a[0]) */
-    FFT_ASMB_3DNOW(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
-}
-/* 512 IMDCT of one block using 3DNow! inline asm.  The function is named
- * imdct_do_512_3dnowex when HAVE_AMD3DNOWEXT is set and imdct_do_512_3dnow
- * otherwise; the extended build uses pswapd/pfpnacc where the plain build
- * uses punpck and pxor/pfacc sequences.
- * data:  input coefficients; overwritten with the windowed output.
- * delay: added into the output, then overwritten with this block's
- *        trailing half for the next call.
- * bias:  constant added to every output sample.
- * Works through the work buffer buf[] (declared outside this function)
- * and ends with femms.
- * NOTE(review): mm3, mm6 and mm7 are loaded in one asm statement and
- * consumed by later ones without appearing in any operand or clobber
- * list, so the statement order below must be kept as is. */
-static void
-#if HAVE_AMD3DNOWEXT
-imdct_do_512_3dnowex
-#else
-imdct_do_512_3dnow
-#endif
-(sample_t data[],sample_t delay[], sample_t bias)
-{
- int i;
-/* int k;
- int p,q;
- int m;
- int two_m;
- int two_m_plus_one;
-
- sample_t tmp_a_i;
- sample_t tmp_a_r;
- sample_t tmp_b_i;
- sample_t tmp_b_r;*/
-/* the locals above are only needed by the disabled reference code further down */
- sample_t *data_ptr;
- sample_t *delay_ptr;
- sample_t *window_ptr;
-
- /* 512 IMDCT with source and dest data in 'data' */
-
- /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/
-#if 1 /* asm path; the #else branch is the disabled plain C equivalent */
- __asm__ volatile (
- "movq %0, %%mm7\n\t" /* mm7 = x_plus_minus_3dnow, used by the pxor steps in the loop */
- ::"m"(x_plus_minus_3dnow)
- :"memory");
- for( i=0; i < 128; i++) {
- int j = pm128[i]; /* input index taken from the pm128 table (the "reordering" above) */
- __asm__ volatile (
- "movd %1, %%mm0\n\t"
- "movd %3, %%mm1\n\t"
- "punpckldq %2, %%mm0\n\t" /* mm0 = data[256-2*j-1] | data[2*j]*/
- "punpckldq %4, %%mm1\n\t" /* mm1 = xcos[j] | xsin[j] */
- "movq %%mm0, %%mm2\n\t"
- "pfmul %%mm1, %%mm0\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd %%mm1, %%mm1\n\t"
-#else
- "punpckldq %%mm1, %%mm5\n\t" /* two-instruction swap of the halves of mm1 via mm5 */
- "punpckhdq %%mm5, %%mm1\n\t"
-#endif
- "pfmul %%mm1, %%mm2\n\t"
-#if HAVE_AMD3DNOWEXT
- "pfpnacc %%mm2, %%mm0\n\t"
-#else
- "pxor %%mm7, %%mm0\n\t"
- "pfacc %%mm2, %%mm0\n\t"
-#endif
- "pxor %%mm7, %%mm0\n\t"
- "movq %%mm0, %0"
- :"=m"(buf[i])
- :"m"(data[256-2*j-1]), "m"(data[2*j]), "m"(xcos1[j]), "m"(xsin1[j])
- :"memory"
- );
-/* buf[i].re = (data[256-2*j-1] * xcos1[j] - data[2*j] * xsin1[j]);
- buf[i].im = (data[256-2*j-1] * xsin1[j] + data[2*j] * xcos1[j])*(-1.0);*/
- }
-#else
- __asm__ volatile ("femms":::"memory");
- for( i=0; i < 128; i++) {
- /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
- int j= pm128[i];
- buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
- buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j]));
- }
-#endif
-
- /* FFT Merge */
-/* unoptimized variant
- for (m=1; m < 7; m++) {
- if(m)
- two_m = (1 << m);
- else
- two_m = 1;
-
- two_m_plus_one = (1 << (m+1));
-
- for(i = 0; i < 128; i += two_m_plus_one) {
- for(k = 0; k < two_m; k++) {
- p = k + i;
- q = p + two_m;
- tmp_a_r = buf[p].real;
- tmp_a_i = buf[p].imag;
- tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
- tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
- buf[p].real = tmp_a_r + tmp_b_r;
- buf[p].imag = tmp_a_i + tmp_b_i;
- buf[q].real = tmp_a_r - tmp_b_r;
- buf[q].imag = tmp_a_i - tmp_b_i;
- }
- }
- }
-*/
-/* the 128-point transform of buf[] replaces the loop nest kept in the comment above */
- FFT_128P_3DNOW (&buf[0]);
-// __asm__ volatile ("femms \n\t":::"memory");
-
- /* Post IFFT complex multiply plus IFFT complex conjugate*/
-#if 1 /* asm path; the #else branch is the disabled plain C equivalent */
- __asm__ volatile (
- "movq %0, %%mm7\n\t" /* mm7 = x_plus_minus_3dnow */
- "movq %1, %%mm6\n\t" /* mm6 = x_minus_plus_3dnow; both stay live through the loops below */
- ::"m"(x_plus_minus_3dnow),
- "m"(x_minus_plus_3dnow)
- :"eax","memory"); /* NOTE(review): eax is listed as clobbered but this asm never references it */
- for (i=0; i < 128; i++) {
- __asm__ volatile (
- "movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
- "movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
-#if !HAVE_AMD3DNOWEXT
- "punpckldq %%mm1, %%mm2\n\t"
- "punpckhdq %%mm2, %%mm1\n\t"
-#else
- "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
-#endif
- "movd %3, %%mm3\n\t" /* ac3_xsin[i] */
- "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */
- "pfmul %%mm3, %%mm0\n\t"
- "pfmul %%mm3, %%mm1\n\t"
-#if !HAVE_AMD3DNOWEXT
- "pxor %%mm7, %%mm0\n\t"
- "pfacc %%mm1, %%mm0\n\t"
- "punpckldq %%mm0, %%mm1\n\t"
- "punpckhdq %%mm1, %%mm0\n\t"
- "movq %%mm0, %0\n\t"
-#else
- "pfpnacc %%mm1, %%mm0\n\t" /* mm0 = mm0[0] - mm0[1] | mm1[0] + mm1[1] */
- "pswapd %%mm0, %%mm0\n\t"
- "movq %%mm0, %0"
-#endif
- :"=m"(buf[i])
- :"m"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
- :"memory");
-/* ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]);
- ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);*/
- }
-#else
- __asm__ volatile ("femms":::"memory");
- for( i=0; i < 128; i++) {
- /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
- tmp_a_r = buf[i].real; /* NOTE(review): tmp_a_r/tmp_a_i are declared only inside the commented-out block at the top, so this disabled branch would not compile as is */
- tmp_a_i = -1.0 * buf[i].imag;
- buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]);
- buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]);
- }
-#endif
-/* all three cursors start at the beginning of their arrays */
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = a52_imdct_window;
-
- /* Window and convert to real valued signal */
-#if 1 /* asm path; the #else branch is the disabled plain C equivalent */
- __asm__ volatile (
- "movd (%0), %%mm3 \n\t" /* mm3 = bias in both halves after the punpckldq */
- "punpckldq %%mm3, %%mm3 \n\t"
- :: "r" (&bias)
- ); /* NOTE(review): bias is read through a pointer with neither an "m" operand nor a "memory" clobber -- confirm the compiler is guaranteed to have stored it */
- for (i=0; i< 64; i++) {
-/* merge two loops in one to enable working of 2 decoders */
- __asm__ volatile (
- "movd 516(%1), %%mm0\n\t"
- "movd (%1), %%mm1\n\t" /**data_ptr++=-buf[64+i].im**window_ptr+++*delay_ptr++;*/
- "punpckldq (%2), %%mm0\n\t"/*data_ptr[128]=-buf[i].re*window_ptr[128]+delay_ptr[128];*/
- "punpckldq 516(%2), %%mm1\n\t"
- "pfmul (%3), %%mm0\n\t"/**data_ptr++=buf[64-i-1].re**window_ptr+++*delay_ptr++;*/
- "pfmul 512(%3), %%mm1\n\t"
- "pxor %%mm6, %%mm0\n\t"/*data_ptr[128]=buf[128-i-1].im*window_ptr[128]+delay_ptr[128];*/
- "pxor %%mm6, %%mm1\n\t"
- "pfadd (%4), %%mm0\n\t"
- "pfadd 512(%4), %%mm1\n\t"
- "pfadd %%mm3, %%mm0\n\t" /* add bias (mm3 was set before the loop) */
- "pfadd %%mm3, %%mm1\n\t"
- "movq %%mm0, (%0)\n\t" /* two outputs at data_ptr[0..1] ... */
- "movq %%mm1, 512(%0)" /* ... and two more 512 bytes further on */
- :"=r"(data_ptr)
- :"r"(&buf[i].real), "r"(&buf[64-i-1].real), "r"(window_ptr), "r"(delay_ptr), "0"(data_ptr)
- :"memory");
- data_ptr += 2;
- window_ptr += 2;
- delay_ptr += 2;
- }
- window_ptr += 128; /* now 256 elements past a52_imdct_window; the delay loop below walks it backwards */
-#else
- __asm__ volatile ("femms":::"memory");
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
- *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
- }
-
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
- *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
- }
-#endif
-
- /* The trailing edge of the window goes into the delay line */
- delay_ptr = delay;
-#if 1 /* asm path; the #else branch is the disabled plain C equivalent */
- for(i=0; i< 64; i++) {
-/* merge two loops in one to enable working of 2 decoders */
- window_ptr -=2;
- __asm__ volatile(
- "movd 508(%1), %%mm0\n\t"
- "movd (%1), %%mm1\n\t"
- "punpckldq (%2), %%mm0\n\t"
- "punpckldq 508(%2), %%mm1\n\t"
-#if HAVE_AMD3DNOWEXT
- "pswapd (%3), %%mm3\n\t" /* window pair loaded with its halves swapped */
- "pswapd -512(%3), %%mm4\n\t"
-#else
- "movq (%3), %%mm3\n\t"
- "punpckldq %%mm3, %%mm2\n\t"
- "punpckhdq %%mm2, %%mm3\n\t"
- "movq -512(%3), %%mm4\n\t"
- "punpckldq %%mm4, %%mm2\n\t"
- "punpckhdq %%mm2, %%mm4\n\t"
-#endif
- "pfmul %%mm3, %%mm0\n\t"
- "pfmul %%mm4, %%mm1\n\t"
- "pxor %%mm6, %%mm0\n\t" /* mm6/mm7 still hold the constants loaded before the post-multiply loop */
- "pxor %%mm7, %%mm1\n\t"
- "movq %%mm0, (%0)\n\t"
- "movq %%mm1, 512(%0)"
- :"=r"(delay_ptr)
- :"r"(&buf[i].imag), "r"(&buf[64-i-1].imag), "r"(window_ptr), "0"(delay_ptr)
- :"memory");
- delay_ptr += 2;
- }
- __asm__ volatile ("femms":::"memory"); /* leave MMX/3DNow! state before returning */
-#else
- __asm__ volatile ("femms":::"memory");
- for(i=0; i< 64; i++) {
- *delay_ptr++ = -buf[64+i].real * *--window_ptr;
- *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
- }
-
- for(i=0; i<64; i++) {
- *delay_ptr++ = buf[i].imag * *--window_ptr;
- *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
- }
-#endif
-}
diff --git a/liba52/liba52.txt b/liba52/liba52.txt
deleted file mode 100644
index a60a3616f3..0000000000
--- a/liba52/liba52.txt
+++ /dev/null
@@ -1,208 +0,0 @@
-Using the liba52 API
---------------------
-
-liba52 provides a low-level interface to decoding audio frames encoded
-using ATSC standard A/52 aka AC-3. liba52 provides downmixing and
-dynamic range compression for the following output configurations:
-
-A52_CHANNEL : Dual mono. Two independent mono channels.
-A52_CHANNEL1 : First of the two mono channels above.
-A52_CHANNEL2 : Second of the two mono channels above.
-A52_MONO : Mono.
-A52_STEREO : Stereo.
-A52_DOLBY : Dolby surround compatible stereo.
-A52_3F : 3 front channels (left, center, right)
-A52_2F1R : 2 front, 1 rear surround channel (L, R, S)
-A52_3F1R : 3 front, 1 rear surround channel (L, C, R, S)
-A52_2F2R : 2 front, 2 rear surround channels (L, R, LS, RS)
-A52_3F2R : 3 front, 2 rear surround channels (L, C, R, LS, RS)
-
-A52_LFE : Low frequency effects channel. Normally used to connect a
- subwoofer. Can be combined with any of the above channels.
- For example: A52_3F2R | A52_LFE -> 3 front, 2 rear, 1 LFE (5.1)
-
-
-Initialization
---------------
-
-sample_t * a52_init (uint32_t mm_accel);
-
-Initializes the A/52 library. Takes as a parameter the acceptable
-optimizations which may be used, such as MMX. These are found in the
-included header file 'mm_accel.h', along with an autodetection function
-(mm_accel()). Currently, the only acceleration implemented is
-MM_ACCEL_MLIB, which uses the 'mlib' library if installed. mlib is
-only available on some Sun Microsystems platforms.
-
-The return value is a pointer to a properly-aligned sample buffer used
-for output samples.
-
-
-Probing the bitstream
----------------------
-
-int a52_syncinfo (uint8_t * buf, int * flags,
- int * sample_rate, int * bit_rate);
-
-The A/52 bitstream is composed of several a52 frames concatenated one
-after another. An a52 frame is the smallest independently decodable
-unit in the stream.
-
-buf must contain at least 7 bytes from the input stream. If these look
-like the start of a valid a52 frame, a52_syncinfo() returns the size
-of the coded frame in bytes, and fills flags, sample_rate and bit_rate
-with the information encoded in the stream. The returned size is
-guaranteed to be an even number between 128 and 3840. sample_rate will
-be the sampling frequency in Hz, bit_rate is for the compressed stream
-and is in bits per second, and flags is a description of the coded
-channels: the A52_LFE bit is set if there is an LFE channel coded in
-this stream, and by masking flags with A52_CHANNEL_MASK you will get a
-value that describes the full-bandwidth channels, as one of the
-A52_CHANNEL...A52_3F2R flags.
-
-If this can not possibly be a valid frame, then the function returns
-0. You should then try to re-synchronize with the a52 stream - one way
-to try this would be to advance buf by one byte until its contents
-look like a valid frame, but there might be better
-application-specific ways to synchronize.
-
-It is recommended to call this function for each frame, for several
-reasons: this function detects errors that the other functions will
-not double-check, consecutive frames might have different lengths, and
-it helps you re-sync with the stream if you get de-synchronized.
-
-
-Starting to decode a frame
---------------------------
-
-int a52_frame (a52_state_t * state, uint8_t * buf, int * flags,
- sample_t * level, sample_t bias);
-
-This starts the work of decoding the A/52 frame (to be completed using
-a52_block()). buf should point to the beginning of the complete frame
-of the full size returned by a52_syncinfo().
-
-You should pass in the flags the speaker configuration that you
-support, and liba52 will return the speaker configuration it will use
-for its output, based on what is coded in the stream and what you
-asked for. For example, if the stream contains 2+2 channels
-(a52_syncinfo() returned A52_2F2R in the flags), and you have 3+1
-speakers (you passed A52_3F1R), then liba52 will choose to downmix to
-2+1 speakers, since there is no center channel to send to your center
-speaker. So in that case the left and right channels will be
-essentially unmodified by the downmix, and the two surround channels
-will be added together and sent to your surround speaker. liba52 will
-return A52_2F1R to indicate this.
-
-The good news is that when you downmix to stereo you don't have to
-worry about this: you will ALWAYS get a stereo output no matter what
-was coded in the stream. For more complex output configurations,
-however, you will have to handle the case where liba52 couldn't give
-you what you wanted because some of the channels were not encoded in
-the stream.
-
-Level, bias, and A52_ADJUST_LEVEL:
-
-Before downmixing, samples are floating point values with a range of
-[-1,1]. Most types of downmixing will combine channels together, which
-will potentially result in a larger range for the output
-samples. liba52 provides two methods of controlling the range of the
-output, either before or after the downmix stage.
-
-If you do not set A52_ADJUST_LEVEL, liba52 will multiply the samples
-by your level value, so that they fit in the [-level,level]
-range. Then it will apply the standardized downmix equations,
-potentially making the samples go out of that interval again. The
-level parameter is not modified.
-
-Setting the A52_ADJUST_LEVEL flag will instruct liba52 to treat your
-level value as the intended range interval after downmixing. It will
-then figure out what level to use before the downmix (what you should
-have passed if you hadn't used the A52_ADJUST_LEVEL flag), and
-overwrite the level value you gave it with that new level value.
-
-The bias represents a value which should be added to the result
-regardless:
-
-output_sample = (input_sample * level) + bias;
-
-For example, a bias of 384 and a level of 1 tells liba52 you want
-samples between 383 and 385 instead of -1 and 1. This is what the
-sample program a52dec does, as it makes it faster to convert the
-samples to integer format, using a trick based on the IEEE
-floating-point format.
-
-This function also initialises the state for that frame, which will be
-reused next when decoding blocks.
-
-
-Dynamic range compression
--------------------------
-
-void a52_dynrng (a52_state_t * state,
- sample_t (* call) (sample_t, void *), void * data);
-
-This function is purely optional. If you don't call it, liba52 will
-provide the default behaviour, which is to apply the full dynamic
-range compression as specified in the A/52 stream. This basically
-makes the loud sounds softer, and the soft sounds louder, so you can
-more easily listen to the stream in a noisy environment without
-disturbing anyone.
-
-If you do call this function and set a NULL callback, this will
-totally disable the dynamic range compression and provide a playback
-more adapted to a movie theater or a listening room.
-
-If you call this function and specify a callback function, this
-callback might be called up to once for each block, with two
-arguments: the compression factor 'c' recommended by the bitstream,
-and the private data pointer you specified in a52_dynrng(). The
-callback will then return the amount of compression to actually use -
-typically pow(c,x) where x is somewhere between 0 and 1. More
-elaborate compression functions might want to use a different value
-for 'x' depending on whether c>1 or c<1 - or even something more complex
-if this is what you want.
-
-
-Decoding blocks
----------------
-
-int a52_block (a52_state_t * state, sample_t * samples);
-
-Every A/52 frame is composed of 6 blocks, each with an output of 256
-samples for each channel. The a52_block() function decodes the next
-block in the frame, and should be called 6 times to decode all of the
-audio in the frame. After each call, you should extract the audio data
-from the sample buffer.
-
-The sample pointer given should be the one a52_init() returned.
-
-After this function returns, the samples buffer will contain 256
-samples for the first channel, followed by 256 samples for the second
-channel, etc... the channel order is LFE, left, center, right, left
-surround, right surround. If one of the channels is not present in the
-liba52 output, as indicated by the flags returned by a52_frame(), then
-this channel is skipped and the following channels are shifted so
-liba52 does not leave an empty space between channels.
-
-
-Pseudocode example
-------------------
-
-sample_t * samples = a52_init (mm_accel());
-
-loop on input bytes:
- if at least 7 bytes in the buffer:
-
- bytes_to_get = a52_syncinfo (...)
-
- if bytes_to_get == 0:
- goto loop to keep looking for sync point
- else
- get rest of bytes
-
- a52_frame (state, buf, ...)
- [a52_dynrng (state, ...); this is only optional]
- for i = 1 ... 6:
- a52_block (state, samples)
- convert samples to integer and queue to soundcard
diff --git a/liba52/liba52_changes.diff b/liba52/liba52_changes.diff
deleted file mode 100644
index 13e4eacbac..0000000000
--- a/liba52/liba52_changes.diff
+++ /dev/null
@@ -1,2473 +0,0 @@
---- include/a52.h 2006-06-12 15:04:57.000000000 +0200
-+++ liba52/a52.h 2006-06-05 02:23:02.000000000 +0200
-@@ -59,4 +66,9 @@
- int a52_block (a52_state_t * state);
- void a52_free (a52_state_t * state);
-
-+void* a52_resample_init(uint32_t mm_accel,int flags,int chans);
-+extern int (* a52_resample) (float * _f, int16_t * s16);
-+
-+uint16_t crc16_block(uint8_t *data,uint32_t num_bytes);
-+
- #endif /* A52_H */
---- liba52/a52_internal.h 2006-06-12 15:05:07.000000000 +0200
-+++ liba52/a52_internal.h 2006-06-05 02:23:02.000000000 +0200
-@@ -103,18 +107,34 @@
- #define DELTA_BIT_NONE (2)
- #define DELTA_BIT_RESERVED (3)
-
-+#if ARCH_X86_64
-+# define REG_a "rax"
-+# define REG_d "rdx"
-+# define REG_S "rsi"
-+# define REG_D "rdi"
-+# define REG_BP "rbp"
-+#else
-+# define REG_a "eax"
-+# define REG_d "edx"
-+# define REG_S "esi"
-+# define REG_D "edi"
-+# define REG_BP "ebp"
-+#endif
-+
- void a52_bit_allocate (a52_state_t * state, ba_t * ba, int bndstart,
- int start, int end, int fastleak, int slowleak,
- expbap_t * expbap);
-
- int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev);
-+void downmix_accel_init(uint32_t mm_accel);
- int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
- sample_t clev, sample_t slev);
--void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
-+extern void (*a52_downmix) (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev);
--void a52_upmix (sample_t * samples, int acmod, int output);
-+extern void (*a52_upmix) (sample_t * samples, int acmod, int output);
-
- void a52_imdct_init (uint32_t mm_accel);
- void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias);
--void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias);
-+extern void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
-+void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias);
---- liba52/bitstream.c 2006-06-12 15:05:07.000000000 +0200
-+++ liba52/bitstream.c 2006-06-05 02:23:02.000000000 +0200
-@@ -31,6 +35,10 @@
-
- #define BUFFER_SIZE 4096
-
-+#ifdef ALT_BITSTREAM_READER
-+int indx=0;
-+#endif
-+
- void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf)
- {
- int align;
-@@ -38,6 +46,9 @@
- align = (long)buf & 3;
- state->buffer_start = (uint32_t *) (buf - align);
- state->bits_left = 0;
-+#ifdef ALT_BITSTREAM_READER
-+ indx=0;
-+#endif
- bitstream_get (state, align * 8);
- }
-
---- liba52/bitstream.h 2006-06-12 15:05:07.000000000 +0200
-+++ liba52/bitstream.h 2006-06-05 02:23:02.000000000 +0200
-@@ -21,6 +25,42 @@
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-+/* code from ffmpeg/libavcodec */
-+#if defined(__sparc__) || defined(hpux)
-+/*
-+ * the alt bitstream reader performs unaligned memory accesses; that doesn't work
-+ * on sparc/hpux. For now, disable ALT_BITSTREAM_READER.
-+ */
-+#undef ALT_BITSTREAM_READER
-+#else
-+// alternative (faster) bitstream reader (reads up to 3 bytes past the end of the input)
-+#define ALT_BITSTREAM_READER
-+
-+/* used to avoid misaligned exceptions on some archs (alpha, ...) */
-+#if ARCH_X86 || HAVE_ARMV6
-+# define unaligned32(a) (*(uint32_t*)(a))
-+#else
-+# ifdef __GNUC__
-+static inline uint32_t unaligned32(const void *v) {
-+ struct Unaligned {
-+ uint32_t i;
-+ } __attribute__((packed));
-+
-+ return ((const struct Unaligned *) v)->i;
-+}
-+# elif defined(__DECC)
-+static inline uint32_t unaligned32(const void *v) {
-+ return *(const __unaligned uint32_t *) v;
-+}
-+# else
-+static inline uint32_t unaligned32(const void *v) {
-+ return *(const uint32_t *) v;
-+}
-+# endif
-+#endif //!ARCH_X86
-+
-+#endif
-+
- /* (stolen from the kernel) */
- #if HAVE_BIGENDIAN
-
-@@ -28,7 +68,7 @@
-
- #else
-
--# if 0 && defined (__i386__)
-+# if defined (__i386__)
-
- # define swab32(x) __i386_swab32(x)
- static inline const uint32_t __i386_swab32(uint32_t x)
-@@ -39,19 +79,34 @@
-
- # else
-
--# define swab32(x)\
--((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) | \
-- (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3]))
--
-+# define swab32(x) __generic_swab32(x)
-+ static inline const uint32_t __generic_swab32(uint32_t x)
-+ {
-+ return ((((uint8_t*)&x)[0] << 24) | (((uint8_t*)&x)[1] << 16) |
-+ (((uint8_t*)&x)[2] << 8) | (((uint8_t*)&x)[3]));
-+ }
- # endif
- #endif
-
-+#ifdef ALT_BITSTREAM_READER
-+extern int indx;
-+#endif
-+
- void a52_bitstream_set_ptr (a52_state_t * state, uint8_t * buf);
- uint32_t a52_bitstream_get_bh (a52_state_t * state, uint32_t num_bits);
- int32_t a52_bitstream_get_bh_2 (a52_state_t * state, uint32_t num_bits);
-
- static inline uint32_t bitstream_get (a52_state_t * state, uint32_t num_bits)
- {
-+#ifdef ALT_BITSTREAM_READER
-+ uint32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
-+
-+ result<<= (indx&0x07);
-+ result>>= 32 - num_bits;
-+ indx+= num_bits;
-+
-+ return result;
-+#else
- uint32_t result;
-
- if (num_bits < state->bits_left) {
-@@ -61,10 +116,29 @@
- }
-
- return a52_bitstream_get_bh (state, num_bits);
-+#endif
-+}
-+
-+static inline void bitstream_skip(a52_state_t * state, int num_bits)
-+{
-+#ifdef ALT_BITSTREAM_READER
-+ indx+= num_bits;
-+#else
-+ bitstream_get(state, num_bits);
-+#endif
- }
-
- static inline int32_t bitstream_get_2 (a52_state_t * state, uint32_t num_bits)
- {
-+#ifdef ALT_BITSTREAM_READER
-+ int32_t result= swab32( unaligned32(((uint8_t *)state->buffer_start)+(indx>>3)) );
-+
-+ result<<= (indx&0x07);
-+ result>>= 32 - num_bits;
-+ indx+= num_bits;
-+
-+ return result;
-+#else
- int32_t result;
-
- if (num_bits < state->bits_left) {
-@@ -74,4 +148,5 @@
- }
-
- return a52_bitstream_get_bh_2 (state, num_bits);
-+#endif
- }
---- liba52/downmix.c 2006-06-12 15:17:53.000000000 +0200
-+++ liba52/downmix.c 2006-06-05 02:23:02.000000000 +0200
-@@ -19,18 +23,46 @@
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ *
-+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
- */
-
- #include "config.h"
-
- #include <string.h>
- #include <inttypes.h>
-
- #include "a52.h"
- #include "a52_internal.h"
-+#include "mm_accel.h"
-
- #define CONVERT(acmod,output) (((output) << 3) + (acmod))
-
-+
-+void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev)= NULL;
-+void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL;
-+
-+static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev);
-+static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev);
-+static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev);
-+static void upmix_MMX (sample_t * samples, int acmod, int output);
-+static void upmix_C (sample_t * samples, int acmod, int output);
-+
-+void downmix_accel_init(uint32_t mm_accel)
-+{
-+ a52_upmix= upmix_C;
-+ a52_downmix= downmix_C;
-+#if ARCH_X86 || ARCH_X86_64
-+ if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX;
-+ if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE;
-+ if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow;
-+#endif
-+}
-+
- int a52_downmix_init (int input, int flags, sample_t * level,
- sample_t clev, sample_t slev)
- {
-@@ -447,7 +479,7 @@
- samples[i] = 0;
- }
-
--void a52_downmix (sample_t * samples, int acmod, int output, sample_t bias,
-+void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
- sample_t clev, sample_t slev)
- {
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-@@ -559,7 +591,7 @@
- break;
-
- case CONVERT (A52_3F2R, A52_2F1R):
-- mix3to2 (samples, bias);
-+ mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
- move2to1 (samples + 768, samples + 512, bias);
- break;
-
-@@ -583,12 +615,12 @@
- break;
-
- case CONVERT (A52_3F1R, A52_3F2R):
-- memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t));
-+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
- break;
- }
- }
-
--void a52_upmix (sample_t * samples, int acmod, int output)
-+void upmix_C (sample_t * samples, int acmod, int output)
- {
- switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-
-@@ -653,3 +685,1104 @@
- goto mix_31to21;
- }
- }
-+
-+#if ARCH_X86 || ARCH_X86_64
-+static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %2, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps (%1, %%"REG_S"), %%xmm0 \n\t"
-+ "addps 16(%1, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to1_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix4to1_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix5to1_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to2_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" //common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %2, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" //common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps (%1, %%"REG_S"), %%xmm2 \n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, (%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (left+256), "r" (right+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "addps %%xmm7, %%xmm2 \n\t"
-+ "subps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31to2_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "subps %%xmm3, %%xmm1 \n\t"
-+ "addps %%xmm3, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix22toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "addps %%xmm7, %%xmm2 \n\t"
-+ "subps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix32to2_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "movaps %%xmm0, %%xmm1 \n\t" // common
-+ "addps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "addps 2048(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 3072(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 4096(%0, %%"REG_S"), %%xmm1\n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix32toS_SSE (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %1, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t" // common
-+ "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t"
-+ "subps %%xmm2, %%xmm1 \n\t"
-+ "addps %%xmm2, %%xmm3 \n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "addps %%xmm0, %%xmm3 \n\t"
-+ "movaps %%xmm1, (%0, %%"REG_S") \n\t"
-+ "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movlps %2, %%xmm7 \n\t"
-+ "shufps $0x00, %%xmm7, %%xmm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps 16(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps 1024(%0, %%"REG_S"), %%xmm0\n\t"
-+ "addps 1040(%0, %%"REG_S"), %%xmm1\n\t"
-+ "addps %%xmm7, %%xmm0 \n\t"
-+ "addps %%xmm7, %%xmm1 \n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "movaps %%xmm1, 16(%1, %%"REG_S")\n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void zero_MMX(sample_t * samples)
-+{
-+ __asm__ volatile(
-+ "mov $-1024, %%"REG_S" \n\t"
-+ "pxor %%mm0, %%mm0 \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm0, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm0, 16(%0, %%"REG_S") \n\t"
-+ "movq %%mm0, 24(%0, %%"REG_S") \n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ "emms"
-+ :: "r" (samples+256)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev)
-+{
-+ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-+
-+ case CONVERT (A52_CHANNEL, A52_CHANNEL2):
-+ memcpy (samples, samples + 256, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_CHANNEL, A52_MONO):
-+ case CONVERT (A52_STEREO, A52_MONO):
-+ mix_2to1_SSE:
-+ mix2to1_SSE (samples, samples + 256, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_2to1_SSE;
-+ case CONVERT (A52_3F, A52_MONO):
-+ mix_3to1_SSE:
-+ mix3to1_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_3to1_SSE;
-+ case CONVERT (A52_2F2R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_2to1_SSE;
-+ mix4to1_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_3to1_SSE;
-+ mix5to1_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_MONO, A52_DOLBY):
-+ memcpy (samples + 256, samples, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F, A52_STEREO):
-+ case CONVERT (A52_3F, A52_DOLBY):
-+ mix_3to2_SSE:
-+ mix3to2_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_STEREO):
-+ if (slev == 0)
-+ break;
-+ mix21to2_SSE (samples, samples + 256, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_DOLBY):
-+ mix21toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_STEREO):
-+ if (slev == 0)
-+ goto mix_3to2_SSE;
-+ mix31to2_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_DOLBY):
-+ mix31toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_STEREO):
-+ if (slev == 0)
-+ break;
-+ mix2to1_SSE (samples, samples + 512, bias);
-+ mix2to1_SSE (samples + 256, samples + 768, bias);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_DOLBY):
-+ mix22toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_STEREO):
-+ if (slev == 0)
-+ goto mix_3to2_SSE;
-+ mix32to2_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_DOLBY):
-+ mix32toS_SSE (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_3F):
-+ if (slev == 0)
-+ break;
-+ mix21to2_SSE (samples, samples + 512, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F):
-+ if (slev == 0)
-+ break;
-+ mix2to1_SSE (samples, samples + 768, bias);
-+ mix2to1_SSE (samples + 512, samples + 1024, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_2F1R):
-+ mix3to2_SSE (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_2F1R):
-+ mix2to1_SSE (samples + 512, samples + 768, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F1R):
-+ mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
-+ move2to1_SSE (samples + 768, samples + 512, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F1R):
-+ mix2to1_SSE (samples + 768, samples + 1024, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_2F2R):
-+ memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_2F2R):
-+ mix3to2_SSE (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F2R):
-+ mix3to2_SSE (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_3F2R):
-+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+ }
-+}
-+
-+static void upmix_MMX (sample_t * samples, int acmod, int output)
-+{
-+ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-+
-+ case CONVERT (A52_CHANNEL, A52_CHANNEL2):
-+ memcpy (samples + 256, samples, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_MONO):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_MONO):
-+ case CONVERT (A52_2F2R, A52_MONO):
-+ zero_MMX (samples + 768);
-+ case CONVERT (A52_3F, A52_MONO):
-+ case CONVERT (A52_2F1R, A52_MONO):
-+ zero_MMX (samples + 512);
-+ case CONVERT (A52_CHANNEL, A52_MONO):
-+ case CONVERT (A52_STEREO, A52_MONO):
-+ zero_MMX (samples + 256);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_STEREO):
-+ case CONVERT (A52_3F2R, A52_DOLBY):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_STEREO):
-+ case CONVERT (A52_3F1R, A52_DOLBY):
-+ zero_MMX (samples + 768);
-+ case CONVERT (A52_3F, A52_STEREO):
-+ case CONVERT (A52_3F, A52_DOLBY):
-+ mix_3to2_MMX:
-+ memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
-+ zero_MMX (samples + 256);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_STEREO):
-+ case CONVERT (A52_2F2R, A52_DOLBY):
-+ zero_MMX (samples + 768);
-+ case CONVERT (A52_2F1R, A52_STEREO):
-+ case CONVERT (A52_2F1R, A52_DOLBY):
-+ zero_MMX (samples + 512);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_3F):
-+ case CONVERT (A52_2F2R, A52_2F1R):
-+ zero_MMX (samples + 768);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F1R):
-+ zero_MMX (samples + 1024);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F1R):
-+ zero_MMX (samples + 1024);
-+ case CONVERT (A52_3F1R, A52_2F1R):
-+ mix_31to21_MMX:
-+ memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
-+ goto mix_3to2_MMX;
-+
-+ case CONVERT (A52_3F2R, A52_2F2R):
-+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
-+ goto mix_31to21_MMX;
-+ }
-+}
-+
-+static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %2, %%mm7 \n\t"
-+ "punpckldq %2, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
-+ "pfadd (%1, %%"REG_S"), %%mm0 \n\t"
-+ "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t"
-+ "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t"
-+ "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd %%mm7, %%mm2 \n\t"
-+ "pfadd %%mm7, %%mm3 \n\t"
-+ "movq %%mm0, (%1, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%1, %%"REG_S") \n\t"
-+ "movq %%mm2, 16(%1, %%"REG_S") \n\t"
-+ "movq %%mm3, 24(%1, %%"REG_S") \n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to1_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd %%mm2, %%mm0 \n\t"
-+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix4to1_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd %%mm2, %%mm0 \n\t"
-+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix5to1_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm2\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd %%mm2, %%mm0 \n\t"
-+ "pfadd %%mm3, %%mm1 \n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix3to2_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" //common
-+ "pfadd %%mm7, %%mm1 \n\t" //common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %2, %%mm7 \n\t"
-+ "punpckldq %2, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" //common
-+ "pfadd %%mm7, %%mm1 \n\t" //common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq (%1, %%"REG_S"), %%mm4 \n\t"
-+ "movq 8(%1, %%"REG_S"), %%mm5 \n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, (%1, %%"REG_S") \n\t"
-+ "movq %%mm5, 8(%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (left+256), "r" (right+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix21toS_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround
-+ "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm7, %%mm2 \n\t"
-+ "pfadd %%mm7, %%mm3 \n\t"
-+ "pfadd %%mm7, %%mm4 \n\t"
-+ "pfadd %%mm7, %%mm5 \n\t"
-+ "pfsub %%mm0, %%mm2 \n\t"
-+ "pfsub %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31to2_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" // common
-+ "pfadd %%mm7, %%mm1 \n\t" // common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix31toS_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" // common
-+ "pfadd %%mm7, %%mm1 \n\t" // common
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
-+ "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
-+ "pfsub %%mm0, %%mm2 \n\t"
-+ "pfsub %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix22toS_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround
-+ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm7, %%mm2 \n\t"
-+ "pfadd %%mm7, %%mm3 \n\t"
-+ "pfadd %%mm7, %%mm4 \n\t"
-+ "pfadd %%mm7, %%mm5 \n\t"
-+ "pfsub %%mm0, %%mm2 \n\t"
-+ "pfsub %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm4 \n\t"
-+ "pfadd %%mm1, %%mm5 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm4, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm5, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void mix32to2_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" // common
-+ "pfadd %%mm7, %%mm1 \n\t" // common
-+ "movq %%mm0, %%mm2 \n\t" // common
-+ "movq %%mm1, %%mm3 \n\t" // common
-+ "pfadd (%0, %%"REG_S"), %%mm0 \n\t"
-+ "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t"
-+ "movq %%mm0, (%0, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm2, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm3, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+/* todo: should be optimized better */
-+static void mix32toS_3dnow (sample_t * samples, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movd %1, %%mm7 \n\t"
-+ "punpckldq %1, %%mm7 \n\t"
-+ "movq 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "movq 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "movq 3072(%0, %%"REG_S"), %%mm4\n\t"
-+ "movq 3080(%0, %%"REG_S"), %%mm5\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t" // common
-+ "pfadd %%mm7, %%mm1 \n\t" // common
-+ "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround
-+ "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround
-+ "movq (%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm3 \n\t"
-+ "movq 2048(%0, %%"REG_S"), %%mm6\n\t"
-+ "movq 2056(%0, %%"REG_S"), %%mm7\n\t"
-+ "pfsub %%mm4, %%mm2 \n\t"
-+ "pfsub %%mm5, %%mm3 \n\t"
-+ "pfadd %%mm4, %%mm6 \n\t"
-+ "pfadd %%mm5, %%mm7 \n\t"
-+ "pfadd %%mm0, %%mm2 \n\t"
-+ "pfadd %%mm1, %%mm3 \n\t"
-+ "pfadd %%mm0, %%mm6 \n\t"
-+ "pfadd %%mm1, %%mm7 \n\t"
-+ "movq %%mm2, (%0, %%"REG_S") \n\t"
-+ "movq %%mm3, 8(%0, %%"REG_S") \n\t"
-+ "movq %%mm6, 1024(%0, %%"REG_S")\n\t"
-+ "movq %%mm7, 1032(%0, %%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (samples+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
-+{
-+ __asm__ volatile(
-+ "movd %2, %%mm7 \n\t"
-+ "punpckldq %2, %%mm7 \n\t"
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movq (%0, %%"REG_S"), %%mm0 \n\t"
-+ "movq 8(%0, %%"REG_S"), %%mm1 \n\t"
-+ "movq 16(%0, %%"REG_S"), %%mm2 \n\t"
-+ "movq 24(%0, %%"REG_S"), %%mm3 \n\t"
-+ "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t"
-+ "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t"
-+ "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t"
-+ "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t"
-+ "pfadd %%mm7, %%mm0 \n\t"
-+ "pfadd %%mm7, %%mm1 \n\t"
-+ "pfadd %%mm7, %%mm2 \n\t"
-+ "pfadd %%mm7, %%mm3 \n\t"
-+ "movq %%mm0, (%1, %%"REG_S") \n\t"
-+ "movq %%mm1, 8(%1, %%"REG_S") \n\t"
-+ "movq %%mm2, 16(%1, %%"REG_S") \n\t"
-+ "movq %%mm3, 24(%1, %%"REG_S") \n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (src+256), "r" (dest+256), "m" (bias)
-+ : "%"REG_S
-+ );
-+}
-+
-+static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
-+ sample_t clev, sample_t slev)
-+{
-+ switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
-+
-+ case CONVERT (A52_CHANNEL, A52_CHANNEL2):
-+ memcpy (samples, samples + 256, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_CHANNEL, A52_MONO):
-+ case CONVERT (A52_STEREO, A52_MONO):
-+ mix_2to1_3dnow:
-+ mix2to1_3dnow (samples, samples + 256, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_2to1_3dnow;
-+ case CONVERT (A52_3F, A52_MONO):
-+ mix_3to1_3dnow:
-+ mix3to1_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_3to1_3dnow;
-+ case CONVERT (A52_2F2R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_2to1_3dnow;
-+ mix4to1_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_MONO):
-+ if (slev == 0)
-+ goto mix_3to1_3dnow;
-+ mix5to1_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_MONO, A52_DOLBY):
-+ memcpy (samples + 256, samples, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F, A52_STEREO):
-+ case CONVERT (A52_3F, A52_DOLBY):
-+ mix_3to2_3dnow:
-+ mix3to2_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_STEREO):
-+ if (slev == 0)
-+ break;
-+ mix21to2_3dnow (samples, samples + 256, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_DOLBY):
-+ mix21toS_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_STEREO):
-+ if (slev == 0)
-+ goto mix_3to2_3dnow;
-+ mix31to2_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_DOLBY):
-+ mix31toS_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_STEREO):
-+ if (slev == 0)
-+ break;
-+ mix2to1_3dnow (samples, samples + 512, bias);
-+ mix2to1_3dnow (samples + 256, samples + 768, bias);
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_DOLBY):
-+ mix22toS_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_STEREO):
-+ if (slev == 0)
-+ goto mix_3to2_3dnow;
-+ mix32to2_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_DOLBY):
-+ mix32toS_3dnow (samples, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_3F):
-+ if (slev == 0)
-+ break;
-+ mix21to2_3dnow (samples, samples + 512, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F):
-+ if (slev == 0)
-+ break;
-+ mix2to1_3dnow (samples, samples + 768, bias);
-+ mix2to1_3dnow (samples + 512, samples + 1024, bias);
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_2F1R):
-+ mix3to2_3dnow (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_2F2R, A52_2F1R):
-+ mix2to1_3dnow (samples + 512, samples + 768, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F1R):
-+ mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
-+ move2to1_3dnow (samples + 768, samples + 512, bias);
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_3F1R):
-+ mix2to1_3dnow (samples + 768, samples + 1024, bias);
-+ break;
-+
-+ case CONVERT (A52_2F1R, A52_2F2R):
-+ memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_2F2R):
-+ mix3to2_3dnow (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F2R, A52_2F2R):
-+ mix3to2_3dnow (samples, bias);
-+ memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
-+ memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
-+ break;
-+
-+ case CONVERT (A52_3F1R, A52_3F2R):
-+ memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
-+ break;
-+ }
-+ __asm__ volatile("femms":::"memory");
-+}
-+
-+#endif // ARCH_X86 || ARCH_X86_64
---- liba52/imdct.c 2008-02-19 00:18:33.000000000 +0100
-+++ liba52/imdct.c 2008-02-19 00:16:40.000000000 +0100
-@@ -22,6 +26,11 @@
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-+ *
-+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
-+ * 3DNOW optimizations from Nick Kurshev <nickols_k@mail.ru>
-+ * michael did port them from libac3 (untested, perhaps totally broken)
-+ * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org)
- */
-
- #include "config.h"
-@@ -39,12 +48,50 @@
- #include "a52.h"
- #include "a52_internal.h"
- #include "mm_accel.h"
-+#include "mangle.h"
-+
-+void (*a52_imdct_512) (sample_t * data, sample_t * delay, sample_t bias);
-+
-+#if CONFIG_RUNTIME_CPUDETECT
-+#undef HAVE_AMD3DNOWEXT
-+#define HAVE_AMD3DNOWEXT 0
-+#endif
-
- typedef struct complex_s {
- sample_t real;
- sample_t imag;
- } complex_t;
-
-+static const int pm128[128] attribute_used __attribute__((aligned(16))) =
-+{
-+ 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
-+ 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
-+ 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
-+ 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
-+ 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
-+ 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
-+ 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
-+ 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
-+};
-+
-+static uint8_t attribute_used bit_reverse_512[] = {
-+ 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
-+ 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
-+ 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
-+ 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
-+ 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
-+ 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
-+ 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
-+ 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
-+ 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
-+ 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
-+ 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
-+ 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
-+ 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
-+ 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
-+ 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
-+ 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
-+
- static uint8_t fftorder[] = {
- 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
- 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
-@@ -56,6 +103,40 @@
- 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
- };
-
-+static complex_t __attribute__((aligned(16))) buf[128];
-+
-+/* Twiddle factor LUT */
-+static complex_t __attribute__((aligned(16))) w_1[1];
-+static complex_t __attribute__((aligned(16))) w_2[2];
-+static complex_t __attribute__((aligned(16))) w_4[4];
-+static complex_t __attribute__((aligned(16))) w_8[8];
-+static complex_t __attribute__((aligned(16))) w_16[16];
-+static complex_t __attribute__((aligned(16))) w_32[32];
-+static complex_t __attribute__((aligned(16))) w_64[64];
-+static complex_t __attribute__((aligned(16))) * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
-+
-+/* Twiddle factors for IMDCT */
-+static sample_t __attribute__((aligned(16))) xcos1[128];
-+static sample_t __attribute__((aligned(16))) xsin1[128];
-+
-+#if ARCH_X86 || ARCH_X86_64
-+// NOTE: SSE needs 16byte alignment or it will segfault
-+//
-+static float __attribute__((aligned(16))) sseSinCos1c[256];
-+static float __attribute__((aligned(16))) sseSinCos1d[256];
-+static float attribute_used __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
-+//static float __attribute__((aligned(16))) sseW0[4];
-+static float __attribute__((aligned(16))) sseW1[8];
-+static float __attribute__((aligned(16))) sseW2[16];
-+static float __attribute__((aligned(16))) sseW3[32];
-+static float __attribute__((aligned(16))) sseW4[64];
-+static float __attribute__((aligned(16))) sseW5[128];
-+static float __attribute__((aligned(16))) sseW6[256];
-+static float __attribute__((aligned(16))) *sseW[7]=
-+ {NULL /*sseW0*/,sseW1,sseW2,sseW3,sseW4,sseW5,sseW6};
-+static float __attribute__((aligned(16))) sseWindow[512];
-+#endif
-+
- /* Root values for IFFT */
- static sample_t roots16[3];
- static sample_t roots32[7];
-@@ -241,7 +322,7 @@
- ifft_pass (buf, roots128 - 32, 32);
- }
-
--void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)
-+void imdct_do_512 (sample_t * data, sample_t * delay, sample_t bias)
- {
- int i, k;
- sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2;
-@@ -285,6 +366,704 @@
- }
- }
-
-+#if HAVE_ALTIVEC
-+
-+#ifdef HAVE_ALTIVEC_H
-+#include <altivec.h>
-+#endif
-+
-+// used to build registers permutation vectors (vcprm)
-+// the 's' are for words in the _s_econd vector
-+#define WORD_0 0x00,0x01,0x02,0x03
-+#define WORD_1 0x04,0x05,0x06,0x07
-+#define WORD_2 0x08,0x09,0x0a,0x0b
-+#define WORD_3 0x0c,0x0d,0x0e,0x0f
-+#define WORD_s0 0x10,0x11,0x12,0x13
-+#define WORD_s1 0x14,0x15,0x16,0x17
-+#define WORD_s2 0x18,0x19,0x1a,0x1b
-+#define WORD_s3 0x1c,0x1d,0x1e,0x1f
-+
-+#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
-+#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
-+
-+#define FOUROF(a) {a,a,a,a}
-+
-+// vcprmle is used to keep the same index as in the SSE version.
-+// it's the same as vcprm, with the index inversed
-+// ('le' is Little Endian)
-+#define vcprmle(a,b,c,d) vcprm(d,c,b,a)
-+
-+// used to build inverse/identity vectors (vcii)
-+// n is _n_egative, p is _p_ositive
-+#define FLOAT_n -1.
-+#define FLOAT_p 1.
-+
-+
-+void
-+imdct_do_512_altivec(sample_t data[],sample_t delay[], sample_t bias)
-+{
-+ int i;
-+ int k;
-+ int p,q;
-+ int m;
-+ long two_m;
-+ long two_m_plus_one;
-+
-+ sample_t tmp_b_i;
-+ sample_t tmp_b_r;
-+ sample_t tmp_a_i;
-+ sample_t tmp_a_r;
-+
-+ sample_t *data_ptr;
-+ sample_t *delay_ptr;
-+ sample_t *window_ptr;
-+
-+ /* 512 IMDCT with source and dest data in 'data' */
-+
-+ /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/
-+ for( i=0; i < 128; i++) {
-+ /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
-+ int j= bit_reverse_512[i];
-+ buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
-+ buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j]));
-+ }
-+
-+ /* 1. iteration */
-+ for(i = 0; i < 128; i += 2) {
-+#if 0
-+ tmp_a_r = buf[i].real;
-+ tmp_a_i = buf[i].imag;
-+ tmp_b_r = buf[i+1].real;
-+ tmp_b_i = buf[i+1].imag;
-+ buf[i].real = tmp_a_r + tmp_b_r;
-+ buf[i].imag = tmp_a_i + tmp_b_i;
-+ buf[i+1].real = tmp_a_r - tmp_b_r;
-+ buf[i+1].imag = tmp_a_i - tmp_b_i;
-+#else
-+ vector float temp, bufv;
-+
-+ bufv = vec_ld(i << 3, (float*)buf);
-+ temp = vec_perm(bufv, bufv, vcprm(2,3,0,1));
-+ bufv = vec_madd(bufv, vcii(p,p,n,n), temp);
-+ vec_st(bufv, i << 3, (float*)buf);
-+#endif
-+ }
-+
-+ /* 2. iteration */
-+ // Note w[1]={{1,0}, {0,-1}}
-+ for(i = 0; i < 128; i += 4) {
-+#if 0
-+ tmp_a_r = buf[i].real;
-+ tmp_a_i = buf[i].imag;
-+ tmp_b_r = buf[i+2].real;
-+ tmp_b_i = buf[i+2].imag;
-+ buf[i].real = tmp_a_r + tmp_b_r;
-+ buf[i].imag = tmp_a_i + tmp_b_i;
-+ buf[i+2].real = tmp_a_r - tmp_b_r;
-+ buf[i+2].imag = tmp_a_i - tmp_b_i;
-+ tmp_a_r = buf[i+1].real;
-+ tmp_a_i = buf[i+1].imag;
-+ /* WARNING: im <-> re here ! */
-+ tmp_b_r = buf[i+3].imag;
-+ tmp_b_i = buf[i+3].real;
-+ buf[i+1].real = tmp_a_r + tmp_b_r;
-+ buf[i+1].imag = tmp_a_i - tmp_b_i;
-+ buf[i+3].real = tmp_a_r - tmp_b_r;
-+ buf[i+3].imag = tmp_a_i + tmp_b_i;
-+#else
-+ vector float buf01, buf23, temp1, temp2;
-+
-+ buf01 = vec_ld((i + 0) << 3, (float*)buf);
-+ buf23 = vec_ld((i + 2) << 3, (float*)buf);
-+ buf23 = vec_perm(buf23,buf23,vcprm(0,1,3,2));
-+
-+ temp1 = vec_madd(buf23, vcii(p,p,p,n), buf01);
-+ temp2 = vec_madd(buf23, vcii(n,n,n,p), buf01);
-+
-+ vec_st(temp1, (i + 0) << 3, (float*)buf);
-+ vec_st(temp2, (i + 2) << 3, (float*)buf);
-+#endif
-+ }
-+
-+ /* 3. iteration */
-+ for(i = 0; i < 128; i += 8) {
-+#if 0
-+ tmp_a_r = buf[i].real;
-+ tmp_a_i = buf[i].imag;
-+ tmp_b_r = buf[i+4].real;
-+ tmp_b_i = buf[i+4].imag;
-+ buf[i].real = tmp_a_r + tmp_b_r;
-+ buf[i].imag = tmp_a_i + tmp_b_i;
-+ buf[i+4].real = tmp_a_r - tmp_b_r;
-+ buf[i+4].imag = tmp_a_i - tmp_b_i;
-+ tmp_a_r = buf[1+i].real;
-+ tmp_a_i = buf[1+i].imag;
-+ tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;
-+ tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;
-+ buf[1+i].real = tmp_a_r + tmp_b_r;
-+ buf[1+i].imag = tmp_a_i + tmp_b_i;
-+ buf[i+5].real = tmp_a_r - tmp_b_r;
-+ buf[i+5].imag = tmp_a_i - tmp_b_i;
-+ tmp_a_r = buf[i+2].real;
-+ tmp_a_i = buf[i+2].imag;
-+ /* WARNING re <-> im & sign */
-+ tmp_b_r = buf[i+6].imag;
-+ tmp_b_i = - buf[i+6].real;
-+ buf[i+2].real = tmp_a_r + tmp_b_r;
-+ buf[i+2].imag = tmp_a_i + tmp_b_i;
-+ buf[i+6].real = tmp_a_r - tmp_b_r;
-+ buf[i+6].imag = tmp_a_i - tmp_b_i;
-+ tmp_a_r = buf[i+3].real;
-+ tmp_a_i = buf[i+3].imag;
-+ tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;
-+ tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;
-+ buf[i+3].real = tmp_a_r + tmp_b_r;
-+ buf[i+3].imag = tmp_a_i + tmp_b_i;
-+ buf[i+7].real = tmp_a_r - tmp_b_r;
-+ buf[i+7].imag = tmp_a_i - tmp_b_i;
-+#else
-+ vector float buf01, buf23, buf45, buf67;
-+
-+ buf01 = vec_ld((i + 0) << 3, (float*)buf);
-+ buf23 = vec_ld((i + 2) << 3, (float*)buf);
-+
-+ tmp_b_r = (buf[i+5].real + buf[i+5].imag) * w[2][1].real;
-+ tmp_b_i = (buf[i+5].imag - buf[i+5].real) * w[2][1].real;
-+ buf[i+5].real = tmp_b_r;
-+ buf[i+5].imag = tmp_b_i;
-+ tmp_b_r = (buf[i+7].real - buf[i+7].imag) * w[2][3].imag;
-+ tmp_b_i = (buf[i+7].imag + buf[i+7].real) * w[2][3].imag;
-+ buf[i+7].real = tmp_b_r;
-+ buf[i+7].imag = tmp_b_i;
-+
-+ buf23 = vec_ld((i + 2) << 3, (float*)buf);
-+ buf45 = vec_ld((i + 4) << 3, (float*)buf);
-+ buf67 = vec_ld((i + 6) << 3, (float*)buf);
-+ buf67 = vec_perm(buf67, buf67, vcprm(1,0,2,3));
-+
-+ vec_st(vec_add(buf01, buf45), (i + 0) << 3, (float*)buf);
-+ vec_st(vec_madd(buf67, vcii(p,n,p,p), buf23), (i + 2) << 3, (float*)buf);
-+ vec_st(vec_sub(buf01, buf45), (i + 4) << 3, (float*)buf);
-+ vec_st(vec_nmsub(buf67, vcii(p,n,p,p), buf23), (i + 6) << 3, (float*)buf);
-+#endif
-+ }
-+
-+ /* 4-7. iterations */
-+ for (m=3; m < 7; m++) {
-+ two_m = (1 << m);
-+
-+ two_m_plus_one = two_m<<1;
-+
-+ for(i = 0; i < 128; i += two_m_plus_one) {
-+ for(k = 0; k < two_m; k+=2) {
-+#if 0
-+ int p = k + i;
-+ int q = p + two_m;
-+ tmp_a_r = buf[p].real;
-+ tmp_a_i = buf[p].imag;
-+ tmp_b_r =
-+ buf[q].real * w[m][k].real -
-+ buf[q].imag * w[m][k].imag;
-+ tmp_b_i =
-+ buf[q].imag * w[m][k].real +
-+ buf[q].real * w[m][k].imag;
-+ buf[p].real = tmp_a_r + tmp_b_r;
-+ buf[p].imag = tmp_a_i + tmp_b_i;
-+ buf[q].real = tmp_a_r - tmp_b_r;
-+ buf[q].imag = tmp_a_i - tmp_b_i;
-+
-+ tmp_a_r = buf[(p + 1)].real;
-+ tmp_a_i = buf[(p + 1)].imag;
-+ tmp_b_r =
-+ buf[(q + 1)].real * w[m][(k + 1)].real -
-+ buf[(q + 1)].imag * w[m][(k + 1)].imag;
-+ tmp_b_i =
-+ buf[(q + 1)].imag * w[m][(k + 1)].real +
-+ buf[(q + 1)].real * w[m][(k + 1)].imag;
-+ buf[(p + 1)].real = tmp_a_r + tmp_b_r;
-+ buf[(p + 1)].imag = tmp_a_i + tmp_b_i;
-+ buf[(q + 1)].real = tmp_a_r - tmp_b_r;
-+ buf[(q + 1)].imag = tmp_a_i - tmp_b_i;
-+#else
-+ int p = k + i;
-+ int q = p + two_m;
-+ vector float vecp, vecq, vecw, temp1, temp2, temp3, temp4;
-+ const vector float vczero = (const vector float)FOUROF(0.);
-+ // first compute buf[q] and buf[q+1]
-+ vecq = vec_ld(q << 3, (float*)buf);
-+ vecw = vec_ld(0, (float*)&(w[m][k]));
-+ temp1 = vec_madd(vecq, vecw, vczero);
-+ temp2 = vec_perm(vecq, vecq, vcprm(1,0,3,2));
-+ temp2 = vec_madd(temp2, vecw, vczero);
-+ temp3 = vec_perm(temp1, temp2, vcprm(0,s0,2,s2));
-+ temp4 = vec_perm(temp1, temp2, vcprm(1,s1,3,s3));
-+ vecq = vec_madd(temp4, vcii(n,p,n,p), temp3);
-+ // then butterfly with buf[p] and buf[p+1]
-+ vecp = vec_ld(p << 3, (float*)buf);
-+
-+ temp1 = vec_add(vecp, vecq);
-+ temp2 = vec_sub(vecp, vecq);
-+
-+ vec_st(temp1, p << 3, (float*)buf);
-+ vec_st(temp2, q << 3, (float*)buf);
-+#endif
-+ }
-+ }
-+ }
-+
-+ /* Post IFFT complex multiply plus IFFT complex conjugate*/
-+ for( i=0; i < 128; i+=4) {
-+ /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
-+#if 0
-+ tmp_a_r = buf[(i + 0)].real;
-+ tmp_a_i = -1.0 * buf[(i + 0)].imag;
-+ buf[(i + 0)].real =
-+ (tmp_a_r * xcos1[(i + 0)]) - (tmp_a_i * xsin1[(i + 0)]);
-+ buf[(i + 0)].imag =
-+ (tmp_a_r * xsin1[(i + 0)]) + (tmp_a_i * xcos1[(i + 0)]);
-+
-+ tmp_a_r = buf[(i + 1)].real;
-+ tmp_a_i = -1.0 * buf[(i + 1)].imag;
-+ buf[(i + 1)].real =
-+ (tmp_a_r * xcos1[(i + 1)]) - (tmp_a_i * xsin1[(i + 1)]);
-+ buf[(i + 1)].imag =
-+ (tmp_a_r * xsin1[(i + 1)]) + (tmp_a_i * xcos1[(i + 1)]);
-+
-+ tmp_a_r = buf[(i + 2)].real;
-+ tmp_a_i = -1.0 * buf[(i + 2)].imag;
-+ buf[(i + 2)].real =
-+ (tmp_a_r * xcos1[(i + 2)]) - (tmp_a_i * xsin1[(i + 2)]);
-+ buf[(i + 2)].imag =
-+ (tmp_a_r * xsin1[(i + 2)]) + (tmp_a_i * xcos1[(i + 2)]);
-+
-+ tmp_a_r = buf[(i + 3)].real;
-+ tmp_a_i = -1.0 * buf[(i + 3)].imag;
-+ buf[(i + 3)].real =
-+ (tmp_a_r * xcos1[(i + 3)]) - (tmp_a_i * xsin1[(i + 3)]);
-+ buf[(i + 3)].imag =
-+ (tmp_a_r * xsin1[(i + 3)]) + (tmp_a_i * xcos1[(i + 3)]);
-+#else
-+ vector float bufv_0, bufv_2, cosv, sinv, temp1, temp2;
-+ vector float temp0022, temp1133, tempCS01;
-+ const vector float vczero = (const vector float)FOUROF(0.);
-+
-+ bufv_0 = vec_ld((i + 0) << 3, (float*)buf);
-+ bufv_2 = vec_ld((i + 2) << 3, (float*)buf);
-+
-+ cosv = vec_ld(i << 2, xcos1);
-+ sinv = vec_ld(i << 2, xsin1);
-+
-+ temp0022 = vec_perm(bufv_0, bufv_0, vcprm(0,0,2,2));
-+ temp1133 = vec_perm(bufv_0, bufv_0, vcprm(1,1,3,3));
-+ tempCS01 = vec_perm(cosv, sinv, vcprm(0,s0,1,s1));
-+ temp1 = vec_madd(temp0022, tempCS01, vczero);
-+ tempCS01 = vec_perm(cosv, sinv, vcprm(s0,0,s1,1));
-+ temp2 = vec_madd(temp1133, tempCS01, vczero);
-+ bufv_0 = vec_madd(temp2, vcii(p,n,p,n), temp1);
-+
-+ vec_st(bufv_0, (i + 0) << 3, (float*)buf);
-+
-+ /* idem with bufv_2 and high-order cosv/sinv */
-+
-+ temp0022 = vec_perm(bufv_2, bufv_2, vcprm(0,0,2,2));
-+ temp1133 = vec_perm(bufv_2, bufv_2, vcprm(1,1,3,3));
-+ tempCS01 = vec_perm(cosv, sinv, vcprm(2,s2,3,s3));
-+ temp1 = vec_madd(temp0022, tempCS01, vczero);
-+ tempCS01 = vec_perm(cosv, sinv, vcprm(s2,2,s3,3));
-+ temp2 = vec_madd(temp1133, tempCS01, vczero);
-+ bufv_2 = vec_madd(temp2, vcii(p,n,p,n), temp1);
-+
-+ vec_st(bufv_2, (i + 2) << 3, (float*)buf);
-+
-+#endif
-+ }
-+
-+ data_ptr = data;
-+ delay_ptr = delay;
-+ window_ptr = a52_imdct_window;
-+
-+ /* Window and convert to real valued signal */
-+ for(i=0; i< 64; i++) {
-+ *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
-+ *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
-+ }
-+
-+ for(i=0; i< 64; i++) {
-+ *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
-+ *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
-+ }
-+
-+ /* The trailing edge of the window goes into the delay line */
-+ delay_ptr = delay;
-+
-+ for(i=0; i< 64; i++) {
-+ *delay_ptr++ = -buf[64+i].real * *--window_ptr;
-+ *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
-+ }
-+
-+ for(i=0; i<64; i++) {
-+ *delay_ptr++ = buf[i].imag * *--window_ptr;
-+ *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
-+ }
-+}
-+#endif
-+
-+
-+// Stuff below this line is borrowed from libac3
-+#include "srfftp.h"
-+#if ARCH_X86 || ARCH_X86_64
-+#undef HAVE_AMD3DNOW
-+#define HAVE_AMD3DNOW 1
-+#include "srfftp_3dnow.h"
-+
-+const i_cmplx_t x_plus_minus_3dnow __attribute__ ((aligned (8))) = {{ 0x00000000UL, 0x80000000UL }};
-+const i_cmplx_t x_minus_plus_3dnow __attribute__ ((aligned (8))) = {{ 0x80000000UL, 0x00000000UL }};
-+const complex_t HSQRT2_3DNOW __attribute__ ((aligned (8))) = { 0.707106781188, 0.707106781188 };
-+
-+#undef HAVE_AMD3DNOWEXT
-+#define HAVE_AMD3DNOWEXT 0
-+#include "imdct_3dnow.h"
-+#undef HAVE_AMD3DNOWEXT
-+#define HAVE_AMD3DNOWEXT 1
-+#include "imdct_3dnow.h"
-+
-+#if !ARCH_X86_64 || !defined(PIC)
-+void
-+imdct_do_512_sse(sample_t data[],sample_t delay[], sample_t bias)
-+{
-+/* int i,k;
-+ int p,q;*/
-+ int m;
-+ long two_m;
-+ long two_m_plus_one;
-+ long two_m_plus_one_shl3;
-+ complex_t *buf_offset;
-+
-+/* sample_t tmp_a_i;
-+ sample_t tmp_a_r;
-+ sample_t tmp_b_i;
-+ sample_t tmp_b_r;*/
-+
-+ sample_t *data_ptr;
-+ sample_t *delay_ptr;
-+ sample_t *window_ptr;
-+
-+ /* 512 IMDCT with source and dest data in 'data' */
-+ /* see the c version (dct_do_512()), its allmost identical, just in C */
-+
-+ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
-+ /* Bit reversed shuffling */
-+ __asm__ volatile(
-+ "xor %%"REG_S", %%"REG_S" \n\t"
-+ "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t"
-+ "mov $1008, %%"REG_D" \n\t"
-+ "push %%"REG_BP" \n\t" //use ebp without telling gcc
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI
-+ "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI
-+ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi
-+ "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi
-+ "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR
-+ "movaps "MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm2\n\t"
-+ "mulps %%xmm0, %%xmm2 \n\t"
-+ "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI
-+ "mulps "MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t"
-+ "subps %%xmm0, %%xmm2 \n\t"
-+ "movzb (%%"REG_a"), %%"REG_d" \n\t"
-+ "movzb 1(%%"REG_a"), %%"REG_BP" \n\t"
-+ "movlps %%xmm2, (%1, %%"REG_d", 8) \n\t"
-+ "movhps %%xmm2, (%1, %%"REG_BP", 8) \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ "add $2, %%"REG_a" \n\t" // avoid complex addressing for P4 crap
-+ "sub $16, %%"REG_D" \n\t"
-+ "jnc 1b \n\t"
-+ "pop %%"REG_BP" \n\t"//no we didnt touch ebp *g*
-+ :: "b" (data), "c" (buf)
-+ : "%"REG_S, "%"REG_D, "%"REG_a, "%"REG_d
-+ );
-+
-+
-+ /* FFT Merge */
-+/* unoptimized variant
-+ for (m=1; m < 7; m++) {
-+ if(m)
-+ two_m = (1 << m);
-+ else
-+ two_m = 1;
-+
-+ two_m_plus_one = (1 << (m+1));
-+
-+ for(i = 0; i < 128; i += two_m_plus_one) {
-+ for(k = 0; k < two_m; k++) {
-+ p = k + i;
-+ q = p + two_m;
-+ tmp_a_r = buf[p].real;
-+ tmp_a_i = buf[p].imag;
-+ tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
-+ tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
-+ buf[p].real = tmp_a_r + tmp_b_r;
-+ buf[p].imag = tmp_a_i + tmp_b_i;
-+ buf[q].real = tmp_a_r - tmp_b_r;
-+ buf[q].imag = tmp_a_i - tmp_b_i;
-+ }
-+ }
-+ }
-+*/
-+
-+ /* 1. iteration */
-+ // Note w[0][0]={1,0}
-+ __asm__ volatile(
-+ "xorps %%xmm1, %%xmm1 \n\t"
-+ "xorps %%xmm2, %%xmm2 \n\t"
-+ "mov %0, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p]
-+ "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q]
-+ "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p]
-+ "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q]
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "subps %%xmm2, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%%"REG_S")\n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ "cmp %1, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "g" (buf), "r" (buf + 128)
-+ : "%"REG_S
-+ );
-+
-+ /* 2. iteration */
-+ // Note w[1]={{1,0}, {0,-1}}
-+ __asm__ volatile(
-+ "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
-+ "mov %0, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3
-+ "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3
-+ "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3
-+ "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1
-+ "movaps (%%"REG_S"), %%xmm1 \n\t" //r0,i0,r1,i1
-+ "addps %%xmm2, %%xmm0 \n\t"
-+ "subps %%xmm2, %%xmm1 \n\t"
-+ "movaps %%xmm0, (%%"REG_S") \n\t"
-+ "movaps %%xmm1, 16(%%"REG_S") \n\t"
-+ "add $32, %%"REG_S" \n\t"
-+ "cmp %1, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "g" (buf), "r" (buf + 128)
-+ : "%"REG_S
-+ );
-+
-+ /* 3. iteration */
-+/*
-+ Note sseW2+0={1,1,sqrt(2),sqrt(2))
-+ Note sseW2+16={0,0,sqrt(2),-sqrt(2))
-+ Note sseW2+32={0,0,-sqrt(2),-sqrt(2))
-+ Note sseW2+48={1,-1,sqrt(2),-sqrt(2))
-+*/
-+ __asm__ volatile(
-+ "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
-+ "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
-+ "xorps %%xmm5, %%xmm5 \n\t"
-+ "xorps %%xmm2, %%xmm2 \n\t"
-+ "mov %0, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5
-+ "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7
-+ "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5
-+ "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7
-+ "mulps %%xmm2, %%xmm4 \n\t"
-+ "mulps %%xmm3, %%xmm5 \n\t"
-+ "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5
-+ "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7
-+ "mulps %%xmm6, %%xmm3 \n\t"
-+ "mulps %%xmm7, %%xmm2 \n\t"
-+ "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1
-+ "movaps 16(%%"REG_S"), %%xmm1 \n\t" //r2,i2,r3,i3
-+ "addps %%xmm4, %%xmm2 \n\t"
-+ "addps %%xmm5, %%xmm3 \n\t"
-+ "movaps %%xmm2, %%xmm4 \n\t"
-+ "movaps %%xmm3, %%xmm5 \n\t"
-+ "addps %%xmm0, %%xmm2 \n\t"
-+ "addps %%xmm1, %%xmm3 \n\t"
-+ "subps %%xmm4, %%xmm0 \n\t"
-+ "subps %%xmm5, %%xmm1 \n\t"
-+ "movaps %%xmm2, (%%"REG_S") \n\t"
-+ "movaps %%xmm3, 16(%%"REG_S") \n\t"
-+ "movaps %%xmm0, 32(%%"REG_S") \n\t"
-+ "movaps %%xmm1, 48(%%"REG_S") \n\t"
-+ "add $64, %%"REG_S" \n\t"
-+ "cmp %1, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "g" (buf), "r" (buf + 128)
-+ : "%"REG_S
-+ );
-+
-+ /* 4-7. iterations */
-+ for (m=3; m < 7; m++) {
-+ two_m = (1 << m);
-+ two_m_plus_one = two_m<<1;
-+ two_m_plus_one_shl3 = (two_m_plus_one<<3);
-+ buf_offset = buf+128;
-+ __asm__ volatile(
-+ "mov %0, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "xor %%"REG_D", %%"REG_D" \n\t" // k
-+ "lea (%%"REG_S", %3), %%"REG_d" \n\t"
-+ "2: \n\t"
-+ "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t"
-+ "movaps (%4, %%"REG_D", 2), %%xmm2 \n\t"
-+ "mulps %%xmm1, %%xmm2 \n\t"
-+ "shufps $0xB1, %%xmm1, %%xmm1 \n\t"
-+ "mulps 16(%4, %%"REG_D", 2), %%xmm1 \n\t"
-+ "movaps (%%"REG_S", %%"REG_D"), %%xmm0 \n\t"
-+ "addps %%xmm2, %%xmm1 \n\t"
-+ "movaps %%xmm1, %%xmm2 \n\t"
-+ "addps %%xmm0, %%xmm1 \n\t"
-+ "subps %%xmm2, %%xmm0 \n\t"
-+ "movaps %%xmm1, (%%"REG_S", %%"REG_D") \n\t"
-+ "movaps %%xmm0, (%%"REG_d", %%"REG_D") \n\t"
-+ "add $16, %%"REG_D" \n\t"
-+ "cmp %3, %%"REG_D" \n\t" //FIXME (opt) count against 0
-+ "jb 2b \n\t"
-+ "add %2, %%"REG_S" \n\t"
-+ "cmp %1, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "g" (buf), "m" (buf_offset), "m" (two_m_plus_one_shl3), "r" (two_m<<3),
-+ "r" (sseW[m])
-+ : "%"REG_S, "%"REG_D, "%"REG_d
-+ );
-+ }
-+
-+ /* Post IFFT complex multiply plus IFFT complex conjugate*/
-+ __asm__ volatile(
-+ "mov $-1024, %%"REG_S" \n\t"
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm0 \n\t"
-+ "movaps (%0, %%"REG_S"), %%xmm1 \n\t"
-+ "shufps $0xB1, %%xmm0, %%xmm0 \n\t"
-+ "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t"
-+ "mulps 1024+"MANGLE(sseSinCos1d)"(%%"REG_S"), %%xmm0\n\t"
-+ "addps %%xmm1, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%0, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ " jnz 1b \n\t"
-+ :: "r" (buf+128)
-+ : "%"REG_S
-+ );
-+
-+
-+ data_ptr = data;
-+ delay_ptr = delay;
-+ window_ptr = a52_imdct_window;
-+
-+ /* Window and convert to real valued signal */
-+ __asm__ volatile(
-+ "xor %%"REG_D", %%"REG_D" \n\t" // 0
-+ "xor %%"REG_S", %%"REG_S" \n\t" // 0
-+ "movss %3, %%xmm2 \n\t" // bias
-+ "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
-+ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
-+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
-+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
-+ "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
-+ "mulps "MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
-+ "addps (%2, %%"REG_S"), %%xmm0 \n\t"
-+ "addps %%xmm2, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ "sub $16, %%"REG_D" \n\t"
-+ "cmp $512, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "r" (buf+64), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
-+ : "%"REG_S, "%"REG_D
-+ );
-+ data_ptr+=128;
-+ delay_ptr+=128;
-+// window_ptr+=128;
-+
-+ __asm__ volatile(
-+ "mov $1024, %%"REG_D" \n\t" // 512
-+ "xor %%"REG_S", %%"REG_S" \n\t" // 0
-+ "movss %3, %%xmm2 \n\t" // bias
-+ "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ...
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
-+ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
-+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
-+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
-+ "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
-+ "mulps 512+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
-+ "addps (%2, %%"REG_S"), %%xmm0 \n\t"
-+ "addps %%xmm2, %%xmm0 \n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ "sub $16, %%"REG_D" \n\t"
-+ "cmp $512, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "r" (buf), "r" (data_ptr), "r" (delay_ptr), "m" (bias)
-+ : "%"REG_S, "%"REG_D
-+ );
-+ data_ptr+=128;
-+// window_ptr+=128;
-+
-+ /* The trailing edge of the window goes into the delay line */
-+ delay_ptr = delay;
-+
-+ __asm__ volatile(
-+ "xor %%"REG_D", %%"REG_D" \n\t" // 0
-+ "xor %%"REG_S", %%"REG_S" \n\t" // 0
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A
-+ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C
-+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C
-+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A
-+ "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
-+ "mulps 1024+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ "sub $16, %%"REG_D" \n\t"
-+ "cmp $512, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "r" (buf+64), "r" (delay_ptr)
-+ : "%"REG_S, "%"REG_D
-+ );
-+ delay_ptr+=128;
-+// window_ptr-=128;
-+
-+ __asm__ volatile(
-+ "mov $1024, %%"REG_D" \n\t" // 1024
-+ "xor %%"REG_S", %%"REG_S" \n\t" // 0
-+ ASMALIGN(4)
-+ "1: \n\t"
-+ "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ?
-+ "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ?
-+ "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ?
-+ "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ?
-+ "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
-+ "mulps 1536+"MANGLE(sseWindow)"(%%"REG_S"), %%xmm0\n\t"
-+ "movaps %%xmm0, (%1, %%"REG_S") \n\t"
-+ "add $16, %%"REG_S" \n\t"
-+ "sub $16, %%"REG_D" \n\t"
-+ "cmp $512, %%"REG_S" \n\t"
-+ " jb 1b \n\t"
-+ :: "r" (buf), "r" (delay_ptr)
-+ : "%"REG_S, "%"REG_D
-+ );
-+}
-+#endif
-+#endif // ARCH_X86 || ARCH_X86_64
-+
- void a52_imdct_256(sample_t * data, sample_t * delay, sample_t bias)
- {
- int i, k;
-@@ -364,7 +1143,7 @@
-
- void a52_imdct_init (uint32_t mm_accel)
- {
-- int i, k;
-+ int i, j, k;
- double sum;
-
- /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
-@@ -416,6 +1195,101 @@
- post2[i].real = cos ((M_PI / 128) * (i + 0.5));
- post2[i].imag = sin ((M_PI / 128) * (i + 0.5));
- }
-+ for (i = 0; i < 128; i++) {
-+ xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
-+ xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
-+ }
-+ for (i = 0; i < 7; i++) {
-+ j = 1 << i;
-+ for (k = 0; k < j; k++) {
-+ w[i][k].real = cos (-M_PI * k / j);
-+ w[i][k].imag = sin (-M_PI * k / j);
-+ }
-+ }
-+#if ARCH_X86 || ARCH_X86_64
-+ for (i = 0; i < 128; i++) {
-+ sseSinCos1c[2*i+0]= xcos1[i];
-+ sseSinCos1c[2*i+1]= -xcos1[i];
-+ sseSinCos1d[2*i+0]= xsin1[i];
-+ sseSinCos1d[2*i+1]= xsin1[i];
-+ }
-+ for (i = 1; i < 7; i++) {
-+ j = 1 << i;
-+ for (k = 0; k < j; k+=2) {
-+
-+ sseW[i][4*k + 0] = w[i][k+0].real;
-+ sseW[i][4*k + 1] = w[i][k+0].real;
-+ sseW[i][4*k + 2] = w[i][k+1].real;
-+ sseW[i][4*k + 3] = w[i][k+1].real;
-+
-+ sseW[i][4*k + 4] = -w[i][k+0].imag;
-+ sseW[i][4*k + 5] = w[i][k+0].imag;
-+ sseW[i][4*k + 6] = -w[i][k+1].imag;
-+ sseW[i][4*k + 7] = w[i][k+1].imag;
-+
-+ //we multiply more or less uninitalized numbers so we need to use exactly 0.0
-+ if(k==0)
-+ {
-+// sseW[i][4*k + 0]= sseW[i][4*k + 1]= 1.0;
-+ sseW[i][4*k + 4]= sseW[i][4*k + 5]= 0.0;
-+ }
-+
-+ if(2*k == j)
-+ {
-+ sseW[i][4*k + 0]= sseW[i][4*k + 1]= 0.0;
-+// sseW[i][4*k + 4]= -(sseW[i][4*k + 5]= -1.0);
-+ }
-+ }
-+ }
-+
-+ for(i=0; i<128; i++)
-+ {
-+ sseWindow[2*i+0]= -a52_imdct_window[2*i+0];
-+ sseWindow[2*i+1]= a52_imdct_window[2*i+1];
-+ }
-+
-+ for(i=0; i<64; i++)
-+ {
-+ sseWindow[256 + 2*i+0]= -a52_imdct_window[254 - 2*i+1];
-+ sseWindow[256 + 2*i+1]= a52_imdct_window[254 - 2*i+0];
-+ sseWindow[384 + 2*i+0]= a52_imdct_window[126 - 2*i+1];
-+ sseWindow[384 + 2*i+1]= -a52_imdct_window[126 - 2*i+0];
-+ }
-+#endif
-+ a52_imdct_512 = imdct_do_512;
-+ ifft128 = ifft128_c;
-+ ifft64 = ifft64_c;
-+
-+#if ARCH_X86 || ARCH_X86_64
-+#if !ARCH_X86_64 || !defined(PIC)
-+ if(mm_accel & MM_ACCEL_X86_SSE)
-+ {
-+ fprintf (stderr, "Using SSE optimized IMDCT transform\n");
-+ a52_imdct_512 = imdct_do_512_sse;
-+ }
-+ else
-+#endif
-+ if(mm_accel & MM_ACCEL_X86_3DNOWEXT)
-+ {
-+ fprintf (stderr, "Using 3DNowEx optimized IMDCT transform\n");
-+ a52_imdct_512 = imdct_do_512_3dnowex;
-+ }
-+ else
-+ if(mm_accel & MM_ACCEL_X86_3DNOW)
-+ {
-+ fprintf (stderr, "Using 3DNow optimized IMDCT transform\n");
-+ a52_imdct_512 = imdct_do_512_3dnow;
-+ }
-+ else
-+#endif // ARCH_X86 || ARCH_X86_64
-+#if HAVE_ALTIVEC
-+ if (mm_accel & MM_ACCEL_PPC_ALTIVEC)
-+ {
-+ fprintf(stderr, "Using AltiVec optimized IMDCT transform\n");
-+ a52_imdct_512 = imdct_do_512_altivec;
-+ }
-+ else
-+#endif
-
- #ifdef LIBA52_DJBFFT
- if (mm_accel & MM_ACCEL_DJBFFT) {
-@@ -426,7 +1300,5 @@
- #endif
- {
- fprintf (stderr, "No accelerated IMDCT transform found\n");
-- ifft128 = ifft128_c;
-- ifft64 = ifft64_c;
- }
- }
---- include/mm_accel.h 2006-06-12 15:05:00.000000000 +0200
-+++ liba52/mm_accel.h 2006-06-05 02:23:04.000000000 +0200
-@@ -30,7 +34,12 @@
- /* x86 accelerations */
- #define MM_ACCEL_X86_MMX 0x80000000
- #define MM_ACCEL_X86_3DNOW 0x40000000
-+#define MM_ACCEL_X86_3DNOWEXT 0x08000000
- #define MM_ACCEL_X86_MMXEXT 0x20000000
-+#define MM_ACCEL_X86_SSE 0x10000000
-+
-+/* PPC accelerations */
-+#define MM_ACCEL_PPC_ALTIVEC 0x00010000
-
- uint32_t mm_accel (void);
-
---- liba52/parse.c 2006-12-05 08:08:01.000000000 +0100
-+++ liba52/parse.c 2006-12-05 08:08:44.000000000 +0100
-@@ -24,6 +28,7 @@
- #include "config.h"
-
- #include <stdlib.h>
-+#include <stdio.h>
- #include <string.h>
- #include <inttypes.h>
-
-@@ -31,13 +36,12 @@
- #include "a52_internal.h"
- #include "bitstream.h"
- #include "tables.h"
-+#include "mm_accel.h"
-+#include "libavutil/avutil.h"
-
--#ifdef HAVE_MEMALIGN
-+#if HAVE_MEMALIGN
- /* some systems have memalign() but no declaration for it */
- void * memalign (size_t align, size_t size);
--#else
--/* assume malloc alignment is sufficient */
--#define memalign(align,size) malloc (size)
- #endif
-
- typedef struct {
-@@ -60,7 +64,16 @@
- if (state == NULL)
- return NULL;
-
-+#if defined(__MINGW32__) && defined(HAVE_SSE)
-+ state->samples = av_malloc(256 * 12 * sizeof (sample_t));
-+#else
- state->samples = memalign (16, 256 * 12 * sizeof (sample_t));
-+#endif
-+ if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){
-+ mm_accel &=~MM_ACCEL_X86_SSE;
-+ fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n");
-+ }
-+
- if (state->samples == NULL) {
- free (state);
- return NULL;
-@@ -74,6 +87,7 @@
- state->lfsr_state = 1;
-
- a52_imdct_init (mm_accel);
-+ downmix_accel_init(mm_accel);
-
- return state;
- }
-@@ -141,7 +155,7 @@
- state->acmod = acmod = buf[6] >> 5;
-
- a52_bitstream_set_ptr (state, buf + 6);
-- bitstream_get (state, 3); /* skip acmod we already parsed */
-+ bitstream_skip (state, 3); /* skip acmod we already parsed */
-
- if ((acmod == 2) && (bitstream_get (state, 2) == 2)) /* dsurmod */
- acmod = A52_DOLBY;
-@@ -172,28 +186,28 @@
-
- chaninfo = !acmod;
- do {
-- bitstream_get (state, 5); /* dialnorm */
-+ bitstream_skip (state, 5); /* dialnorm */
- if (bitstream_get (state, 1)) /* compre */
-- bitstream_get (state, 8); /* compr */
-+ bitstream_skip (state, 8); /* compr */
- if (bitstream_get (state, 1)) /* langcode */
-- bitstream_get (state, 8); /* langcod */
-+ bitstream_skip (state, 8); /* langcod */
- if (bitstream_get (state, 1)) /* audprodie */
-- bitstream_get (state, 7); /* mixlevel + roomtyp */
-+ bitstream_skip (state, 7); /* mixlevel + roomtyp */
- } while (chaninfo--);
-
-- bitstream_get (state, 2); /* copyrightb + origbs */
-+ bitstream_skip (state, 2); /* copyrightb + origbs */
-
- if (bitstream_get (state, 1)) /* timecod1e */
-- bitstream_get (state, 14); /* timecod1 */
-+ bitstream_skip (state, 14); /* timecod1 */
- if (bitstream_get (state, 1)) /* timecod2e */
-- bitstream_get (state, 14); /* timecod2 */
-+ bitstream_skip (state, 14); /* timecod2 */
-
- if (bitstream_get (state, 1)) { /* addbsie */
- int addbsil;
-
- addbsil = bitstream_get (state, 6);
- do {
-- bitstream_get (state, 8); /* addbsi */
-+ bitstream_skip (state, 8); /* addbsi */
- } while (addbsil--);
- }
-
-@@ -680,7 +694,7 @@
- state->fbw_expbap[i].exp[0],
- state->fbw_expbap[i].exp + 1))
- return 1;
-- bitstream_get (state, 2); /* gainrng */
-+ bitstream_skip (state, 2); /* gainrng */
- }
- if (lfeexpstr != EXP_REUSE) {
- do_bit_alloc |= 32;
-@@ -755,7 +769,7 @@
- if (bitstream_get (state, 1)) { /* skiple */
- i = bitstream_get (state, 9); /* skipl */
- while (i--)
-- bitstream_get (state, 8);
-+ bitstream_skip (state, 8);
- }
-
- samples = state->samples;
-@@ -896,6 +910,10 @@
-
- void a52_free (a52_state_t * state)
- {
-- free (state->samples);
-+#if defined(__MINGW32__) && defined(HAVE_SSE)
-+ av_free (state->samples);
-+#else
-+ free (state->samples);
-+#endif
- free (state);
- }
diff --git a/liba52/mm_accel.h b/liba52/mm_accel.h
deleted file mode 100644
index 330b174b71..0000000000
--- a/liba52/mm_accel.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * mm_accel.h
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef MM_ACCEL_H
-#define MM_ACCEL_H
-
-/* generic accelerations */
-#define MM_ACCEL_DJBFFT 0x00000001
-
-/* x86 accelerations */
-#define MM_ACCEL_X86_MMX 0x80000000
-#define MM_ACCEL_X86_3DNOW 0x40000000
-#define MM_ACCEL_X86_3DNOWEXT 0x08000000
-#define MM_ACCEL_X86_MMXEXT 0x20000000
-#define MM_ACCEL_X86_SSE 0x10000000
-
-/* PPC accelerations */
-#define MM_ACCEL_PPC_ALTIVEC 0x00010000
-
-uint32_t mm_accel (void);
-
-#endif /* MM_ACCEL_H */
diff --git a/liba52/parse.c b/liba52/parse.c
deleted file mode 100644
index cba5b80ff7..0000000000
--- a/liba52/parse.c
+++ /dev/null
@@ -1,919 +0,0 @@
-/*
- * parse.c
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * Modified for use with MPlayer, changes contained in liba52_changes.diff.
- * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/
- * $Id$
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include "config.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-
-#include "a52.h"
-#include "a52_internal.h"
-#include "bitstream.h"
-#include "tables.h"
-#include "mm_accel.h"
-#include "libavutil/avutil.h"
-
-#if HAVE_MEMALIGN
-/* some systems have memalign() but no declaration for it */
-void * memalign (size_t align, size_t size);
-#endif
-
-typedef struct {
- sample_t q1[2];
- sample_t q2[2];
- sample_t q4;
- int q1_ptr;
- int q2_ptr;
- int q4_ptr;
-} quantizer_t;
-
-static uint8_t halfrate[12] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3};
-
-a52_state_t * a52_init (uint32_t mm_accel)
-{
- a52_state_t * state;
- int i;
-
- state = malloc (sizeof (a52_state_t));
- if (state == NULL)
- return NULL;
-
-#if defined(__MINGW32__) && defined(HAVE_SSE)
- state->samples = av_malloc(256 * 12 * sizeof (sample_t));
-#else
- state->samples = memalign (16, 256 * 12 * sizeof (sample_t));
-#endif
- if(((int)state->samples%16) && (mm_accel&MM_ACCEL_X86_SSE)){
- mm_accel &=~MM_ACCEL_X86_SSE;
- fprintf(stderr, "liba52: unable to get 16 byte aligned memory disabling usage of SSE instructions\n");
- }
-
- if (state->samples == NULL) {
- free (state);
- return NULL;
- }
-
- for (i = 0; i < 256 * 12; i++)
- state->samples[i] = 0;
-
- state->downmixed = 1;
-
- state->lfsr_state = 1;
-
- a52_imdct_init (mm_accel);
- downmix_accel_init(mm_accel);
-
- return state;
-}
-
-sample_t * a52_samples (a52_state_t * state)
-{
- return state->samples;
-}
-
-int a52_syncinfo (uint8_t * buf, int * flags,
- int * sample_rate, int * bit_rate)
-{
- static int rate[] = { 32, 40, 48, 56, 64, 80, 96, 112,
- 128, 160, 192, 224, 256, 320, 384, 448,
- 512, 576, 640};
- static uint8_t lfeon[8] = {0x10, 0x10, 0x04, 0x04, 0x04, 0x01, 0x04, 0x01};
- int frmsizecod;
- int bitrate;
- int half;
- int acmod;
-
- if ((buf[0] != 0x0b) || (buf[1] != 0x77)) /* syncword */
- return 0;
-
- if (buf[5] >= 0x60) /* bsid >= 12 */
- return 0;
- half = halfrate[buf[5] >> 3];
-
- /* acmod, dsurmod and lfeon */
- acmod = buf[6] >> 5;
- *flags = ((((buf[6] & 0xf8) == 0x50) ? A52_DOLBY : acmod) |
- ((buf[6] & lfeon[acmod]) ? A52_LFE : 0));
-
- frmsizecod = buf[4] & 63;
- if (frmsizecod >= 38)
- return 0;
- bitrate = rate [frmsizecod >> 1];
- *bit_rate = (bitrate * 1000) >> half;
-
- switch (buf[4] & 0xc0) {
- case 0:
- *sample_rate = 48000 >> half;
- return 4 * bitrate;
- case 0x40:
- *sample_rate = 44100 >> half;
- return 2 * (320 * bitrate / 147 + (frmsizecod & 1));
- case 0x80:
- *sample_rate = 32000 >> half;
- return 6 * bitrate;
- default:
- return 0;
- }
-}
-
-int a52_frame (a52_state_t * state, uint8_t * buf, int * flags,
- sample_t * level, sample_t bias)
-{
- static sample_t clev[4] = {LEVEL_3DB, LEVEL_45DB, LEVEL_6DB, LEVEL_45DB};
- static sample_t slev[4] = {LEVEL_3DB, LEVEL_6DB, 0, LEVEL_6DB};
- int chaninfo;
- int acmod;
-
- state->fscod = buf[4] >> 6;
- state->halfrate = halfrate[buf[5] >> 3];
- state->acmod = acmod = buf[6] >> 5;
-
- a52_bitstream_set_ptr (state, buf + 6);
- bitstream_skip (state, 3); /* skip acmod we already parsed */
-
- if ((acmod == 2) && (bitstream_get (state, 2) == 2)) /* dsurmod */
- acmod = A52_DOLBY;
-
- if ((acmod & 1) && (acmod != 1))
- state->clev = clev[bitstream_get (state, 2)]; /* cmixlev */
-
- if (acmod & 4)
- state->slev = slev[bitstream_get (state, 2)]; /* surmixlev */
-
- state->lfeon = bitstream_get (state, 1);
-
- state->output = a52_downmix_init (acmod, *flags, level,
- state->clev, state->slev);
- if (state->output < 0)
- return 1;
- if (state->lfeon && (*flags & A52_LFE))
- state->output |= A52_LFE;
- *flags = state->output;
- /* the 2* compensates for differences in imdct */
- state->dynrng = state->level = 2 * *level;
- state->bias = bias;
- state->dynrnge = 1;
- state->dynrngcall = NULL;
- state->cplba.deltbae = DELTA_BIT_NONE;
- state->ba[0].deltbae = state->ba[1].deltbae = state->ba[2].deltbae =
- state->ba[3].deltbae = state->ba[4].deltbae = DELTA_BIT_NONE;
-
- chaninfo = !acmod;
- do {
- bitstream_skip (state, 5); /* dialnorm */
- if (bitstream_get (state, 1)) /* compre */
- bitstream_skip (state, 8); /* compr */
- if (bitstream_get (state, 1)) /* langcode */
- bitstream_skip (state, 8); /* langcod */
- if (bitstream_get (state, 1)) /* audprodie */
- bitstream_skip (state, 7); /* mixlevel + roomtyp */
- } while (chaninfo--);
-
- bitstream_skip (state, 2); /* copyrightb + origbs */
-
- if (bitstream_get (state, 1)) /* timecod1e */
- bitstream_skip (state, 14); /* timecod1 */
- if (bitstream_get (state, 1)) /* timecod2e */
- bitstream_skip (state, 14); /* timecod2 */
-
- if (bitstream_get (state, 1)) { /* addbsie */
- int addbsil;
-
- addbsil = bitstream_get (state, 6);
- do {
- bitstream_skip (state, 8); /* addbsi */
- } while (addbsil--);
- }
-
- return 0;
-}
-
-void a52_dynrng (a52_state_t * state,
- sample_t (* call) (sample_t, void *), void * data)
-{
- state->dynrnge = 0;
- if (call) {
- state->dynrnge = 1;
- state->dynrngcall = call;
- state->dynrngdata = data;
- }
-}
-
-static int parse_exponents (a52_state_t * state, int expstr, int ngrps,
- uint8_t exponent, uint8_t * dest)
-{
- int exps;
-
- while (ngrps--) {
- exps = bitstream_get (state, 7);
-
- exponent += exp_1[exps];
- if (exponent > 24)
- return 1;
-
- switch (expstr) {
- case EXP_D45:
- *(dest++) = exponent;
- *(dest++) = exponent;
- case EXP_D25:
- *(dest++) = exponent;
- case EXP_D15:
- *(dest++) = exponent;
- }
-
- exponent += exp_2[exps];
- if (exponent > 24)
- return 1;
-
- switch (expstr) {
- case EXP_D45:
- *(dest++) = exponent;
- *(dest++) = exponent;
- case EXP_D25:
- *(dest++) = exponent;
- case EXP_D15:
- *(dest++) = exponent;
- }
-
- exponent += exp_3[exps];
- if (exponent > 24)
- return 1;
-
- switch (expstr) {
- case EXP_D45:
- *(dest++) = exponent;
- *(dest++) = exponent;
- case EXP_D25:
- *(dest++) = exponent;
- case EXP_D15:
- *(dest++) = exponent;
- }
- }
-
- return 0;
-}
-
-static int parse_deltba (a52_state_t * state, int8_t * deltba)
-{
- int deltnseg, deltlen, delta, j;
-
- memset (deltba, 0, 50);
-
- deltnseg = bitstream_get (state, 3);
- j = 0;
- do {
- j += bitstream_get (state, 5);
- deltlen = bitstream_get (state, 4);
- delta = bitstream_get (state, 3);
- delta -= (delta >= 4) ? 3 : 4;
- if (!deltlen)
- continue;
- if (j + deltlen >= 50)
- return 1;
- while (deltlen--)
- deltba[j++] = delta;
- } while (deltnseg--);
-
- return 0;
-}
-
-static inline int zero_snr_offsets (int nfchans, a52_state_t * state)
-{
- int i;
-
- if ((state->csnroffst) ||
- (state->chincpl && state->cplba.bai >> 3) || /* cplinu, fsnroffst */
- (state->lfeon && state->lfeba.bai >> 3)) /* fsnroffst */
- return 0;
- for (i = 0; i < nfchans; i++)
- if (state->ba[i].bai >> 3) /* fsnroffst */
- return 0;
- return 1;
-}
-
-static inline int16_t dither_gen (a52_state_t * state)
-{
- int16_t nstate;
-
- nstate = dither_lut[state->lfsr_state >> 8] ^ (state->lfsr_state << 8);
-
- state->lfsr_state = (uint16_t) nstate;
-
- return nstate;
-}
-
-static void coeff_get (a52_state_t * state, sample_t * coeff,
- expbap_t * expbap, quantizer_t * quantizer,
- sample_t level, int dither, int end)
-{
- int i;
- uint8_t * exp;
- int8_t * bap;
- sample_t factor[25];
-
- for (i = 0; i <= 24; i++)
- factor[i] = scale_factor[i] * level;
-
- exp = expbap->exp;
- bap = expbap->bap;
-
- for (i = 0; i < end; i++) {
- int bapi;
-
- bapi = bap[i];
- switch (bapi) {
- case 0:
- if (dither) {
- coeff[i] = dither_gen (state) * LEVEL_3DB * factor[exp[i]];
- continue;
- } else {
- coeff[i] = 0;
- continue;
- }
-
- case -1:
- if (quantizer->q1_ptr >= 0) {
- coeff[i] = quantizer->q1[quantizer->q1_ptr--] * factor[exp[i]];
- continue;
- } else {
- int code;
-
- code = bitstream_get (state, 5);
-
- quantizer->q1_ptr = 1;
- quantizer->q1[0] = q_1_2[code];
- quantizer->q1[1] = q_1_1[code];
- coeff[i] = q_1_0[code] * factor[exp[i]];
- continue;
- }
-
- case -2:
- if (quantizer->q2_ptr >= 0) {
- coeff[i] = quantizer->q2[quantizer->q2_ptr--] * factor[exp[i]];
- continue;
- } else {
- int code;
-
- code = bitstream_get (state, 7);
-
- quantizer->q2_ptr = 1;
- quantizer->q2[0] = q_2_2[code];
- quantizer->q2[1] = q_2_1[code];
- coeff[i] = q_2_0[code] * factor[exp[i]];
- continue;
- }
-
- case 3:
- coeff[i] = q_3[bitstream_get (state, 3)] * factor[exp[i]];
- continue;
-
- case -3:
- if (quantizer->q4_ptr == 0) {
- quantizer->q4_ptr = -1;
- coeff[i] = quantizer->q4 * factor[exp[i]];
- continue;
- } else {
- int code;
-
- code = bitstream_get (state, 7);
-
- quantizer->q4_ptr = 0;
- quantizer->q4 = q_4_1[code];
- coeff[i] = q_4_0[code] * factor[exp[i]];
- continue;
- }
-
- case 4:
- coeff[i] = q_5[bitstream_get (state, 4)] * factor[exp[i]];
- continue;
-
- default:
- coeff[i] = ((bitstream_get_2 (state, bapi) << (16 - bapi)) *
- factor[exp[i]]);
- }
- }
-}
-
-static void coeff_get_coupling (a52_state_t * state, int nfchans,
- sample_t * coeff, sample_t (* samples)[256],
- quantizer_t * quantizer, uint8_t dithflag[5])
-{
- int cplbndstrc, bnd, i, i_end, ch;
- uint8_t * exp;
- int8_t * bap;
- sample_t cplco[5];
-
- exp = state->cpl_expbap.exp;
- bap = state->cpl_expbap.bap;
- bnd = 0;
- cplbndstrc = state->cplbndstrc;
- i = state->cplstrtmant;
- while (i < state->cplendmant) {
- i_end = i + 12;
- while (cplbndstrc & 1) {
- cplbndstrc >>= 1;
- i_end += 12;
- }
- cplbndstrc >>= 1;
- for (ch = 0; ch < nfchans; ch++)
- cplco[ch] = state->cplco[ch][bnd] * coeff[ch];
- bnd++;
-
- while (i < i_end) {
- sample_t cplcoeff;
- int bapi;
-
- bapi = bap[i];
- switch (bapi) {
- case 0:
- cplcoeff = LEVEL_3DB * scale_factor[exp[i]];
- for (ch = 0; ch < nfchans; ch++)
- if ((state->chincpl >> ch) & 1) {
- if (dithflag[ch])
- samples[ch][i] = (cplcoeff * cplco[ch] *
- dither_gen (state));
- else
- samples[ch][i] = 0;
- }
- i++;
- continue;
-
- case -1:
- if (quantizer->q1_ptr >= 0) {
- cplcoeff = quantizer->q1[quantizer->q1_ptr--];
- break;
- } else {
- int code;
-
- code = bitstream_get (state, 5);
-
- quantizer->q1_ptr = 1;
- quantizer->q1[0] = q_1_2[code];
- quantizer->q1[1] = q_1_1[code];
- cplcoeff = q_1_0[code];
- break;
- }
-
- case -2:
- if (quantizer->q2_ptr >= 0) {
- cplcoeff = quantizer->q2[quantizer->q2_ptr--];
- break;
- } else {
- int code;
-
- code = bitstream_get (state, 7);
-
- quantizer->q2_ptr = 1;
- quantizer->q2[0] = q_2_2[code];
- quantizer->q2[1] = q_2_1[code];
- cplcoeff = q_2_0[code];
- break;
- }
-
- case 3:
- cplcoeff = q_3[bitstream_get (state, 3)];
- break;
-
- case -3:
- if (quantizer->q4_ptr == 0) {
- quantizer->q4_ptr = -1;
- cplcoeff = quantizer->q4;
- break;
- } else {
- int code;
-
- code = bitstream_get (state, 7);
-
- quantizer->q4_ptr = 0;
- quantizer->q4 = q_4_1[code];
- cplcoeff = q_4_0[code];
- break;
- }
-
- case 4:
- cplcoeff = q_5[bitstream_get (state, 4)];
- break;
-
- default:
- cplcoeff = bitstream_get_2 (state, bapi) << (16 - bapi);
- }
-
- cplcoeff *= scale_factor[exp[i]];
- for (ch = 0; ch < nfchans; ch++)
- if ((state->chincpl >> ch) & 1)
- samples[ch][i] = cplcoeff * cplco[ch];
- i++;
- }
- }
-}
-
-int a52_block (a52_state_t * state)
-{
- static const uint8_t nfchans_tbl[] = {2, 1, 2, 3, 3, 4, 4, 5, 1, 1, 2};
- static int rematrix_band[4] = {25, 37, 61, 253};
- int i, nfchans, chaninfo;
- uint8_t cplexpstr, chexpstr[5], lfeexpstr, do_bit_alloc, done_cpl;
- uint8_t blksw[5], dithflag[5];
- sample_t coeff[5];
- int chanbias;
- quantizer_t quantizer;
- sample_t * samples;
-
- nfchans = nfchans_tbl[state->acmod];
-
- for (i = 0; i < nfchans; i++)
- blksw[i] = bitstream_get (state, 1);
-
- for (i = 0; i < nfchans; i++)
- dithflag[i] = bitstream_get (state, 1);
-
- chaninfo = !state->acmod;
- do {
- if (bitstream_get (state, 1)) { /* dynrnge */
- int dynrng;
-
- dynrng = bitstream_get_2 (state, 8);
- if (state->dynrnge) {
- sample_t range;
-
- range = ((((dynrng & 0x1f) | 0x20) << 13) *
- scale_factor[3 - (dynrng >> 5)]);
- if (state->dynrngcall)
- range = state->dynrngcall (range, state->dynrngdata);
- state->dynrng = state->level * range;
- }
- }
- } while (chaninfo--);
-
- if (bitstream_get (state, 1)) { /* cplstre */
- state->chincpl = 0;
- if (bitstream_get (state, 1)) { /* cplinu */
- static uint8_t bndtab[16] = {31, 35, 37, 39, 41, 42, 43, 44,
- 45, 45, 46, 46, 47, 47, 48, 48};
- int cplbegf;
- int cplendf;
- int ncplsubnd;
-
- for (i = 0; i < nfchans; i++)
- state->chincpl |= bitstream_get (state, 1) << i;
- switch (state->acmod) {
- case 0: case 1:
- return 1;
- case 2:
- state->phsflginu = bitstream_get (state, 1);
- }
- cplbegf = bitstream_get (state, 4);
- cplendf = bitstream_get (state, 4);
-
- if (cplendf + 3 - cplbegf < 0)
- return 1;
- state->ncplbnd = ncplsubnd = cplendf + 3 - cplbegf;
- state->cplstrtbnd = bndtab[cplbegf];
- state->cplstrtmant = cplbegf * 12 + 37;
- state->cplendmant = cplendf * 12 + 73;
-
- state->cplbndstrc = 0;
- for (i = 0; i < ncplsubnd - 1; i++)
- if (bitstream_get (state, 1)) {
- state->cplbndstrc |= 1 << i;
- state->ncplbnd--;
- }
- }
- }
-
- if (state->chincpl) { /* cplinu */
- int j, cplcoe;
-
- cplcoe = 0;
- for (i = 0; i < nfchans; i++)
- if ((state->chincpl) >> i & 1)
- if (bitstream_get (state, 1)) { /* cplcoe */
- int mstrcplco, cplcoexp, cplcomant;
-
- cplcoe = 1;
- mstrcplco = 3 * bitstream_get (state, 2);
- for (j = 0; j < state->ncplbnd; j++) {
- cplcoexp = bitstream_get (state, 4);
- cplcomant = bitstream_get (state, 4);
- if (cplcoexp == 15)
- cplcomant <<= 14;
- else
- cplcomant = (cplcomant | 0x10) << 13;
- state->cplco[i][j] =
- cplcomant * scale_factor[cplcoexp + mstrcplco];
- }
- }
- if ((state->acmod == 2) && state->phsflginu && cplcoe)
- for (j = 0; j < state->ncplbnd; j++)
- if (bitstream_get (state, 1)) /* phsflg */
- state->cplco[1][j] = -state->cplco[1][j];
- }
-
- if ((state->acmod == 2) && (bitstream_get (state, 1))) { /* rematstr */
- int end;
-
- state->rematflg = 0;
- end = (state->chincpl) ? state->cplstrtmant : 253; /* cplinu */
- i = 0;
- do
- state->rematflg |= bitstream_get (state, 1) << i;
- while (rematrix_band[i++] < end);
- }
-
- cplexpstr = EXP_REUSE;
- lfeexpstr = EXP_REUSE;
- if (state->chincpl) /* cplinu */
- cplexpstr = bitstream_get (state, 2);
- for (i = 0; i < nfchans; i++)
- chexpstr[i] = bitstream_get (state, 2);
- if (state->lfeon)
- lfeexpstr = bitstream_get (state, 1);
-
- for (i = 0; i < nfchans; i++)
- if (chexpstr[i] != EXP_REUSE) {
- if ((state->chincpl >> i) & 1)
- state->endmant[i] = state->cplstrtmant;
- else {
- int chbwcod;
-
- chbwcod = bitstream_get (state, 6);
- if (chbwcod > 60)
- return 1;
- state->endmant[i] = chbwcod * 3 + 73;
- }
- }
-
- do_bit_alloc = 0;
-
- if (cplexpstr != EXP_REUSE) {
- int cplabsexp, ncplgrps;
-
- do_bit_alloc = 64;
- ncplgrps = ((state->cplendmant - state->cplstrtmant) /
- (3 << (cplexpstr - 1)));
- cplabsexp = bitstream_get (state, 4) << 1;
- if (parse_exponents (state, cplexpstr, ncplgrps, cplabsexp,
- state->cpl_expbap.exp + state->cplstrtmant))
- return 1;
- }
- for (i = 0; i < nfchans; i++)
- if (chexpstr[i] != EXP_REUSE) {
- int grp_size, nchgrps;
-
- do_bit_alloc |= 1 << i;
- grp_size = 3 << (chexpstr[i] - 1);
- nchgrps = (state->endmant[i] + grp_size - 4) / grp_size;
- state->fbw_expbap[i].exp[0] = bitstream_get (state, 4);
- if (parse_exponents (state, chexpstr[i], nchgrps,
- state->fbw_expbap[i].exp[0],
- state->fbw_expbap[i].exp + 1))
- return 1;
- bitstream_skip (state, 2); /* gainrng */
- }
- if (lfeexpstr != EXP_REUSE) {
- do_bit_alloc |= 32;
- state->lfe_expbap.exp[0] = bitstream_get (state, 4);
- if (parse_exponents (state, lfeexpstr, 2, state->lfe_expbap.exp[0],
- state->lfe_expbap.exp + 1))
- return 1;
- }
-
- if (bitstream_get (state, 1)) { /* baie */
- do_bit_alloc = -1;
- state->bai = bitstream_get (state, 11);
- }
- if (bitstream_get (state, 1)) { /* snroffste */
- do_bit_alloc = -1;
- state->csnroffst = bitstream_get (state, 6);
- if (state->chincpl) /* cplinu */
- state->cplba.bai = bitstream_get (state, 7);
- for (i = 0; i < nfchans; i++)
- state->ba[i].bai = bitstream_get (state, 7);
- if (state->lfeon)
- state->lfeba.bai = bitstream_get (state, 7);
- }
- if ((state->chincpl) && (bitstream_get (state, 1))) { /* cplleake */
- do_bit_alloc |= 64;
- state->cplfleak = 9 - bitstream_get (state, 3);
- state->cplsleak = 9 - bitstream_get (state, 3);
- }
-
- if (bitstream_get (state, 1)) { /* deltbaie */
- do_bit_alloc = -1;
- if (state->chincpl) /* cplinu */
- state->cplba.deltbae = bitstream_get (state, 2);
- for (i = 0; i < nfchans; i++)
- state->ba[i].deltbae = bitstream_get (state, 2);
- if (state->chincpl && /* cplinu */
- (state->cplba.deltbae == DELTA_BIT_NEW) &&
- parse_deltba (state, state->cplba.deltba))
- return 1;
- for (i = 0; i < nfchans; i++)
- if ((state->ba[i].deltbae == DELTA_BIT_NEW) &&
- parse_deltba (state, state->ba[i].deltba))
- return 1;
- }
-
- if (do_bit_alloc) {
- if (zero_snr_offsets (nfchans, state)) {
- memset (state->cpl_expbap.bap, 0, sizeof (state->cpl_expbap.bap));
- for (i = 0; i < nfchans; i++)
- memset (state->fbw_expbap[i].bap, 0,
- sizeof (state->fbw_expbap[i].bap));
- memset (state->lfe_expbap.bap, 0, sizeof (state->lfe_expbap.bap));
- } else {
- if (state->chincpl && (do_bit_alloc & 64)) /* cplinu */
- a52_bit_allocate (state, &state->cplba, state->cplstrtbnd,
- state->cplstrtmant, state->cplendmant,
- state->cplfleak << 8, state->cplsleak << 8,
- &state->cpl_expbap);
- for (i = 0; i < nfchans; i++)
- if (do_bit_alloc & (1 << i))
- a52_bit_allocate (state, state->ba + i, 0, 0,
- state->endmant[i], 0, 0,
- state->fbw_expbap +i);
- if (state->lfeon && (do_bit_alloc & 32)) {
- state->lfeba.deltbae = DELTA_BIT_NONE;
- a52_bit_allocate (state, &state->lfeba, 0, 0, 7, 0, 0,
- &state->lfe_expbap);
- }
- }
- }
-
- if (bitstream_get (state, 1)) { /* skiple */
- i = bitstream_get (state, 9); /* skipl */
- while (i--)
- bitstream_skip (state, 8);
- }
-
- samples = state->samples;
- if (state->output & A52_LFE)
- samples += 256; /* shift for LFE channel */
-
- chanbias = a52_downmix_coeff (coeff, state->acmod, state->output,
- state->dynrng, state->clev, state->slev);
-
- quantizer.q1_ptr = quantizer.q2_ptr = quantizer.q4_ptr = -1;
- done_cpl = 0;
-
- for (i = 0; i < nfchans; i++) {
- int j;
-
- coeff_get (state, samples + 256 * i, state->fbw_expbap +i, &quantizer,
- coeff[i], dithflag[i], state->endmant[i]);
-
- if ((state->chincpl >> i) & 1) {
- if (!done_cpl) {
- done_cpl = 1;
- coeff_get_coupling (state, nfchans, coeff,
- (sample_t (*)[256])samples, &quantizer,
- dithflag);
- }
- j = state->cplendmant;
- } else
- j = state->endmant[i];
- do
- (samples + 256 * i)[j] = 0;
- while (++j < 256);
- }
-
- if (state->acmod == 2) {
- int j, end, band, rematflg;
-
- end = ((state->endmant[0] < state->endmant[1]) ?
- state->endmant[0] : state->endmant[1]);
-
- i = 0;
- j = 13;
- rematflg = state->rematflg;
- do {
- if (! (rematflg & 1)) {
- rematflg >>= 1;
- j = rematrix_band[i++];
- continue;
- }
- rematflg >>= 1;
- band = rematrix_band[i++];
- if (band > end)
- band = end;
- do {
- sample_t tmp0, tmp1;
-
- tmp0 = samples[j];
- tmp1 = (samples+256)[j];
- samples[j] = tmp0 + tmp1;
- (samples+256)[j] = tmp0 - tmp1;
- } while (++j < band);
- } while (j < end);
- }
-
- if (state->lfeon) {
- if (state->output & A52_LFE) {
- coeff_get (state, samples - 256, &state->lfe_expbap, &quantizer,
- state->dynrng, 0, 7);
- for (i = 7; i < 256; i++)
- (samples-256)[i] = 0;
- a52_imdct_512 (samples - 256, samples + 1536 - 256, state->bias);
- } else {
- /* just skip the LFE coefficients */
- coeff_get (state, samples + 1280, &state->lfe_expbap, &quantizer,
- 0, 0, 7);
- }
- }
-
- i = 0;
- if (nfchans_tbl[state->output & A52_CHANNEL_MASK] < nfchans)
- for (i = 1; i < nfchans; i++)
- if (blksw[i] != blksw[0])
- break;
-
- if (i < nfchans) {
- if (state->downmixed) {
- state->downmixed = 0;
- a52_upmix (samples + 1536, state->acmod, state->output);
- }
-
- for (i = 0; i < nfchans; i++) {
- sample_t bias;
-
- bias = 0;
- if (!(chanbias & (1 << i)))
- bias = state->bias;
-
- if (coeff[i]) {
- if (blksw[i])
- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
- bias);
- else
- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
- bias);
- } else {
- int j;
-
- for (j = 0; j < 256; j++)
- (samples + 256 * i)[j] = bias;
- }
- }
-
- a52_downmix (samples, state->acmod, state->output, state->bias,
- state->clev, state->slev);
- } else {
- nfchans = nfchans_tbl[state->output & A52_CHANNEL_MASK];
-
- a52_downmix (samples, state->acmod, state->output, 0,
- state->clev, state->slev);
-
- if (!state->downmixed) {
- state->downmixed = 1;
- a52_downmix (samples + 1536, state->acmod, state->output, 0,
- state->clev, state->slev);
- }
-
- if (blksw[0])
- for (i = 0; i < nfchans; i++)
- a52_imdct_256 (samples + 256 * i, samples + 1536 + 256 * i,
- state->bias);
- else
- for (i = 0; i < nfchans; i++)
- a52_imdct_512 (samples + 256 * i, samples + 1536 + 256 * i,
- state->bias);
- }
-
- return 0;
-}
-
-void a52_free (a52_state_t * state)
-{
-#if defined(__MINGW32__) && defined(HAVE_SSE)
- av_free (state->samples);
-#else
- free (state->samples);
-#endif
- free (state);
-}
diff --git a/liba52/resample.c b/liba52/resample.c
deleted file mode 100644
index 7284f567f7..0000000000
--- a/liba52/resample.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * resample.c
- * Copyright (C) 2001 Árpád Gereöffy
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * File added for use with MPlayer and not part of original a52dec.
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-// a52_resample_init should find the requested converter (from type flags ->
-// given number of channels) and set up some function pointers...
-
-// a52_resample() should do the conversion.
-
-#include <inttypes.h>
-#include <stdio.h>
-#include "a52.h"
-#include "mm_accel.h"
-#include "config.h"
-#include "mangle.h"
-
-int (* a52_resample) (float * _f, int16_t * s16)=NULL;
-
-#include "resample_c.c"
-
-#if ARCH_X86 || ARCH_X86_64
-#include "resample_mmx.c"
-#endif
-
-#if HAVE_ALTIVEC
-#include "resample_altivec.c"
-#endif
-
-void* a52_resample_init(uint32_t mm_accel,int flags,int chans){
-void* tmp;
-
-#if ARCH_X86 || ARCH_X86_64
- if(mm_accel&MM_ACCEL_X86_MMX){
- tmp=a52_resample_MMX(flags,chans);
- if(tmp){
- if(a52_resample==NULL) fprintf(stderr, "Using MMX optimized resampler\n");
- a52_resample=tmp;
- return tmp;
- }
- }
-#endif
-#if HAVE_ALTIVEC
- if(mm_accel&MM_ACCEL_PPC_ALTIVEC){
- tmp=a52_resample_altivec(flags,chans);
- if(tmp){
- if(a52_resample==NULL) fprintf(stderr, "Using AltiVec optimized resampler\n");
- a52_resample=tmp;
- return tmp;
- }
- }
-#endif
-
- tmp=a52_resample_C(flags,chans);
- if(tmp){
- if(a52_resample==NULL) fprintf(stderr, "No accelerated resampler found\n");
- a52_resample=tmp;
- return tmp;
- }
-
- fprintf(stderr, "Unimplemented resampler for mode 0x%X -> %d channels conversion - Contact MPlayer developers!\n", flags, chans);
- return NULL;
-}
diff --git a/liba52/resample_altivec.c b/liba52/resample_altivec.c
deleted file mode 100644
index 1328a2a021..0000000000
--- a/liba52/resample_altivec.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * resample.c
- * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * File added for use with MPlayer and not part of original a52dec.
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifdef HAVE_ALTIVEC_H
-#include <altivec.h>
-#endif
-
-const vector signed int magic = {0x43c00000,0x43c00000,0x43c00000,0x43c00000};
-
-static inline vector signed short convert16_altivec(vector signed int v1, vector signed int v2)
-{
- register vector signed short result;
- v1 = vec_subs(v1, magic);
- v2 = vec_subs(v2, magic);
- result = vec_packs(v1, v2);
-
- return result;
-}
-
-static void unaligned_store(vector signed short value, int off, int16_t *dst)
-{
- register vector unsigned char align = vec_lvsr(0, dst),
- mask = vec_lvsl(0, dst);
- register vector signed short t0,t1, edges;
-
- t0 = vec_ld(0+off, dst);
- t1 = vec_ld(15+off, dst);
- edges = vec_perm(t1 ,t0, mask);
- t1 = vec_perm(value, edges, align);
- t0 = vec_perm(edges, value, align);
- vec_st(t1, 15+off, dst);
- vec_st(t0, 0+off, dst);
-}
-
-static int a52_resample_STEREO_to_2_altivec(float * _f, int16_t * s16){
-#if 0
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[2*i] = convert (f[i]);
- s16[2*i+1] = convert (f[i+256]);
- }
- return 2*256;
-#else
- int i = 0;
- int32_t * f = (int32_t *) _f;
- register vector signed int f0, f4, f256, f260;
- register vector signed short reven, rodd, r0, r1;
-
- for (i = 0; i < 256; i+= 8) {
- f0 = vec_ld(0, f);
- f4 = vec_ld(16, f);
-
- f256 = vec_ld(1024, f);
- f260 = vec_ld(1040, f);
-
- reven = convert16_altivec(f0, f4);
- rodd = convert16_altivec(f256, f260);
-
- r0 = vec_mergeh(reven, rodd);
- r1 = vec_mergel(reven, rodd);
- // FIXME can be merged to spare some I/O
- unaligned_store(r0, 0, s16);
- unaligned_store(r1, 16, s16);
-
- f += 8;
- s16 += 16;
- }
- return(2*256);
-#endif
-}
-
-static void* a52_resample_altivec(int flags, int ch){
-fprintf(stderr, "Checking for AltiVec resampler : 0x%08x, %d\n", flags, ch);
-
- switch (flags) {
- case A52_CHANNEL:
- case A52_STEREO:
- case A52_DOLBY:
- if(ch==2) return a52_resample_STEREO_to_2_altivec;
- break;
-
- default:
- fprintf(stderr, "Unsupported flags: 0x%08x (%d channels)\n", flags, ch);
- break;
- }
- return NULL;
-}
-
diff --git a/liba52/resample_c.c b/liba52/resample_c.c
deleted file mode 100644
index 58deed6aab..0000000000
--- a/liba52/resample_c.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * resample_c.c
- * Copyright (C) 2001 Árpád Gereöffy
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * File added for use with MPlayer and not part of original a52dec.
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-static inline int16_t convert (int32_t i)
-{
- if (i > 0x43c07fff)
- return 32767;
- else if (i < 0x43bf8000)
- return -32768;
- else
- return i - 0x43c00000;
-}
-
-static int a52_resample_MONO_to_5_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[5*i] = s16[5*i+1] = s16[5*i+2] = s16[5*i+3] = 0;
- s16[5*i+4] = convert (f[i]);
- }
- return 5*256;
-}
-
-static int a52_resample_MONO_to_1_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[i] = convert (f[i]);
- }
- return 1*256;
-}
-
-static int a52_resample_STEREO_to_2_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[2*i] = convert (f[i]);
- s16[2*i+1] = convert (f[i+256]);
- }
- return 2*256;
-}
-
-static int a52_resample_3F_to_5_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[5*i] = convert (f[i]);
- s16[5*i+1] = convert (f[i+512]);
- s16[5*i+2] = s16[5*i+3] = 0;
- s16[5*i+4] = convert (f[i+256]);
- }
- return 5*256;
-}
-
-static int a52_resample_2F_2R_to_4_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[4*i] = convert (f[i]);
- s16[4*i+1] = convert (f[i+256]);
- s16[4*i+2] = convert (f[i+512]);
- s16[4*i+3] = convert (f[i+768]);
- }
- return 4*256;
-}
-
-static int a52_resample_3F_2R_to_5_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[5*i] = convert (f[i]);
- s16[5*i+1] = convert (f[i+512]);
- s16[5*i+2] = convert (f[i+768]);
- s16[5*i+3] = convert (f[i+1024]);
- s16[5*i+4] = convert (f[i+256]);
- }
- return 5*256;
-}
-
-static int a52_resample_MONO_LFE_to_6_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[6*i] = s16[6*i+1] = s16[6*i+2] = s16[6*i+3] = 0;
- s16[6*i+4] = convert (f[i+256]);
- s16[6*i+5] = convert (f[i]);
- }
- return 6*256;
-}
-
-static int a52_resample_STEREO_LFE_to_6_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[6*i] = convert (f[i+256]);
- s16[6*i+1] = convert (f[i+512]);
- s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0;
- s16[6*i+5] = convert (f[i]);
- }
- return 6*256;
-}
-
-static int a52_resample_3F_LFE_to_6_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[6*i] = convert (f[i+256]);
- s16[6*i+1] = convert (f[i+768]);
- s16[6*i+2] = s16[6*i+3] = 0;
- s16[6*i+4] = convert (f[i+512]);
- s16[6*i+5] = convert (f[i]);
- }
- return 6*256;
-}
-
-static int a52_resample_2F_2R_LFE_to_6_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[6*i] = convert (f[i+256]);
- s16[6*i+1] = convert (f[i+512]);
- s16[6*i+2] = convert (f[i+768]);
- s16[6*i+3] = convert (f[i+1024]);
- s16[6*i+4] = 0;
- s16[6*i+5] = convert (f[i]);
- }
- return 6*256;
-}
-
-static int a52_resample_3F_2R_LFE_to_6_C(float * _f, int16_t * s16){
- int i;
- int32_t * f = (int32_t *) _f;
- for (i = 0; i < 256; i++) {
- s16[6*i] = convert (f[i+256]);
- s16[6*i+1] = convert (f[i+768]);
- s16[6*i+2] = convert (f[i+1024]);
- s16[6*i+3] = convert (f[i+1280]);
- s16[6*i+4] = convert (f[i+512]);
- s16[6*i+5] = convert (f[i]);
- }
- return 6*256;
-}
-
-
-static void* a52_resample_C(int flags, int ch){
- switch (flags) {
- case A52_MONO:
- if(ch==5) return a52_resample_MONO_to_5_C;
- if(ch==1) return a52_resample_MONO_to_1_C;
- break;
- case A52_CHANNEL:
- case A52_STEREO:
- case A52_DOLBY:
- if(ch==2) return a52_resample_STEREO_to_2_C;
- break;
- case A52_3F:
- if(ch==5) return a52_resample_3F_to_5_C;
- break;
- case A52_2F2R:
- if(ch==4) return a52_resample_2F_2R_to_4_C;
- break;
- case A52_3F2R:
- if(ch==5) return a52_resample_3F_2R_to_5_C;
- break;
- case A52_MONO | A52_LFE:
- if(ch==6) return a52_resample_MONO_LFE_to_6_C;
- break;
- case A52_CHANNEL | A52_LFE:
- case A52_STEREO | A52_LFE:
- case A52_DOLBY | A52_LFE:
- if(ch==6) return a52_resample_STEREO_LFE_to_6_C;
- break;
- case A52_3F | A52_LFE:
- if(ch==6) return a52_resample_3F_LFE_to_6_C;
- break;
- case A52_2F2R | A52_LFE:
- if(ch==6) return a52_resample_2F_2R_LFE_to_6_C;
- break;
- case A52_3F2R | A52_LFE:
- if(ch==6) return a52_resample_3F_2R_LFE_to_6_C;
- break;
- }
- return NULL;
-}
diff --git a/liba52/resample_mmx.c b/liba52/resample_mmx.c
deleted file mode 100644
index 782d9cd3f0..0000000000
--- a/liba52/resample_mmx.c
+++ /dev/null
@@ -1,541 +0,0 @@
-/*
- * resample_mmx.c
- * Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at)
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * File added for use with MPlayer and not part of original a52dec.
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* optimization TODO / NOTES
- * movntq is slightly faster (0.5% with the current test.c benchmark)
- * (but that is just test.c so that needs to be tested in reality)
- * and it would mean (C / MMX2 / MMX / 3DNOW) versions.
- */
-
-#include "a52_internal.h"
-
-
-static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL;
-static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL;
-static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL;
-static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL;
-
-static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-512, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "movq "MANGLE(wm1100)", %%mm3 \n\t"
- "movq "MANGLE(wm0101)", %%mm4 \n\t"
- "movq "MANGLE(wm1010)", %%mm5 \n\t"
- "pxor %%mm6, %%mm6 \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_S", 2), %%mm0 \n\t"
- "movq 8(%1, %%"REG_S", 2), %%mm1\n\t"
- "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "pand %%mm4, %%mm0 \n\t"
- "pand %%mm5, %%mm1 \n\t"
- "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0
- "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0
- "pand %%mm3, %%mm0 \n\t"
- "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0
- "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B
- "pand %%mm3, %%mm1 \n\t"
- "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0
- "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0
- "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 B
- "add $8, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1280), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 5*256;
-}
-
-static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
-/* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it
-#if HAVE_SSE
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "1: \n\t"
- "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t"
- "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t"
- "movq %%mm0, %%mm1 \n\t"
- "punpcklwd %%mm2, %%mm0 \n\t"
- "punpckhwd %%mm2, %%mm1 \n\t"
- "movq %%mm0, (%0, %%"REG_S") \n\t"
- "movq %%mm1, 8(%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+512), "r" (f+256)
- :"%"REG_S, "memory"
- );*/
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_S"), %%mm0 \n\t"
- "movq 8(%1, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm2\n\t"
- "movq 1032(%1, %%"REG_S"), %%mm3\n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "packssdw %%mm3, %%mm2 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "punpcklwd %%mm2, %%mm0 \n\t"
- "punpckhwd %%mm2, %%mm1 \n\t"
- "movq %%mm0, (%0, %%"REG_S") \n\t"
- "movq %%mm1, 8(%0, %%"REG_S") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+512), "r" (f+256)
- :"%"REG_S, "memory"
- );
- return 2*256;
-}
-
-static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "pxor %%mm6, %%mm6 \n\t"
- "movq %%mm7, %%mm5 \n\t"
- "punpckldq %%mm6, %%mm5 \n\t"
- "1: \n\t"
- "movd (%1, %%"REG_S"), %%mm0 \n\t"
- "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t"
- "movd 1024(%1, %%"REG_S"), %%mm1\n\t"
- "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t"
- "movd 2052(%1, %%"REG_S"), %%mm2\n\t"
- "movq %%mm7, %%mm3 \n\t"
- "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t"
- "movd 8(%1, %%"REG_S"), %%mm4 \n\t"
- "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t"
- "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
- "sar $1, %%"REG_D" \n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm5, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "psubd %%mm7, %%mm4 \n\t"
- "packssdw %%mm6, %%mm0 \n\t"
- "packssdw %%mm2, %%mm1 \n\t"
- "packssdw %%mm4, %%mm3 \n\t"
- "movq %%mm0, (%0, %%"REG_D") \n\t"
- "movq %%mm1, 8(%0, %%"REG_D") \n\t"
- "movq %%mm3, 16(%0, %%"REG_D") \n\t"
- "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
- "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t"
- "movd 2060(%1, %%"REG_S"), %%mm2\n\t"
- "movq %%mm7, %%mm3 \n\t"
- "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t"
- "pxor %%mm0, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm5, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "packssdw %%mm3, %%mm2 \n\t"
- "movq %%mm0, 24(%0, %%"REG_D") \n\t"
- "movq %%mm2, 32(%0, %%"REG_D") \n\t"
-
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1280), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 5*256;
-}
-
-static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "1: \n\t"
- "movq (%1, %%"REG_S"), %%mm0 \n\t"
- "movq 8(%1, %%"REG_S"), %%mm1 \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm2\n\t"
- "movq 1032(%1, %%"REG_S"), %%mm3\n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "packssdw %%mm3, %%mm2 \n\t"
- "movq 2048(%1, %%"REG_S"), %%mm3\n\t"
- "movq 2056(%1, %%"REG_S"), %%mm4\n\t"
- "movq 3072(%1, %%"REG_S"), %%mm5\n\t"
- "movq 3080(%1, %%"REG_S"), %%mm6\n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "psubd %%mm7, %%mm4 \n\t"
- "psubd %%mm7, %%mm5 \n\t"
- "psubd %%mm7, %%mm6 \n\t"
- "packssdw %%mm4, %%mm3 \n\t"
- "packssdw %%mm6, %%mm5 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "punpcklwd %%mm2, %%mm0 \n\t"
- "punpckhwd %%mm2, %%mm1 \n\t"
- "punpcklwd %%mm5, %%mm3 \n\t"
- "punpckhwd %%mm5, %%mm4 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm5 \n\t"
- "punpckldq %%mm3, %%mm0 \n\t"
- "punpckhdq %%mm3, %%mm2 \n\t"
- "punpckldq %%mm4, %%mm1 \n\t"
- "punpckhdq %%mm4, %%mm5 \n\t"
- "movq %%mm0, (%0, %%"REG_S",2) \n\t"
- "movq %%mm2, 8(%0, %%"REG_S",2) \n\t"
- "movq %%mm1, 16(%0, %%"REG_S",2)\n\t"
- "movq %%mm5, 24(%0, %%"REG_S",2)\n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1024), "r" (f+256)
- :"%"REG_S, "memory"
- );
- return 4*256;
-}
-
-static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "1: \n\t"
- "movd (%1, %%"REG_S"), %%mm0 \n\t"
- "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t"
- "movd 3072(%1, %%"REG_S"), %%mm1\n\t"
- "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t"
- "movd 1024(%1, %%"REG_S"), %%mm2\n\t"
- "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t"
- "movd 2052(%1, %%"REG_S"), %%mm3\n\t"
- "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t"
- "movd 4100(%1, %%"REG_S"), %%mm4\n\t"
- "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t"
- "movd 8(%1, %%"REG_S"), %%mm5 \n\t"
- "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t"
- "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t"
- "sar $1, %%"REG_D" \n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "psubd %%mm7, %%mm4 \n\t"
- "psubd %%mm7, %%mm5 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "packssdw %%mm3, %%mm2 \n\t"
- "packssdw %%mm5, %%mm4 \n\t"
- "movq %%mm0, (%0, %%"REG_D") \n\t"
- "movq %%mm2, 8(%0, %%"REG_D") \n\t"
- "movq %%mm4, 16(%0, %%"REG_D") \n\t"
-
- "movd 3080(%1, %%"REG_S"), %%mm0\n\t"
- "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t"
- "movd 1032(%1, %%"REG_S"), %%mm1\n\t"
- "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t"
- "movd 2060(%1, %%"REG_S"), %%mm2\n\t"
- "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t"
- "movd 4108(%1, %%"REG_S"), %%mm3\n\t"
- "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "packssdw %%mm3, %%mm2 \n\t"
- "movq %%mm0, 24(%0, %%"REG_D") \n\t"
- "movq %%mm2, 32(%0, %%"REG_D") \n\t"
-
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1280), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 5*256;
-}
-
-static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "pxor %%mm6, %%mm6 \n\t"
- "1: \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
- "movq 1032(%1, %%"REG_S"), %%mm1\n\t"
- "movq (%1, %%"REG_S"), %%mm2 \n\t"
- "movq 8(%1, %%"REG_S"), %%mm3 \n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "packssdw %%mm3, %%mm2 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "punpcklwd %%mm2, %%mm0 \n\t"
- "punpckhwd %%mm2, %%mm1 \n\t"
- "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
- "movq %%mm6, (%0, %%"REG_D") \n\t"
- "movd %%mm0, 8(%0, %%"REG_D") \n\t"
- "punpckhdq %%mm0, %%mm0 \n\t"
- "movq %%mm6, 12(%0, %%"REG_D") \n\t"
- "movd %%mm0, 20(%0, %%"REG_D") \n\t"
- "movq %%mm6, 24(%0, %%"REG_D") \n\t"
- "movd %%mm1, 32(%0, %%"REG_D") \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movq %%mm6, 36(%0, %%"REG_D") \n\t"
- "movd %%mm1, 44(%0, %%"REG_D") \n\t"
- "add $16, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1536), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 6*256;
-}
-
-static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "pxor %%mm6, %%mm6 \n\t"
- "1: \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
- "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
- "movq (%1, %%"REG_S"), %%mm5 \n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm5 \n\t"
- "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
-
- "pxor %%mm4, %%mm4 \n\t"
- "packssdw %%mm5, %%mm0 \n\t" // FfAa
- "packssdw %%mm4, %%mm1 \n\t" // 00Bb
- "punpckhwd %%mm0, %%mm4 \n\t" // F0f0
- "punpcklwd %%mm1, %%mm0 \n\t" // BAba
- "movq %%mm0, %%mm1 \n\t" // BAba
- "punpckldq %%mm4, %%mm3 \n\t" // f0XX
- "punpckldq %%mm6, %%mm0 \n\t" // 00ba
- "punpckhdq %%mm1, %%mm3 \n\t" // BAf0
-
- "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba
- "punpckhdq %%mm4, %%mm0 \n\t" // F000
- "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0
- "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000
- "add $8, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1536), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 6*256;
-}
-
-static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
- "pxor %%mm6, %%mm6 \n\t"
- "1: \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
- "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
- "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
- "movq (%1, %%"REG_S"), %%mm5 \n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm4 \n\t"
- "psubd %%mm7, %%mm5 \n\t"
- "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
-
- "packssdw %%mm4, %%mm0 \n\t" // EeAa
- "packssdw %%mm5, %%mm1 \n\t" // FfBb
- "movq %%mm0, %%mm2 \n\t" // EeAa
- "punpcklwd %%mm1, %%mm0 \n\t" // BAba
- "punpckhwd %%mm1, %%mm2 \n\t" // FEfe
- "movq %%mm0, %%mm1 \n\t" // BAba
- "punpckldq %%mm6, %%mm0 \n\t" // 00ba
- "punpckhdq %%mm1, %%mm1 \n\t" // BABA
-
- "movq %%mm0, (%0, %%"REG_D") \n\t"
- "punpckhdq %%mm2, %%mm0 \n\t" // FE00
- "punpckldq %%mm1, %%mm2 \n\t" // BAfe
- "movq %%mm2, 8(%0, %%"REG_D") \n\t"
- "movq %%mm0, 16(%0, %%"REG_D") \n\t"
- "add $8, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1536), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 6*256;
-}
-
-static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
-// "pxor %%mm6, %%mm6 \n\t"
- "1: \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
- "movq 2048(%1, %%"REG_S"), %%mm1\n\t"
- "movq 3072(%1, %%"REG_S"), %%mm2\n\t"
- "movq 4096(%1, %%"REG_S"), %%mm3\n\t"
- "movq (%1, %%"REG_S"), %%mm5 \n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "psubd %%mm7, %%mm5 \n\t"
- "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
-
- "packssdw %%mm2, %%mm0 \n\t" // CcAa
- "packssdw %%mm3, %%mm1 \n\t" // DdBb
- "packssdw %%mm5, %%mm5 \n\t" // FfFf
- "movq %%mm0, %%mm2 \n\t" // CcAa
- "punpcklwd %%mm1, %%mm0 \n\t" // BAba
- "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
- "pxor %%mm4, %%mm4 \n\t" // 0000
- "punpcklwd %%mm5, %%mm4 \n\t" // F0f0
- "movq %%mm0, %%mm1 \n\t" // BAba
- "movq %%mm4, %%mm3 \n\t" // F0f0
- "punpckldq %%mm2, %%mm0 \n\t" // dcba
- "punpckhdq %%mm1, %%mm1 \n\t" // BABA
- "punpckldq %%mm1, %%mm4 \n\t" // BAf0
- "punpckhdq %%mm3, %%mm2 \n\t" // F0DC
-
- "movq %%mm0, (%0, %%"REG_D") \n\t"
- "movq %%mm4, 8(%0, %%"REG_D") \n\t"
- "movq %%mm2, 16(%0, %%"REG_D") \n\t"
- "add $8, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1536), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 6*256;
-}
-
-static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){
- int32_t * f = (int32_t *) _f;
- __asm__ volatile(
- "mov $-1024, %%"REG_S" \n\t"
- "movq "MANGLE(magicF2W)", %%mm7 \n\t"
-// "pxor %%mm6, %%mm6 \n\t"
- "1: \n\t"
- "movq 1024(%1, %%"REG_S"), %%mm0\n\t"
- "movq 3072(%1, %%"REG_S"), %%mm1\n\t"
- "movq 4096(%1, %%"REG_S"), %%mm2\n\t"
- "movq 5120(%1, %%"REG_S"), %%mm3\n\t"
- "movq 2048(%1, %%"REG_S"), %%mm4\n\t"
- "movq (%1, %%"REG_S"), %%mm5 \n\t"
- "psubd %%mm7, %%mm0 \n\t"
- "psubd %%mm7, %%mm1 \n\t"
- "psubd %%mm7, %%mm2 \n\t"
- "psubd %%mm7, %%mm3 \n\t"
- "psubd %%mm7, %%mm4 \n\t"
- "psubd %%mm7, %%mm5 \n\t"
- "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t"
-
- "packssdw %%mm2, %%mm0 \n\t" // CcAa
- "packssdw %%mm3, %%mm1 \n\t" // DdBb
- "packssdw %%mm4, %%mm4 \n\t" // EeEe
- "packssdw %%mm5, %%mm5 \n\t" // FfFf
- "movq %%mm0, %%mm2 \n\t" // CcAa
- "punpcklwd %%mm1, %%mm0 \n\t" // BAba
- "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
- "punpcklwd %%mm5, %%mm4 \n\t" // FEfe
- "movq %%mm0, %%mm1 \n\t" // BAba
- "movq %%mm4, %%mm3 \n\t" // FEfe
- "punpckldq %%mm2, %%mm0 \n\t" // dcba
- "punpckhdq %%mm1, %%mm1 \n\t" // BABA
- "punpckldq %%mm1, %%mm4 \n\t" // BAfe
- "punpckhdq %%mm3, %%mm2 \n\t" // FEDC
-
- "movq %%mm0, (%0, %%"REG_D") \n\t"
- "movq %%mm4, 8(%0, %%"REG_D") \n\t"
- "movq %%mm2, 16(%0, %%"REG_D") \n\t"
- "add $8, %%"REG_S" \n\t"
- " jnz 1b \n\t"
- "emms \n\t"
- :: "r" (s16+1536), "r" (f+256)
- :"%"REG_S, "%"REG_D, "memory"
- );
- return 6*256;
-}
-
-
-static void* a52_resample_MMX(int flags, int ch){
- switch (flags) {
- case A52_MONO:
- if(ch==5) return a52_resample_MONO_to_5_MMX;
- break;
- case A52_CHANNEL:
- case A52_STEREO:
- case A52_DOLBY:
- if(ch==2) return a52_resample_STEREO_to_2_MMX;
- break;
- case A52_3F:
- if(ch==5) return a52_resample_3F_to_5_MMX;
- break;
- case A52_2F2R:
- if(ch==4) return a52_resample_2F_2R_to_4_MMX;
- break;
- case A52_3F2R:
- if(ch==5) return a52_resample_3F_2R_to_5_MMX;
- break;
- case A52_MONO | A52_LFE:
- if(ch==6) return a52_resample_MONO_LFE_to_6_MMX;
- break;
- case A52_CHANNEL | A52_LFE:
- case A52_STEREO | A52_LFE:
- case A52_DOLBY | A52_LFE:
- if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX;
- break;
- case A52_3F | A52_LFE:
- if(ch==6) return a52_resample_3F_LFE_to_6_MMX;
- break;
- case A52_2F2R | A52_LFE:
- if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX;
- break;
- case A52_3F2R | A52_LFE:
- if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX;
- break;
- }
- return NULL;
-}
-
-
diff --git a/liba52/srfftp.h b/liba52/srfftp.h
deleted file mode 100644
index b6bb5ab4cd..0000000000
--- a/liba52/srfftp.h
+++ /dev/null
@@ -1,303 +0,0 @@
-
-/*
- * srfftp.h
- *
- * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
- *
- * 64 and 128 point split radix fft for ac3dec
- *
- * The algorithm is desribed in the book:
- * "Computational Frameworks of the Fast Fourier Transform".
- *
- * The ideas and the the organization of code borrowed from djbfft written by
- * D. J. Bernstein <djb@cr.py.to>. djbff can be found at
- * http://cr.yp.to/djbfft.html.
- *
- * srfftp.h is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * srfftp.h is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifndef SRFFTP_H__
-#define SRFFTP_H__
-
-static complex_t delta16[4] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.92387953251129, -0.38268343236509},
- {0.70710678118655, -0.70710678118655},
- {0.38268343236509, -0.92387953251129}};
-
-static complex_t delta16_3[4] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.38268343236509, -0.92387953251129},
- {-0.70710678118655, -0.70710678118655},
- {-0.92387953251129, 0.38268343236509}};
-
-static complex_t delta32[8] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.98078528040323, -0.19509032201613},
- {0.92387953251129, -0.38268343236509},
- {0.83146961230255, -0.55557023301960},
- {0.70710678118655, -0.70710678118655},
- {0.55557023301960, -0.83146961230255},
- {0.38268343236509, -0.92387953251129},
- {0.19509032201613, -0.98078528040323}};
-
-static complex_t delta32_3[8] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.83146961230255, -0.55557023301960},
- {0.38268343236509, -0.92387953251129},
- {-0.19509032201613, -0.98078528040323},
- {-0.70710678118655, -0.70710678118655},
- {-0.98078528040323, -0.19509032201613},
- {-0.92387953251129, 0.38268343236509},
- {-0.55557023301960, 0.83146961230255}};
-
-static complex_t delta64[16] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.99518472667220, -0.09801714032956},
- {0.98078528040323, -0.19509032201613},
- {0.95694033573221, -0.29028467725446},
- {0.92387953251129, -0.38268343236509},
- {0.88192126434836, -0.47139673682600},
- {0.83146961230255, -0.55557023301960},
- {0.77301045336274, -0.63439328416365},
- {0.70710678118655, -0.70710678118655},
- {0.63439328416365, -0.77301045336274},
- {0.55557023301960, -0.83146961230255},
- {0.47139673682600, -0.88192126434835},
- {0.38268343236509, -0.92387953251129},
- {0.29028467725446, -0.95694033573221},
- {0.19509032201613, -0.98078528040323},
- {0.09801714032956, -0.99518472667220}};
-
-static complex_t delta64_3[16] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.95694033573221, -0.29028467725446},
- {0.83146961230255, -0.55557023301960},
- {0.63439328416365, -0.77301045336274},
- {0.38268343236509, -0.92387953251129},
- {0.09801714032956, -0.99518472667220},
- {-0.19509032201613, -0.98078528040323},
- {-0.47139673682600, -0.88192126434836},
- {-0.70710678118655, -0.70710678118655},
- {-0.88192126434835, -0.47139673682600},
- {-0.98078528040323, -0.19509032201613},
- {-0.99518472667220, 0.09801714032956},
- {-0.92387953251129, 0.38268343236509},
- {-0.77301045336274, 0.63439328416365},
- {-0.55557023301960, 0.83146961230255},
- {-0.29028467725446, 0.95694033573221}};
-
-static complex_t delta128[32] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.99879545620517, -0.04906767432742},
- {0.99518472667220, -0.09801714032956},
- {0.98917650996478, -0.14673047445536},
- {0.98078528040323, -0.19509032201613},
- {0.97003125319454, -0.24298017990326},
- {0.95694033573221, -0.29028467725446},
- {0.94154406518302, -0.33688985339222},
- {0.92387953251129, -0.38268343236509},
- {0.90398929312344, -0.42755509343028},
- {0.88192126434836, -0.47139673682600},
- {0.85772861000027, -0.51410274419322},
- {0.83146961230255, -0.55557023301960},
- {0.80320753148064, -0.59569930449243},
- {0.77301045336274, -0.63439328416365},
- {0.74095112535496, -0.67155895484702},
- {0.70710678118655, -0.70710678118655},
- {0.67155895484702, -0.74095112535496},
- {0.63439328416365, -0.77301045336274},
- {0.59569930449243, -0.80320753148064},
- {0.55557023301960, -0.83146961230255},
- {0.51410274419322, -0.85772861000027},
- {0.47139673682600, -0.88192126434835},
- {0.42755509343028, -0.90398929312344},
- {0.38268343236509, -0.92387953251129},
- {0.33688985339222, -0.94154406518302},
- {0.29028467725446, -0.95694033573221},
- {0.24298017990326, -0.97003125319454},
- {0.19509032201613, -0.98078528040323},
- {0.14673047445536, -0.98917650996478},
- {0.09801714032956, -0.99518472667220},
- {0.04906767432742, -0.99879545620517}};
-
-static complex_t delta128_3[32] __attribute__((aligned(16))) =
- { {1.00000000000000, 0.00000000000000},
- {0.98917650996478, -0.14673047445536},
- {0.95694033573221, -0.29028467725446},
- {0.90398929312344, -0.42755509343028},
- {0.83146961230255, -0.55557023301960},
- {0.74095112535496, -0.67155895484702},
- {0.63439328416365, -0.77301045336274},
- {0.51410274419322, -0.85772861000027},
- {0.38268343236509, -0.92387953251129},
- {0.24298017990326, -0.97003125319454},
- {0.09801714032956, -0.99518472667220},
- {-0.04906767432742, -0.99879545620517},
- {-0.19509032201613, -0.98078528040323},
- {-0.33688985339222, -0.94154406518302},
- {-0.47139673682600, -0.88192126434836},
- {-0.59569930449243, -0.80320753148065},
- {-0.70710678118655, -0.70710678118655},
- {-0.80320753148065, -0.59569930449243},
- {-0.88192126434835, -0.47139673682600},
- {-0.94154406518302, -0.33688985339222},
- {-0.98078528040323, -0.19509032201613},
- {-0.99879545620517, -0.04906767432742},
- {-0.99518472667220, 0.09801714032956},
- {-0.97003125319454, 0.24298017990326},
- {-0.92387953251129, 0.38268343236509},
- {-0.85772861000027, 0.51410274419322},
- {-0.77301045336274, 0.63439328416365},
- {-0.67155895484702, 0.74095112535496},
- {-0.55557023301960, 0.83146961230255},
- {-0.42755509343028, 0.90398929312344},
- {-0.29028467725446, 0.95694033573221},
- {-0.14673047445536, 0.98917650996478}};
-
-#define HSQRT2 0.707106781188;
-
-#define TRANSZERO(A0,A4,A8,A12) { \
- u_r = wTB[0].real; \
- v_i = u_r - wTB[k*2].real; \
- u_r += wTB[k*2].real; \
- u_i = wTB[0].imag; \
- v_r = wTB[k*2].imag - u_i; \
- u_i += wTB[k*2].imag; \
- a_r = A0.real; \
- a_i = A0.imag; \
- a1_r = a_r; \
- a1_r += u_r; \
- A0.real = a1_r; \
- a_r -= u_r; \
- A8.real = a_r; \
- a1_i = a_i; \
- a1_i += u_i; \
- A0.imag = a1_i; \
- a_i -= u_i; \
- A8.imag = a_i; \
- a1_r = A4.real; \
- a1_i = A4.imag; \
- a_r = a1_r; \
- a_r -= v_r; \
- A4.real = a_r; \
- a1_r += v_r; \
- A12.real = a1_r; \
- a_i = a1_i; \
- a_i -= v_i; \
- A4.imag = a_i; \
- a1_i += v_i; \
- A12.imag = a1_i; \
- }
-
-#define TRANSHALF_16(A2,A6,A10,A14) {\
- u_r = wTB[2].real; \
- a_r = u_r; \
- u_i = wTB[2].imag; \
- u_r += u_i; \
- u_i -= a_r; \
- a_r = wTB[6].real; \
- a1_r = a_r; \
- a_i = wTB[6].imag; \
- a_r = a_i - a_r; \
- a_i += a1_r; \
- v_i = u_r - a_r; \
- u_r += a_r; \
- v_r = u_i + a_i; \
- u_i -= a_i; \
- v_i *= HSQRT2; \
- v_r *= HSQRT2; \
- u_r *= HSQRT2; \
- u_i *= HSQRT2; \
- a_r = A2.real; \
- a_i = A2.imag; \
- a1_r = a_r; \
- a1_r += u_r; \
- A2.real = a1_r; \
- a_r -= u_r; \
- A10.real = a_r; \
- a1_i = a_i; \
- a1_i += u_i; \
- A2.imag = a1_i; \
- a_i -= u_i; \
- A10.imag = a_i; \
- a1_r = A6.real; \
- a1_i = A6.imag; \
- a_r = a1_r; \
- a1_r += v_r; \
- A6.real = a1_r; \
- a_r -= v_r; \
- A14.real = a_r; \
- a_i = a1_i; \
- a1_i -= v_i; \
- A6.imag = a1_i; \
- a_i += v_i; \
- A14.imag = a_i; \
- }
-
-#define TRANS(A1,A5,A9,A13,WT,WB,D,D3) { \
- u_r = WT.real; \
- a_r = u_r; \
- a_r *= D.imag; \
- u_r *= D.real; \
- a_i = WT.imag; \
- a1_i = a_i; \
- a1_i *= D.real; \
- a_i *= D.imag; \
- u_r -= a_i; \
- u_i = a_r; \
- u_i += a1_i; \
- a_r = WB.real; \
- a1_r = a_r; \
- a1_r *= D3.real; \
- a_r *= D3.imag; \
- a_i = WB.imag; \
- a1_i = a_i; \
- a_i *= D3.real; \
- a1_i *= D3.imag; \
- a1_r -= a1_i; \
- a_r += a_i; \
- v_i = u_r - a1_r; \
- u_r += a1_r; \
- v_r = a_r - u_i; \
- u_i += a_r; \
- a_r = A1.real; \
- a_i = A1.imag; \
- a1_r = a_r; \
- a1_r += u_r; \
- A1.real = a1_r; \
- a_r -= u_r; \
- A9.real = a_r; \
- a1_i = a_i; \
- a1_i += u_i; \
- A1.imag = a1_i; \
- a_i -= u_i; \
- A9.imag = a_i; \
- a1_r = A5.real; \
- a1_i = A5.imag; \
- a_r = a1_r; \
- a1_r -= v_r; \
- A5.real = a1_r; \
- a_r += v_r; \
- A13.real = a_r; \
- a_i = a1_i; \
- a1_i -= v_i; \
- A5.imag = a1_i; \
- a_i += v_i; \
- A13.imag = a_i; \
- }
-
-#endif
diff --git a/liba52/srfftp_3dnow.h b/liba52/srfftp_3dnow.h
deleted file mode 100644
index 9ce224c726..0000000000
--- a/liba52/srfftp_3dnow.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * srfftp.h
- *
- * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
- *
- * 64 and 128 point split radix fft for ac3dec
- *
- * The algorithm is desribed in the book:
- * "Computational Frameworks of the Fast Fourier Transform".
- *
- * The ideas and the the organization of code borrowed from djbfft written by
- * D. J. Bernstein <djb@cr.py.to>. djbff can be found at
- * http://cr.yp.to/djbfft.html.
- *
- * srfftp.h is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * srfftp.h is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations
- * by Nick Kurshev <nickols_k@mail.ru>
- */
-
-#ifndef SRFFTP_3DNOW_H__
-#define SRFFTP_3DNOW_H__
-
-typedef struct
-{
- unsigned long val[2];
-}i_cmplx_t;
-
-#define TRANS_FILL_MM6_MM7_3DNOW()\
- __asm__ volatile(\
- "movq %1, %%mm7\n\t"\
- "movq %0, %%mm6\n\t"\
- ::"m"(x_plus_minus_3dnow),\
- "m"(x_minus_plus_3dnow)\
- :"memory");
-
-#if HAVE_AMD3DNOWEXT
-#define PSWAP_MM(mm_base,mm_hlp) "pswapd "mm_base","mm_base"\n\t"
-#else
-#define PSWAP_MM(mm_base,mm_hlp)\
- "movq "mm_base","mm_hlp"\n\t"\
- "psrlq $32, "mm_base"\n\t"\
- "punpckldq "mm_hlp","mm_base"\n\t"
-#endif
-#if HAVE_AMD3DNOWEXT
-#define PFNACC_MM(mm_base,mm_hlp) "pfnacc "mm_base","mm_base"\n\t"
-#else
-#define PFNACC_MM(mm_base,mm_hlp)\
- "movq "mm_base","mm_hlp"\n\t"\
- "psrlq $32,"mm_hlp"\n\t"\
- "punpckldq "mm_hlp","mm_hlp"\n\t"\
- "pfsub "mm_hlp","mm_base"\n\t"
-#endif
-
-#define TRANSZERO_3DNOW(A0,A4,A8,A12) \
-{ \
- __asm__ volatile(\
- "movq %4, %%mm0\n\t" /* mm0 = wTB[0]*/\
- "movq %5, %%mm1\n\t" /* mm1 = wTB[k*2]*/ \
- "movq %%mm0, %%mm5\n\t"/*u.re = wTB[0].re + wTB[k*2].re;*/\
- "pfadd %%mm1, %%mm5\n\t"/*u.im = wTB[0].im + wTB[k*2].im; mm5 = u*/\
- "pxor %%mm6, %%mm0\n\t"/*mm0 = wTB[0].re | -wTB[0].im */\
- "pxor %%mm7, %%mm1\n\t"/*mm1 = -wTB[k*2].re | wTB[k*2].im */\
- "pfadd %%mm1, %%mm0\n\t"/*v.im = wTB[0].re - wTB[k*2].re;*/\
- "movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\
- PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\
- "movq %6, %%mm0\n\t" /* a1 = A0;*/\
- "movq %7, %%mm2\n\t" /* a1 = A4;*/\
- "movq %%mm0, %%mm1\n\t"\
- "movq %%mm2, %%mm3\n\t"\
- "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\
- "pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\
- "movq %%mm0, %0\n\t"\
- "pfsub %%mm5, %%mm1\n\t" /*A1 = a1 - u;*/\
- "movq %%mm2, %3\n\t"\
- "pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\
- "movq %%mm1, %1\n\t"\
- "movq %%mm3, %2"\
- :"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\
- :"m"(wTB[0]), "m"(wTB[k*2]), "m"(A0), "m"(A4)\
- :"memory");\
-}
-
-#define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\
-{\
- __asm__ volatile(\
- "movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\
- "movq %%mm0, %%mm1\n\t"\
- "pxor %%mm7, %%mm1\n\t"\
- "pfacc %%mm1, %%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\
- "movq %5, %%mm1\n\t" /*a.re = wTB[6].im - wTB[6].re; */\
- "movq %%mm1, %%mm2\n\t"\
- "pxor %%mm7, %%mm1\n\t"\
- "pfacc %%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re; mm1 = a*/\
- "movq %%mm1, %%mm2\n\t"\
- "pxor %%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\
- "movq %%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\
- "pfadd %%mm2, %%mm3\n\t"\
- PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\
- "pxor %%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\
- "pfadd %%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\
- "movq %8, %%mm2\n\t"\
- "pfmul %%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\
- "pfmul %%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\
- "movq %6, %%mm1\n\t" /* a1 = A2;*/\
- "movq %7, %%mm5\n\t" /* a1 = A6;*/\
- "movq %%mm1, %%mm2\n\t"\
- "movq %%mm3, %%mm4\n\t"\
- "pfadd %%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\
- "pxor %%mm6, %%mm4\n\t"/*A6.re = a1.re + v.re;*/\
- "pfsub %%mm0, %%mm2\n\t" /*A2 = a1 - u;*/\
- "pxor %%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\
- "movq %%mm1, %0\n\t"\
- "movq %%mm2, %1\n\t"\
- "movq %%mm5, %%mm2\n\t"\
- "pfadd %%mm4, %%mm5\n\t"/*A6.im = a1.im - v.im;*/\
- "pfadd %%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\
- "movq %%mm5, %2\n\t"\
- "movq %%mm2, %3"\
- :"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\
- :"m"(wTB[2]), "m"(wTB[6]), "m"(A2), "m"(A6), "m"(HSQRT2_3DNOW)\
- :"memory");\
-}
-
-#define TRANS_3DNOW(A1,A5,A9,A13,WT,WB,D,D3)\
-{ \
- __asm__ volatile(\
- "movq %1, %%mm4\n\t"\
- "movq %%mm4, %%mm5\n\t"\
- "punpckldq %%mm4, %%mm4\n\t"/*mm4 = D.re | D.re */\
- "punpckhdq %%mm5, %%mm5\n\t"/*mm5 = D.im | D.im */\
- "movq %0, %%mm0\n\t"\
- "pfmul %%mm0, %%mm4\n\t"/* mm4 =u.re | u.im */\
- "pfmul %%mm0, %%mm5\n\t"/* mm5 = a.re | a.im */\
- PSWAP_MM("%%mm5","%%mm3")\
- "pxor %%mm7, %%mm5\n\t"\
- "pfadd %%mm5, %%mm4\n\t"/* mm4 = u*/\
- "movq %3, %%mm1\n\t"\
- "movq %2, %%mm0\n\t"\
- PSWAP_MM("%%mm1","%%mm3")\
- "movq %%mm0, %%mm2\n\t"\
- "pfmul %%mm1, %%mm0\n\t"/* mm0 = a*/\
- "pfmul %3, %%mm2\n\t"/* mm2 = v*/\
- PFNACC_MM("%%mm2","%%mm3")\
- "pfacc %%mm0, %%mm0\n\t"\
- "movq %%mm4, %%mm5\n\t"\
- "punpckldq %%mm0,%%mm2\n\t"/*mm2 = v.re | a.re*/\
- "pxor %%mm6, %%mm5\n\t"\
- "movq %%mm2, %%mm3\n\t"\
- "pxor %%mm7, %%mm3\n\t"\
- "pfadd %%mm3, %%mm5\n\t"\
- PSWAP_MM("%%mm5","%%mm3")/* mm5 = v*/\
- "pfadd %%mm2, %%mm4\n\t"\
- :\
- :"m"(WT), "m"(D), "m"(WB), "m"(D3)\
- :"memory");\
- __asm__ volatile(\
- "movq %4, %%mm0\n\t"/* a1 = A1*/\
- "movq %5, %%mm2\n\t"/* a1 = A5*/\
- "movq %%mm0, %%mm1\n\t"\
- "movq %%mm2, %%mm3\n\t"\
- "pfadd %%mm4, %%mm0\n\t"/*A1 = a1 + u*/\
- "pfsub %%mm5, %%mm2\n\t"/*A5 = a1 - v*/\
- "movq %%mm0, %0\n\t"\
- "pfsub %%mm4, %%mm1\n\t"/*A9 = a1 - u*/\
- "movq %%mm2, %2\n\t"\
- "pfadd %%mm5, %%mm3\n\t"/*A9 = a1 + v*/\
- "movq %%mm1, %1\n\t"\
- "movq %%mm3, %3"\
- :"=m"(A1), "=m"(A9), "=m"(A5), "=m"(A13)\
- :"m"(A1), "m"(A5)\
- :"memory");\
-}
-
-#endif
diff --git a/liba52/tables.h b/liba52/tables.h
deleted file mode 100644
index a35543db7c..0000000000
--- a/liba52/tables.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * tables.h
- * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
- * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
- *
- * This file is part of a52dec, a free ATSC A-52 stream decoder.
- * See http://liba52.sourceforge.net/ for updates.
- *
- * a52dec is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * a52dec is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-static const int8_t exp_1[128] = {
- -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 25,25,25
-};
-static const int8_t exp_2[128] = {
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- 25,25,25
-};
-static const int8_t exp_3[128] = {
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- 25,25,25
-};
-
-#define Q0 ((-2 << 15) / 3.0)
-#define Q1 (0)
-#define Q2 ((2 << 15) / 3.0)
-
-static const sample_t q_1_0[32] = {
- Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
- Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
- Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
- 0,0,0,0,0
-};
-
-static const sample_t q_1_1[32] = {
- Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
- Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
- Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
- 0,0,0,0,0
-};
-
-static const sample_t q_1_2[32] = {
- Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
- Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
- Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
- 0,0,0,0,0
-};
-
-#undef Q0
-#undef Q1
-#undef Q2
-
-#define Q0 ((-4 << 15) / 5.0)
-#define Q1 ((-2 << 15) / 5.0)
-#define Q2 (0)
-#define Q3 ((2 << 15) / 5.0)
-#define Q4 ((4 << 15) / 5.0)
-
-static const sample_t q_2_0[128] = {
- Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
- Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
- Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
- Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
- Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
- 0,0,0
-};
-
-static const sample_t q_2_1[128] = {
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- 0,0,0
-};
-
-static const sample_t q_2_2[128] = {
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- 0,0,0
-};
-
-#undef Q0
-#undef Q1
-#undef Q2
-#undef Q3
-#undef Q4
-
-static const sample_t q_3[8] = {
- (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0, 0,
- ( 2 << 15)/7.0, ( 4 << 15)/7.0, ( 6 << 15)/7.0, 0
-};
-
-#define Q0 ((-10 << 15) / 11.0)
-#define Q1 ((-8 << 15) / 11.0)
-#define Q2 ((-6 << 15) / 11.0)
-#define Q3 ((-4 << 15) / 11.0)
-#define Q4 ((-2 << 15) / 11.0)
-#define Q5 (0)
-#define Q6 ((2 << 15) / 11.0)
-#define Q7 ((4 << 15) / 11.0)
-#define Q8 ((6 << 15) / 11.0)
-#define Q9 ((8 << 15) / 11.0)
-#define QA ((10 << 15) / 11.0)
-
-static const sample_t q_4_0[128] = {
- Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
- Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
- Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
- Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
- Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
- Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
- Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
- Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
- Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
- Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
- QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
- 0, 0, 0, 0, 0, 0, 0
-};
-
-static const sample_t q_4_1[128] = {
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- 0, 0, 0, 0, 0, 0, 0
-};
-
-#undef Q0
-#undef Q1
-#undef Q2
-#undef Q3
-#undef Q4
-#undef Q5
-#undef Q6
-#undef Q7
-#undef Q8
-#undef Q9
-#undef QA
-
-static const sample_t q_5[16] = {
- (-14 << 15)/15.0,(-12 << 15)/15.0,(-10 << 15)/15.0,
- ( -8 << 15)/15.0,( -6 << 15)/15.0,( -4 << 15)/15.0,
- ( -2 << 15)/15.0, 0 ,( 2 << 15)/15.0,
- ( 4 << 15)/15.0,( 6 << 15)/15.0,( 8 << 15)/15.0,
- ( 10 << 15)/15.0,( 12 << 15)/15.0,( 14 << 15)/15.0,
- 0
-};
-
-static const sample_t scale_factor[25] = {
- 0.000030517578125,
- 0.0000152587890625,
- 0.00000762939453125,
- 0.000003814697265625,
- 0.0000019073486328125,
- 0.00000095367431640625,
- 0.000000476837158203125,
- 0.0000002384185791015625,
- 0.00000011920928955078125,
- 0.000000059604644775390625,
- 0.0000000298023223876953125,
- 0.00000001490116119384765625,
- 0.000000007450580596923828125,
- 0.0000000037252902984619140625,
- 0.00000000186264514923095703125,
- 0.000000000931322574615478515625,
- 0.0000000004656612873077392578125,
- 0.00000000023283064365386962890625,
- 0.000000000116415321826934814453125,
- 0.0000000000582076609134674072265625,
- 0.00000000002910383045673370361328125,
- 0.000000000014551915228366851806640625,
- 0.0000000000072759576141834259033203125,
- 0.00000000000363797880709171295166015625,
- 0.000000000001818989403545856475830078125
-};
-
-static const uint16_t dither_lut[256] = {
- 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
- 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
- 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198,
- 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176,
- 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf,
- 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321,
- 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202,
- 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec,
- 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761,
- 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f,
- 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac,
- 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642,
- 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb,
- 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415,
- 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536,
- 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8,
- 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c,
- 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2,
- 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1,
- 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f,
- 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6,
- 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58,
- 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b,
- 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95,
- 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918,
- 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6,
- 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5,
- 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b,
- 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82,
- 0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c,
- 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f,
- 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
-};
diff --git a/liba52/test.c b/liba52/test.c
deleted file mode 100644
index 2f445226a5..0000000000
--- a/liba52/test.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * liba52 sample by A'rpi/ESP-team
- * Reads an AC-3 stream from stdin, decodes and downmixes to s16 stereo PCM
- * and writes it to stdout. The resulting stream is playable with sox:
- * play -c2 -r48000 -sw -fs out.sw
- *
- * Copyright (C) 2001 Árpád Gereöffy
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-//#define TIMING //needs Pentium or newer
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <string.h>
-
-#include "a52.h"
-#include "mm_accel.h"
-#include "cpudetect.h"
-
-static a52_state_t *state;
-static uint8_t buf[3840];
-static int buf_size=0;
-
-static int16_t out_buf[6*256*6];
-
-void mp_msg( int x, const char *format, ... ) // stub for cpudetect.c
-{
-}
-
-#ifdef TIMING
-static inline long long rdtsc(void)
-{
- long long l;
- __asm__ volatile("rdtsc\n\t"
- : "=A" (l)
- );
-// printf("%d\n", int(l/1000));
- return l;
-}
-
-#define STARTTIMING t=rdtsc();
-#define ENDTIMING sum+=rdtsc()-t; t=rdtsc();
-#else
-#define STARTTIMING ;
-#define ENDTIMING ;
-#endif
-
-
-int main(void){
-int accel=0;
-int sample_rate=0;
-int bit_rate=0;
-#ifdef TIMING
-long long t, sum=0, min=256*256*256*64;
-#endif
-
- FILE *temp= stdout;
- stdout= stderr; //EVIL HACK FIXME
- GetCpuCaps(&gCpuCaps);
- stdout= temp;
-// gCpuCaps.hasMMX=0;
-// gCpuCaps.hasSSE=0;
- if(gCpuCaps.hasMMX) accel |= MM_ACCEL_X86_MMX;
- if(gCpuCaps.hasMMX2) accel |= MM_ACCEL_X86_MMXEXT;
- if(gCpuCaps.hasSSE) accel |= MM_ACCEL_X86_SSE;
- if(gCpuCaps.has3DNow) accel |= MM_ACCEL_X86_3DNOW;
-// if(gCpuCaps.has3DNowExt) accel |= MM_ACCEL_X86_3DNOWEXT;
-
- state = a52_init (accel);
- if (state == NULL) {
- fprintf (stderr, "A52 init failed\n");
- return 1;
- }
-
-while(1){
- int length,i;
- int16_t *s16;
- sample_t level=1, bias=384;
- int flags=0;
- int channels=0;
-
- while(buf_size<7){
- int c=getchar();
- if(c<0) goto eof;
- buf[buf_size++]=c;
- }
-STARTTIMING
- length = a52_syncinfo (buf, &flags, &sample_rate, &bit_rate);
-ENDTIMING
- if(!length){
- // bad file => resync
- memcpy(buf,buf+1,6);
- --buf_size;
- continue;
- }
- fprintf(stderr,"sync. %d bytes 0x%X %d Hz %d kbit\n",length,flags,sample_rate,bit_rate);
- while(buf_size<length){
- buf[buf_size++]=getchar();
- }
-
- buf_size=0;
-
- // decode:
- flags=A52_STEREO; //A52_STEREO; //A52_DOLBY; //A52_STEREO; // A52_DOLBY // A52_2F2R // A52_3F2R | A52_LFE
- channels=2;
-
- flags |= A52_ADJUST_LEVEL;
-STARTTIMING
- if (a52_frame (state, buf, &flags, &level, bias))
- { fprintf(stderr,"error at decoding\n"); continue; }
-ENDTIMING
-
- // a52_dynrng (state, NULL, NULL); // disable dynamic range compensation
-
-STARTTIMING
- a52_resample_init(accel,flags,channels);
- s16 = out_buf;
- for (i = 0; i < 6; i++) {
- if (a52_block (state))
- { fprintf(stderr,"error at sampling\n"); break; }
- // float->int + channels interleaving:
- s16+=a52_resample(a52_samples(state),s16);
-ENDTIMING
- }
-#ifdef TIMING
-if(sum<min) min=sum;
-sum=0;
-#endif
- fwrite(out_buf,6*256*2*channels,1,stdout);
-
-}
-
-eof:
-#ifdef TIMING
-fprintf(stderr, "%4.4fk cycles\n",min/1000.0);
-sum=0;
-#endif
-return 0;
-}
diff --git a/libmpcodecs/ad_liba52.c b/libmpcodecs/ad_liba52.c
index cac671718a..2e2006abc1 100644
--- a/libmpcodecs/ad_liba52.c
+++ b/libmpcodecs/ad_liba52.c
@@ -35,14 +35,9 @@
#include "libaf/af_format.h"
-#ifdef CONFIG_LIBA52_INTERNAL
-#include "liba52/a52.h"
-#include "liba52/mm_accel.h"
-#else
#include <a52dec/a52.h>
#include <a52dec/mm_accel.h>
int (* a52_resample) (float * _f, int16_t * s16);
-#endif
static a52_state_t *a52_state;
static uint32_t a52_flags=0;
@@ -150,11 +145,7 @@ static int preinit(sh_audio_t *sh)
{
/* Dolby AC3 audio: */
/* however many channels, 2 bytes in a word, 256 samples in a block, 6 blocks in a frame */
-#ifdef CONFIG_LIBA52_INTERNAL
- if (sh->samplesize < 2) sh->samplesize = 2;
-#else
if (sh->samplesize < 4) sh->samplesize = 4;
-#endif
sh->audio_out_minsize=audio_output_channels*sh->samplesize*256*6;
sh->audio_in_minsize=3840;
a52_level = 1.0;
@@ -208,9 +199,7 @@ static int init(sh_audio_t *sh_audio)
mp_msg(MSGT_DECAUDIO,MSGL_ERR,"A52 init failed\n");
return 0;
}
-#ifndef CONFIG_LIBA52_INTERNAL
sh_audio->sample_format = AF_FORMAT_FLOAT_NE;
-#endif
if(a52_fillbuff(sh_audio)<0){
mp_msg(MSGT_DECAUDIO,MSGL_ERR,"A52 sync failed\n");
return 0;
@@ -283,12 +272,7 @@ while(sh_audio->channels>0){
break;
}
} else
-#ifdef CONFIG_LIBA52_INTERNAL
- if(a52_resample_init(a52_accel,flags,sh_audio->channels)) break;
- --sh_audio->channels; /* try to decrease no. of channels*/
-#else
break;
-#endif
}
if(sh_audio->channels<=0){
mp_msg(MSGT_DECAUDIO,MSGL_ERR,"a52: no resampler. try different channel setup!\n");
diff --git a/libmpdemux/muxer_mpeg.c b/libmpdemux/muxer_mpeg.c
index 868c20da43..dcf2e879eb 100644
--- a/libmpdemux/muxer_mpeg.c
+++ b/libmpdemux/muxer_mpeg.c
@@ -41,8 +41,6 @@
#ifdef CONFIG_LIBA52
#include <a52dec/a52.h>
-#else
-#include "liba52/a52.h"
#endif
#define PACK_HEADER_START_CODE 0x01ba