aboutsummaryrefslogtreecommitdiffhomepage
path: root/mp3lib
diff options
context:
space:
mode:
authorGravatar nick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-14 09:32:51 +0000
committerGravatar nick <nick@b3059339-0415-0410-9bf9-f77b7e298cf2>2002-01-14 09:32:51 +0000
commit2da69665f6d72867a7507510ef87b1268e9752e9 (patch)
tree60eddd579ba73f1a47916901e217098d5d102255 /mp3lib
parent8629e9e819cfce49406f55c2655bd1ad6f72d0df (diff)
S->C
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@4148 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'mp3lib')
-rw-r--r--mp3lib/Makefile2
-rw-r--r--mp3lib/decode_i586.c307
-rw-r--r--mp3lib/decode_i586.s321
3 files changed, 308 insertions, 322 deletions
diff --git a/mp3lib/Makefile b/mp3lib/Makefile
index eb526f3d15..35307f2ae1 100644
--- a/mp3lib/Makefile
+++ b/mp3lib/Makefile
@@ -9,7 +9,7 @@ OPTFLAGS := $(OPTFLAGS:-O4=-O0)
endif
CFLAGS = $(OPTFLAGS) $(EXTRA_INC)
ifeq ($(TARGET_ARCH_X86),yes)
-SRCS += d_cpu.s decode_i586.s
+SRCS += d_cpu.s decode_i586.c
OBJS += d_cpu.o decode_i586.o
ifeq ($(TARGET_MMX),yes)
SRCS += decode_MMX.c dct64_MMX.s tabinit_MMX.c
diff --git a/mp3lib/decode_i586.c b/mp3lib/decode_i586.c
new file mode 100644
index 0000000000..d0ec6e758c
--- /dev/null
+++ b/mp3lib/decode_i586.c
@@ -0,0 +1,307 @@
+/*
+* mpg123_synth_1to1 works the same way as the c version of this
+* file. only two types of changes have been made:
+* - reordered floating point instructions to
+* prevent pipeline stalls
+* - made WRITE_SAMPLE use integer instead of
+* (slower) floating point
+* all kinds of x86 processors should benefit from these
+* modifications.
+*
+* useful sources of information on optimizing x86 code include:
+*
+* Intel Architecture Optimization Manual
+* http://www.intel.com/design/pentium/manuals/242816.htm
+*
+* Cyrix 6x86 Instruction Set Summary
+* ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
+*
+* AMD-K5 Processor Software Development
+* http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
+*
+* Stefan Bieschewski <stb@acm.org>
+*
+* $Id$
+*/
+#define real float /* ugly - but only way */
+
+static long buffs[1088]; /* work area used by synth_1to1_pent's asm as "buffs" and "buffs+2176" and read there with float loads; NOTE(review): the byte offsets presume a 4-byte long (4352 bytes total) - confirm */
+static long bo=1; /* offset into buffs, kept in 0..15: the asm stores (bo - 1) & 15 back on every call with channel == 0 */
+
+int synth_1to1_pent(real *bandPtr, int channel, short *samples)
+{ /* Calls dct64 on bandPtr, then windows the result with decwin into 32 clipped 16-bit samples stored at every second short of samples; returns how many samples were clipped. */
+ real tmp[3]; /* tmp[0] is asm operand 4: scratch slot holding the window offset (bo or bo+1) across the dct64 call */
+ register int retval; /* clip count, delivered by the asm in eax */
+ __asm __volatile( /* NOTE(review): the "4+%4" and "8+%4" operands below compensate for preceding pushl's, which presumes the "m" operands are addressed relative to esp - confirm for the compiler flags in use */
+" movl %1,%%eax\n\t"/* eax = bandPtr */
+" movl %3,%%esi\n\t" /* esi = samples (output cursor) */
+" xorl %%edi,%%edi\n\t" /* edi = 0: clipped-sample counter */
+" movl bo,%%ebp\n\t" /* ebp = bo */
+" cmpl %%edi,%2\n\t" /* channel == 0 ? */
+" jne .L48\n\t"
+" decl %%ebp\n\t" /* channel 0 only: bo = (bo - 1) & 15 */
+" andl $15,%%ebp\n\t"
+" movl %%ebp,bo\n\t"
+" movl $buffs,%%ecx\n\t" /* ecx = start of buffs */
+" jmp .L49\n\t"
+".L48:\n\t"
+" addl $2,%%esi\n\t" /* non-zero channel: start one short further into samples */
+" movl $buffs+2176,%%ecx\n\t" /* ecx = buffs + 2176 bytes */
+".L49:\n\t"
+" testl $1,%%ebp\n\t" /* bo odd? */
+" je .L50\n\t"
+" movl %%ecx,%%ebx\n\t" /* odd: ebx = ecx, the base read by the window loops */
+" movl %%ebp,%4\n\t" /* tmp = bo */
+" pushl %%eax\n\t" /* dct64 argument 3: bandPtr */
+" movl 4+%4,%%edx\n\t" /* edx = tmp (+4 offsets the pushl above) */
+" leal (%%ebx,%%edx,4),%%eax\n\t" /* eax = ebx + 4*bo */
+" pushl %%eax\n\t" /* dct64 argument 2 */
+" movl 8+%4,%%eax\n\t" /* eax = tmp (+8 offsets the two pushl's) */
+" incl %%eax\n\t"
+" andl $15,%%eax\n\t"
+" leal 1088(,%%eax,4),%%eax\n\t" /* eax = 1088 + 4*((bo + 1) & 15) */
+" addl %%ebx,%%eax\n\t"
+" jmp .L74\n\t"
+".L50:\n\t"
+" leal 1088(%%ecx),%%ebx\n\t" /* even: ebx = ecx + 1088 */
+" leal 1(%%ebp),%%edx\n\t"
+" movl %%edx,%4\n\t" /* tmp = bo + 1 */
+" pushl %%eax\n\t" /* dct64 argument 3: bandPtr */
+" leal 1092(%%ecx,%%ebp,4),%%eax\n\t" /* argument 2: ecx + 1088 + 4*(bo + 1) */
+" pushl %%eax\n\t"
+" leal (%%ecx,%%ebp,4),%%eax\n\t" /* argument 1: ecx + 4*bo */
+".L74:\n\t"
+" pushl %%eax\n\t" /* dct64 argument 1 */
+" call dct64\n\t"
+" addl $12,%%esp\n\t" /* drop the three pushed arguments */
+" movl %4,%%edx\n\t"
+" leal 0(,%%edx,4),%%edx\n\t"
+" movl $decwin+64,%%eax\n\t"
+" movl %%eax,%%ecx\n\t"
+" subl %%edx,%%ecx\n\t" /* ecx = decwin + 64 - 4*tmp: window cursor */
+" movl $16,%%ebp\n\t" /* first loop: 16 output samples */
+".L55:\n\t"
+" flds (%%ecx)\n\t" /* 16 products of floats at ecx and ebx, combined with alternating fsubrp/faddp */
+" fmuls (%%ebx)\n\t"
+" flds 4(%%ecx)\n\t"
+" fmuls 4(%%ebx)\n\t"
+" fxch %%st(1)\n\t"
+" flds 8(%%ecx)\n\t"
+" fmuls 8(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 12(%%ecx)\n\t"
+" fmuls 12(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 16(%%ecx)\n\t"
+" fmuls 16(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 20(%%ecx)\n\t"
+" fmuls 20(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 24(%%ecx)\n\t"
+" fmuls 24(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 28(%%ecx)\n\t"
+" fmuls 28(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 32(%%ecx)\n\t"
+" fmuls 32(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 36(%%ecx)\n\t"
+" fmuls 36(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 40(%%ecx)\n\t"
+" fmuls 40(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 44(%%ecx)\n\t"
+" fmuls 44(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 48(%%ecx)\n\t"
+" fmuls 48(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 52(%%ecx)\n\t"
+" fmuls 52(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 56(%%ecx)\n\t"
+" fmuls 56(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds 60(%%ecx)\n\t"
+" fmuls 60(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" subl $4,%%esp\n\t" /* reserve a stack slot for the integer result */
+" faddp %%st,%%st(1)\n\t"
+" fxch %%st(1)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" fistpl (%%esp)\n\t" /* convert the sum to a 32-bit integer on the stack */
+" popl %%eax\n\t"
+" cmpl $32767,%%eax\n\t" /* clip to the 16-bit range; every clipped sample increments edi */
+" jg 1f\n\t"
+" cmpl $-32768,%%eax\n\t"
+" jl 2f\n\t"
+" movw %%ax,(%%esi)\n\t" /* in range: store the low 16 bits */
+" jmp 4f\n\t"
+"1: movw $32767,(%%esi)\n\t"
+" jmp 3f\n\t"
+"2: movw $-32768,(%%esi)\n\t"
+"3: incl %%edi\n\t"
+"4:\n\t"
+".L54:\n\t"
+" addl $64,%%ebx\n\t" /* ebx += 64 bytes */
+" subl $-128,%%ecx\n\t" /* ecx += 128 bytes */
+" addl $4,%%esi\n\t" /* esi += 2 shorts */
+" decl %%ebp\n\t"
+" jnz .L55\n\t"
+" flds (%%ecx)\n\t" /* middle sample: 8 products at byte offsets 0,8,...,56, all added */
+" fmuls (%%ebx)\n\t"
+" flds 8(%%ecx)\n\t"
+" fmuls 8(%%ebx)\n\t"
+" flds 16(%%ecx)\n\t"
+" fmuls 16(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 24(%%ecx)\n\t"
+" fmuls 24(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 32(%%ecx)\n\t"
+" fmuls 32(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 40(%%ecx)\n\t"
+" fmuls 40(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 48(%%ecx)\n\t"
+" fmuls 48(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" flds 56(%%ecx)\n\t"
+" fmuls 56(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" subl $4,%%esp\n\t" /* reserve a stack slot for the integer result */
+" faddp %%st,%%st(1)\n\t"
+" fxch %%st(1)\n\t"
+" faddp %%st,%%st(1)\n\t"
+" fistpl (%%esp)\n\t"
+" popl %%eax\n\t"
+" cmpl $32767,%%eax\n\t" /* same clip-and-store sequence as in the first loop */
+" jg 1f\n\t"
+" cmpl $-32768,%%eax\n\t"
+" jl 2f\n\t"
+" movw %%ax,(%%esi)\n\t"
+" jmp 4f\n\t"
+"1: movw $32767,(%%esi)\n\t"
+" jmp 3f\n\t"
+"2: movw $-32768,(%%esi)\n\t"
+"3: incl %%edi\n\t"
+"4:\n\t"
+".L62:\n\t"
+" addl $-64,%%ebx\n\t" /* step ebx back 64 bytes */
+" addl $4,%%esi\n\t"
+" movl %4,%%edx\n\t"
+" leal -128(%%ecx,%%edx,8),%%ecx\n\t" /* ecx = ecx - 128 + 8*tmp */
+" movl $15,%%ebp\n\t" /* last loop: 15 output samples */
+".L68:\n\t"
+" flds -4(%%ecx)\n\t" /* window read downwards from ecx (-4 ... -60, then 0); first product negated, the rest combined with fsubrp */
+" fchs\n\t"
+" fmuls (%%ebx)\n\t"
+" flds -8(%%ecx)\n\t"
+" fmuls 4(%%ebx)\n\t"
+" fxch %%st(1)\n\t"
+" flds -12(%%ecx)\n\t"
+" fmuls 8(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -16(%%ecx)\n\t"
+" fmuls 12(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -20(%%ecx)\n\t"
+" fmuls 16(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -24(%%ecx)\n\t"
+" fmuls 20(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -28(%%ecx)\n\t"
+" fmuls 24(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -32(%%ecx)\n\t"
+" fmuls 28(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -36(%%ecx)\n\t"
+" fmuls 32(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -40(%%ecx)\n\t"
+" fmuls 36(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -44(%%ecx)\n\t"
+" fmuls 40(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -48(%%ecx)\n\t"
+" fmuls 44(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -52(%%ecx)\n\t"
+" fmuls 48(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -56(%%ecx)\n\t"
+" fmuls 52(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds -60(%%ecx)\n\t"
+" fmuls 56(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" flds (%%ecx)\n\t"
+" fmuls 60(%%ebx)\n\t"
+" fxch %%st(2)\n\t"
+" subl $4,%%esp\n\t" /* reserve a stack slot for the integer result */
+" fsubrp %%st,%%st(1)\n\t"
+" fxch %%st(1)\n\t"
+" fsubrp %%st,%%st(1)\n\t"
+" fistpl (%%esp)\n\t"
+" popl %%eax\n\t"
+" cmpl $32767,%%eax\n\t" /* same clip-and-store sequence as in the first loop */
+" jg 1f\n\t"
+" cmpl $-32768,%%eax\n\t"
+" jl 2f\n\t"
+" movw %%ax,(%%esi)\n\t"
+" jmp 4f\n\t"
+"1: movw $32767,(%%esi)\n\t"
+" jmp 3f\n\t"
+"2: movw $-32768,(%%esi)\n\t"
+"3: incl %%edi\n\t"
+"4:\n\t"
+".L67:\n\t"
+" addl $-64,%%ebx\n\t" /* ebx -= 64 bytes */
+" addl $-128,%%ecx\n\t" /* ecx -= 128 bytes */
+" addl $4,%%esi\n\t" /* esi += 2 shorts */
+" decl %%ebp\n\t"
+" jnz .L68\n\t"
+" movl %%edi,%%eax\n\t" /* result: number of clipped samples */
+ :"=a"(retval)
+ :"m"(bandPtr),"m"(channel),"m"(samples),"m"(tmp[0])
+ :"memory","%ebp","%edi","%esi","%ebx","%ecx","%edx"); /* ecx and edx are written above and may be clobbered by the dct64 call, so they must be declared too */
+ return retval;
+}
diff --git a/mp3lib/decode_i586.s b/mp3lib/decode_i586.s
deleted file mode 100644
index a4dc904071..0000000000
--- a/mp3lib/decode_i586.s
+++ /dev/null
@@ -1,321 +0,0 @@
-/
-/ mpg123_synth_1to1 works the same way as the c version of this
-/ file. only two types of changes have been made:
-/ - reordered floating point instructions to
-/ prevent pipline stalls
-/ - made WRITE_SAMPLE use integer instead of
-/ (slower) floating point
-/ all kinds of x86 processors should benefit from these
-/ modifications.
-/
-/ useful sources of information on optimizing x86 code include:
-/
-/ Intel Architecture Optimization Manual
-/ http://www.intel.com/design/pentium/manuals/242816.htm
-/
-/ Cyrix 6x86 Instruction Set Summary
-/ ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
-/
-/ AMD-K5 Processor Software Development
-/ http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
-/
-/ Stefan Bieschewski <stb@acm.org>
-/
-/ $Id$
-/
-.bss
- .comm buffs,4352,4
-.data
- .align 4
-bo:
- .long 1
-.section .rodata
- .align 8
-.LC0:
- .long 0x0,0x40dfffc0
- .align 8
-.LC1:
- .long 0x0,0xc0e00000
- .align 8
-.text
-.globl synth_1to1_pent
-synth_1to1_pent:
- subl $12,%esp
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %ebx
- movl 32(%esp),%eax
- movl 40(%esp),%esi
- xorl %edi,%edi
- movl bo,%ebp
- cmpl %edi,36(%esp)
- jne .L48
- decl %ebp
- andl $15,%ebp
- movl %ebp,bo
- movl $buffs,%ecx
- jmp .L49
-.L48:
- addl $2,%esi
- movl $buffs+2176,%ecx
-.L49:
- testl $1,%ebp
- je .L50
- movl %ecx,%ebx
- movl %ebp,16(%esp)
- pushl %eax
- movl 20(%esp),%edx
- leal (%ebx,%edx,4),%eax
- pushl %eax
- movl 24(%esp),%eax
- incl %eax
- andl $15,%eax
- leal 1088(,%eax,4),%eax
- addl %ebx,%eax
- jmp .L74
-.L50:
- leal 1088(%ecx),%ebx
- leal 1(%ebp),%edx
- movl %edx,16(%esp)
- pushl %eax
- leal 1092(%ecx,%ebp,4),%eax
- pushl %eax
- leal (%ecx,%ebp,4),%eax
-.L74:
- pushl %eax
- call dct64
- addl $12,%esp
- movl 16(%esp),%edx
- leal 0(,%edx,4),%edx
- movl $decwin+64,%eax
- movl %eax,%ecx
- subl %edx,%ecx
- movl $16,%ebp
-.L55:
- flds (%ecx)
- fmuls (%ebx)
- flds 4(%ecx)
- fmuls 4(%ebx)
- fxch %st(1)
- flds 8(%ecx)
- fmuls 8(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds 12(%ecx)
- fmuls 12(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 16(%ecx)
- fmuls 16(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds 20(%ecx)
- fmuls 20(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 24(%ecx)
- fmuls 24(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds 28(%ecx)
- fmuls 28(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 32(%ecx)
- fmuls 32(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds 36(%ecx)
- fmuls 36(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 40(%ecx)
- fmuls 40(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds 44(%ecx)
- fmuls 44(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 48(%ecx)
- fmuls 48(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds 52(%ecx)
- fmuls 52(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 56(%ecx)
- fmuls 56(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds 60(%ecx)
- fmuls 60(%ebx)
- fxch %st(2)
- subl $4,%esp
- faddp %st,%st(1)
- fxch %st(1)
- fsubrp %st,%st(1)
- fistpl (%esp)
- popl %eax
- cmpl $32767,%eax
- jg 1f
- cmpl $-32768,%eax
- jl 2f
- movw %ax,(%esi)
- jmp 4f
-1: movw $32767,(%esi)
- jmp 3f
-2: movw $-32768,(%esi)
-3: incl %edi
-4:
-.L54:
- addl $64,%ebx
- subl $-128,%ecx
- addl $4,%esi
- decl %ebp
- jnz .L55
- flds (%ecx)
- fmuls (%ebx)
- flds 8(%ecx)
- fmuls 8(%ebx)
- flds 16(%ecx)
- fmuls 16(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 24(%ecx)
- fmuls 24(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 32(%ecx)
- fmuls 32(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 40(%ecx)
- fmuls 40(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 48(%ecx)
- fmuls 48(%ebx)
- fxch %st(2)
- faddp %st,%st(1)
- flds 56(%ecx)
- fmuls 56(%ebx)
- fxch %st(2)
- subl $4,%esp
- faddp %st,%st(1)
- fxch %st(1)
- faddp %st,%st(1)
- fistpl (%esp)
- popl %eax
- cmpl $32767,%eax
- jg 1f
- cmpl $-32768,%eax
- jl 2f
- movw %ax,(%esi)
- jmp 4f
-1: movw $32767,(%esi)
- jmp 3f
-2: movw $-32768,(%esi)
-3: incl %edi
-4:
-.L62:
- addl $-64,%ebx
- addl $4,%esi
- movl 16(%esp),%edx
- leal -128(%ecx,%edx,8),%ecx
- movl $15,%ebp
-.L68:
- flds -4(%ecx)
- fchs
- fmuls (%ebx)
- flds -8(%ecx)
- fmuls 4(%ebx)
- fxch %st(1)
- flds -12(%ecx)
- fmuls 8(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -16(%ecx)
- fmuls 12(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -20(%ecx)
- fmuls 16(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -24(%ecx)
- fmuls 20(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -28(%ecx)
- fmuls 24(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -32(%ecx)
- fmuls 28(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -36(%ecx)
- fmuls 32(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -40(%ecx)
- fmuls 36(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -44(%ecx)
- fmuls 40(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -48(%ecx)
- fmuls 44(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -52(%ecx)
- fmuls 48(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -56(%ecx)
- fmuls 52(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds -60(%ecx)
- fmuls 56(%ebx)
- fxch %st(2)
- fsubrp %st,%st(1)
- flds (%ecx)
- fmuls 60(%ebx)
- fxch %st(2)
- subl $4,%esp
- fsubrp %st,%st(1)
- fxch %st(1)
- fsubrp %st,%st(1)
- fistpl (%esp)
- popl %eax
- cmpl $32767,%eax
- jg 1f
- cmpl $-32768,%eax
- jl 2f
- movw %ax,(%esi)
- jmp 4f
-1: movw $32767,(%esi)
- jmp 3f
-2: movw $-32768,(%esi)
-3: incl %edi
-4:
-.L67:
- addl $-64,%ebx
- addl $-128,%ecx
- addl $4,%esi
- decl %ebp
- jnz .L68
- movl %edi,%eax
- popl %ebx
- popl %esi
- popl %edi
- popl %ebp
- addl $12,%esp
- ret
-