diff options
author | henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-08-02 11:26:43 +0000 |
---|---|---|
committer | henry <henry@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2004-08-02 11:26:43 +0000 |
commit | 943139cc78038c3aea0837229298cb2c08e3f8a2 (patch) | |
tree | 56b2a2dac2c09fe1016e3e146ec19cb2aae0777a | |
parent | 4779094c4be9af5ec0c5145d8a460b75e4a510c8 (diff) |
Importing libmpeg2 from mpeg2dec-0.4.0b
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@12933 b3059339-0415-0410-9bf9-f77b7e298cf2
-rw-r--r-- | libmpcodecs/vd_libmpeg2.c | 19 | ||||
-rw-r--r-- | libmpeg2/Makefile | 4 | ||||
-rw-r--r-- | libmpeg2/alloc.c | 54 | ||||
-rw-r--r-- | libmpeg2/alpha_asm.h | 75 | ||||
-rw-r--r-- | libmpeg2/attributes.h | 2 | ||||
-rw-r--r-- | libmpeg2/convert.h | 56 | ||||
-rw-r--r-- | libmpeg2/cpu_accel.c | 54 | ||||
-rw-r--r-- | libmpeg2/cpu_state.c | 4 | ||||
-rw-r--r-- | libmpeg2/decode.c | 255 | ||||
-rw-r--r-- | libmpeg2/header.c | 643 | ||||
-rw-r--r-- | libmpeg2/idct.c | 95 | ||||
-rw-r--r-- | libmpeg2/idct_alpha.c | 75 | ||||
-rw-r--r-- | libmpeg2/idct_altivec.c | 128 | ||||
-rw-r--r-- | libmpeg2/idct_mlib.c | 60 | ||||
-rw-r--r-- | libmpeg2/idct_mmx.c | 10 | ||||
-rw-r--r-- | libmpeg2/mmx.h | 2 | ||||
-rw-r--r-- | libmpeg2/motion_comp.c | 7 | ||||
-rw-r--r-- | libmpeg2/motion_comp_alpha.c | 198 | ||||
-rw-r--r-- | libmpeg2/motion_comp_altivec.c | 1 | ||||
-rw-r--r-- | libmpeg2/motion_comp_mlib.c | 190 | ||||
-rw-r--r-- | libmpeg2/motion_comp_mmx.c | 2 | ||||
-rw-r--r-- | libmpeg2/motion_comp_vis.c | 2061 | ||||
-rw-r--r-- | libmpeg2/mpeg2.h | 136 | ||||
-rw-r--r-- | libmpeg2/mpeg2_internal.h | 144 | ||||
-rw-r--r-- | libmpeg2/slice.c | 1299 | ||||
-rw-r--r-- | libmpeg2/vis.h | 328 | ||||
-rw-r--r-- | libmpeg2/vlc.h | 87 |
27 files changed, 4304 insertions, 1685 deletions
diff --git a/libmpcodecs/vd_libmpeg2.c b/libmpcodecs/vd_libmpeg2.c index fa326286f2..b73ea1aa80 100644 --- a/libmpcodecs/vd_libmpeg2.c +++ b/libmpcodecs/vd_libmpeg2.c @@ -12,7 +12,7 @@ static vd_info_t info = { - "MPEG 1/2 Video decoder libmpeg2-v0.3.1", + "MPEG 1/2 Video decoder libmpeg2-v0.4.0b", "libmpeg2", "A'rpi & Fabian Franz", "Aaron & Walken", @@ -24,6 +24,7 @@ LIBVD_EXTERN(libmpeg2) //#include "libvo/video_out.h" // FIXME!!! #include "libmpeg2/mpeg2.h" +#include "libmpeg2/attributes.h" #include "libmpeg2/mpeg2_internal.h" //#include "libmpeg2/convert.h" @@ -111,7 +112,7 @@ static mp_image_t* decode(sh_video_t *sh,void* data,int len,int flags){ while(1){ int state=mpeg2_parse (mpeg2dec); switch(state){ - case -1: + case STATE_BUFFER: // parsing of the passed buffer finished, return. // if(!mpi) printf("\nNO PICTURE!\n"); return mpi; @@ -153,28 +154,18 @@ static mp_image_t* decode(sh_video_t *sh,void* data,int len,int flags){ else mpi->fields &= ~MP_IMGFIELD_REPEAT_FIRST; mpi->fields |= MP_IMGFIELD_ORDERED; -#ifdef MPEG12_POSTPROC - if(!mpi->qscale){ - mpi->qstride=(info->sequence->picture_width+15)>>4; - mpi->qscale=malloc(mpi->qstride*((info->sequence->picture_height+15)>>4)); - } - mpeg2dec->decoder.quant_store=mpi->qscale; - mpeg2dec->decoder.quant_stride=mpi->qstride; - mpi->pict_type=type; // 1->I, 2->P, 3->B - mpi->qscale_type= 1; -#endif - if(mpi->flags&MP_IMGFLAG_DRAW_CALLBACK && !(mpi->flags&MP_IMGFLAG_DIRECT)){ // nice, filter/vo likes draw_callback :) mpeg2dec->decoder.convert=draw_slice; - mpeg2dec->decoder.fbuf_id=sh; + mpeg2dec->decoder.convert_id=sh; } else mpeg2dec->decoder.convert=NULL; break; } case STATE_SLICE: case STATE_END: + case STATE_INVALID_END: // decoding done: if(mpi) printf("AJAJJJJJJJJ2!\n"); if(info->display_fbuf) mpi=info->display_fbuf->id; diff --git a/libmpeg2/Makefile b/libmpeg2/Makefile index b7e93d796b..2c4c77b467 100644 --- a/libmpeg2/Makefile +++ b/libmpeg2/Makefile @@ -3,10 +3,10 @@ LIBNAME = libmpeg2.a include 
../config.mak -SRCS = alloc.c cpu_accel.c cpu_state.c decode.c header.c idct.c idct_alpha.c idct_mlib.c idct_mmx.c motion_comp.c motion_comp_alpha.c motion_comp_mlib.c motion_comp_mmx.c slice.c +SRCS = alloc.c cpu_accel.c cpu_state.c decode.c header.c idct.c idct_alpha.c idct_mmx.c motion_comp.c motion_comp_alpha.c motion_comp_mmx.c slice.c OBJS = $(SRCS:.c=.o) -INCLUDE = -I. -I../libvo -I.. $(EXTRA_INC) $(MLIB_INC) +INCLUDE = -I. -I../libvo -I.. $(EXTRA_INC) CFLAGS = $(OPTFLAGS) $(INCLUDE) -DMPG12PLAY ifeq ($(TARGET_ALTIVEC),yes) diff --git a/libmpeg2/alloc.c b/libmpeg2/alloc.c index 2e4792e94d..0698937bce 100644 --- a/libmpeg2/alloc.c +++ b/libmpeg2/alloc.c @@ -21,56 +21,50 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include "config.h" - #include <stdlib.h> #include <inttypes.h> #include "mpeg2.h" -#include "mpeg2_internal.h" - -#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) -/* some systems have memalign() but no declaration for it */ -void * memalign (size_t align, size_t size); -#endif -void * (* mpeg2_malloc_hook) (int size, int reason) = NULL; -int (* mpeg2_free_hook) (void * buf) = NULL; +static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL; +static int (* free_hook) (void * buf) = NULL; -void * mpeg2_malloc (int size, int reason) +void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason) { char * buf; - if (mpeg2_malloc_hook) { - buf = (char *) mpeg2_malloc_hook (size, reason); + if (malloc_hook) { + buf = (char *) malloc_hook (size, reason); if (buf) return buf; } -#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG) - return memalign (16, size); -#else - buf = (char *) malloc (size + 15 + sizeof (void **)); - if (buf) { - char * align_buf; + if (size) { + buf = (char *) malloc (size + 63 + sizeof (void **)); + if (buf) { + char * align_buf; - align_buf = buf + 15 + sizeof (void **); - align_buf -= (long)align_buf & 15; - *(((void **)align_buf) - 1) = buf; - return 
align_buf; + align_buf = buf + 63 + sizeof (void **); + align_buf -= (long)align_buf & 63; + *(((void **)align_buf) - 1) = buf; + return align_buf; + } } return NULL; -#endif } void mpeg2_free (void * buf) { - if (mpeg2_free_hook && mpeg2_free_hook (buf)) + if (free_hook && free_hook (buf)) return; -#if defined(HAVE_MEMALIGN) && !defined(__cplusplus) && !defined(DEBUG) - free (buf); -#else - free (*(((void **)buf) - 1)); -#endif + if (buf) + free (*(((void **)buf) - 1)); +} + +void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), + int free (void *)) +{ + malloc_hook = malloc; + free_hook = free; } diff --git a/libmpeg2/alpha_asm.h b/libmpeg2/alpha_asm.h index 6864ccc2e7..bf1081f249 100644 --- a/libmpeg2/alpha_asm.h +++ b/libmpeg2/alpha_asm.h @@ -1,6 +1,6 @@ /* * Alpha assembly macros - * Copyright (c) 2002 Falk Hueffner <falk@debian.org> + * Copyright (c) 2002-2003 Falk Hueffner <falk@debian.org> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. * See http://libmpeg2.sourceforge.net/ for updates. @@ -83,22 +83,11 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) #define uldq(a) (((const struct unaligned_long *) (a))->l) -#if GNUC_PREREQ(3,0) -/* Unfortunately, __builtin_prefetch is slightly buggy on Alpha. The - defines here are kludged so we still get the right - instruction. This needs to be adapted as soon as gcc is fixed. 
*/ -# define prefetch(p) __builtin_prefetch((p), 0, 1) -# define prefetch_en(p) __builtin_prefetch((p), 1, 1) -# define prefetch_m(p) __builtin_prefetch((p), 0, 0) -# define prefetch_men(p) __builtin_prefetch((p), 1, 0) -#else -# define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") -# define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") -# define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") -# define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") -#endif - #if GNUC_PREREQ(3,3) +#define prefetch(p) __builtin_prefetch((p), 0, 1) +#define prefetch_en(p) __builtin_prefetch((p), 0, 0) +#define prefetch_m(p) __builtin_prefetch((p), 1, 1) +#define prefetch_men(p) __builtin_prefetch((p), 1, 0) #define cmpbge __builtin_alpha_cmpbge /* Avoid warnings. */ #define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) @@ -109,6 +98,24 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); #define amask __builtin_alpha_amask #define implver __builtin_alpha_implver #define rpcc __builtin_alpha_rpcc +#else +#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zap(a, b) ({ uint64_t __r; asm ("zap 
%r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) +#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) +#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) +#endif +#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") + +#if GNUC_PREREQ(3,3) && defined(__alpha_max__) #define minub8 __builtin_alpha_minub8 #define minsb8 __builtin_alpha_minsb8 #define minuw4 __builtin_alpha_minuw4 @@ -123,30 +130,20 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); #define unpkbl __builtin_alpha_unpkbl #define unpkbw __builtin_alpha_unpkbw #else -#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) -#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); __r; }) -#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) -#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) -#define minub8(a, b) ({ uint64_t __r; asm ("minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minsb8(a, b) ({ uint64_t __r; asm ("minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define minuw4(a, b) ({ uint64_t __r; asm ("minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" 
(b)); __r; }) -#define minsw4(a, b) ({ uint64_t __r; asm ("minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxub8(a, b) ({ uint64_t __r; asm ("maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxsb8(a, b) ({ uint64_t __r; asm ("maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxuw4(a, b) ({ uint64_t __r; asm ("maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define maxsw4(a, b) ({ uint64_t __r; asm ("maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) -#define perr(a, b) ({ uint64_t __r; asm ("perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) -#define pklb(a) ({ uint64_t __r; asm ("pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define pkwb(a) ({ uint64_t __r; asm ("pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define unpkbl(a) ({ uint64_t __r; asm ("unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) -#define unpkbw(a) ({ uint64_t __r; asm ("unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define perr(a, b) ({ uint64_t 
__r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) +#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define pkwb(a) ({ uint64_t __r; asm (".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) #endif -#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ diff --git a/libmpeg2/attributes.h b/libmpeg2/attributes.h index 96a86b26c0..eefbc0dd1b 100644 --- a/libmpeg2/attributes.h +++ b/libmpeg2/attributes.h @@ -1,6 +1,6 @@ /* * attributes.h - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. diff --git a/libmpeg2/convert.h b/libmpeg2/convert.h deleted file mode 100644 index fd51fd84c2..0000000000 --- a/libmpeg2/convert.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * convert.h - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> - * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef CONVERT_H -#define CONVERT_H - -#define CONVERT_FRAME 0 -#define CONVERT_TOP_FIELD 1 -#define CONVERT_BOTTOM_FIELD 2 -#define CONVERT_BOTH_FIELDS 3 - -typedef struct convert_init_s { - void * id; - int id_size; - int buf_size[3]; - void (* start) (void * id, uint8_t * const * dest, int flags); - void (* copy) (void * id, uint8_t * const * src, unsigned int v_offset); -} convert_init_t; - -typedef void convert_t (int width, int height, uint32_t accel, void * arg, - convert_init_t * result); - -convert_t convert_rgb32; -convert_t convert_rgb24; -convert_t convert_rgb16; -convert_t convert_rgb15; -convert_t convert_bgr32; -convert_t convert_bgr24; -convert_t convert_bgr16; -convert_t convert_bgr15; - -#define CONVERT_RGB 0 -#define CONVERT_BGR 1 -convert_t * convert_rgb (int order, int bpp); - -#endif /* CONVERT_H */ diff --git a/libmpeg2/cpu_accel.c b/libmpeg2/cpu_accel.c index c2c91e5c0a..e87f5b20a9 100644 --- a/libmpeg2/cpu_accel.c +++ b/libmpeg2/cpu_accel.c @@ -26,6 +26,8 @@ #include <inttypes.h> #include "mpeg2.h" +#include "attributes.h" +#include "mpeg2_internal.h" #ifdef ACCEL_DETECT #ifdef ARCH_X86 @@ -35,7 +37,7 @@ static inline uint32_t arch_accel (void) int AMD; uint32_t caps; -#ifndef PIC +#if !defined(PIC) && !defined(__PIC__) #define cpuid(op,eax,ebx,ecx,edx) \ __asm__ ("cpuid" \ : "=a" (eax), \ @@ -106,7 +108,7 @@ static inline uint32_t arch_accel (void) } #endif /* ARCH_X86 */ -#ifdef ARCH_PPC +#if defined(ARCH_PPC) || defined(ARCH_SPARC) #include <signal.h> #include <setjmp.h> @@ -124,6 +126,7 @@ static RETSIGTYPE sigill_handler (int sig) siglongjmp (jmpbuf, 1); } +#ifdef ARCH_PPC static inline uint32_t arch_accel (void) { static RETSIGTYPE (* oldsig) (int); @@ -146,11 
+149,49 @@ static inline uint32_t arch_accel (void) : : "r" (-1)); + canjump = 0; + signal (SIGILL, oldsig); return MPEG2_ACCEL_PPC_ALTIVEC; } #endif /* ARCH_PPC */ +#ifdef ARCH_SPARC +static inline uint32_t arch_accel (void) +{ + static RETSIGTYPE (* oldsig) (int); + + oldsig = signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return 0; + } + + canjump = 1; + + /* pdist %f0, %f0, %f0 */ + __asm__ __volatile__(".word\t0x81b007c0"); + + canjump = 0; + + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return MPEG2_ACCEL_SPARC_VIS; + } + + canjump = 1; + + /* edge8n %g0, %g0, %g0 */ + __asm__ __volatile__(".word\t0x81b00020"); + + canjump = 0; + + signal (SIGILL, oldsig); + return MPEG2_ACCEL_SPARC_VIS | MPEG2_ACCEL_SPARC_VIS2; +} +#endif /* ARCH_SPARC */ +#endif /* ARCH_PPC || ARCH_SPARC */ + #ifdef ARCH_ALPHA static inline uint32_t arch_accel (void) { @@ -167,7 +208,7 @@ static inline uint32_t arch_accel (void) #endif } #endif /* ARCH_ALPHA */ -#endif +#endif /* ACCEL_DETECT */ uint32_t mpeg2_detect_accel (void) { @@ -175,11 +216,8 @@ uint32_t mpeg2_detect_accel (void) accel = 0; #ifdef ACCEL_DETECT -#ifdef LIBMPEG2_MLIB - accel = MPEG2_ACCEL_MLIB; -#endif -#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) - accel |= arch_accel (); +#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) + accel = arch_accel (); #endif #endif return accel; diff --git a/libmpeg2/cpu_state.c b/libmpeg2/cpu_state.c index e6544c6904..2a032dec00 100644 --- a/libmpeg2/cpu_state.c +++ b/libmpeg2/cpu_state.c @@ -27,8 +27,8 @@ #include <inttypes.h> #include "mpeg2.h" -#include "mpeg2_internal.h" #include "attributes.h" +#include "mpeg2_internal.h" #ifdef ARCH_X86 #include "mmx.h" #endif @@ -43,7 +43,7 @@ static void state_restore_mmx (cpu_state_t * state) } #endif -#if defined(ARCH_PPC) && defined(HAVE_ALTIVEC) +#ifdef ARCH_PPC #ifdef HAVE_ALTIVEC_H /* gnu */ #define LI(a,b) "li " #a 
"," #b "\n\t" #define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t" diff --git a/libmpeg2/decode.c b/libmpeg2/decode.c index fa87a824c1..bba2af4954 100644 --- a/libmpeg2/decode.c +++ b/libmpeg2/decode.c @@ -1,6 +1,6 @@ /* * decode.c - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. @@ -28,8 +28,8 @@ #include <inttypes.h> #include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" -#include "convert.h" static int mpeg2_accels = 0; @@ -44,7 +44,6 @@ static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes) { uint8_t * current; uint32_t shift; - uint8_t * chunk_ptr; uint8_t * limit; uint8_t byte; @@ -53,7 +52,6 @@ static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes) current = mpeg2dec->buf_start; shift = mpeg2dec->shift; - chunk_ptr = mpeg2dec->chunk_ptr; limit = current + bytes; do { @@ -116,69 +114,54 @@ void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end) mpeg2dec->buf_end = end; } -static inline int seek_chunk (mpeg2dec_t * mpeg2dec) +int mpeg2_getpos (mpeg2dec_t * mpeg2dec) +{ + return mpeg2dec->buf_end - mpeg2dec->buf_start; +} + +static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec) { int size, skipped; size = mpeg2dec->buf_end - mpeg2dec->buf_start; skipped = skip_chunk (mpeg2dec, size); if (!skipped) { - mpeg2dec->bytes_since_pts += size; - return -1; + mpeg2dec->bytes_since_tag += size; + return STATE_BUFFER; } - mpeg2dec->bytes_since_pts += skipped; + mpeg2dec->bytes_since_tag += skipped; mpeg2dec->code = mpeg2dec->buf_start[-1]; - return 0; + return (mpeg2_state_t)-1; } -int mpeg2_seek_header (mpeg2dec_t * mpeg2dec) +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec) { while (mpeg2dec->code != 0xb3 && ((mpeg2dec->code != 0xb7 && mpeg2dec->code != 0xb8 && - mpeg2dec->code) || 
mpeg2dec->sequence.width == -1)) - if (seek_chunk (mpeg2dec)) - return -1; + mpeg2dec->code) || mpeg2dec->sequence.width == (unsigned)-1)) + if (seek_chunk (mpeg2dec) == STATE_BUFFER) + return STATE_BUFFER; mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - return mpeg2_parse_header (mpeg2dec); -} - -int mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec) -{ - mpeg2dec->sequence.width = -1; - return mpeg2_seek_header (mpeg2dec); + mpeg2dec->user_data_len = 0; + return (mpeg2dec->code ? mpeg2_parse_header (mpeg2dec) : + mpeg2_header_picture_start (mpeg2dec)); } #define RECEIVED(code,state) (((state) << 8) + (code)) -int mpeg2_parse (mpeg2dec_t * mpeg2dec) +mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) { int size_buffer, size_chunk, copied; - if(mpeg2dec->code==0xff){ - // FIXME: we need to resync stream (esp. mpeg2dec-->code) as we - // left parser at 0x1FF last time at the end of prev. chunk. - // Why? mpeg2dec->action is set to mpeg2_header_picture_start, but - // it will call mpeg2_parse_header() too... 
- // - // following code copied from mpeg2_seek_header(): - while (mpeg2dec->code != 0xb3 && - ((mpeg2dec->code != 0xb7 && mpeg2dec->code != 0xb8 && - mpeg2dec->code) || mpeg2dec->sequence.width == -1)) - if (seek_chunk (mpeg2dec)) - return -1; - mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - } - if (mpeg2dec->action) { - int state; + mpeg2_state_t state; state = mpeg2dec->action (mpeg2dec); - if (state) + if ((int)state >= 0) return state; } - + while (1) { - //printf("code=0x%X \n",mpeg2dec->code); while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) < mpeg2dec->nb_decode_slices) { size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; @@ -187,20 +170,20 @@ int mpeg2_parse (mpeg2dec_t * mpeg2dec) if (size_buffer <= size_chunk) { copied = copy_chunk (mpeg2dec, size_buffer); if (!copied) { - mpeg2dec->bytes_since_pts += size_buffer; + mpeg2dec->bytes_since_tag += size_buffer; mpeg2dec->chunk_ptr += size_buffer; - return -1; + return STATE_BUFFER; } } else { copied = copy_chunk (mpeg2dec, size_chunk); if (!copied) { /* filled the chunk buffer without finding a start code */ - mpeg2dec->bytes_since_pts += size_chunk; + mpeg2dec->bytes_since_tag += size_chunk; mpeg2dec->action = seek_chunk; return STATE_INVALID; } } - mpeg2dec->bytes_since_pts += copied; + mpeg2dec->bytes_since_tag += copied; mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code, mpeg2dec->chunk_start); @@ -209,37 +192,29 @@ int mpeg2_parse (mpeg2dec_t * mpeg2dec) } if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1) break; - if (seek_chunk (mpeg2dec)) - return -1; - } - - //printf("next_code=0x%X state=%d \n",mpeg2dec->code,mpeg2dec->state); - - if(mpeg2dec->code==0xff){ - mpeg2dec->action = mpeg2_header_picture_start; //mpeg2_seek_header; - return mpeg2dec->state; + if (seek_chunk (mpeg2dec) == STATE_BUFFER) + return STATE_BUFFER; } - switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) { - case RECEIVED (0x00, STATE_SLICE_1ST): - case RECEIVED (0x00, STATE_SLICE): + 
switch (mpeg2dec->code) { + case 0x00: mpeg2dec->action = mpeg2_header_picture_start; - break; - case RECEIVED (0xb7, STATE_SLICE): + return mpeg2dec->state; + case 0xb7: mpeg2dec->action = mpeg2_header_end; break; - case RECEIVED (0xb3, STATE_SLICE): - case RECEIVED (0xb8, STATE_SLICE): + case 0xb3: + case 0xb8: mpeg2dec->action = mpeg2_parse_header; break; default: - mpeg2dec->action = mpeg2_seek_header; + mpeg2dec->action = seek_chunk; return STATE_INVALID; } - return mpeg2dec->state; + return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID; } -int mpeg2_parse_header (mpeg2dec_t * mpeg2dec) +mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) { static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = { mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data, @@ -248,6 +223,7 @@ int mpeg2_parse_header (mpeg2dec_t * mpeg2dec) int size_buffer, size_chunk, copied; mpeg2dec->action = mpeg2_parse_header; + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; while (1) { size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - @@ -255,26 +231,21 @@ int mpeg2_parse_header (mpeg2dec_t * mpeg2dec) if (size_buffer <= size_chunk) { copied = copy_chunk (mpeg2dec, size_buffer); if (!copied) { - mpeg2dec->bytes_since_pts += size_buffer; + mpeg2dec->bytes_since_tag += size_buffer; mpeg2dec->chunk_ptr += size_buffer; - return -1; + return STATE_BUFFER; } } else { copied = copy_chunk (mpeg2dec, size_chunk); if (!copied) { /* filled the chunk buffer without finding a start code */ - mpeg2dec->bytes_since_pts += size_chunk; + mpeg2dec->bytes_since_tag += size_chunk; mpeg2dec->code = 0xb4; mpeg2dec->action = mpeg2_seek_header; return STATE_INVALID; } } - mpeg2dec->bytes_since_pts += copied; - - //printf("header_code=0x%X state=%d \n",mpeg2dec->code,mpeg2dec->state); - -// if(!mpeg2dec->code && mpeg2dec->state==7) - + mpeg2dec->bytes_since_tag += copied; if 
(process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) { mpeg2dec->code = mpeg2dec->buf_start[-1]; @@ -283,9 +254,6 @@ int mpeg2_parse_header (mpeg2dec_t * mpeg2dec) } mpeg2dec->code = mpeg2dec->buf_start[-1]; - - //printf("next_header_code=0x%X state=%d \n",mpeg2dec->code,mpeg2dec->state); - switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) { /* state transition after a sequence header */ @@ -297,10 +265,12 @@ int mpeg2_parse_header (mpeg2dec_t * mpeg2dec) /* other legal state transitions */ case RECEIVED (0x00, STATE_GOP): + mpeg2_header_gop_finalize (mpeg2dec); mpeg2dec->action = mpeg2_header_picture_start; break; case RECEIVED (0x01, STATE_PICTURE): case RECEIVED (0x01, STATE_PICTURE_2ND): + mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels); mpeg2dec->action = mpeg2_header_slice_start; break; @@ -321,56 +291,58 @@ int mpeg2_parse_header (mpeg2dec_t * mpeg2dec) } mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->user_data_len = 0; return mpeg2dec->state; } } -void mpeg2_convert (mpeg2dec_t * mpeg2dec, - void (* convert) (int, int, uint32_t, void *, - struct convert_init_s *), void * arg) +int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg) { - convert_init_t convert_init; - int size; - - convert_init.id = NULL; - convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height, - mpeg2_accels, arg, &convert_init); - if (convert_init.id_size) { - convert_init.id = mpeg2dec->convert_id = - mpeg2_malloc (convert_init.id_size, ALLOC_CONVERT_ID); - convert (mpeg2dec->decoder.width, mpeg2dec->decoder.height, - mpeg2_accels, arg, &convert_init); + mpeg2_convert_init_t convert_init; + int error; + + error = convert (MPEG2_CONVERT_SET, NULL, &(mpeg2dec->sequence), 0, + mpeg2_accels, arg, &convert_init); + if (!error) { + mpeg2dec->convert = convert; + mpeg2dec->convert_arg = arg; + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = 0; } - mpeg2dec->convert_size[0] = size = 
convert_init.buf_size[0]; - mpeg2dec->convert_size[1] = size += convert_init.buf_size[1]; - mpeg2dec->convert_size[2] = size += convert_init.buf_size[2]; - mpeg2dec->convert_start = convert_init.start; - mpeg2dec->convert_copy = convert_init.copy; - - size = mpeg2dec->decoder.width * mpeg2dec->decoder.height >> 2; - mpeg2dec->yuv_buf[0][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV); - mpeg2dec->yuv_buf[0][1] = mpeg2dec->yuv_buf[0][0] + 4 * size; - mpeg2dec->yuv_buf[0][2] = mpeg2dec->yuv_buf[0][0] + 5 * size; - mpeg2dec->yuv_buf[1][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV); - mpeg2dec->yuv_buf[1][1] = mpeg2dec->yuv_buf[1][0] + 4 * size; - mpeg2dec->yuv_buf[1][2] = mpeg2dec->yuv_buf[1][0] + 5 * size; - size = mpeg2dec->decoder.width * 8; - mpeg2dec->yuv_buf[2][0] = (uint8_t *) mpeg2_malloc (6 * size, ALLOC_YUV); - mpeg2dec->yuv_buf[2][1] = mpeg2dec->yuv_buf[2][0] + 4 * size; - mpeg2dec->yuv_buf[2][2] = mpeg2dec->yuv_buf[2][0] + 5 * size; + return error; +} + +int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride) +{ + if (!mpeg2dec->convert) { + if (stride < (int) mpeg2dec->sequence.width) + stride = mpeg2dec->sequence.width; + mpeg2dec->decoder.stride_frame = stride; + } else { + mpeg2_convert_init_t convert_init; + + stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL, + &(mpeg2dec->sequence), stride, + mpeg2_accels, mpeg2dec->convert_arg, + &convert_init); + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = stride; + } + return stride; } void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id) { - fbuf_t * fbuf; + mpeg2_fbuf_t * fbuf; if (mpeg2dec->custom_fbuf) { - mpeg2_set_fbuf (mpeg2dec, mpeg2dec->decoder.coding_type); - fbuf = mpeg2dec->fbuf[0]; if (mpeg2dec->state == STATE_SEQUENCE) { mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; } + mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type == + PIC_FLAG_CODING_TYPE_B)); + fbuf = mpeg2dec->fbuf[0]; } else { fbuf = 
&(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf); mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index; @@ -409,12 +381,14 @@ void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end) mpeg2dec->nb_decode_slices = end - start; } -void mpeg2_pts (mpeg2dec_t * mpeg2dec, uint32_t pts) +void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2) { - mpeg2dec->pts_previous = mpeg2dec->pts_current; - mpeg2dec->pts_current = pts; - mpeg2dec->num_pts++; - mpeg2dec->bytes_since_pts = 0; + mpeg2dec->tag_previous = mpeg2dec->tag_current; + mpeg2dec->tag2_previous = mpeg2dec->tag2_current; + mpeg2dec->tag_current = tag; + mpeg2dec->tag2_current = tag2; + mpeg2dec->num_tags++; + mpeg2dec->bytes_since_tag = 0; } uint32_t mpeg2_accel (uint32_t accel) @@ -430,6 +404,27 @@ uint32_t mpeg2_accel (uint32_t accel) return mpeg2_accels & ~MPEG2_ACCEL_DETECT; } +void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset) +{ + mpeg2dec->buf_start = mpeg2dec->buf_end = NULL; + mpeg2dec->num_tags = 0; + mpeg2dec->shift = 0xffffff00; + mpeg2dec->code = 0xb4; + mpeg2dec->action = mpeg2_seek_header; + mpeg2dec->state = STATE_INVALID; + mpeg2dec->first = 1; + + mpeg2_reset_info(&(mpeg2dec->info)); + mpeg2dec->info.gop = NULL; + mpeg2dec->info.user_data = NULL; + mpeg2dec->info.user_data_len = 0; + if (full_reset) { + mpeg2dec->info.sequence = NULL; + mpeg2_header_state_init (mpeg2dec); + } + +} + mpeg2dec_t * mpeg2_init (void) { mpeg2dec_t * mpeg2dec; @@ -437,43 +432,25 @@ mpeg2dec_t * mpeg2_init (void) mpeg2_accel (MPEG2_ACCEL_DETECT); mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t), - ALLOC_MPEG2DEC); + MPEG2_ALLOC_MPEG2DEC); if (mpeg2dec == NULL) return NULL; - memset (mpeg2dec, 0, sizeof (mpeg2dec_t)); + memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t)); + memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t)); mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4, - ALLOC_CHUNK); - - mpeg2dec->shift = 0xffffff00; - 
mpeg2dec->action = mpeg2_seek_sequence; - mpeg2dec->code = 0xb4; - mpeg2dec->first_decode_slice = 1; - mpeg2dec->nb_decode_slices = 0xb0 - 1; - mpeg2dec->convert_id = NULL; + MPEG2_ALLOC_CHUNK); - /* initialize substructures */ - mpeg2_header_state_init (mpeg2dec); + mpeg2dec->sequence.width = (unsigned)-1; + mpeg2_reset (mpeg2dec, 1); return mpeg2dec; } void mpeg2_close (mpeg2dec_t * mpeg2dec) { - int i; - - /* static uint8_t finalizer[] = {0,0,1,0xb4}; */ - /* mpeg2_decode_data (mpeg2dec, finalizer, finalizer+4); */ - + mpeg2_header_state_init (mpeg2dec); mpeg2_free (mpeg2dec->chunk_buffer); - if (!mpeg2dec->custom_fbuf) - for (i = mpeg2dec->alloc_index_user; i < mpeg2dec->alloc_index; i++) - mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]); - if (mpeg2dec->convert_start) - for (i = 0; i < 3; i++) - mpeg2_free (mpeg2dec->yuv_buf[i][0]); - if (mpeg2dec->convert_id) - mpeg2_free (mpeg2dec->convert_id); mpeg2_free (mpeg2dec); } diff --git a/libmpeg2/header.c b/libmpeg2/header.c index 248b2e24e2..8312e9a7d6 100644 --- a/libmpeg2/header.c +++ b/libmpeg2/header.c @@ -1,6 +1,7 @@ /* * header.c - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Regis Duchesne <hpreg@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
@@ -28,9 +29,8 @@ #include <string.h> /* memcmp */ #include "mpeg2.h" -#include "mpeg2_internal.h" -#include "convert.h" #include "attributes.h" +#include "mpeg2_internal.h" #define SEQ_EXT 2 #define SEQ_DISPLAY_EXT 4 @@ -76,57 +76,93 @@ uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = { void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec) { - mpeg2dec->decoder.scan = mpeg2_scan_norm; + if (mpeg2dec->sequence.width != (unsigned)-1) { + int i; + + mpeg2dec->sequence.width = (unsigned)-1; + if (!mpeg2dec->custom_fbuf) + for (i = mpeg2dec->alloc_index_user; + i < mpeg2dec->alloc_index; i++) { + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[0]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[1]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[2]); + } + if (mpeg2dec->convert_start) + for (i = 0; i < 3; i++) { + mpeg2_free (mpeg2dec->yuv_buf[i][0]); + mpeg2_free (mpeg2dec->yuv_buf[i][1]); + mpeg2_free (mpeg2dec->yuv_buf[i][2]); + } + if (mpeg2dec->decoder.convert_id) + mpeg2_free (mpeg2dec->decoder.convert_id); + } + mpeg2dec->decoder.coding_type = I_TYPE; + mpeg2dec->decoder.convert = NULL; + mpeg2dec->decoder.convert_id = NULL; mpeg2dec->picture = mpeg2dec->pictures; + memset(&mpeg2dec->fbuf_alloc[0].fbuf, 0, sizeof(mpeg2_fbuf_t)); + memset(&mpeg2dec->fbuf_alloc[1].fbuf, 0, sizeof(mpeg2_fbuf_t)); + memset(&mpeg2dec->fbuf_alloc[2].fbuf, 0, sizeof(mpeg2_fbuf_t)); mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; mpeg2dec->first = 1; mpeg2dec->alloc_index = 0; mpeg2dec->alloc_index_user = 0; + mpeg2dec->first_decode_slice = 1; + mpeg2dec->nb_decode_slices = 0xb0 - 1; + mpeg2dec->convert = NULL; + mpeg2dec->convert_start = NULL; + mpeg2dec->custom_fbuf = 0; + mpeg2dec->yuv_index = 0; } -static void reset_info (mpeg2_info_t * info) +void mpeg2_reset_info (mpeg2_info_t * info) { info->current_picture = info->current_picture_2nd = NULL; info->display_picture = 
info->display_picture_2nd = NULL; info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL; - info->user_data = NULL; info->user_data_len = 0; +} + +static void info_user_data (mpeg2dec_t * mpeg2dec) +{ + if (mpeg2dec->user_data_len) { + mpeg2dec->info.user_data = mpeg2dec->chunk_buffer; + mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3; + } } int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - sequence_t * sequence = &(mpeg2dec->new_sequence); - decoder_t * decoder = &(mpeg2dec->decoder); - static unsigned int frame_period[9] = { - 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000 + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + static unsigned int frame_period[16] = { + 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000, + /* unofficial: xing 15 fps */ + 1800000, + /* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */ + 5400000, 2700000, 2250000, 1800000, 0, 0 }; - int width, height; int i; if ((buffer[6] & 0x20) != 0x20) /* missing marker_bit */ return 1; i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; - sequence->display_width = sequence->picture_width = width = i >> 12; - sequence->display_height = sequence->picture_height = height = i & 0xfff; - - if(width == 0 || height == 0) - return 1; - - decoder->width = sequence->width = width = (width + 15) & ~15; - decoder->height = sequence->height = height = (height + 15) & ~15; - decoder->vertical_position_extension = (height > 2800); - sequence->chroma_width = width >> 1; - sequence->chroma_height = height >> 1; + if (! (sequence->display_width = sequence->picture_width = i >> 12)) + return 1; + if (! 
(sequence->display_height = sequence->picture_height = i & 0xfff)) + return 1; + sequence->width = (sequence->picture_width + 15) & ~15; + sequence->height = (sequence->picture_height + 15) & ~15; + sequence->chroma_width = sequence->width >> 1; + sequence->chroma_height = sequence->height >> 1; - sequence->flags = SEQ_FLAG_PROGRESSIVE_SEQUENCE; + sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE | + SEQ_VIDEO_FORMAT_UNSPECIFIED); sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */ - sequence->frame_period = 0; - if ((buffer[3] & 15) < 9) - sequence->frame_period = frame_period[buffer[3] & 15]; + sequence->frame_period = frame_period[buffer[3] & 15]; sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6); @@ -135,51 +171,40 @@ int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) if (buffer[7] & 4) sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS; + mpeg2dec->copy_matrix = 3; if (buffer[7] & 2) { for (i = 0; i < 64; i++) - decoder->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = (buffer[i+7] << 7) | (buffer[i+8] >> 1); buffer += 64; } else for (i = 0; i < 64; i++) - decoder->intra_quantizer_matrix[mpeg2_scan_norm[i]] = - default_intra_quantizer_matrix [i]; + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = + default_intra_quantizer_matrix[i]; if (buffer[7] & 1) for (i = 0; i < 64; i++) - decoder->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] = buffer[i+8]; else - for (i = 0; i < 64; i++) - decoder->non_intra_quantizer_matrix[i] = 16; + memset (mpeg2dec->new_quantizer_matrix[1], 16, 64); sequence->profile_level_id = 0x80; - sequence->colour_primaries = 1; - sequence->transfer_characteristics = 1; - sequence->matrix_coefficients = 1; - - decoder->mpeg1 = 1; - decoder->intra_dc_precision = 0; - decoder->frame_pred_frame_dct = 1; - decoder->q_scale_type = 0; - decoder->concealment_motion_vectors = 0; - decoder->scan = 
mpeg2_scan_norm; - decoder->picture_structure = FRAME_PICTURE; + sequence->colour_primaries = 0; + sequence->transfer_characteristics = 0; + sequence->matrix_coefficients = 0; mpeg2dec->ext_state = SEQ_EXT; mpeg2dec->state = STATE_SEQUENCE; mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0; - reset_info (&(mpeg2dec->info)); return 0; } static int sequence_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - sequence_t * sequence = &(mpeg2dec->new_sequence); - decoder_t * decoder = &(mpeg2dec->decoder); - int width, height; + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); uint32_t flags; if (!(buffer[3] & 1)) @@ -187,31 +212,30 @@ static int sequence_ext (mpeg2dec_t * mpeg2dec) sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4); - width = sequence->display_width = sequence->picture_width += + sequence->display_width = sequence->picture_width += ((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000; - height = sequence->display_height = sequence->picture_height += + sequence->display_height = sequence->picture_height += (buffer[2] << 7) & 0x3000; - decoder->vertical_position_extension = (height > 2800); + sequence->width = (sequence->picture_width + 15) & ~15; + sequence->height = (sequence->picture_height + 15) & ~15; flags = sequence->flags | SEQ_FLAG_MPEG2; if (!(buffer[1] & 8)) { flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE; - height = (height + 31) & ~31; + sequence->height = (sequence->height + 31) & ~31; } if (buffer[5] & 0x80) flags |= SEQ_FLAG_LOW_DELAY; sequence->flags = flags; - decoder->width = sequence->width = width = (width + 15) & ~15; - decoder->height = sequence->height = height = (height + 15) & ~15; + sequence->chroma_width = sequence->width; + sequence->chroma_height = sequence->height; switch (buffer[1] & 6) { case 0: /* invalid */ return 1; case 2: /* 4:2:0 */ - height >>= 1; + sequence->chroma_height >>= 1; case 4: /* 4:2:2 */ - width >>= 1; + sequence->chroma_width >>= 1; } - sequence->chroma_width 
= width; - sequence->chroma_height = height; sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000; @@ -220,8 +244,6 @@ static int sequence_ext (mpeg2dec_t * mpeg2dec) sequence->frame_period = sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>2)&3)+1); - decoder->mpeg1 = 0; - mpeg2dec->ext_state = SEQ_DISPLAY_EXT; return 0; @@ -230,7 +252,7 @@ static int sequence_ext (mpeg2dec_t * mpeg2dec) static int sequence_display_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - sequence_t * sequence = &(mpeg2dec->new_sequence); + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); uint32_t flags; flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) | @@ -253,7 +275,7 @@ static int sequence_display_ext (mpeg2dec_t * mpeg2dec) return 0; } -static inline void finalize_sequence (sequence_t * sequence) +static inline void finalize_sequence (mpeg2_sequence_t * sequence) { int width; int height; @@ -308,35 +330,121 @@ static inline void finalize_sequence (sequence_t * sequence) sequence->pixel_height /= height; } +static void copy_matrix (mpeg2dec_t * mpeg2dec, int index) +{ + if (memcmp (mpeg2dec->quantizer_matrix[index], + mpeg2dec->new_quantizer_matrix[index], 64)) { + memcpy (mpeg2dec->quantizer_matrix[index], + mpeg2dec->new_quantizer_matrix[index], 64); + mpeg2dec->scaled[index] = -1; + } +} + +static void finalize_matrix (mpeg2dec_t * mpeg2dec) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int i; + + for (i = 0; i < 2; i++) { + if (mpeg2dec->copy_matrix & (1 << i)) + copy_matrix (mpeg2dec, i); + if ((mpeg2dec->copy_matrix & (4 << i)) && + memcmp (mpeg2dec->quantizer_matrix[i], + mpeg2dec->new_quantizer_matrix[i+2], 64)) { + copy_matrix (mpeg2dec, i + 2); + decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i+2]; + } else if (mpeg2dec->copy_matrix & (5 << i)) + decoder->chroma_quantizer[i] = decoder->quantizer_prescale[i]; + } +} + +static mpeg2_state_t invalid_end_action (mpeg2dec_t * mpeg2dec) +{ + 
mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.gop = NULL; + info_user_data (mpeg2dec); + mpeg2_header_state_init (mpeg2dec); + mpeg2dec->sequence = mpeg2dec->new_sequence; + mpeg2dec->action = mpeg2_seek_header; + mpeg2dec->state = STATE_SEQUENCE; + return STATE_SEQUENCE; +} + void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec) { - sequence_t * sequence = &(mpeg2dec->new_sequence); + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); finalize_sequence (sequence); - - /* - * according to 6.1.1.6, repeat sequence headers should be - * identical to the original. However some DVDs dont respect that - * and have different bitrates in the repeat sequence headers. So - * we'll ignore that in the comparison and still consider these as - * repeat sequence headers. - */ - mpeg2dec->sequence.byte_rate = sequence->byte_rate; - if (!memcmp (&(mpeg2dec->sequence), sequence, sizeof (sequence_t))) + finalize_matrix (mpeg2dec); + + decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2); + decoder->width = sequence->width; + decoder->height = sequence->height; + decoder->vertical_position_extension = (sequence->picture_height > 2800); + decoder->chroma_format = ((sequence->chroma_width == sequence->width) + + (sequence->chroma_height == sequence->height)); + + if (mpeg2dec->sequence.width != (unsigned)-1) { + unsigned int new_byte_rate; + + /* + * According to 6.1.1.6, repeat sequence headers should be + * identical to the original. However some DVDs dont respect + * that and have different bitrates in the repeat sequence + * headers. So we'll ignore that in the comparison and still + * consider these as repeat sequence headers. + * + * However, be careful not to alter the current sequence when + * returning STATE_INVALID_END. 
+ */ + new_byte_rate = sequence->byte_rate; + sequence->byte_rate = mpeg2dec->sequence.byte_rate; + if (memcmp (&(mpeg2dec->sequence), sequence, + sizeof (mpeg2_sequence_t))) { + decoder->stride_frame = sequence->width; + sequence->byte_rate = new_byte_rate; + mpeg2_header_end (mpeg2dec); + mpeg2dec->action = invalid_end_action; + mpeg2dec->state = STATE_INVALID_END; + return; + } + sequence->byte_rate = new_byte_rate; mpeg2dec->state = STATE_SEQUENCE_REPEATED; + } else + decoder->stride_frame = sequence->width; mpeg2dec->sequence = *sequence; - + mpeg2_reset_info (&(mpeg2dec->info)); mpeg2dec->info.sequence = &(mpeg2dec->sequence); + mpeg2dec->info.gop = NULL; + info_user_data (mpeg2dec); } int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) { + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_gop_t * gop = &(mpeg2dec->new_gop); + + if (! (buffer[1] & 8)) + return 1; + gop->hours = (buffer[0] >> 2) & 31; + gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63; + gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63; + gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63; + gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6); mpeg2dec->state = STATE_GOP; - reset_info (&(mpeg2dec->info)); return 0; } -void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type) +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->gop = mpeg2dec->new_gop; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.gop = &(mpeg2dec->gop); + info_user_data (mpeg2dec); +} + +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type) { int i; @@ -345,9 +453,8 @@ void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type) mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) { mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf; mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; - if ((coding_type == B_TYPE) || - (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { - if ((coding_type == B_TYPE) || (mpeg2dec->convert_start)) + if (b_type || (mpeg2dec->sequence.flags & 
SEQ_FLAG_LOW_DELAY)) { + if (b_type || mpeg2dec->convert) mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; } @@ -355,32 +462,25 @@ void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type) } } -int mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec) +mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec) { - decoder_t * decoder = &(mpeg2dec->decoder); - picture_t * picture; - - if (mpeg2dec->state != STATE_SLICE_1ST) { - mpeg2dec->state = STATE_PICTURE; - picture = mpeg2dec->pictures; - if ((decoder->coding_type != PIC_FLAG_CODING_TYPE_B) ^ - (mpeg2dec->picture >= mpeg2dec->pictures + 2)) - picture += 2; - } else { - mpeg2dec->state = STATE_PICTURE_2ND; - picture = mpeg2dec->picture + 1; /* second field picture */ - } - mpeg2dec->picture = picture; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + + mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ? + STATE_PICTURE : STATE_PICTURE_2ND); picture->flags = 0; - if (mpeg2dec->num_pts) { - if (mpeg2dec->bytes_since_pts >= 4) { - mpeg2dec->num_pts = 0; - picture->pts = mpeg2dec->pts_current; - picture->flags = PIC_FLAG_PTS; - } else if (mpeg2dec->num_pts > 1) { - mpeg2dec->num_pts = 1; - picture->pts = mpeg2dec->pts_previous; - picture->flags = PIC_FLAG_PTS; + picture->tag = picture->tag2 = 0; + if (mpeg2dec->num_tags) { + if (mpeg2dec->bytes_since_tag >= 4) { + mpeg2dec->num_tags = 0; + picture->tag = mpeg2dec->tag_current; + picture->tag2 = mpeg2dec->tag2_current; + picture->flags = PIC_FLAG_TAGS; + } else if (mpeg2dec->num_tags > 1) { + mpeg2dec->num_tags = 1; + picture->tag = mpeg2dec->tag_previous; + picture->tag2 = mpeg2dec->tag2_previous; + picture->flags = PIC_FLAG_TAGS; } } picture->display_offset[0].x = picture->display_offset[1].x = @@ -393,80 +493,15 @@ int mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec) int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - picture_t * picture = 
mpeg2dec->picture; - decoder_t * decoder = &(mpeg2dec->decoder); + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); int type; - int low_delay; type = (buffer [1] >> 3) & 7; - low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; - - if (mpeg2dec->state == STATE_PICTURE) { - picture_t * other; - - decoder->second_field = 0; - other = mpeg2dec->pictures; - if (other == picture) - other += 2; - if (decoder->coding_type != PIC_FLAG_CODING_TYPE_B) { - mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; - mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; - } - mpeg2dec->fbuf[0] = NULL; - reset_info (&(mpeg2dec->info)); - mpeg2dec->info.current_picture = picture; - mpeg2dec->info.display_picture = picture; - if (type != PIC_FLAG_CODING_TYPE_B) { - if (!low_delay) { - if (mpeg2dec->first) { - mpeg2dec->info.display_picture = NULL; - mpeg2dec->first = 0; - } else { - mpeg2dec->info.display_picture = other; - if (other->nb_fields == 1) - mpeg2dec->info.display_picture_2nd = other + 1; - mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; - } - } - if (!low_delay + !mpeg2dec->convert_start) - mpeg2dec->info.discard_fbuf = - mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert_start]; - } - if (!mpeg2dec->custom_fbuf) { - while (mpeg2dec->alloc_index < 3) { - fbuf_t * fbuf; - - fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); - fbuf->id = NULL; - if (mpeg2dec->convert_start) { - fbuf->buf[0] = - (uint8_t *) mpeg2_malloc (mpeg2dec->convert_size[0], - ALLOC_CONVERTED); - fbuf->buf[1] = fbuf->buf[0] + mpeg2dec->convert_size[1]; - fbuf->buf[2] = fbuf->buf[0] + mpeg2dec->convert_size[2]; - } else { - int size; - size = mpeg2dec->decoder.width * mpeg2dec->decoder.height; - fbuf->buf[0] = (uint8_t *) mpeg2_malloc (6 * size >> 2, - ALLOC_YUV); - fbuf->buf[1] = fbuf->buf[0] + size; - fbuf->buf[2] = fbuf->buf[1] + (size >> 2); - } - } - mpeg2_set_fbuf (mpeg2dec, type); - } - } else { - decoder->second_field = 1; - 
mpeg2dec->info.current_picture_2nd = picture; - mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; - if (low_delay || type == PIC_FLAG_CODING_TYPE_B) - mpeg2dec->info.display_picture_2nd = picture; - } mpeg2dec->ext_state = PIC_CODING_EXT; picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); - decoder->coding_type = type; picture->flags |= type; if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) { @@ -482,14 +517,22 @@ int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) picture->nb_fields = 2; + mpeg2dec->q_scale_type = 0; + decoder->intra_dc_precision = 7; + decoder->frame_pred_frame_dct = 1; + decoder->concealment_motion_vectors = 0; + decoder->scan = mpeg2_scan_norm; + decoder->picture_structure = FRAME_PICTURE; + mpeg2dec->copy_matrix = 0; + return 0; } static int picture_coding_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - picture_t * picture = mpeg2dec->picture; - decoder_t * decoder = &(mpeg2dec->decoder); + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); uint32_t flags; /* pre subtract 1 for use later in compute_motion_vector */ @@ -499,7 +542,7 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1; flags = picture->flags; - decoder->intra_dc_precision = (buffer[2] >> 2) & 3; + decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3); decoder->picture_structure = buffer[2] & 3; switch (decoder->picture_structure) { case TOP_FIELD: @@ -510,7 +553,7 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) case FRAME_PICTURE: if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { picture->nb_fields = (buffer[3] & 2) ? 3 : 2; - flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; + flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; flags |= (buffer[3] & 2) ? PIC_FLAG_REPEAT_FIRST_FIELD : 0; } else picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 
6 : 4) : 2; @@ -521,7 +564,7 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) decoder->top_field_first = buffer[3] >> 7; decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; - decoder->q_scale_type = (buffer[3] >> 4) & 1; + mpeg2dec->q_scale_type = buffer[3] & 16; decoder->intra_vlc_format = (buffer[3] >> 3) & 1; decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm; flags |= (buffer[4] & 0x80) ? PIC_FLAG_PROGRESSIVE_FRAME : 0; @@ -538,7 +581,7 @@ static int picture_coding_ext (mpeg2dec_t * mpeg2dec) static int picture_display_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - picture_t * picture = mpeg2dec->picture; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); int i, nb_pos; nb_pos = picture->nb_fields; @@ -564,6 +607,140 @@ static int picture_display_ext (mpeg2dec_t * mpeg2dec) return 0; } +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int old_type_b = (decoder->coding_type == B_TYPE); + int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; + + finalize_matrix (mpeg2dec); + decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE; + + if (mpeg2dec->state == STATE_PICTURE) { + mpeg2_picture_t * picture; + mpeg2_picture_t * other; + + decoder->second_field = 0; + + picture = other = mpeg2dec->pictures; + if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2)) + picture += 2; + else + other += 2; + mpeg2dec->picture = picture; + *picture = mpeg2dec->new_picture; + + if (!old_type_b) { + mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; + mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; + } + mpeg2dec->fbuf[0] = NULL; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.current_picture = picture; + mpeg2dec->info.display_picture = picture; + if (decoder->coding_type != B_TYPE) { + if (!low_delay) { + if (mpeg2dec->first) { + 
mpeg2dec->info.display_picture = NULL; + mpeg2dec->first = 0; + } else { + mpeg2dec->info.display_picture = other; + if (other->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = other + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; + } + } + if (!low_delay + !mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = + mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert]; + } + if (mpeg2dec->convert) { + mpeg2_convert_init_t convert_init; + if (!mpeg2dec->convert_start) { + int y_size, uv_size; + + mpeg2dec->decoder.convert_id = + mpeg2_malloc (mpeg2dec->convert_id_size, + MPEG2_ALLOC_CONVERT_ID); + mpeg2dec->convert (MPEG2_CONVERT_START, + mpeg2dec->decoder.convert_id, + &(mpeg2dec->sequence), + mpeg2dec->convert_stride, accels, + mpeg2dec->convert_arg, &convert_init); + mpeg2dec->convert_start = convert_init.start; + mpeg2dec->decoder.convert = convert_init.copy; + + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[0][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + y_size = decoder->stride_frame * 32; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[2][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + } + if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + + fbuf = 
&mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf; + fbuf->id = NULL; + fbuf->buf[0] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[0], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[1] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[1], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[2] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[2], + MPEG2_ALLOC_CONVERTED); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + int y_size, uv_size; + + fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); + fbuf->id = NULL; + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - decoder->chroma_format); + fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size, + MPEG2_ALLOC_YUV); + fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else { + decoder->second_field = 1; + mpeg2dec->picture++; /* second field picture */ + *(mpeg2dec->picture) = mpeg2dec->new_picture; + mpeg2dec->info.current_picture_2nd = mpeg2dec->picture; + if (low_delay || decoder->coding_type == B_TYPE) + mpeg2dec->info.display_picture_2nd = mpeg2dec->picture; + } + + info_user_data (mpeg2dec); +} + static int copyright_ext (mpeg2dec_t * mpeg2dec) { return 0; @@ -572,20 +749,16 @@ static int copyright_ext (mpeg2dec_t * mpeg2dec) static int quant_matrix_ext (mpeg2dec_t * mpeg2dec) { uint8_t * buffer = mpeg2dec->chunk_start; - decoder_t * decoder = &(mpeg2dec->decoder); - int i; - - if (buffer[0] & 8) { - for (i = 0; i < 64; i++) - decoder->intra_quantizer_matrix[mpeg2_scan_norm[i]] = - (buffer[i] << 5) | (buffer[i+1] >> 3); - buffer += 64; - } - - if (buffer[0] & 4) - for (i = 0; i < 64; i++) - decoder->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = - (buffer[i] << 6) | (buffer[i+1] >> 2); + int i, 
j; + + for (i = 0; i < 4; i++) + if (buffer[0] & (8 >> i)) { + for (j = 0; j < 64; j++) + mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] = + (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i)); + mpeg2dec->copy_matrix |= 1 << i; + buffer += 64; + } return 0; } @@ -609,41 +782,59 @@ int mpeg2_header_extension (mpeg2dec_t * mpeg2dec) int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec) { - if (!mpeg2dec->info.user_data_len) - mpeg2dec->info.user_data = mpeg2dec->chunk_start; - else - mpeg2dec->info.user_data_len += 3; - mpeg2dec->info.user_data_len += (mpeg2dec->chunk_ptr - 4 - - mpeg2dec->chunk_start); + mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start; mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1; return 0; } -int mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) +static void prescale (mpeg2dec_t * mpeg2dec, int index) { + static int non_linear_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 + }; + int i, j, k; + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + if (mpeg2dec->scaled[index] != mpeg2dec->q_scale_type) { + mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; + for (i = 0; i < 32; i++) { + k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); + for (j = 0; j < 64; j++) + decoder->quantizer_prescale[index][i][j] = + k * mpeg2dec->quantizer_matrix[index][j]; + } + } +} + +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 || mpeg2dec->state == STATE_PICTURE_2ND) ? 
STATE_SLICE : STATE_SLICE_1ST); + if (mpeg2dec->decoder.coding_type != D_TYPE) { + prescale (mpeg2dec, 0); + if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2]) + prescale (mpeg2dec, 2); + if (mpeg2dec->decoder.coding_type != I_TYPE) { + prescale (mpeg2dec, 1); + if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3]) + prescale (mpeg2dec, 3); + } + } + if (!(mpeg2dec->nb_decode_slices)) mpeg2dec->picture->flags |= PIC_FLAG_SKIP; else if (mpeg2dec->convert_start) { - int flags; - - switch (mpeg2dec->decoder.picture_structure) { - case TOP_FIELD: flags = CONVERT_TOP_FIELD; break; - case BOTTOM_FIELD: flags = CONVERT_BOTTOM_FIELD; break; - default: - flags = - ((mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) ? - CONVERT_FRAME : CONVERT_BOTH_FIELDS); - } - mpeg2dec->convert_start (mpeg2dec->convert_id, - mpeg2dec->fbuf[0]->buf, flags); - - mpeg2dec->decoder.convert = mpeg2dec->convert_copy; - mpeg2dec->decoder.fbuf_id = mpeg2dec->convert_id; + mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0], + mpeg2dec->picture, mpeg2dec->info.gop); if (mpeg2dec->decoder.coding_type == B_TYPE) mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2], @@ -660,37 +851,45 @@ int mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) } else { int b_type; - //mpeg2dec->decoder.convert = NULL; b_type = (mpeg2dec->decoder.coding_type == B_TYPE); mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf, mpeg2dec->fbuf[b_type + 1]->buf, mpeg2dec->fbuf[b_type]->buf); } mpeg2dec->action = NULL; - return 0; + return (mpeg2_state_t)-1; } -int mpeg2_header_end (mpeg2dec_t * mpeg2dec) +static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec) { - picture_t * picture; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.sequence = NULL; + mpeg2dec->info.gop = NULL; + mpeg2_header_state_init (mpeg2dec); + mpeg2dec->action = mpeg2_seek_header; + return mpeg2_seek_header (mpeg2dec); +} + +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec) 
+{ + mpeg2_picture_t * picture; int b_type; + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); picture = mpeg2dec->pictures; - if (mpeg2dec->picture < picture + 2) + if ((mpeg2dec->picture >= picture + 2) ^ b_type) picture = mpeg2dec->pictures + 2; - mpeg2dec->state = STATE_INVALID; - reset_info (&(mpeg2dec->info)); - b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + mpeg2_reset_info (&(mpeg2dec->info)); if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { mpeg2dec->info.display_picture = picture; if (picture->nb_fields == 1) mpeg2dec->info.display_picture_2nd = picture + 1; mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type]; - if (!mpeg2dec->convert_start) + if (!mpeg2dec->convert) mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1]; - } else if (!mpeg2dec->convert_start) + } else if (!mpeg2dec->convert) mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type]; - mpeg2dec->action = mpeg2_seek_sequence; + mpeg2dec->action = seek_sequence; return STATE_END; } diff --git a/libmpeg2/idct.c b/libmpeg2/idct.c index 35abf2c5da..8737e500b7 100644 --- a/libmpeg2/idct.c +++ b/libmpeg2/idct.c @@ -1,6 +1,6 @@ /* * idct.c - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
@@ -27,29 +27,35 @@ #include <inttypes.h> #include "mpeg2.h" -#include "mpeg2_internal.h" #include "attributes.h" +#include "mpeg2_internal.h" -#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ -#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ -#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ -#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ -#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ -#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ +#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ +#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ +#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ +#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ +#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ +#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ /* idct main entry point */ void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); void (* mpeg2_idct_add) (int last, int16_t * block, uint8_t * dest, int stride); -static uint8_t clip_lut[1024]; -#define CLIP(i) ((clip_lut+384)[(i)]) +/* + * In legal streams, the IDCT output should be between -384 and +384. + * In corrupted streams, it is possible to force the IDCT output to go + * to +-3826 - this is the worst case for a column IDCT where the + * column inputs are 16-bit values. 
+ */ +uint8_t mpeg2_clip[3840 * 2 + 256]; +#define CLIP(i) ((mpeg2_clip + 3840)[i]) #if 0 #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ do { \ - t0 = W0*d0 + W1*d1; \ - t1 = W0*d1 - W1*d0; \ + t0 = W0 * d0 + W1 * d1; \ + t1 = W0 * d1 - W1 * d0; \ } while (0) #else #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ @@ -69,7 +75,7 @@ static inline void idct_row (int16_t * const block) /* shortcut */ if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] | ((int32_t *)block)[3]))) { - uint32_t tmp = (uint16_t) (block[0] << 3); + uint32_t tmp = (uint16_t) (block[0] >> 1); tmp |= tmp << 16; ((int32_t *)block)[0] = tmp; ((int32_t *)block)[1] = tmp; @@ -78,7 +84,7 @@ static inline void idct_row (int16_t * const block) return; } - d0 = (block[0] << 11) + 128; + d0 = (block[0] << 11) + 2048; d1 = block[1]; d2 = block[2] << 11; d3 = block[3]; @@ -100,17 +106,17 @@ static inline void idct_row (int16_t * const block) b3 = t1 + t3; t0 -= t2; t1 -= t3; - b1 = ((t0 + t1) * 181) >> 8; - b2 = ((t0 - t1) * 181) >> 8; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; - block[0] = (a0 + b0) >> 8; - block[1] = (a1 + b1) >> 8; - block[2] = (a2 + b2) >> 8; - block[3] = (a3 + b3) >> 8; - block[4] = (a3 - b3) >> 8; - block[5] = (a2 - b2) >> 8; - block[6] = (a1 - b1) >> 8; - block[7] = (a0 - b0) >> 8; + block[0] = (a0 + b0) >> 12; + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; } static inline void idct_col (int16_t * const block) @@ -139,10 +145,10 @@ static inline void idct_col (int16_t * const block) BUTTERFLY (t2, t3, W3, W5, d1, d2); b0 = t0 + t2; b3 = t1 + t3; - t0 = (t0 - t2) >> 8; - t1 = (t1 - t3) >> 8; - b1 = (t0 + t1) * 181; - b2 = (t0 - t1) * 181; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; block[8*0] = (a0 + b0) >> 17; block[8*1] = (a1 + b1) >> 17; @@ -173,8 +179,8 @@ 
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, dest[6] = CLIP (block[6]); dest[7] = CLIP (block[7]); - block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; - block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; + ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; dest += stride; block += 8; @@ -186,7 +192,7 @@ static void mpeg2_idct_add_c (const int last, int16_t * block, { int i; - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { for (i = 0; i < 8; i++) idct_row (block + 8 * i); for (i = 0; i < 8; i++) @@ -201,8 +207,8 @@ static void mpeg2_idct_add_c (const int last, int16_t * block, dest[6] = CLIP (block[6] + dest[6]); dest[7] = CLIP (block[7] + dest[7]); - block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; - block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; + ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; dest += stride; block += 8; @@ -210,7 +216,7 @@ static void mpeg2_idct_add_c (const int last, int16_t * block, } else { int DC; - DC = (block[0] + 4) >> 3; + DC = (block[0] + 64) >> 7; block[0] = block[63] = 0; i = 8; do { @@ -241,33 +247,28 @@ void mpeg2_idct_init (uint32_t accel) } else #endif #ifdef ARCH_PPC -#ifdef HAVE_ALTIVEC if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { mpeg2_idct_copy = mpeg2_idct_copy_altivec; mpeg2_idct_add = mpeg2_idct_add_altivec; mpeg2_idct_altivec_init (); } else #endif -#endif #ifdef ARCH_ALPHA #ifdef CAN_COMPILE_ALPHA_MVI if (accel & MPEG2_ACCEL_ALPHA_MVI) { mpeg2_idct_copy = mpeg2_idct_copy_mvi; mpeg2_idct_add = mpeg2_idct_add_mvi; - mpeg2_idct_alpha_init (0); + mpeg2_idct_alpha_init (); } else #endif if (accel & MPEG2_ACCEL_ALPHA) { + int i; + mpeg2_idct_copy = mpeg2_idct_copy_alpha; mpeg2_idct_add = mpeg2_idct_add_alpha; - mpeg2_idct_alpha_init (1); - } else -#endif -#ifdef LIBMPEG2_MLIB - if (accel & MPEG2_ACCEL_MLIB) { - 
mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee; - mpeg2_idct_add = (getenv ("MLIB_NON_IEEE") ? - mpeg2_idct_add_mlib_non_ieee : mpeg2_idct_add_mlib); + mpeg2_idct_alpha_init (); + for (i = -3840; i < 3840 + 256; i++) + CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); } else #endif { @@ -277,8 +278,8 @@ void mpeg2_idct_init (uint32_t accel) mpeg2_idct_copy = mpeg2_idct_copy_c; mpeg2_idct_add = mpeg2_idct_add_c; - for (i = -384; i < 640; i++) - clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); + for (i = -3840; i < 3840 + 256; i++) + CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); for (i = 0; i < 64; i++) { j = mpeg2_scan_norm[i]; mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); diff --git a/libmpeg2/idct_alpha.c b/libmpeg2/idct_alpha.c index bc3ad479ae..bc8bd7b673 100644 --- a/libmpeg2/idct_alpha.c +++ b/libmpeg2/idct_alpha.c @@ -1,7 +1,7 @@ /* * idct_alpha.c - * Copyright (C) 2002 Falk Hueffner <falk@debian.org> - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
@@ -29,24 +29,26 @@ #include <stdlib.h> #include <inttypes.h> -#include "alpha_asm.h" +#include "mpeg2.h" #include "attributes.h" +#include "mpeg2_internal.h" +#include "alpha_asm.h" -#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ -#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ -#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ -#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ -#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ -#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ +#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ +#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ +#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ +#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ +#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ +#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ -static uint8_t clip_lut[1024]; -#define CLIP(i) ((clip_lut+384)[(i)]) +extern uint8_t mpeg2_clip[3840 * 2 + 256]; +#define CLIP(i) ((mpeg2_clip + 3840)[i]) #if 0 #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ do { \ - t0 = W0*d0 + W1*d1; \ - t1 = W0*d1 - W1*d0; \ + t0 = W0 * d0 + W1 * d1; \ + t1 = W0 * d1 - W1 * d0; \ } while (0) #else #define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ @@ -69,7 +71,7 @@ static inline void idct_row (int16_t * const block) /* shortcut */ if (likely (!((l & ~0xffffUL) | r))) { - uint64_t tmp = (uint16_t) (l << 3); + uint64_t tmp = (uint16_t) (l >> 1); tmp |= tmp << 16; tmp |= tmp << 32; ((int32_t *)block)[0] = tmp; @@ -79,7 +81,7 @@ static inline void idct_row (int16_t * const block) return; } - d0 = (sextw (l) << 11) + 128; + d0 = (sextw (l) << 11) + 2048; d1 = sextw (extwl (l, 2)); d2 = sextw (extwl (l, 4)) << 11; d3 = sextw (extwl (l, 6)); @@ -101,17 +103,17 @@ static inline void idct_row (int16_t * const block) b3 = t1 + t3; t0 -= t2; t1 -= t3; - b1 = ((t0 + t1) * 181) >> 8; - b2 = ((t0 - t1) * 181) >> 8; - - block[0] = (a0 + b0) >> 8; - block[1] = (a1 + b1) >> 8; - block[2] = (a2 + b2) >> 8; - block[3] = (a3 + b3) >> 8; - 
block[4] = (a3 - b3) >> 8; - block[5] = (a2 - b2) >> 8; - block[6] = (a1 - b1) >> 8; - block[7] = (a0 - b0) >> 8; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[0] = (a0 + b0) >> 12; + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; } static inline void idct_col (int16_t * const block) @@ -140,10 +142,10 @@ static inline void idct_col (int16_t * const block) BUTTERFLY (t2, t3, W3, W5, d1, d2); b0 = t0 + t2; b3 = t1 + t3; - t0 = (t0 - t2) >> 8; - t1 = (t1 - t3) >> 8; - b1 = (t0 + t1) * 181; - b2 = (t0 - t1) * 181; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; block[8*0] = (a0 + b0) >> 17; block[8*1] = (a1 + b1) >> 17; @@ -196,7 +198,7 @@ void mpeg2_idct_add_mvi (const int last, int16_t * block, uint64_t signmask; int i; - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { for (i = 0; i < 8; i++) idct_row (block + 8 * i); for (i = 0; i < 8; i++) @@ -244,7 +246,7 @@ void mpeg2_idct_add_mvi (const int last, int16_t * block, uint64_t p0, p1, p2, p3, p4, p5, p6, p7; uint64_t DCs; - DC = (block[0] + 4) >> 3; + DC = (block[0] + 64) >> 7; block[0] = block[63] = 0; p0 = ldq (dest + 0 * stride); @@ -321,7 +323,7 @@ void mpeg2_idct_add_alpha (const int last, int16_t * block, { int i; - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { for (i = 0; i < 8; i++) idct_row (block + 8 * i); for (i = 0; i < 8; i++) @@ -345,7 +347,7 @@ void mpeg2_idct_add_alpha (const int last, int16_t * block, } else { int DC; - DC = (block[0] + 4) >> 3; + DC = (block[0] + 64) >> 7; block[0] = block[63] = 0; i = 8; do { @@ -362,15 +364,12 @@ void mpeg2_idct_add_alpha (const int last, int16_t * block, } } -void mpeg2_idct_alpha_init(int no_mvi) +void mpeg2_idct_alpha_init (void) { 
extern uint8_t mpeg2_scan_norm[64]; extern uint8_t mpeg2_scan_alt[64]; int i, j; - if (no_mvi) - for (i = -384; i < 640; i++) - clip_lut[i + 384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); for (i = 0; i < 64; i++) { j = mpeg2_scan_norm[i]; mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); diff --git a/libmpeg2/idct_altivec.c b/libmpeg2/idct_altivec.c index e9fc28bc4b..6b1b8586c0 100644 --- a/libmpeg2/idct_altivec.c +++ b/libmpeg2/idct_altivec.c @@ -31,8 +31,8 @@ #include <inttypes.h> #include "mpeg2.h" -#include "mpeg2_internal.h" #include "attributes.h" +#include "mpeg2_internal.h" typedef vector signed char vector_s8_t; typedef vector unsigned char vector_u8_t; @@ -67,46 +67,11 @@ static const vector_s16_t constants ATTR_ALIGN(16) = static const vector_s16_t constants_1 ATTR_ALIGN(16) = VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725); static const vector_s16_t constants_2 ATTR_ALIGN(16) = - VEC_S16 (22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521); + VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289); static const vector_s16_t constants_3 ATTR_ALIGN(16) = VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692); static const vector_s16_t constants_4 ATTR_ALIGN(16) = - VEC_S16 (19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722); - -#define IDCT_HALF \ - /* 1st stage */ \ - t1 = vec_mradds (a1, vx7, vx1 ); \ - t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ - t7 = vec_mradds (a2, vx5, vx3); \ - t3 = vec_mradds (ma2, vx3, vx5); \ - \ - /* 2nd stage */ \ - t5 = vec_adds (vx0, vx4); \ - t0 = vec_subs (vx0, vx4); \ - t2 = vec_mradds (a0, vx6, vx2); \ - t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ - t6 = vec_adds (t8, t3); \ - t3 = vec_subs (t8, t3); \ - t8 = vec_subs (t1, t7); \ - t1 = vec_adds (t1, t7); \ - \ - /* 3rd stage */ \ - t7 = vec_adds (t5, t2); \ - t2 = vec_subs (t5, t2); \ - t5 = vec_adds (t0, t4); \ - t0 = vec_subs (t0, t4); \ - t4 = vec_subs (t8, t3); \ - t3 = vec_adds (t8, t3); \ - \ - /* 
4th stage */ \ - vy0 = vec_adds (t7, t1); \ - vy7 = vec_subs (t7, t1); \ - vy1 = vec_mradds (c4, t3, t5); \ - vy6 = vec_mradds (mc4, t3, t5); \ - vy2 = vec_mradds (c4, t4, t0); \ - vy5 = vec_mradds (mc4, t4, t0); \ - vy3 = vec_adds (t2, t6); \ - vy4 = vec_subs (t2, t6); + VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895); #define IDCT \ vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ @@ -124,18 +89,49 @@ static const vector_s16_t constants_4 ATTR_ALIGN(16) = bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3); \ \ zero = vec_splat_s16 (0); \ - shift = vec_splat_u16 (4); \ \ - vx0 = vec_mradds (vec_sl (block[0], shift), constants_1, zero); \ - vx1 = vec_mradds (vec_sl (block[1], shift), constants_2, zero); \ - vx2 = vec_mradds (vec_sl (block[2], shift), constants_3, zero); \ - vx3 = vec_mradds (vec_sl (block[3], shift), constants_4, zero); \ - vx4 = vec_mradds (vec_sl (block[4], shift), constants_1, zero); \ - vx5 = vec_mradds (vec_sl (block[5], shift), constants_4, zero); \ - vx6 = vec_mradds (vec_sl (block[6], shift), constants_3, zero); \ - vx7 = vec_mradds (vec_sl (block[7], shift), constants_2, zero); \ + vx0 = vec_adds (block[0], block[4]); \ + vx4 = vec_subs (block[0], block[4]); \ + t5 = vec_mradds (vx0, constants_1, zero); \ + t0 = vec_mradds (vx4, constants_1, zero); \ + \ + vx1 = vec_mradds (a1, block[7], block[1]); \ + vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \ + t1 = vec_mradds (vx1, constants_2, zero); \ + t8 = vec_mradds (vx7, constants_2, zero); \ + \ + vx2 = vec_mradds (a0, block[6], block[2]); \ + vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \ + t2 = vec_mradds (vx2, constants_3, zero); \ + t4 = vec_mradds (vx6, constants_3, zero); \ + \ + vx3 = vec_mradds (block[3], constants_4, zero); \ + vx5 = vec_mradds (block[5], constants_4, zero); \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs 
(t1, t7); \ + t1 = vec_adds (t1, t7); \ + t6 = vec_mradds (a0, t6, t6); /* a0+1 == 2*c4 */ \ + t1 = vec_mradds (a0, t1, t1); /* a0+1 == 2*c4 */ \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ \ - IDCT_HALF \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_adds (t5, t3); \ + vy6 = vec_subs (t5, t3); \ + vy2 = vec_adds (t0, t4); \ + vy5 = vec_subs (t0, t4); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); \ \ vx0 = vec_mergeh (vy0, vy4); \ vx1 = vec_mergel (vy0, vy4); \ @@ -155,7 +151,7 @@ static const vector_s16_t constants_4 ATTR_ALIGN(16) = vy6 = vec_mergeh (vx3, vx7); \ vy7 = vec_mergel (vx3, vx7); \ \ - vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ + vx0 = vec_mergeh (vy0, vy4); \ vx1 = vec_mergel (vy0, vy4); \ vx2 = vec_mergeh (vy1, vy5); \ vx3 = vec_mergel (vy1, vy5); \ @@ -164,7 +160,39 @@ static const vector_s16_t constants_4 ATTR_ALIGN(16) = vx6 = vec_mergeh (vy3, vy7); \ vx7 = vec_mergel (vy3, vy7); \ \ - IDCT_HALF \ + vx0 = vec_adds (vx0, bias); \ + t5 = vec_adds (vx0, vx4); \ + t0 = vec_subs (vx0, vx4); \ + \ + t1 = vec_mradds (a1, vx7, vx1); \ + t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ + \ + t2 = vec_mradds (a0, vx6, vx2); \ + t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ + \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_mradds (c4, t3, t5); \ + vy6 = vec_mradds (mc4, t3, t5); \ + vy2 = vec_mradds (c4, t4, t0); \ + vy5 = vec_mradds (mc4, t4, t0); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, 
t6); \ \ shift = vec_splat_u16 (6); \ vx0 = vec_sra (vy0, shift); \ diff --git a/libmpeg2/idct_mlib.c b/libmpeg2/idct_mlib.c deleted file mode 100644 index 83c39738d8..0000000000 --- a/libmpeg2/idct_mlib.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * idct_mlib.c - * Copyright (C) 1999-2003 Håkan Hjort <d95hjort@dtek.chalmers.se> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include <mlib_types.h> -#include <mlib_status.h> -#include <mlib_sys.h> -#include <mlib_video.h> -#include <string.h> -#include <inttypes.h> - -#include "mpeg2.h" -#include "mpeg2_internal.h" - -void mpeg2_idct_add_mlib (const int last, int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT_IEEE_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block, - uint8_t * const dest, const int stride) -{ - mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block, - uint8_t * const dest, const int 
stride) -{ - mlib_VideoIDCT8x8_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -#endif diff --git a/libmpeg2/idct_mmx.c b/libmpeg2/idct_mmx.c index e2afe6bb45..51d01dc926 100644 --- a/libmpeg2/idct_mmx.c +++ b/libmpeg2/idct_mmx.c @@ -28,11 +28,11 @@ #include <inttypes.h> #include "mpeg2.h" -#include "mpeg2_internal.h" #include "attributes.h" +#include "mpeg2_internal.h" #include "mmx.h" -#define ROW_SHIFT 11 +#define ROW_SHIFT 15 #define COL_SHIFT 6 #define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT))) @@ -701,7 +701,7 @@ do { \ static inline void block_add_DC (int16_t * const block, uint8_t * dest, const int stride, const int cpu) { - movd_v2r ((block[0] + 4) >> 3, mm0); + movd_v2r ((block[0] + 64) >> 7, mm0); pxor_r2r (mm1, mm1); movq_m2r (*dest, mm2); dup4 (mm0); @@ -763,7 +763,7 @@ void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest, void mpeg2_idct_add_mmxext (const int last, int16_t * const block, uint8_t * const dest, const int stride) { - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { mmxext_idct (block); block_add (block, dest, stride); block_zero (block); @@ -786,7 +786,7 @@ void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest, void mpeg2_idct_add_mmx (const int last, int16_t * const block, uint8_t * const dest, const int stride) { - if (last != 129 || (block[0] & 7) == 4) { + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { mmx_idct (block); block_add (block, dest, stride); block_zero (block); diff --git a/libmpeg2/mmx.h b/libmpeg2/mmx.h index c05bfe1ccb..08b4d47760 100644 --- a/libmpeg2/mmx.h +++ b/libmpeg2/mmx.h @@ -1,6 +1,6 @@ /* * mmx.h - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free 
MPEG-2 video stream decoder. diff --git a/libmpeg2/motion_comp.c b/libmpeg2/motion_comp.c index 0ef3cf30bf..6686b016c1 100644 --- a/libmpeg2/motion_comp.c +++ b/libmpeg2/motion_comp.c @@ -26,6 +26,7 @@ #include <inttypes.h> #include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" mpeg2_mc_t mpeg2_mc; @@ -53,9 +54,9 @@ void mpeg2_mc_init (uint32_t accel) mpeg2_mc = mpeg2_mc_alpha; else #endif -#ifdef LIBMPEG2_MLIB - if (accel & MPEG2_ACCEL_MLIB) - mpeg2_mc = mpeg2_mc_mlib; +#ifdef ARCH_SPARC + if (accel & MPEG2_ACCEL_SPARC_VIS) + mpeg2_mc = mpeg2_mc_vis; else #endif mpeg2_mc = mpeg2_mc_c; diff --git a/libmpeg2/motion_comp_alpha.c b/libmpeg2/motion_comp_alpha.c index 86deb33fcd..05cd550841 100644 --- a/libmpeg2/motion_comp_alpha.c +++ b/libmpeg2/motion_comp_alpha.c @@ -1,6 +1,6 @@ /* * motion_comp_alpha.c - * Copyright (C) 2002 Falk Hueffner <falk@debian.org> + * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. * See http://libmpeg2.sourceforge.net/ for updates. @@ -27,135 +27,136 @@ #include <inttypes.h> #include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" #include "alpha_asm.h" -static inline uint64_t avg2(uint64_t a, uint64_t b) +static inline uint64_t avg2 (uint64_t a, uint64_t b) { - return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); + return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1); } // Load two unaligned quadwords from addr. This macro only works if // addr is actually unaligned. 
-#define ULOAD16(ret_l, ret_r, addr) \ +#define ULOAD16(ret_l,ret_r,addr) \ do { \ - uint64_t _l = ldq_u(addr + 0); \ - uint64_t _m = ldq_u(addr + 8); \ - uint64_t _r = ldq_u(addr + 16); \ - ret_l = extql(_l, addr) | extqh(_m, addr); \ - ret_r = extql(_m, addr) | extqh(_r, addr); \ + uint64_t _l = ldq_u (addr + 0); \ + uint64_t _m = ldq_u (addr + 8); \ + uint64_t _r = ldq_u (addr + 16); \ + ret_l = extql (_l, addr) | extqh (_m, addr); \ + ret_r = extql (_m, addr) | extqh (_r, addr); \ } while (0) // Load two aligned quadwords from addr. -#define ALOAD16(ret_l, ret_r, addr) \ +#define ALOAD16(ret_l,ret_r,addr) \ do { \ - ret_l = ldq(addr); \ - ret_r = ldq(addr + 8); \ + ret_l = ldq (addr); \ + ret_r = ldq (addr + 8); \ } while (0) -#define OP8(LOAD, LOAD16, STORE) \ +#define OP8(LOAD,LOAD16,STORE) \ do { \ - STORE(LOAD(pixels), block); \ + STORE (LOAD (pixels), block); \ pixels += line_size; \ block += line_size; \ } while (--h) -#define OP16(LOAD, LOAD16, STORE) \ +#define OP16(LOAD,LOAD16,STORE) \ do { \ uint64_t l, r; \ - LOAD16(l, r, pixels); \ - STORE(l, block); \ - STORE(r, block + 8); \ + LOAD16 (l, r, pixels); \ + STORE (l, block); \ + STORE (r, block + 8); \ pixels += line_size; \ block += line_size; \ } while (--h) -#define OP8_X2(LOAD, LOAD16, STORE) \ +#define OP8_X2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1; \ \ - p0 = LOAD(pixels); \ + p0 = LOAD (pixels); \ p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56); \ - STORE(avg2(p0, p1), block); \ + STORE (avg2 (p0, p1), block); \ pixels += line_size; \ block += line_size; \ } while (--h) -#define OP16_X2(LOAD, LOAD16, STORE) \ +#define OP16_X2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1; \ \ - LOAD16(p0, p1, pixels); \ - STORE(avg2(p0, p0 >> 8 | p1 << 56), block); \ - STORE(avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \ - block + 8); \ + LOAD16 (p0, p1, pixels); \ + STORE (avg2(p0, p0 >> 8 | p1 << 56), block); \ + STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \ + block + 8); \ pixels += line_size; 
\ block += line_size; \ } while (--h) -#define OP8_Y2(LOAD, LOAD16, STORE) \ +#define OP8_Y2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1; \ - p0 = LOAD(pixels); \ + p0 = LOAD (pixels); \ pixels += line_size; \ - p1 = LOAD(pixels); \ + p1 = LOAD (pixels); \ do { \ - uint64_t av = avg2(p0, p1); \ + uint64_t av = avg2 (p0, p1); \ if (--h == 0) line_size = 0; \ pixels += line_size; \ p0 = p1; \ - p1 = LOAD(pixels); \ - STORE(av, block); \ + p1 = LOAD (pixels); \ + STORE (av, block); \ block += line_size; \ } while (h); \ } while (0) -#define OP16_Y2(LOAD, LOAD16, STORE) \ +#define OP16_Y2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0l, p0r, p1l, p1r; \ - LOAD16(p0l, p0r, pixels); \ + LOAD16 (p0l, p0r, pixels); \ pixels += line_size; \ - LOAD16(p1l, p1r, pixels); \ + LOAD16 (p1l, p1r, pixels); \ do { \ uint64_t avl, avr; \ if (--h == 0) line_size = 0; \ - avl = avg2(p0l, p1l); \ - avr = avg2(p0r, p1r); \ + avl = avg2 (p0l, p1l); \ + avr = avg2 (p0r, p1r); \ p0l = p1l; \ p0r = p1r; \ pixels += line_size; \ - LOAD16(p1l, p1r, pixels); \ - STORE(avl, block); \ - STORE(avr, block + 8); \ + LOAD16 (p1l, p1r, pixels); \ + STORE (avl, block); \ + STORE (avr, block + 8); \ block += line_size; \ } while (h); \ } while (0) -#define OP8_XY2(LOAD, LOAD16, STORE) \ +#define OP8_XY2(LOAD,LOAD16,STORE) \ do { \ uint64_t pl, ph; \ - uint64_t p1 = LOAD(pixels); \ + uint64_t p1 = LOAD (pixels); \ uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56); \ \ - ph = ((p1 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p2 & ~BYTE_VEC(0x03)) >> 2); \ - pl = (p1 & BYTE_VEC(0x03)) \ - + (p2 & BYTE_VEC(0x03)); \ + ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + pl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ \ do { \ uint64_t npl, nph; \ \ pixels += line_size; \ - p1 = LOAD(pixels); \ + p1 = LOAD (pixels); \ p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56); \ - nph = ((p1 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p2 & ~BYTE_VEC(0x03)) >> 2); \ - npl = (p1 & BYTE_VEC(0x03)) \ - + (p2 & 
BYTE_VEC(0x03)); \ + nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + npl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ \ - STORE(ph + nph \ - + (((pl + npl + BYTE_VEC(0x02)) >> 2) \ - & BYTE_VEC(0x03)), block); \ + STORE (ph + nph + \ + (((pl + npl + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC (0x03)), block); \ \ block += line_size; \ pl = npl; \ @@ -163,44 +164,44 @@ static inline uint64_t avg2(uint64_t a, uint64_t b) } while (--h); \ } while (0) -#define OP16_XY2(LOAD, LOAD16, STORE) \ +#define OP16_XY2(LOAD,LOAD16,STORE) \ do { \ uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r; \ - LOAD16(p0, p2, pixels); \ + LOAD16 (p0, p2, pixels); \ p1 = p0 >> 8 | (p2 << 56); \ - p3 = p2 >> 8 | ((uint64_t) pixels[16] << 56); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ \ - ph_l = ((p0 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p1 & ~BYTE_VEC(0x03)) >> 2); \ - pl_l = (p0 & BYTE_VEC(0x03)) \ - + (p1 & BYTE_VEC(0x03)); \ - ph_r = ((p2 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p3 & ~BYTE_VEC(0x03)) >> 2); \ - pl_r = (p2 & BYTE_VEC(0x03)) \ - + (p3 & BYTE_VEC(0x03)); \ + ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ + pl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC(0x03))); \ + ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ + pl_r = ((p2 & BYTE_VEC (0x03)) + \ + (p3 & BYTE_VEC (0x03))); \ \ do { \ uint64_t npl_l, nph_l, npl_r, nph_r; \ \ pixels += line_size; \ - LOAD16(p0, p2, pixels); \ + LOAD16 (p0, p2, pixels); \ p1 = p0 >> 8 | (p2 << 56); \ - p3 = p2 >> 8 | ((uint64_t) pixels[16] << 56); \ - nph_l = ((p0 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p1 & ~BYTE_VEC(0x03)) >> 2); \ - npl_l = (p0 & BYTE_VEC(0x03)) \ - + (p1 & BYTE_VEC(0x03)); \ - nph_r = ((p2 & ~BYTE_VEC(0x03)) >> 2) \ - + ((p3 & ~BYTE_VEC(0x03)) >> 2); \ - npl_r = (p2 & BYTE_VEC(0x03)) \ - + (p3 & BYTE_VEC(0x03)); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ + nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC 
(0x03)) >> 2)); \ + npl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC (0x03))); \ + nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ + npl_r = ((p2 & BYTE_VEC (0x03)) + \ + (p3 & BYTE_VEC (0x03))); \ \ - STORE(ph_l + nph_l \ - + (((pl_l + npl_l + BYTE_VEC(0x02)) >> 2) \ - & BYTE_VEC(0x03)), block); \ - STORE(ph_r + nph_r \ - + (((pl_r + npl_r + BYTE_VEC(0x02)) >> 2) \ - & BYTE_VEC(0x03)), block + 8); \ + STORE (ph_l + nph_l + \ + (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block); \ + STORE (ph_r + nph_r + \ + (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block + 8); \ \ block += line_size; \ pl_l = npl_l; \ @@ -210,34 +211,33 @@ static inline uint64_t avg2(uint64_t a, uint64_t b) } while (--h); \ } while (0) -#define MAKE_OP(OPNAME, SIZE, SUFF, OPKIND, STORE) \ +#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \ static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \ (uint8_t *restrict block, const uint8_t *restrict pixels, \ int line_size, int h) \ { \ if ((uint64_t) pixels & 0x7) { \ - OPKIND(uldq, ULOAD16, STORE); \ + OPKIND (uldq, ULOAD16, STORE); \ } else { \ - OPKIND(ldq, ALOAD16, STORE); \ + OPKIND (ldq, ALOAD16, STORE); \ } \ } -#define PIXOP(OPNAME, STORE) \ - MAKE_OP(OPNAME, 8, o, OP8, STORE); \ - MAKE_OP(OPNAME, 8, x, OP8_X2, STORE); \ - MAKE_OP(OPNAME, 8, y, OP8_Y2, STORE); \ - MAKE_OP(OPNAME, 8, xy, OP8_XY2, STORE); \ - MAKE_OP(OPNAME, 16, o, OP16, STORE); \ - MAKE_OP(OPNAME, 16, x, OP16_X2, STORE); \ - MAKE_OP(OPNAME, 16, y, OP16_Y2, STORE); \ - MAKE_OP(OPNAME, 16, xy, OP16_XY2, STORE); - -#define STORE(l, b) stq(l, b) -PIXOP(put, STORE); - +#define PIXOP(OPNAME,STORE) \ + MAKE_OP (OPNAME, 8, o, OP8, STORE); \ + MAKE_OP (OPNAME, 8, x, OP8_X2, STORE); \ + MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE); \ + MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE); \ + MAKE_OP (OPNAME, 16, o, OP16, STORE); \ + MAKE_OP (OPNAME, 16, x, OP16_X2, STORE); \ + MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE); \ + 
MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE); + +#define STORE(l,b) stq (l, b) +PIXOP (put, STORE); #undef STORE -#define STORE(l, b) stq(avg2(l, ldq(b)), b); -PIXOP(avg, STORE); +#define STORE(l,b) stq (avg2 (l, ldq (b)), b); +PIXOP (avg, STORE); mpeg2_mc_t mpeg2_mc_alpha = { { MC_put_o_16_alpha, MC_put_x_16_alpha, diff --git a/libmpeg2/motion_comp_altivec.c b/libmpeg2/motion_comp_altivec.c index f5d884e6e4..4356aa6e78 100644 --- a/libmpeg2/motion_comp_altivec.c +++ b/libmpeg2/motion_comp_altivec.c @@ -31,6 +31,7 @@ #include <inttypes.h> #include "mpeg2.h" +#include "attributes.h" #include "mpeg2_internal.h" typedef vector signed char vector_s8_t; diff --git a/libmpeg2/motion_comp_mlib.c b/libmpeg2/motion_comp_mlib.c deleted file mode 100644 index c7ed6b285a..0000000000 --- a/libmpeg2/motion_comp_mlib.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * motion_comp_mlib.c - * Copyright (C) 2000-2003 Håkan Hjort <d95hjort@dtek.chalmers.se> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include <mlib_types.h> -#include <mlib_status.h> -#include <mlib_sys.h> -#include <mlib_video.h> -#include <inttypes.h> - -#include "mpeg2.h" -#include "mpeg2_internal.h" - -static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride); -} - -static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride); -} - -static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - 
mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride); -} - -static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref, - stride, stride); -} - -static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride); -} - 
-static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); - else - mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); -} - -static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref, - stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref, - stride, stride); -} - -MPEG2_MC_EXTERN (mlib) - -#endif diff --git a/libmpeg2/motion_comp_mmx.c b/libmpeg2/motion_comp_mmx.c index 2434ccee11..aceb0080b9 100644 --- a/libmpeg2/motion_comp_mmx.c +++ b/libmpeg2/motion_comp_mmx.c @@ -28,8 +28,8 @@ #include <inttypes.h> #include "mpeg2.h" -#include "mpeg2_internal.h" #include "attributes.h" +#include "mpeg2_internal.h" #include "mmx.h" #define CPU_MMXEXT 0 diff --git a/libmpeg2/motion_comp_vis.c b/libmpeg2/motion_comp_vis.c new file mode 100644 index 0000000000..54c0f7e758 --- /dev/null +++ b/libmpeg2/motion_comp_vis.c @@ -0,0 +1,2061 @@ +/* + * motion_comp_vis.c + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_SPARC + +#include <inttypes.h> + +#include "mpeg2.h" +#include "attributes.h" +#include "mpeg2_internal.h" +#include "vis.h" + +/* The trick used in some of this file is the formula from the MMX + * motion comp code, which is: + * + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + * + * This allows us to average 8 bytes at a time in a 64-bit FPU reg. + * We avoid overflows by masking before we do the shift, and we + * implement the shift by multiplying by 1/2 using mul8x16. So in + * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask + * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and + * the value 0x80808080 is in f8): + * + * fxor f0, f2, f10 + * fand f10, f4, f10 + * fmul8x16 f8, f10, f10 + * fand f10, f6, f10 + * for f0, f2, f12 + * fpsub16 f12, f10, f10 + */ + +#define DUP4(x) {x, x, x, x} +#define DUP8(x) {x, x, x, x, x, x, x, x} +static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); +static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); +static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); +static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); +static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); +static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); +static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); +static const int16_t constants256_512[] ATTR_ALIGN(8) = + {256, 512, 256, 512}; +static const int16_t constants256_1024[] ATTR_ALIGN(8) = + {256, 1024, 256, 1024}; + +#define REF_0 0 +#define 
REF_0_1 1 +#define REF_2 2 +#define REF_2_1 3 +#define REF_4 4 +#define REF_4_1 5 +#define REF_6 6 +#define REF_6_1 7 +#define REF_S0 8 +#define REF_S0_1 9 +#define REF_S2 10 +#define REF_S2_1 11 +#define REF_S4 12 +#define REF_S4_1 13 +#define REF_S6 14 +#define REF_S6_1 15 +#define DST_0 16 +#define DST_1 17 +#define DST_2 18 +#define DST_3 19 +#define CONST_1 20 +#define CONST_2 20 +#define CONST_3 20 +#define CONST_6 20 +#define MASK_fe 20 +#define CONST_128 22 +#define CONST_256 22 +#define CONST_512 22 +#define CONST_1024 22 +#define TMP0 24 +#define TMP1 25 +#define TMP2 26 +#define TMP3 27 +#define TMP4 28 +#define TMP5 29 +#define ZERO 30 +#define MASK_7f 30 + +#define TMP6 32 +#define TMP8 34 +#define TMP10 36 +#define TMP12 38 +#define TMP14 40 +#define TMP16 42 +#define TMP18 44 +#define TMP20 46 +#define TMP22 48 +#define TMP24 50 +#define TMP26 52 +#define TMP28 54 +#define TMP30 56 +#define TMP32 58 + +static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + do { /* 5 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + + vis_faligndata(TMP2, TMP4, REF_2); + vis_st64_2(REF_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + do { /* 4 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + /* stall */ + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + dest += stride; + } while (--height); +} + + +static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + + vis_ld64(dest[0], DST_0); + + vis_ld64(dest[8], DST_2); + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP2, TMP4, REF_2); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_ld64_2(ref, 8, TMP16); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP18); + vis_faligndata(TMP2, TMP4, REF_2); + ref += stride; + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, 
TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_ld64_2(dest, stride, DST_0); + vis_faligndata(TMP14, TMP16, REF_0); + + vis_ld64_2(dest, stride_8, DST_2); + vis_faligndata(TMP16, TMP18, REF_2); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_2); + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); +} + +static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + + vis_ld64(dest[0], DST_0); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + ref += stride; + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_ld64(ref[0], TMP12); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP2); + vis_xor(DST_0, REF_0, TMP0); + ref += stride; + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + + vis_faligndata(TMP12, TMP2, REF_0); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(DST_0, REF_0, TMP0); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); +} + +static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; 
+ + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, 16, TMP4); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 34 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP14); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_ld64_2(ref, 8, TMP16); + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_ld64_2(ref, 16, TMP18); + ref += stride; + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP14, TMP16, REF_0); + + vis_faligndata(TMP16, TMP18, REF_4); + + if (off 
!= 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP14, TMP16, REF_2); + vis_faligndata(TMP16, TMP18, REF_6); + } else { + vis_src1(TMP16, REF_2); + vis_src1(TMP18, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); +} + +static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long 
off_plus_1 = off + 1; + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 20 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP8); + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_alignaddr_g0((void *)off); + vis_faligndata(TMP8, TMP10, REF_0); + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP8, TMP10, REF_2); + } else { + vis_src1(TMP10, REF_2); + } + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + 
vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; +} + +static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + do { /* 26 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[16], TMP4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(dest[8], DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_mul8x16al(DST_0, CONST_512, TMP4); + vis_padd16(TMP2, TMP6, TMP2); + + vis_mul8x16al(DST_1, CONST_512, TMP6); + + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4, CONST_256, TMP16); + + vis_padd16(TMP0, CONST_3, TMP8); + vis_mul8x16au(REF_4_1, CONST_256, TMP18); + + vis_padd16(TMP2, CONST_3, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_padd16(TMP16, TMP12, TMP0); + + vis_st64(DST_0, 
dest[0]); + vis_mul8x16al(DST_2, CONST_512, TMP4); + vis_padd16(TMP18, TMP14, TMP2); + + vis_mul8x16al(DST_3, CONST_512, TMP6); + vis_padd16(TMP0, CONST_3, TMP0); + + vis_padd16(TMP2, CONST_3, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[8]); + + ref += stride; + dest += stride; + } while (--height); +} + +static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_times_2 = stride << 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + height >>= 2; + do { /* 47 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + ref += stride; + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[0], TMP4); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP8); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP4, TMP6, REF_4); + + vis_ld64(ref[0], TMP12); + + vis_ld64_2(ref, 8, TMP14); + ref += stride; + vis_faligndata(TMP8, TMP10, REF_S0); + + vis_faligndata(TMP12, TMP14, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP4, TMP6, REF_6); + + vis_faligndata(TMP8, TMP10, REF_S2); + + vis_faligndata(TMP12, TMP14, REF_S6); + } else { + vis_ld64(dest[0], DST_0); + vis_src1(TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_src1(TMP6, REF_6); + + vis_src1(TMP10, REF_S2); + + vis_src1(TMP14, REF_S6); + } + + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_2_1, 
CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP8); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP16, TMP0); + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP18, TMP2); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_2, CONST_512, TMP16); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(DST_3, CONST_512, TMP18); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP10, CONST_3, TMP10); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP8, TMP16, TMP8); + + vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); + vis_padd16(TMP10, TMP18, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_pmerge(ZERO, REF_S0, TMP0); + + vis_pmerge(ZERO, REF_S2, TMP24); + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16au(REF_S4, CONST_256, TMP8); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16au(REF_S4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP24, TMP0); + vis_mul8x16au(REF_S6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_S6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP10, CONST_3, TMP10); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); + + vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); + vis_padd16(TMP0, TMP16, TMP0); + + vis_padd16(TMP2, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_padd16(TMP10, TMP14, TMP10); + 
vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(TMP8, TMP20, TMP8); + + vis_padd16(TMP10, TMP22, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP6); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP6, TMP8, REF_2); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP8, TMP10, REF_6); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_or(REF_0, REF_2, TMP14); + + vis_ld64(ref[0], TMP6); + vis_or(REF_4, REF_6, TMP18); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + 
vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_faligndata(TMP8, TMP10, REF_6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + vis_or(REF_0, REF_2, TMP14); + + vis_or(REF_4, REF_6, TMP18); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); +} + +static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + vis_ld64(ref[0], TMP4); + + vis_ld64_2(ref, offset, TMP6); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP4, TMP6, REF_2); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_faligndata(TMP0, TMP2, REF_2); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); +} + +static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int stride_16; + int offset; + + vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + stride_16 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_6); + height >>= 1; + + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP12); + vis_mul8x16au(REF_2_1, CONST_256, TMP14); + + vis_ld64_2(ref, stride_8, TMP2); + vis_pmerge(ZERO, REF_6, TMP16); + vis_mul8x16au(REF_6_1, CONST_256, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, stride, TMP6); + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_ld64_2(ref, stride_8, TMP8); + vis_pmerge(ZERO, REF_4, TMP4); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + + vis_ld64_2(dest, stride, REF_S0/*DST_4*/); + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); + vis_faligndata(TMP8, TMP10, REF_6); + vis_mul8x16al(DST_0, CONST_512, TMP20); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_1, CONST_512, TMP22); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP4, CONST_3, TMP4); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_padd16(TMP6, CONST_3, TMP6); + + vis_padd16(TMP12, TMP20, TMP12); + vis_mul8x16al(REF_S0, CONST_512, TMP20); + + vis_padd16(TMP14, TMP22, TMP14); + vis_mul8x16al(REF_S0_1, CONST_512, TMP22); + + vis_padd16(TMP16, TMP24, TMP16); + vis_mul8x16al(REF_S2, CONST_512, TMP24); + + vis_padd16(TMP18, TMP26, TMP18); + vis_mul8x16al(REF_S2_1, CONST_512, TMP26); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_2, 
CONST_256, TMP28); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_2_1, CONST_256, TMP30); + + vis_padd16(TMP16, TMP4, TMP16); + vis_mul8x16au(REF_6, CONST_256, REF_S4); + + vis_padd16(TMP18, TMP6, TMP18); + vis_mul8x16au(REF_6_1, CONST_256, REF_S6); + + vis_pack16(TMP12, DST_0); + vis_padd16(TMP28, TMP0, TMP12); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP30, TMP2, TMP14); + + vis_pack16(TMP16, DST_2); + vis_padd16(REF_S4, TMP4, TMP16); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(REF_S6, TMP6, TMP18); + + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + vis_pack16(TMP12, DST_0); + + vis_padd16(TMP16, TMP24, TMP16); + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(TMP18, TMP26, TMP18); + vis_pack16(TMP16, DST_2); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8; + int offset; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, offset, TMP2); + stride_8 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + + height >>= 1; + do { /* 20 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP8); + vis_mul8x16au(REF_2_1, CONST_256, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + + vis_ld64(dest[0], DST_0); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride, TMP4); + vis_mul8x16al(DST_0, CONST_512, TMP16); + vis_pmerge(ZERO, REF_0, TMP12); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_mul8x16al(DST_1, CONST_512, TMP18); + vis_pmerge(ZERO, REF_0_1, TMP14); + + vis_padd16(TMP12, CONST_3, TMP12); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP14, CONST_3, TMP14); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_faligndata(TMP4, TMP6, REF_2); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_mul8x16au(REF_2, CONST_256, TMP20); + + vis_padd16(TMP8, TMP16, TMP0); + vis_mul8x16au(REF_2_1, CONST_256, TMP22); + + vis_padd16(TMP10, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + + vis_padd16(TMP12, TMP24, TMP0); + + vis_padd16(TMP14, TMP26, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + 
vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants2[0], CONST_2); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, CONST_2, TMP8); + vis_mul8x16au(REF_4, CONST_256, TMP0); + + vis_padd16(TMP2, CONST_2, TMP10); + vis_mul8x16au(REF_4_1, CONST_256, TMP2); + + vis_padd16(TMP8, TMP4, TMP8); + vis_mul8x16au(REF_6, CONST_256, TMP4); + + vis_padd16(TMP10, TMP6, TMP10); + 
vis_mul8x16au(REF_6_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP8, TMP12); + + vis_padd16(TMP14, TMP10, TMP14); + + vis_padd16(TMP12, TMP16, TMP12); + + vis_padd16(TMP14, TMP18, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP0, CONST_2, TMP12); + + vis_mul8x16au(REF_S0, CONST_256, TMP0); + vis_padd16(TMP2, CONST_2, TMP14); + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_padd16(TMP12, TMP4, TMP12); + + vis_mul8x16au(REF_S2, CONST_256, TMP4); + vis_padd16(TMP14, TMP6, TMP14); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + vis_padd16(TMP20, TMP12, TMP20); + + vis_padd16(TMP22, TMP14, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(TMP0, TMP4, TMP24); + + vis_mul8x16au(REF_S4, CONST_256, TMP0); + vis_padd16(TMP2, TMP6, TMP26); + + vis_mul8x16au(REF_S4_1, CONST_256, TMP2); + vis_padd16(TMP24, TMP8, TMP24); + + vis_padd16(TMP26, TMP10, TMP26); + vis_pack16(TMP24, DST_0); + + vis_pack16(TMP26, DST_1); + vis_st64(DST_0, dest[0]); + vis_pmerge(ZERO, REF_S6, TMP4); + + vis_pmerge(ZERO, REF_S6_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + + vis_padd16(TMP0, TMP12, TMP0); + + vis_padd16(TMP2, TMP14, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(constants2[0], CONST_2); + + vis_ld64(constants256_512[0], CONST_256); + 
vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 26 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S2, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S0_1, CONST_256, TMP10); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_ld64_2(ref, stride, TMP4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_pmerge(ZERO, REF_S4, TMP18); + + vis_pmerge(ZERO, REF_S4_1, TMP20); + + vis_faligndata(TMP4, TMP6, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_padd16(TMP18, CONST_2, TMP18); + vis_mul8x16au(REF_S6, CONST_256, TMP22); + + vis_padd16(TMP20, CONST_2, TMP20); + vis_mul8x16au(REF_S6_1, CONST_256, TMP24); + + vis_mul8x16au(REF_S0, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S0_1, TMP28); + + vis_mul8x16au(REF_S2, CONST_256, TMP30); + vis_padd16(TMP18, TMP22, TMP18); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP32); + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP8, TMP18, TMP8); + + vis_padd16(TMP10, TMP20, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP18, TMP26, TMP18); + + vis_padd16(TMP20, TMP28, TMP20); + + vis_padd16(TMP18, TMP30, TMP18); + + vis_padd16(TMP20, TMP32, TMP20); + vis_pack16(TMP18, DST_2); + + vis_pack16(TMP20, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref 
= (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants6[0], CONST_6); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { /* 55 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_0, TMP0); + + 
vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP0, CONST_6, TMP0); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP2, CONST_6, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP4); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_6, CONST_256, TMP8); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_6_1, CONST_256, TMP10); + + vis_padd16(TMP12, TMP16, TMP12); + vis_mul8x16au(REF_S0, CONST_256, REF_4); + + vis_padd16(TMP14, TMP18, TMP14); + vis_mul8x16au(REF_S0_1, CONST_256, REF_6); + + vis_padd16(TMP12, TMP30, TMP12); + + vis_padd16(TMP14, TMP32, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP4, CONST_6, TMP4); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP6, CONST_6, TMP6); + vis_mul8x16au(REF_S2, CONST_256, TMP12); + + vis_padd16(TMP4, TMP8, TMP4); + vis_mul8x16au(REF_S2_1, CONST_256, TMP14); + + vis_padd16(TMP6, TMP10, TMP6); + + vis_padd16(TMP20, TMP4, TMP20); + + vis_padd16(TMP22, TMP6, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + + vis_padd16(TMP20, REF_0, TMP20); + vis_mul8x16au(REF_S4, CONST_256, REF_0); + + vis_padd16(TMP22, REF_2, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + + vis_ld64_2(dest, 8, DST_2); + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4_1, REF_2); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_padd16(REF_4, TMP0, TMP8); + + vis_mul8x16au(REF_S6, CONST_256, REF_4); + vis_padd16(REF_6, TMP2, TMP10); + + vis_mul8x16au(REF_S6_1, CONST_256, REF_6); + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); 
+ + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(REF_0, TMP4, REF_0); + + vis_mul8x16al(DST_2, CONST_1024, TMP30); + vis_padd16(REF_2, TMP6, REF_2); + + vis_mul8x16al(DST_3, CONST_1024, TMP32); + vis_padd16(REF_0, REF_4, REF_0); + + vis_padd16(REF_2, REF_6, REF_2); + + vis_padd16(REF_0, TMP30, REF_0); + + /* stall */ + + vis_padd16(REF_2, TMP32, REF_2); + vis_pack16(REF_0, DST_2); + + vis_pack16(REF_2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64(constants6[0], CONST_6); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S0_1, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S2, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride, TMP4); + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP4, TMP6, REF_S0); + + vis_ld64_2(dest, stride, DST_2); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, 
REF_S2); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4, TMP22); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_S4_1, TMP24); + + vis_mul8x16au(REF_S6, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S6_1, TMP28); + + vis_mul8x16au(REF_S0, CONST_256, REF_S4); + vis_padd16(TMP22, CONST_6, TMP22); + + vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); + vis_padd16(TMP24, CONST_6, TMP24); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP22, TMP26, TMP22); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP24, TMP28, TMP24); + + vis_mul8x16au(REF_S2, CONST_256, TMP26); + vis_padd16(TMP8, TMP22, TMP8); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP28); + vis_padd16(TMP10, TMP24, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(REF_S4, TMP22, TMP12); + + vis_padd16(REF_S6, TMP24, TMP14); + + vis_padd16(TMP12, TMP26, TMP12); + + vis_padd16(TMP14, TMP28, TMP14); + + vis_padd16(TMP12, REF_0, TMP12); + + vis_padd16(TMP14, REF_2, TMP14); + vis_pack16(TMP12, DST_2); + + vis_pack16(TMP14, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +MPEG2_MC_EXTERN(vis); + +#endif /* !(ARCH_SPARC) */ diff --git a/libmpeg2/mpeg2.h b/libmpeg2/mpeg2.h index 09ad795533..3987ced1fd 100644 --- a/libmpeg2/mpeg2.h +++ b/libmpeg2/mpeg2.h @@ -1,6 +1,6 @@ /* * mpeg2.h - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
@@ -24,6 +24,9 @@ #ifndef MPEG2_H #define MPEG2_H +#define MPEG2_VERSION(a,b,c) (((a)<<16)|((b)<<8)|(c)) +#define MPEG2_RELEASE MPEG2_VERSION (0, 4, 0) /* 0.4.0 */ + #define SEQ_FLAG_MPEG2 1 #define SEQ_FLAG_CONSTRAINED_PARAMETERS 2 #define SEQ_FLAG_PROGRESSIVE_SEQUENCE 4 @@ -38,7 +41,7 @@ #define SEQ_VIDEO_FORMAT_MAC 0x80 #define SEQ_VIDEO_FORMAT_UNSPECIFIED 0xa0 -typedef struct { +typedef struct mpeg2_sequence_s { unsigned int width, height; unsigned int chroma_width, chroma_height; unsigned int byte_rate; @@ -54,7 +57,19 @@ typedef struct { uint8_t colour_primaries; uint8_t transfer_characteristics; uint8_t matrix_coefficients; -} sequence_t; +} mpeg2_sequence_t; + +#define GOP_FLAG_DROP_FRAME 1 +#define GOP_FLAG_BROKEN_LINK 2 +#define GOP_FLAG_CLOSED_GOP 4 + +typedef struct mpeg2_gop_s { + uint8_t hours; + uint8_t minutes; + uint8_t seconds; + uint8_t pictures; + uint32_t flags; +} mpeg2_gop_t; #define PIC_MASK_CODING_TYPE 7 #define PIC_FLAG_CODING_TYPE_I 1 @@ -66,61 +81,76 @@ typedef struct { #define PIC_FLAG_PROGRESSIVE_FRAME 16 #define PIC_FLAG_COMPOSITE_DISPLAY 32 #define PIC_FLAG_SKIP 64 -#define PIC_FLAG_PTS 128 +#define PIC_FLAG_TAGS 128 #define PIC_FLAG_REPEAT_FIRST_FIELD 256 #define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 -typedef struct { +typedef struct mpeg2_picture_s { unsigned int temporal_reference; unsigned int nb_fields; - uint32_t pts; + uint32_t tag, tag2; uint32_t flags; struct { int x, y; } display_offset[3]; -} picture_t; +} mpeg2_picture_t; -typedef struct { +typedef struct mpeg2_fbuf_s { uint8_t * buf[3]; void * id; -} fbuf_t; - -typedef struct { - const sequence_t * sequence; - const picture_t * current_picture; - const picture_t * current_picture_2nd; - const fbuf_t * current_fbuf; - const picture_t * display_picture; - const picture_t * display_picture_2nd; - const fbuf_t * display_fbuf; - const fbuf_t * discard_fbuf; +} mpeg2_fbuf_t; + +typedef struct mpeg2_info_s { + const mpeg2_sequence_t * sequence; + const mpeg2_gop_t * gop; + 
const mpeg2_picture_t * current_picture; + const mpeg2_picture_t * current_picture_2nd; + const mpeg2_fbuf_t * current_fbuf; + const mpeg2_picture_t * display_picture; + const mpeg2_picture_t * display_picture_2nd; + const mpeg2_fbuf_t * display_fbuf; + const mpeg2_fbuf_t * discard_fbuf; const uint8_t * user_data; - int user_data_len; + unsigned int user_data_len; } mpeg2_info_t; typedef struct mpeg2dec_s mpeg2dec_t; -typedef struct decoder_s decoder_t; - -#define STATE_SEQUENCE 1 -#define STATE_SEQUENCE_REPEATED 2 -#define STATE_GOP 3 -#define STATE_PICTURE 4 -#define STATE_SLICE_1ST 5 -#define STATE_PICTURE_2ND 6 -#define STATE_SLICE 7 -#define STATE_END 8 -#define STATE_INVALID 9 - -struct convert_init_s; -void mpeg2_convert (mpeg2dec_t * mpeg2dec, - void (* convert) (int, int, uint32_t, void *, - struct convert_init_s *), void * arg); +typedef struct mpeg2_decoder_s mpeg2_decoder_t; + +typedef enum { + STATE_BUFFER = 0, + STATE_SEQUENCE = 1, + STATE_SEQUENCE_REPEATED = 2, + STATE_GOP = 3, + STATE_PICTURE = 4, + STATE_SLICE_1ST = 5, + STATE_PICTURE_2ND = 6, + STATE_SLICE = 7, + STATE_END = 8, + STATE_INVALID = 9, + STATE_INVALID_END = 10 +} mpeg2_state_t; + +typedef struct mpeg2_convert_init_s { + unsigned int id_size; + unsigned int buf_size[3]; + void (* start) (void * id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, const mpeg2_gop_t * gop); + void (* copy) (void * id, uint8_t * const * src, unsigned int v_offset); +} mpeg2_convert_init_t; +typedef enum { + MPEG2_CONVERT_SET = 0, + MPEG2_CONVERT_STRIDE = 1, + MPEG2_CONVERT_START = 2 +} mpeg2_convert_stage_t; +typedef int mpeg2_convert_t (int stage, void * id, + const mpeg2_sequence_t * sequence, int stride, + uint32_t accel, void * arg, + mpeg2_convert_init_t * result); +int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg); +int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride); void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id); void 
mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf); -void mpeg2_init_fbuf (decoder_t * decoder, uint8_t * current_fbuf[3], - uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]); - -void mpeg2_slice (decoder_t * decoder, int code, const uint8_t * buffer); #define MPEG2_ACCEL_X86_MMX 1 #define MPEG2_ACCEL_X86_3DNOW 2 @@ -128,7 +158,8 @@ void mpeg2_slice (decoder_t * decoder, int code, const uint8_t * buffer); #define MPEG2_ACCEL_PPC_ALTIVEC 1 #define MPEG2_ACCEL_ALPHA 1 #define MPEG2_ACCEL_ALPHA_MVI 2 -#define MPEG2_ACCEL_MLIB 0x40000000 +#define MPEG2_ACCEL_SPARC_VIS 1 +#define MPEG2_ACCEL_SPARC_VIS2 2 #define MPEG2_ACCEL_DETECT 0x80000000 uint32_t mpeg2_accel (uint32_t accel); @@ -137,11 +168,30 @@ const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec); void mpeg2_close (mpeg2dec_t * mpeg2dec); void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end); -int mpeg2_parse (mpeg2dec_t * mpeg2dec); +int mpeg2_getpos (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec); +void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset); void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip); void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end); -void mpeg2_pts (mpeg2dec_t * mpeg2dec, uint32_t pts); +void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2); + +void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]); +void mpeg2_slice (mpeg2_decoder_t * decoder, int code, const uint8_t * buffer); + +typedef enum { + MPEG2_ALLOC_MPEG2DEC = 0, + MPEG2_ALLOC_CHUNK = 1, + MPEG2_ALLOC_YUV = 2, + MPEG2_ALLOC_CONVERT_ID = 3, + MPEG2_ALLOC_CONVERTED = 4 +} mpeg2_alloc_t; + +void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason); +void mpeg2_free (void * buf); +void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), + int free (void *)); #endif /* MPEG2_H */ diff --git a/libmpeg2/mpeg2_internal.h b/libmpeg2/mpeg2_internal.h 
index 90ae5702da..850456b1f8 100644 --- a/libmpeg2/mpeg2_internal.h +++ b/libmpeg2/mpeg2_internal.h @@ -1,6 +1,6 @@ /* * mpeg2_internal.h - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. @@ -29,12 +29,11 @@ #define MACROBLOCK_QUANT 16 #define DCT_TYPE_INTERLACED 32 /* motion_type */ -#define MOTION_TYPE_MASK (3*64) -#define MOTION_TYPE_BASE 64 -#define MC_FIELD (1*64) -#define MC_FRAME (2*64) -#define MC_16X8 (2*64) -#define MC_DMV (3*64) +#define MOTION_TYPE_SHIFT 6 +#define MC_FIELD 1 +#define MC_FRAME 2 +#define MC_16X8 2 +#define MC_DMV 3 /* picture structure */ #define TOP_FIELD 1 @@ -47,6 +46,8 @@ #define B_TYPE 3 #define D_TYPE 4 +typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); + typedef struct { uint8_t * ref[2][3]; uint8_t ** ref2[2]; @@ -54,27 +55,27 @@ typedef struct { int f_code[2]; } motion_t; -struct decoder_s { +typedef void motion_parser_t (mpeg2_decoder_t * decoder, + motion_t * motion, + mpeg2_mc_fct * const * table); + +struct mpeg2_decoder_s { /* first, state that carries information from one macroblock to the */ /* next inside a slice, and is never used outside of mpeg2_slice() */ - /* DCT coefficients - should be kept aligned ! 
*/ - int16_t DCTblock[64]; - /* bit parsing stuff */ uint32_t bitstream_buf; /* current 32 bit working set */ int bitstream_bits; /* used bits in working set */ const uint8_t * bitstream_ptr; /* buffer with stream data */ uint8_t * dest[3]; - uint8_t * picture_dest[3]; - void (* convert) (void * fbuf_id, uint8_t * const * src, - unsigned int v_offset); - void * fbuf_id; int offset; int stride; int uv_stride; + int slice_stride; + int slice_uv_stride; + int stride_frame; unsigned int limit_x; unsigned int limit_y_16; unsigned int limit_y_8; @@ -85,24 +86,34 @@ struct decoder_s { /* predictors */ motion_t b_motion; motion_t f_motion; + motion_parser_t * motion_parser[5]; /* predictor for DC coefficients in intra blocks */ int16_t dc_dct_pred[3]; - int quantizer_scale; /* remove */ - int dmv_offset; /* remove */ - unsigned int v_offset; /* remove */ + /* DCT coefficients */ + int16_t DCTblock[64] ATTR_ALIGN(64); + + uint8_t * picture_dest[3]; + void (* convert) (void * convert_id, uint8_t * const * src, + unsigned int v_offset); + void * convert_id; + + int dmv_offset; + unsigned int v_offset; /* now non-slice-specific information */ /* sequence header stuff */ - uint8_t intra_quantizer_matrix [64]; - uint8_t non_intra_quantizer_matrix [64]; + uint16_t * quantizer_matrix[4]; + uint16_t (* chroma_quantizer[2])[64]; + uint16_t quantizer_prescale[4][32][64]; /* The width and height of the picture snapped to macroblock units */ int width; int height; int vertical_position_extension; + int chroma_format; /* picture header stuff */ @@ -120,8 +131,6 @@ struct decoder_s { /* bool to indicate whether intra blocks have motion vectors */ /* (for concealment) */ int concealment_motion_vectors; - /* bit to indicate which quantization table to use */ - int q_scale_type; /* bool to use different vlc tables */ int intra_vlc_format; /* used for DMV MC */ @@ -135,25 +144,21 @@ struct decoder_s { int second_field; int mpeg1; - - /* for MPlayer: */ - char* quant_store; - int 
quant_stride; }; typedef struct { - fbuf_t fbuf; + mpeg2_fbuf_t fbuf; } fbuf_alloc_t; struct mpeg2dec_s { - decoder_t decoder; + mpeg2_decoder_t decoder; mpeg2_info_t info; uint32_t shift; int is_display_initialized; - int (* action) (struct mpeg2dec_s * mpeg2dec); - int state; + mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec); + mpeg2_state_t state; uint32_t ext_state; /* allocated in init - gcc has problems allocating such big structures */ @@ -165,10 +170,10 @@ struct mpeg2dec_s { /* last start code ? */ uint8_t code; - /* PTS */ - uint32_t pts_current, pts_previous; - int num_pts; - int bytes_since_pts; + /* picture tags */ + uint32_t tag_current, tag2_current, tag_previous, tag2_previous; + int num_tags; + int bytes_since_tag; int first; int alloc_index_user; @@ -176,27 +181,39 @@ struct mpeg2dec_s { uint8_t first_decode_slice; uint8_t nb_decode_slices; - sequence_t new_sequence; - sequence_t sequence; - picture_t pictures[4]; - picture_t * picture; - /*const*/ fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ + unsigned int user_data_len; + + mpeg2_sequence_t new_sequence; + mpeg2_sequence_t sequence; + mpeg2_gop_t new_gop; + mpeg2_gop_t gop; + mpeg2_picture_t new_picture; + mpeg2_picture_t pictures[4]; + mpeg2_picture_t * picture; + /*const*/ mpeg2_fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ fbuf_alloc_t fbuf_alloc[3]; int custom_fbuf; uint8_t * yuv_buf[3][3]; int yuv_index; - void * convert_id; - int convert_size[3]; - void (* convert_start) (void * id, uint8_t * const * dest, int flags); - void (* convert_copy) (void * id, uint8_t * const * src, - unsigned int v_offset); + mpeg2_convert_t * convert; + void * convert_arg; + unsigned int convert_id_size; + int convert_stride; + void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop); uint8_t * buf_start; uint8_t * buf_end; int16_t display_offset_x, display_offset_y; + + int copy_matrix; + int8_t 
q_scale_type, scaled[4]; + uint8_t quantizer_matrix[4][64]; + uint8_t new_quantizer_matrix[4][64]; }; typedef struct { @@ -206,15 +223,6 @@ typedef struct { int dummy; } cpu_state_t; -/* alloc.c */ -#define ALLOC_MPEG2DEC 0 -#define ALLOC_CHUNK 1 -#define ALLOC_YUV 2 -#define ALLOC_CONVERT_ID 3 -#define ALLOC_CONVERTED 4 -void * mpeg2_malloc (int size, int reason); -void mpeg2_free (void * buf); - /* cpu_accel.c */ uint32_t mpeg2_detect_accel (void); @@ -222,34 +230,28 @@ uint32_t mpeg2_detect_accel (void); void mpeg2_cpu_state_init (uint32_t accel); /* decode.c */ -int mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec); -int mpeg2_seek_header (mpeg2dec_t * mpeg2dec); -int mpeg2_parse_header (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec); /* header.c */ void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec); +void mpeg2_reset_info (mpeg2_info_t * info); int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec); int mpeg2_header_gop (mpeg2dec_t * mpeg2dec); -int mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); int mpeg2_header_picture (mpeg2dec_t * mpeg2dec); int mpeg2_header_extension (mpeg2dec_t * mpeg2dec); int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec); void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec); -int mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); -int mpeg2_header_end (mpeg2dec_t * mpeg2dec); -void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type); +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels); +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec); +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type); /* idct.c */ void mpeg2_idct_init (uint32_t accel); -/* idct_mlib.c */ -void mpeg2_idct_add_mlib (int last, int16_t * block, - 
uint8_t * dest, int stride); -void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, - int stride); -void mpeg2_idct_add_mlib_non_ieee (int last, int16_t * block, - uint8_t * dest, int stride); - /* idct_mmx.c */ void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); void mpeg2_idct_add_mmxext (int last, int16_t * block, @@ -272,13 +274,11 @@ void mpeg2_idct_add_mvi (int last, int16_t * block, void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride); void mpeg2_idct_add_alpha (int last, int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_alpha_init(int no_mvi); +void mpeg2_idct_alpha_init (void); /* motion_comp.c */ void mpeg2_mc_init (uint32_t accel); -typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); - typedef struct { mpeg2_mc_fct * put [8]; mpeg2_mc_fct * avg [8]; @@ -297,4 +297,4 @@ extern mpeg2_mc_t mpeg2_mc_mmxext; extern mpeg2_mc_t mpeg2_mc_3dnow; extern mpeg2_mc_t mpeg2_mc_altivec; extern mpeg2_mc_t mpeg2_mc_alpha; -extern mpeg2_mc_t mpeg2_mc_mlib; +extern mpeg2_mc_t mpeg2_mc_vis; diff --git a/libmpeg2/slice.c b/libmpeg2/slice.c index 327612e0e4..15740e1151 100644 --- a/libmpeg2/slice.c +++ b/libmpeg2/slice.c @@ -1,6 +1,7 @@ /* * slice.c - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Peter Gubanov <peter@elecard.net.ru> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
@@ -26,8 +27,8 @@ #include <inttypes.h> #include "mpeg2.h" -#include "mpeg2_internal.h" #include "attributes.h" +#include "mpeg2_internal.h" extern mpeg2_mc_t mpeg2_mc; extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); @@ -38,14 +39,7 @@ extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); #include "vlc.h" -static int non_linear_quantizer_scale [] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 -}; - -static inline int get_macroblock_modes (decoder_t * const decoder) +static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -76,24 +70,24 @@ static inline int get_macroblock_modes (decoder_t * const decoder) if (decoder->picture_structure != FRAME_PICTURE) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); } - return macroblock_modes; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; } else if (decoder->frame_pred_frame_dct) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - return macroblock_modes; + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; } else { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); } if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; DUMPBITS (bit_buf, bits, 1); } - return macroblock_modes; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; } case B_TYPE: @@ -104,18 +98,18 @@ static inline int get_macroblock_modes (decoder_t * const decoder) 
if (decoder->picture_structure != FRAME_PICTURE) { if (! (macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); } return macroblock_modes; } else if (decoder->frame_pred_frame_dct) { /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ - macroblock_modes |= MC_FRAME; + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; return macroblock_modes; } else { if (macroblock_modes & MACROBLOCK_INTRA) goto intra; - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; DUMPBITS (bit_buf, bits, 2); if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { intra: @@ -138,7 +132,7 @@ static inline int get_macroblock_modes (decoder_t * const decoder) #undef bit_ptr } -static inline int get_quantizer_scale (decoder_t * const decoder) +static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -149,16 +143,20 @@ static inline int get_quantizer_scale (decoder_t * const decoder) quantizer_scale_code = UBITS (bit_buf, 5); DUMPBITS (bit_buf, bits, 5); - if (decoder->q_scale_type) - return non_linear_quantizer_scale [quantizer_scale_code]; - else - return quantizer_scale_code << 1; + decoder->quantizer_matrix[0] = + decoder->quantizer_prescale[0][quantizer_scale_code]; + decoder->quantizer_matrix[1] = + decoder->quantizer_prescale[1][quantizer_scale_code]; + decoder->quantizer_matrix[2] = + decoder->chroma_quantizer[0][quantizer_scale_code]; + decoder->quantizer_matrix[3] = + decoder->chroma_quantizer[1][quantizer_scale_code]; #undef bit_buf #undef bits #undef bit_ptr } -static inline int get_motion_delta (decoder_t * const decoder, +static inline int get_motion_delta (mpeg2_decoder_t * const decoder, const int f_code) { #define bit_buf (decoder->bitstream_buf) @@ -214,24 +212,10 
@@ static inline int get_motion_delta (decoder_t * const decoder, static inline int bound_motion_vector (const int vector, const int f_code) { -#if 0 - unsigned int limit; - int sign; - - limit = 16 << f_code; - - if ((unsigned int)(vector + limit) < 2 * limit) - return vector; - else { - sign = ((int32_t)vector) >> 31; - return vector - ((2 * limit) ^ sign) + sign; - } -#else return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); -#endif } -static inline int get_dmv (decoder_t * const decoder) +static inline int get_dmv (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -247,7 +231,7 @@ static inline int get_dmv (decoder_t * const decoder) #undef bit_ptr } -static inline int get_coded_block_pattern (decoder_t * const decoder) +static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -275,7 +259,7 @@ static inline int get_coded_block_pattern (decoder_t * const decoder) #undef bit_ptr } -static inline int get_luma_dc_dct_diff (decoder_t * const decoder) +static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -293,7 +277,7 @@ static inline int get_luma_dc_dct_diff (decoder_t * const decoder) dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); bit_buf <<= size; - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } else { DUMPBITS (bit_buf, bits, 3); return 0; @@ -305,14 +289,14 @@ static inline int get_luma_dc_dct_diff (decoder_t * const decoder) NEEDBITS (bit_buf, bits, bit_ptr); dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); DUMPBITS (bit_buf, bits, size); - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } #undef bit_buf #undef bits #undef bit_ptr } -static inline int get_chroma_dc_dct_diff (decoder_t * const decoder) 
+static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -330,7 +314,7 @@ static inline int get_chroma_dc_dct_diff (decoder_t * const decoder) dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); bit_buf <<= size; - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } else { DUMPBITS (bit_buf, bits, 2); return 0; @@ -342,35 +326,34 @@ static inline int get_chroma_dc_dct_diff (decoder_t * const decoder) NEEDBITS (bit_buf, bits, bit_ptr); dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); DUMPBITS (bit_buf, bits, size); - return dc_diff; + return dc_diff << decoder->intra_dc_precision; } #undef bit_buf #undef bits #undef bit_ptr } -#define SATURATE(val) \ -do { \ - if (unlikely ((uint32_t)(val + 2048) > 4095)) \ - val = SBITS (val, 1) ^ 2047; \ +#define SATURATE(val) \ +do { \ + val <<= 4; \ + if (unlikely (val != (int16_t) val)) \ + val = (SBITS (val, 1) ^ 2047) << 4; \ } while (0) -static void get_intra_block_B14 (decoder_t * const decoder) +static void get_intra_block_B14 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; int mismatch; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; - dest = decoder->DCTblock; i = 0; mismatch = ~dest[0]; @@ -393,7 +376,7 @@ static void get_intra_block_B14 (decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quant_matrix[j]) >> 4; /* if (bitstream_get (1)) val = -val; */ val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); @@ -425,8 
+408,7 @@ static void get_intra_block_B14 (decoder_t * const decoder) DUMPBITS (bit_buf, bits, 12); NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[j]) / 16; + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; SATURATE (val); dest[j] = val; @@ -462,29 +444,27 @@ static void get_intra_block_B14 (decoder_t * const decoder) } break; /* illegal, check needed to avoid buffer overflow */ } - dest[63] ^= mismatch & 1; + dest[63] ^= mismatch & 16; DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; decoder->bitstream_ptr = bit_ptr; } -static void get_intra_block_B15 (decoder_t * const decoder) +static void get_intra_block_B15 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; int mismatch; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; - dest = decoder->DCTblock; i = 0; mismatch = ~dest[0]; @@ -506,7 +486,7 @@ static void get_intra_block_B15 (decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quant_matrix[j]) >> 4; /* if (bitstream_get (1)) val = -val; */ val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); @@ -537,8 +517,7 @@ static void get_intra_block_B15 (decoder_t * const decoder) DUMPBITS (bit_buf, bits, 12); NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[j]) / 16; + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; SATURATE (val); dest[j] = val; @@ -575,31 +554,29 @@ static void get_intra_block_B15 (decoder_t * const decoder) } break; /* 
illegal, check needed to avoid buffer overflow */ } - dest[63] ^= mismatch & 1; + dest[63] ^= mismatch & 16; DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; decoder->bitstream_ptr = bit_ptr; } -static int get_non_intra_block (decoder_t * const decoder) +static int get_non_intra_block (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; int mismatch; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; i = -1; - mismatch = 1; - dest = decoder->DCTblock; + mismatch = -1; bit_buf = decoder->bitstream_buf; bits = decoder->bitstream_bits; @@ -626,7 +603,7 @@ static int get_non_intra_block (decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; /* if (bitstream_get (1)) val = -val; */ val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); @@ -662,7 +639,7 @@ static int get_non_intra_block (decoder_t * const decoder) DUMPBITS (bit_buf, bits, 12); NEEDBITS (bit_buf, bits, bit_ptr); val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; + val = (val * quant_matrix[j]) / 32; SATURATE (val); dest[j] = val; @@ -698,7 +675,7 @@ static int get_non_intra_block (decoder_t * const decoder) } break; /* illegal, check needed to avoid buffer overflow */ } - dest[63] ^= mismatch & 1; + dest[63] ^= mismatch & 16; DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ decoder->bitstream_buf = bit_buf; decoder->bitstream_bits = bits; @@ -706,22 +683,20 @@ static int 
get_non_intra_block (decoder_t * const decoder) return i; } -static void get_mpeg1_intra_block (decoder_t * const decoder) +static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[0]; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = decoder->DCTblock; i = 0; - dest = decoder->DCTblock; bit_buf = decoder->bitstream_buf; bits = decoder->bitstream_bits; @@ -742,7 +717,7 @@ static void get_mpeg1_intra_block (decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quant_matrix[j]) >> 4; /* oddification */ val = (val - 1) | 1; @@ -781,7 +756,7 @@ static void get_mpeg1_intra_block (decoder_t * const decoder) DUMPBITS (bit_buf, bits, 8); val = UBITS (bit_buf, 8) + 2 * val; } - val = (val * quantizer_scale * quant_matrix[j]) / 16; + val = (val * quant_matrix[j]) / 16; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; @@ -825,22 +800,20 @@ static void get_mpeg1_intra_block (decoder_t * const decoder) decoder->bitstream_ptr = bit_ptr; } -static int get_mpeg1_non_intra_block (decoder_t * const decoder) +static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) { int i; int j; int val; - const uint8_t * scan = decoder->scan; - const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; - int quantizer_scale = decoder->quantizer_scale; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[1]; const DCTtab * tab; uint32_t bit_buf; int bits; const uint8_t * bit_ptr; - int16_t * dest; + int16_t * const dest = 
decoder->DCTblock; i = -1; - dest = decoder->DCTblock; bit_buf = decoder->bitstream_buf; bits = decoder->bitstream_bits; @@ -867,7 +840,7 @@ static int get_mpeg1_non_intra_block (decoder_t * const decoder) j = scan[i]; bit_buf <<= tab->len; bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; /* oddification */ val = (val - 1) | 1; @@ -910,7 +883,7 @@ static int get_mpeg1_non_intra_block (decoder_t * const decoder) val = UBITS (bit_buf, 8) + 2 * val; } val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; + val = (val * quant_matrix[j]) / 32; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; @@ -955,7 +928,8 @@ static int get_mpeg1_non_intra_block (decoder_t * const decoder) return i; } -static inline void slice_intra_DCT (decoder_t * const decoder, const int cc, +static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, uint8_t * const dest, const int stride) { #define bit_buf (decoder->bitstream_buf) @@ -964,26 +938,27 @@ static inline void slice_intra_DCT (decoder_t * const decoder, const int cc, NEEDBITS (bit_buf, bits, bit_ptr); /* Get the intra DC coefficient and inverse quantize it */ if (cc == 0) - decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); + decoder->DCTblock[0] = + decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); else - decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); - decoder->DCTblock[0] = - decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision); + decoder->DCTblock[0] = + decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); if (decoder->mpeg1) { if (decoder->coding_type != D_TYPE) get_mpeg1_intra_block (decoder); } else if (decoder->intra_vlc_format) - get_intra_block_B15 (decoder); + get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 
2 : 0]); else - get_intra_block_B14 (decoder); + get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); mpeg2_idct_copy (decoder->DCTblock, dest, stride); #undef bit_buf #undef bits #undef bit_ptr } -static inline void slice_non_intra_DCT (decoder_t * const decoder, +static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, uint8_t * const dest, const int stride) { int last; @@ -991,15 +966,22 @@ static inline void slice_non_intra_DCT (decoder_t * const decoder, if (decoder->mpeg1) last = get_mpeg1_non_intra_block (decoder); else - last = get_non_intra_block (decoder); + last = get_non_intra_block (decoder, + decoder->quantizer_matrix[cc ? 3 : 1]); mpeg2_idct_add (last, decoder->DCTblock, dest, stride); } -#define MOTION(table,ref,motion_x,motion_y,size,y) \ +#define MOTION_420(table,ref,motion_x,motion_y,size,y) \ pos_x = 2 * decoder->offset + motion_x; \ pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ - if ((pos_x > decoder->limit_x) || (pos_y > decoder->limit_y_ ## size)) \ - return; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride, \ @@ -1016,11 +998,17 @@ static inline void slice_non_intra_DCT (decoder_t * const decoder, (decoder->offset >> 1), ref[2] + offset, \ decoder->uv_stride, size/2) -#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) \ +#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \ pos_x = 2 * decoder->offset + motion_x; \ pos_y = decoder->v_offset + motion_y; \ - if ((pos_x > decoder->limit_x) || (pos_y > decoder->limit_y)) \ - return; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ decoder->offset, \ @@ -1039,12 +1027,237 @@ static inline void slice_non_intra_DCT (decoder_t * const decoder, (decoder->offset >> 1), ref[2] + offset, \ 2 * decoder->uv_stride, 4) -static void motion_mp1 (decoder_t * const decoder, motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ +#define MOTION_DMV_420(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y & ~1)) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4) + +#define MOTION_ZERO_420(table,ref) \ + table[0] (decoder->dest[0] + decoder->offset, \ + (ref[0] + decoder->offset + \ + decoder->v_offset * decoder->stride), decoder->stride, 16); \ + offset = ((decoder->offset >> 1) + \ + (decoder->v_offset >> 1) * decoder->uv_stride); \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 8); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 8) + +#define MOTION_422(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 
0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size); \ + table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size) + +#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 8) + +#define MOTION_DMV_422(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8) + +#define MOTION_ZERO_422(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + offset >>= 1; \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 16); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 16) + +#define MOTION_444(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \ + ref[1] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \ + ref[2] + offset, decoder->stride, size) + +#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + dest_field * decoder->stride + \ + decoder->offset, ref[1] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + dest_field * decoder->stride + \ + decoder->offset, ref[2] + offset, \ + 2 * decoder->stride, 8) + +#define MOTION_DMV_444(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset, \ + ref[1] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->offset, \ + ref[2] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset, \ + ref[2] + decoder->stride + offset, \ + 2 * decoder->stride, 8) + +#define MOTION_ZERO_444(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + table[4] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, decoder->stride, 16); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->stride, 16) + #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) #define bit_ptr (decoder->bitstream_ptr) + +static void motion_mp1 (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ int motion_x, motion_y; unsigned int pos_x, pos_y, xy_half, offset; @@ -1064,192 +1277,239 @@ static void motion_mp1 (decoder_t * const decoder, motion_t * const motion, motion->f_code[0] + motion->f_code[1]); motion->pmv[0][1] = motion_y; - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_frame (decoder_t * const decoder, - motion_t * const motion, - 
mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_field (decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y, field; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[0][1] = motion_y << 1; - - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - 
motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (decoder, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion_y << 1; - - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_dmv (decoder_t * const decoder, motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - dmv_x = get_dmv (decoder); - - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; - dmv_y = get_dmv (decoder); - - m = decoder->top_field_first ? 1 : 3; - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); - - m = decoder->top_field_first ? 
3 : 1; - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0); - - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); - offset = (decoder->offset + (motion_x >> 1) + - (decoder->v_offset + (motion_y & ~1)) * decoder->stride); - mpeg2_mc.avg[xy_half] - (decoder->dest[0] + decoder->offset, - motion->ref[0][0] + offset, 2 * decoder->stride, 8); - mpeg2_mc.avg[xy_half] - (decoder->dest[0] + decoder->stride + decoder->offset, - motion->ref[0][0] + decoder->stride + offset, 2 * decoder->stride, 8); - motion_x /= 2; motion_y /= 2; - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); - offset = (((decoder->offset + motion_x) >> 1) + - (((decoder->v_offset >> 1) + (motion_y & ~1)) * - decoder->uv_stride)); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[1] + (decoder->offset >> 1), - motion->ref[0][1] + offset, 2 * decoder->uv_stride, 4); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[1] + decoder->uv_stride + (decoder->offset >> 1), - motion->ref[0][1] + decoder->uv_stride + offset, - 2 * decoder->uv_stride, 4); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[2] + (decoder->offset >> 1), - motion->ref[0][2] + offset, 2 * decoder->uv_stride, 4); - mpeg2_mc.avg[4+xy_half] - (decoder->dest[2] + decoder->uv_stride + (decoder->offset >> 1), - motion->ref[0][2] + decoder->uv_stride + offset, - 2 * decoder->uv_stride, 4); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline void motion_reuse (const decoder_t * const decoder, - const motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ - int motion_x, motion_y; - unsigned int pos_x, pos_y, xy_half, offset; - - motion_x = motion->pmv[0][0]; - motion_y = motion->pmv[0][1]; - - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); + MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0); } -static inline void motion_zero (const decoder_t * const decoder, - const motion_t 
* const motion, - mpeg2_mc_fct * const * const table) -{ - unsigned int offset; - - table[0] (decoder->dest[0] + decoder->offset, - (motion->ref[0][0] + decoder->offset + - decoder->v_offset * decoder->stride), - decoder->stride, 16); - - offset = ((decoder->offset >> 1) + - (decoder->v_offset >> 1) * decoder->uv_stride); - table[4] (decoder->dest[1] + (decoder->offset >> 1), - motion->ref[0][1] + offset, decoder->uv_stride, 8); - table[4] (decoder->dest[2] + (decoder->offset >> 1), - motion->ref[0][2] + offset, decoder->uv_stride, 8); -} +#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO) \ + \ +static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + 
motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[0][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = ((motion->pmv[1][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[1][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \ +} \ + \ +static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + dmv_x = get_dmv (decoder); \ + \ + motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; \ + dmv_y = get_dmv (decoder); \ + \ + m = decoder->top_field_first ? 
1 : 3; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \ + \ + m = decoder->top_field_first ? 3 : 1; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\ + \ + MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y); \ +} \ + \ +static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + motion_x = motion->pmv[0][0]; \ + motion_y = motion->pmv[0][1]; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + unsigned int offset; \ + \ + motion->pmv[0][0] = motion->pmv[0][1] = 0; \ + motion->pmv[1][0] = motion->pmv[1][1] = 0; \ + \ + MOTION_ZERO (table, motion->ref[0]); \ +} \ + \ +static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, 
motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 0); \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[1][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 8); \ +} \ + \ +static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = 
bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); \ + \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + \ + decoder->dmv_offset); \ + \ + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); \ + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); \ +} \ + +MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420, + MOTION_ZERO_420) +MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422, + MOTION_ZERO_422) +MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444, + MOTION_ZERO_444) /* like motion_frame, but parsing without actual motion compensation */ -static void motion_fr_conceal (decoder_t * const decoder) +static void motion_fr_conceal (mpeg2_decoder_t * const decoder) { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) int tmp; NEEDBITS (bit_buf, bits, bit_ptr); @@ -1265,127 +1525,10 @@ static void motion_fr_conceal (decoder_t * const decoder) decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_field (decoder_t * const decoder, - motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS 
(bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_16x8 (decoder_t * const decoder, motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[0][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 8, 0); - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[1][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion_y; - - 
MOTION (table, ref_field, motion_x, motion_y, 8, 8); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_dmv (decoder_t * const decoder, motion_t * const motion, - mpeg2_mc_fct * const * const table) -{ -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) - int motion_x, motion_y, other_x, other_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (decoder, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); - - motion_y = motion->pmv[0][1] + get_motion_delta (decoder, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + - decoder->dmv_offset); - - MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); - MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr } -static void motion_fi_conceal (decoder_t * const decoder) +static void motion_fi_conceal (mpeg2_decoder_t * const decoder) { -#define bit_buf (decoder->bitstream_buf) -#define bits (decoder->bitstream_bits) -#define bit_ptr (decoder->bitstream_ptr) int tmp; NEEDBITS (bit_buf, bits, bit_ptr); @@ -1403,10 +1546,11 @@ static void motion_fi_conceal (decoder_t * const decoder) decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +} + #undef bit_buf #undef bits #undef bit_ptr -} #define MOTION_CALL(routine,direction) \ do { \ @@ -1420,21 +1564,18 @@ do { \ #define NEXT_MACROBLOCK \ do { \ - if(decoder->quant_store) \ - 
decoder->quant_store[decoder->quant_stride*(decoder->v_offset>>4) \ - +(decoder->offset>>4)] = decoder->quantizer_scale; \ decoder->offset += 16; \ if (decoder->offset == decoder->width) { \ do { /* just so we can use the break statement */ \ if (decoder->convert) { \ - decoder->convert (decoder->fbuf_id, decoder->dest, \ + decoder->convert (decoder->convert_id, decoder->dest, \ decoder->v_offset); \ if (decoder->coding_type == B_TYPE) \ break; \ } \ - decoder->dest[0] += 16 * decoder->stride; \ - decoder->dest[1] += 4 * decoder->stride; \ - decoder->dest[2] += 4 * decoder->stride; \ + decoder->dest[0] += decoder->slice_stride; \ + decoder->dest[1] += decoder->slice_uv_stride; \ + decoder->dest[2] += decoder->slice_uv_stride; \ } while (0); \ decoder->v_offset += 16; \ if (decoder->v_offset > decoder->limit_y) { \ @@ -1446,12 +1587,12 @@ do { \ } \ } while (0) -void mpeg2_init_fbuf (decoder_t * decoder, uint8_t * current_fbuf[3], +void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) { int offset, stride, height, bottom_field; - stride = decoder->width; + stride = decoder->stride_frame; bottom_field = (decoder->picture_structure == BOTTOM_FIELD); offset = bottom_field ? 
stride : 0; height = decoder->height; @@ -1493,13 +1634,62 @@ void mpeg2_init_fbuf (decoder_t * decoder, uint8_t * current_fbuf[3], decoder->stride = stride; decoder->uv_stride = stride >> 1; + decoder->slice_stride = 16 * stride; + decoder->slice_uv_stride = + decoder->slice_stride >> (2 - decoder->chroma_format); decoder->limit_x = 2 * decoder->width - 32; decoder->limit_y_16 = 2 * height - 32; decoder->limit_y_8 = 2 * height - 16; decoder->limit_y = height - 16; + + if (decoder->mpeg1) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FRAME] = motion_mp1; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->picture_structure == FRAME_PICTURE) { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fr_field_420; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_420; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + decoder->motion_parser[MC_FIELD] = motion_fr_field_422; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_422; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + decoder->motion_parser[MC_FIELD] = motion_fr_field_444; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_444; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } else { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fi_field_420; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_420; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + 
decoder->motion_parser[MC_FIELD] = motion_fi_field_422; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_422; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + decoder->motion_parser[MC_FIELD] = motion_fi_field_444; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_444; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } } -static inline int slice_init (decoder_t * const decoder, int code) +static inline int slice_init (mpeg2_decoder_t * const decoder, int code) { #define bit_buf (decoder->bitstream_buf) #define bits (decoder->bitstream_bits) @@ -1508,7 +1698,7 @@ static inline int slice_init (decoder_t * const decoder, int code) const MBAtab * mba; decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + decoder->dc_dct_pred[2] = 16384; decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; @@ -1522,13 +1712,14 @@ static inline int slice_init (decoder_t * const decoder, int code) decoder->v_offset = (code - 1) * 16; offset = 0; if (!(decoder->convert) || decoder->coding_type != B_TYPE) - offset = (code - 1) * decoder->stride * 4; + offset = (code - 1) * decoder->slice_stride; - decoder->dest[0] = decoder->picture_dest[0] + offset * 4; + decoder->dest[0] = decoder->picture_dest[0] + offset; + offset >>= (2 - decoder->chroma_format); decoder->dest[1] = decoder->picture_dest[1] + offset; decoder->dest[2] = decoder->picture_dest[2] + offset; - decoder->quantizer_scale = get_quantizer_scale (decoder); + get_quantizer_scale (decoder); /* ignore intra_slice and all the extra data */ while (bit_buf & 0x80000000) { @@ -1566,9 +1757,9 @@ static inline int slice_init (decoder_t * const decoder, int code) while (decoder->offset - decoder->width >= 0) { decoder->offset -= 
decoder->width; if (!(decoder->convert) || decoder->coding_type != B_TYPE) { - decoder->dest[0] += 16 * decoder->stride; - decoder->dest[1] += 4 * decoder->stride; - decoder->dest[2] += 4 * decoder->stride; + decoder->dest[0] += decoder->slice_stride; + decoder->dest[1] += decoder->slice_uv_stride; + decoder->dest[2] += decoder->slice_uv_stride; } decoder->v_offset += 16; } @@ -1581,7 +1772,7 @@ static inline int slice_init (decoder_t * const decoder, int code) #undef bit_ptr } -void mpeg2_slice (decoder_t * const decoder, const int code, +void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, const uint8_t * const buffer) { #define bit_buf (decoder->bitstream_buf) @@ -1608,7 +1799,7 @@ void mpeg2_slice (decoder_t * const decoder, const int code, /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ if (macroblock_modes & MACROBLOCK_QUANT) - decoder->quantizer_scale = get_quantizer_scale (decoder); + get_quantizer_scale (decoder); if (macroblock_modes & MACROBLOCK_INTRA) { @@ -1642,72 +1833,49 @@ void mpeg2_slice (decoder_t * const decoder, const int code, slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride); slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride); slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride); - slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), - decoder->uv_stride); - slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), - decoder->uv_stride); - - if (decoder->coding_type == D_TYPE) { - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); + if (likely (decoder->chroma_format == 0)) { + slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + if (decoder->coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else if (likely (decoder->chroma_format == 1)) { + uint8_t * dest_u = decoder->dest[1] + 
(offset >> 1); + uint8_t * dest_v = decoder->dest[2] + (offset >> 1); + DCT_stride >>= 1; + DCT_offset >>= 1; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + } else { + uint8_t * dest_u = decoder->dest[1] + offset; + uint8_t * dest_v = decoder->dest[2] + offset; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8, + DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8, + DCT_stride); } } else { - if (decoder->picture_structure == FRAME_PICTURE) - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FRAME: - if (decoder->mpeg1) - MOTION_CALL (motion_mp1, macroblock_modes); - else - MOTION_CALL (motion_fr_frame, macroblock_modes); - break; - - case MC_FIELD: - MOTION_CALL (motion_fr_field, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - decoder->f_motion.pmv[0][0] = 0; - decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = 0; - decoder->f_motion.pmv[1][1] = 0; - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } - else - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FIELD: - MOTION_CALL (motion_fi_field, macroblock_modes); - break; - - case MC_16X8: - MOTION_CALL (motion_fi_16x8, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture 
*/ - decoder->f_motion.pmv[0][0] = 0; - decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = 0; - decoder->f_motion.pmv[1][1] = 0; - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } + motion_parser_t * parser; + + parser = + decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT]; + MOTION_CALL (parser, macroblock_modes); if (macroblock_modes & MACROBLOCK_PATTERN) { int coded_block_pattern; int DCT_offset, DCT_stride; - int offset; - uint8_t * dest_y; if (macroblock_modes & DCT_TYPE_INTERLACED) { DCT_offset = decoder->stride; @@ -1719,30 +1887,123 @@ void mpeg2_slice (decoder_t * const decoder, const int code, coded_block_pattern = get_coded_block_pattern (decoder); - offset = decoder->offset; - dest_y = decoder->dest[0] + offset; - if (coded_block_pattern & 0x20) - slice_non_intra_DCT (decoder, dest_y, DCT_stride); - if (coded_block_pattern & 0x10) - slice_non_intra_DCT (decoder, dest_y + 8, DCT_stride); - if (coded_block_pattern & 0x08) - slice_non_intra_DCT (decoder, dest_y + DCT_offset, - DCT_stride); - if (coded_block_pattern & 0x04) - slice_non_intra_DCT (decoder, dest_y + DCT_offset + 8, - DCT_stride); - if (coded_block_pattern & 0x2) - slice_non_intra_DCT (decoder, - decoder->dest[1] + (offset >> 1), - decoder->uv_stride); - if (coded_block_pattern & 0x1) - slice_non_intra_DCT (decoder, - decoder->dest[2] + (offset >> 1), - decoder->uv_stride); + if (likely (decoder->chroma_format == 0)) { + int offset = decoder->offset; + uint8_t * dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + 
decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + } else if (likely (decoder->chroma_format == 1)) { + int offset; + uint8_t * dest_y; + + coded_block_pattern |= bit_buf & (3 << 30); + DUMPBITS (bit_buf, bits, 2); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + + DCT_stride >>= 1; + DCT_offset = (DCT_offset + offset) >> 1; + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & (2 << 30)) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + DCT_offset, + DCT_stride); + if (coded_block_pattern & (1 << 30)) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + DCT_offset, + DCT_stride); + } else { + int offset; + uint8_t * dest_y, * dest_u, * dest_v; + + coded_block_pattern |= bit_buf & (63 << 26); + DUMPBITS (bit_buf, bits, 6); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + dest_u = decoder->dest[1] + offset; + dest_v = decoder->dest[2] + offset; + + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y 
+ DCT_offset + 8, + DCT_stride); + + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride); + if (coded_block_pattern & (32 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset, + DCT_stride); + if (coded_block_pattern & (16 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset, + DCT_stride); + if (coded_block_pattern & (8 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + 8, + DCT_stride); + if (coded_block_pattern & (4 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + 8, + DCT_stride); + if (coded_block_pattern & (2 << 26)) + slice_non_intra_DCT (decoder, 1, + dest_u + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & (1 << 26)) + slice_non_intra_DCT (decoder, 2, + dest_v + DCT_offset + 8, + DCT_stride); + } } decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + decoder->dc_dct_pred[2] = 16384; } NEXT_MACROBLOCK; @@ -1775,19 +2036,17 @@ void mpeg2_slice (decoder_t * const decoder, const int code, if (mba_inc) { decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = - decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + decoder->dc_dct_pred[2] = 16384; if (decoder->coding_type == P_TYPE) { - decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; - decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; - do { - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + MOTION_CALL (decoder->motion_parser[0], + MACROBLOCK_MOTION_FORWARD); NEXT_MACROBLOCK; } while (--mba_inc); } else { do { - MOTION_CALL (motion_reuse, macroblock_modes); + MOTION_CALL (decoder->motion_parser[4], macroblock_modes); NEXT_MACROBLOCK; } while (--mba_inc); } diff --git a/libmpeg2/vis.h b/libmpeg2/vis.h new file mode 100644 index 0000000000..69dd49075b --- /dev/null +++ b/libmpeg2/vis.h @@ -0,0 +1,328 @@ +/* + * vis.h + * Copyright (C) 2003 David S. 
Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* You may be asking why I hard-code the instruction opcodes and don't + * use the normal VIS assembler mnemonics for the VIS instructions. + * + * The reason is that Sun, in their infinite wisdom, decided that a binary + * using a VIS instruction will cause it to be marked (in the ELF headers) + * as doing so, and this prevents the OS from loading such binaries if the + * current cpu doesn't have VIS. There is no way to easily override this + * behavior of the assembler that I am aware of. + * + * This totally defeats what libmpeg2 is trying to do which is to allow a + * single binary to be created, and then detect the availability of VIS + * at runtime. + * + * I'm not saying that tainting the binary by default is bad, rather I'm + * saying that not providing a way to override this easily unnecessarily + * ties people's hands. + * + * Thus, we do the opcode encoding by hand and output 32-bit words in + * the assembler to keep the binary from becoming tainted. 
+ */ + +#define vis_opc_base ((0x1 << 31) | (0x36 << 19)) +#define vis_opf(X) ((X) << 5) +#define vis_sreg(X) (X) +#define vis_dreg(X) (((X)&0x1f)|((X)>>5)) +#define vis_rs1_s(X) (vis_sreg(X) << 14) +#define vis_rs1_d(X) (vis_dreg(X) << 14) +#define vis_rs2_s(X) (vis_sreg(X) << 0) +#define vis_rs2_d(X) (vis_dreg(X) << 0) +#define vis_rd_s(X) (vis_sreg(X) << 25) +#define vis_rd_d(X) (vis_dreg(X) << 25) + +#define vis_ss2s(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_dd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_ss2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_sd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_d2s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_s(rd))) + +#define vis_s2d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_d12d(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rd_d(rd))) + +#define vis_d22d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_s12s(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rd_s(rd))) + +#define vis_s22s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + 
vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_s(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_s(rd))) + +#define vis_d(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_d(rd))) + +#define vis_r2m(op,rd,mem) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) + +#define vis_r2m_2(op,rd,mem1,mem2) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) + +#define vis_m2r(op,mem,rd) \ + __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) + +#define vis_m2r_2(op,mem1,mem2,rd) \ + __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) + +static inline void vis_set_gsr(unsigned int _val) +{ + register unsigned int val asm("g1"); + + val = _val; + __asm__ __volatile__(".word 0xa7804000" + : : "r" (val)); +} + +#define VIS_GSR_ALIGNADDR_MASK 0x0000007 +#define VIS_GSR_ALIGNADDR_SHIFT 0 +#define VIS_GSR_SCALEFACT_MASK 0x0000078 +#define VIS_GSR_SCALEFACT_SHIFT 3 + +#define vis_ld32(mem,rs1) vis_m2r(ld, mem, rs1) +#define vis_ld32_2(mem1,mem2,rs1) vis_m2r_2(ld, mem1, mem2, rs1) +#define vis_st32(rs1,mem) vis_r2m(st, rs1, mem) +#define vis_st32_2(rs1,mem1,mem2) vis_r2m_2(st, rs1, mem1, mem2) +#define vis_ld64(mem,rs1) vis_m2r(ldd, mem, rs1) +#define vis_ld64_2(mem1,mem2,rs1) vis_m2r_2(ldd, mem1, mem2, rs1) +#define vis_st64(rs1,mem) vis_r2m(std, rs1, mem) +#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2) + +#define vis_ldblk(mem, rd) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1985e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + +#define vis_stblk(rd, mem) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1b85e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + +#define vis_membar_storestore() \ + 
__asm__ __volatile__(".word 0x8143e008" : : : "memory") + +#define vis_membar_sync() \ + __asm__ __volatile__(".word 0x8143e040" : : : "memory") + +/* 16 and 32 bit partitioned addition and subtraction. The normal + * versions perform 4 16-bit or 2 32-bit additions or subtractions. + * The 's' versions perform 2 16-bit or 1 32-bit additions or + * subtractions. + */ + +#define vis_padd16(rs1,rs2,rd) vis_dd2d(0x50, rs1, rs2, rd) +#define vis_padd16s(rs1,rs2,rd) vis_ss2s(0x51, rs1, rs2, rd) +#define vis_padd32(rs1,rs2,rd) vis_dd2d(0x52, rs1, rs2, rd) +#define vis_padd32s(rs1,rs2,rd) vis_ss2s(0x53, rs1, rs2, rd) +#define vis_psub16(rs1,rs2,rd) vis_dd2d(0x54, rs1, rs2, rd) +#define vis_psub16s(rs1,rs2,rd) vis_ss2s(0x55, rs1, rs2, rd) +#define vis_psub32(rs1,rs2,rd) vis_dd2d(0x56, rs1, rs2, rd) +#define vis_psub32s(rs1,rs2,rd) vis_ss2s(0x57, rs1, rs2, rd) + +/* Pixel formatting instructions. */ + +#define vis_pack16(rs2,rd) vis_d2s( 0x3b, rs2, rd) +#define vis_pack32(rs1,rs2,rd) vis_dd2d(0x3a, rs1, rs2, rd) +#define vis_packfix(rs2,rd) vis_d2s( 0x3d, rs2, rd) +#define vis_expand(rs2,rd) vis_s2d( 0x4d, rs2, rd) +#define vis_pmerge(rs1,rs2,rd) vis_ss2d(0x4b, rs1, rs2, rd) + +/* Partitioned multiply instructions. */ + +#define vis_mul8x16(rs1,rs2,rd) vis_sd2d(0x31, rs1, rs2, rd) +#define vis_mul8x16au(rs1,rs2,rd) vis_ss2d(0x33, rs1, rs2, rd) +#define vis_mul8x16al(rs1,rs2,rd) vis_ss2d(0x35, rs1, rs2, rd) +#define vis_mul8sux16(rs1,rs2,rd) vis_dd2d(0x36, rs1, rs2, rd) +#define vis_mul8ulx16(rs1,rs2,rd) vis_dd2d(0x37, rs1, rs2, rd) +#define vis_muld8sux16(rs1,rs2,rd) vis_ss2d(0x38, rs1, rs2, rd) +#define vis_muld8ulx16(rs1,rs2,rd) vis_ss2d(0x39, rs1, rs2, rd) + +/* Alignment instructions. 
*/ + +static inline void *vis_alignaddr(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + +static inline void vis_alignaddr_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + +static inline void *vis_alignaddrl(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + +static inline void vis_alignaddrl_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + +#define vis_faligndata(rs1,rs2,rd) vis_dd2d(0x48, rs1, rs2, rd) + +/* Logical operate instructions. 
*/ + +#define vis_fzero(rd) vis_d( 0x60, rd) +#define vis_fzeros(rd) vis_s( 0x61, rd) +#define vis_fone(rd) vis_d( 0x7e, rd) +#define vis_fones(rd) vis_s( 0x7f, rd) +#define vis_src1(rs1,rd) vis_d12d(0x74, rs1, rd) +#define vis_src1s(rs1,rd) vis_s12s(0x75, rs1, rd) +#define vis_src2(rs2,rd) vis_d22d(0x78, rs2, rd) +#define vis_src2s(rs2,rd) vis_s22s(0x79, rs2, rd) +#define vis_not1(rs1,rd) vis_d12d(0x6a, rs1, rd) +#define vis_not1s(rs1,rd) vis_s12s(0x6b, rs1, rd) +#define vis_not2(rs2,rd) vis_d22d(0x66, rs2, rd) +#define vis_not2s(rs2,rd) vis_s22s(0x67, rs2, rd) +#define vis_or(rs1,rs2,rd) vis_dd2d(0x7c, rs1, rs2, rd) +#define vis_ors(rs1,rs2,rd) vis_ss2s(0x7d, rs1, rs2, rd) +#define vis_nor(rs1,rs2,rd) vis_dd2d(0x62, rs1, rs2, rd) +#define vis_nors(rs1,rs2,rd) vis_ss2s(0x63, rs1, rs2, rd) +#define vis_and(rs1,rs2,rd) vis_dd2d(0x70, rs1, rs2, rd) +#define vis_ands(rs1,rs2,rd) vis_ss2s(0x71, rs1, rs2, rd) +#define vis_nand(rs1,rs2,rd) vis_dd2d(0x6e, rs1, rs2, rd) +#define vis_nands(rs1,rs2,rd) vis_ss2s(0x6f, rs1, rs2, rd) +#define vis_xor(rs1,rs2,rd) vis_dd2d(0x6c, rs1, rs2, rd) +#define vis_xors(rs1,rs2,rd) vis_ss2s(0x6d, rs1, rs2, rd) +#define vis_xnor(rs1,rs2,rd) vis_dd2d(0x72, rs1, rs2, rd) +#define vis_xnors(rs1,rs2,rd) vis_ss2s(0x73, rs1, rs2, rd) +#define vis_ornot1(rs1,rs2,rd) vis_dd2d(0x7a, rs1, rs2, rd) +#define vis_ornot1s(rs1,rs2,rd) vis_ss2s(0x7b, rs1, rs2, rd) +#define vis_ornot2(rs1,rs2,rd) vis_dd2d(0x76, rs1, rs2, rd) +#define vis_ornot2s(rs1,rs2,rd) vis_ss2s(0x77, rs1, rs2, rd) +#define vis_andnot1(rs1,rs2,rd) vis_dd2d(0x68, rs1, rs2, rd) +#define vis_andnot1s(rs1,rs2,rd) vis_ss2s(0x69, rs1, rs2, rd) +#define vis_andnot2(rs1,rs2,rd) vis_dd2d(0x64, rs1, rs2, rd) +#define vis_andnot2s(rs1,rs2,rd) vis_ss2s(0x65, rs1, rs2, rd) + +/* Pixel component distance. 
*/ + +#define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) diff --git a/libmpeg2/vlc.h b/libmpeg2/vlc.h index aa3dfe1841..7098ee0f9b 100644 --- a/libmpeg2/vlc.h +++ b/libmpeg2/vlc.h @@ -1,6 +1,6 @@ /* * vlc.h - * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. @@ -27,7 +27,8 @@ do { \ bit_ptr += 2; \ } while (0) -static inline void bitstream_init (decoder_t * decoder, const uint8_t * start) +static inline void bitstream_init (mpeg2_decoder_t * decoder, + const uint8_t * start) { decoder->bitstream_buf = (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; @@ -169,53 +170,53 @@ static const DMVtab DMV_2 [] = { static const CBPtab CBP_7 [] = { - {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, - {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, - {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, - {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, - {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, - {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, - {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, - {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, - {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, - {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, - {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, - {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, - {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, - {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, - {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7}, + {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7}, + {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6}, + {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6}, + {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, + {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5}, + {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, + {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5}, + {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5}, + {0x0b, 5}, 
{0x0b, 5}, {0x0b, 5}, {0x0b, 5}, + {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, + {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, + {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5}, + {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, + {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3} }; static const CBPtab CBP_9 [] = { - {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, - {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, - {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, - {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, - {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, - {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, - {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, - {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, - {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, - {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, - {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, - {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, - {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, - {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, - {0x23, 
8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, - {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} + {0, 0}, {0x00, 9}, {0x39, 9}, {0x36, 9}, + {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9}, + {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8}, + {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8}, + {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8}, + {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8}, + {0x19, 8}, {0x19, 8}, {0x16, 8}, {0x16, 8}, + {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8}, + {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8}, + {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8}, + {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8}, + {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8}, + {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8}, + {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8}, + {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8}, + {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8} }; |