diff options
author | arpi <arpi@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2003-04-06 16:36:02 +0000 |
---|---|---|
committer | arpi <arpi@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2003-04-06 16:36:02 +0000 |
commit | 0b6eb24b9a8034287f67f800fc61d07b7f018891 (patch) | |
tree | ee1e7fe1ba37e57d24b8604e1c53f38e7b67484c /libmpeg2 | |
parent | 27ff6fa04a22e2a56848b3ff3e1d681a18bc98e4 (diff) |
Importing libmpeg2 from mpeg2dec-0.3.1
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@9853 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libmpeg2')
-rw-r--r-- | libmpeg2/Makefile | 5 | ||||
-rw-r--r-- | libmpeg2/attributes.h | 14 | ||||
-rw-r--r-- | libmpeg2/header.c | 714 | ||||
-rw-r--r-- | libmpeg2/idct.c | 416 | ||||
-rw-r--r-- | libmpeg2/idct_mlib.c | 25 | ||||
-rw-r--r-- | libmpeg2/idct_mmx.c | 605 | ||||
-rw-r--r-- | libmpeg2/mm_accel.h | 30 | ||||
-rw-r--r-- | libmpeg2/mmx.h | 26 | ||||
-rw-r--r-- | libmpeg2/motion_comp.c | 142 | ||||
-rw-r--r-- | libmpeg2/motion_comp_mlib.c | 148 | ||||
-rw-r--r-- | libmpeg2/motion_comp_mmx.c | 530 | ||||
-rw-r--r-- | libmpeg2/mpeg2.h | 176 | ||||
-rw-r--r-- | libmpeg2/mpeg2_internal.h | 287 | ||||
-rw-r--r-- | libmpeg2/slice.c | 1368 | ||||
-rw-r--r-- | libmpeg2/sse.h | 256 | ||||
-rw-r--r-- | libmpeg2/stats.c | 315 | ||||
-rw-r--r-- | libmpeg2/vlc.h | 59 |
17 files changed, 2605 insertions, 2511 deletions
diff --git a/libmpeg2/Makefile b/libmpeg2/Makefile index 914b41844d..6ee925ddb9 100644 --- a/libmpeg2/Makefile +++ b/libmpeg2/Makefile @@ -3,9 +3,8 @@ LIBNAME = libmpeg2.a include ../config.mak -SRCS = header.c idct.c idct_mmx.c idct_mlib.c \ - motion_comp.c motion_comp_mmx.c motion_comp_mlib.c \ - slice.c stats.c # decode.c +SRCS = alloc.c cpu_accel.c cpu_state.c decode.c header.c idct.c idct_alpha.c idct_altivec.c idct_mlib.c idct_mmx.c motion_comp.c motion_comp_alpha.c motion_comp_altivec.c motion_comp_mlib.c motion_comp_mmx.c slice.c + OBJS = $(SRCS:.c=.o) INCLUDE = -I. -I../libvo -I.. $(EXTRA_INC) $(MLIB_INC) CFLAGS = $(OPTFLAGS) $(INCLUDE) -DMPG12PLAY diff --git a/libmpeg2/attributes.h b/libmpeg2/attributes.h index ab7105c2df..96a86b26c0 100644 --- a/libmpeg2/attributes.h +++ b/libmpeg2/attributes.h @@ -1,8 +1,10 @@ /* * attributes.h - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,7 +23,15 @@ /* use gcc attribs to align critical data structures */ #ifdef ATTRIBUTE_ALIGNED_MAX -#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < (align)) ? ATTRIBUTE_ALIGNED_MAX : (align)))) +#define ATTR_ALIGN(align) __attribute__ ((__aligned__ ((ATTRIBUTE_ALIGNED_MAX < align) ? ATTRIBUTE_ALIGNED_MAX : align))) #else #define ATTR_ALIGN(align) #endif + +#ifdef HAVE_BUILTIN_EXPECT +#define likely(x) __builtin_expect ((x) != 0, 1) +#define unlikely(x) __builtin_expect ((x) != 0, 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif diff --git a/libmpeg2/header.c b/libmpeg2/header.c index 68483a71c1..548d6bf21e 100644 --- a/libmpeg2/header.c +++ b/libmpeg2/header.c @@ -1,8 +1,10 @@ /* - * slice.c - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * header.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,13 +24,23 @@ #include "config.h" #include <inttypes.h> -#include <stdio.h> +#include <stdlib.h> /* defines NULL */ +#include <string.h> /* memcmp */ +#include "mpeg2.h" #include "mpeg2_internal.h" +#include "convert.h" #include "attributes.h" +#define SEQ_EXT 2 +#define SEQ_DISPLAY_EXT 4 +#define QUANT_MATRIX_EXT 8 +#define COPYRIGHT_EXT 0x10 +#define PIC_DISPLAY_EXT 0x80 +#define PIC_CODING_EXT 0x100 + /* default intra quant matrix, in zig-zag order */ -static uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { +static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { 8, 16, 16, 19, 16, 19, @@ -46,214 +58,634 @@ static uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { 83 }; -uint8_t scan_norm[64] ATTR_ALIGN(16) = -{ +uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = { /* Zig-Zag scan pattern */ - 0, 1, 8,16, 9, 2, 3,10, - 17,24,32,25,18,11, 4, 5, - 12,19,26,33,40,48,41,34, - 27,20,13, 6, 7,14,21,28, - 35,42,49,56,57,50,43,36, - 29,22,15,23,30,37,44,51, - 58,59,52,45,38,31,39,46, - 53,60,61,54,47,55,62,63 + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 }; -uint8_t scan_alt[64] ATTR_ALIGN(16) = -{ +uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = { /* Alternate scan pattern */ - 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, - 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, - 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, - 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 }; -void header_state_init (picture_t * picture) +void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec) { - picture->scan = scan_norm; + mpeg2dec->decoder.scan = mpeg2_scan_norm; + mpeg2dec->picture = mpeg2dec->pictures; + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; + mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; + mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; + mpeg2dec->first = 1; + mpeg2dec->alloc_index = 0; + mpeg2dec->alloc_index_user = 0; } -int header_process_sequence_header (picture_t * picture, uint8_t * buffer) +static void reset_info (mpeg2_info_t * info) { + info->current_picture = info->current_picture_2nd = NULL; + info->display_picture = info->display_picture_2nd = NULL; + info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL; + info->user_data = NULL; info->user_data_len = 0; +} + +int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + sequence_t * sequence = &(mpeg2dec->new_sequence); + decoder_t * decoder = &(mpeg2dec->decoder); + static unsigned int frame_period[9] = { + 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000 + }; int width, height; int i; - if ((buffer[6] & 0x20) != 0x20){ - printf("missing marker bit!\n"); - return 1; /* missing marker_bit */ - } + if ((buffer[6] & 0x20) != 0x20) /* missing marker_bit */ + return 1; - height = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + i = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + sequence->display_width = sequence->picture_width = width = i >> 12; + sequence->display_height = sequence->picture_height = height = i & 0xfff; + decoder->width = sequence->width = width = (width + 15) & ~15; + decoder->height = sequence->height = height = (height + 15) & ~15; + decoder->vertical_position_extension = (height > 2800); + sequence->chroma_width = width >> 1; + sequence->chroma_height = height >> 1; - picture->display_picture_width = (height >> 12); - picture->display_picture_height = (height & 0xfff); + sequence->flags = SEQ_FLAG_PROGRESSIVE_SEQUENCE; - width = ((height >> 12) + 15) & ~15; - height = ((height & 0xfff) + 15) & ~15; + sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */ + sequence->frame_period = 0; + if ((buffer[3] & 15) < 9) + sequence->frame_period = frame_period[buffer[3] & 15]; - if ((width > 768) || (height > 576)){ - printf("size restrictions for MP@ML or MPEG1 exceeded! (%dx%d)\n",width,height); -// return 1; /* size restrictions for MP@ML or MPEG1 */ - } - - picture->coded_picture_width = width; - picture->coded_picture_height = height; + sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6); - /* this is not used by the decoder */ - picture->aspect_ratio_information = buffer[3] >> 4; - picture->frame_rate_code = buffer[3] & 15; - picture->bitrate = (buffer[4]<<10)|(buffer[5]<<2)|(buffer[6]>>6); + sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800; + + if (buffer[7] & 4) + sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS; if (buffer[7] & 2) { for (i = 0; i < 64; i++) - picture->intra_quantizer_matrix[scan_norm[i]] = + decoder->intra_quantizer_matrix[mpeg2_scan_norm[i]] = (buffer[i+7] << 7) | (buffer[i+8] >> 1); buffer += 64; - } else { + } else for (i = 0; i < 64; i++) - picture->intra_quantizer_matrix[scan_norm[i]] = + decoder->intra_quantizer_matrix[mpeg2_scan_norm[i]] = default_intra_quantizer_matrix [i]; - } - if (buffer[7] & 1) { + if (buffer[7] & 1) for (i = 0; i < 64; i++) - picture->non_intra_quantizer_matrix[scan_norm[i]] = + decoder->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = buffer[i+8]; - } else { + else for (i = 0; i < 64; i++) - picture->non_intra_quantizer_matrix[i] = 16; + decoder->non_intra_quantizer_matrix[i] = 16; + + sequence->profile_level_id = 0x80; + sequence->colour_primaries = 1; + sequence->transfer_characteristics = 1; + sequence->matrix_coefficients = 1; + + decoder->mpeg1 = 1; + decoder->intra_dc_precision = 0; + decoder->frame_pred_frame_dct = 1; + decoder->q_scale_type = 0; + decoder->concealment_motion_vectors = 0; + decoder->scan = mpeg2_scan_norm; + decoder->picture_structure = FRAME_PICTURE; + + mpeg2dec->ext_state = SEQ_EXT; + mpeg2dec->state = STATE_SEQUENCE; + mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0; + + reset_info (&(mpeg2dec->info)); + return 0; +} + +static int sequence_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + sequence_t * sequence = &(mpeg2dec->new_sequence); + decoder_t * decoder = &(mpeg2dec->decoder); + int width, height; + uint32_t flags; + + if (!(buffer[3] & 1)) + return 1; + + sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4); + + width = sequence->display_width = sequence->picture_width += + ((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000; + height = sequence->display_height = sequence->picture_height += + (buffer[2] << 7) & 0x3000; + decoder->vertical_position_extension = (height > 2800); + flags = sequence->flags | SEQ_FLAG_MPEG2; + if (!(buffer[1] & 8)) { + flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE; + height = (height + 31) & ~31; + } + if (buffer[5] & 0x80) + flags |= SEQ_FLAG_LOW_DELAY; + sequence->flags = flags; + decoder->width = sequence->width = width = (width + 15) & ~15; + decoder->height = sequence->height = height = (height + 15) & ~15; + switch (buffer[1] & 6) { + case 0: /* invalid */ + return 1; + case 2: /* 4:2:0 */ + height >>= 1; + case 4: /* 4:2:2 */ + width >>= 1; } + sequence->chroma_width = width; + sequence->chroma_height = height; - /* MPEG1 - for testing only */ - picture->mpeg1 = 1; - picture->intra_dc_precision = 0; - picture->frame_pred_frame_dct = 1; - picture->q_scale_type = 0; - picture->concealment_motion_vectors = 0; - /* picture->alternate_scan = 0; */ - picture->picture_structure = FRAME_PICTURE; - /* picture->second_field = 0; */ + sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000; + + sequence->vbv_buffer_size |= buffer[4] << 21; + + sequence->frame_period = + sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>2)&3)+1); + + decoder->mpeg1 = 0; + + mpeg2dec->ext_state = SEQ_DISPLAY_EXT; return 0; } -static int header_process_sequence_extension (picture_t * picture, - uint8_t * buffer) +static int sequence_display_ext (mpeg2dec_t * mpeg2dec) { - /* check chroma format, size extensions, marker bit */ - if (((buffer[1] & 0x07) != 0x02) || (buffer[2] & 0xe0) || - ((buffer[3] & 0x01) != 0x01)) + uint8_t * buffer = mpeg2dec->chunk_start; + sequence_t * sequence = &(mpeg2dec->new_sequence); + uint32_t flags; + + flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) | + ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT)); + if (buffer[0] & 1) { + flags |= SEQ_FLAG_COLOUR_DESCRIPTION; + sequence->colour_primaries = buffer[1]; + sequence->transfer_characteristics = buffer[2]; + sequence->matrix_coefficients = buffer[3]; + buffer += 3; + } + + if (!(buffer[2] & 2)) /* missing marker_bit */ return 1; - /* this is not used by the decoder */ - picture->progressive_sequence = (buffer[1] >> 3) & 1; + sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2); + sequence->display_height = + ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3); + + return 0; +} + +static inline void finalize_sequence (sequence_t * sequence) +{ + int width; + int height; + + sequence->byte_rate *= 50; + + if (sequence->flags & SEQ_FLAG_MPEG2) { + switch (sequence->pixel_width) { + case 1: /* square pixels */ + sequence->pixel_width = sequence->pixel_height = 1; return; + case 2: /* 4:3 aspect ratio */ + width = 4; height = 3; break; + case 3: /* 16:9 aspect ratio */ + width = 16; height = 9; break; + case 4: /* 2.21:1 aspect ratio */ + width = 221; height = 100; break; + default: /* illegal */ + sequence->pixel_width = sequence->pixel_height = 0; return; + } + width *= sequence->display_height; + height *= sequence->display_width; + + } else { + if (sequence->byte_rate == 50 * 0x3ffff) + sequence->byte_rate = 0; /* mpeg-1 VBR */ + + switch (sequence->pixel_width) { + case 0: case 15: /* illegal */ + sequence->pixel_width = sequence->pixel_height = 0; return; + case 1: /* square pixels */ + sequence->pixel_width = sequence->pixel_height = 1; return; + case 3: /* 720x576 16:9 */ + sequence->pixel_width = 64; sequence->pixel_height = 45; return; + case 6: /* 720x480 16:9 */ + sequence->pixel_width = 32; sequence->pixel_height = 27; return; + case 12: /* 720*480 4:3 */ + sequence->pixel_width = 8; sequence->pixel_height = 9; return; + default: + height = 88 * sequence->pixel_width + 1171; + width = 2000; + } + } - if (picture->progressive_sequence) - picture->coded_picture_height = - (picture->coded_picture_height + 31) & ~31; + sequence->pixel_width = width; + sequence->pixel_height = height; + while (width) { /* find greatest common divisor */ + int tmp = width; + width = height % tmp; + height = tmp; + } + sequence->pixel_width /= height; + sequence->pixel_height /= height; +} - /* MPEG1 - for testing only */ - picture->mpeg1 = 0; +void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec) +{ + sequence_t * sequence = &(mpeg2dec->new_sequence); + + finalize_sequence (sequence); + + /* + * according to 6.1.1.6, repeat sequence headers should be + * identical to the original. However some DVDs dont respect that + * and have different bitrates in the repeat sequence headers. So + * we'll ignore that in the comparison and still consider these as + * repeat sequence headers. + */ + mpeg2dec->sequence.byte_rate = sequence->byte_rate; + if (!memcmp (&(mpeg2dec->sequence), sequence, sizeof (sequence_t))) + mpeg2dec->state = STATE_SEQUENCE_REPEATED; + mpeg2dec->sequence = *sequence; + + mpeg2dec->info.sequence = &(mpeg2dec->sequence); +} +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->state = STATE_GOP; + reset_info (&(mpeg2dec->info)); return 0; } -static int header_process_quant_matrix_extension (picture_t * picture, - uint8_t * buffer) +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type) { int i; - if (buffer[0] & 8) { - for (i = 0; i < 64; i++) - picture->intra_quantizer_matrix[scan_norm[i]] = - (buffer[i] << 5) | (buffer[i+1] >> 3); - buffer += 64; + for (i = 0; i < 3; i++) + if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf && + mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) { + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf; + mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; + if ((coding_type == B_TYPE) || + (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { + if ((coding_type == B_TYPE) || (mpeg2dec->convert_start)) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; + } + break; + } +} + +int mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec) +{ + decoder_t * decoder = &(mpeg2dec->decoder); + picture_t * picture; + + if (mpeg2dec->state != STATE_SLICE_1ST) { + mpeg2dec->state = STATE_PICTURE; + picture = mpeg2dec->pictures; + if ((decoder->coding_type != PIC_FLAG_CODING_TYPE_B) ^ + (mpeg2dec->picture >= mpeg2dec->pictures + 2)) + picture += 2; + } else { + mpeg2dec->state = STATE_PICTURE_2ND; + picture = mpeg2dec->picture + 1; /* second field picture */ } + mpeg2dec->picture = picture; + picture->flags = 0; + if (mpeg2dec->num_pts) { + if (mpeg2dec->bytes_since_pts >= 4) { + mpeg2dec->num_pts = 0; + picture->pts = mpeg2dec->pts_current; + picture->flags = PIC_FLAG_PTS; + } else if (mpeg2dec->num_pts > 1) { + mpeg2dec->num_pts = 1; + picture->pts = mpeg2dec->pts_previous; + picture->flags = PIC_FLAG_PTS; + } + } + picture->display_offset[0].x = picture->display_offset[1].x = + picture->display_offset[2].x = mpeg2dec->display_offset_x; + picture->display_offset[0].y = picture->display_offset[1].y = + picture->display_offset[2].y = mpeg2dec->display_offset_y; + return mpeg2_parse_header (mpeg2dec); +} - if (buffer[0] & 4) { - for (i = 0; i < 64; i++) - picture->non_intra_quantizer_matrix[scan_norm[i]] = - (buffer[i] << 6) | (buffer[i+1] >> 2); +int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + picture_t * picture = mpeg2dec->picture; + decoder_t * decoder = &(mpeg2dec->decoder); + int type; + int low_delay; + + type = (buffer [1] >> 3) & 7; + low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; + + if (mpeg2dec->state == STATE_PICTURE) { + picture_t * other; + + decoder->second_field = 0; + other = mpeg2dec->pictures; + if (other == picture) + other += 2; + if (decoder->coding_type != PIC_FLAG_CODING_TYPE_B) { + mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; + mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; + } + mpeg2dec->fbuf[0] = NULL; + reset_info (&(mpeg2dec->info)); + mpeg2dec->info.current_picture = picture; + mpeg2dec->info.display_picture = picture; + if (type != PIC_FLAG_CODING_TYPE_B) { + if (!low_delay) { + if (mpeg2dec->first) { + mpeg2dec->info.display_picture = NULL; + mpeg2dec->first = 0; + } else { + mpeg2dec->info.display_picture = other; + if (other->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = other + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; + } + } + if (!low_delay + !mpeg2dec->convert_start) + mpeg2dec->info.discard_fbuf = + mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert_start]; + } + if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + fbuf_t * fbuf; + + fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); + fbuf->id = NULL; + if (mpeg2dec->convert_start) { + fbuf->buf[0] = + (uint8_t *) mpeg2_malloc (mpeg2dec->convert_size[0], + ALLOC_CONVERTED); + fbuf->buf[1] = fbuf->buf[0] + mpeg2dec->convert_size[1]; + fbuf->buf[2] = fbuf->buf[0] + mpeg2dec->convert_size[2]; + } else { + int size; + size = mpeg2dec->decoder.width * mpeg2dec->decoder.height; + fbuf->buf[0] = (uint8_t *) mpeg2_malloc (6 * size >> 2, + ALLOC_YUV); + fbuf->buf[1] = fbuf->buf[0] + size; + fbuf->buf[2] = fbuf->buf[1] + (size >> 2); + } + } + mpeg2_set_fbuf (mpeg2dec, type); + } + } else { + decoder->second_field = 1; + mpeg2dec->info.current_picture_2nd = picture; + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; + if (low_delay || type == PIC_FLAG_CODING_TYPE_B) + mpeg2dec->info.display_picture_2nd = picture; + } + mpeg2dec->ext_state = PIC_CODING_EXT; + + picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); + + decoder->coding_type = type; + picture->flags |= type; + + if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) { + /* forward_f_code and backward_f_code - used in mpeg1 only */ + decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1; + decoder->f_motion.f_code[0] = + (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; + decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1; + decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; } + /* XXXXXX decode extra_information_picture as well */ + + picture->nb_fields = 2; + return 0; } -static int header_process_picture_coding_extension (picture_t * picture, uint8_t * buffer) +static int picture_coding_ext (mpeg2dec_t * mpeg2dec) { + uint8_t * buffer = mpeg2dec->chunk_start; + picture_t * picture = mpeg2dec->picture; + decoder_t * decoder = &(mpeg2dec->decoder); + uint32_t flags; + /* pre subtract 1 for use later in compute_motion_vector */ - picture->f_motion.f_code[0] = (buffer[0] & 15) - 1; - picture->f_motion.f_code[1] = (buffer[1] >> 4) - 1; - picture->b_motion.f_code[0] = (buffer[1] & 15) - 1; - picture->b_motion.f_code[1] = (buffer[2] >> 4) - 1; - - picture->intra_dc_precision = (buffer[2] >> 2) & 3; - picture->picture_structure = buffer[2] & 3; - picture->frame_pred_frame_dct = (buffer[3] >> 6) & 1; - picture->concealment_motion_vectors = (buffer[3] >> 5) & 1; - picture->q_scale_type = (buffer[3] >> 4) & 1; - picture->intra_vlc_format = (buffer[3] >> 3) & 1; - - if (buffer[3] & 4) /* alternate_scan */ - picture->scan = scan_alt; - else - picture->scan = scan_norm; - - /* these are not used by the decoder */ - picture->top_field_first = buffer[3] >> 7; - picture->repeat_first_field = (buffer[3] >> 1) & 1; - picture->progressive_frame = buffer[4] >> 7; - - // repeat_first implementation by A'rpi/ESP-team, based on libmpeg3: - picture->display_time=100; - if(picture->repeat_first_field){ - if(picture->progressive_sequence){ - if(picture->top_field_first) - picture->display_time+=200; - else - picture->display_time+=100; - } else - if(picture->progressive_frame){ - picture->display_time+=50; - } + decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1; + decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1; + decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1; + decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1; + + flags = picture->flags; + decoder->intra_dc_precision = (buffer[2] >> 2) & 3; + decoder->picture_structure = buffer[2] & 3; + switch (decoder->picture_structure) { + case TOP_FIELD: + flags |= PIC_FLAG_TOP_FIELD_FIRST; + case BOTTOM_FIELD: + picture->nb_fields = 1; + break; + case FRAME_PICTURE: + if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { + picture->nb_fields = (buffer[3] & 2) ? 3 : 2; + flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; + } else + picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; + break; + default: + return 1; + } + decoder->top_field_first = buffer[3] >> 7; + decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; + decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; + decoder->q_scale_type = (buffer[3] >> 4) & 1; + decoder->intra_vlc_format = (buffer[3] >> 3) & 1; + decoder->scan = (buffer[3] & 4) ? mpeg2_scan_alt : mpeg2_scan_norm; + flags |= (buffer[4] & 0x80) ? PIC_FLAG_PROGRESSIVE_FRAME : 0; + if (buffer[4] & 0x40) + flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) & + PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY; + picture->flags = flags; + + mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT; + + return 0; +} + +static int picture_display_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + picture_t * picture = mpeg2dec->picture; + int i, nb_pos; + + nb_pos = picture->nb_fields; + if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) + nb_pos >>= 1; + + for (i = 0; i < nb_pos; i++) { + int x, y; + + x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) | + (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i); + y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) | + (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i); + if (! (x & y & 1)) + return 1; + picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1; + picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1; + } + for (; i < 3; i++) { + picture->display_offset[i].x = mpeg2dec->display_offset_x; + picture->display_offset[i].y = mpeg2dec->display_offset_y; } - //temopral hack. We calc time on every field, so if we have 2 fields - // interlaced we'll end with double time for 1 frame - if( picture->picture_structure!=3 ) picture->display_time/=2; return 0; } -int header_process_extension (picture_t * picture, uint8_t * buffer) +static int copyright_ext (mpeg2dec_t * mpeg2dec) { - switch (buffer[0] & 0xf0) { - case 0x10: /* sequence extension */ - return header_process_sequence_extension (picture, buffer); + return 0; +} - case 0x30: /* quant matrix extension */ - return header_process_quant_matrix_extension (picture, buffer); +static int quant_matrix_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + decoder_t * decoder = &(mpeg2dec->decoder); + int i; - case 0x80: /* picture coding extension */ - return header_process_picture_coding_extension (picture, buffer); + if (buffer[0] & 8) { + for (i = 0; i < 64; i++) + decoder->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 5) | (buffer[i+1] >> 3); + buffer += 64; } + if (buffer[0] & 4) + for (i = 0; i < 64; i++) + decoder->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 6) | (buffer[i+1] >> 2); + return 0; } -int header_process_picture_header (picture_t *picture, uint8_t * buffer) +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec) { - picture->picture_coding_type = (buffer [1] >> 3) & 7; + static int (* parser[]) (mpeg2dec_t *) = { + 0, sequence_ext, sequence_display_ext, quant_matrix_ext, + copyright_ext, 0, 0, picture_display_ext, picture_coding_ext + }; + int ext, ext_bit; + + ext = mpeg2dec->chunk_start[0] >> 4; + ext_bit = 1 << ext; + + if (!(mpeg2dec->ext_state & ext_bit)) + return 0; /* ignore illegal extensions */ + mpeg2dec->ext_state &= ~ext_bit; + return parser[ext] (mpeg2dec); +} - /* forward_f_code and backward_f_code - used in mpeg1 only */ - picture->f_motion.f_code[1] = (buffer[3] >> 2) & 1; - picture->f_motion.f_code[0] = - (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; - picture->b_motion.f_code[1] = (buffer[4] >> 6) & 1; - picture->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec) +{ + if (!mpeg2dec->info.user_data_len) + mpeg2dec->info.user_data = mpeg2dec->chunk_start; + else + mpeg2dec->info.user_data_len += 3; + mpeg2dec->info.user_data_len += (mpeg2dec->chunk_ptr - 4 - + mpeg2dec->chunk_start); + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1; + + return 0; +} - /* move in header_process_picture_header */ - picture->second_field = - (picture->picture_structure != FRAME_PICTURE) && - !(picture->second_field); +int mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 || + mpeg2dec->state == STATE_PICTURE_2ND) ? + STATE_SLICE : STATE_SLICE_1ST); + + if (!(mpeg2dec->nb_decode_slices)) + mpeg2dec->picture->flags |= PIC_FLAG_SKIP; + else if (mpeg2dec->convert_start) { + int flags; + + switch (mpeg2dec->decoder.picture_structure) { + case TOP_FIELD: flags = CONVERT_TOP_FIELD; break; + case BOTTOM_FIELD: flags = CONVERT_BOTTOM_FIELD; break; + default: + flags = + ((mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) ? + CONVERT_FRAME : CONVERT_BOTH_FIELDS); + } + mpeg2dec->convert_start (mpeg2dec->convert_id, + mpeg2dec->fbuf[0]->buf, flags); + + mpeg2dec->decoder.convert = mpeg2dec->convert_copy; + mpeg2dec->decoder.fbuf_id = mpeg2dec->convert_id; + + if (mpeg2dec->decoder.coding_type == B_TYPE) + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + else { + mpeg2_init_fbuf (&(mpeg2dec->decoder), + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + if (mpeg2dec->state == STATE_SLICE) + mpeg2dec->yuv_index ^= 1; + } + } else { + int b_type; + mpeg2dec->decoder.convert = NULL; + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf, + mpeg2dec->fbuf[b_type + 1]->buf, + mpeg2dec->fbuf[b_type]->buf); + } + mpeg2dec->action = NULL; return 0; } + +int mpeg2_header_end (mpeg2dec_t * mpeg2dec) +{ + picture_t * picture; + int b_type; + + picture = mpeg2dec->pictures; + if (mpeg2dec->picture < picture + 2) + picture = mpeg2dec->pictures + 2; + + mpeg2dec->state = STATE_INVALID; + reset_info (&(mpeg2dec->info)); + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + if (!(mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { + mpeg2dec->info.display_picture = picture; + if (picture->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = picture + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type]; + if (!mpeg2dec->convert_start) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1]; + } else if (!mpeg2dec->convert_start) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type]; + mpeg2dec->action = mpeg2_seek_sequence; + return STATE_END; +} diff --git a/libmpeg2/idct.c b/libmpeg2/idct.c index 1e869c37de..bcae078156 100644 --- a/libmpeg2/idct.c +++ b/libmpeg2/idct.c @@ -1,12 +1,10 @@ /* * idct.c - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> - * - * Portions of this code are from the MPEG software simulation group - * idct implementation. This code will be replaced with a new - * implementation soon. + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,27 +21,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/**********************************************************/ -/* inverse two dimensional DCT, Chen-Wang algorithm */ -/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */ -/* 32-bit integer arithmetic (8 bit coefficients) */ -/* 11 mults, 29 adds per DCT */ -/* sE, 18.8.91 */ -/**********************************************************/ -/* coefficients extended to 12 bit for IEEE1180-1990 */ -/* compliance sE, 2.1.94 */ -/**********************************************************/ - -/* this code assumes >> to be a two's-complement arithmetic */ -/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ - #include "config.h" -#include <stdio.h> +#include <stdlib.h> #include <inttypes.h> +#include "mpeg2.h" #include "mpeg2_internal.h" -#include "mm_accel.h" +#include "attributes.h" #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ @@ -53,199 +38,131 @@ #define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ /* idct main entry point */ -void (*idct_block_copy) (int16_t * block, uint8_t * dest, int stride); -void (*idct_block_add) (int16_t * block, uint8_t * dest, int stride); - -static void idct_block_copy_c (int16_t *block, uint8_t * dest, int stride); -static void idct_block_add_c (int16_t *block, uint8_t * dest, int stride); +void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); static uint8_t clip_lut[1024]; -#define CLIP(i) ((clip_lut+384)[ (i)]) +#define CLIP(i) ((clip_lut+384)[(i)]) -void idct_init (void) -{ -#ifdef ARCH_X86 - if (config.flags & MM_ACCEL_X86_MMXEXT) { - printf ("libmpeg2: Using MMXEXT for IDCT transform\n"); - idct_block_copy = idct_block_copy_mmxext; - idct_block_add = idct_block_add_mmxext; - idct_mmx_init (); - } else if (config.flags & MM_ACCEL_X86_MMX) { - printf ("libmpeg2: Using MMX for IDCT transform\n"); - idct_block_copy = idct_block_copy_mmx; - idct_block_add = idct_block_add_mmx; - idct_mmx_init (); - } else -#endif -#ifdef LIBMPEG2_MLIB - if (config.flags & MM_ACCEL_MLIB) { - printf ("libmpeg2: Using mlib for IDCT transform\n"); - idct_block_copy = idct_block_copy_mlib; - idct_block_add = idct_block_add_mlib; - } else +#if 0 +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + t0 = W0*d0 + W1*d1; \ + t1 = W0*d1 - W1*d0; \ +} while (0) +#else +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + int tmp = W0 * (d0 + d1); \ + t0 = tmp + (W1 - W0) * d1; \ + t1 = tmp - (W1 + W0) * d0; \ +} while (0) #endif - { - int i; - - printf ("libmpeg2: No accelerated IDCT transform found\n"); - idct_block_copy = idct_block_copy_c; - idct_block_add = idct_block_add_c; - for (i = -384; i < 640; i++) - clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); - } -} -/* row (horizontal) IDCT - * - * 7 pi 1 - * dst[k] = sum c[l] * src[l] * cos ( -- * ( k + - ) * l ) - * l=0 8 2 - * - * where: c[0] = 128 - * c[1..7] = 128*sqrt (2) - */ - -static inline void idct_row (int16_t * block) +static void inline idct_row (int16_t * const block) { - int x0, x1, x2, x3, x4, x5, x6, x7, x8; - - x1 = block[4] << 11; - x2 = block[6]; - x3 = block[2]; - x4 = block[1]; - x5 = block[7]; - x6 = block[5]; - x7 = block[3]; + int d0, d1, d2, d3; + int a0, a1, a2, a3, b0, b1, b2, b3; + int t0, t1, t2, t3; /* shortcut */ - if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { - block[0] = block[1] = block[2] = block[3] = block[4] = - block[5] = block[6] = block[7] = block[0]<<3; + if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] | + ((int32_t *)block)[3]))) { + uint32_t tmp = (uint16_t) (block[0] << 3); + tmp |= tmp << 16; + ((int32_t *)block)[0] = tmp; + ((int32_t *)block)[1] = tmp; + ((int32_t *)block)[2] = tmp; + ((int32_t *)block)[3] = tmp; return; } - x0 = (block[0] << 11) + 128; /* for proper rounding in the fourth stage */ - - /* first stage */ - x8 = W7 * (x4 + x5); - x4 = x8 + (W1 - W7) * x4; - x5 = x8 - (W1 + W7) * x5; - x8 = W3 * (x6 + x7); - x6 = x8 - (W3 - W5) * x6; - x7 = x8 - (W3 + W5) * x7; - - /* second stage */ - x8 = x0 + x1; - x0 -= x1; - x1 = W6 * (x3 + x2); - x2 = x1 - (W2 + W6) * x2; - x3 = x1 + (W2 - W6) * x3; - x1 = x4 + x6; - x4 -= x6; - x6 = x5 + x7; - x5 -= x7; - - /* third stage */ - x7 = x8 + x3; - x8 -= x3; - x3 = x0 + x2; - x0 -= x2; - x2 = (181 * (x4 + x5) + 128) >> 8; - x4 = (181 * (x4 - x5) + 128) >> 8; - - /* fourth stage */ - block[0] = (x7 + x1) >> 8; - block[1] = (x3 + x2) >> 8; - block[2] = (x0 + x4) >> 8; - block[3] = (x8 + x6) >> 8; - block[4] = (x8 - x6) >> 8; - block[5] = (x0 - x4) >> 8; - block[6] = (x3 - x2) >> 8; - block[7] = (x7 - x1) >> 8; + d0 = (block[0] << 11) + 128; + d1 = block[1]; + d2 = block[2] << 11; + d3 = block[3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[4]; + d1 = block[5]; + d2 = block[6]; + d3 = block[7]; + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) * 181) >> 8; + b2 = ((t0 - t1) * 181) >> 8; + + block[0] = (a0 + b0) >> 8; + block[1] = (a1 + b1) >> 8; + block[2] = (a2 + b2) >> 8; + block[3] = (a3 + b3) >> 8; + block[4] = (a3 - b3) >> 8; + block[5] = (a2 - b2) >> 8; + block[6] = (a1 - b1) >> 8; + block[7] = (a0 - b0) >> 8; } -/* column (vertical) IDCT - * - * 7 pi 1 - * dst[8*k] = sum c[l] * src[8*l] * cos ( -- * ( k + - ) * l ) - * l=0 8 2 - * - * where: c[0] = 1/1024 - * c[1..7] = (1/1024)*sqrt (2) - */ - -static inline void idct_col (int16_t *block) +static void inline idct_col (int16_t * const block) { - int x0, x1, x2, x3, x4, x5, x6, x7, x8; - - /* shortcut */ - x1 = block [8*4] << 8; - x2 = block [8*6]; - x3 = block [8*2]; - x4 = block [8*1]; - x5 = block [8*7]; - x6 = block [8*5]; - x7 = block [8*3]; - -#if 0 - if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { - block[8*0] = block[8*1] = block[8*2] = block[8*3] = block[8*4] = - block[8*5] = block[8*6] = block[8*7] = (block[8*0] + 32) >> 6; - return; - } -#endif - - x0 = (block[8*0] << 8) + 8192; - - /* first stage */ - x8 = W7 * (x4 + x5) + 4; - x4 = (x8 + (W1 - W7) * x4) >> 3; - x5 = (x8 - (W1 + W7) * x5) >> 3; - x8 = W3 * (x6 + x7) + 4; - x6 = (x8 - (W3 - W5) * x6) >> 3; - x7 = (x8 - (W3 + W5) * x7) >> 3; - - /* second stage */ - x8 = x0 + x1; - x0 -= x1; - x1 = W6 * (x3 + x2) + 4; - x2 = (x1 - (W2 + W6) * x2) >> 3; - x3 = (x1 + (W2 - W6) * x3) >> 3; - x1 = x4 + x6; - x4 -= x6; - x6 = x5 + x7; - x5 -= x7; - - /* third stage */ - x7 = x8 + x3; - x8 -= x3; - x3 = x0 + x2; - x0 -= x2; - x2 = (181 * (x4 + x5) + 128) >> 8; - x4 = (181 * (x4 - x5) + 128) >> 8; - - /* fourth stage */ - block[8*0] = (x7 + x1) >> 14; - block[8*1] = (x3 + x2) >> 14; - block[8*2] = (x0 + x4) >> 14; - block[8*3] = (x8 + x6) >> 14; - block[8*4] = (x8 - x6) >> 14; - block[8*5] = (x0 - x4) >> 14; - block[8*6] = (x3 - x2) >> 14; - block[8*7] = (x7 - x1) >> 14; + int d0, d1, d2, d3; + int a0, a1, a2, a3, b0, b1, b2, b3; + int t0, t1, t2, t3; + + d0 = (block[8*0] << 11) + 65536; + d1 = block[8*1]; + d2 = block[8*2] << 11; + d3 = block[8*3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[8*4]; + d1 = block[8*5]; + d2 = block[8*6]; + d3 = block[8*7]; + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 = (t0 - t2) >> 8; + t1 = (t1 - t3) >> 8; + b1 = (t0 + t1) * 181; + b2 = (t0 - t1) * 181; + + block[8*0] = (a0 + b0) >> 17; + block[8*1] = (a1 + b1) >> 17; + block[8*2] = (a2 + b2) >> 17; + block[8*3] = (a3 + b3) >> 17; + block[8*4] = (a3 - b3) >> 17; + block[8*5] = (a2 - b2) >> 17; + block[8*6] = (a1 - b1) >> 17; + block[8*7] = (a0 - b0) >> 17; } -void idct_block_copy_c (int16_t * block, uint8_t * dest, int stride) +static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, + const int stride) { int i; for (i = 0; i < 8; i++) idct_row (block + 8 * i); - for (i = 0; i < 8; i++) idct_col (block + i); - - i = 8; do { dest[0] = CLIP (block[0]); dest[1] = CLIP (block[1]); @@ -256,33 +173,112 @@ void idct_block_copy_c (int16_t * block, uint8_t * dest, int stride) dest[6] = CLIP (block[6]); dest[7] = CLIP (block[7]); + block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; + block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + dest += stride; block += 8; } while (--i); } -void idct_block_add_c (int16_t * block, uint8_t * dest, int stride) +static void mpeg2_idct_add_c (const int last, int16_t * block, + uint8_t * dest, const int stride) { int i; - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - - for (i = 0; i < 8; i++) - idct_col (block + i); + if (last != 129 || (block[0] & 7) == 4) { + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + do { + dest[0] = CLIP (block[0] + dest[0]); + dest[1] = CLIP (block[1] + dest[1]); + dest[2] = CLIP (block[2] + dest[2]); + dest[3] = CLIP (block[3] + dest[3]); + dest[4] = CLIP (block[4] + dest[4]); + dest[5] = CLIP (block[5] + dest[5]); + dest[6] = CLIP (block[6] + dest[6]); + dest[7] = CLIP (block[7] + dest[7]); + + block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; + block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + + dest += stride; + block += 8; + } while (--i); + } else { + int DC; + + DC = (block[0] + 4) >> 3; + block[0] = block[63] = 0; + i = 8; + do { + dest[0] = CLIP (DC + dest[0]); + dest[1] = CLIP (DC + dest[1]); + dest[2] = CLIP (DC + dest[2]); + dest[3] = CLIP (DC + dest[3]); + dest[4] = CLIP (DC + dest[4]); + dest[5] = CLIP (DC + dest[5]); + dest[6] = CLIP (DC + dest[6]); + dest[7] = CLIP (DC + dest[7]); + dest += stride; + } while (--i); + } +} - i = 8; - do { - dest[0] = CLIP (block[0] + dest[0]); - dest[1] = CLIP (block[1] + dest[1]); - dest[2] = CLIP (block[2] + dest[2]); - dest[3] = CLIP (block[3] + dest[3]); - dest[4] = CLIP (block[4] + dest[4]); - dest[5] = CLIP (block[5] + dest[5]); - dest[6] = CLIP (block[6] + dest[6]); - dest[7] = CLIP (block[7] + dest[7]); +void mpeg2_idct_init (uint32_t accel) +{ +#ifdef ARCH_X86 + if (accel & MPEG2_ACCEL_X86_MMXEXT) { + mpeg2_idct_copy = mpeg2_idct_copy_mmxext; + mpeg2_idct_add = mpeg2_idct_add_mmxext; + mpeg2_idct_mmx_init (); + } else if (accel & MPEG2_ACCEL_X86_MMX) { + mpeg2_idct_copy = mpeg2_idct_copy_mmx; + mpeg2_idct_add = mpeg2_idct_add_mmx; + mpeg2_idct_mmx_init (); + } else +#endif +#ifdef ARCH_PPC + if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { + mpeg2_idct_copy = mpeg2_idct_copy_altivec; + mpeg2_idct_add = mpeg2_idct_add_altivec; + mpeg2_idct_altivec_init (); + } else +#endif +#ifdef ARCH_ALPHA + if (accel & MPEG2_ACCEL_ALPHA_MVI) { + mpeg2_idct_copy = mpeg2_idct_copy_mvi; + mpeg2_idct_add = mpeg2_idct_add_mvi; + mpeg2_idct_alpha_init (0); + } else if (accel & MPEG2_ACCEL_ALPHA) { + mpeg2_idct_copy = mpeg2_idct_copy_alpha; + mpeg2_idct_add = mpeg2_idct_add_alpha; + mpeg2_idct_alpha_init (1); + } else +#endif +#ifdef LIBMPEG2_MLIB + if (accel & MPEG2_ACCEL_MLIB) { + mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee; + mpeg2_idct_add = (getenv ("MLIB_NON_IEEE") ? + mpeg2_idct_add_mlib_non_ieee : mpeg2_idct_add_mlib); + } else +#endif + { + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; - dest += stride; - block += 8; - } while (--i); + mpeg2_idct_copy = mpeg2_idct_copy_c; + mpeg2_idct_add = mpeg2_idct_add_c; + for (i = -384; i < 640; i++) + clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } + } } diff --git a/libmpeg2/idct_mlib.c b/libmpeg2/idct_mlib.c index 876ab574a4..eae2a2f1be 100644 --- a/libmpeg2/idct_mlib.c +++ b/libmpeg2/idct_mlib.c @@ -1,8 +1,9 @@ /* * idct_mlib.c - * Copyright (C) 1999-2001 Håkan Hjort <d95hjort@dtek.chalmers.se> + * Copyright (C) 1999-2002 Håkan Hjort <d95hjort@dtek.chalmers.se> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,25 +24,37 @@ #ifdef LIBMPEG2_MLIB -#include <inttypes.h> #include <mlib_types.h> #include <mlib_status.h> #include <mlib_sys.h> #include <mlib_video.h> +#include <string.h> +#include <inttypes.h> +#include "mpeg2.h" #include "mpeg2_internal.h" -void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride) +void mpeg2_idct_add_mlib (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT_IEEE_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); +} + +void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block, + uint8_t * const dest, const int stride) { mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); } -void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride) +void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block, + uint8_t * const dest, const int stride) { - /* Should we use mlib_VideoIDCT_IEEE_S16_S16 here ?? */ - /* it's ~30% slower. */ mlib_VideoIDCT8x8_S16_S16 (block, block); mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); } #endif diff --git a/libmpeg2/idct_mmx.c b/libmpeg2/idct_mmx.c index 70b3b9b95e..4915b93750 100644 --- a/libmpeg2/idct_mmx.c +++ b/libmpeg2/idct_mmx.c @@ -1,8 +1,10 @@ /* * idct_mmx.c - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +27,7 @@ #include <inttypes.h> +#include "mpeg2.h" #include "mpeg2_internal.h" #include "attributes.h" #include "mmx.h" @@ -87,104 +90,107 @@ static inline void idct_row (int16_t * row, int offset, c5, -c1, c3, -c1, \ c7, c3, c7, -c5 } -static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table) +static inline void mmxext_row_head (int16_t * const row, const int offset, + const int16_t * const table) { - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 - pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ - pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ } -static inline void mmxext_row (int16_t * table, int32_t * rounder) +static inline void mmxext_row (const int16_t * const table, + const int32_t * const rounder) { - movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 - pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 + movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */ + pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */ - pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 - pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */ + pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */ - movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 - pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 + movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */ + pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */ - paddd_m2r (*rounder, mm3); // mm3 += rounder - pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */ - pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 - paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ - pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 - movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ - pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 - paddd_r2r (mm7, mm1); // mm1 = b1 b0 + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ - paddd_m2r (*rounder, mm0); // mm0 += rounder - psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ - psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 - paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ - paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder - psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ - paddd_r2r (mm6, mm5); // mm5 = b3 b2 - movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */ - paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder - psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ + psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */ } -static inline void mmxext_row_tail (int16_t * row, int store) +static inline void mmxext_row_tail (int16_t * const row, const int store) { - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ /* slot */ - movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 + movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ } -static inline void mmxext_row_mid (int16_t * row, int store, - int offset, int16_t * table) +static inline void mmxext_row_mid (int16_t * const row, const int store, + const int offset, + const int16_t * const table) { - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ - movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 - movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ - pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 + pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ - movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 - pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ } @@ -199,125 +205,127 @@ static inline void mmxext_row_mid (int16_t * row, int store, c5, -c1, c7, -c5, \ c7, c3, c3, -c1 } -static inline void mmx_row_head (int16_t * row, int offset, int16_t * table) +static inline void mmx_row_head (int16_t * const row, const int offset, + const int16_t * const table) { - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ - movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 - pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ - movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 - punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ } -static inline void mmx_row (int16_t * table, int32_t * rounder) +static inline void mmx_row (const int16_t * const table, + const int32_t * const rounder) { - pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 - punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 + pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */ + punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */ - pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 - punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */ + punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */ - movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 - pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 + movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */ + pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */ - paddd_m2r (*rounder, mm3); // mm3 += rounder - pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */ - pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 - paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ - pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 - movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ - pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 - paddd_r2r (mm7, mm1); // mm1 = b1 b0 + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ - paddd_m2r (*rounder, mm0); // mm0 += rounder - psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ - psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 - paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ - paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder - psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ - paddd_r2r (mm6, mm5); // mm5 = b3 b2 - movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */ - paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder - psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ + psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */ } -static inline void mmx_row_tail (int16_t * row, int store) +static inline void mmx_row_tail (int16_t * const row, const int store) { - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 + psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ - packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */ - pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 + pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */ - psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 + psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */ - por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 + por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */ /* slot */ - movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ } -static inline void mmx_row_mid (int16_t * row, int store, - int offset, int16_t * table) +static inline void mmx_row_mid (int16_t * const row, const int store, + const int offset, const int16_t * const table) { - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ - packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */ - punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 - psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */ - movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 - pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */ - movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 - por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */ - movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 - punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ - movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 - pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ } #if 0 -// C column IDCT - its just here to document the MMXEXT and MMX versions +/* C column IDCT - its just here to document the MMXEXT and MMX versions */ static inline void idct_col (int16_t * col, int offset) { /* multiplication - as implemented on mmx */ @@ -388,178 +396,178 @@ static inline void idct_col (int16_t * col, int offset) #endif -// MMX column IDCT -static inline void idct_col (int16_t * col, int offset) +/* MMX column IDCT */ +static inline void idct_col (int16_t * const col, const int offset) { #define T1 13036 #define T2 27146 #define T3 43790 #define C4 23170 - static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; - static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; - static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; - static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; + static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; + static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; + static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; + static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; /* column code adapted from peter gubanov */ /* http://www.elecard.com/peter/idct.shtml */ - movq_m2r (*_T1, mm0); // mm0 = T1 + movq_m2r (*_T1, mm0); /* mm0 = T1 */ - movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 - movq_r2r (mm0, mm2); // mm2 = T1 + movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */ + movq_r2r (mm0, mm2); /* mm2 = T1 */ - movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 - pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 + movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */ + pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */ - movq_m2r (*_T3, mm5); // mm5 = T3 - pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 + movq_m2r (*_T3, mm5); /* mm5 = T3 */ + pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */ - movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 - movq_r2r (mm5, mm7); // mm7 = T3-1 + movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */ + movq_r2r (mm5, mm7); /* mm7 = T3-1 */ - movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 - psubsw_r2r (mm4, mm0); // mm0 = v17 + movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */ + psubsw_r2r (mm4, mm0); /* mm0 = v17 */ - movq_m2r (*_T2, mm4); // mm4 = T2 - pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 + movq_m2r (*_T2, mm4); /* mm4 = T2 */ + pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */ - paddsw_r2r (mm2, mm1); // mm1 = u17 - pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 + paddsw_r2r (mm2, mm1); /* mm1 = u17 */ + pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */ /* slot */ - movq_r2r (mm4, mm2); // mm2 = T2 - paddsw_r2r (mm3, mm5); // mm5 = T3*x3 + movq_r2r (mm4, mm2); /* mm2 = T2 */ + paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */ - pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 - paddsw_r2r (mm6, mm7); // mm7 = T3*x5 + pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */ + paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */ - psubsw_r2r (mm6, mm5); // mm5 = v35 - paddsw_r2r (mm3, mm7); // mm7 = u35 + psubsw_r2r (mm6, mm5); /* mm5 = v35 */ + paddsw_r2r (mm3, mm7); /* mm7 = u35 */ - movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 - movq_r2r (mm0, mm6); // mm6 = v17 + movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */ + movq_r2r (mm0, mm6); /* mm6 = v17 */ - pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 - psubsw_r2r (mm5, mm0); // mm0 = b3 + pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */ + psubsw_r2r (mm5, mm0); /* mm0 = b3 */ - psubsw_r2r (mm3, mm4); // mm4 = v26 - paddsw_r2r (mm6, mm5); // mm5 = v12 + psubsw_r2r (mm3, mm4); /* mm4 = v26 */ + paddsw_r2r (mm6, mm5); /* mm5 = v12 */ - movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 - movq_r2r (mm1, mm6); // mm6 = u17 + movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */ + movq_r2r (mm1, mm6); /* mm6 = u17 */ - paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 - paddsw_r2r (mm7, mm6); // mm6 = b0 + paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */ + paddsw_r2r (mm7, mm6); /* mm6 = b0 */ - psubsw_r2r (mm7, mm1); // mm1 = u12 - movq_r2r (mm1, mm7); // mm7 = u12 + psubsw_r2r (mm7, mm1); /* mm1 = u12 */ + movq_r2r (mm1, mm7); /* mm7 = u12 */ - movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 - paddsw_r2r (mm5, mm1); // mm1 = u12+v12 + movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */ + paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */ - movq_m2r (*_C4, mm0); // mm0 = C4/2 - psubsw_r2r (mm5, mm7); // mm7 = u12-v12 + movq_m2r (*_C4, mm0); /* mm0 = C4/2 */ + psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */ - movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 - pmulhw_r2r (mm0, mm1); // mm1 = b1/2 + movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */ + pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */ - movq_r2r (mm4, mm6); // mm6 = v26 - pmulhw_r2r (mm0, mm7); // mm7 = b2/2 + movq_r2r (mm4, mm6); /* mm6 = v26 */ + pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */ - movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 - movq_r2r (mm3, mm0); // mm0 = x0 + movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */ + movq_r2r (mm3, mm0); /* mm0 = x0 */ - psubsw_r2r (mm5, mm3); // mm3 = v04 - paddsw_r2r (mm5, mm0); // mm0 = u04 + psubsw_r2r (mm5, mm3); /* mm3 = v04 */ + paddsw_r2r (mm5, mm0); /* mm0 = u04 */ - paddsw_r2r (mm3, mm4); // mm4 = a1 - movq_r2r (mm0, mm5); // mm5 = u04 + paddsw_r2r (mm3, mm4); /* mm4 = a1 */ + movq_r2r (mm0, mm5); /* mm5 = u04 */ - psubsw_r2r (mm6, mm3); // mm3 = a2 - paddsw_r2r (mm2, mm5); // mm5 = a0 + psubsw_r2r (mm6, mm3); /* mm3 = a2 */ + paddsw_r2r (mm2, mm5); /* mm5 = a0 */ - paddsw_r2r (mm1, mm1); // mm1 = b1 - psubsw_r2r (mm2, mm0); // mm0 = a3 + paddsw_r2r (mm1, mm1); /* mm1 = b1 */ + psubsw_r2r (mm2, mm0); /* mm0 = a3 */ - paddsw_r2r (mm7, mm7); // mm7 = b2 - movq_r2r (mm3, mm2); // mm2 = a2 + paddsw_r2r (mm7, mm7); /* mm7 = b2 */ + movq_r2r (mm3, mm2); /* mm2 = a2 */ - movq_r2r (mm4, mm6); // mm6 = a1 - paddsw_r2r (mm7, mm3); // mm3 = a2+b2 + movq_r2r (mm4, mm6); /* mm6 = a1 */ + paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */ - psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 - paddsw_r2r (mm1, mm4); // mm4 = a1+b1 + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */ + paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */ - psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 - psubsw_r2r (mm1, mm6); // mm6 = a1-b1 + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */ + psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */ - movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 - psubsw_r2r (mm7, mm2); // mm2 = a2-b2 + movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */ + psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */ - psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 - movq_r2r (mm5, mm7); // mm7 = a0 + psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */ + movq_r2r (mm5, mm7); /* mm7 = a0 */ - movq_r2m (mm4, *(col+offset+1*8)); // save y1 - psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 + movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */ + psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */ - movq_r2m (mm3, *(col+offset+2*8)); // save y2 - paddsw_r2r (mm1, mm5); // mm5 = a0+b0 + movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */ + paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */ - movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 - psubsw_r2r (mm1, mm7); // mm7 = a0-b0 + movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */ + psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */ - psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 - movq_r2r (mm0, mm3); // mm3 = a3 + psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */ + movq_r2r (mm0, mm3); /* mm3 = a3 */ - movq_r2m (mm2, *(col+offset+5*8)); // save y5 - psubsw_r2r (mm4, mm3); // mm3 = a3-b3 + movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */ + psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */ - psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 - paddsw_r2r (mm0, mm4); // mm4 = a3+b3 + psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */ + paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */ - movq_r2m (mm5, *(col+offset+0*8)); // save y0 - psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 + movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */ + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */ - movq_r2m (mm6, *(col+offset+6*8)); // save y6 - psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 + movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */ + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */ - movq_r2m (mm7, *(col+offset+7*8)); // save y7 + movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */ - movq_r2m (mm3, *(col+offset+4*8)); // save y4 + movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */ - movq_r2m (mm4, *(col+offset+3*8)); // save y3 + movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */ } -static int32_t rounder0[] ATTR_ALIGN(8) = +static const int32_t rounder0[] ATTR_ALIGN(8) = rounder ((1 << (COL_SHIFT - 1)) - 0.5); -static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); -static int32_t rounder1[] ATTR_ALIGN(8) = +static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); +static const int32_t rounder1[] ATTR_ALIGN(8) = rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ -static int32_t rounder7[] ATTR_ALIGN(8) = +static const int32_t rounder7[] ATTR_ALIGN(8) = rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ -static int32_t rounder2[] ATTR_ALIGN(8) = +static const int32_t rounder2[] ATTR_ALIGN(8) = rounder (0.60355339059); /* C2 * (C6+C2)/2 */ -static int32_t rounder6[] ATTR_ALIGN(8) = +static const int32_t rounder6[] ATTR_ALIGN(8) = rounder (-0.25); /* C2 * (C6-C2)/2 */ -static int32_t rounder3[] ATTR_ALIGN(8) = +static const int32_t rounder3[] ATTR_ALIGN(8) = rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ -static int32_t rounder5[] ATTR_ALIGN(8) = +static const int32_t rounder5[] ATTR_ALIGN(8) = rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ #define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ -inline void idct (int16_t * block) \ +static inline void idct (int16_t * const block) \ { \ - static int16_t table04[] ATTR_ALIGN(16) = \ + static const int16_t table04[] ATTR_ALIGN(16) = \ table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ - static int16_t table17[] ATTR_ALIGN(16) = \ + static const int16_t table17[] ATTR_ALIGN(16) = \ table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ - static int16_t table26[] ATTR_ALIGN(16) = \ + static const int16_t table26[] ATTR_ALIGN(16) = \ table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ - static int16_t table35[] ATTR_ALIGN(16) = \ + static const int16_t table35[] ATTR_ALIGN(16) = \ table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ \ idct_row_head (block, 0*8, table04); \ @@ -594,7 +602,8 @@ do { \ packuswb_r2r (r1, r0); \ } while (0) -static void block_copy (int16_t * block, uint8_t * dest, int stride) +static inline void block_copy (int16_t * const block, uint8_t * dest, + const int stride) { movq_m2r (*(block+0*8), mm0); movq_m2r (*(block+0*8+4), mm1); @@ -626,7 +635,8 @@ do { \ paddsw_m2r (*(block+offset+4), r2); \ } while (0) -static void block_add (int16_t * block, uint8_t * dest, int stride) +static inline void block_add (int16_t * const block, uint8_t * dest, + const int stride) { movq_m2r (*dest, mm1); pxor_r2r (mm0, mm0); @@ -654,51 +664,150 @@ static void block_add (int16_t * block, uint8_t * dest, int stride) } +static inline void block_zero (int16_t * const block) +{ + pxor_r2r (mm0, mm0); + movq_r2m (mm0, *(block+0*4)); + movq_r2m (mm0, *(block+1*4)); + movq_r2m (mm0, *(block+2*4)); + movq_r2m (mm0, *(block+3*4)); + movq_r2m (mm0, *(block+4*4)); + movq_r2m (mm0, *(block+5*4)); + movq_r2m (mm0, *(block+6*4)); + movq_r2m (mm0, *(block+7*4)); + movq_r2m (mm0, *(block+8*4)); + movq_r2m (mm0, *(block+9*4)); + movq_r2m (mm0, *(block+10*4)); + movq_r2m (mm0, *(block+11*4)); + movq_r2m (mm0, *(block+12*4)); + movq_r2m (mm0, *(block+13*4)); + movq_r2m (mm0, *(block+14*4)); + movq_r2m (mm0, *(block+15*4)); +} + + +#define CPU_MMXEXT 0 +#define CPU_MMX 1 + +#define dup4(reg) \ +do { \ + if (cpu != CPU_MMXEXT) { \ + punpcklwd_r2r (reg, reg); \ + punpckldq_r2r (reg, reg); \ + } else \ + pshufw_r2r (reg, reg, 0x00); \ +} while (0) + +static inline void block_add_DC (int16_t * const block, uint8_t * dest, + const int stride, const int cpu) +{ + movd_v2r ((block[0] + 4) >> 3, mm0); + pxor_r2r (mm1, mm1); + movq_m2r (*dest, mm2); + dup4 (mm0); + psubsw_r2r (mm0, mm1); + packuswb_r2r (mm0, mm0); + paddusb_r2r (mm0, mm2); + packuswb_r2r (mm1, mm1); + movq_m2r (*(dest + stride), mm3); + psubusb_r2r (mm1, mm2); + block[0] = 0; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *dest); + psubusb_r2r (mm1, mm3); + movq_m2r (*(dest + 2*stride), mm2); + dest += stride; + movq_r2m (mm3, *dest); + paddusb_r2r (mm0, mm2); + movq_m2r (*(dest + 2*stride), mm3); + psubusb_r2r (mm1, mm2); + dest += stride; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *dest); + psubusb_r2r (mm1, mm3); + movq_m2r (*(dest + 2*stride), mm2); + dest += stride; + movq_r2m (mm3, *dest); + paddusb_r2r (mm0, mm2); + movq_m2r (*(dest + 2*stride), mm3); + psubusb_r2r (mm1, mm2); + dest += stride; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *dest); + psubusb_r2r (mm1, mm3); + movq_m2r (*(dest + 2*stride), mm2); + dest += stride; + movq_r2m (mm3, *dest); + paddusb_r2r (mm0, mm2); + movq_m2r (*(dest + 2*stride), mm3); + psubusb_r2r (mm1, mm2); + block[63] = 0; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *(dest + stride)); + psubusb_r2r (mm1, mm3); + movq_r2m (mm3, *(dest + 2*stride)); +} + + declare_idct (mmxext_idct, mmxext_table, mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) -void idct_block_copy_mmxext (int16_t * block, uint8_t * dest, int stride) +void mpeg2_idct_copy_mmxext (int16_t * const block, uint8_t * const dest, + const int stride) { mmxext_idct (block); block_copy (block, dest, stride); + block_zero (block); } -void idct_block_add_mmxext (int16_t * block, uint8_t * dest, int stride) +void mpeg2_idct_add_mmxext (const int last, int16_t * const block, + uint8_t * const dest, const int stride) { - mmxext_idct (block); - block_add (block, dest, stride); + if (last != 129 || (block[0] & 7) == 4) { + mmxext_idct (block); + block_add (block, dest, stride); + block_zero (block); + } else + block_add_DC (block, dest, stride, CPU_MMXEXT); } declare_idct (mmx_idct, mmx_table, mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) -void idct_block_copy_mmx (int16_t * block, uint8_t * dest, int stride) +void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest, + const int stride) { mmx_idct (block); block_copy (block, dest, stride); + block_zero (block); } -void idct_block_add_mmx (int16_t * block, uint8_t * dest, int stride) +void mpeg2_idct_add_mmx (const int last, int16_t * const block, + uint8_t * const dest, const int stride) { - mmx_idct (block); - block_add (block, dest, stride); + if (last != 129 || (block[0] & 7) == 4) { + mmx_idct (block); + block_add (block, dest, stride); + block_zero (block); + } else + block_add_DC (block, dest, stride, CPU_MMX); } -void idct_mmx_init (void) +void mpeg2_idct_mmx_init (void) { - extern uint8_t scan_norm[64]; - extern uint8_t scan_alt[64]; + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; int i, j; /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ for (i = 0; i < 64; i++) { - j = scan_norm[i]; - scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); - j = scan_alt[i]; - scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); } } diff --git a/libmpeg2/mm_accel.h b/libmpeg2/mm_accel.h deleted file mode 100644 index 133d6acb03..0000000000 --- a/libmpeg2/mm_accel.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * oms_accel.h - * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -// generic accelerations -#define MM_ACCEL_MLIB 0x00000001 - -// x86 accelerations -#define MM_ACCEL_X86_MMX 0x80000000 -#define MM_ACCEL_X86_3DNOW 0x40000000 -#define MM_ACCEL_X86_MMXEXT 0x20000000 - -//uint32_t mm_accel (void); diff --git a/libmpeg2/mmx.h b/libmpeg2/mmx.h index ac23866690..c05bfe1ccb 100644 --- a/libmpeg2/mmx.h +++ b/libmpeg2/mmx.h @@ -1,8 +1,10 @@ /* * mmx.h - * Copyright (C) 1997-2001 H. Dietz and R. Fisher + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -61,7 +63,12 @@ typedef union { #define movd_m2r(var,reg) mmx_m2r (movd, var, reg) #define movd_r2m(reg,var) mmx_r2m (movd, reg, var) -#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd) +#define movd_v2r(var,reg) __asm__ __volatile__ ("movd %0, %%" #reg \ + : /* nothing */ \ + : "rm" (var)) +#define movd_r2v(reg,var) __asm__ __volatile__ ("movd %%" #reg ", %0" \ + : "=rm" (var) \ + : /* nothing */ ) #define movq_m2r(var,reg) mmx_m2r (movq, var, reg) #define movq_r2m(reg,var) mmx_r2m (movq, reg, var) @@ -196,18 +203,19 @@ typedef union { #define mmx_m2ri(op,mem,reg,imm) \ - __asm__ __volatile__ (#op " %1, %0, %%" #reg \ - : /* nothing */ \ - : "X" (mem), "X" (imm)) + __asm__ __volatile__ (#op " %1, %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem), "i" (imm)) + #define mmx_r2ri(op,regs,regd,imm) \ - __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ - : /* nothing */ \ - : "X" (imm) ) + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + : /* nothing */ \ + : "i" (imm) ) #define mmx_fetch(mem,hint) \ __asm__ __volatile__ ("prefetch" #hint " %0" \ : /* nothing */ \ - : "X" (mem)) + : "m" (mem)) #define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg) diff --git a/libmpeg2/motion_comp.c b/libmpeg2/motion_comp.c index 6f4d979317..25c001584d 100644 --- a/libmpeg2/motion_comp.c +++ b/libmpeg2/motion_comp.c @@ -1,8 +1,10 @@ /* * motion_comp.c - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,100 +23,102 @@ #include "config.h" -#include <stdio.h> #include <inttypes.h> +#include "mpeg2.h" #include "mpeg2_internal.h" -#include "mm_accel.h" -mc_functions_t mc_functions; +mpeg2_mc_t mpeg2_mc; -void motion_comp_init (void) +void mpeg2_mc_init (uint32_t accel) { - #ifdef ARCH_X86 - if (config.flags & MM_ACCEL_X86_MMXEXT) { - printf ("libmpeg2: Using MMXEXT for motion compensation\n"); - mc_functions = mc_functions_mmxext; - } else if (config.flags & MM_ACCEL_X86_3DNOW) { - printf ("libmpeg2: Using 3DNOW for motion compensation\n"); - mc_functions = mc_functions_3dnow; - } else if (config.flags & MM_ACCEL_X86_MMX) { - printf ("libmpeg2: Using MMX for motion compensation\n"); - mc_functions = mc_functions_mmx; - } else + if (accel & MPEG2_ACCEL_X86_MMXEXT) + mpeg2_mc = mpeg2_mc_mmxext; + else if (accel & MPEG2_ACCEL_X86_3DNOW) + mpeg2_mc = mpeg2_mc_3dnow; + else if (accel & MPEG2_ACCEL_X86_MMX) + mpeg2_mc = mpeg2_mc_mmx; + else +#endif +#ifdef ARCH_PPC + if (accel & MPEG2_ACCEL_PPC_ALTIVEC) + mpeg2_mc = mpeg2_mc_altivec; + else +#endif +#ifdef ARCH_ALPHA + if (accel & MPEG2_ACCEL_ALPHA) + mpeg2_mc = mpeg2_mc_alpha; + else #endif #ifdef LIBMPEG2_MLIB - if (config.flags & MM_ACCEL_MLIB) { - printf ("libmpeg2: Using mlib for motion compensation\n"); - mc_functions = mc_functions_mlib; - } else + if (accel & MPEG2_ACCEL_MLIB) + mpeg2_mc = mpeg2_mc_mlib; + else #endif - { - printf ("libmpeg2: No accelerated motion compensation found\n"); - mc_functions = mc_functions_c; - } + mpeg2_mc = mpeg2_mc_c; } #define avg2(a,b) ((a+b+1)>>1) #define avg4(a,b,c,d) ((a+b+c+d+2)>>2) -#define predict_(i) (ref[i]) +#define predict_o(i) (ref[i]) #define predict_x(i) (avg2 (ref[i], ref[i+1])) #define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) -#define predict_xy(i) (avg4 (ref[i], ref[i+1], (ref+stride)[i], (ref+stride)[i+1])) +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ + (ref+stride)[i], (ref+stride)[i+1])) #define put(predictor,i) dest[i] = predictor (i) #define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) /* mc function template */ -#define MC_FUNC(op,xy) \ -static void MC_##op##_##xy##16_c (uint8_t * dest, uint8_t * ref,\ - int stride, int height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - op (predict_##xy, 8); \ - op (predict_##xy, 9); \ - op (predict_##xy, 10); \ - op (predict_##xy, 11); \ - op (predict_##xy, 12); \ - op (predict_##xy, 13); \ - op (predict_##xy, 14); \ - op (predict_##xy, 15); \ - ref += stride; \ - dest += stride; \ - } while (--height); \ -} \ -static void MC_##op##_##xy##8_c (uint8_t * dest, uint8_t * ref, \ - int stride, int height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - ref += stride; \ - dest += stride; \ - } while (--height); \ +#define MC_FUNC(op,xy) \ +static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op (predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ } /* definitions of the actual mc functions */ -MC_FUNC (put,) -MC_FUNC (avg,) +MC_FUNC (put,o) +MC_FUNC (avg,o) MC_FUNC (put,x) MC_FUNC (avg,x) MC_FUNC (put,y) @@ -122,4 +126,4 @@ MC_FUNC (avg,y) MC_FUNC (put,xy) MC_FUNC (avg,xy) -MOTION_COMP_EXTERN (c) +MPEG2_MC_EXTERN (c) diff --git a/libmpeg2/motion_comp_mlib.c b/libmpeg2/motion_comp_mlib.c index 91c0fb5a87..de181c0651 100644 --- a/libmpeg2/motion_comp_mlib.c +++ b/libmpeg2/motion_comp_mlib.c @@ -1,8 +1,9 @@ /* * motion_comp_mlib.c - * Copyright (C) 2000-2001 Håkan Hjort <d95hjort@dtek.chalmers.se> + * Copyright (C) 2000-2002 Håkan Hjort <d95hjort@dtek.chalmers.se> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,158 +24,167 @@ #ifdef LIBMPEG2_MLIB -#include <inttypes.h> #include <mlib_types.h> #include <mlib_status.h> #include <mlib_sys.h> #include <mlib_video.h> +#include <inttypes.h> +#include "mpeg2.h" #include "mpeg2_internal.h" -static void MC_put_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { - if (height == 16) - mlib_VideoCopyRef_U8_U8_16x16 (dest, ref, stride); + if (height == 16) + mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride); else - mlib_VideoCopyRef_U8_U8_16x8 (dest, ref, stride); + mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride); } -static void MC_put_x16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 16) - mlib_VideoInterpX_U8_U8_16x16 (dest, ref, stride, stride); + mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpX_U8_U8_16x8 (dest, ref, stride, stride); + mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); } -static void MC_put_y16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 16) - mlib_VideoInterpY_U8_U8_16x16 (dest, ref, stride, stride); + mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpY_U8_U8_16x8 (dest, ref, stride, stride); + mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); } -static void MC_put_xy16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 16) - mlib_VideoInterpXY_U8_U8_16x16 (dest, ref, stride, stride); + mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpXY_U8_U8_16x8 (dest, ref, stride, stride); + mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); } -static void MC_put_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 8) - mlib_VideoCopyRef_U8_U8_8x8 (dest, ref, stride); + mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride); else - mlib_VideoCopyRef_U8_U8_8x4 (dest, ref, stride); + mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride); } -static void MC_put_x8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 8) - mlib_VideoInterpX_U8_U8_8x8 (dest, ref, stride, stride); + mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpX_U8_U8_8x4 (dest, ref, stride, stride); + mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); } -static void MC_put_y8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 8) - mlib_VideoInterpY_U8_U8_8x8 (dest, ref, stride, stride); + mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpY_U8_U8_8x4 (dest, ref, stride, stride); + mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); } -static void MC_put_xy8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { - if (height == 8) - mlib_VideoInterpXY_U8_U8_8x8 (dest, ref, stride, stride); + if (height == 8) + mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpXY_U8_U8_8x4 (dest, ref, stride, stride); + mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); } -static void MC_avg_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 16) - mlib_VideoCopyRefAve_U8_U8_16x16 (dest, ref, stride); + mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride); else - mlib_VideoCopyRefAve_U8_U8_16x8 (dest, ref, stride); + mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride); } -static void MC_avg_x16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 16) - mlib_VideoInterpAveX_U8_U8_16x16 (dest, ref, stride, stride); + mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); else - mlib_VideoInterpAveX_U8_U8_16x8 (dest, ref, stride, stride); + mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); } -static void MC_avg_y16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 16) - mlib_VideoInterpAveY_U8_U8_16x16 (dest, ref, stride, stride); + mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); else - mlib_VideoInterpAveY_U8_U8_16x8 (dest, ref, stride, stride); + mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); } -static void MC_avg_xy16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 16) - mlib_VideoInterpAveXY_U8_U8_16x16 (dest, ref, stride, stride); + mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); else - mlib_VideoInterpAveXY_U8_U8_16x8 (dest, ref, stride, stride); + mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); } -static void MC_avg_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 8) - mlib_VideoCopyRefAve_U8_U8_8x8 (dest, ref, stride); + mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride); else - mlib_VideoCopyRefAve_U8_U8_8x4 (dest, ref, stride); + mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride); } -static void MC_avg_x8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 8) - mlib_VideoInterpAveX_U8_U8_8x8 (dest, ref, stride, stride); + mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpAveX_U8_U8_8x4 (dest, ref, stride, stride); + mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); } -static void MC_avg_y8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 8) - mlib_VideoInterpAveY_U8_U8_8x8 (dest, ref, stride, stride); + mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); else - mlib_VideoInterpAveY_U8_U8_8x4 (dest, ref, stride, stride); + mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); } -static void MC_avg_xy8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) { if (height == 8) - mlib_VideoInterpAveXY_U8_U8_8x8 (dest, ref, stride, stride); + mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref, + stride, stride); else - mlib_VideoInterpAveXY_U8_U8_8x4 (dest, ref, stride, stride); + mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref, + stride, stride); } -MOTION_COMP_EXTERN (mlib) +MPEG2_MC_EXTERN (mlib) #endif diff --git a/libmpeg2/motion_comp_mmx.c b/libmpeg2/motion_comp_mmx.c index 51b40bac55..33103e1738 100644 --- a/libmpeg2/motion_comp_mmx.c +++ b/libmpeg2/motion_comp_mmx.c @@ -1,8 +1,10 @@ /* * motion_comp_mmx.c - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,6 +27,7 @@ #include <inttypes.h> +#include "mpeg2.h" #include "mpeg2_internal.h" #include "attributes.h" #include "mmx.h" @@ -35,15 +38,22 @@ /* MMX code - needs a rewrite */ - - - - - +/* + * Motion Compensation frequently needs to average values using the + * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction + * to compute this, but it's been left out of classic MMX. + * + * We need to be careful of overflows when doing this computation. + * Rather than unpacking data to 16-bits, which reduces parallelism, + * we use the following formulas: + * + * (x+y)>>1 == (x&y)+((x^y)>>1) + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + */ /* some rounding constants */ -mmx_t round1 = {0x0001000100010001LL}; -mmx_t round4 = {0x0002000200020002LL}; +static mmx_t mask1 = {0xfefefefefefefefeLL}; +static mmx_t round4 = {0x0002000200020002LL}; /* * This code should probably be compiled with loop unrolling @@ -59,202 +69,176 @@ static inline void mmx_zero_reg () pxor_r2r (mm0, mm0); } -static inline void mmx_average_2_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2) +static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2) { /* *dest = (*src1 + *src2 + 1)/ 2; */ - movq_m2r (*src1, mm1); // load 8 src1 bytes - movq_r2r (mm1, mm2); // copy 8 src1 bytes - - movq_m2r (*src2, mm3); // load 8 src2 bytes - movq_r2r (mm3, mm4); // copy 8 src2 bytes - - punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ - punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ - paddw_r2r (mm3, mm1); // add lows to mm1 - paddw_m2r (round1, mm1); - psraw_i2r (1, mm1); // /2 - - paddw_r2r (mm4, mm2); // add highs to mm2 - paddw_m2r (round1, mm2); - psraw_i2r (1, mm2); // /2 - - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1, *dest); // store result in dest + pxor_r2r (mm1, mm3); /* xor src1 and src2 */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or src1 and src2 */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ } static inline void mmx_interp_average_2_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2) + const uint8_t * src1, + const uint8_t * src2) { /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ - movq_m2r (*dest, mm1); // load 8 dest bytes - movq_r2r (mm1, mm2); // copy 8 dest bytes - - movq_m2r (*src1, mm3); // load 8 src1 bytes - movq_r2r (mm3, mm4); // copy 8 src1 bytes - - movq_m2r (*src2, mm5); // load 8 src2 bytes - movq_r2r (mm5, mm6); // copy 8 src2 bytes - - punpcklbw_r2r (mm0, mm1); // unpack low dest bytes - punpckhbw_r2r (mm0, mm2); // unpack high dest bytes + movq_m2r (*dest, mm1); /* load 8 dest bytes */ + movq_r2r (mm1, mm2); /* copy 8 dest bytes */ - punpcklbw_r2r (mm0, mm3); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src1 bytes + movq_m2r (*src1, mm3); /* load 8 src1 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src1 bytes */ - punpcklbw_r2r (mm0, mm5); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm6); // unpack high src2 bytes + movq_m2r (*src2, mm5); /* load 8 src2 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src2 bytes */ - paddw_r2r (mm5, mm3); // add lows - paddw_m2r (round1, mm3); - psraw_i2r (1, mm3); // /2 + pxor_r2r (mm3, mm5); /* xor src1 and src2 */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm4, mm6); /* or src1 and src2 */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2r (mm6, mm5); /* copy subresult */ - paddw_r2r (mm6, mm4); // add highs - paddw_m2r (round1, mm4); - psraw_i2r (1, mm4); // /2 - - paddw_r2r (mm3, mm1); // add lows - paddw_m2r (round1, mm1); - psraw_i2r (1, mm1); // /2 - - paddw_r2r (mm4, mm2); // add highs - paddw_m2r (round1, mm2); - psraw_i2r (1, mm2); // /2 - - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1, *dest); // store result in dest + pxor_r2r (mm1, mm5); /* xor srcavg and dest */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm2, mm6); /* or srcavg and dest */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2m (mm6, *dest); /* store result in dest */ } -static inline void mmx_average_4_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2, - uint8_t * src3, uint8_t * src4) +static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) { /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */ - movq_m2r (*src1, mm1); // load 8 src1 bytes - movq_r2r (mm1, mm2); // copy 8 src1 bytes + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ - punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ - movq_m2r (*src2, mm3); // load 8 src2 bytes - movq_r2r (mm3, mm4); // copy 8 src2 bytes + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ - punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ /* now have partials in mm1 and mm2 */ - movq_m2r (*src3, mm3); // load 8 src3 bytes - movq_r2r (mm3, mm4); // copy 8 src3 bytes + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ - punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ - movq_m2r (*src4, mm5); // load 8 src4 bytes - movq_r2r (mm5, mm6); // copy 8 src4 bytes + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ - punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes - punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ - paddw_r2r (mm5, mm1); // add lows - paddw_r2r (mm6, mm2); // add highs + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ /* now have subtotal in mm1 and mm2 */ paddw_m2r (round4, mm1); - psraw_i2r (2, mm1); // /4 + psraw_i2r (2, mm1); /* /4 */ paddw_m2r (round4, mm2); - psraw_i2r (2, mm2); // /4 + psraw_i2r (2, mm2); /* /4 */ - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1, *dest); // store result in dest + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2m (mm1, *dest); /* store result in dest */ } static inline void mmx_interp_average_4_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2, - uint8_t * src3, uint8_t * src4) + const uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) { /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ - movq_m2r (*src1, mm1); // load 8 src1 bytes - movq_r2r (mm1, mm2); // copy 8 src1 bytes + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ - punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ - movq_m2r (*src2, mm3); // load 8 src2 bytes - movq_r2r (mm3, mm4); // copy 8 src2 bytes + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ - punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ /* now have partials in mm1 and mm2 */ - movq_m2r (*src3, mm3); // load 8 src3 bytes - movq_r2r (mm3, mm4); // copy 8 src3 bytes + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ - punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ - movq_m2r (*src4, mm5); // load 8 src4 bytes - movq_r2r (mm5, mm6); // copy 8 src4 bytes + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ - punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes - punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ - paddw_r2r (mm5, mm1); // add lows - paddw_r2r (mm6, mm2); // add highs + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ paddw_m2r (round4, mm1); - psraw_i2r (2, mm1); // /4 + psraw_i2r (2, mm1); /* /4 */ paddw_m2r (round4, mm2); - psraw_i2r (2, mm2); // /4 + psraw_i2r (2, mm2); /* /4 */ /* now have subtotal/4 in mm1 and mm2 */ - movq_m2r (*dest, mm3); // load 8 dest bytes - movq_r2r (mm3, mm4); // copy 8 dest bytes - - punpcklbw_r2r (mm0, mm3); // unpack low dest bytes - punpckhbw_r2r (mm0, mm4); // unpack high dest bytes - - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs + movq_m2r (*dest, mm3); /* load 8 dest bytes */ + movq_r2r (mm3, mm4); /* copy 8 dest bytes */ - paddw_m2r (round1, mm1); - psraw_i2r (1, mm1); // /2 - paddw_m2r (round1, mm2); - psraw_i2r (1, mm2); // /2 + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2r (mm1,mm2); /* copy subresult */ - /* now have end value in mm1 and mm2 */ - - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1,*dest); // store result in dest + pxor_r2r (mm1, mm3); /* xor srcavg and dest */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or srcavg and dest */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ } /*-----------------------------------------------------------------------*/ -static inline void MC_avg_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_avg_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { mmx_zero_reg (); @@ -269,33 +253,33 @@ static inline void MC_avg_mmx (int width, int height, } while (--height); } -static void MC_avg_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_mmx (16, height, dest, ref, stride); } -static void MC_avg_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_mmx (8, height, dest, ref, stride); } /*-----------------------------------------------------------------------*/ -static inline void MC_put_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_put_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { mmx_zero_reg (); do { - movq_m2r (* ref, mm1); // load 8 ref bytes - movq_r2m (mm1,* dest); // store 8 bytes at curr + movq_m2r (* ref, mm1); /* load 8 ref bytes */ + movq_r2m (mm1,* dest); /* store 8 bytes at curr */ if (width == 16) { - movq_m2r (* (ref+8), mm1); // load 8 ref bytes - movq_r2m (mm1,* (dest+8)); // store 8 bytes at curr + movq_m2r (* (ref+8), mm1); /* load 8 ref bytes */ + movq_r2m (mm1,* (dest+8)); /* store 8 bytes at curr */ } dest += stride; @@ -303,14 +287,14 @@ static inline void MC_put_mmx (int width, int height, } while (--height); } -static void MC_put_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_mmx (16, height, dest, ref, stride); } -static void MC_put_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_mmx (8, height, dest, ref, stride); } @@ -318,8 +302,8 @@ static void MC_put_8_mmx (uint8_t * dest, uint8_t * ref, /*-----------------------------------------------------------------------*/ /* Half pixel interpolation in the x direction */ -static inline void MC_avg_x_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { mmx_zero_reg (); @@ -334,22 +318,22 @@ static inline void MC_avg_x_mmx (int width, int height, } while (--height); } -static void MC_avg_x16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_x_mmx (16, height, dest, ref, stride); } -static void MC_avg_x8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_x_mmx (8, height, dest, ref, stride); } /*-----------------------------------------------------------------------*/ -static inline void MC_put_x_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { mmx_zero_reg (); @@ -364,24 +348,24 @@ static inline void MC_put_x_mmx (int width, int height, } while (--height); } -static void MC_put_x16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_x_mmx (16, height, dest, ref, stride); } -static void MC_put_x8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_x_mmx (8, height, dest, ref, stride); } /*-----------------------------------------------------------------------*/ -static inline void MC_avg_xy_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { - uint8_t * ref_next = ref+stride; + const uint8_t * ref_next = ref + stride; mmx_zero_reg (); @@ -398,24 +382,24 @@ static inline void MC_avg_xy_mmx (int width, int height, } while (--height); } -static void MC_avg_xy16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_xy_mmx (16, height, dest, ref, stride); } -static void MC_avg_xy8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_xy_mmx (8, height, dest, ref, stride); } /*-----------------------------------------------------------------------*/ -static inline void MC_put_xy_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { - uint8_t * ref_next = ref+stride; + const uint8_t * ref_next = ref + stride; mmx_zero_reg (); @@ -431,24 +415,24 @@ static inline void MC_put_xy_mmx (int width, int height, } while (--height); } -static void MC_put_xy16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_xy_mmx (16, height, dest, ref, stride); } -static void MC_put_xy8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_xy_mmx (8, height, dest, ref, stride); } /*-----------------------------------------------------------------------*/ -static inline void MC_avg_y_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { - uint8_t * ref_next = ref+stride; + const uint8_t * ref_next = ref + stride; mmx_zero_reg (); @@ -464,24 +448,24 @@ static inline void MC_avg_y_mmx (int width, int height, } while (--height); } -static void MC_avg_y16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_y_mmx (16, height, dest, ref, stride); } -static void MC_avg_y8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg_y_mmx (8, height, dest, ref, stride); } /*-----------------------------------------------------------------------*/ -static inline void MC_put_y_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) +static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) { - uint8_t * ref_next = ref+stride; + const uint8_t * ref_next = ref + stride; mmx_zero_reg (); @@ -497,20 +481,20 @@ static inline void MC_put_y_mmx (int width, int height, } while (--height); } -static void MC_put_y16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_y_mmx (16, height, dest, ref, stride); } -static void MC_put_y8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put_y_mmx (8, height, dest, ref, stride); } -MOTION_COMP_EXTERN (mmx) +MPEG2_MC_EXTERN (mmx) @@ -540,8 +524,8 @@ do { \ /* CPU_MMXEXT code */ -static inline void MC_put1_8 (int height, uint8_t * dest, uint8_t * ref, - int stride) +static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride) { do { movq_m2r (*ref, mm0); @@ -551,8 +535,8 @@ static inline void MC_put1_8 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_put1_16 (int height, uint8_t * dest, uint8_t * ref, - int stride) +static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride) { do { movq_m2r (*ref, mm0); @@ -564,8 +548,8 @@ static inline void MC_put1_16 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_avg1_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) +static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) { do { movq_m2r (*ref, mm0); @@ -576,8 +560,8 @@ static inline void MC_avg1_8 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_avg1_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) +static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) { do { movq_m2r (*ref, mm0); @@ -591,8 +575,9 @@ static inline void MC_avg1_16 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_put2_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) +static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) { do { movq_m2r (*ref, mm0); @@ -603,8 +588,9 @@ static inline void MC_put2_8 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_put2_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) +static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) { do { movq_m2r (*ref, mm0); @@ -618,8 +604,9 @@ static inline void MC_put2_16 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_avg2_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) +static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) { do { movq_m2r (*ref, mm0); @@ -631,8 +618,9 @@ static inline void MC_avg2_8 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_avg2_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) +static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) { do { movq_m2r (*ref, mm0); @@ -650,8 +638,8 @@ static inline void MC_avg2_16 (int height, uint8_t * dest, uint8_t * ref, static mmx_t mask_one = {0x0101010101010101LL}; -static inline void MC_put4_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) +static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) { movq_m2r (*ref, mm0); movq_m2r (*(ref+1), mm1); @@ -684,13 +672,13 @@ static inline void MC_put4_8 (int height, uint8_t * dest, uint8_t * ref, movq_r2m (mm0, *dest); dest += stride; - movq_r2r (mm6, mm7); // unroll ! - movq_r2r (mm2, mm0); // unroll ! + movq_r2r (mm6, mm7); /* unroll ! */ + movq_r2r (mm2, mm0); /* unroll ! */ } while (--height); } -static inline void MC_put4_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) +static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) { do { movq_m2r (*ref, mm0); @@ -735,8 +723,8 @@ static inline void MC_put4_16 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_avg4_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) +static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) { do { movq_m2r (*ref, mm0); @@ -764,8 +752,8 @@ static inline void MC_avg4_8 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static inline void MC_avg4_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) +static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) { do { movq_m2r (*ref, mm0); @@ -814,204 +802,204 @@ static inline void MC_avg4_16 (int height, uint8_t * dest, uint8_t * ref, } while (--height); } -static void MC_avg_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); } -static void MC_avg_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); } -static void MC_put_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put1_16 (height, dest, ref, stride); } -static void MC_put_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put1_8 (height, dest, ref, stride); } -static void MC_avg_x16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); } -static void MC_avg_x8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); } -static void MC_put_x16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_x_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); } -static void MC_put_x8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); } -static void MC_avg_y16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); } -static void MC_avg_y8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); } -static void MC_put_y16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); } -static void MC_put_y8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); } -static void MC_avg_xy16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); } -static void MC_avg_xy8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); } -static void MC_put_xy16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); } -static void MC_put_xy8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); } -MOTION_COMP_EXTERN (mmxext) +MPEG2_MC_EXTERN (mmxext) -static void MC_avg_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); } -static void MC_avg_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); } -static void MC_put_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put1_16 (height, dest, ref, stride); } -static void MC_put_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) +static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) { MC_put1_8 (height, dest, ref, stride); } -static void MC_avg_x16_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW); } -static void MC_avg_x8_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW); } -static void MC_put_x16_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW); } -static void MC_put_x8_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW); } -static void MC_avg_y16_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW); } -static void MC_avg_y8_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW); } -static void MC_put_y16_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW); } -static void MC_put_y8_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW); } -static void MC_avg_xy16_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW); } -static void MC_avg_xy8_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW); } -static void MC_put_xy16_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_put4_16 (height, dest, ref, stride, CPU_3DNOW); } -static void MC_put_xy8_3dnow (uint8_t * dest, uint8_t * ref, +static void MC_put_xy_8_3dnow (uint8_t * dest, const uint8_t * ref, int stride, int height) { MC_put4_8 (height, dest, ref, stride, CPU_3DNOW); } -MOTION_COMP_EXTERN (3dnow) +MPEG2_MC_EXTERN (3dnow) #endif diff --git a/libmpeg2/mpeg2.h b/libmpeg2/mpeg2.h index a1a0ef1681..5016f4d85f 100644 --- a/libmpeg2/mpeg2.h +++ b/libmpeg2/mpeg2.h @@ -1,8 +1,10 @@ /* * mpeg2.h - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,52 +21,126 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -/* Structure for the mpeg2dec decoder */ - -typedef struct mpeg2dec_s { -// vo_instance_t * output; - - /* this is where we keep the state of the decoder */ - struct picture_s * picture; - - uint32_t shift; - int is_display_initialized; - int is_sequence_needed; - int drop_flag; - int drop_frame; - int in_slice; - - /* the maximum chunk size is determined by vbv_buffer_size */ - /* which is 224K for MP@ML streams. */ - /* (we make no pretenses of decoding anything more than that) */ - /* allocated in init - gcc has problems allocating such big structures */ - uint8_t * chunk_buffer; - /* pointer to current position in chunk_buffer */ - uint8_t * chunk_ptr; - /* last start code ? */ - uint8_t code; - - /* ONLY for 0.2.0 release - will not stay there later */ - int frame_rate_code; -} mpeg2dec_t ; - - -void mpeg2_init (void); -//void mpeg2_allocate_image_buffers (picture_t * picture); -int mpeg2_decode_data (vo_functions_t *, uint8_t * data_start, uint8_t * data_end,int framedrop); -//void mpeg2_close (vo_functions_t *); -//void mpeg2_drop (int flag); -//void mpeg2_free_image_buffers (picture_t * picture) - -/* initialize mpegdec with a opaque user pointer */ -//void mpeg2_init (mpeg2dec_t * mpeg2dec, uint32_t mm_accel -// ,vo_instance_t * output -// ); - -/* destroy everything which was allocated, shutdown the output */ -//void mpeg2_close (mpeg2dec_t * mpeg2dec); - -//int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, -// uint8_t * data_start, uint8_t * data_end); - -//void mpeg2_drop (mpeg2dec_t * mpeg2dec, int flag); +#ifndef MPEG2_H +#define MPEG2_H + +#define SEQ_FLAG_MPEG2 1 +#define SEQ_FLAG_CONSTRAINED_PARAMETERS 2 +#define SEQ_FLAG_PROGRESSIVE_SEQUENCE 4 +#define SEQ_FLAG_LOW_DELAY 8 +#define SEQ_FLAG_COLOUR_DESCRIPTION 16 + +#define SEQ_MASK_VIDEO_FORMAT 0xe0 +#define SEQ_VIDEO_FORMAT_COMPONENT 0 +#define SEQ_VIDEO_FORMAT_PAL 0x20 +#define SEQ_VIDEO_FORMAT_NTSC 0x40 +#define SEQ_VIDEO_FORMAT_SECAM 0x60 +#define SEQ_VIDEO_FORMAT_MAC 0x80 +#define SEQ_VIDEO_FORMAT_UNSPECIFIED 0xa0 + +typedef struct { + unsigned int width, height; + unsigned int chroma_width, chroma_height; + unsigned int byte_rate; + unsigned int vbv_buffer_size; + uint32_t flags; + + unsigned int picture_width, picture_height; + unsigned int display_width, display_height; + unsigned int pixel_width, pixel_height; + unsigned int frame_period; + + uint8_t profile_level_id; + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; +} sequence_t; + +#define PIC_MASK_CODING_TYPE 7 +#define PIC_FLAG_CODING_TYPE_I 1 +#define PIC_FLAG_CODING_TYPE_P 2 +#define PIC_FLAG_CODING_TYPE_B 3 +#define PIC_FLAG_CODING_TYPE_D 4 + +#define PIC_FLAG_TOP_FIELD_FIRST 8 +#define PIC_FLAG_PROGRESSIVE_FRAME 16 +#define PIC_FLAG_COMPOSITE_DISPLAY 32 +#define PIC_FLAG_SKIP 64 +#define PIC_FLAG_PTS 128 +#define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 + +typedef struct { + unsigned int temporal_reference; + unsigned int nb_fields; + uint32_t pts; + uint32_t flags; + struct { + int x, y; + } display_offset[3]; +} picture_t; + +typedef struct { + uint8_t * buf[3]; + void * id; +} fbuf_t; + +typedef struct { + const sequence_t * sequence; + const picture_t * current_picture; + const picture_t * current_picture_2nd; + const fbuf_t * current_fbuf; + const picture_t * display_picture; + const picture_t * display_picture_2nd; + const fbuf_t * display_fbuf; + const fbuf_t * discard_fbuf; + const uint8_t * user_data; + int user_data_len; +} mpeg2_info_t; + +typedef struct mpeg2dec_s mpeg2dec_t; +typedef struct decoder_s decoder_t; + +#define STATE_SEQUENCE 1 +#define STATE_SEQUENCE_REPEATED 2 +#define STATE_GOP 3 +#define STATE_PICTURE 4 +#define STATE_SLICE_1ST 5 +#define STATE_PICTURE_2ND 6 +#define STATE_SLICE 7 +#define STATE_END 8 +#define STATE_INVALID 9 + +struct convert_init_s; +void mpeg2_convert (mpeg2dec_t * mpeg2dec, + void (* convert) (int, int, uint32_t, void *, + struct convert_init_s *), void * arg); +void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id); +void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf); +void mpeg2_init_fbuf (decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]); + +void mpeg2_slice (decoder_t * decoder, int code, const uint8_t * buffer); + +#define MPEG2_ACCEL_X86_MMX 1 +#define MPEG2_ACCEL_X86_3DNOW 2 +#define MPEG2_ACCEL_X86_MMXEXT 4 +#define MPEG2_ACCEL_PPC_ALTIVEC 1 +#define MPEG2_ACCEL_ALPHA 1 +#define MPEG2_ACCEL_ALPHA_MVI 2 +#define MPEG2_ACCEL_MLIB 0x40000000 +#define MPEG2_ACCEL_DETECT 0x80000000 + +uint32_t mpeg2_accel (uint32_t accel); +mpeg2dec_t * mpeg2_init (void); +const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec); +void mpeg2_close (mpeg2dec_t * mpeg2dec); + +void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end); +int mpeg2_parse (mpeg2dec_t * mpeg2dec); + +void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip); +void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end); + +void mpeg2_pts (mpeg2dec_t * mpeg2dec, uint32_t pts); + +#endif /* MPEG2_H */ diff --git a/libmpeg2/mpeg2_internal.h b/libmpeg2/mpeg2_internal.h index 6f1c48425b..0e364cbf43 100644 --- a/libmpeg2/mpeg2_internal.h +++ b/libmpeg2/mpeg2_internal.h @@ -1,8 +1,10 @@ /* * mpeg2_internal.h - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,40 +47,38 @@ #define B_TYPE 3 #define D_TYPE 4 -typedef struct motion_s { +typedef struct { uint8_t * ref[2][3]; + uint8_t ** ref2[2]; int pmv[2][2]; int f_code[2]; } motion_t; -typedef struct vo_frame_s { - uint8_t * base[3]; /* pointer to 3 planes */ - void (* copy) (struct vo_frame_s * frame, uint8_t ** src); - void* vo; - void* mpi; -#ifdef MPEG12_POSTPROC -#define MPEG2_MBC 120 -#define MPEG2_MBR 72 - int8_t quant_store[MPEG2_MBR+1][MPEG2_MBC+1]; -#endif - -// int slice; -// void (* field) (struct vo_frame_s * frame, int flags); -// void (* draw) (struct vo_frame_s * frame); -// vo_instance_t * instance; -} vo_frame_t; - -typedef struct picture_s { +struct decoder_s { /* first, state that carries information from one macroblock to the */ - /* next inside a slice, and is never used outside of slice_process() */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ /* DCT coefficients - should be kept aligned ! */ int16_t DCTblock[64]; /* bit parsing stuff */ - uint32_t bitstream_buf; /* current 32 bit working set of buffer */ - int bitstream_bits; /* used bits in working set */ - uint8_t * bitstream_ptr; /* buffer with stream data */ + uint32_t bitstream_buf; /* current 32 bit working set */ + int bitstream_bits; /* used bits in working set */ + const uint8_t * bitstream_ptr; /* buffer with stream data */ + + uint8_t * dest[3]; + uint8_t * picture_dest[3]; + void (* convert) (void * fbuf_id, uint8_t * const * src, + unsigned int v_offset); + void * fbuf_id; + + int offset; + int stride; + int uv_stride; + unsigned int limit_x; + unsigned int limit_y_16; + unsigned int limit_y_8; + unsigned int limit_y; /* Motion vectors */ /* The f_ and b_ correspond to the forward and backward motion */ @@ -90,9 +90,8 @@ typedef struct picture_s { int16_t dc_dct_pred[3]; int quantizer_scale; /* remove */ - int current_field; /* remove */ - int v_offset; /* remove */ - + int dmv_offset; /* remove */ + unsigned int v_offset; /* remove */ /* now non-slice-specific information */ @@ -101,16 +100,17 @@ typedef struct picture_s { uint8_t non_intra_quantizer_matrix [64]; /* The width and height of the picture snapped to macroblock units */ - int coded_picture_width; - int coded_picture_height; + int width; + int height; + int vertical_position_extension; /* picture header stuff */ /* what type of picture this is (I, P, B, D) */ - int picture_coding_type; - + int coding_type; + /* picture coding extension stuff */ - + /* quantization factor for intra dc coefficients */ int intra_dc_precision; /* top/bottom/both fields */ @@ -130,98 +130,167 @@ typedef struct picture_s { /* stuff derived from bitstream */ /* pointer to the zigzag scan we're supposed to be using */ - uint8_t * scan; - - struct vo_frame_s * current_frame; - struct vo_frame_s * forward_reference_frame; - struct vo_frame_s * backward_reference_frame; - struct vo_frame_s * temp_frame; // B frame + const uint8_t * scan; int second_field; int mpeg1; +}; - /* these things are not needed by the decoder */ - /* this is a temporary interface, we will build a better one later. */ - int aspect_ratio_information; - int frame_rate_code; - int progressive_sequence; - int repeat_first_field; - int progressive_frame; - int bitrate; - - // added by A'rpi/ESP-team - int display_picture_width; - int display_picture_height; - int pp_options; - int display_time; - - struct vo_frame_s * display_frame; - int slice; - -} picture_t; - -typedef struct mpeg2_config_s { - /* Bit flags that enable various things */ - uint32_t flags; -} mpeg2_config_t; - -/* The only global variable, */ -/* the config struct */ -extern mpeg2_config_t config; - - +typedef struct { + fbuf_t fbuf; +} fbuf_alloc_t; + +struct mpeg2dec_s { + decoder_t decoder; + + mpeg2_info_t info; + + uint32_t shift; + int is_display_initialized; + int (* action) (struct mpeg2dec_s * mpeg2dec); + int state; + uint32_t ext_state; + + /* allocated in init - gcc has problems allocating such big structures */ + uint8_t * chunk_buffer; + /* pointer to start of the current chunk */ + uint8_t * chunk_start; + /* pointer to current position in chunk_buffer */ + uint8_t * chunk_ptr; + /* last start code ? */ + uint8_t code; + + /* PTS */ + uint32_t pts_current, pts_previous; + int num_pts; + int bytes_since_pts; + + int first; + int alloc_index_user; + int alloc_index; + uint8_t first_decode_slice; + uint8_t nb_decode_slices; + + sequence_t new_sequence; + sequence_t sequence; + picture_t pictures[4]; + picture_t * picture; + /*const*/ fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ + + fbuf_alloc_t fbuf_alloc[3]; + int custom_fbuf; + + uint8_t * yuv_buf[3][3]; + int yuv_index; + void * convert_id; + int convert_size[3]; + void (* convert_start) (void * id, uint8_t * const * dest, int flags); + void (* convert_copy) (void * id, uint8_t * const * src, + unsigned int v_offset); + + uint8_t * buf_start; + uint8_t * buf_end; + + int16_t display_offset_x, display_offset_y; +}; -/* slice.c */ -void header_state_init (picture_t * picture); -int header_process_picture_header (picture_t * picture, uint8_t * buffer); -int header_process_sequence_header (picture_t * picture, uint8_t * buffer); -int header_process_extension (picture_t * picture, uint8_t * buffer); +typedef struct { +#ifdef ARCH_PPC + uint8_t regv[12*16]; +#endif + int dummy; +} cpu_state_t; + +/* alloc.c */ +#define ALLOC_MPEG2DEC 0 +#define ALLOC_CHUNK 1 +#define ALLOC_YUV 2 +#define ALLOC_CONVERT_ID 3 +#define ALLOC_CONVERTED 4 +void * mpeg2_malloc (int size, int reason); +void mpeg2_free (void * buf); + +/* cpu_accel.c */ +uint32_t mpeg2_detect_accel (void); + +/* cpu_state.c */ +void mpeg2_cpu_state_init (uint32_t accel); + +/* decode.c */ +int mpeg2_seek_sequence (mpeg2dec_t * mpeg2dec); +int mpeg2_seek_header (mpeg2dec_t * mpeg2dec); +int mpeg2_parse_header (mpeg2dec_t * mpeg2dec); + +/* header.c */ +void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec); +int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec); +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec); +int mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); +int mpeg2_header_picture (mpeg2dec_t * mpeg2dec); +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec); +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec); +void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec); +int mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); +int mpeg2_header_end (mpeg2dec_t * mpeg2dec); +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int coding_type); /* idct.c */ -void idct_init (void); +void mpeg2_idct_init (uint32_t accel); /* idct_mlib.c */ -void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride); -void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mlib (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, + int stride); +void mpeg2_idct_add_mlib_non_ieee (int last, int16_t * block, + uint8_t * dest, int stride); /* idct_mmx.c */ -void idct_block_copy_mmxext (int16_t *block, uint8_t * dest, int stride); -void idct_block_add_mmxext (int16_t *block, uint8_t * dest, int stride); -void idct_block_copy_mmx (int16_t *block, uint8_t * dest, int stride); -void idct_block_add_mmx (int16_t *block, uint8_t * dest, int stride); -void idct_mmx_init (void); +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmxext (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmx (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_mmx_init (void); + +/* idct_altivec.c */ +void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_altivec (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_altivec_init (void); + +/* idct_alpha.c */ +void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mvi (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_alpha (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_alpha_init(int no_mvi); /* motion_comp.c */ -void motion_comp_init (void); - -typedef struct mc_functions_s -{ - void (* put [8]) (uint8_t *dst, uint8_t *, int32_t, int32_t); - void (* avg [8]) (uint8_t *dst, uint8_t *, int32_t, int32_t); -} mc_functions_t; - -#define MOTION_COMP_EXTERN(x) mc_functions_t mc_functions_##x = \ -{ \ - {MC_put_16_##x, MC_put_x16_##x, MC_put_y16_##x, MC_put_xy16_##x, \ - MC_put_8_##x, MC_put_x8_##x, MC_put_y8_##x, MC_put_xy8_##x}, \ - {MC_avg_16_##x, MC_avg_x16_##x, MC_avg_y16_##x, MC_avg_xy16_##x, \ - MC_avg_8_##x, MC_avg_x8_##x, MC_avg_y8_##x, MC_avg_xy8_##x} \ -}; - -extern mc_functions_t mc_functions_c; -extern mc_functions_t mc_functions_mmx; -extern mc_functions_t mc_functions_mmxext; -extern mc_functions_t mc_functions_3dnow; -extern mc_functions_t mc_functions_mlib; +void mpeg2_mc_init (uint32_t accel); -/* slice.c */ -int slice_process (picture_t *picture, uint8_t code, uint8_t * buffer); +typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); -/* stats.c */ -void stats_header (uint8_t code, uint8_t * buffer); - -void mpeg2_allocate_image_buffers(picture_t * picture); -void mpeg2_free_image_buffers (picture_t * picture); +typedef struct { + mpeg2_mc_fct * put [8]; + mpeg2_mc_fct * avg [8]; +} mpeg2_mc_t; +#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = { \ + {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ + MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ + {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ + MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ +}; +extern mpeg2_mc_t mpeg2_mc_c; +extern mpeg2_mc_t mpeg2_mc_mmx; +extern mpeg2_mc_t mpeg2_mc_mmxext; +extern mpeg2_mc_t mpeg2_mc_3dnow; +extern mpeg2_mc_t mpeg2_mc_altivec; +extern mpeg2_mc_t mpeg2_mc_alpha; +extern mpeg2_mc_t mpeg2_mc_mlib; diff --git a/libmpeg2/slice.c b/libmpeg2/slice.c index 4e289f0d06..7f6a2ed052 100644 --- a/libmpeg2/slice.c +++ b/libmpeg2/slice.c @@ -1,8 +1,10 @@ /* * slice.c - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,20 +23,18 @@ #include "config.h" -#include <string.h> #include <inttypes.h> -#include "video_out.h" +#include "mpeg2.h" #include "mpeg2_internal.h" #include "attributes.h" -extern mc_functions_t mc_functions; -extern void (* idct_block_copy) (int16_t * block, uint8_t * dest, int stride); -extern void (* idct_block_add) (int16_t * block, uint8_t * dest, int stride); - -//#ifdef MPEG12_POSTPROC -//extern int quant_store[MPEG2_MBR+1][MPEG2_MBC+1]; // [Review] -//#endif +extern mpeg2_mc_t mpeg2_mc; +extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); +extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); +extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); #include "vlc.h" @@ -45,23 +45,23 @@ static int non_linear_quantizer_scale [] = { 56, 64, 72, 80, 88, 96, 104, 112 }; -static inline int get_macroblock_modes (picture_t * picture) +static inline int get_macroblock_modes (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int macroblock_modes; - MBtab * tab; + const MBtab * tab; - switch (picture->picture_coding_type) { + switch (decoder->coding_type) { case I_TYPE: tab = MB_I + UBITS (bit_buf, 1); DUMPBITS (bit_buf, bits, tab->len); macroblock_modes = tab->modes; - if ((! (picture->frame_pred_frame_dct)) && - (picture->picture_structure == FRAME_PICTURE)) { + if ((! (decoder->frame_pred_frame_dct)) && + (decoder->picture_structure == FRAME_PICTURE)) { macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; DUMPBITS (bit_buf, bits, 1); } @@ -74,13 +74,13 @@ static inline int get_macroblock_modes (picture_t * picture) DUMPBITS (bit_buf, bits, tab->len); macroblock_modes = tab->modes; - if (picture->picture_structure != FRAME_PICTURE) { + if (decoder->picture_structure != FRAME_PICTURE) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; DUMPBITS (bit_buf, bits, 2); } return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { + } else if (decoder->frame_pred_frame_dct) { if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) macroblock_modes |= MC_FRAME; return macroblock_modes; @@ -102,13 +102,13 @@ static inline int get_macroblock_modes (picture_t * picture) DUMPBITS (bit_buf, bits, tab->len); macroblock_modes = tab->modes; - if (picture->picture_structure != FRAME_PICTURE) { + if (decoder->picture_structure != FRAME_PICTURE) { if (! (macroblock_modes & MACROBLOCK_INTRA)) { macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; DUMPBITS (bit_buf, bits, 2); } return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { + } else if (decoder->frame_pred_frame_dct) { /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ macroblock_modes |= MC_FRAME; return macroblock_modes; @@ -138,18 +138,18 @@ static inline int get_macroblock_modes (picture_t * picture) #undef bit_ptr } -static inline int get_quantizer_scale (picture_t * picture) +static inline int get_quantizer_scale (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int quantizer_scale_code; quantizer_scale_code = UBITS (bit_buf, 5); DUMPBITS (bit_buf, bits, 5); - if (picture->q_scale_type) + if (decoder->q_scale_type) return non_linear_quantizer_scale [quantizer_scale_code]; else return quantizer_scale_code << 1; @@ -158,15 +158,16 @@ static inline int get_quantizer_scale (picture_t * picture) #undef bit_ptr } -static inline int get_motion_delta (picture_t * picture, int f_code) +static inline int get_motion_delta (decoder_t * const decoder, + const int f_code) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int delta; int sign; - MVtab * tab; + const MVtab * tab; if (bit_buf & 0x80000000) { DUMPBITS (bit_buf, bits, 1); @@ -211,30 +212,32 @@ static inline int get_motion_delta (picture_t * picture, int f_code) #undef bit_ptr } -static inline int bound_motion_vector (int vector, int f_code) +static inline int bound_motion_vector (const int vector, const int f_code) { -#if 1 - int limit; +#if 0 + unsigned int limit; + int sign; limit = 16 << f_code; - if (vector >= limit) - return vector - 2*limit; - else if (vector < -limit) - return vector + 2*limit; - else return vector; + if ((unsigned int)(vector + limit) < 2 * limit) + return vector; + else { + sign = ((int32_t)vector) >> 31; + return vector - ((2 * limit) ^ sign) + sign; + } #else - return (vector << (27 - f_code)) >> (27 - f_code); + return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); #endif } -static inline int get_dmv (picture_t * picture) +static inline int get_dmv (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) - DMVtab * tab; + const DMVtab * tab; tab = DMV_2 + UBITS (bit_buf, 2); DUMPBITS (bit_buf, bits, tab->len); @@ -244,19 +247,19 @@ static inline int get_dmv (picture_t * picture) #undef bit_ptr } -static inline int get_coded_block_pattern (picture_t * picture) +static inline int get_coded_block_pattern (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) - CBPtab * tab; + const CBPtab * tab; NEEDBITS (bit_buf, bits, bit_ptr); if (bit_buf >= 0x20000000) { - tab = CBP_7 - 16 + UBITS (bit_buf, 7); + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); DUMPBITS (bit_buf, bits, tab->len); return tab->cbp; @@ -272,12 +275,12 @@ static inline int get_coded_block_pattern (picture_t * picture) #undef bit_ptr } -static inline int get_luma_dc_dct_diff (picture_t * picture) +static inline int get_luma_dc_dct_diff (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - DCtab * tab; +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; int size; int dc_diff; @@ -296,7 +299,7 @@ static inline int get_luma_dc_dct_diff (picture_t * picture) return 0; } } else { - tab = DC_long - 0x1e0 + UBITS (bit_buf, 9); + tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); size = tab->size; DUMPBITS (bit_buf, bits, tab->len); NEEDBITS (bit_buf, bits, bit_ptr); @@ -309,12 +312,12 @@ static inline int get_luma_dc_dct_diff (picture_t * picture) #undef bit_ptr } -static inline int get_chroma_dc_dct_diff (picture_t * picture) +static inline int get_chroma_dc_dct_diff (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - DCtab * tab; +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; int size; int dc_diff; @@ -333,7 +336,7 @@ static inline int get_chroma_dc_dct_diff (picture_t * picture) return 0; } } else { - tab = DC_long - 0x3e0 + UBITS (bit_buf, 10); + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); size = tab->size; DUMPBITS (bit_buf, bits, tab->len + 1); NEEDBITS (bit_buf, bits, bit_ptr); @@ -346,41 +349,41 @@ static inline int get_chroma_dc_dct_diff (picture_t * picture) #undef bit_ptr } -#define SATURATE(val) \ -do { \ - if ((uint32_t)(val + 2048) > 4095) \ - val = (val > 0) ? 2047 : -2048; \ +#define SATURATE(val) \ +do { \ + if (unlikely ((uint32_t)(val + 2048) > 4095)) \ + val = SBITS (val, 1) ^ 2047; \ } while (0) -static void get_intra_block_B14 (picture_t * picture) +static void get_intra_block_B14 (decoder_t * const decoder) { int i; int j; int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; int mismatch; - DCTtab * tab; + const DCTtab * tab; uint32_t bit_buf; int bits; - uint8_t * bit_ptr; + const uint8_t * bit_ptr; int16_t * dest; - dest = picture->DCTblock; + dest = decoder->DCTblock; i = 0; mismatch = ~dest[0]; - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; NEEDBITS (bit_buf, bits, bit_ptr); while (1) { if (bit_buf >= 0x28000000) { - tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); i += tab->run; if (i >= 64) @@ -406,7 +409,7 @@ static void get_intra_block_B14 (picture_t * picture) } else if (bit_buf >= 0x04000000) { - tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); i += tab->run; if (i < 64) @@ -435,17 +438,17 @@ static void get_intra_block_B14 (picture_t * picture) continue; } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00800000) { - tab = DCT_13 - 16 + UBITS (bit_buf, 13); + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00200000) { - tab = DCT_15 - 16 + UBITS (bit_buf, 15); + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); i += tab->run; if (i < 64) goto normal_code; @@ -461,40 +464,40 @@ static void get_intra_block_B14 (picture_t * picture) } dest[63] ^= mismatch & 1; DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; } -static void get_intra_block_B15 (picture_t * picture) +static void get_intra_block_B15 (decoder_t * const decoder) { int i; int j; int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; int mismatch; - DCTtab * tab; + const DCTtab * tab; uint32_t bit_buf; int bits; - uint8_t * bit_ptr; + const uint8_t * bit_ptr; int16_t * dest; - dest = picture->DCTblock; + dest = decoder->DCTblock; i = 0; mismatch = ~dest[0]; - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; NEEDBITS (bit_buf, bits, bit_ptr); while (1) { if (bit_buf >= 0x04000000) { - tab = DCT_B15_8 - 4 + UBITS (bit_buf, 8); + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); i += tab->run; if (i < 64) { @@ -548,17 +551,17 @@ static void get_intra_block_B15 (picture_t * picture) } } else if (bit_buf >= 0x02000000) { - tab = DCT_B15_10 - 8 + UBITS (bit_buf, 10); + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00800000) { - tab = DCT_13 - 16 + UBITS (bit_buf, 13); + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00200000) { - tab = DCT_15 - 16 + UBITS (bit_buf, 15); + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); i += tab->run; if (i < 64) goto normal_code; @@ -574,37 +577,37 @@ static void get_intra_block_B15 (picture_t * picture) } dest[63] ^= mismatch & 1; DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; } -static void get_non_intra_block (picture_t * picture) +static int get_non_intra_block (decoder_t * const decoder) { int i; int j; int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; int mismatch; - DCTtab * tab; + const DCTtab * tab; uint32_t bit_buf; int bits; - uint8_t * bit_ptr; + const uint8_t * bit_ptr; int16_t * dest; i = -1; mismatch = 1; - dest = picture->DCTblock; + dest = decoder->DCTblock; - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; NEEDBITS (bit_buf, bits, bit_ptr); if (bit_buf >= 0x28000000) { - tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5); + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); goto entry_1; } else goto entry_2; @@ -612,7 +615,7 @@ static void get_non_intra_block (picture_t * picture) while (1) { if (bit_buf >= 0x28000000) { - tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); entry_1: i += tab->run; @@ -642,7 +645,7 @@ static void get_non_intra_block (picture_t * picture) entry_2: if (bit_buf >= 0x04000000) { - tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); i += tab->run; if (i < 64) @@ -671,17 +674,17 @@ static void get_non_intra_block (picture_t * picture) continue; } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00800000) { - tab = DCT_13 - 16 + UBITS (bit_buf, 13); + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00200000) { - tab = DCT_15 - 16 + UBITS (bit_buf, 15); + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); i += tab->run; if (i < 64) goto normal_code; @@ -697,38 +700,39 @@ static void get_non_intra_block (picture_t * picture) } dest[63] ^= mismatch & 1; DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; + return i; } -static void get_mpeg1_intra_block (picture_t * picture) +static void get_mpeg1_intra_block (decoder_t * const decoder) { int i; int j; int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - DCTtab * tab; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; + const DCTtab * tab; uint32_t bit_buf; int bits; - uint8_t * bit_ptr; + const uint8_t * bit_ptr; int16_t * dest; i = 0; - dest = picture->DCTblock; + dest = decoder->DCTblock; - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; NEEDBITS (bit_buf, bits, bit_ptr); while (1) { if (bit_buf >= 0x28000000) { - tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); i += tab->run; if (i >= 64) @@ -756,7 +760,7 @@ static void get_mpeg1_intra_block (picture_t * picture) } else if (bit_buf >= 0x04000000) { - tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); i += tab->run; if (i < 64) @@ -791,17 +795,17 @@ static void get_mpeg1_intra_block (picture_t * picture) continue; } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00800000) { - tab = DCT_13 - 16 + UBITS (bit_buf, 13); + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00200000) { - tab = DCT_15 - 16 + UBITS (bit_buf, 15); + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); i += tab->run; if (i < 64) goto normal_code; @@ -816,35 +820,35 @@ static void get_mpeg1_intra_block (picture_t * picture) break; /* illegal, check needed to avoid buffer overflow */ } DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; } -static void get_mpeg1_non_intra_block (picture_t * picture) +static int get_mpeg1_non_intra_block (decoder_t * const decoder) { int i; int j; int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - DCTtab * tab; + const uint8_t * scan = decoder->scan; + const uint8_t * quant_matrix = decoder->non_intra_quantizer_matrix; + int quantizer_scale = decoder->quantizer_scale; + const DCTtab * tab; uint32_t bit_buf; int bits; - uint8_t * bit_ptr; + const uint8_t * bit_ptr; int16_t * dest; i = -1; - dest = picture->DCTblock; + dest = decoder->DCTblock; - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; NEEDBITS (bit_buf, bits, bit_ptr); if (bit_buf >= 0x28000000) { - tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5); + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); goto entry_1; } else goto entry_2; @@ -852,7 +856,7 @@ static void get_mpeg1_non_intra_block (picture_t * picture) while (1) { if (bit_buf >= 0x28000000) { - tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); entry_1: i += tab->run; @@ -884,7 +888,7 @@ static void get_mpeg1_non_intra_block (picture_t * picture) entry_2: if (bit_buf >= 0x04000000) { - tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); i += tab->run; if (i < 64) @@ -920,17 +924,17 @@ static void get_mpeg1_non_intra_block (picture_t * picture) continue; } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00800000) { - tab = DCT_13 - 16 + UBITS (bit_buf, 13); + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); i += tab->run; if (i < 64) goto normal_code; } else if (bit_buf >= 0x00200000) { - tab = DCT_15 - 16 + UBITS (bit_buf, 15); + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); i += tab->run; if (i < 64) goto normal_code; @@ -945,362 +949,320 @@ static void get_mpeg1_non_intra_block (picture_t * picture) break; /* illegal, check needed to avoid buffer overflow */ } DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; + return i; } -static inline int get_macroblock_address_increment (picture_t * picture) +static inline void slice_intra_DCT (decoder_t * const decoder, const int cc, + uint8_t * const dest, const int stride) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - MBAtab * tab; - int mba; - - mba = 0; - - while (1) { - if (bit_buf >= 0x10000000) { - tab = MBA_5 - 2 + UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, tab->len); - return mba + tab->mba; - } else if (bit_buf >= 0x03000000) { - tab = MBA_11 - 24 + UBITS (bit_buf, 11); - DUMPBITS (bit_buf, bits, tab->len); - return mba + tab->mba; - } else switch (UBITS (bit_buf, 11)) { - case 8: /* macroblock_escape */ - mba += 33; - /* no break here on purpose */ - case 15: /* macroblock_stuffing (MPEG1 only) */ - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - break; - default: /* end of slice, or error */ -// printf("MB error: %d \n",(UBITS (bit_buf, 11))); // FIXME! -// return 0; - return -1; - } - } - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline void slice_intra_DCT (picture_t * picture, int cc, - uint8_t * dest, int stride) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) NEEDBITS (bit_buf, bits, bit_ptr); /* Get the intra DC coefficient and inverse quantize it */ if (cc == 0) - picture->dc_dct_pred[0] += get_luma_dc_dct_diff (picture); + decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); else - picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff (picture); - picture->DCTblock[0] = - picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision); - memset (picture->DCTblock + 1, 0, 63 * sizeof (int16_t)); - - if (picture->mpeg1) { - if (picture->picture_coding_type != D_TYPE) - get_mpeg1_intra_block (picture); - } else if (picture->intra_vlc_format) - get_intra_block_B15 (picture); + decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); + decoder->DCTblock[0] = + decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision); + + if (decoder->mpeg1) { + if (decoder->coding_type != D_TYPE) + get_mpeg1_intra_block (decoder); + } else if (decoder->intra_vlc_format) + get_intra_block_B15 (decoder); else - get_intra_block_B14 (picture); - idct_block_copy (picture->DCTblock, dest, stride); + get_intra_block_B14 (decoder); + mpeg2_idct_copy (decoder->DCTblock, dest, stride); #undef bit_buf #undef bits #undef bit_ptr } -static inline void slice_non_intra_DCT (picture_t * picture, uint8_t * dest, - int stride) -{ - memset (picture->DCTblock, 0, 64 * sizeof (int16_t)); - if (picture->mpeg1) - get_mpeg1_non_intra_block (picture); - else - get_non_intra_block (picture); - idct_block_add (picture->DCTblock, dest, stride); -} - -#define MOTION_Y(table,offset_x,offset_y,motion_x,motion_y, \ - dest,src,offset_dest,offset_src,stride,height) \ -do { \ - int xy_half; \ - int total_offset; \ - \ - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ - total_offset = ((offset_y + (motion_y >> 1)) * stride + \ - offset_x + (motion_x >> 1) + (offset_src)); \ - table[xy_half] (dest[0] + offset_x + (offset_dest), \ - src[0] + total_offset, stride, height); \ -} while (0) - -#define MOTION_UV(table,offset_x,offset_y,motion_x,motion_y, \ - dest,src,offset_dest,offset_src,stride,height) \ -do { \ - int xy_half; \ - int total_offset; \ - \ - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ - total_offset = (((offset_y + motion_y) >> 1) * (stride) + \ - ((offset_x + motion_x) >> 1) + (offset_src)); \ - table[4+xy_half] (dest[1] + (offset_x >> 1) + (offset_dest), \ - src[1] + total_offset, stride, height); \ - table[4+xy_half] (dest[2] + (offset_x >> 1) + (offset_dest), \ - src[2] + total_offset, stride, height); \ -} while (0) - -static inline void motion_block (void (** table) (uint8_t *, uint8_t *, - int32_t, int32_t), - int x_offset, int y_offset, int mb_y_8_offset, - int src_field, int dest_field, - int x_pred, int y_pred, - uint8_t * dest[3], uint8_t * src[3], - int stride, int height) +static inline void slice_non_intra_DCT (decoder_t * const decoder, + uint8_t * const dest, const int stride) { - MOTION_Y (table, x_offset, y_offset, x_pred, y_pred, dest, src, - dest_field + mb_y_8_offset*8*stride, src_field, stride, height); + int last; - x_pred /= 2; - y_pred /= 2; - stride >>= 1; - height >>= 1; - - MOTION_UV (table, x_offset, y_offset, x_pred, y_pred, dest, src, - (dest_field >> 1) + mb_y_8_offset*4*stride, src_field >> 1, - stride, height); + if (decoder->mpeg1) + last = get_mpeg1_non_intra_block (decoder); + else + last = get_non_intra_block (decoder); + mpeg2_idct_add (last, decoder->DCTblock, dest, stride); } -static void motion_mp1 (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +#define MOTION(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if ((pos_x > decoder->limit_x) || (pos_y > decoder->limit_y_ ## size)) \ + return; \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride, \ + decoder->stride, size); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + ((((decoder->v_offset + motion_y) >> 1) + y/2) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size/2); \ + table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size/2) + +#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if ((pos_x > decoder->limit_x) || (pos_y > decoder->limit_y)) \ + return; \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, \ + (ref[0] + (pos_x >> 1) + \ + ((pos_y op) + src_field) * decoder->stride), \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y op) + src_field) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 4) + +static void motion_mp1 (decoder_t * const decoder, motion_t * const motion, + mpeg2_mc_fct * const * const table) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half, offset; NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion_x = (motion->pmv[0][0] + + (get_motion_delta (decoder, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); motion->pmv[0][0] = motion_x; NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, - motion->f_code[0]); - motion_y = bound_motion_vector (motion_y, motion->f_code[0]); + motion_y = (motion->pmv[0][1] + + (get_motion_delta (decoder, + motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); motion->pmv[0][1] = motion_y; - if (motion->f_code[1]) { - motion_x <<= 1; - motion_y <<= 1; - } - - motion_block (table, offset, picture->v_offset, 0, 0, 0, - motion_x, motion_y, dest, motion->ref[0], stride, 16); + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); #undef bit_buf #undef bits #undef bit_ptr } -static void motion_mp1_reuse (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static void motion_fr_frame (decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) { +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int motion_x, motion_y; - - motion_x = motion->pmv[0][0]; - motion_y = motion->pmv[0][1]; - - if (motion->f_code[1]) { - motion_x <<= 1; - motion_y <<= 1; - } - - motion_block (table, offset, picture->v_offset, 0, 0, 0, - motion_x, motion_y, dest, motion->ref[0], stride, 16); -} - -static void motion_fr_frame (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half, offset; NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[1][0] = motion->pmv[0][0] = motion_x; NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, motion->f_code[1]); motion_y = bound_motion_vector (motion_y, motion->f_code[1]); motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - motion_block (table, offset, picture->v_offset, 0, 0, 0, - motion_x, motion_y, dest, motion->ref[0], stride, 16); + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); #undef bit_buf #undef bits #undef bit_ptr } -static void motion_fr_field (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static void motion_fr_field (decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - int field_select; +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y, field; + unsigned int pos_x, pos_y, xy_half, offset; NEEDBITS (bit_buf, bits, bit_ptr); - field_select = SBITS (bit_buf, 1); + field = UBITS (bit_buf, 1); DUMPBITS (bit_buf, bits, 1); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[0][0] = motion_x; NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, motion->f_code[1]); /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ motion->pmv[0][1] = motion_y << 1; - motion_block (table, offset, picture->v_offset >> 1, - 0, (field_select & stride), 0, - motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); NEEDBITS (bit_buf, bits, bit_ptr); - field_select = SBITS (bit_buf, 1); + field = UBITS (bit_buf, 1); DUMPBITS (bit_buf, bits, 1); - motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[1][0] = motion_x; NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (picture, + motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (decoder, motion->f_code[1]); /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ motion->pmv[1][1] = motion_y << 1; - motion_block (table, offset, picture->v_offset >> 1, - 0, (field_select & stride), stride, - motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); #undef bit_buf #undef bits #undef bit_ptr } -static void motion_fr_dmv (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static void motion_fr_dmv (decoder_t * const decoder, motion_t * const motion, + mpeg2_mc_fct * const * const table) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - int dmv_x, dmv_y; - int m; - int other_x, other_y; +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; + unsigned int pos_x, pos_y, xy_half, offset; NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - dmv_x = get_dmv (picture); + dmv_x = get_dmv (decoder); - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (decoder, motion->f_code[1]); /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; + dmv_y = get_dmv (decoder); - NEEDBITS (bit_buf, bits, bit_ptr); - dmv_y = get_dmv (picture); - - motion_block (mc_functions.put, offset, picture->v_offset >> 1, 0, 0, 0, - motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); - - m = picture->top_field_first ? 1 : 3; + m = decoder->top_field_first ? 1 : 3; other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; - motion_block (mc_functions.avg, offset, picture->v_offset >> 1, 0, stride, 0, - other_x, other_y, dest, motion->ref[0], stride * 2, 8); - - motion_block (mc_functions.put, offset, picture->v_offset >> 1, - 0, stride, stride, - motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); - m = picture->top_field_first ? 3 : 1; + m = decoder->top_field_first ? 3 : 1; other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; - motion_block (mc_functions.avg, offset, picture->v_offset >> 1, 0, 0, stride, - other_x, other_y, dest, motion->ref[0], stride * 2, 8); + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0); + + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); + offset = (decoder->offset + (motion_x >> 1) + + (decoder->v_offset + (motion_y & ~1)) * decoder->stride); + mpeg2_mc.avg[xy_half] + (decoder->dest[0] + decoder->offset, + motion->ref[0][0] + offset, 2 * decoder->stride, 8); + mpeg2_mc.avg[xy_half] + (decoder->dest[0] + decoder->stride + decoder->offset, + motion->ref[0][0] + decoder->stride + offset, 2 * decoder->stride, 8); + motion_x /= 2; motion_y /= 2; + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); + offset = (((decoder->offset + motion_x) >> 1) + + (((decoder->v_offset >> 1) + (motion_y & ~1)) * + decoder->uv_stride)); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[1] + (decoder->offset >> 1), + motion->ref[0][1] + offset, 2 * decoder->uv_stride, 4); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[1] + decoder->uv_stride + (decoder->offset >> 1), + motion->ref[0][1] + decoder->uv_stride + offset, + 2 * decoder->uv_stride, 4); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[2] + (decoder->offset >> 1), + motion->ref[0][2] + offset, 2 * decoder->uv_stride, 4); + mpeg2_mc.avg[4+xy_half] + (decoder->dest[2] + decoder->uv_stride + (decoder->offset >> 1), + motion->ref[0][2] + decoder->uv_stride + offset, + 2 * decoder->uv_stride, 4); #undef bit_buf #undef bits #undef bit_ptr } -/* like motion_frame, but reuse previous motion vectors */ -static void motion_fr_reuse (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static inline void motion_reuse (const decoder_t * const decoder, + const motion_t * const motion, + mpeg2_mc_fct * const * const table) { - motion_block (table, offset, picture->v_offset, 0, 0, 0, - motion->pmv[0][0], motion->pmv[0][1], - dest, motion->ref[0], stride, 16); + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half, offset; + + motion_x = motion->pmv[0][0]; + motion_y = motion->pmv[0][1]; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); } -/* like motion_frame, but use null motion vectors */ -static void motion_fr_zero (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static inline void motion_zero (const decoder_t * const decoder, + const motion_t * const motion, + mpeg2_mc_fct * const * const table) { - motion_block (table, offset, picture->v_offset, 0, 0, 0, 0, 0, - dest, motion->ref[0], stride, 16); + unsigned int offset; + + table[0] (decoder->dest[0] + decoder->offset, + (motion->ref[0][0] + decoder->offset + + decoder->v_offset * decoder->stride), + decoder->stride, 16); + + offset = ((decoder->offset >> 1) + + (decoder->v_offset >> 1) * decoder->uv_stride); + table[4] (decoder->dest[1] + (decoder->offset >> 1), + motion->ref[0][1] + offset, decoder->uv_stride, 8); + table[4] (decoder->dest[2] + (decoder->offset >> 1), + motion->ref[0][2] + offset, decoder->uv_stride, 8); } /* like motion_frame, but parsing without actual motion compensation */ -static void motion_fr_conceal (picture_t * picture) +static void motion_fr_conceal (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int tmp; NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][0] + - get_motion_delta (picture, picture->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][1] + - get_motion_delta (picture, picture->f_motion.f_code[1])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); - picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ #undef bit_buf @@ -1308,175 +1270,137 @@ static void motion_fr_conceal (picture_t * picture) #undef bit_ptr } -static void motion_fi_field (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static void motion_fi_field (decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int motion_x, motion_y; - int field_select; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half, offset; NEEDBITS (bit_buf, bits, bit_ptr); - field_select = UBITS (bit_buf, 1); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; DUMPBITS (bit_buf, bits, 1); - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[1][0] = motion->pmv[0][0] = motion_x; NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, motion->f_code[1]); motion_y = bound_motion_vector (motion_y, motion->f_code[1]); motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - motion_block (table, offset, picture->v_offset, 0, 0, 0, - motion_x, motion_y, - dest, motion->ref[field_select], stride, 16); + MOTION (table, ref_field, motion_x, motion_y, 16, 0); #undef bit_buf #undef bits #undef bit_ptr } -static void motion_fi_16x8 (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static void motion_fi_16x8 (decoder_t * const decoder, motion_t * const motion, + mpeg2_mc_fct * const * const table) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int motion_x, motion_y; - int field_select; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half, offset; NEEDBITS (bit_buf, bits, bit_ptr); - field_select = UBITS (bit_buf, 1); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; DUMPBITS (bit_buf, bits, 1); - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[0][0] = motion_x; NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, motion->f_code[1]); motion_y = bound_motion_vector (motion_y, motion->f_code[1]); motion->pmv[0][1] = motion_y; - motion_block (table, offset, picture->v_offset, 0, 0, 0, - motion_x, motion_y, - dest, motion->ref[field_select], stride, 8); + MOTION (table, ref_field, motion_x, motion_y, 8, 0); NEEDBITS (bit_buf, bits, bit_ptr); - field_select = UBITS (bit_buf, 1); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; DUMPBITS (bit_buf, bits, 1); - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[1][0] = motion_x; NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[1][1] + get_motion_delta (picture, + motion_y = motion->pmv[1][1] + get_motion_delta (decoder, motion->f_code[1]); motion_y = bound_motion_vector (motion_y, motion->f_code[1]); motion->pmv[1][1] = motion_y; - motion_block (table, offset, picture->v_offset+8, 1, 0, 0, - motion_x, motion_y, - dest, motion->ref[field_select], stride, 8); + MOTION (table, ref_field, motion_x, motion_y, 8, 8); #undef bit_buf #undef bits #undef bit_ptr } -static void motion_fi_dmv (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) +static void motion_fi_dmv (decoder_t * const decoder, motion_t * const motion, + mpeg2_mc_fct * const * const table) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - int dmv_x, dmv_y; +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int motion_x, motion_y, other_x, other_y; + unsigned int pos_x, pos_y, xy_half, offset; NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, motion->f_code[0]); motion_x = bound_motion_vector (motion_x, motion->f_code[0]); motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - dmv_x = get_dmv (picture); + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, motion->f_code[1]); motion_y = bound_motion_vector (motion_y, motion->f_code[1]); motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + + decoder->dmv_offset); - NEEDBITS (bit_buf, bits, bit_ptr); - dmv_y = get_dmv (picture); - - motion_block (mc_functions.put, offset, picture->v_offset, 0, 0, 0, - motion_x, motion_y, - dest, motion->ref[picture->current_field], stride, 16); - - motion_x = ((motion_x + (motion_x > 0)) >> 1) + dmv_x; - motion_y = ((motion_y + (motion_y > 0)) >> 1) + dmv_y + - 2 * picture->current_field - 1; - motion_block (mc_functions.avg, offset, picture->v_offset, 0, 0, 0, - motion_x, motion_y, - dest, motion->ref[!picture->current_field], stride, 16); + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); #undef bit_buf #undef bits #undef bit_ptr } -static void motion_fi_reuse (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ - motion_block (table, offset, picture->v_offset, 0, 0, 0, - motion->pmv[0][0], motion->pmv[0][1], - dest, motion->ref[picture->current_field], stride, 16); -} - -static void motion_fi_zero (picture_t * picture, motion_t * motion, - uint8_t * dest[3], int offset, int stride, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ - motion_block (table, offset, picture->v_offset, 0, 0, 0, 0, 0, - dest, motion->ref[picture->current_field], stride, 16); -} - -static void motion_fi_conceal (picture_t * picture) +static void motion_fi_conceal (decoder_t * const decoder) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) int tmp; NEEDBITS (bit_buf, bits, bit_ptr); DUMPBITS (bit_buf, bits, 1); /* remove field_select */ - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][0] + - get_motion_delta (picture, picture->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][1] + - get_motion_delta (picture, picture->f_motion.f_code[1])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); - picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ #undef bit_buf @@ -1484,334 +1408,388 @@ static void motion_fi_conceal (picture_t * picture) #undef bit_ptr } -#define MOTION(routine,direction) \ -do { \ - if ((direction) & MACROBLOCK_MOTION_FORWARD) \ - routine (picture, &(picture->f_motion), dest, offset, stride, \ - mc_functions.put); \ - if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ - routine (picture, &(picture->b_motion), dest, offset, stride, \ - ((direction) & MACROBLOCK_MOTION_FORWARD ? \ - mc_functions.avg : mc_functions.put)); \ +#define MOTION_CALL(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (decoder, &(decoder->f_motion), mpeg2_mc.put); \ + if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ + routine (decoder, &(decoder->b_motion), \ + ((direction) & MACROBLOCK_MOTION_FORWARD ? \ + mpeg2_mc.avg : mpeg2_mc.put)); \ } while (0) -#define CHECK_DISPLAY \ +#define NEXT_MACROBLOCK \ do { \ - if (offset == picture->coded_picture_width) { \ + decoder->offset += 16; \ + if (decoder->offset == decoder->width) { \ do { /* just so we can use the break statement */ \ - if (picture->current_frame->copy) { \ - picture->current_frame->copy (picture->current_frame, \ - dest); \ - if (picture->picture_coding_type == B_TYPE) \ + if (decoder->convert) { \ + decoder->convert (decoder->fbuf_id, decoder->dest, \ + decoder->v_offset); \ + if (decoder->coding_type == B_TYPE) \ break; \ } \ - dest[0] += 16 * stride; \ - dest[1] += 4 * stride; \ - dest[2] += 4 * stride; \ + decoder->dest[0] += 16 * decoder->stride; \ + decoder->dest[1] += 4 * decoder->stride; \ + decoder->dest[2] += 4 * decoder->stride; \ } while (0); \ - if (! (picture->mpeg1)) \ - return 0; \ - picture->v_offset += 16; \ - if (picture->v_offset >= picture->coded_picture_height) \ - return 0; \ - offset = 0; ++code; \ + decoder->v_offset += 16; \ + if (decoder->v_offset > decoder->limit_y) { \ + if (mpeg2_cpu_state_restore) \ + mpeg2_cpu_state_restore (&cpu_state); \ + return; \ + } \ + decoder->offset = 0; \ } \ } while (0) -int slice_process (picture_t * picture, uint8_t code, uint8_t * buffer) +void mpeg2_init_fbuf (decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) { -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int macroblock_modes; - int stride; - uint8_t * dest[3]; - int offset; - uint8_t ** forward_ref[2]; - - stride = picture->coded_picture_width; - offset = (code - 1) * stride * 4; - picture->v_offset = (code - 1) * 16; - - forward_ref[0] = picture->forward_reference_frame->base; - if (picture->picture_structure != FRAME_PICTURE) { - forward_ref[1] = picture->forward_reference_frame->base; - offset <<= 1; - picture->current_field = (picture->picture_structure == BOTTOM_FIELD); - if ((picture->second_field) && - (picture->picture_coding_type != B_TYPE)) - forward_ref[picture->picture_structure == TOP_FIELD] = - picture->current_frame->base; - - picture->f_motion.ref[1][0] = forward_ref[1][0] + stride; - picture->f_motion.ref[1][1] = forward_ref[1][1] + (stride >> 1); - picture->f_motion.ref[1][2] = forward_ref[1][2] + (stride >> 1); - - picture->b_motion.ref[1][0] = - picture->backward_reference_frame->base[0] + stride; - picture->b_motion.ref[1][1] = - picture->backward_reference_frame->base[1] + (stride >> 1); - picture->b_motion.ref[1][2] = - picture->backward_reference_frame->base[2] + (stride >> 1); - } + int offset, stride, height, bottom_field; - picture->f_motion.ref[0][0] = forward_ref[0][0]; - picture->f_motion.ref[0][1] = forward_ref[0][1]; - picture->f_motion.ref[0][2] = forward_ref[0][2]; + stride = decoder->width; + bottom_field = (decoder->picture_structure == BOTTOM_FIELD); + offset = bottom_field ? stride : 0; + height = decoder->height; - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + decoder->picture_dest[0] = current_fbuf[0] + offset; + decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1); + decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1); - picture->b_motion.ref[0][0] = picture->backward_reference_frame->base[0]; - picture->b_motion.ref[0][1] = picture->backward_reference_frame->base[1]; - picture->b_motion.ref[0][2] = picture->backward_reference_frame->base[2]; + decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1); - picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; - picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1); - if ((picture->current_frame->copy) && - (picture->picture_coding_type == B_TYPE)) - offset = 0; + if (decoder->picture_structure != FRAME_PICTURE) { + decoder->dmv_offset = bottom_field ? 1 : -1; + decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field]; + decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field]; + decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field]; + decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field]; + offset = stride - offset; - dest[0] = picture->current_frame->base[0] + offset * 4; - dest[1] = picture->current_frame->base[1] + offset; - dest[2] = picture->current_frame->base[2] + offset; + if (decoder->second_field && (decoder->coding_type != B_TYPE)) + forward_fbuf = current_fbuf; + + decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1); + + decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1); - switch (picture->picture_structure) { - case BOTTOM_FIELD: - dest[0] += stride; - dest[1] += stride >> 1; - dest[2] += stride >> 1; - /* follow thru */ - case TOP_FIELD: stride <<= 1; + height >>= 1; } - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7); + decoder->stride = stride; + decoder->uv_stride = stride >> 1; + decoder->limit_x = 2 * decoder->width - 32; + decoder->limit_y_16 = 2 * height - 32; + decoder->limit_y_8 = 2 * height - 16; + decoder->limit_y = height - 16; +} - bitstream_init (picture, buffer); +static inline int slice_init (decoder_t * const decoder, int code) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int offset; + const MBAtab * mba; - picture->quantizer_scale = get_quantizer_scale (picture); + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; + + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; + + if (decoder->vertical_position_extension) { + code += UBITS (bit_buf, 3) << 7; + DUMPBITS (bit_buf, bits, 3); + } + decoder->v_offset = (code - 1) * 16; + offset = 0; + if (!(decoder->convert) || decoder->coding_type != B_TYPE) + offset = (code - 1) * decoder->stride * 4; + + decoder->dest[0] = decoder->picture_dest[0] + offset * 4; + decoder->dest[1] = decoder->picture_dest[1] + offset; + decoder->dest[2] = decoder->picture_dest[2] + offset; + + decoder->quantizer_scale = get_quantizer_scale (decoder); /* ignore intra_slice and all the extra data */ while (bit_buf & 0x80000000) { DUMPBITS (bit_buf, bits, 9); NEEDBITS (bit_buf, bits, bit_ptr); } - DUMPBITS (bit_buf, bits, 1); - NEEDBITS (bit_buf, bits, bit_ptr); - offset = get_macroblock_address_increment (picture) << 4; + /* decode initial macroblock address increment */ + offset = 0; + while (1) { + if (bit_buf >= 0x08000000) { + mba = MBA_5 + (UBITS (bit_buf, 6) - 2); + break; + } else if (bit_buf >= 0x01800000) { + mba = MBA_11 + (UBITS (bit_buf, 12) - 24); + break; + } else switch (UBITS (bit_buf, 12)) { + case 8: /* macroblock_escape */ + offset += 33; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + bit_buf &= 0xfffff; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* error */ + return 1; + } + } + DUMPBITS (bit_buf, bits, mba->len + 1); + decoder->offset = (offset + mba->mba) << 4; + + while (decoder->offset - decoder->width >= 0) { + decoder->offset -= decoder->width; + if (!(decoder->convert) || decoder->coding_type != B_TYPE) { + decoder->dest[0] += 16 * decoder->stride; + decoder->dest[1] += 4 * decoder->stride; + decoder->dest[2] += 4 * decoder->stride; + } + decoder->v_offset += 16; + } + if (decoder->v_offset > decoder->limit_y) + return 1; + + return 0; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +void mpeg2_slice (decoder_t * const decoder, const int code, + const uint8_t * const buffer) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + cpu_state_t cpu_state; + + bitstream_init (decoder, buffer); + + if (slice_init (decoder, code)) + return; + + if (mpeg2_cpu_state_save) + mpeg2_cpu_state_save (&cpu_state); while (1) { + int macroblock_modes; + int mba_inc; + const MBAtab * mba; + NEEDBITS (bit_buf, bits, bit_ptr); - macroblock_modes = get_macroblock_modes (picture); + macroblock_modes = get_macroblock_modes (decoder); /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ if (macroblock_modes & MACROBLOCK_QUANT) - picture->quantizer_scale = get_quantizer_scale (picture); + decoder->quantizer_scale = get_quantizer_scale (decoder); if (macroblock_modes & MACROBLOCK_INTRA) { int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; - if (picture->concealment_motion_vectors) { - if (picture->picture_structure == FRAME_PICTURE) - motion_fr_conceal (picture); + if (decoder->concealment_motion_vectors) { + if (decoder->picture_structure == FRAME_PICTURE) + motion_fr_conceal (decoder); else - motion_fi_conceal (picture); + motion_fi_conceal (decoder); } else { - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; - picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; - picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; } if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = stride; - DCT_stride = stride * 2; + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; } else { - DCT_offset = stride * 8; - DCT_stride = stride; + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; } - slice_intra_DCT (picture, 0, dest[0] + offset, DCT_stride); - slice_intra_DCT (picture, 0, dest[0] + offset + 8, DCT_stride); - slice_intra_DCT (picture, 0, dest[0] + offset + DCT_offset, - DCT_stride); - slice_intra_DCT (picture, 0, dest[0] + offset + DCT_offset + 8, - DCT_stride); - - slice_intra_DCT (picture, 1, dest[1] + (offset >> 1), stride >> 1); - slice_intra_DCT (picture, 2, dest[2] + (offset >> 1), stride >> 1); - - if (picture->picture_coding_type == D_TYPE) { + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + slice_intra_DCT (decoder, 0, dest_y, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + 8, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride); + slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + + if (decoder->coding_type == D_TYPE) { NEEDBITS (bit_buf, bits, bit_ptr); DUMPBITS (bit_buf, bits, 1); } } else { - if (picture->mpeg1) { - if ((macroblock_modes & MOTION_TYPE_MASK) == MC_FRAME) - MOTION (motion_mp1, macroblock_modes); - else { - /* non-intra mb without forward mv in a P picture */ - picture->f_motion.pmv[0][0] = 0; - picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = 0; - picture->f_motion.pmv[1][1] = 0; - MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD); - } - } else if (picture->picture_structure == FRAME_PICTURE) + if (decoder->picture_structure == FRAME_PICTURE) switch (macroblock_modes & MOTION_TYPE_MASK) { case MC_FRAME: - MOTION (motion_fr_frame, macroblock_modes); + if (decoder->mpeg1) + MOTION_CALL (motion_mp1, macroblock_modes); + else + MOTION_CALL (motion_fr_frame, macroblock_modes); break; case MC_FIELD: - MOTION (motion_fr_field, macroblock_modes); + MOTION_CALL (motion_fr_field, macroblock_modes); break; case MC_DMV: - MOTION (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); + MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); break; case 0: /* non-intra mb without forward mv in a P picture */ - picture->f_motion.pmv[0][0] = 0; - picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = 0; - picture->f_motion.pmv[1][1] = 0; - MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD); + decoder->f_motion.pmv[0][0] = 0; + decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = 0; + decoder->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); break; } else switch (macroblock_modes & MOTION_TYPE_MASK) { case MC_FIELD: - MOTION (motion_fi_field, macroblock_modes); + MOTION_CALL (motion_fi_field, macroblock_modes); break; case MC_16X8: - MOTION (motion_fi_16x8, macroblock_modes); + MOTION_CALL (motion_fi_16x8, macroblock_modes); break; case MC_DMV: - MOTION (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); + MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); break; case 0: /* non-intra mb without forward mv in a P picture */ - picture->f_motion.pmv[0][0] = 0; - picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = 0; - picture->f_motion.pmv[1][1] = 0; - MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD); + decoder->f_motion.pmv[0][0] = 0; + decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = 0; + decoder->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); break; } if (macroblock_modes & MACROBLOCK_PATTERN) { int coded_block_pattern; int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = stride; - DCT_stride = stride * 2; + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; } else { - DCT_offset = stride * 8; - DCT_stride = stride; + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; } - coded_block_pattern = get_coded_block_pattern (picture); + coded_block_pattern = get_coded_block_pattern (decoder); + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; if (coded_block_pattern & 0x20) - slice_non_intra_DCT (picture, dest[0] + offset, - DCT_stride); + slice_non_intra_DCT (decoder, dest_y, DCT_stride); if (coded_block_pattern & 0x10) - slice_non_intra_DCT (picture, dest[0] + offset + 8, - DCT_stride); + slice_non_intra_DCT (decoder, dest_y + 8, DCT_stride); if (coded_block_pattern & 0x08) - slice_non_intra_DCT (picture, - dest[0] + offset + DCT_offset, + slice_non_intra_DCT (decoder, dest_y + DCT_offset, DCT_stride); if (coded_block_pattern & 0x04) - slice_non_intra_DCT (picture, - dest[0] + offset + DCT_offset + 8, + slice_non_intra_DCT (decoder, dest_y + DCT_offset + 8, DCT_stride); - if (coded_block_pattern & 0x2) - slice_non_intra_DCT (picture, dest[1] + (offset >> 1), - stride >> 1); + slice_non_intra_DCT (decoder, + decoder->dest[1] + (offset >> 1), + decoder->uv_stride); if (coded_block_pattern & 0x1) - slice_non_intra_DCT (picture, dest[2] + (offset >> 1), - stride >> 1); + slice_non_intra_DCT (decoder, + decoder->dest[2] + (offset >> 1), + decoder->uv_stride); } - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision+7); + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; } -#ifdef MPEG12_POSTPROC - picture->current_frame->quant_store[code][(offset>>4)+1] = picture->quantizer_scale>>1; -#endif - offset += 16; - CHECK_DISPLAY; + NEXT_MACROBLOCK; NEEDBITS (bit_buf, bits, bit_ptr); - - if (0 /* FIXME */ && (bit_buf & 0x80000000)) { - DUMPBITS (bit_buf, bits, 1); - } else { - int mba_inc; - - mba_inc = get_macroblock_address_increment (picture); - if (!mba_inc) - continue; - else if (mba_inc < 0) + mba_inc = 0; + while (1) { + if (bit_buf >= 0x10000000) { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); break; + } else if (bit_buf >= 0x03000000) { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* end of slice, or error */ + if (mpeg2_cpu_state_restore) + mpeg2_cpu_state_restore (&cpu_state); + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); + mba_inc += mba->mba; - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision+7); + if (mba_inc) { + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision; - if (picture->picture_coding_type == P_TYPE) { - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + if (decoder->coding_type == P_TYPE) { + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; do { - if (picture->picture_structure == FRAME_PICTURE) - MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD); - else - MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD); - -#ifdef MPEG12_POSTPROC - picture->current_frame->quant_store[code][(offset>>4)+1] = picture->quantizer_scale>>1; -#endif - - offset += 16; - CHECK_DISPLAY; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + NEXT_MACROBLOCK; } while (--mba_inc); } else { do { - if (picture->mpeg1) - MOTION (motion_mp1_reuse, macroblock_modes); - else if (picture->picture_structure == FRAME_PICTURE) - MOTION (motion_fr_reuse, macroblock_modes); - else - MOTION (motion_fi_reuse, macroblock_modes); - -#ifdef MPEG12_POSTPROC - picture->current_frame->quant_store[code][(offset>>4)+1] = picture->quantizer_scale>>1; -#endif - - offset += 16; - CHECK_DISPLAY; + MOTION_CALL (motion_reuse, macroblock_modes); + NEXT_MACROBLOCK; } while (--mba_inc); } } } - - return 0; #undef bit_buf #undef bits #undef bit_ptr diff --git a/libmpeg2/sse.h b/libmpeg2/sse.h deleted file mode 100644 index 51540dca08..0000000000 --- a/libmpeg2/sse.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * sse.h - * Copyright (C) 1999 R. Fisher - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - - -typedef union { - float sf[4]; /* Single-precision (32-bit) value */ -} ATTR_ALIGN(16) sse_t; /* On a 16 byte (128-bit) boundary */ - - -#define sse_i2r(op, imm, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (imm) ) - -#define sse_m2r(op, mem, reg) \ - __asm__ __volatile__ (#op " %0, %%" #reg \ - : /* nothing */ \ - : "X" (mem)) - -#define sse_r2m(op, reg, mem) \ - __asm__ __volatile__ (#op " %%" #reg ", %0" \ - : "=X" (mem) \ - : /* nothing */ ) - -#define sse_r2r(op, regs, regd) \ - __asm__ __volatile__ (#op " %" #regs ", %" #regd) - -#define sse_r2ri(op, regs, regd, imm) \ - __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ - : /* nothing */ \ - : "X" (imm) ) - -#define sse_m2ri(op, mem, reg, subop) \ - __asm__ __volatile__ (#op " %0, %%" #reg ", " #subop \ - : /* nothing */ \ - : "X" (mem)) - - -#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg) -#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var) -#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd) - -#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var) - -#define movups_m2r(var, reg) sse_m2r(movups, var, reg) -#define movups_r2m(reg, var) sse_r2m(movups, reg, var) -#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd) - -#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd) - -#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd) - -#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg) -#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var) - -#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg) -#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var) - -#define movss_m2r(var, reg) sse_m2r(movss, var, reg) -#define movss_r2m(reg, var) sse_r2m(movss, reg, var) -#define movss_r2r(regs, regd) sse_r2r(movss, regs, regd) - -#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index) -#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index) - -#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg) -#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg) - -#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg) -#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, mmreg, xmmreg) - -#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg) -#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, mmreg, xmmreg) - -#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg) -#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg) - -#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) -#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) - -#define cvttss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) -#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) - -#define movmskps(xmmreg, reg) \ - __asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg) - -#define addps_m2r(var, reg) sse_m2r(addps, var, reg) -#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd) - -#define addss_m2r(var, reg) sse_m2r(addss, var, reg) -#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd) - -#define subps_m2r(var, reg) sse_m2r(subps, var, reg) -#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd) - -#define subss_m2r(var, reg) sse_m2r(subss, var, reg) -#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd) - -#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg) -#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd) - -#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg) -#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd) - -#define divps_m2r(var, reg) sse_m2r(divps, var, reg) -#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd) - -#define divss_m2r(var, reg) sse_m2r(divss, var, reg) -#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd) - -#define rcpps_m2r(var, reg) sse_m2r(rcpps, var, reg) -#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd) - -#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg) -#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd) - -#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg) -#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd) - -#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg) -#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd) - -#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg) -#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd) - -#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg) -#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd) - -#define andps_m2r(var, reg) sse_m2r(andps, var, reg) -#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd) - -#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg) -#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd) - -#define orps_m2r(var, reg) sse_m2r(orps, var, reg) -#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd) - -#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg) -#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd) - -#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg) -#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd) - -#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg) -#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd) - -#define minps_m2r(var, reg) sse_m2r(minps, var, reg) -#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd) - -#define minss_m2r(var, reg) sse_m2r(minss, var, reg) -#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd) - -#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op) -#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op) - -#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0) -#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0) - -#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1) -#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1) - -#define cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2) -#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2) - -#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3) -#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3) - -#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4) -#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4) - -#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5) -#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5) - -#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6) -#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6) - -#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7) -#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7) - -#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op) -#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op) - -#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0) -#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0) - -#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1) -#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1) - -#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2) -#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2) - -#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3) -#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3) - -#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4) -#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4) - -#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5) -#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5) - -#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6) -#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6) - -#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7) -#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7) - -#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg) -#define comiss_r2r(regs, regd) sse_r2r(comiss, regs, regd) - -#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg) -#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd) - -#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg) -#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd) - -#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg) -#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd) - -#define fxrstor(mem) \ - __asm__ __volatile__ ("fxrstor %0" \ - : /* nothing */ \ - : "X" (mem)) - -#define fxsave(mem) \ - __asm__ __volatile__ ("fxsave %0" \ - : /* nothing */ \ - : "X" (mem)) - -#define stmxcsr(mem) \ - __asm__ __volatile__ ("stmxcsr %0" \ - : /* nothing */ \ - : "X" (mem)) - -#define ldmxcsr(mem) \ - __asm__ __volatile__ ("ldmxcsr %0" \ - : /* nothing */ \ - : "X" (mem)) - diff --git a/libmpeg2/stats.c b/libmpeg2/stats.c deleted file mode 100644 index f3456058df..0000000000 --- a/libmpeg2/stats.c +++ /dev/null @@ -1,315 +0,0 @@ -/* - * stats.c - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include <stdio.h> -#include <stdlib.h> -#include <inttypes.h> - -#include "mpeg2_internal.h" - -static int debug_level = -1; - -/* Determine is debug output is required. */ -/* We could potentially have multiple levels of debug info */ -static int debug_is_on (void) -{ - char * env_var; - - if (debug_level < 0) { - env_var = getenv ("MPEG2_DEBUG"); - - if (env_var) - debug_level = 1; - else - debug_level = 0; - } - - return debug_level; -} - -static void stats_picture (uint8_t * buffer) -{ - static char * picture_coding_type_str [8] = { - "Invalid picture type", - "I-type", - "P-type", - "B-type", - "D (very bad)", - "Invalid","Invalid","Invalid" - }; - - int picture_coding_type; - int temporal_reference; - int vbv_delay; - - temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); - picture_coding_type = (buffer [1] >> 3) & 7; - vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | - (buffer[3] >> 3)) & 0xffff; - - fprintf (stderr, " (picture) %s temporal_reference %d, vbv_delay %d\n", - picture_coding_type_str [picture_coding_type], - temporal_reference, vbv_delay); -} - -static void stats_user_data (uint8_t * buffer) -{ - fprintf (stderr, " (user_data)\n"); -} - -static void stats_sequence (uint8_t * buffer) -{ - static char * aspect_ratio_information_str[8] = { - "Invalid Aspect Ratio", - "1:1", - "4:3", - "16:9", - "2.21:1", - "Invalid Aspect Ratio", - "Invalid Aspect Ratio", - "Invalid Aspect Ratio" - }; - static char * frame_rate_str[16] = { - "Invalid frame_rate_code", - "23.976", "24", "25" , "29.97", - "30" , "50", "59.94", "60" , - "Invalid frame_rate_code", "Invalid frame_rate_code", - "Invalid frame_rate_code", "Invalid frame_rate_code", - "Invalid frame_rate_code", "Invalid frame_rate_code", - "Invalid frame_rate_code" - }; - - int horizontal_size; - int vertical_size; - int aspect_ratio_information; - int frame_rate_code; - int bit_rate_value; - int vbv_buffer_size_value; - int constrained_parameters_flag; - int load_intra_quantizer_matrix; - int load_non_intra_quantizer_matrix; - - vertical_size = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; - horizontal_size = vertical_size >> 12; - vertical_size &= 0xfff; - aspect_ratio_information = buffer[3] >> 4; - frame_rate_code = buffer[3] & 15; - bit_rate_value = (buffer[4] << 10) | (buffer[5] << 2) | (buffer[6] >> 6); - vbv_buffer_size_value = ((buffer[6] << 5) | (buffer[7] >> 3)) & 0x3ff; - constrained_parameters_flag = buffer[7] & 4; - load_intra_quantizer_matrix = buffer[7] & 2; - if (load_intra_quantizer_matrix) - buffer += 64; - load_non_intra_quantizer_matrix = buffer[7] & 1; - - fprintf (stderr, " (seq) %dx%d %s, %s fps, %5.0f kbps, VBV %d kB%s%s%s\n", - horizontal_size, vertical_size, - aspect_ratio_information_str [aspect_ratio_information], - frame_rate_str [frame_rate_code], - bit_rate_value * 400.0 / 1000.0, - 2 * vbv_buffer_size_value, - constrained_parameters_flag ? " , CP":"", - load_intra_quantizer_matrix ? " , Custom Intra Matrix":"", - load_non_intra_quantizer_matrix ? " , Custom Non-Intra Matrix":""); -} - -static void stats_sequence_error (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_error)\n"); -} - -static void stats_sequence_end (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_end)\n"); -} - -static void stats_group (uint8_t * buffer) -{ - fprintf (stderr, " (group)%s%s\n", - (buffer[4] & 0x40) ? " closed_gop" : "", - (buffer[4] & 0x20) ? " broken_link" : ""); -} - -static void stats_slice (uint8_t code, uint8_t * buffer) -{ - /* fprintf (stderr, " (slice %d)\n", code); */ -} - -static void stats_sequence_extension (uint8_t * buffer) -{ - static char * chroma_format_str[4] = { - "Invalid Chroma Format", - "4:2:0 Chroma", - "4:2:2 Chroma", - "4:4:4 Chroma" - }; - - int progressive_sequence; - int chroma_format; - - progressive_sequence = (buffer[1] >> 3) & 1; - chroma_format = (buffer[1] >> 1) & 3; - - fprintf (stderr, " (seq_ext) progressive_sequence %d, %s\n", - progressive_sequence, chroma_format_str [chroma_format]); -} - -static void stats_sequence_display_extension (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_display_extension)\n"); -} - -static void stats_quant_matrix_extension (uint8_t * buffer) -{ - fprintf (stderr, " (quant_matrix_extension)\n"); -} - -static void stats_copyright_extension (uint8_t * buffer) -{ - fprintf (stderr, " (copyright_extension)\n"); -} - - -static void stats_sequence_scalable_extension (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_scalable_extension)\n"); -} - -static void stats_picture_display_extension (uint8_t * buffer) -{ - fprintf (stderr, " (picture_display_extension)\n"); -} - -static void stats_picture_coding_extension (uint8_t * buffer) -{ - static char * picture_structure_str[4] = { - "Invalid Picture Structure", - "Top field", - "Bottom field", - "Frame Picture" - }; - - int f_code[2][2]; - int intra_dc_precision; - int picture_structure; - int top_field_first; - int frame_pred_frame_dct; - int concealment_motion_vectors; - int q_scale_type; - int intra_vlc_format; - int alternate_scan; - int repeat_first_field; - int progressive_frame; - - f_code[0][0] = buffer[0] & 15; - f_code[0][1] = buffer[1] >> 4; - f_code[1][0] = buffer[1] & 15; - f_code[1][1] = buffer[2] >> 4; - intra_dc_precision = (buffer[2] >> 2) & 3; - picture_structure = buffer[2] & 3; - top_field_first = buffer[3] >> 7; - frame_pred_frame_dct = (buffer[3] >> 6) & 1; - concealment_motion_vectors = (buffer[3] >> 5) & 1; - q_scale_type = (buffer[3] >> 4) & 1; - intra_vlc_format = (buffer[3] >> 3) & 1; - alternate_scan = (buffer[3] >> 2) & 1; - repeat_first_field = (buffer[3] >> 1) & 1; - progressive_frame = buffer[4] >> 7; - - fprintf (stderr, - " (pic_ext) %s\n", picture_structure_str [picture_structure]); - fprintf (stderr, - " (pic_ext) forward horizontal f_code % d, forward vertical f_code % d\n", - f_code[0][0], f_code[0][1]); - fprintf (stderr, - " (pic_ext) backward horizontal f_code % d, backward vertical f_code % d\n", - f_code[1][0], f_code[1][1]); - fprintf (stderr, - " (pic_ext) intra_dc_precision %d, top_field_first %d, frame_pred_frame_dct %d\n", - intra_dc_precision, top_field_first, frame_pred_frame_dct); - fprintf (stderr, - " (pic_ext) concealment_motion_vectors %d, q_scale_type %d, intra_vlc_format %d\n", - concealment_motion_vectors, q_scale_type, intra_vlc_format); - fprintf (stderr, - " (pic_ext) alternate_scan %d, repeat_first_field %d, progressive_frame %d\n", - alternate_scan, repeat_first_field, progressive_frame); -} - -void stats_header (uint8_t code, uint8_t * buffer) -{ - if (! (debug_is_on ())) - return; - - switch (code) { - case 0x00: - stats_picture (buffer); - break; - case 0xb2: - stats_user_data (buffer); - break; - case 0xb3: - stats_sequence (buffer); - break; - case 0xb4: - stats_sequence_error (buffer); - break; - case 0xb5: - switch (buffer[0] >> 4) { - case 1: - stats_sequence_extension (buffer); - break; - case 2: - stats_sequence_display_extension (buffer); - break; - case 3: - stats_quant_matrix_extension (buffer); - break; - case 4: - stats_copyright_extension (buffer); - break; - case 5: - stats_sequence_scalable_extension (buffer); - break; - case 7: - stats_picture_display_extension (buffer); - break; - case 8: - stats_picture_coding_extension (buffer); - break; - default: - fprintf (stderr, " (unknown extension %#x)\n", buffer[0] >> 4); - } - break; - case 0xb7: - stats_sequence_end (buffer); - break; - case 0xb8: - stats_group (buffer); - break; - default: - if (code < 0xb0) - stats_slice (code, buffer); - else - fprintf (stderr, " (unknown start code %#02x)\n", code); - } -} diff --git a/libmpeg2/vlc.h b/libmpeg2/vlc.h index ed2e04f882..aa3dfe1841 100644 --- a/libmpeg2/vlc.h +++ b/libmpeg2/vlc.h @@ -1,8 +1,10 @@ /* * vlc.h - * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> * * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. * * mpeg2dec is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -25,17 +27,18 @@ do { \ bit_ptr += 2; \ } while (0) -static inline void bitstream_init (picture_t * picture, uint8_t * start) +static inline void bitstream_init (decoder_t * decoder, const uint8_t * start) { - picture->bitstream_buf = 0; GETWORD (picture->bitstream_buf, 16, start); - picture->bitstream_ptr = start; - picture->bitstream_bits = 0; + decoder->bitstream_buf = + (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; + decoder->bitstream_ptr = start + 4; + decoder->bitstream_bits = -16; } /* make sure that there are at least 16 valid bits in bit_buf */ #define NEEDBITS(bit_buf,bits,bit_ptr) \ do { \ - if (bits > 0) { \ + if (unlikely (bits > 0)) { \ GETWORD (bit_buf, bits, bit_ptr); \ bits -= 16; \ } \ @@ -94,14 +97,14 @@ typedef struct { #define INTRA MACROBLOCK_INTRA #define QUANT MACROBLOCK_QUANT -static MBtab MB_I [] = { +static const MBtab MB_I [] = { {INTRA|QUANT, 2}, {INTRA, 1} }; #define MC MACROBLOCK_MOTION_FORWARD #define CODED MACROBLOCK_PATTERN -static MBtab MB_P [] = { +static const MBtab MB_P [] = { {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, @@ -116,7 +119,7 @@ static MBtab MB_P [] = { #define BWD MACROBLOCK_MOTION_BACKWARD #define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD -static MBtab MB_B [] = { +static const MBtab MB_B [] = { {0, 0}, {INTRA|QUANT, 6}, {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, @@ -146,11 +149,11 @@ static MBtab MB_B [] = { #undef INTER -static MVtab MV_4 [] = { +static const MVtab MV_4 [] = { { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} }; -static MVtab MV_10 [] = { +static const MVtab MV_10 [] = { { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, @@ -160,12 +163,12 @@ static MVtab MV_10 [] = { }; -static DMVtab DMV_2 [] = { +static const DMVtab DMV_2 [] = { { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} }; -static CBPtab CBP_7 [] = { +static const CBPtab CBP_7 [] = { {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, @@ -196,7 +199,7 @@ static CBPtab CBP_7 [] = { {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} }; -static CBPtab CBP_9 [] = { +static const CBPtab CBP_9 [] = { {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, @@ -216,21 +219,21 @@ static CBPtab CBP_9 [] = { }; -static DCtab DC_lum_5 [] = { +static const DCtab DC_lum_5 [] = { {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} }; -static DCtab DC_chrom_5 [] = { +static const DCtab DC_chrom_5 [] = { {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} }; -static DCtab DC_long [] = { +static const DCtab DC_long [] = { {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, @@ -238,7 +241,7 @@ static DCtab DC_long [] = { }; -static DCTtab DCT_16 [] = { +static const DCTtab DCT_16 [] = { {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, @@ -249,7 +252,7 @@ static DCTtab DCT_16 [] = { { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} }; -static DCTtab DCT_15 [] = { +static const DCTtab DCT_15 [] = { { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, @@ -264,7 +267,7 @@ static DCTtab DCT_15 [] = { { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} }; -static DCTtab DCT_13 [] = { +static const DCTtab DCT_13 [] = { { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, @@ -279,12 +282,12 @@ static DCTtab DCT_13 [] = { { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} }; -static DCTtab DCT_B14_10 [] = { +static const DCTtab DCT_B14_10 [] = { { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} }; -static DCTtab DCT_B14_8 [] = { +static const DCTtab DCT_B14_8 [] = { { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, @@ -296,7 +299,7 @@ static DCTtab DCT_B14_8 [] = { { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} }; -static DCTtab DCT_B14AC_5 [] = { +static const DCTtab DCT_B14AC_5 [] = { { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, @@ -306,7 +309,7 @@ static DCTtab DCT_B14AC_5 [] = { { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} }; -static DCTtab DCT_B14DC_5 [] = { +static const DCTtab DCT_B14DC_5 [] = { { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, @@ -316,12 +319,12 @@ static DCTtab DCT_B14DC_5 [] = { { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} }; -static DCTtab DCT_B15_10 [] = { +static const DCTtab DCT_B15_10 [] = { { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} }; -static DCTtab DCT_B15_8 [] = { +static const DCTtab DCT_B15_8 [] = { { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, @@ -388,14 +391,14 @@ static DCTtab DCT_B15_8 [] = { }; -static MBAtab MBA_5 [] = { +static const MBAtab MBA_5 [] = { {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} }; -static MBAtab MBA_11 [] = { +static const MBAtab MBA_11 [] = { {32, 11}, {31, 11}, {30, 11}, {29, 11}, {28, 11}, {27, 11}, {26, 11}, {25, 11}, {24, 11}, {23, 11}, {22, 11}, {21, 11}, |