aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar rik <rik@b3059339-0415-0410-9bf9-f77b7e298cf2>2006-09-23 15:31:21 +0000
committerGravatar rik <rik@b3059339-0415-0410-9bf9-f77b7e298cf2>2006-09-23 15:31:21 +0000
commit4bb03ad126d2a26f1eeb8d86c3748cf7fcfdf51b (patch)
tree671ad0eedd6d22a470809d91a5f30167bd24f61a
parent1a3cfa9283a79f0ccf69b24b618612e733e0c471 (diff)
Patch by Karolina Lindqvist <karolina.lindqvist@kramnet.se>
"This patch is the MMX optimizations for the zrmjpeg filter, which is used by the zr2 video output driver." With some small changes by me: - column width=80 - kept jpeg_enc_* functions static because they confuse the current vo_zr.c - did not include jpeg_enc.h because jpeg_enc functions are still static git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@19956 b3059339-0415-0410-9bf9-f77b7e298cf2
-rw-r--r--libmpcodecs/vf_zrmjpeg.c248
1 files changed, 125 insertions, 123 deletions
diff --git a/libmpcodecs/vf_zrmjpeg.c b/libmpcodecs/vf_zrmjpeg.c
index 9e52dc9172..a7406ec617 100644
--- a/libmpcodecs/vf_zrmjpeg.c
+++ b/libmpcodecs/vf_zrmjpeg.c
@@ -31,11 +31,13 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h"
+//#include "jpeg_enc.h" /* this file is not present yet */
#undef malloc
#undef free
#undef realloc
+extern int avcodec_inited;
/* some convenient #define's, is this portable enough? */
#define VERBOSE(...) mp_msg(MSGT_DECVIDEO, MSGL_V, "vf_zrmjpeg: " __VA_ARGS__)
@@ -60,6 +62,9 @@ typedef struct MJpegContext {
uint16_t huff_code_ac_chrominance[256];
} MJpegContext;
+// The get_pixels routine to use. The real routine comes from dsputil
+static void (*get_pixels)(DCTELEM *restrict block, const uint8_t *pixels, int line_size);
+
/* Begin excessive code duplication ************************************/
/* Code coming from mpegvideo.c and mjpeg.c in ../libavcodec ***********/
@@ -75,6 +80,10 @@ static const unsigned short aanscales[64] = {
4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
};
+/*
+ * This routine is like the routine with the same name in mjpeg.c,
+ * except for some coefficient changes.
+ */
static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
uint16_t (*qmat16)[2][64], const uint16_t *quant_matrix,
int bias, int qmin, int qmax) {
@@ -130,6 +139,9 @@ static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
}
}
+/*
+ * This routine is a clone of mjpeg_encode_dc
+ */
static inline void encode_dc(MpegEncContext *s, int val,
uint8_t *huff_size, uint16_t *huff_code) {
int mant, nbits;
@@ -142,19 +154,15 @@ static inline void encode_dc(MpegEncContext *s, int val,
val = -val;
mant--;
}
-
- /* compute the log (XXX: optimize) */
- nbits = 0;
- while (val != 0) {
- val = val >> 1;
- nbits++;
- }
-
+ nbits= av_log2_16bit(val) + 1;
put_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
}
}
+/*
+ * This routine is a duplicate of encode_block in mjpeg.c
+ */
static void encode_block(MpegEncContext *s, DCTELEM *block, int n) {
int mant, nbits, code, i, j;
int component, dc, run, last_index, val;
@@ -199,12 +207,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n) {
mant--;
}
- /* compute the log (XXX: optimize) */
- nbits = 0;
- while (val != 0) {
- val = val >> 1;
- nbits++;
- }
+ nbits= av_log2_16bit(val) + 1;
code = (run << 4) | nbits;
put_bits(&s->pb, huff_size_ac[code],
@@ -241,9 +244,6 @@ typedef struct {
struct MpegEncContext *s;
int cheap_upsample;
int bw;
- int y_ps;
- int u_ps;
- int v_ps;
int y_rs;
int u_rs;
int v_rs;
@@ -253,7 +253,7 @@ typedef struct {
* changes, it allows for black&white encoding (it skips the U and V
* macroblocks and it outputs the huffman code for 'no change' (dc) and
* 'all zero' (ac)) and it takes 4 macroblocks (422) instead of 6 (420) */
-static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
+static always_inline void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
MJpegContext *m = j->s->mjpeg_ctx;
@@ -279,11 +279,58 @@ static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
}
}
+/*
+ * Taking one MCU (YUYV) from 8-bit pixel planar storage and
+ * filling it into four 16-bit pixel DCT macroblocks.
+ */
+static always_inline void fill_block(jpeg_enc_t *j, int x, int y,
+ unsigned char *y_data, unsigned char *u_data,
+ unsigned char *v_data)
+{
+ int i, k;
+ short int *dest;
+ unsigned char *source;
+
+ // The first Y, Y0
+ get_pixels(j->s->block[0], y*8*j->y_rs + 16*x + y_data, j->y_rs);
+ // The second Y, Y1
+ get_pixels(j->s->block[1], y*8*j->y_rs + 16*x + 8 + y_data, j->y_rs);
+
+ if (!j->bw && j->cheap_upsample) {
+ source = y * 4 * j->u_rs + 8*x + u_data;
+ dest = j->s->block[2];
+ for (i = 0; i < 4; i++) {
+ for (k = 0; k < 8; k++) {
+ dest[k] = source[k]; // First row
+ dest[k+8] = source[k]; // Duplicate to next row
+
+ }
+ dest += 16;
+ source += j->u_rs;
+ }
+ source = y * 4 * j->v_rs + 8*x + v_data;
+ dest = j->s->block[3];
+ for (i = 0; i < 4; i++) {
+ for (k = 0; k < 8; k++) {
+ dest[k] = source[k];
+ dest[k+8] = source[k];
+ }
+ dest += 16;
+ source += j->u_rs;
+ }
+ } else if (!j->bw && !j->cheap_upsample) {
+ // U
+ get_pixels(j->s->block[2], y*8*j->u_rs + 8*x + u_data, j->u_rs);
+ // V
+ get_pixels(j->s->block[3], y*8*j->v_rs + 8*x + v_data, j->v_rs);
+ }
+}
+
/* this function can take all kinds of YUV colorspaces
* YV12, YVYU, UYVY. The necesary parameters must be set up by the caller
- * y_ps means "y pixel size", y_rs means "y row size".
+ * y_rs means "y row size".
* For YUYV, for example, is u_buf = y_buf + 1, v_buf = y_buf + 3,
- * y_ps = 2, u_ps = 4, v_ps = 4, y_rs = u_rs = v_rs.
+ * y_rs = u_rs = v_rs.
*
* The actual buffers must be passed with mjpeg_encode_frame, this is
* to make it possible to call encode on the buffer provided by the
@@ -301,46 +348,41 @@ static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
/* The encoder doesn't know anything about interlacing, the halve height
* needs to be passed and the double rowstride. Which field gets encoded
* is decided by what buffers are passed to mjpeg_encode_frame */
-static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
- int u_psize, int u_rsize, int v_psize, int v_rsize,
+static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_rsize,
+ int u_rsize, int v_rsize,
int cu, int q, int b) {
jpeg_enc_t *j;
int i = 0;
- VERBOSE("JPEG encoder init: %dx%d %d %d %d %d %d %d\n",
- w, h, y_psize, y_rsize, u_psize,
- u_rsize, v_psize, v_rsize);
+ VERBOSE("JPEG encoder init: %dx%d %d %d %d cu=%d q=%d bw=%d\n",
+ w, h, y_rsize, u_rsize, v_rsize, cu, q, b);
- j = malloc(sizeof(jpeg_enc_t));
+ j = av_mallocz(sizeof(jpeg_enc_t));
if (j == NULL) return NULL;
- j->s = malloc(sizeof(MpegEncContext));
- memset(j->s,0x00,sizeof(MpegEncContext));
+ j->s = av_mallocz(sizeof(MpegEncContext));
if (j->s == NULL) {
- free(j);
+ av_free(j);
return NULL;
}
/* info on how to access the pixels */
- j->y_ps = y_psize;
- j->u_ps = u_psize;
- j->v_ps = v_psize;
j->y_rs = y_rsize;
j->u_rs = u_rsize;
j->v_rs = v_rsize;
- j->s->width = w;
+ j->s->width = w; // image width and height
j->s->height = h;
- j->s->qscale = q;
+ j->s->qscale = q; // Encoding quality
j->s->mjpeg_data_only_frames = 0;
j->s->out_format = FMT_MJPEG;
- j->s->intra_only = 1;
- j->s->encoding = 1;
+ j->s->intra_only = 1; // Generate only intra pictures for jpeg
+ j->s->encoding = 1; // Set mode to encode
j->s->pict_type = I_TYPE;
j->s->y_dc_scale = 8;
j->s->c_dc_scale = 8;
- j->s->mjpeg_write_tables = 1;
+ j->s->mjpeg_write_tables = 1; // setup to write tables
j->s->mjpeg_vsample[0] = 1;
j->s->mjpeg_vsample[1] = 1;
j->s->mjpeg_vsample[2] = 1;
@@ -351,23 +393,40 @@ static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
j->cheap_upsample = cu;
j->bw = b;
+ // Is this needed?
+ /* if libavcodec is used by the decoder then we must not
+ * initialize again, but if it is not initialized then we must
+ * initialize it here. */
+ if (!avcodec_inited) {
+ avcodec_init();
+ avcodec_register_all();
+ avcodec_inited=1;
+ }
+
if (mjpeg_init(j->s) < 0) {
- free(j->s);
- free(j);
+ av_free(j->s);
+ av_free(j);
return NULL;
}
/* alloc bogus avctx to keep MPV_common_init from segfaulting */
- j->s->avctx = calloc(sizeof(*j->s->avctx), 1);
- /* Set up to encode mjpeg */
- j->s->avctx->codec_id = CODEC_ID_MJPEG;
+ j->s->avctx = avcodec_alloc_context();
+ if (j->s->avctx == NULL) {
+ av_free(j->s);
+ av_free(j);
+ return NULL;
+ }
- /* make MPV_common_init allocate important buffers, like s->block */
+ // Set some a minimum amount of default values that are needed
+ j->s->avctx->codec_id = CODEC_ID_MJPEG;
+ j->s->avctx->dct_algo = FF_DCT_AUTO;
+ j->s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
j->s->avctx->thread_count = 1;
+ /* make MPV_common_init allocate important buffers, like s->block */
if (MPV_common_init(j->s) < 0) {
- free(j->s);
- free(j);
+ av_free(j->s);
+ av_free(j);
return NULL;
}
@@ -375,24 +434,28 @@ static jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
j->s->mb_height = j->s->height/8;
j->s->mb_intra = 1;
+ // Init q matrix
j->s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
for (i = 1; i < 64; i++)
j->s->intra_matrix[i] = clip_uint8(
(ff_mpeg1_default_intra_matrix[i]*j->s->qscale) >> 3);
+ // precompute matrix
convert_matrix(j->s, j->s->q_intra_matrix, j->s->q_intra_matrix16,
j->s->intra_matrix, j->s->intra_quant_bias, 8, 8);
+
+ get_pixels = j->s->dsp.get_pixels;
+
return j;
}
-static int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
- unsigned char *u_data, unsigned char *v_data, char *bufr) {
- int i, k, mb_x, mb_y, overflow;
- short int *dest;
- unsigned char *source;
+static int jpeg_enc_frame(jpeg_enc_t *j, uint8_t *y_data,
+ uint8_t *u_data, uint8_t *v_data, uint8_t *bufr) {
+ int mb_x, mb_y, overflow;
/* initialize the buffer */
init_put_bits(&j->s->pb, bufr, 1024*256);
+ // Emit the mjpeg header blocks
mjpeg_picture_header(j->s);
j->s->header_bits = put_bits_count(&j->s->pb);
@@ -403,72 +466,11 @@ static int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
for (mb_y = 0; mb_y < j->s->mb_height; mb_y++) {
for (mb_x = 0; mb_x < j->s->mb_width; mb_x++) {
- /* conversion 8 to 16 bit and filling of blocks
- * must be mmx optimized */
- /* fill 2 Y macroblocks and one U and one V */
- source = mb_y * 8 * j->y_rs +
- 16 * j->y_ps * mb_x + y_data;
- dest = j->s->block[0];
- for (i = 0; i < 8; i++) {
- for (k = 0; k < 8; k++) {
- dest[k] = source[k*j->y_ps];
- }
- dest += 8;
- source += j->y_rs;
- }
- source = mb_y * 8 * j->y_rs +
- (16*mb_x + 8)*j->y_ps + y_data;
- dest = j->s->block[1];
- for (i = 0; i < 8; i++) {
- for (k = 0; k < 8; k++) {
- dest[k] = source[k*j->y_ps];
- }
- dest += 8;
- source += j->y_rs;
- }
- if (!j->bw && j->cheap_upsample) {
- source = mb_y*4*j->u_rs +
- 8*mb_x*j->u_ps + u_data;
- dest = j->s->block[2];
- for (i = 0; i < 4; i++) {
- for (k = 0; k < 8; k++) {
- dest[k] = source[k*j->u_ps];
- dest[k+8] = source[k*j->u_ps];
- }
- dest += 16;
- source += j->u_rs;
- }
- source = mb_y*4*j->v_rs +
- 8*mb_x*j->v_ps + v_data;
- dest = j->s->block[3];
- for (i = 0; i < 4; i++) {
- for (k = 0; k < 8; k++) {
- dest[k] = source[k*j->v_ps];
- dest[k+8] = source[k*j->v_ps];
- }
- dest += 16;
- source += j->u_rs;
- }
- } else if (!j->bw && !j->cheap_upsample) {
- source = mb_y*8*j->u_rs +
- 8*mb_x*j->u_ps + u_data;
- dest = j->s->block[2];
- for (i = 0; i < 8; i++) {
- for (k = 0; k < 8; k++)
- dest[k] = source[k*j->u_ps];
- dest += 8;
- source += j->u_rs;
- }
- source = mb_y*8*j->v_rs +
- 8*mb_x*j->v_ps + v_data;
- dest = j->s->block[3];
- for (i = 0; i < 8; i++) {
- for (k = 0; k < 8; k++)
- dest[k] = source[k*j->v_ps];
- dest += 8;
- source += j->u_rs;
- }
- }
+ /*
+ * Fill one DCT block (8x8 pixels) from
+ * 2 Y macroblocks and one U and one V
+ */
+ fill_block(j, mb_x, mb_y, y_data, u_data, v_data);
emms_c(); /* is this really needed? */
j->s->block_last_index[0] =
@@ -509,8 +511,8 @@ static int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
static void jpeg_enc_uninit(jpeg_enc_t *j) {
mjpeg_close(j->s);
- free(j->s);
- free(j);
+ av_free(j->s);
+ av_free(j);
}
struct vf_priv_s {
@@ -654,11 +656,11 @@ static int config(struct vf_instance_s* vf, int width, int height, int d_width,
priv->y_stride = width;
priv->c_stride = width/2;
- priv->j = jpeg_enc_init(width, height/priv->fields, 1,
- priv->fields*priv->y_stride, 1,
- priv->fields*priv->c_stride, 1,
- priv->fields*priv->c_stride, 1,
- priv->quality, priv->bw);
+ priv->j = jpeg_enc_init(width, height/priv->fields,
+ priv->fields*priv->y_stride,
+ priv->fields*priv->c_stride,
+ priv->fields*priv->c_stride,
+ 1, priv->quality, priv->bw);
if (!priv->j) return 0;
return vf_next_config(vf, width, height, d_width, d_height, flags,