aboutsummaryrefslogtreecommitdiffhomepage
path: root/libmpcodecs/native
diff options
context:
space:
mode:
authorGravatar Uoti Urpala <uau@glyph.nonexistent.invalid>2009-03-14 23:52:45 +0200
committerGravatar Uoti Urpala <uau@glyph.nonexistent.invalid>2009-03-14 23:52:45 +0200
commitae2faad6669c313b7a5dd318baeee0bffdd47031 (patch)
tree0b383b5dde56d54be4b144e7e23e96bd8bdd43cf /libmpcodecs/native
parentb93f4b7bba0e31d157b74685d3166f74a6c244d7 (diff)
parent642162c07460e439d1d81cda4643dc028ed238e0 (diff)
Merge svn changes up to r28951
Diffstat (limited to 'libmpcodecs/native')
-rw-r--r--libmpcodecs/native/nuppelvideo.c114
-rw-r--r--libmpcodecs/native/rtjpegn.c2047
-rw-r--r--libmpcodecs/native/rtjpegn.h27
3 files changed, 11 insertions, 2177 deletions
diff --git a/libmpcodecs/native/nuppelvideo.c b/libmpcodecs/native/nuppelvideo.c
deleted file mode 100644
index 8128f85c47..0000000000
--- a/libmpcodecs/native/nuppelvideo.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * NuppelVideo 0.05 file parser
- * for MPlayer
- * by Panagiotis Issaris <takis@lumumba.luc.ac.be>
- *
- * Reworked by alex
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-
-#include "config.h"
-#include "mp_msg.h"
-#include "mpbswap.h"
-
-#include "libvo/fastmemcpy.h"
-
-#include "libmpdemux/nuppelvideo.h"
-#include "rtjpegn.h"
-#include "libavutil/lzo.h"
-
-#define KEEP_BUFFER
-
-void decode_nuv( unsigned char *encoded, int encoded_size,
- unsigned char *decoded, int width, int height)
-{
- int r;
- unsigned int out_len = width * height + ( width * height ) / 2;
- struct rtframeheader *encodedh = ( struct rtframeheader* ) encoded;
- static unsigned char *buffer = 0; /* for RTJpeg with LZO decompress */
-#ifdef KEEP_BUFFER
- static unsigned char *previous_buffer = 0; /* to support Last-frame-copy */
-#endif
-
-// printf("frametype: %c, comtype: %c, encoded_size: %d, width: %d, height: %d\n",
-// encodedh->frametype, encodedh->comptype, encoded_size, width, height);
-
- le2me_rtframeheader(encodedh);
- switch(encodedh->frametype)
- {
- case 'D': /* additional data for compressors */
- {
- /* tables are in encoded */
- if (encodedh->comptype == 'R')
- {
- RTjpeg_init_decompress ( (unsigned long *)(encoded+12), width, height );
- mp_msg(MSGT_DECVIDEO, MSGL_V, "Found RTjpeg tables (size: %d, width: %d, height: %d)\n",
- encoded_size-12, width, height);
- }
- break;
- }
- case 'V':
- {
- int in_len = encodedh->packetlength;
-#ifdef KEEP_BUFFER
- if (!previous_buffer)
- previous_buffer = ( unsigned char * ) malloc ( out_len + AV_LZO_OUTPUT_PADDING );
-#endif
-
- switch(encodedh->comptype)
- {
- case '0': /* raw YUV420 */
- fast_memcpy(decoded, encoded + 12, out_len);
- break;
- case '1': /* RTJpeg */
- RTjpeg_decompressYUV420 ( ( __s8 * ) encoded + 12, decoded );
- break;
- case '2': /* RTJpeg with LZO */
- if (!buffer)
- buffer = ( unsigned char * ) malloc ( out_len + AV_LZO_OUTPUT_PADDING );
- if (!buffer)
- {
- mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
- break;
- }
- r = av_lzo1x_decode ( buffer, &out_len, encoded + 12, &in_len );
- if ( r )
- {
- mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
- break;
- }
- RTjpeg_decompressYUV420 ( ( __s8 * ) buffer, decoded );
- break;
- case '3': /* raw YUV420 with LZO */
- r = av_lzo1x_decode ( decoded, &out_len, encoded + 12, &in_len );
- if ( r )
- {
- mp_msg(MSGT_DECVIDEO, MSGL_ERR, "Nuppelvideo: error decompressing\n");
- break;
- }
- break;
- case 'N': /* black frame */
- memset ( decoded, 0, width * height );
- memset ( decoded + width * height, 127, width * height / 2);
- break;
- case 'L': /* copy last frame */
-#ifdef KEEP_BUFFER
- fast_memcpy ( decoded, previous_buffer, width*height*3/2);
-#endif
- break;
- }
-
-#ifdef KEEP_BUFFER
- fast_memcpy(previous_buffer, decoded, width*height*3/2);
-#endif
- break;
- }
- default:
- mp_msg(MSGT_DECVIDEO, MSGL_V, "Nuppelvideo: unknwon frametype: %c\n",
- encodedh->frametype);
- }
-}
diff --git a/libmpcodecs/native/rtjpegn.c b/libmpcodecs/native/rtjpegn.c
index b97e507817..0eea073b61 100644
--- a/libmpcodecs/native/rtjpegn.c
+++ b/libmpcodecs/native/rtjpegn.c
@@ -68,7 +68,7 @@ static const __u64 RTjpeg_aan_tab[64]={
#if !HAVE_MMX
static __s32 RTjpeg_ws[64+31];
#endif
-__u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32];
+static __u8 RTjpeg_alldata[2*64+4*64+4*64+4*64+4*64+32];
static __s16 *block; // rh
static __s16 *RTjpeg_block;
@@ -86,13 +86,12 @@ static int RTjpeg_Ysize, RTjpeg_Csize;
static __s16 *RTjpeg_old=NULL;
#if HAVE_MMX
-mmx_t RTjpeg_lmask;
-mmx_t RTjpeg_cmask;
+static mmx_t RTjpeg_lmask;
+static mmx_t RTjpeg_cmask;
#else
-__u16 RTjpeg_lmask;
-__u16 RTjpeg_cmask;
+static __u16 RTjpeg_lmask;
+static __u16 RTjpeg_cmask;
#endif
-int RTjpeg_mtest=0;
static const unsigned char RTjpeg_lum_quant_tbl[64] = {
16, 11, 10, 16, 24, 40, 51, 61,
@@ -150,11 +149,6 @@ static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
#endif
-// *strm++ = 0x10;
-// *strm = 0x00;
-//
-// return 2;
-
// first byte allways written
((__u8*)strm)[0]=
(__u8)(data[RTjpeg_ZZ[0]]>254) ? 254:((data[RTjpeg_ZZ[0]]<0)?0:data[RTjpeg_ZZ[0]]);
@@ -299,152 +293,6 @@ fprintf(stdout, "\n\n");
return (int)co;
}
-/* +++++++++++++++++++++++++++++++++++++++++++++++++++*/
-/* Stream to Block (decoding) */
-/* */
-
-static int RTjpeg_s2b(__s16 *data, __s8 *strm, __u8 bt8, __u32 *qtbl)
-{
- int ci;
- register int co;
- register int i;
- register unsigned char bitten;
- register unsigned char bitoff;
-
- /* first byte always read */
- i=RTjpeg_ZZ[0];
- data[i]=((__u8)strm[0])*qtbl[i];
-
- /* we start at the behind */
-
- bitten = ((unsigned char)strm[1]) >> 2;
- co = 63;
- for(; co > bitten; co--) {
-
- data[RTjpeg_ZZ[co]] = 0;
-
- }
-
- if (co==0) {
- ci = 2;
- goto AUTOBAHN;
- }
-
- /* we have to read the last 2 bits of the second byte */
- ci=1;
- bitoff = 0;
-
- for(; co>0; co--) {
-
- bitten = ((unsigned char)strm[ci]) >> bitoff;
- bitten &= 0x03;
-
- i=RTjpeg_ZZ[co];
-
- switch( bitten ) {
- case 0x03:
- data[i]= -qtbl[i];
- break;
- case 0x02:
- goto FUSSWEG;
- break;
- case 0x01:
- data[i]= qtbl[i];
- break;
- case 0x00:
- data[i]= 0;
- break;
- default:
- break;
- }
-
- if( bitoff == 0 ) {
- bitoff = 8;
- ci++;
- }
- bitoff -= 2;
- }
- /* co is 0 now */
- /* data is written properly */
-
- /* if bitoff!=6 then ci is the index, but should be the byte count, so we increment by 1 */
- if (bitoff!=6) ci++;
-
- goto AUTOBAHN;
-
-
-FUSSWEG:
-/* correct bitoff to nibble */
- switch(bitoff){
- case 4:
- case 6:
- bitoff = 0;
- break;
- case 2:
- case 0:
- /* we have to read from the next byte */
- ci++;
- bitoff = 4;
- break;
- default:
- break;
- }
-
- for(; co>0; co--) {
-
- bitten = ((unsigned char)strm[ci]) >> bitoff;
- bitten &= 0x0f;
-
- i=RTjpeg_ZZ[co];
-
- if( bitten == 0x08 ) {
- goto STRASSE;
- }
-
- /* the compiler cannot do sign extension for signed nibbles */
- if( bitten & 0x08 ) {
- bitten |= 0xf0;
- }
- /* the unsigned char bitten now is a valid signed char */
-
- data[i]=((signed char)bitten)*qtbl[i];
-
- if( bitoff == 0 ) {
- bitoff = 8;
- ci++;
- }
- bitoff -= 4;
- }
- /* co is 0 */
-
- /* if bitoff!=4 then ci is the index, but should be the byte count, so we increment by 1 */
- if (bitoff!=4) ci++;
-
- goto AUTOBAHN;
-
-STRASSE:
- ci++;
-
- for(; co>0; co--) {
- i=RTjpeg_ZZ[co];
- data[i]=strm[ci++]*qtbl[i];
- }
-
- /* ci now is the count, because it points to next element => no incrementing */
-
-AUTOBAHN:
-
-#ifdef SHOWBLOCK
-fprintf(stdout, "\nci = '%d'\n", ci);
- for (i=0; i < 64; i++) {
- fprintf(stdout, "%d ", data[RTjpeg_ZZ[i]]);
- }
-fprintf(stdout, "\n\n");
-#endif
-
- return ci;
-}
-
#else
static int RTjpeg_b2s(__s16 *data, __s8 *strm, __u8 bt8)
@@ -1542,1148 +1390,6 @@ static void RTjpeg_dctY(__u8 *idata, __s16 *odata, int rskip)
#endif
}
-#define FIX_1_082392200 ((__s32) 277) /* FIX(1.082392200) */
-#define FIX_1_414213562 ((__s32) 362) /* FIX(1.414213562) */
-#define FIX_1_847759065 ((__s32) 473) /* FIX(1.847759065) */
-#define FIX_2_613125930 ((__s32) 669) /* FIX(2.613125930) */
-
-#define DESCALE(x) (__s16)( ((x)+4) >> 3)
-
-/* clip yuv to 16..235 (should be 16..240 for cr/cb but ... */
-
-#define RL(x) ((x)>235) ? 235 : (((x)<16) ? 16 : (x))
-#define MULTIPLY(var,const) (((__s32) ((var) * (const)) + 128)>>8)
-
-static void RTjpeg_idct_init(void)
-{
- int i;
-
- for(i=0; i<64; i++)
- {
- RTjpeg_liqt[i]=((__u64)RTjpeg_liqt[i]*RTjpeg_aan_tab[i])>>32;
- RTjpeg_ciqt[i]=((__u64)RTjpeg_ciqt[i]*RTjpeg_aan_tab[i])>>32;
- }
-}
-
-static void RTjpeg_idct(__u8 *odata, __s16 *data, int rskip)
-{
-#if HAVE_MMX
-
-static mmx_t fix_141 = {0x5a825a825a825a82LL};
-static mmx_t fix_184n261 = {0xcf04cf04cf04cf04LL};
-static mmx_t fix_184 = {0x7641764176417641LL};
-static mmx_t fix_n184 = {0x896f896f896f896fLL};
-static mmx_t fix_108n184 = {0xcf04cf04cf04cf04LL};
-
- mmx_t workspace[64];
- mmx_t *wsptr = workspace;
- register mmx_t *dataptr = (mmx_t *)odata;
- mmx_t *idata = (mmx_t *)data;
-
- rskip = rskip>>3;
-/*
- * Perform inverse DCT on one block of coefficients.
- */
-
- /* Odd part */
-
- movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5]
-
- movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3]
-
- movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1]
-
- movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */
-
- movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7]
-
- paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5;
-
- psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5
-
- psllw_i2r(2, mm2); // shift z10
- movq_r2r(mm2, mm0); // copy z10
-
- pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm3, mm5); // copy tmp4
-
- pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
- paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7;
-
- movq_r2r(mm3, mm6); // copy z11 /* phase 5 */
- psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7;
-
- psubw_r2r(mm1, mm6); // z11-z13
- psllw_i2r(2, mm5); // shift z12
-
- movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part
- movq_r2r(mm5, mm7); // copy z12
-
- pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
- paddw_r2r(mm1, mm3); // tmp7 = z11 + z13;
-
- //ok
-
- /* Even part */
- pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
- psllw_i2r(2, mm6);
-
- movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2]
-
- paddw_r2r(mm5, mm0); // tmp10
-
- paddw_r2r(mm7, mm2); // tmp12
-
- pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
- psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7
-
- movq_r2r(mm1, mm5); // copy tmp1
- paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */
-
- psubw_r2r(mm4, mm5); // tmp1-tmp3
- psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6;
-
- movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace
- psllw_i2r(2, mm5); // shift tmp1-tmp3
-
- movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0]
-
- pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
- paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5;
-
- movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4]
-
- psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
- movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace
- movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */
-
- movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace
- psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2;
-
- paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2;
- movq_r2r(mm1, mm5); // copy tmp11
-
- paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12;
- movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */
-
- paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13;
-
- psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13;
- movq_r2r(mm7, mm0); // copy tmp0
-
- psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12;
- paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-
- psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-
- movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0]
- movq_r2r(mm1, mm3); // copy tmp1
-
- movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7]
- paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-
- psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-
- movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1]
- movq_r2r(mm4, mm1); // copy tmp3
-
- movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6]
-
- paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
-
- psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
-
- movq_r2m(mm4, *(wsptr+8));
- movq_r2r(mm5, mm7); // copy tmp2
-
- paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
-
- movq_r2m(mm1, *(wsptr+6));
- psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-
- movq_r2m(mm5, *(wsptr+4));
-
- movq_r2m(mm7, *(wsptr+10));
-
- //ok
-
-
-/*****************************************************************/
-
- idata++;
- wsptr++;
-
-/*****************************************************************/
-
- movq_m2r(*(idata+10), mm1); // load idata[DCTSIZE*5]
-
- movq_m2r(*(idata+6), mm0); // load idata[DCTSIZE*3]
-
- movq_m2r(*(idata+2), mm3); // load idata[DCTSIZE*1]
- movq_r2r(mm1, mm2); // copy tmp6 /* phase 6 */
-
- movq_m2r(*(idata+14), mm4); // load idata[DCTSIZE*7]
- paddw_r2r(mm0, mm1); // z13 = tmp6 + tmp5;
-
- psubw_r2r(mm0, mm2); // z10 = tmp6 - tmp5
-
- psllw_i2r(2, mm2); // shift z10
- movq_r2r(mm2, mm0); // copy z10
-
- pmulhw_m2r(fix_184n261, mm2); // MULTIPLY( z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm3, mm5); // copy tmp4
-
- pmulhw_m2r(fix_n184, mm0); // MULTIPLY(z10, -FIX_1_847759065); /* 2*c2 */
- paddw_r2r(mm4, mm3); // z11 = tmp4 + tmp7;
-
- movq_r2r(mm3, mm6); // copy z11 /* phase 5 */
- psubw_r2r(mm4, mm5); // z12 = tmp4 - tmp7;
-
- psubw_r2r(mm1, mm6); // z11-z13
- psllw_i2r(2, mm5); // shift z12
-
- movq_m2r(*(idata+12), mm4); // load idata[DCTSIZE*6], even part
- movq_r2r(mm5, mm7); // copy z12
-
- pmulhw_m2r(fix_108n184, mm5); // MULT(z12, (FIX_1_08-FIX_1_84)) //- z5; /* 2*(c2-c6) */ even part
- paddw_r2r(mm1, mm3); // tmp7 = z11 + z13;
-
- //ok
-
- /* Even part */
- pmulhw_m2r(fix_184, mm7); // MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) //+ z5; /* -2*(c2+c6) */
- psllw_i2r(2, mm6);
-
- movq_m2r(*(idata+4), mm1); // load idata[DCTSIZE*2]
-
- paddw_r2r(mm5, mm0); // tmp10
-
- paddw_r2r(mm7, mm2); // tmp12
-
- pmulhw_m2r(fix_141, mm6); // tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
- psubw_r2r(mm3, mm2); // tmp6 = tmp12 - tmp7
-
- movq_r2r(mm1, mm5); // copy tmp1
- paddw_r2r(mm4, mm1); // tmp13= tmp1 + tmp3; /* phases 5-3 */
-
- psubw_r2r(mm4, mm5); // tmp1-tmp3
- psubw_r2r(mm2, mm6); // tmp5 = tmp11 - tmp6;
-
- movq_r2m(mm1, *(wsptr)); // save tmp13 in workspace
- psllw_i2r(2, mm5); // shift tmp1-tmp3
-
- movq_m2r(*(idata), mm7); // load idata[DCTSIZE*0]
- paddw_r2r(mm6, mm0); // tmp4 = tmp10 + tmp5;
-
- pmulhw_m2r(fix_141, mm5); // MULTIPLY(tmp1 - tmp3, FIX_1_414213562)
-
- movq_m2r(*(idata+8), mm4); // load idata[DCTSIZE*4]
-
- psubw_r2r(mm1, mm5); // tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
-
- movq_r2m(mm0, *(wsptr+4)); // save tmp4 in workspace
- movq_r2r(mm7, mm1); // copy tmp0 /* phase 3 */
-
- movq_r2m(mm5, *(wsptr+2)); // save tmp12 in workspace
- psubw_r2r(mm4, mm1); // tmp11 = tmp0 - tmp2;
-
- paddw_r2r(mm4, mm7); // tmp10 = tmp0 + tmp2;
- movq_r2r(mm1, mm5); // copy tmp11
-
- paddw_m2r(*(wsptr+2), mm1); // tmp1 = tmp11 + tmp12;
- movq_r2r(mm7, mm4); // copy tmp10 /* phase 2 */
-
- paddw_m2r(*(wsptr), mm7); // tmp0 = tmp10 + tmp13;
-
- psubw_m2r(*(wsptr), mm4); // tmp3 = tmp10 - tmp13;
- movq_r2r(mm7, mm0); // copy tmp0
-
- psubw_m2r(*(wsptr+2), mm5); // tmp2 = tmp11 - tmp12;
- paddw_r2r(mm3, mm7); // wsptr[DCTSIZE*0] = (int) (tmp0 + tmp7);
-
- psubw_r2r(mm3, mm0); // wsptr[DCTSIZE*7] = (int) (tmp0 - tmp7);
-
- movq_r2m(mm7, *(wsptr)); // wsptr[DCTSIZE*0]
- movq_r2r(mm1, mm3); // copy tmp1
-
- movq_r2m(mm0, *(wsptr+14)); // wsptr[DCTSIZE*7]
- paddw_r2r(mm2, mm1); // wsptr[DCTSIZE*1] = (int) (tmp1 + tmp6);
-
- psubw_r2r(mm2, mm3); // wsptr[DCTSIZE*6] = (int) (tmp1 - tmp6);
-
- movq_r2m(mm1, *(wsptr+2)); // wsptr[DCTSIZE*1]
- movq_r2r(mm4, mm1); // copy tmp3
-
- movq_r2m(mm3, *(wsptr+12)); // wsptr[DCTSIZE*6]
-
- paddw_m2r(*(wsptr+4), mm4); // wsptr[DCTSIZE*4] = (int) (tmp3 + tmp4);
-
- psubw_m2r(*(wsptr+4), mm1); // wsptr[DCTSIZE*3] = (int) (tmp3 - tmp4);
-
- movq_r2m(mm4, *(wsptr+8));
- movq_r2r(mm5, mm7); // copy tmp2
-
- paddw_r2r(mm6, mm5); // wsptr[DCTSIZE*2] = (int) (tmp2 + tmp5)
-
- movq_r2m(mm1, *(wsptr+6));
- psubw_r2r(mm6, mm7); // wsptr[DCTSIZE*5] = (int) (tmp2 - tmp5);
-
- movq_r2m(mm5, *(wsptr+4));
-
- movq_r2m(mm7, *(wsptr+10));
-
-/*****************************************************************/
-
- /* Pass 2: process rows from work array, store into output array. */
- /* Note that we must descale the results by a factor of 8 == 2**3, */
- /* and also undo the PASS1_BITS scaling. */
-
-/*****************************************************************/
- /* Even part */
-
- wsptr--;
-
-// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
- movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3]
-
- movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7]
- movq_r2r(mm0, mm2);
-
- movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3]
- paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
-
- movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7]
- psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
-
- movq_r2r(mm0, mm6);
- movq_r2r(mm3, mm5);
-
- paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
- movq_r2r(mm2, mm1);
-
- psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
- punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
-
- movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7]
- punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
-
- movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-
- punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
- movq_r2r(mm3, mm4);
-
- movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3]
- punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
-
- movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
-
- paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
- movq_r2r(mm6, mm2);
-
- psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
- paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
-
- movq_r2r(mm3, mm5);
- punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
-
- psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
- punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
-
- movq_r2r(mm4, mm7);
- punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
-
- punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
-
- punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
-
- punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
- movq_r2r(mm1, mm6);
-
- //ok
-
-// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
-
- movq_r2r(mm0, mm2);
- punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
-
- punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
- psllw_i2r(2, mm6);
-
- pmulhw_m2r(fix_141, mm6);
- punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
-
- punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
- movq_r2r(mm0, mm7);
-
-// tmp0 = tmp10 + tmp13;
-// tmp3 = tmp10 - tmp13;
- paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
- psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
-
-// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
- psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
-// tmp1 = tmp11 + tmp12;
-// tmp2 = tmp11 - tmp12;
- movq_r2r(mm1, mm5);
-
- //OK
-
- /* Odd part */
-
-// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
- movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3]
- paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
-
- movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7]
- psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
-
- movq_r2r(mm3, mm6);
- punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5]
-
- punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3]
- movq_r2r(mm3, mm2);
-
-//Save tmp0 and tmp1 in wsptr
- movq_r2m(mm0, *(wsptr)); // save tmp0
- paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13]
-
-
-//Continue with z10 --- z13
- movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3]
- psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10]
-
- movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7]
- movq_r2r(mm6, mm4);
-
- movq_r2m(mm1, *(wsptr+1)); // save tmp1
- punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5]
-
- punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3]
- movq_r2r(mm6, mm1);
-
-//Save tmp2 and tmp3 in wsptr
- paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13]
- movq_r2r(mm2, mm4);
-
-//Continue with z10 --- z13
- movq_r2m(mm5, *(wsptr+2)); // save tmp2
- punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11]
-
- psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10]
- punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13]
-
- movq_r2r(mm3, mm0);
- punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12]
-
- movq_r2m(mm7, *(wsptr+3)); // save tmp3
- punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10]
-
- movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11]
-
- movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13]
-
- movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3]
- movq_r2r(mm6, mm4);
-
- punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5]
- movq_r2r(mm1, mm5);
-
- punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3]
- movq_r2r(mm6, mm2);
-
- movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7]
- paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13]
-
- psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10]
- punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5]
-
- punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3]
- movq_r2r(mm1, mm7);
-
- paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13]
- psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10]
-
- movq_r2r(mm6, mm5);
- punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11]
-
- punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13]
- movq_r2r(mm2, mm4);
-
- punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12]
-
- punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10]
-
- punpckhdq_r2r(mm6, mm4); /// wsptr[2,z10],[3,z10],[2,z11],[3,z11]
-
- punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13]
- movq_r2r(mm0, mm5);
-
- punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10]
-
- punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11]
- movq_r2r(mm3, mm4);
-
- punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13]
- movq_r2r(mm5, mm1);
-
- punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12]
-// tmp7 = z11 + z13; /* phase 5 */
-// tmp8 = z11 - z13; /* phase 5 */
- psubw_r2r(mm4, mm1); // tmp8
-
- paddw_r2r(mm4, mm5); // tmp7
-// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
- psllw_i2r(2, mm1);
-
- psllw_i2r(2, mm0);
-
- pmulhw_m2r(fix_141, mm1); // tmp21
-// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */
-// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
- psllw_i2r(2, mm3);
- movq_r2r(mm0, mm7);
-
- pmulhw_m2r(fix_n184, mm7);
- movq_r2r(mm3, mm6);
-
- movq_m2r(*(wsptr), mm2); // tmp0,final1
-
- pmulhw_m2r(fix_108n184, mm6);
-// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
-// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm2, mm4); // final1
-
- pmulhw_m2r(fix_184n261, mm0);
- paddw_r2r(mm5, mm2); // tmp0+tmp7,final1
-
- pmulhw_m2r(fix_184, mm3);
- psubw_r2r(mm5, mm4); // tmp0-tmp7,final1
-
-// tmp6 = tmp22 - tmp7; /* phase 2 */
- psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1
-
- paddw_r2r(mm6, mm7); // tmp20
- psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1
-
- paddw_r2r(mm0, mm3); // tmp22
-
-// tmp5 = tmp21 - tmp6;
- psubw_r2r(mm5, mm3); // tmp6
-
-// tmp4 = tmp20 + tmp5;
- movq_m2r(*(wsptr+1), mm0); // tmp1,final2
- psubw_r2r(mm3, mm1); // tmp5
-
- movq_r2r(mm0, mm6); // final2
- paddw_r2r(mm3, mm0); // tmp1+tmp6,final2
-
- /* Final output stage: scale down by a factor of 8 and range-limit */
-
-
-// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-// & RANGE_MASK]; final1
-
-
-// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-// & RANGE_MASK]; final2
- psubw_r2r(mm3, mm6); // tmp1-tmp6,final2
- psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1]
-
- psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6]
-
- packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
-
- movq_m2r(*(wsptr+2), mm5); // tmp2,final3
- packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
-
-// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-// & RANGE_MASK]; final3
- paddw_r2r(mm1, mm7); // tmp4
- movq_r2r(mm5, mm3);
-
- paddw_r2r(mm1, mm5); // tmp2+tmp5
- psubw_r2r(mm1, mm3); // tmp2-tmp5
-
- psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2]
-
- movq_m2r(*(wsptr+3), mm4); // tmp3,final4
- psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5]
-
-
-
-// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-// & RANGE_MASK]; final4
- movq_r2r(mm4, mm6);
- paddw_r2r(mm7, mm4); // tmp3+tmp4
-
- psubw_r2r(mm7, mm6); // tmp3-tmp4
- psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4]
-
- // mov ecx, [dataptr]
-
- psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3]
-
- packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
-
- packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
- movq_r2r(mm2, mm4);
-
- movq_r2r(mm5, mm7);
- punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
-
- punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
- movq_r2r(mm2, mm1);
-
- punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
-
- // add dataptr, 4
-
- punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
-
- punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
-
- // add ecx, output_col
-
- movq_r2r(mm7, mm6);
- punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
-
- movq_r2r(mm2, mm0);
- punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
-
- // mov idata, [dataptr]
-
- punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
-
- // add dataptr, 4
-
- movq_r2r(mm1, mm3);
-
- // add idata, output_col
-
- punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
-
- movq_r2m(mm2, *(dataptr));
-
- punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
-
- dataptr += rskip;
- movq_r2m(mm0, *(dataptr));
-
- punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
- punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
-
- dataptr += rskip;
- movq_r2m(mm1, *(dataptr));
-
- dataptr += rskip;
- movq_r2m(mm3, *(dataptr));
-
-/*******************************************************************/
-
- wsptr += 8;
-
-/*******************************************************************/
-
-// tmp10 = ((DCTELEM) wsptr[0] + (DCTELEM) wsptr[4]);
-// tmp13 = ((DCTELEM) wsptr[2] + (DCTELEM) wsptr[6]);
-// tmp11 = ((DCTELEM) wsptr[0] - (DCTELEM) wsptr[4]);
-// tmp14 = ((DCTELEM) wsptr[2] - (DCTELEM) wsptr[6]);
- movq_m2r(*(wsptr), mm0); // wsptr[0,0],[0,1],[0,2],[0,3]
-
- movq_m2r(*(wsptr+1), mm1); // wsptr[0,4],[0,5],[0,6],[0,7]
- movq_r2r(mm0, mm2);
-
- movq_m2r(*(wsptr+2), mm3); // wsptr[1,0],[1,1],[1,2],[1,3]
- paddw_r2r(mm1, mm0); // wsptr[0,tmp10],[xxx],[0,tmp13],[xxx]
-
- movq_m2r(*(wsptr+3), mm4); // wsptr[1,4],[1,5],[1,6],[1,7]
- psubw_r2r(mm1, mm2); // wsptr[0,tmp11],[xxx],[0,tmp14],[xxx]
-
- movq_r2r(mm0, mm6);
- movq_r2r(mm3, mm5);
-
- paddw_r2r(mm4, mm3); // wsptr[1,tmp10],[xxx],[1,tmp13],[xxx]
- movq_r2r(mm2, mm1);
-
- psubw_r2r(mm4, mm5); // wsptr[1,tmp11],[xxx],[1,tmp14],[xxx]
- punpcklwd_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[xxx],[xxx]
-
- movq_m2r(*(wsptr+7), mm7); // wsptr[3,4],[3,5],[3,6],[3,7]
- punpckhwd_r2r(mm3, mm6); // wsptr[0,tmp13],[1,tmp13],[xxx],[xxx]
-
- movq_m2r(*(wsptr+4), mm3); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckldq_r2r(mm6, mm0); // wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-
- punpcklwd_r2r(mm5, mm1); // wsptr[0,tmp11],[1,tmp11],[xxx],[xxx]
- movq_r2r(mm3, mm4);
-
- movq_m2r(*(wsptr+6), mm6); // wsptr[3,0],[3,1],[3,2],[3,3]
- punpckhwd_r2r(mm5, mm2); // wsptr[0,tmp14],[1,tmp14],[xxx],[xxx]
-
- movq_m2r(*(wsptr+5), mm5); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckldq_r2r(mm2, mm1); // wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
- paddw_r2r(mm5, mm3); // wsptr[2,tmp10],[xxx],[2,tmp13],[xxx]
- movq_r2r(mm6, mm2);
-
- psubw_r2r(mm5, mm4); // wsptr[2,tmp11],[xxx],[2,tmp14],[xxx]
- paddw_r2r(mm7, mm6); // wsptr[3,tmp10],[xxx],[3,tmp13],[xxx]
-
- movq_r2r(mm3, mm5);
- punpcklwd_r2r(mm6, mm3); // wsptr[2,tmp10],[3,tmp10],[xxx],[xxx]
-
- psubw_r2r(mm7, mm2); // wsptr[3,tmp11],[xxx],[3,tmp14],[xxx]
- punpckhwd_r2r(mm6, mm5); // wsptr[2,tmp13],[3,tmp13],[xxx],[xxx]
-
- movq_r2r(mm4, mm7);
- punpckldq_r2r(mm5, mm3); // wsptr[2,tmp10],[3,tmp10],[2,tmp13],[3,tmp13]
-
- punpcklwd_r2r(mm2, mm4); // wsptr[2,tmp11],[3,tmp11],[xxx],[xxx]
-
- punpckhwd_r2r(mm2, mm7); // wsptr[2,tmp14],[3,tmp14],[xxx],[xxx]
-
- punpckldq_r2r(mm7, mm4); // wsptr[2,tmp11],[3,tmp11],[2,tmp14],[3,tmp14]
- movq_r2r(mm1, mm6);
-
- //OK
-
-// mm0 = ;wsptr[0,tmp10],[1,tmp10],[0,tmp13],[1,tmp13]
-// mm1 = ;wsptr[0,tmp11],[1,tmp11],[0,tmp14],[1,tmp14]
-
- movq_r2r(mm0, mm2);
- punpckhdq_r2r(mm4, mm6); // wsptr[0,tmp14],[1,tmp14],[2,tmp14],[3,tmp14]
-
- punpckldq_r2r(mm4, mm1); // wsptr[0,tmp11],[1,tmp11],[2,tmp11],[3,tmp11]
- psllw_i2r(2, mm6);
-
- pmulhw_m2r(fix_141, mm6);
- punpckldq_r2r(mm3, mm0); // wsptr[0,tmp10],[1,tmp10],[2,tmp10],[3,tmp10]
-
- punpckhdq_r2r(mm3, mm2); // wsptr[0,tmp13],[1,tmp13],[2,tmp13],[3,tmp13]
- movq_r2r(mm0, mm7);
-
-// tmp0 = tmp10 + tmp13;
-// tmp3 = tmp10 - tmp13;
- paddw_r2r(mm2, mm0); // [0,tmp0],[1,tmp0],[2,tmp0],[3,tmp0]
- psubw_r2r(mm2, mm7); // [0,tmp3],[1,tmp3],[2,tmp3],[3,tmp3]
-
-// tmp12 = MULTIPLY(tmp14, FIX_1_414213562) - tmp13;
- psubw_r2r(mm2, mm6); // wsptr[0,tmp12],[1,tmp12],[2,tmp12],[3,tmp12]
-// tmp1 = tmp11 + tmp12;
-// tmp2 = tmp11 - tmp12;
- movq_r2r(mm1, mm5);
-
- //OK
-
-
- /* Odd part */
-
-// z13 = (DCTELEM) wsptr[5] + (DCTELEM) wsptr[3];
-// z10 = (DCTELEM) wsptr[5] - (DCTELEM) wsptr[3];
-// z11 = (DCTELEM) wsptr[1] + (DCTELEM) wsptr[7];
-// z12 = (DCTELEM) wsptr[1] - (DCTELEM) wsptr[7];
- movq_m2r(*(wsptr), mm3); // wsptr[0,0],[0,1],[0,2],[0,3]
- paddw_r2r(mm6, mm1); // [0,tmp1],[1,tmp1],[2,tmp1],[3,tmp1]
-
- movq_m2r(*(wsptr+1), mm4); // wsptr[0,4],[0,5],[0,6],[0,7]
- psubw_r2r(mm6, mm5); // [0,tmp2],[1,tmp2],[2,tmp2],[3,tmp2]
-
- movq_r2r(mm3, mm6);
- punpckldq_r2r(mm4, mm3); // wsptr[0,0],[0,1],[0,4],[0,5]
-
- punpckhdq_r2r(mm6, mm4); // wsptr[0,6],[0,7],[0,2],[0,3]
- movq_r2r(mm3, mm2);
-
-//Save tmp0 and tmp1 in wsptr
- movq_r2m(mm0, *(wsptr)); // save tmp0
- paddw_r2r(mm4, mm2); // wsptr[xxx],[0,z11],[xxx],[0,z13]
-
-
-//Continue with z10 --- z13
- movq_m2r(*(wsptr+2), mm6); // wsptr[1,0],[1,1],[1,2],[1,3]
- psubw_r2r(mm4, mm3); // wsptr[xxx],[0,z12],[xxx],[0,z10]
-
- movq_m2r(*(wsptr+3), mm0); // wsptr[1,4],[1,5],[1,6],[1,7]
- movq_r2r(mm6, mm4);
-
- movq_r2m(mm1, *(wsptr+1)); // save tmp1
- punpckldq_r2r(mm0, mm6); // wsptr[1,0],[1,1],[1,4],[1,5]
-
- punpckhdq_r2r(mm4, mm0); // wsptr[1,6],[1,7],[1,2],[1,3]
- movq_r2r(mm6, mm1);
-
-//Save tmp2 and tmp3 in wsptr
- paddw_r2r(mm0, mm6); // wsptr[xxx],[1,z11],[xxx],[1,z13]
- movq_r2r(mm2, mm4);
-
-//Continue with z10 --- z13
- movq_r2m(mm5, *(wsptr+2)); // save tmp2
- punpcklwd_r2r(mm6, mm2); // wsptr[xxx],[xxx],[0,z11],[1,z11]
-
- psubw_r2r(mm0, mm1); // wsptr[xxx],[1,z12],[xxx],[1,z10]
- punpckhwd_r2r(mm6, mm4); // wsptr[xxx],[xxx],[0,z13],[1,z13]
-
- movq_r2r(mm3, mm0);
- punpcklwd_r2r(mm1, mm3); // wsptr[xxx],[xxx],[0,z12],[1,z12]
-
- movq_r2m(mm7, *(wsptr+3)); // save tmp3
- punpckhwd_r2r(mm1, mm0); // wsptr[xxx],[xxx],[0,z10],[1,z10]
-
- movq_m2r(*(wsptr+4), mm6); // wsptr[2,0],[2,1],[2,2],[2,3]
- punpckhdq_r2r(mm2, mm0); // wsptr[0,z10],[1,z10],[0,z11],[1,z11]
-
- movq_m2r(*(wsptr+5), mm7); // wsptr[2,4],[2,5],[2,6],[2,7]
- punpckhdq_r2r(mm4, mm3); // wsptr[0,z12],[1,z12],[0,z13],[1,z13]
-
- movq_m2r(*(wsptr+6), mm1); // wsptr[3,0],[3,1],[3,2],[3,3]
- movq_r2r(mm6, mm4);
-
- punpckldq_r2r(mm7, mm6); // wsptr[2,0],[2,1],[2,4],[2,5]
- movq_r2r(mm1, mm5);
-
- punpckhdq_r2r(mm4, mm7); // wsptr[2,6],[2,7],[2,2],[2,3]
- movq_r2r(mm6, mm2);
-
- movq_m2r(*(wsptr+7), mm4); // wsptr[3,4],[3,5],[3,6],[3,7]
- paddw_r2r(mm7, mm6); // wsptr[xxx],[2,z11],[xxx],[2,z13]
-
- psubw_r2r(mm7, mm2); // wsptr[xxx],[2,z12],[xxx],[2,z10]
- punpckldq_r2r(mm4, mm1); // wsptr[3,0],[3,1],[3,4],[3,5]
-
- punpckhdq_r2r(mm5, mm4); // wsptr[3,6],[3,7],[3,2],[3,3]
- movq_r2r(mm1, mm7);
-
- paddw_r2r(mm4, mm1); // wsptr[xxx],[3,z11],[xxx],[3,z13]
- psubw_r2r(mm4, mm7); // wsptr[xxx],[3,z12],[xxx],[3,z10]
-
- movq_r2r(mm6, mm5);
- punpcklwd_r2r(mm1, mm6); // wsptr[xxx],[xxx],[2,z11],[3,z11]
-
- punpckhwd_r2r(mm1, mm5); // wsptr[xxx],[xxx],[2,z13],[3,z13]
- movq_r2r(mm2, mm4);
-
- punpcklwd_r2r(mm7, mm2); // wsptr[xxx],[xxx],[2,z12],[3,z12]
-
- punpckhwd_r2r(mm7, mm4); // wsptr[xxx],[xxx],[2,z10],[3,z10]
-
- punpckhdq_r2r(mm6, mm4); // wsptr[2,z10],[3,z10],[2,z11],[3,z11]
-
- punpckhdq_r2r(mm5, mm2); // wsptr[2,z12],[3,z12],[2,z13],[3,z13]
- movq_r2r(mm0, mm5);
-
- punpckldq_r2r(mm4, mm0); // wsptr[0,z10],[1,z10],[2,z10],[3,z10]
-
- punpckhdq_r2r(mm4, mm5); // wsptr[0,z11],[1,z11],[2,z11],[3,z11]
- movq_r2r(mm3, mm4);
-
- punpckhdq_r2r(mm2, mm4); // wsptr[0,z13],[1,z13],[2,z13],[3,z13]
- movq_r2r(mm5, mm1);
-
- punpckldq_r2r(mm2, mm3); // wsptr[0,z12],[1,z12],[2,z12],[3,z12]
-// tmp7 = z11 + z13; /* phase 5 */
-// tmp8 = z11 - z13; /* phase 5 */
- psubw_r2r(mm4, mm1); // tmp8
-
- paddw_r2r(mm4, mm5); // tmp7
-// tmp21 = MULTIPLY(tmp8, FIX_1_414213562); /* 2*c4 */
- psllw_i2r(2, mm1);
-
- psllw_i2r(2, mm0);
-
- pmulhw_m2r(fix_141, mm1); // tmp21
-// tmp20 = MULTIPLY(z12, (FIX_1_082392200- FIX_1_847759065)) /* 2*(c2-c6) */
-// + MULTIPLY(z10, - FIX_1_847759065); /* 2*c2 */
- psllw_i2r(2, mm3);
- movq_r2r(mm0, mm7);
-
- pmulhw_m2r(fix_n184, mm7);
- movq_r2r(mm3, mm6);
-
- movq_m2r(*(wsptr), mm2); // tmp0,final1
-
- pmulhw_m2r(fix_108n184, mm6);
-// tmp22 = MULTIPLY(z10,(FIX_1_847759065 - FIX_2_613125930)) /* -2*(c2+c6) */
-// + MULTIPLY(z12, FIX_1_847759065); /* 2*c2 */
- movq_r2r(mm2, mm4); // final1
-
- pmulhw_m2r(fix_184n261, mm0);
- paddw_r2r(mm5, mm2); // tmp0+tmp7,final1
-
- pmulhw_m2r(fix_184, mm3);
- psubw_r2r(mm5, mm4); // tmp0-tmp7,final1
-
-// tmp6 = tmp22 - tmp7; /* phase 2 */
- psraw_i2r(3, mm2); // outptr[0,0],[1,0],[2,0],[3,0],final1
-
- paddw_r2r(mm6, mm7); // tmp20
- psraw_i2r(3, mm4); // outptr[0,7],[1,7],[2,7],[3,7],final1
-
- paddw_r2r(mm0, mm3); // tmp22
-
-// tmp5 = tmp21 - tmp6;
- psubw_r2r(mm5, mm3); // tmp6
-
-// tmp4 = tmp20 + tmp5;
- movq_m2r(*(wsptr+1), mm0); // tmp1,final2
- psubw_r2r(mm3, mm1); // tmp5
-
- movq_r2r(mm0, mm6); // final2
- paddw_r2r(mm3, mm0); // tmp1+tmp6,final2
-
- /* Final output stage: scale down by a factor of 8 and range-limit */
-
-// outptr[0] = range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[7] = range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS+3)
-// & RANGE_MASK]; final1
-
-
-// outptr[1] = range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[6] = range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS+3)
-// & RANGE_MASK]; final2
- psubw_r2r(mm3, mm6); // tmp1-tmp6,final2
- psraw_i2r(3, mm0); // outptr[0,1],[1,1],[2,1],[3,1]
-
- psraw_i2r(3, mm6); // outptr[0,6],[1,6],[2,6],[3,6]
-
- packuswb_r2r(mm4, mm0); // out[0,1],[1,1],[2,1],[3,1],[0,7],[1,7],[2,7],[3,7]
-
- movq_m2r(*(wsptr+2), mm5); // tmp2,final3
- packuswb_r2r(mm6, mm2); // out[0,0],[1,0],[2,0],[3,0],[0,6],[1,6],[2,6],[3,6]
-
-// outptr[2] = range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[5] = range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS+3)
-// & RANGE_MASK]; final3
- paddw_r2r(mm1, mm7); // tmp4
- movq_r2r(mm5, mm3);
-
- paddw_r2r(mm1, mm5); // tmp2+tmp5
- psubw_r2r(mm1, mm3); // tmp2-tmp5
-
- psraw_i2r(3, mm5); // outptr[0,2],[1,2],[2,2],[3,2]
-
- movq_m2r(*(wsptr+3), mm4); // tmp3,final4
- psraw_i2r(3, mm3); // outptr[0,5],[1,5],[2,5],[3,5]
-
-
-
-// outptr[4] = range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS+3)
-// & RANGE_MASK];
-// outptr[3] = range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS+3)
-// & RANGE_MASK]; final4
- movq_r2r(mm4, mm6);
- paddw_r2r(mm7, mm4); // tmp3+tmp4
-
- psubw_r2r(mm7, mm6); // tmp3-tmp4
- psraw_i2r(3, mm4); // outptr[0,4],[1,4],[2,4],[3,4]
-
- psraw_i2r(3, mm6); // outptr[0,3],[1,3],[2,3],[3,3]
-
- /*
- movq_r2m(mm4, *dummy);
- fprintf(stderr, "3-4 %016llx\n", dummy);
- movq_r2m(mm4, *dummy);
- fprintf(stderr, "3+4 %016llx\n", dummy);
- */
-
-
- packuswb_r2r(mm4, mm5); // out[0,2],[1,2],[2,2],[3,2],[0,4],[1,4],[2,4],[3,4]
-
- packuswb_r2r(mm3, mm6); // out[0,3],[1,3],[2,3],[3,3],[0,5],[1,5],[2,5],[3,5]
- movq_r2r(mm2, mm4);
-
- movq_r2r(mm5, mm7);
- punpcklbw_r2r(mm0, mm2); // out[0,0],[0,1],[1,0],[1,1],[2,0],[2,1],[3,0],[3,1]
-
- punpckhbw_r2r(mm0, mm4); // out[0,6],[0,7],[1,6],[1,7],[2,6],[2,7],[3,6],[3,7]
- movq_r2r(mm2, mm1);
-
- punpcklbw_r2r(mm6, mm5); // out[0,2],[0,3],[1,2],[1,3],[2,2],[2,3],[3,2],[3,3]
-
- punpckhbw_r2r(mm6, mm7); // out[0,4],[0,5],[1,4],[1,5],[2,4],[2,5],[3,4],[3,5]
-
- punpcklwd_r2r(mm5, mm2); // out[0,0],[0,1],[0,2],[0,3],[1,0],[1,1],[1,2],[1,3]
-
- movq_r2r(mm7, mm6);
- punpckhwd_r2r(mm5, mm1); // out[2,0],[2,1],[2,2],[2,3],[3,0],[3,1],[3,2],[3,3]
-
- movq_r2r(mm2, mm0);
- punpcklwd_r2r(mm4, mm6); // out[0,4],[0,5],[0,6],[0,7],[1,4],[1,5],[1,6],[1,7]
-
- punpckldq_r2r(mm6, mm2); // out[0,0],[0,1],[0,2],[0,3],[0,4],[0,5],[0,6],[0,7]
-
- movq_r2r(mm1, mm3);
-
- punpckhwd_r2r(mm4, mm7); // out[2,4],[2,5],[2,6],[2,7],[3,4],[3,5],[3,6],[3,7]
-
- dataptr += rskip;
- movq_r2m(mm2, *(dataptr));
-
- punpckhdq_r2r(mm6, mm0); // out[1,0],[1,1],[1,2],[1,3],[1,4],[1,5],[1,6],[1,7]
-
- dataptr += rskip;
- movq_r2m(mm0, *(dataptr));
-
- punpckldq_r2r(mm7, mm1); // out[2,0],[2,1],[2,2],[2,3],[2,4],[2,5],[2,6],[2,7]
-
- punpckhdq_r2r(mm7, mm3); // out[3,0],[3,1],[3,2],[3,3],[3,4],[3,5],[3,6],[3,7]
-
- dataptr += rskip;
- movq_r2m(mm1, *(dataptr));
-
- dataptr += rskip;
- movq_r2m(mm3, *(dataptr));
-
-#else
- __s32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
- __s32 tmp10, tmp11, tmp12, tmp13;
- __s32 z5, z10, z11, z12, z13;
- __s16 *inptr;
- __s32 *wsptr;
- __u8 *outptr;
- int ctr;
- __s32 dcval;
- __s32 workspace[64];
-
- inptr = data;
- wsptr = workspace;
- for (ctr = 8; ctr > 0; ctr--) {
-
- if ((inptr[8] | inptr[16] | inptr[24] |
- inptr[32] | inptr[40] | inptr[48] | inptr[56]) == 0) {
- dcval = inptr[0];
- wsptr[0] = dcval;
- wsptr[8] = dcval;
- wsptr[16] = dcval;
- wsptr[24] = dcval;
- wsptr[32] = dcval;
- wsptr[40] = dcval;
- wsptr[48] = dcval;
- wsptr[56] = dcval;
-
- inptr++;
- wsptr++;
- continue;
- }
-
- tmp0 = inptr[0];
- tmp1 = inptr[16];
- tmp2 = inptr[32];
- tmp3 = inptr[48];
-
- tmp10 = tmp0 + tmp2;
- tmp11 = tmp0 - tmp2;
-
- tmp13 = tmp1 + tmp3;
- tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13;
-
- tmp0 = tmp10 + tmp13;
- tmp3 = tmp10 - tmp13;
- tmp1 = tmp11 + tmp12;
- tmp2 = tmp11 - tmp12;
-
- tmp4 = inptr[8];
- tmp5 = inptr[24];
- tmp6 = inptr[40];
- tmp7 = inptr[56];
-
- z13 = tmp6 + tmp5;
- z10 = tmp6 - tmp5;
- z11 = tmp4 + tmp7;
- z12 = tmp4 - tmp7;
-
- tmp7 = z11 + z13;
- tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
-
- z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
- tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
- tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
-
- tmp6 = tmp12 - tmp7;
- tmp5 = tmp11 - tmp6;
- tmp4 = tmp10 + tmp5;
-
- wsptr[0] = (__s32) (tmp0 + tmp7);
- wsptr[56] = (__s32) (tmp0 - tmp7);
- wsptr[8] = (__s32) (tmp1 + tmp6);
- wsptr[48] = (__s32) (tmp1 - tmp6);
- wsptr[16] = (__s32) (tmp2 + tmp5);
- wsptr[40] = (__s32) (tmp2 - tmp5);
- wsptr[32] = (__s32) (tmp3 + tmp4);
- wsptr[24] = (__s32) (tmp3 - tmp4);
-
- inptr++;
- wsptr++;
- }
-
- wsptr = workspace;
- for (ctr = 0; ctr < 8; ctr++) {
- outptr = &(odata[ctr*rskip]);
-
- tmp10 = wsptr[0] + wsptr[4];
- tmp11 = wsptr[0] - wsptr[4];
-
- tmp13 = wsptr[2] + wsptr[6];
- tmp12 = MULTIPLY(wsptr[2] - wsptr[6], FIX_1_414213562) - tmp13;
-
- tmp0 = tmp10 + tmp13;
- tmp3 = tmp10 - tmp13;
- tmp1 = tmp11 + tmp12;
- tmp2 = tmp11 - tmp12;
-
- z13 = wsptr[5] + wsptr[3];
- z10 = wsptr[5] - wsptr[3];
- z11 = wsptr[1] + wsptr[7];
- z12 = wsptr[1] - wsptr[7];
-
- tmp7 = z11 + z13;
- tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);
-
- z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
- tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
- tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;
-
- tmp6 = tmp12 - tmp7;
- tmp5 = tmp11 - tmp6;
- tmp4 = tmp10 + tmp5;
-
- outptr[0] = RL(DESCALE(tmp0 + tmp7));
- outptr[7] = RL(DESCALE(tmp0 - tmp7));
- outptr[1] = RL(DESCALE(tmp1 + tmp6));
- outptr[6] = RL(DESCALE(tmp1 - tmp6));
- outptr[2] = RL(DESCALE(tmp2 + tmp5));
- outptr[5] = RL(DESCALE(tmp2 - tmp5));
- outptr[4] = RL(DESCALE(tmp3 + tmp4));
- outptr[3] = RL(DESCALE(tmp3 - tmp4));
-
- wsptr += 8;
- }
-#endif
-}
/*
Main Routines
@@ -2733,7 +1439,7 @@ Input: buf -> pointer to 128 ints for quant values store to pass back to
Q -> quality factor (192=best, 32=worst)
*/
-void RTjpeg_init_Q(__u8 Q)
+static void RTjpeg_init_Q(__u8 Q)
{
int i;
__u64 qual;
@@ -2760,7 +1466,6 @@ void RTjpeg_init_Q(__u8 Q)
RTjpeg_cb8--;
RTjpeg_dct_init();
- RTjpeg_idct_init();
RTjpeg_quant_init();
}
@@ -2822,37 +1527,6 @@ void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q)
buf[64+i]=le2me_32(RTjpeg_ciqt[i]);
}
-void RTjpeg_init_decompress(__u32 *buf, int width, int height)
-{
- int i;
-
- RTjpeg_init_data();
-
- RTjpeg_width=width;
- RTjpeg_height=height;
- RTjpeg_Ywidth = RTjpeg_width>>3;
- RTjpeg_Ysize=width * height;
- RTjpeg_Cwidth = RTjpeg_width>>4;
- RTjpeg_Csize= (width>>1) * height;
-
- for(i=0; i<64; i++)
- {
- RTjpeg_liqt[i]=le2me_32(buf[i]);
- RTjpeg_ciqt[i]=le2me_32(buf[i+64]);
- }
-
- RTjpeg_lb8=0;
- while(RTjpeg_liqt[RTjpeg_ZZ[++RTjpeg_lb8]]<=8);
- RTjpeg_lb8--;
- RTjpeg_cb8=0;
- while(RTjpeg_ciqt[RTjpeg_ZZ[++RTjpeg_cb8]]<=8);
- RTjpeg_cb8--;
-
- RTjpeg_idct_init();
-
-// RTjpeg_color_init();
-}
-
int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp)
{
__s8 * sb;
@@ -2907,210 +1581,6 @@ int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp)
return (sp-sb);
}
-int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp)
-{
- __s8 * sb;
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + RTjpeg_Csize;
- register int i, j, k;
-
-#if HAVE_MMX
- emms();
-#endif
- sb=sp;
-/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
- for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
-
- RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
- RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
-
- }
- bp+=RTjpeg_width<<3;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
-
- }
-#if HAVE_MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-int RTjpeg_compress8(__s8 *sp, unsigned char *bp)
-{
- __s8 * sb;
- int i, j;
-
-#if HAVE_MMX
- emms();
-#endif
-
- sb=sp;
-/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
- for(j=0; j<RTjpeg_width; j+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
- }
- bp+=RTjpeg_width;
- }
-
-#if HAVE_MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp)
-{
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize);
- int i, j,k;
-
-#if HAVE_MMX
- emms();
-#endif
-
-/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
- for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
- }
- }
- bp+=RTjpeg_width<<3;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
- }
-#if HAVE_MMX
- emms();
-#endif
-}
-
-void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp)
-{
- register __s8 * bp1 = bp + (RTjpeg_width<<3);
- register __s8 * bp2 = bp + RTjpeg_Ysize;
- register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
- int i, j,k;
-
-#if HAVE_MMX
- emms();
-#endif
-
-/* Y */
- for(i=RTjpeg_height>>1; i; i-=8)
- {
- for(k=0, j=0; j<RTjpeg_width; j+=16, k+=8) {
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j+8, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp1+j, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp1+j+8, RTjpeg_block, RTjpeg_width);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp2+k, RTjpeg_block, RTjpeg_width>>1);
- }
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_cb8, RTjpeg_ciqt);
- RTjpeg_idct(bp3+k, RTjpeg_block, RTjpeg_width>>1);
- }
- }
- bp+=RTjpeg_width<<4;
- bp1+=RTjpeg_width<<4;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
- }
-#if HAVE_MMX
- emms();
-#endif
-}
-
-void RTjpeg_decompress8(__s8 *sp, __u8 *bp)
-{
- int i, j;
-
-#if HAVE_MMX
- emms();
-#endif
-
-/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
- for(j=0; j<RTjpeg_width; j+=8)
- if(*sp==-1)sp++;
- else
- {
- sp+=RTjpeg_s2b(RTjpeg_block, sp, RTjpeg_lb8, RTjpeg_liqt);
- RTjpeg_idct(bp+j, RTjpeg_block, RTjpeg_width);
- }
- bp+=RTjpeg_width<<3;
- }
-}
-
/*
External Function
@@ -3177,11 +1647,8 @@ static int RTjpeg_bcomp(__s16 *old, mmx_t *mask)
if(result.q)
{
-// if(!RTjpeg_mtest)
-// for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
return 0;
}
-// printf(".");
return 1;
}
@@ -3193,7 +1660,6 @@ static int RTjpeg_bcomp(__s16 *old, __u16 *mask)
for(i=0; i<64; i++)
if(abs(old[i]-RTjpeg_block[i])>*mask)
{
- if(!RTjpeg_mtest)
for(i=0; i<16; i++)((__u64 *)old)[i]=((__u64 *)RTjpeg_block)[i];
return 0;
}
@@ -3201,15 +1667,9 @@ static int RTjpeg_bcomp(__s16 *old, __u16 *mask)
}
#endif
-void RTjpeg_set_test(int i)
-{
- RTjpeg_mtest=i;
-}
-
int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
{
__s8 * sb;
-//rh __s16 *block;
register __s8 * bp1 = bp + (RTjpeg_width<<3);
register __s8 * bp2 = bp + RTjpeg_Ysize;
register __s8 * bp3 = bp2 + (RTjpeg_Csize>>1);
@@ -3296,498 +1756,3 @@ int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask
#endif
return (sp-sb);
}
-
-
-int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask)
-{
- __s8 * sb;
- __s16 *block;
- register __s8 * bp2;
- register __s8 * bp3;
- register int i, j, k;
-
-#if HAVE_MMX
- emms();
- RTjpeg_lmask.uq=((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask;
- RTjpeg_cmask.uq=((__u64)cmask<<48)|((__u64)cmask<<32)|((__u64)cmask<<16)|cmask;
-#else
- RTjpeg_lmask=lmask;
- RTjpeg_cmask=cmask;
-#endif
-
- bp = bp - RTjpeg_width*0;
- bp2 = bp + RTjpeg_Ysize-RTjpeg_width*0;
- bp3 = bp2 + RTjpeg_Csize;
-
- sb=sp;
- block=RTjpeg_old;
-/* Y */
- for(i=RTjpeg_height; i; i-=8)
- {
- for(j=0, k=0; j<RTjpeg_width; j+=16, k+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- if(RTjpeg_bcomp(block, &RTjpeg_lmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
- block+=64;
-
- RTjpeg_dctY(bp+j+8, RTjpeg_block, RTjpeg_Ywidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- if(RTjpeg_bcomp(block, &RTjpeg_lmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
- block+=64;
-
- RTjpeg_dctY(bp2+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- if(RTjpeg_bcomp(block, &RTjpeg_cmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
- block+=64;
-
- RTjpeg_dctY(bp3+k, RTjpeg_block, RTjpeg_Cwidth);
- RTjpeg_quant(RTjpeg_block, RTjpeg_cqt);
- if(RTjpeg_bcomp(block, &RTjpeg_cmask))
- {
- *((__u8 *)sp++)=255;
- }
- else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_cb8);
- block+=64;
-
- }
- bp+=RTjpeg_width<<3;
- bp2+=RTjpeg_width<<2;
- bp3+=RTjpeg_width<<2;
- }
- printf ("%d\n", block - RTjpeg_old);
-#if HAVE_MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask)
-{
- __s8 * sb;
- __s16 *block;
- int i, j;
-
-#if HAVE_MMX
- emms();
- RTjpeg_lmask.uq=((__u64)lmask<<48)|((__u64)lmask<<32)|((__u64)lmask<<16)|lmask;
-#else
- RTjpeg_lmask=lmask;
-#endif
-
-
- sb=sp;
- block=RTjpeg_old;
-/* Y */
- for(i=0; i<RTjpeg_height; i+=8)
- {
- for(j=0; j<RTjpeg_width; j+=8)
- {
- RTjpeg_dctY(bp+j, RTjpeg_block, RTjpeg_width);
- RTjpeg_quant(RTjpeg_block, RTjpeg_lqt);
- if(RTjpeg_bcomp(block, &RTjpeg_lmask))
- {
- *((__u8 *)sp++)=255;
-// printf("* %d ", sp[-1]);
- } else sp+=RTjpeg_b2s(RTjpeg_block, sp, RTjpeg_lb8);
- block+=64;
- }
- bp+=RTjpeg_width<<3;
- }
-#if HAVE_MMX
- emms();
-#endif
- return (sp-sb);
-}
-
-#define KcrR 76284
-#define KcrG 53281
-#define KcbG 25625
-#define KcbB 132252
-#define Ky 76284
-
-void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute;
- int yskip;
-
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
- bufy=&buf[0];
- bufoute=rgb;
-
- for(i=0; i<(RTjpeg_height); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- }
- bufy+=yskip;
- }
-}
-
-
-void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
-
- if(stride==0)
- oskip=RTjpeg_width*3;
- else
- oskip=2*stride-RTjpeg_width*3;
-
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*3;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- }
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
-}
-
-
-void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
-
- if(stride==0)
- oskip=RTjpeg_width*4;
- else
- oskip = 2*stride-RTjpeg_width*4;
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/2];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*4;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufoute++;
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufoute++;
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufouto++;
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- bufouto++;
-
- }
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
-}
-
-void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
-
- if(stride==0)
- oskip=RTjpeg_width*3;
- else
- oskip=2*stride - RTjpeg_width*3;
-
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*3;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufoute++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- *(bufouto++)=(tmp>255)?255:((tmp<0)?0:tmp);
-
- }
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
-}
-
-void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride)
-{
- int tmp;
- int i, j;
- __s32 y, crR, crG, cbG, cbB;
- __u8 *bufcr, *bufcb, *bufy, *bufoute, *bufouto;
- int oskip, yskip;
- unsigned char r, g, b;
-
- if(stride==0)
- oskip=RTjpeg_width*2;
- else
- oskip=2*stride-RTjpeg_width*2;
-
- yskip=RTjpeg_width;
-
- bufcb=&buf[RTjpeg_width*RTjpeg_height];
- bufcr=&buf[RTjpeg_width*RTjpeg_height+(RTjpeg_width*RTjpeg_height)/4];
- bufy=&buf[0];
- bufoute=rgb;
- bufouto=rgb+RTjpeg_width*2;
-
- for(i=0; i<(RTjpeg_height>>1); i++)
- {
- for(j=0; j<RTjpeg_width; j+=2)
- {
- crR=(*bufcr-128)*KcrR;
- crG=(*(bufcr++)-128)*KcrG;
- cbG=(*bufcb-128)*KcbG;
- cbB=(*(bufcb++)-128)*KcbB;
-
- y=(bufy[j]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufoute++)=tmp&0xff;
- *(bufoute++)=tmp>>8;
-
-
- y=(bufy[j+1]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufoute++)=tmp&0xff;
- *(bufoute++)=tmp>>8;
-
- y=(bufy[j+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufouto++)=tmp&0xff;
- *(bufouto++)=tmp>>8;
-
- y=(bufy[j+1+yskip]-16)*Ky;
-
- tmp=(y+cbB)>>16;
- b=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y-crG-cbG)>>16;
- g=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(y+crR)>>16;
- r=(tmp>255)?255:((tmp<0)?0:tmp);
- tmp=(int)((int)b >> 3);
- tmp|=(int)(((int)g >> 2) << 5);
- tmp|=(int)(((int)r >> 3) << 11);
- *(bufouto++)=tmp&0xff;
- *(bufouto++)=tmp>>8;
-
- }
- bufoute+=oskip;
- bufouto+=oskip;
- bufy+=yskip<<1;
- }
-}
-
-/* fix stride */
-
-void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride)
-{
- memcpy(rgb, buf, RTjpeg_width*RTjpeg_height);
-}
-
diff --git a/libmpcodecs/native/rtjpegn.h b/libmpcodecs/native/rtjpegn.h
index a854818e2f..859b8238f8 100644
--- a/libmpcodecs/native/rtjpegn.h
+++ b/libmpcodecs/native/rtjpegn.h
@@ -35,27 +35,10 @@
#define __s32 int32_t
#define __s64 int64_t
-extern void RTjpeg_init_Q(__u8 Q);
-extern void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q);
-extern void RTjpeg_init_decompress(__u32 *buf, int width, int height);
-extern int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp);
-extern int RTjpeg_compressYUV422(__s8 *sp, unsigned char *bp);
-extern void RTjpeg_decompressYUV420(__s8 *sp, __u8 *bp);
-extern void RTjpeg_decompressYUV422(__s8 *sp, __u8 *bp);
-extern int RTjpeg_compress8(__s8 *sp, unsigned char *bp);
-extern void RTjpeg_decompress8(__s8 *sp, __u8 *bp);
-
-extern void RTjpeg_init_mcompress(void);
-extern int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask);
-extern int RTjpeg_mcompressYUV422(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask);
-extern int RTjpeg_mcompress8(__s8 *sp, unsigned char *bp, __u16 lmask);
-extern void RTjpeg_set_test(int i);
-
-extern void RTjpeg_yuv420rgb(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuv422rgb(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb8(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb16(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb24(__u8 *buf, __u8 *rgb, int stride);
-extern void RTjpeg_yuvrgb32(__u8 *buf, __u8 *rgb, int stride);
+void RTjpeg_init_compress(__u32 *buf, int width, int height, __u8 Q);
+int RTjpeg_compressYUV420(__s8 *sp, unsigned char *bp);
+
+void RTjpeg_init_mcompress(void);
+int RTjpeg_mcompressYUV420(__s8 *sp, unsigned char *bp, __u16 lmask, __u16 cmask);
#endif /* MPLAYER_RTJPEGN_H */