diff options
author | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2002-02-12 23:17:14 +0000 |
---|---|---|
committer | michael <michael@b3059339-0415-0410-9bf9-f77b7e298cf2> | 2002-02-12 23:17:14 +0000 |
commit | 7a4350e4c14add138b66f2c0c43b2796cf01094c (patch) | |
tree | f48fbac27b35728987935063b9c0ef2b6c464b6d /libvo | |
parent | 7891bbb9e939df7b9f6267adb4b6950ce5de325a (diff) |
mem2agpcpy()
git-svn-id: svn://svn.mplayerhq.hu/mplayer/trunk@4682 b3059339-0415-0410-9bf9-f77b7e298cf2
Diffstat (limited to 'libvo')
-rw-r--r-- | libvo/aclib.c | 32 | ||||
-rw-r--r-- | libvo/aclib_template.c | 85 | ||||
-rw-r--r-- | libvo/fastmemcpy.h | 10 |
3 files changed, 124 insertions, 3 deletions
diff --git a/libvo/aclib.c b/libvo/aclib.c
index a2931739ea..f569f58460 100644
--- a/libvo/aclib.c
+++ b/libvo/aclib.c
@@ -118,4 +118,34 @@ inline void * fast_memcpy(void * to, const void * from, size_t len)
 #endif //!RUNTIME_CPUDETECT
 }
 
-#endif /* use fastmemcpy */
\ No newline at end of file
+/* Copy len bytes from main memory to AGP/PCI memory via the MMX2, 3DNow or
+ * MMX variant (plain memcpy otherwise); returns the destination pointer to. */
+inline void * mem2agpcpy(void * to, const void * from, size_t len)
+{
+#ifdef RUNTIME_CPUDETECT
+#ifdef CAN_COMPILE_X86_ASM
+	// ordered by speed, fastest first
+	if(gCpuCaps.hasMMX2)
+		return mem2agpcpy_MMX2(to, from, len);
+	else if(gCpuCaps.has3DNow)
+		return mem2agpcpy_3DNow(to, from, len);
+	else if(gCpuCaps.hasMMX)
+		return mem2agpcpy_MMX(to, from, len);
+	else
+#endif //CAN_COMPILE_X86_ASM
+		return memcpy(to, from, len); // prior to MMX we use the standard memcpy
+#else
+#ifdef HAVE_MMX2
+	return mem2agpcpy_MMX2(to, from, len);
+#elif defined (HAVE_3DNOW)
+	return mem2agpcpy_3DNow(to, from, len);
+#elif defined (HAVE_MMX)
+	return mem2agpcpy_MMX(to, from, len);
+#else
+	return memcpy(to, from, len); // prior to MMX we use the standard memcpy
+#endif
+#endif //!RUNTIME_CPUDETECT
+}
+
+#endif /* use fastmemcpy */
+
diff --git a/libvo/aclib_template.c b/libvo/aclib_template.c
index 9e444c4593..702b8aaa9a 100644
--- a/libvo/aclib_template.c
+++ b/libvo/aclib_template.c
@@ -353,3 +353,88 @@ static inline void * RENAME(fast_memcpy)(void * to, const void * from, size_t le
 	if(len) small_memcpy(to, from, len);
 	return retval;
 }
+
+/**
+ * Special copy routine for mem -> AGP/PCI copies (based upon fast_memcpy).
+ * Copies len bytes from from to to and returns the original value of to.
+ * Blocks of at least MIN_LEN bytes are moved 64 bytes at a time with MMX
+ * loads and MOVNTQ-macro stores; everything else goes through small_memcpy.
+ */
+static inline void * RENAME(mem2agpcpy)(void * to, const void * from, size_t len)
+{
+	void *retval;
+	size_t i;
+	retval = to;
+#ifdef STATISTICS
+	{
+		static int freq[33];
+		static int t=0;
+		int i;
+		for(i=0; i<32 && len>((size_t)1<<i); i++);
+		freq[i]++;
+		t++;
+		if(1024*1024*1024 % t == 0)
+			for(i=0; i<32; i++)
+				printf("mem2agp freq < %8d %4d\n", 1<<i, freq[i]);
+	}
+#endif
+	if(len >= MIN_LEN)
+	{
+		register unsigned long int delta;
+		/* Align destination to an 8-byte (MMX register) boundary.
+		 * NOTE(review): presumably small_memcpy advances to/from itself
+		 * (it is defined outside this hunk) -- verify. */
+		delta = ((unsigned long int)to)&7;
+		if(delta)
+		{
+			delta=8-delta;
+			len -= delta;
+			small_memcpy(to, from, delta);
+		}
+		i = len >> 6; /* len/64 */
+		len &= 63;
+		/* Most effective when whole cache lines are read and written in
+		   sequence. The line size is processor-dependent, but at least
+		   32 bytes. Ideally the load/store count would be a multiple of
+		   the number of instruction decoders; that is not always possible. */
+		for(; i>0; i--)
+		{
+			__asm__ __volatile__ (
+			PREFETCH" 320(%0)\n"
+			"movq (%0), %%mm0\n"
+			"movq 8(%0), %%mm1\n"
+			"movq 16(%0), %%mm2\n"
+			"movq 24(%0), %%mm3\n"
+			"movq 32(%0), %%mm4\n"
+			"movq 40(%0), %%mm5\n"
+			"movq 48(%0), %%mm6\n"
+			"movq 56(%0), %%mm7\n"
+			MOVNTQ" %%mm0, (%1)\n"
+			MOVNTQ" %%mm1, 8(%1)\n"
+			MOVNTQ" %%mm2, 16(%1)\n"
+			MOVNTQ" %%mm3, 24(%1)\n"
+			MOVNTQ" %%mm4, 32(%1)\n"
+			MOVNTQ" %%mm5, 40(%1)\n"
+			MOVNTQ" %%mm6, 48(%1)\n"
+			MOVNTQ" %%mm7, 56(%1)\n"
+			:: "r" (from), "r" (to) : "memory");
+			from = (const unsigned char *)from + 64;
+			to = (unsigned char *)to + 64;
+		}
+#ifdef HAVE_MMX2
+		/* since movntq is weakly-ordered, a "sfence"
+		 * is needed to become ordered again. */
+		__asm__ __volatile__ ("sfence":::"memory");
+#endif
+#ifndef HAVE_SSE
+		/* EMMS re-enables use of the FPU after the MMX code above.
+		 * NOTE(review): the loop uses %mm0-%mm7 even when HAVE_SSE is
+		 * defined, so skipping EMMS in that case looks wrong -- confirm. */
+		__asm__ __volatile__ (EMMS:::"memory");
+#endif
+	}
+	/* Now do the tail of the block */
+	if(len) small_memcpy(to, from, len);
+	return retval;
+}
+
diff --git a/libvo/fastmemcpy.h b/libvo/fastmemcpy.h
index cff2846bc8..aee1e786cd 100644
--- a/libvo/fastmemcpy.h
+++ b/libvo/fastmemcpy.h
@@ -9,8 +9,14 @@
 #include <stddef.h>
 
 extern void * fast_memcpy(void * to, const void * from, size_t len);
+extern void * mem2agpcpy(void * to, const void * from, size_t len);
 #define memcpy(a,b,c) fast_memcpy(a,b,c)
 
-#endif /* HAVE_MMX/MMX2/3DNOW/SSE/SSE2 */
-#endif /* USE_FASTMEMCPY */
+#else /* HAVE_MMX/MMX2/3DNOW/SSE/SSE2 */
+#define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#endif
+
+#else /* USE_FASTMEMCPY */
+#define mem2agpcpy(a,b,c) memcpy(a,b,c)
+#endif
 #endif