From 4e753558fc8cc2f77cbcd46fba80d8612e836a1e Mon Sep 17 00:00:00 2001 From: "senorblanco@chromium.org" Date: Mon, 16 Nov 2009 21:09:00 +0000 Subject: More SSE2-ification; fix for gcc -msse2. Review URL: http://codereview.appspot.com/154163 git-svn-id: http://skia.googlecode.com/svn/trunk@428 2bbb7eff-a529-9590-31e7-b0007b416f81 --- src/opts/SkBlitRow_opts_SSE2.cpp | 85 +++++--------------------------- src/opts/SkBlitRow_opts_SSE2.h | 30 +++++++++++ src/opts/SkUtils_opts_SSE2.cpp | 77 +++++++++++++++++++++++++++++ src/opts/SkUtils_opts_SSE2.h | 21 ++++++++ src/opts/SkUtils_opts_none.cpp | 26 ++++++++++ src/opts/opts_check_SSE2.cpp | 104 +++++++++++++++++++++++++++++++++++++++ src/opts/opts_files.mk | 4 +- 7 files changed, 271 insertions(+), 76 deletions(-) create mode 100644 src/opts/SkBlitRow_opts_SSE2.h create mode 100644 src/opts/SkUtils_opts_SSE2.cpp create mode 100644 src/opts/SkUtils_opts_SSE2.h create mode 100644 src/opts/SkUtils_opts_none.cpp create mode 100644 src/opts/opts_check_SSE2.cpp (limited to 'src/opts') diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp index 7428584a89..8983093ee6 100644 --- a/src/opts/SkBlitRow_opts_SSE2.cpp +++ b/src/opts/SkBlitRow_opts_SSE2.cpp @@ -15,47 +15,17 @@ ** limitations under the License. 
*/ -#include "SkBlitRow.h" +#include "SkBlitRow_opts_SSE2.h" #include "SkColorPriv.h" -#include "SkDither.h" #include <emmintrin.h> -#ifdef _MSC_VER -static void getcpuid(int info_type, int info[4]) -{ - __asm { - mov eax, [info_type] - cpuid - mov edi, [info] - mov [edi], eax - mov [edi+4], ebx - mov [edi+8], ecx - mov [edi+12], edx - } -} -#else -static void getcpuid(int info_type, int info[4]) -{ - // We save and restore ebx, so this code can be compatible with -fPIC - asm volatile ( - "pushl %%ebx \n\t" - "cpuid \n\t" - "movl %%ebx, %1 \n\t" - "popl %%ebx \n\t" - : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) - : "a"(info_type) - : - ); -} -#endif - /* SSE2 version of S32_Blend_BlitRow32() * portable version is in core/SkBlitRow_D32.cpp */ -static void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { +void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, + int count, U8CPU alpha) { SkASSERT(alpha <= 255); if (count <= 0) { return; @@ -108,7 +78,7 @@ static void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, src = reinterpret_cast<const SkPMColor*>(s); dst = reinterpret_cast<SkPMColor*>(d); - while (count > 0) { + while (count > 0) { *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale); src++; dst++; @@ -116,9 +86,9 @@ static void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, } } -static void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { +void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, + int count, U8CPU alpha) { SkASSERT(alpha == 255); if (count <= 0) { return; @@ -228,9 +198,9 @@ static void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, } } -static void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, - const SkPMColor* SK_RESTRICT src, - int count, U8CPU alpha) { +void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, 
const SkPMColor* SK_RESTRICT src, + int count, U8CPU alpha) { SkASSERT(alpha <= 255); if (count <= 0) { return; @@ -307,36 +277,3 @@ static void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, count--; } } - -/////////////////////////////////////////////////////////////////////////////// - -static const SkBlitRow::Proc32 platform_32_procs[] = { - NULL, // S32_Opaque, - S32_Blend_BlitRow32_SSE2, // S32_Blend, - S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque - S32A_Blend_BlitRow32_SSE2, // S32A_Blend, -}; - -SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) { - return NULL; -} - -SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { - return NULL; -} - -SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { - static bool once; - static bool hasSSE2; - if (!once) { - int cpu_info[4] = { 0 }; - getcpuid(1, cpu_info); - hasSSE2 = (cpu_info[3] & (1<<26)) != 0; - once = true; - } - if (hasSSE2) { - return platform_32_procs[flags]; - } else { - return NULL; - } -} diff --git a/src/opts/SkBlitRow_opts_SSE2.h b/src/opts/SkBlitRow_opts_SSE2.h new file mode 100644 index 0000000000..c22edd81ff --- /dev/null +++ b/src/opts/SkBlitRow_opts_SSE2.h @@ -0,0 +1,30 @@ +/* + ** + ** Copyright 2009, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ */ + +#include "SkBlitRow.h" + +void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, + int count, U8CPU alpha); + +void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, + int count, U8CPU alpha); + +void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, + const SkPMColor* SK_RESTRICT src, + int count, U8CPU alpha); diff --git a/src/opts/SkUtils_opts_SSE2.cpp b/src/opts/SkUtils_opts_SSE2.cpp new file mode 100644 index 0000000000..053703399b --- /dev/null +++ b/src/opts/SkUtils_opts_SSE2.cpp @@ -0,0 +1,77 @@ +/* + ** + ** Copyright 2009, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#include <emmintrin.h> +#include "SkUtils_opts_SSE2.h" + +void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count) +{ + SkASSERT(dst != NULL && count >= 0); + + // dst must be 2-byte aligned. 
+ SkASSERT((((size_t) dst) & 0x01) == 0); + + if (count >= 32) { + while (((size_t)dst) & 0x0F) { + *dst++ = value; + --count; + } + __m128i *d = reinterpret_cast<__m128i*>(dst); + __m128i value_wide = _mm_set1_epi16(value); + while (count >= 32) { + _mm_store_si128(d++, value_wide); + _mm_store_si128(d++, value_wide); + _mm_store_si128(d++, value_wide); + _mm_store_si128(d++, value_wide); + count -= 32; + } + dst = reinterpret_cast<uint16_t*>(d); + } + while (count > 0) { + *dst++ = value; + --count; + } +} + +void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count) +{ + SkASSERT(dst != NULL && count >= 0); + + // dst must be 4-byte aligned. + SkASSERT((((size_t) dst) & 0x03) == 0); + + if (count >= 16) { + while (((size_t)dst) & 0x0F) { + *dst++ = value; + --count; + } + __m128i *d = reinterpret_cast<__m128i*>(dst); + __m128i value_wide = _mm_set1_epi32(value); + while (count >= 16) { + _mm_store_si128(d++, value_wide); + _mm_store_si128(d++, value_wide); + _mm_store_si128(d++, value_wide); + _mm_store_si128(d++, value_wide); + count -= 16; + } + dst = reinterpret_cast<uint32_t*>(d); + } + while (count > 0) { + *dst++ = value; + --count; + } +} diff --git a/src/opts/SkUtils_opts_SSE2.h b/src/opts/SkUtils_opts_SSE2.h new file mode 100644 index 0000000000..a54e82f0a6 --- /dev/null +++ b/src/opts/SkUtils_opts_SSE2.h @@ -0,0 +1,21 @@ +/* + ** + ** Copyright 2009, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ */ + +#include "SkTypes.h" + +void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count); +void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count); diff --git a/src/opts/SkUtils_opts_none.cpp b/src/opts/SkUtils_opts_none.cpp new file mode 100644 index 0000000000..108ce9cc09 --- /dev/null +++ b/src/opts/SkUtils_opts_none.cpp @@ -0,0 +1,26 @@ +/* + ** + ** Copyright 2009, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + */ + +#include "SkUtils.h" + +SkMemset16Proc SkMemset16GetPlatformProc() { + return NULL; +} + +SkMemset32Proc SkMemset32GetPlatformProc() { + return NULL; +} diff --git a/src/opts/opts_check_SSE2.cpp b/src/opts/opts_check_SSE2.cpp new file mode 100644 index 0000000000..4757ed859e --- /dev/null +++ b/src/opts/opts_check_SSE2.cpp @@ -0,0 +1,104 @@ +/* + ** + ** Copyright 2009, The Android Open Source Project + ** + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** + ** http://www.apache.org/licenses/LICENSE-2.0 + ** + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ */ + +#include "SkBlitRow_opts_SSE2.h" +#include "SkUtils_opts_SSE2.h" +#include "SkUtils.h" + +/* This file must *not* be compiled with -msse or -msse2, otherwise + gcc may generate sse2 even for scalar ops (and thus give an invalid + instruction on Pentium3 on the code below). Only files named *_SSE2.cpp + in this directory should be compiled with -msse2. */ + +#ifdef __x86_64__ +/* All x86_64 machines have SSE2, so don't even bother checking. */ +static inline bool hasSSE2() { + return true; +} +#else +#ifdef _MSC_VER +static inline void getcpuid(int info_type, int info[4]) { + __asm { + mov eax, [info_type] + cpuid + mov edi, [info] + mov [edi], eax + mov [edi+4], ebx + mov [edi+8], ecx + mov [edi+12], edx + } +} +#else +static inline void getcpuid(int info_type, int info[4]) { + // We save and restore ebx, so this code can be compatible with -fPIC + asm volatile ( + "pushl %%ebx \n\t" + "cpuid \n\t" + "movl %%ebx, %1 \n\t" + "popl %%ebx \n\t" + : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) + : "a"(info_type) + : + ); +} +#endif + +static inline bool hasSSE2() { + int cpu_info[4] = { 0 }; + getcpuid(1, cpu_info); + return (cpu_info[3] & (1<<26)) != 0; +} +#endif + +static SkBlitRow::Proc32 platform_32_procs[] = { + NULL, // S32_Opaque, + S32_Blend_BlitRow32_SSE2, // S32_Blend, + S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque + S32A_Blend_BlitRow32_SSE2, // S32A_Blend, +}; + +SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) { + return NULL; +} + +SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { + return NULL; +} + +SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { + if (hasSSE2()) { + return platform_32_procs[flags]; + } else { + return NULL; + } +} + +SkMemset16Proc SkMemset16GetPlatformProc() { + if (hasSSE2()) { + return sk_memset16_SSE2; + } else { + return NULL; + } +} + +SkMemset32Proc SkMemset32GetPlatformProc() { + if (hasSSE2()) { + return sk_memset32_SSE2; + } else { + return NULL; + } +} diff --git 
a/src/opts/opts_files.mk b/src/opts/opts_files.mk index d756f68a68..ae8fd7764b 100644 --- a/src/opts/opts_files.mk +++ b/src/opts/opts_files.mk @@ -1,4 +1,4 @@ SOURCE := \ SkBlitRow_opts_none.cpp \ - SkBitmapProcState_opts_none.cpp - + SkBitmapProcState_opts_none.cpp \ + SkUtils_opts_none.cpp -- cgit v1.2.3