diff options
-rw-r--r-- | include/core/SkBlitRow.h | 17 | ||||
-rw-r--r-- | src/core/SkBlitRow_D16.cpp | 4 | ||||
-rw-r--r-- | src/core/SkBlitRow_D32.cpp | 2 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_SSE2.cpp | 336 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_arm.cpp | 17 | ||||
-rw-r--r-- | src/opts/SkBlitRow_opts_none.cpp | 42 | ||||
-rw-r--r-- | src/ports/SkTime_win.cpp | 46 |
7 files changed, 418 insertions, 46 deletions
diff --git a/include/core/SkBlitRow.h b/include/core/SkBlitRow.h
index dbbd84d182..6560d488d0 100644
--- a/include/core/SkBlitRow.h
+++ b/include/core/SkBlitRow.h
@@ -65,18 +65,21 @@ public:
         Color32(row, row, count, color);
     }
 
+    /** These static functions are called by the Factory and Factory32
+        functions, and should return either NULL, or a
+        platform-specific function-ptr to be used in place of the
+        system default.
+     */
+
+    static const Proc32 PlatformProcs32(unsigned flags);
+    static const Proc PlatformProcs565(unsigned flags);
+    static const Proc PlatformProcs4444(unsigned flags);
+
 private:
     enum {
         kFlags16_Mask = 7,
         kFlags32_Mask = 3
     };
-    /** These global arrays are indexed using the flags parameter to Factory,
-        and contain either NULL, or a platform-specific function-ptr to be used
-        in place of the system default.
-     */
-    static const Proc gPlatform_565_Procs[];
-    static const Proc gPlatform_4444_Procs[];
-    static const Proc32 gPlatform_Procs32[];
 };
 
 #endif
diff --git a/src/core/SkBlitRow_D16.cpp b/src/core/SkBlitRow_D16.cpp
index 66ac90e29a..07c42ceb0b 100644
--- a/src/core/SkBlitRow_D16.cpp
+++ b/src/core/SkBlitRow_D16.cpp
@@ -242,13 +242,13 @@ SkBlitRow::Proc SkBlitRow::Factory(unsigned flags, SkBitmap::Config config) {
 
     switch (config) {
         case SkBitmap::kRGB_565_Config:
-            proc = gPlatform_565_Procs[flags];
+            proc = PlatformProcs565(flags);
             if (NULL == proc) {
                 proc = gDefault_565_Procs[flags];
             }
             break;
         case SkBitmap::kARGB_4444_Config:
-            proc = gPlatform_4444_Procs[flags];
+            proc = PlatformProcs4444(flags);
             if (NULL == proc) {
                 proc = SkBlitRow_Factory_4444(flags);
             }
diff --git a/src/core/SkBlitRow_D32.cpp b/src/core/SkBlitRow_D32.cpp
index f67bb9abae..003602566e 100644
--- a/src/core/SkBlitRow_D32.cpp
+++ b/src/core/SkBlitRow_D32.cpp
@@ -78,7 +78,7 @@ SkBlitRow::Proc32 SkBlitRow::Factory32(unsigned flags) {
     // just so we don't crash
     flags &= kFlags32_Mask;
 
-    SkBlitRow::Proc32 proc = gPlatform_Procs32[flags];
+    SkBlitRow::Proc32 proc = PlatformProcs32(flags);
     if (NULL == proc) {
         proc = gDefault_Procs32[flags];
     }
diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp
new file mode 100644
index 0000000000..68c751923d
--- /dev/null
+++ b/src/opts/SkBlitRow_opts_SSE2.cpp
@@ -0,0 +1,336 @@
+/*
+ **
+ ** Copyright 2009, The Android Open Source Project
+ **
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ **
+ **     http://www.apache.org/licenses/LICENSE-2.0
+ **
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ */
+
+#include "SkBlitRow.h"
+#include "SkColorPriv.h"
+#include "SkDither.h"
+
+#include <emmintrin.h>
+
+// Runs CPUID leaf |info_type|; stores EAX, EBX, ECX, EDX in info[0..3].
+#ifdef _MSC_VER
+#include <intrin.h>  // __cpuid(); unlike inline __asm it also builds for x64
+static void getcpuid(int info_type, int info[4])
+{
+    __cpuid(info, info_type);
+}
+#else
+static void getcpuid(int info_type, int info[4])
+{
+#if defined(__i386__)
+    // EBX may hold the PIC/GOT pointer on 32-bit x86: park it in EDI around CPUID.
+    asm volatile("movl %%ebx, %%edi\n\tcpuid\n\txchgl %%ebx, %%edi"
+        : "=a" (info[0]), "=D" (info[1]), "=c" (info[2]), "=d" (info[3])
+        : "a"(info_type));
+#else
+    asm volatile("cpuid"
+        : "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
+        : "a"(info_type));
+#endif
+}
+#endif
+
+/* SSE2 version of S32_Blend_BlitRow32() (portable version: core/SkBlitRow_D32.cpp).
+ * Per 8-bit channel: dst = (src * src_scale >> 8) + (dst * (256 - src_scale) >> 8),
+ * four pixels per pass; the 0..3 leftover pixels use the scalar SkAlphaMulQ() path. */
+static void S32_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
+                                     const SkPMColor* SK_RESTRICT src,
+                                     int count, U8CPU alpha) {
+    SkASSERT(alpha <= 255);
+    if (count <= 0) {
+        return;
+    }
+
+    uint32_t src_scale = SkAlpha255To256(alpha);
+    uint32_t dst_scale = 256 - src_scale;
+
+    const __m128i *s = reinterpret_cast<const __m128i*>(src);
+    __m128i *d = reinterpret_cast<__m128i*>(dst);
+    __m128i rb_mask = _mm_set1_epi32(0x00FF00FF);
+    __m128i src_scale_wide = _mm_set1_epi16(src_scale);
+    __m128i dst_scale_wide = _mm_set1_epi16(dst_scale);
+    while (count >= 4) {
+        // Load 4 pixels each of src and dest.
+        __m128i src_pixel = _mm_loadu_si128(s);
+        __m128i dst_pixel = _mm_loadu_si128(d);
+
+        // Get red and blue pixels into lower byte of each word.
+        __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel);
+        __m128i src_rb = _mm_and_si128(rb_mask, src_pixel);
+
+        // Get alpha and green into lower byte of each word.
+        __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8);
+        __m128i src_ag = _mm_srli_epi16(src_pixel, 8);
+
+        // Multiply by scale.
+        src_rb = _mm_mullo_epi16(src_rb, src_scale_wide);
+        src_ag = _mm_mullo_epi16(src_ag, src_scale_wide);
+        dst_rb = _mm_mullo_epi16(dst_rb, dst_scale_wide);
+        dst_ag = _mm_mullo_epi16(dst_ag, dst_scale_wide);
+
+        // Divide by 256.
+        src_rb = _mm_srli_epi16(src_rb, 8);
+        dst_rb = _mm_srli_epi16(dst_rb, 8);
+        src_ag = _mm_andnot_si128(rb_mask, src_ag);    // high byte == product >> 8, already in the A/G slot
+        dst_ag = _mm_andnot_si128(rb_mask, dst_ag);
+
+        // Combine back into RGBA.
+        src_pixel = _mm_or_si128(src_rb, src_ag);
+        dst_pixel = _mm_or_si128(dst_rb, dst_ag);
+
+        // Add result
+        __m128i result = _mm_add_epi8(src_pixel, dst_pixel);
+        _mm_storeu_si128(d, result);
+        s++;
+        d++;
+        count -= 4;
+    }
+
+    src = reinterpret_cast<const SkPMColor*>(s);
+    dst = reinterpret_cast<SkPMColor*>(d);
+    while (count > 0) {
+        *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale);
+        src++;
+        dst++;
+        count--;
+    }
+}
+// SSE2 S32A_Opaque (alpha must be 255): dst = src + dst scaled by the inverse of each src pixel's alpha.
+static void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
+                                       const SkPMColor* SK_RESTRICT src,
+                                       int count, U8CPU alpha) {
+    SkASSERT(alpha == 255);
+    if (count <= 0) {
+        return;
+    }
+    const __m128i *s = reinterpret_cast<const __m128i*>(src);
+    __m128i *d = reinterpret_cast<__m128i*>(dst);
+#ifdef SK_USE_ACCURATE_BLENDING
+    __m128i rb_mask = _mm_set1_epi32(0x00FF00FF);
+    __m128i c_128 = _mm_set1_epi16(128);  // 8 copies of 128 (16-bit)
+    __m128i c_255 = _mm_set1_epi16(255);  // 8 copies of 255 (16-bit)
+    while (count >= 4) {
+        // Load 4 pixels
+        __m128i src_pixel = _mm_loadu_si128(s);
+        __m128i dst_pixel = _mm_loadu_si128(d);
+
+        __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel);
+        __m128i dst_ag = _mm_andnot_si128(rb_mask, dst_pixel);
+        dst_ag = _mm_srli_epi16(dst_ag, 8);
+        // Shift alphas down to lower 8 bits of each quad.
+        __m128i alpha = _mm_srli_epi32(src_pixel, 24);
+
+        // Copy alpha to upper 3rd byte of each quad
+        alpha = _mm_or_si128(alpha, _mm_slli_epi32(alpha, 16));
+
+        // Subtract alphas from 255, to get 0..255
+        alpha = _mm_sub_epi16(c_255, alpha);
+
+        // Multiply by red and blue by src alpha.
+        dst_rb = _mm_mullo_epi16(dst_rb, alpha);
+        // Multiply by alpha and green by src alpha.
+        dst_ag = _mm_mullo_epi16(dst_ag, alpha);
+
+        // dst_rb_low = (dst_rb >> 8)
+        __m128i dst_rb_low = _mm_srli_epi16(dst_rb, 8);
+        __m128i dst_ag_low = _mm_srli_epi16(dst_ag, 8);
+
+        // dst_rb = (dst_rb + dst_rb_low + 128) >> 8
+        dst_rb = _mm_add_epi16(dst_rb, dst_rb_low);
+        dst_rb = _mm_add_epi16(dst_rb, c_128);
+        dst_rb = _mm_srli_epi16(dst_rb, 8);
+
+        // dst_ag = (dst_ag + dst_ag_low + 128) & ag_mask
+        dst_ag = _mm_add_epi16(dst_ag, dst_ag_low);
+        dst_ag = _mm_add_epi16(dst_ag, c_128);
+        dst_ag = _mm_andnot_si128(rb_mask, dst_ag);
+
+        // Combine back into RGBA.
+        dst_pixel = _mm_or_si128(dst_rb, dst_ag);
+
+        // Add result
+        __m128i result = _mm_add_epi8(src_pixel, dst_pixel);
+        _mm_storeu_si128(d, result);
+        s++;
+        d++;
+        count -= 4;
+    }
+#else
+    __m128i rb_mask = _mm_set1_epi32(0x00FF00FF);
+    __m128i c_256 = _mm_set1_epi16(0x0100);  // 8 copies of 256 (16-bit)
+    while (count >= 4) {
+        // Load 4 pixels
+        __m128i src_pixel = _mm_loadu_si128(s);
+        __m128i dst_pixel = _mm_loadu_si128(d);
+
+        __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel);
+        __m128i dst_ag = _mm_andnot_si128(rb_mask, dst_pixel);
+        dst_ag = _mm_srli_epi16(dst_ag, 8);
+        // Shift alphas down to lower 8 bits of each quad.
+        __m128i alpha = _mm_srli_epi32(src_pixel, 24);
+
+        // Copy alpha to upper 3rd byte of each quad
+        alpha = _mm_or_si128(alpha, _mm_slli_epi32(alpha, 16));
+
+        // Subtract alphas from 256, to get 1..256
+        alpha = _mm_sub_epi16(c_256, alpha);
+
+        // Multiply by red and blue by src alpha.
+        dst_rb = _mm_mullo_epi16(dst_rb, alpha);
+        // Multiply by alpha and green by src alpha.
+        dst_ag = _mm_mullo_epi16(dst_ag, alpha);
+
+        // Divide by 256.
+        dst_rb = _mm_srli_epi16(dst_rb, 8);
+
+        // Mask out high bits (already in the right place)
+        dst_ag = _mm_andnot_si128(rb_mask, dst_ag);
+
+        // Combine back into RGBA.
+        dst_pixel = _mm_or_si128(dst_rb, dst_ag);
+
+        // Add result
+        __m128i result = _mm_add_epi8(src_pixel, dst_pixel);
+        _mm_storeu_si128(d, result);
+        s++;
+        d++;
+        count -= 4;
+    }
+#endif
+
+    src = reinterpret_cast<const SkPMColor*>(s);
+    dst = reinterpret_cast<SkPMColor*>(d);
+    while (count > 0) {
+        *dst = SkPMSrcOver(*src, *dst);
+        src++;
+        dst++;
+        count--;
+    }
+}
+// SSE2 S32A_Blend: src is scaled by src_scale, dst by 256 - (src alpha * src_scale >> 8), then both are added.
+static void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
+                                      const SkPMColor* SK_RESTRICT src,
+                                      int count, U8CPU alpha) {
+    SkASSERT(alpha <= 255);
+    if (count <= 0) {
+        return;
+    }
+
+    uint32_t src_scale = SkAlpha255To256(alpha);
+
+    const __m128i *s = reinterpret_cast<const __m128i*>(src);
+    __m128i *d = reinterpret_cast<__m128i*>(dst);
+    __m128i src_scale_wide = _mm_set1_epi16(src_scale);
+    __m128i rb_mask = _mm_set1_epi32(0x00FF00FF);
+    __m128i c_256 = _mm_set1_epi16(256);  // 8 copies of 256 (16-bit)
+    while (count >= 4) {
+        // Load 4 pixels each of src and dest.
+        __m128i src_pixel = _mm_loadu_si128(s);
+        __m128i dst_pixel = _mm_loadu_si128(d);
+
+        // Get red and blue pixels into lower byte of each word.
+        __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel);
+        __m128i src_rb = _mm_and_si128(rb_mask, src_pixel);
+
+        // Get alpha and green into lower byte of each word.
+        __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8);
+        __m128i src_ag = _mm_srli_epi16(src_pixel, 8);
+
+        // Put per-pixel alpha in low byte of each word.
+        __m128i dst_alpha = _mm_shufflehi_epi16(src_ag, 0xF5);
+        dst_alpha = _mm_shufflelo_epi16(dst_alpha, 0xF5);
+
+        // dst_alpha = dst_alpha * src_scale
+        dst_alpha = _mm_mullo_epi16(dst_alpha, src_scale_wide);
+
+        // Divide by 256.
+        dst_alpha = _mm_srli_epi16(dst_alpha, 8);
+
+        // Subtract alphas from 256, to get 1..256
+        dst_alpha = _mm_sub_epi16(c_256, dst_alpha);
+
+        // Multiply red and blue by dst pixel alpha.
+        dst_rb = _mm_mullo_epi16(dst_rb, dst_alpha);
+        // Multiply alpha and green by dst pixel alpha.
+        dst_ag = _mm_mullo_epi16(dst_ag, dst_alpha);
+
+        // Multiply red and blue by global alpha.
+        src_rb = _mm_mullo_epi16(src_rb, src_scale_wide);
+        // Multiply alpha and green by global alpha.
+        src_ag = _mm_mullo_epi16(src_ag, src_scale_wide);
+
+        // Divide by 256.
+        dst_rb = _mm_srli_epi16(dst_rb, 8);
+        src_rb = _mm_srli_epi16(src_rb, 8);
+
+        // Mask out low bits (goodies already in the right place; no need to divide)
+        dst_ag = _mm_andnot_si128(rb_mask, dst_ag);
+        src_ag = _mm_andnot_si128(rb_mask, src_ag);
+
+        // Combine back into RGBA.
+        dst_pixel = _mm_or_si128(dst_rb, dst_ag);
+        src_pixel = _mm_or_si128(src_rb, src_ag);
+
+        // Add two pixels into result.
+        __m128i result = _mm_add_epi8(src_pixel, dst_pixel);
+        _mm_storeu_si128(d, result);
+        s++;
+        d++;
+        count -= 4;
+    }
+    src = reinterpret_cast<const SkPMColor*>(s);
+    dst = reinterpret_cast<SkPMColor*>(d);
+    while (count > 0) {
+        *dst = SkBlendARGB32(*src, *dst, alpha);
+        src++;
+        dst++;
+        count--;
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+static const SkBlitRow::Proc32 platform_32_procs[] = {
+    NULL,                               // S32_Opaque,
+    S32_Blend_BlitRow32_SSE2,           // S32_Blend,
+    S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
+    S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
+};
+// No SSE2 overrides exist for 4444 or 565 destinations, so both accessors below return NULL.
+const SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
+    return NULL;
+}
+
+const SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
+    return NULL;
+}
+// Returns the SSE2 proc for |flags|, or NULL if the table has none or CPUID reports no SSE2 (probed once).
+const SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
+    static bool once;       // NOTE(review): unsynchronized lazy init; confirm callers tolerate the race
+    static bool hasSSE2;
+    if (!once) {
+        int cpu_info[4] = { 0 };
+        getcpuid(1, cpu_info);
+        hasSSE2 = (cpu_info[3] & (1<<26)) != 0;    // CPUID leaf 1: EDX bit 26 is the SSE2 feature flag
+        once = true;
+    }
+    if (hasSSE2) {
+        return platform_32_procs[flags & kFlags32_Mask];    // mask defensively: this accessor is public
+    } else {
+        return NULL;
+    }
+}
diff --git a/src/opts/SkBlitRow_opts_arm.cpp b/src/opts/SkBlitRow_opts_arm.cpp
index be1cbdf2b9..44550daf9e 100644
--- a/src/opts/SkBlitRow_opts_arm.cpp
+++ b/src/opts/SkBlitRow_opts_arm.cpp
@@ -976,7 +976,7 @@ static
void S32_D565_Opaque_Dither_neon(uint16_t* SK_RESTRICT dst,
 
 ///////////////////////////////////////////////////////////////////////////////
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_565_Procs[] = {
+static const SkBlitRow::Proc platform_565_procs[] = {
     // no dither
     S32_D565_Opaque_PROC,
     S32_D565_Blend_PROC,
@@ -990,7 +990,7 @@ const SkBlitRow::Proc SkBlitRow::gPlatform_565_Procs[] = {
     NULL,   // S32A_D565_Blend_Dither
 };
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_4444_Procs[] = {
+static const SkBlitRow::Proc platform_4444_procs[] = {
     // no dither
     NULL,   // S32_D4444_Opaque,
     NULL,   // S32_D4444_Blend,
@@ -1004,10 +1004,21 @@ const SkBlitRow::Proc SkBlitRow::gPlatform_4444_Procs[] = {
     NULL,   // S32A_D4444_Blend_Dither
 };
 
-const SkBlitRow::Proc32 SkBlitRow::gPlatform_Procs32[] = {
+static const SkBlitRow::Proc32 platform_32_procs[] = {
     NULL,   // S32_Opaque,
     S32_Blend_BlitRow32_PROC,   // S32_Blend,
     S32A_Opaque_BlitRow32_PROC,  // S32A_Opaque,
     NULL,   // S32A_Blend,
 };
 
+const SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
+    return platform_4444_procs[flags & kFlags16_Mask];  // mask defensively: this accessor is public
+}
+
+const SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
+    return platform_565_procs[flags & kFlags16_Mask];   // mask defensively: this accessor is public
+}
+
+const SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
+    return platform_32_procs[flags & kFlags32_Mask];    // mask defensively: this accessor is public
+}
diff --git a/src/opts/SkBlitRow_opts_none.cpp b/src/opts/SkBlitRow_opts_none.cpp
index 7a777597c8..15b999bfe0 100644
--- a/src/opts/SkBlitRow_opts_none.cpp
+++ b/src/opts/SkBlitRow_opts_none.cpp
@@ -2,38 +2,14 @@
 
 // Platform impl of Platform_procs with no overrides
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_565_Procs[] = {
-    // no dither
-    NULL,   // S32_D565_Opaque,
-    NULL,   // S32_D565_Blend,
-    NULL,   // S32A_D565_Opaque,
-    NULL,   // S32A_D565_Blend,
-
-    // dither
-    NULL,   // S32_D565_Opaque_Dither,
-    NULL,   // S32_D565_Blend_Dither,
-    NULL,   // S32A_D565_Opaque_Dither,
-    NULL,   // S32A_D565_Blend_Dither
-};
+const SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
+    return NULL;    // no platform override: callers fall back to their default proc
+}
 
-const SkBlitRow::Proc SkBlitRow::gPlatform_4444_Procs[] = {
-    // no dither
-    NULL,   // S32_D4444_Opaque,
-    NULL,   // S32_D4444_Blend,
-    NULL,   // S32A_D4444_Opaque,
-    NULL,   // S32A_D4444_Blend,
-
-    // dither
-    NULL,   // S32_D4444_Opaque_Dither,
-    NULL,   // S32_D4444_Blend_Dither,
-    NULL,   // S32A_D4444_Opaque_Dither,
-    NULL,   // S32A_D4444_Blend_Dither
-};
-
-const SkBlitRow::Proc32 SkBlitRow::gPlatform_Procs32[] = {
-    NULL,   // S32_Opaque,
-    NULL,   // S32_Blend,
-    NULL,   // S32A_Opaque,
-    NULL,   // S32A_Blend,
-};
+const SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
+    return NULL;    // no platform override
+}
 
+const SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
+    return NULL;    // no platform override
+}
diff --git a/src/ports/SkTime_win.cpp b/src/ports/SkTime_win.cpp
new file mode 100644
index 0000000000..5194b028cb
--- /dev/null
+++ b/src/ports/SkTime_win.cpp
@@ -0,0 +1,46 @@
+/* libs/graphics/ports/SkTime_win.cpp
+**
+** Copyright 2009, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+#include "SkTime.h"
+// Fills *dt (ignored when NULL) from GetSystemTime(), which Win32 documents as reporting UTC.
+void SkTime::GetDateTime(DateTime* dt)
+{
+    if (dt)
+    {
+        SYSTEMTIME st;
+        GetSystemTime(&st);
+
+        dt->fYear       = st.wYear;
+        dt->fMonth      = SkToU8(st.wMonth);    // wMonth is already 1..12; assumes fMonth is 1-based too -- TODO confirm in SkTime.h
+        dt->fDayOfWeek  = SkToU8(st.wDayOfWeek);
+        dt->fDay        = SkToU8(st.wDay);
+        dt->fHour       = SkToU8(st.wHour);
+        dt->fMinute     = SkToU8(st.wMinute);
+        dt->fSecond     = SkToU8(st.wSecond);
+    }
+}
+// Returns the system FILETIME clock converted to milliseconds and narrowed to SkMSec.
+SkMSec SkTime::GetMSecs()
+{
+    FILETIME        ft;
+    LARGE_INTEGER   li;
+    GetSystemTimeAsFileTime(&ft);
+    li.LowPart  = ft.dwLowDateTime;
+    li.HighPart = ft.dwHighDateTime;
+    __int64 t  = li.QuadPart;       /* In 100-nanosecond intervals */
+    return static_cast<SkMSec>(t / 10000);  /* In milliseconds; NOTE(review): presumably wraps if SkMSec is 32-bit */
+}