aboutsummaryrefslogtreecommitdiffhomepage
path: root/include/core/SkColorPriv.h
diff options
context:
space:
mode:
authorGravatar henrik.smiding <henrik.smiding@intel.com>2015-03-20 09:20:46 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-03-20 09:20:46 -0700
commit70840cbd898df67f603987213164c798415d76bf (patch)
treed6119805a17a04ace94a75748204621f090b4fe2 /include/core/SkColorPriv.h
parent86ad8d643624a55b02e529100bbe4e2940115fa1 (diff)
Replace SSE optimization of Color32A_D565
Adds an SSE2 version of the Color32A_D565 function to replace the existing SSE4 version, and does some minor cleanup. Performance improves in the following Skia benchmarks. Measured on Atom Silvermont: Xfermode_SrcOver - x3; luma_colorfilter_large - x4.6; luma_colorfilter_small - x2; tablebench - ~15%; chart_bw - ~10%. Measured on Core i7 Haswell: luma_colorfilter_large running SSE2 - x2; luma_colorfilter_large running SSE4 - x2.3. Also improves performance in the WPS Office application and in the 2D subtest of 0xbenchmark on Android. Signed-off-by: Henrik Smiding <henrik.smiding@intel.com> Review URL: https://codereview.chromium.org/923523002
Diffstat (limited to 'include/core/SkColorPriv.h')
-rw-r--r--include/core/SkColorPriv.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/include/core/SkColorPriv.h b/include/core/SkColorPriv.h
index 9db768783f..15c94ac68c 100644
--- a/include/core/SkColorPriv.h
+++ b/include/core/SkColorPriv.h
@@ -281,6 +281,16 @@ static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
// This helper casts the result of SkAlphaMulRGB16(c, s) to uint16_t, so it explicitly returns a clean 16-bit value (but is slower).
#define SkAlphaMulRGB16_ToU16(c, s) (uint16_t)SkAlphaMulRGB16(c, s)
+/** Blend a pre-expanded 32-bit source with the 16-bit color dst by the 0..32 scale parameter:
+ dst is expanded with SkExpand_rgb_16 and multiplied by scale, src_expand is added unscaled, and the sum is shifted right by 5 and packed with SkCompact_rgb_16. NOTE(review): src_expand is presumably already in the expanded layout and already weighted by the caller -- confirm against callers.
+ The computation yields only 16 bits of valid data, but we claim to return
+ 32 bits, so that the compiler won't generate extra instructions to "clean" the top 16 bits.
+*/
+static inline U16CPU SkBlend32_RGB16(uint32_t src_expand, uint16_t dst, unsigned scale) {
+ uint32_t dst_expand = SkExpand_rgb_16(dst) * scale;
+ return SkCompact_rgb_16((src_expand + dst_expand) >> 5);  // >> 5 divides the sum by 32, the top of the scale range
+}
+
/** Blend src and dst 16bit colors by the 0..256 scale parameter.
The computation yields only 16bits of valid data, but we claim
to return 32bits, so that the compiler won't generate extra instructions to