aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/core/SkUtils.cpp
diff options
context:
space:
mode:
authorGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2014-05-21 12:43:07 +0000
committerGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2014-05-21 12:43:07 +0000
commitf0ea77a3630e6d1c01d83aa5430b3780da9e88b6 (patch)
tree8a4371e68c70a8b71b59e73bbae6ae4c8afd109b /src/core/SkUtils.cpp
parent74ff1badf43169fa4ecc952994930edec4e56bee (diff)
SSE2 implementation of memcpy32
With the SSE2 version of memcpy32, S32_Opaque_BlitRow32() in SkBlitRow_D32.cpp shows about a 30% performance improvement. Here are the data on a desktop i7-3770.

before:
bitmap_scale_filter_90_90 8888: cmsecs = 2.01
bitmaprect_FF_filter_trans 8888: cmsecs = 3.61
bitmaprect_FF_nofilter_trans 8888: cmsecs = 3.57
bitmaprect_FF_filter_identity 8888: cmsecs = 3.53
bitmaprect_FF_nofilter_identity 8888: cmsecs = 3.53
bitmap_4444_update 8888: cmsecs = 4.84
bitmap_4444_update_volatile 8888: cmsecs = 4.81
bitmap_4444 8888: cmsecs = 4.81

after:
bitmap_scale_filter_90_90 8888: cmsecs = 1.83
bitmaprect_FF_filter_trans 8888: cmsecs = 2.36
bitmaprect_FF_nofilter_trans 8888: cmsecs = 2.36
bitmaprect_FF_filter_identity 8888: cmsecs = 2.60
bitmaprect_FF_nofilter_identity 8888: cmsecs = 2.63
bitmap_4444_update 8888: cmsecs = 3.30
bitmap_4444_update_volatile 8888: cmsecs = 3.30
bitmap_4444 8888: cmsecs = 3.29

BUG=skia:
R=mtklein@google.com, reed@google.com, bsalomon@google.com

Author: qiankun.miao@intel.com

Review URL: https://codereview.chromium.org/285313002

git-svn-id: http://skia.googlecode.com/svn/trunk@14822 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'src/core/SkUtils.cpp')
-rw-r--r--src/core/SkUtils.cpp46
1 files changed, 46 insertions, 0 deletions
diff --git a/src/core/SkUtils.cpp b/src/core/SkUtils.cpp
index 76da23a6d0..c65947dfe9 100644
--- a/src/core/SkUtils.cpp
+++ b/src/core/SkUtils.cpp
@@ -34,6 +34,18 @@
*(dst)++ = value; *(dst)++ = value; \
*(dst)++ = value; *(dst)++ = value; \
} while (0)
+/* copy_16_longs(dst, src): copy 16 consecutive elements from src to dst,
+ * fully unrolled (8 lines of 2 assignments each). Both arguments are
+ * post-incremented on every assignment, so they must be modifiable pointer
+ * lvalues; after the macro runs each has advanced by 16 elements.
+ * The do { ... } while (0) wrapper lets the macro be used as one statement.
+ */
+#define copy_16_longs(dst, src) \
+ do { \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ *(dst)++ = *(src)++; *(dst)++ = *(src)++; \
+ } while (0)
#endif
///////////////////////////////////////////////////////////////////////////////
@@ -109,6 +121,24 @@ static void sk_memset32_portable(uint32_t dst[], uint32_t value, int count) {
}
}
+static void sk_memcpy32_portable(uint32_t dst[], const uint32_t src[], int count) {  // copies count 32-bit values from src to dst; used when no platform routine is selected
+ SkASSERT(dst != NULL && count >= 0);  // NOTE(review): src is dereferenced below but is not checked here
+ /* Bulk phase: move whole groups of 16 values with the unrolled
+  * copy_16_longs macro, which advances dst and src as it copies. */
+ int sixteenlongs = count >> 4;  // number of complete 16-value groups
+ if (sixteenlongs) {
+ do {
+ copy_16_longs(dst, src);
+ } while (--sixteenlongs != 0);
+ count &= 15;  // keep only the leftover values (count modulo 16 for non-negative count)
+ }
+ // Tail phase: copy whatever is left (fewer than 16 values) one at a time.
+ if (count) {
+ do {
+ *dst++ = *src++;
+ } while (--count != 0);
+ }
+}
+
static void choose_memset16(SkMemset16Proc* proc) {
*proc = SkMemset16GetPlatformProc();
if (NULL == *proc) {
@@ -141,6 +171,22 @@ void sk_memset32(uint32_t dst[], uint32_t value, int count) {
return proc(dst, value, count);
}
+static void choose_memcpy32(SkMemcpy32Proc* proc) {  // stores the memcpy32 implementation to use into *proc
+ *proc = SkMemcpy32GetPlatformProc();  // presumably a platform-optimized routine, or NULL if none — confirm against its definition
+ if (NULL == *proc) {
+ *proc = &sk_memcpy32_portable;  // nothing supplied: fall back to the portable loop in this file
+ }
+}
+
+void sk_memcpy32(uint32_t dst[], const uint32_t src[], int count) {  // non-static entry point: forwards the copy to the selected implementation
+ SK_DECLARE_STATIC_ONCE(once);
+ static SkMemcpy32Proc proc = NULL;  // cached implementation pointer, filled in by choose_memcpy32
+ SkOnce(&once, choose_memcpy32, &proc);  // presumably invokes choose_memcpy32(&proc) a single time — confirm against SkOnce
+ SkASSERT(proc != NULL);  // choose_memcpy32 always leaves a non-NULL pointer (portable fallback)
+ // Hand the arguments through unchanged to the chosen routine.
+ return proc(dst, src, count);
+}
+
///////////////////////////////////////////////////////////////////////////////
/* 0xxxxxxx 1 total