aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar qiankun.miao <qiankun.miao@intel.com>2014-12-02 18:26:09 -0800
committerGravatar Commit bot <commit-bot@chromium.org>2014-12-02 18:26:09 -0800
commit45a05780867a06b9f8a8d5240cf6c5d5a2c15a35 (patch)
tree8000137dda38c6d723ddd2ea5519ae7ae8fedf9a
parent9503ac7a3cc98327a88eff59c71c451fbdd7e043 (diff)
Add SSSE3 acceleration for S32_D16_filter_DXDY
With this CL, the related nanobench results improve for the 565 config:
bitmap_BGRA_8888_scale_rotate_bilerp 115us -> 70.5us 0.61x
bitmap_BGRA_8888_update_volatile_scale_rotate_bilerp 115us -> 70.5us 0.61x
bitmap_BGRA_8888_update_scale_rotate_bilerp 112us -> 68us 0.6x
BUG=skia:
Review URL: https://codereview.chromium.org/773753002
-rw-r--r--src/core/SkBitmapProcState.h2
-rw-r--r--src/opts/SkBitmapProcState_opts_SSSE3.cpp34
-rw-r--r--src/opts/SkBitmapProcState_opts_SSSE3.h4
-rw-r--r--src/opts/opts_check_x86.cpp11
4 files changed, 39 insertions, 12 deletions
diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h
index dd1f0bff45..add5bf4fa9 100644
--- a/src/core/SkBitmapProcState.h
+++ b/src/core/SkBitmapProcState.h
@@ -204,6 +204,8 @@ void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s,
uint32_t xy[], int count, int x, int y);
void S32_D16_filter_DX(const SkBitmapProcState& s,
const uint32_t* xy, int count, uint16_t* colors);
+void S32_D16_filter_DXDY(const SkBitmapProcState& s,
+ const uint32_t* xy, int count, uint16_t* colors);
void highQualityFilter32(const SkBitmapProcState &s, int x, int y,
SkPMColor *SK_RESTRICT colors, int count);
diff --git a/src/opts/SkBitmapProcState_opts_SSSE3.cpp b/src/opts/SkBitmapProcState_opts_SSSE3.cpp
index 165f1f5ce0..5e7c7b4698 100644
--- a/src/opts/SkBitmapProcState_opts_SSSE3.cpp
+++ b/src/opts/SkBitmapProcState_opts_SSSE3.cpp
@@ -6,6 +6,7 @@
*/
#include "SkBitmapProcState_opts_SSSE3.h"
+#include "SkColorPriv.h"
#include "SkPaint.h"
#include "SkUtils.h"
@@ -720,17 +721,28 @@ void S32_alpha_D32_filter_DX_SSSE3(const SkBitmapProcState& s,
}
void S32_opaque_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
- const uint32_t* xy,
- int count, uint32_t* colors) {
+ const uint32_t* xy,
+ int count, uint32_t* colors) {
S32_generic_D32_filter_DXDY_SSSE3<false>(s, xy, count, colors);
}
void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
- const uint32_t* xy,
- int count, uint32_t* colors) {
+ const uint32_t* xy,
+ int count, uint32_t* colors) {
S32_generic_D32_filter_DXDY_SSSE3<true>(s, xy, count, colors);
}
+void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
+ const uint32_t* xy,
+ int count, uint16_t* colors) {
+ SkASSERT(64 >= count);
+ SkAutoSTMalloc<64, uint32_t> colors32(count);
+ S32_generic_D32_filter_DXDY_SSSE3<false>(s, xy, count, colors32);
+ for(int i = 0; i < count; i++) {
+ *colors++ = SkPixel32ToPixel16(colors32[i]);
+ }
+}
+
#else // SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
void S32_opaque_D32_filter_DX_SSSE3(const SkBitmapProcState& s,
@@ -746,14 +758,20 @@ void S32_alpha_D32_filter_DX_SSSE3(const SkBitmapProcState& s,
}
void S32_opaque_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
- const uint32_t* xy,
- int count, uint32_t* colors) {
+ const uint32_t* xy,
+ int count, uint32_t* colors) {
sk_throw();
}
void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
- const uint32_t* xy,
- int count, uint32_t* colors) {
+ const uint32_t* xy,
+ int count, uint32_t* colors) {
+ sk_throw();
+}
+
+void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
+ const uint32_t* xy,
+ int count, uint16_t* colors) { // uint16_t* must match the header declaration; uint32_t* would define a different overload
sk_throw();
}
diff --git a/src/opts/SkBitmapProcState_opts_SSSE3.h b/src/opts/SkBitmapProcState_opts_SSSE3.h
index 9fd074aacf..74504d8bc6 100644
--- a/src/opts/SkBitmapProcState_opts_SSSE3.h
+++ b/src/opts/SkBitmapProcState_opts_SSSE3.h
@@ -23,4 +23,8 @@ void S32_alpha_D32_filter_DXDY_SSSE3(const SkBitmapProcState& s,
const uint32_t* xy,
int count, uint32_t* colors);
+void S32_D16_filter_DXDY_SSSE3(const SkBitmapProcState& s,
+ const uint32_t* xy,
+ int count, uint16_t* colors);
+
#endif
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp
index 8fec2ba69c..34aae928eb 100644
--- a/src/opts/opts_check_x86.cpp
+++ b/src/opts/opts_check_x86.cpp
@@ -149,26 +149,27 @@ void SkBitmapProcState::platformProcs() {
if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
return;
}
+ const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3);
/* Check fSampleProc32 */
if (fSampleProc32 == S32_opaque_D32_filter_DX) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (ssse3) {
fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
} else {
fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
}
} else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (ssse3) {
fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
}
} else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (ssse3) {
fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
} else {
fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
}
} else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
- if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
+ if (ssse3) {
fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
}
}
@@ -176,6 +177,8 @@ void SkBitmapProcState::platformProcs() {
/* Check fSampleProc16 */
if (fSampleProc16 == S32_D16_filter_DX) {
fSampleProc16 = S32_D16_filter_DX_SSE2;
+ } else if (ssse3 && fSampleProc16 == S32_D16_filter_DXDY) {
+ fSampleProc16 = S32_D16_filter_DXDY_SSSE3;
}
/* Check fMatrixProc */