about | summary | refs | log | tree | commit | diff | homepage
path: root/src/jumper/SkJumper_stages.cpp
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-02-24 09:05:14 -0500
committerGravatar Mike Klein <mtklein@chromium.org>2017-02-24 14:37:14 +0000
commit 420e38f586ed21a51c9d216c422b4c4d5ab2dc97 (patch)
tree 66304c9651058dcf3989c966a39c0ae07fefa2b9 /src/jumper/SkJumper_stages.cpp
parent 1a9e8f31b43098e69cf5b49fef362387a6cc6da7 (diff)
SkJumper: a8
Change-Id: I123caaee0bb8e3967c0a1f2acf1d80bcf0f41758
Reviewed-on: https://skia-review.googlesource.com/8944
Reviewed-by: Mike Klein <mtklein@chromium.org>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src/jumper/SkJumper_stages.cpp')
-rw-r--r-- src/jumper/SkJumper_stages.cpp 32
1 files changed, 31 insertions, 1 deletions
diff --git a/src/jumper/SkJumper_stages.cpp b/src/jumper/SkJumper_stages.cpp
index 580432c60d..899323b0db 100644
--- a/src/jumper/SkJumper_stages.cpp
+++ b/src/jumper/SkJumper_stages.cpp
@@ -42,6 +42,7 @@ static Dst bit_cast(const Src& src) {
static F rsqrt(F v) { return 1.0f / sqrtf(v); }  // Reciprocal square root computed exactly as 1/sqrtf(v).
static U32 round(F v, F scale) { return (uint32_t)lrintf(v*scale); }  // Scale first, then round with lrintf and convert to uint32_t.
static U16 pack(U32 v) { return (U16)v; }  // Narrow U32 -> U16 with a plain cast.
+ static U8 pack(U16 v) { return (U8)v; }  // Narrow U16 -> U8 with a plain cast; store_a8 chains this after pack(U32).
static F if_then_else(I32 c, F t, F e) { return c ? t : e; }  // Yields t when c is nonzero, otherwise e.
@@ -67,6 +68,7 @@ static Dst bit_cast(const Src& src) {
static F rsqrt(F v) { auto e = vrsqrteq_f32(v); return vrsqrtsq_f32(v,e*e) * e; }  // NEON estimate (vrsqrteq_f32) refined by one vrsqrtsq_f32 step.
static U32 round(F v, F scale) { return vcvtnq_u32_f32(v*scale); }  // Scale, then convert float -> u32 with the round-to-nearest vcvtn intrinsic.
static U16 pack(U32 v) { return __builtin_convertvector(v, U16); }  // Element-wise conversion of the vector to U16.
+ static U8 pack(U16 v) { return __builtin_convertvector(v, U8); }  // Element-wise conversion of the vector to U8; store_a8 chains this after pack(U32).
static F if_then_else(I32 c, F t, F e) { return vbslq_f32((U32)c,t,e); }  // Bitwise select: bits of t where c has bits set, bits of e elsewhere.
@@ -94,6 +96,7 @@ static Dst bit_cast(const Src& src) {
static F rsqrt(F v) { auto e = vrsqrte_f32(v); return vrsqrts_f32(v,e*e) * e; }  // NEON estimate (vrsqrte_f32) refined by one vrsqrts_f32 step.
static U32 round(F v, F scale) { return vcvt_u32_f32(mad(v,scale,0.5f)); }  // vcvt_u32_f32 truncates, so 0.5 is passed to mad first (mad is defined outside this view; presumably v*scale+0.5 -- confirm).
static U16 pack(U32 v) { return __builtin_convertvector(v, U16); }  // Element-wise conversion of the vector to U16.
+ static U8 pack(U16 v) { return __builtin_convertvector(v, U8); }  // Element-wise conversion of the vector to U8; store_a8 chains this after pack(U32).
static F if_then_else(I32 c, F t, F e) { return vbsl_f32((U32)c,t,e); }  // Bitwise select: bits of t where c has bits set, bits of e elsewhere.
@@ -123,6 +126,10 @@ static Dst bit_cast(const Src& src) {
hi = _mm256_extractf128_si256(v, 1);
return _mm_packus_epi32(lo, hi);
}
+ static U8 pack(U16 v) {  // Narrow 16-bit lanes to 8-bit lanes and return them as a U8.
+ __m128i r = _mm_packus_epi16(v,v);  // Unsigned-saturating 16 -> 8 bit pack; passing v twice leaves the packed bytes in both halves of r.
+ return unaligned_load<U8>(&r);  // Reads a U8 starting at the beginning of r (unaligned_load is defined outside this view).
+ }
static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }  // Per lane: takes t where the top bit of c is set, otherwise e.
@@ -151,6 +158,10 @@ static Dst bit_cast(const Src& src) {
hi = _mm256_extractf128_si256(v, 1);
return _mm_packus_epi32(lo, hi);
}
+ static U8 pack(U16 v) {  // Narrow 16-bit lanes to 8-bit lanes and return them as a U8.
+ __m128i r = _mm_packus_epi16(v,v);  // 16 -> 8 bit pack with unsigned saturation; both operands are v, so the result holds the packed bytes twice.
+ return unaligned_load<U8>(&r);  // Loads a U8 from the start of r (unaligned_load is declared outside this view).
+ }
static F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }  // Lane-wise blend keyed on the top bit of c: set -> t, clear -> e.
@@ -187,6 +198,12 @@ static Dst bit_cast(const Src& src) {
#endif
return unaligned_load<U16>(&p); // We have two copies. Return (the lower) one.
}
+ static U8 pack(U16 v) {  // Narrow 16-bit lanes to 8-bit lanes and return them as a U8.
+ __m128i r;
+ memcpy(&r, &v, sizeof(v));  // Copies only sizeof(v) bytes. NOTE(review): if U16 is narrower than __m128i here, the rest of r stays uninitialized; those bytes do not appear to reach the returned lanes, but confirm.
+ r = _mm_packus_epi16(r,r);  // Unsigned-saturating 16 -> 8 bit pack; the same register is used for both operands.
+ return unaligned_load<U8>(&r);  // Reads a U8 starting at the beginning of r (unaligned_load is defined outside this view).
+ }
static F if_then_else(I32 c, F t, F e) {
#if defined(__SSE4_1__)
@@ -213,7 +230,7 @@ static Dst bit_cast(const Src& src) {
static U32 expand(U16 v) { return __builtin_convertvector( v, U32); }  // Element-wise conversion of a U16 vector to U32.
static U32 expand(U8 v) { return __builtin_convertvector( v, U32); }  // Element-wise conversion of a U8 vector to U32; load_a8 uses this overload.
#else
- static F cast (U32 v) { return (F)v; }
+ static F cast (U32 v) { return (F)v; }  // Fallback branch: converts U32 to F with a plain cast.
static U32 expand(U16 v) { return (U32)v; }  // Fallback branch: widens U16 to U32 with a plain cast.
static U32 expand(U8 v) { return (U32)v; }  // Fallback branch: widens U8 to U32 with a plain cast; load_a8 uses this overload.
#endif
@@ -525,6 +542,19 @@ STAGE(load_tables) {
a = cast( (px >> 24)) * k->_1_255;
}
+STAGE(load_a8) {  // Loads 8-bit values into a and zeroes r, g and b (STAGE, ctx, x, k and the r/g/b/a registers come from outside this view).
+ auto ptr = *(const uint8_t**)ctx + x;  // ctx is read as a pointer to a byte pointer; the source address is that pointer offset by x.
+
+ r = g = b = 0.0f;  // Only a is loaded by this stage, so the other three channels are cleared.
+ a = cast(expand(unaligned_load<U8>(ptr))) * k->_1_255;  // Bytes -> U32 -> F, then scaled by k->_1_255 (presumably 1/255 -- confirm against the constants definition).
+}
+STAGE(store_a8) {  // Writes a out as 8-bit values (STAGE, ctx, x, k and a come from outside this view).
+ auto ptr = *(uint8_t**)ctx + x;  // ctx is read as a pointer to a byte pointer; the destination is that pointer offset by x.
+
+ U8 packed = pack(pack(round(a, k->_255)));  // round scales a by k->_255 into U32; inner pack narrows to U16, outer pack to U8. NOTE(review): no clamp of a is visible in this stage.
+ memcpy(ptr, &packed, sizeof(packed));  // Copies sizeof(packed) bytes to ptr without requiring any alignment of the destination.
+}
+
STAGE(load_565) {
auto ptr = *(const uint16_t**)ctx + x;