about summary refs log tree commit diff homepage
path: root/src
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-02-14 09:41:17 -0500
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-02-14 15:12:42 +0000
commit: 7598b4882cff7c418421c96c04ce658655cdd933 (patch)
tree: 474d20c6902df67dc0cf355b5dc70cb63eb09c47 /src
parent: bfdc87a62193aae20fac0bce37bed7b656f299bb (diff)
SkSplicer: fix dstover
dstover was updating the {dr,dg,db,da} registers instead of {r,g,b,a}.

Change-Id: Ie2280abe80efb274fb422dc38a329c6dac12f846
Reviewed-on: https://skia-review.googlesource.com/8404
Reviewed-by: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
Diffstat (limited to 'src')
-rw-r--r-- src/splicer/SkSplicer_generated.h 65
-rw-r--r-- src/splicer/SkSplicer_stages.cpp 8
2 files changed, 46 insertions, 27 deletions
diff --git a/src/splicer/SkSplicer_generated.h b/src/splicer/SkSplicer_generated.h
index a9d8891c57..4316364e84 100644
--- a/src/splicer/SkSplicer_generated.h
+++ b/src/splicer/SkSplicer_generated.h
@@ -58,12 +58,20 @@ static const unsigned int aarch64_srcover[] = {
0xd65f03c0, // return
};
static const unsigned int aarch64_dstover[] = {
- 0x4d40c870, // ld1r {v16.4s}, [x3]
- 0x4ea7d610, // fsub v16.4s, v16.4s, v7.4s
- 0x4e20ce04, // fmla v4.4s, v16.4s, v0.4s
- 0x4e21ce05, // fmla v5.4s, v16.4s, v1.4s
- 0x4e22ce06, // fmla v6.4s, v16.4s, v2.4s
- 0x4e23ce07, // fmla v7.4s, v16.4s, v3.4s
+ 0x4d40c871, // ld1r {v17.4s}, [x3]
+ 0x4ea41c90, // mov v16.16b, v4.16b
+ 0x4ea61cd2, // mov v18.16b, v6.16b
+ 0x4ea71cf3, // mov v19.16b, v7.16b
+ 0x4ea7d634, // fsub v20.4s, v17.4s, v7.4s
+ 0x4ea51cb1, // mov v17.16b, v5.16b
+ 0x4e20ce90, // fmla v16.4s, v20.4s, v0.4s
+ 0x4e21ce91, // fmla v17.4s, v20.4s, v1.4s
+ 0x4e22ce92, // fmla v18.4s, v20.4s, v2.4s
+ 0x4e23ce93, // fmla v19.4s, v20.4s, v3.4s
+ 0x4eb01e00, // mov v0.16b, v16.16b
+ 0x4eb11e21, // mov v1.16b, v17.16b
+ 0x4eb21e42, // mov v2.16b, v18.16b
+ 0x4eb31e63, // mov v3.16b, v19.16b
0xd65f03c0, // return
};
static const unsigned int aarch64_clamp_0[] = {
@@ -494,11 +502,19 @@ static const unsigned int armv7_srcover[] = {
};
static const unsigned int armv7_dstover[] = {
0xf4e30c9f, // vld1.32 {d16[]}, [r3 :32]
- 0xf2600d87, // vsub.f32 d16, d16, d7
- 0xf2004c30, // vfma.f32 d4, d0, d16
- 0xf2015c30, // vfma.f32 d5, d1, d16
- 0xf2026c30, // vfma.f32 d6, d2, d16
- 0xf2037c30, // vfma.f32 d7, d3, d16
+ 0xf2651115, // vorr d17, d5, d5
+ 0xf2604d87, // vsub.f32 d20, d16, d7
+ 0xf2640114, // vorr d16, d4, d4
+ 0xf2662116, // vorr d18, d6, d6
+ 0xf2673117, // vorr d19, d7, d7
+ 0xf2400c34, // vfma.f32 d16, d0, d20
+ 0xf2411c34, // vfma.f32 d17, d1, d20
+ 0xf2422c34, // vfma.f32 d18, d2, d20
+ 0xf2433c34, // vfma.f32 d19, d3, d20
+ 0xf22001b0, // vorr d0, d16, d16
+ 0xf22111b1, // vorr d1, d17, d17
+ 0xf22221b2, // vorr d2, d18, d18
+ 0xf22331b3, // vorr d3, d19, d19
0xe12fff1e, // return
};
static const unsigned int armv7_clamp_0[] = {
@@ -979,17 +995,14 @@ static const unsigned char sse2_dstover[] = {
0xf3,0x44,0x0f,0x10,0x01, // movss (%rcx),%xmm8
0x45,0x0f,0xc6,0xc0,0x00, // shufps $0x0,%xmm8,%xmm8
0x44,0x0f,0x5c,0xc7, // subps %xmm7,%xmm8
- 0x45,0x0f,0x28,0xc8, // movaps %xmm8,%xmm9
- 0x44,0x0f,0x59,0xc8, // mulps %xmm0,%xmm9
- 0x41,0x0f,0x58,0xe1, // addps %xmm9,%xmm4
- 0x45,0x0f,0x28,0xc8, // movaps %xmm8,%xmm9
- 0x44,0x0f,0x59,0xc9, // mulps %xmm1,%xmm9
- 0x41,0x0f,0x58,0xe9, // addps %xmm9,%xmm5
- 0x45,0x0f,0x28,0xc8, // movaps %xmm8,%xmm9
- 0x44,0x0f,0x59,0xca, // mulps %xmm2,%xmm9
- 0x41,0x0f,0x58,0xf1, // addps %xmm9,%xmm6
- 0x44,0x0f,0x59,0xc3, // mulps %xmm3,%xmm8
- 0x41,0x0f,0x58,0xf8, // addps %xmm8,%xmm7
+ 0x41,0x0f,0x59,0xc0, // mulps %xmm8,%xmm0
+ 0x0f,0x58,0xc4, // addps %xmm4,%xmm0
+ 0x41,0x0f,0x59,0xc8, // mulps %xmm8,%xmm1
+ 0x0f,0x58,0xcd, // addps %xmm5,%xmm1
+ 0x41,0x0f,0x59,0xd0, // mulps %xmm8,%xmm2
+ 0x0f,0x58,0xd6, // addps %xmm6,%xmm2
+ 0x41,0x0f,0x59,0xd8, // mulps %xmm8,%xmm3
+ 0x0f,0x58,0xdf, // addps %xmm7,%xmm3
0xc3, // return
};
static const unsigned char sse2_clamp_0[] = {
@@ -1540,10 +1553,10 @@ static const unsigned char hsw_srcover[] = {
static const unsigned char hsw_dstover[] = {
0xc4,0x62,0x7d,0x18,0x01, // vbroadcastss (%rcx),%ymm8
0xc5,0x3c,0x5c,0xc7, // vsubps %ymm7,%ymm8,%ymm8
- 0xc4,0xc2,0x7d,0xb8,0xe0, // vfmadd231ps %ymm8,%ymm0,%ymm4
- 0xc4,0xc2,0x75,0xb8,0xe8, // vfmadd231ps %ymm8,%ymm1,%ymm5
- 0xc4,0xc2,0x6d,0xb8,0xf0, // vfmadd231ps %ymm8,%ymm2,%ymm6
- 0xc4,0xc2,0x65,0xb8,0xf8, // vfmadd231ps %ymm8,%ymm3,%ymm7
+ 0xc4,0xe2,0x3d,0xa8,0xc4, // vfmadd213ps %ymm4,%ymm8,%ymm0
+ 0xc4,0xe2,0x3d,0xa8,0xcd, // vfmadd213ps %ymm5,%ymm8,%ymm1
+ 0xc4,0xe2,0x3d,0xa8,0xd6, // vfmadd213ps %ymm6,%ymm8,%ymm2
+ 0xc4,0xe2,0x3d,0xa8,0xdf, // vfmadd213ps %ymm7,%ymm8,%ymm3
0xc3, // return
};
static const unsigned char hsw_clamp_0[] = {
diff --git a/src/splicer/SkSplicer_stages.cpp b/src/splicer/SkSplicer_stages.cpp
index badaf78695..03b7001640 100644
--- a/src/splicer/SkSplicer_stages.cpp
+++ b/src/splicer/SkSplicer_stages.cpp
@@ -201,7 +201,13 @@ STAGE(srcover) {
b = fma(db, A, b);
a = fma(da, A, a);
}
-STAGE(dstover) { srcover_k(x,limit,ctx,k, dr,dg,db,da, r,g,b,a); }
+STAGE(dstover) {
+ auto DA = k->_1 - da;
+ r = fma(r, DA, dr);
+ g = fma(g, DA, dg);
+ b = fma(b, DA, db);
+ a = fma(a, DA, da);
+}
STAGE(clamp_0) {
r = max(r, 0);