aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2015-09-10 14:32:32 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-09-10 14:32:32 -0700
commite70afc9f48d00828ee6b707899a8ff542b0e8b98 (patch)
tree58e033054b171a025dd0c34b6b6cf760387414fb /src
parent2bd5d024723f52fc4d1144773ee9922a8c0a1b4d (diff)
use new shuffle to speed up affine matrix mappts
sse: 25 -> 18 neon: 95 -> 86 BUG=skia: Review URL: https://codereview.chromium.org/1333983002
Diffstat (limited to 'src')
-rw-r--r--src/opts/SkMatrix_opts.h5
-rw-r--r--src/opts/SkNx_neon.h5
2 files changed, 7 insertions, 3 deletions
diff --git a/src/opts/SkMatrix_opts.h b/src/opts/SkMatrix_opts.h
index 3fb2701e88..2d0a142cf1 100644
--- a/src/opts/SkMatrix_opts.h
+++ b/src/opts/SkMatrix_opts.h
@@ -89,12 +89,11 @@ static void matrix_affine(const SkMatrix& m, SkPoint* dst, const SkPoint* src, i
}
Sk4s trans4(tx, ty, tx, ty);
Sk4s scale4(sx, sy, sx, sy);
- Sk4s skew4(kx, ky, kx, ky); // applied to swizzle of src4
+ Sk4s skew4(ky, kx, ky, kx); // applied to src4, then x/y swapped
count >>= 1;
for (int i = 0; i < count; ++i) {
Sk4s src4 = Sk4s::Load(&src->fX);
- Sk4s swz4(src[0].fY, src[0].fX, src[1].fY, src[1].fX); // need ABCD -> BADC
- (src4 * scale4 + swz4 * skew4 + trans4).store(&dst->fX);
+ (trans4 + src4 * scale4 + SkNx_shuffle<1,0,3,2>(src4 * skew4)).store(&dst->fX);
src += 2;
dst += 2;
}
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index cf149862c5..3d90f878a5 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -396,6 +396,11 @@ public:
#undef SHIFT16
#undef SHIFT8
+template <>  // NEON specialization of the <1,0,3,2> shuffle for a 4-lane float vector.
+inline SkNf<4,float> SkNx_shuffle_impl<SkNf<4,float>, 1,0,3,2>(const SkNf<4,float>& src) {
+ return vrev64q_f32(src.fVec);  // swaps the two floats inside each 64-bit half: ABCD -> BADC
+}
+
} // namespace
#endif//SkNx_neon_DEFINED