about | summary | refs | log | tree | commit | diff | homepage
path: root/src/opts
diff options
context:
space:
mode:
Diffstat (limited to 'src/opts')
-rw-r--r-- src/opts/SkNx_neon.h 10
-rw-r--r-- src/opts/SkNx_sse.h 7
2 files changed, 17 insertions, 0 deletions
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 8d1a249530..f8170ef962 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -43,6 +43,16 @@ public:
vst3_f32((float*) dst, abc);
}
+ AI static void Store4(void* dst, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {  // Write a, b, c, d to dst with a single vst4_f32.
+ float32x2x4_t abcd = {{  // Pack the four 2-lane vectors into the x4 aggregate that vst4_f32 takes.
+ a.fVec,
+ b.fVec,
+ c.fVec,
+ d.fVec,
+ }};
+ vst4_f32((float*) dst, abcd);  // 4-way interleaving store: a0 b0 c0 d0 a1 b1 c1 d1 (8 floats written).
+ }
+
AI SkNx invert() const {
float32x2_t est0 = vrecpe_f32(fVec),
est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index dc9a5944da..3b530f0e78 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -36,6 +36,13 @@ public:
_mm_storel_pi(((__m64*)dst) + 2, hi);
}
+ AI static void Store4(void* dst, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {  // Write a, b, c, d to dst interleaved by lane.
+ auto lo = _mm_setr_ps(a[0], b[0], c[0], d[0]),  // Lane 0 of each input, in a, b, c, d order.
+ hi = _mm_setr_ps(a[1], b[1], c[1], d[1]);  // Lane 1 of each input, same order.
+ _mm_storeu_ps((float*)dst, lo);  // Unaligned store to floats dst[0..3].
+ _mm_storeu_ps(((float*)dst) + 4, hi);  // Unaligned store to floats dst[4..7]; 8 floats written in total.
+ }
+
AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); }
AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }