aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r-- src/core/SkNx.h 10
-rw-r--r-- src/opts/SkNx_neon.h 8
-rw-r--r-- src/opts/SkNx_sse.h 5
-rw-r--r-- tests/SkNxTest.cpp 11
4 files changed, 34 insertions, 0 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index 14af1186d4..0b41365e82 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -97,6 +97,11 @@ struct SkNx {
Half::Store3(ptr, a.fLo, b.fLo, c.fLo);
Half::Store3(ptr + 3*N/2*sizeof(T), a.fHi, b.fHi, c.fHi);
}
+ AI static void Store2(void* vptr, const SkNx& a, const SkNx& b) {  // Stores a and b to vptr by handing each half (fLo, then fHi) to Half::Store2.
+ auto ptr = (char*)vptr;  // char* so the offset added below is measured in bytes
+ Half::Store2(ptr, a.fLo, b.fLo);  // low halves of a and b go at the start of the destination
+ Half::Store2(ptr + 2*N/2*sizeof(T), a.fHi, b.fHi);  // high halves start 2*N/2*sizeof(T) == N*sizeof(T) bytes in; presumably the size of what the first call wrote (Half is declared outside this hunk)
+ }
AI bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
AI bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
@@ -200,6 +205,11 @@ struct SkNx<1,T> {
b.store(ptr + 1*sizeof(T));
c.store(ptr + 2*sizeof(T));
}
+ AI static void Store2(void* vptr, const SkNx& a, const SkNx& b) {  // SkNx<1,T> case: stores a at vptr and b one sizeof(T) after it.
+ auto ptr = (char*)vptr;  // char* so the offsets below are measured in bytes
+ a.store(ptr + 0*sizeof(T));  // a occupies the first T-sized slot
+ b.store(ptr + 1*sizeof(T));  // b occupies the slot immediately after a
+ }
AI bool anyTrue() const { return fVal != 0; }
AI bool allTrue() const { return fVal != 0; }
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 52247c8f66..232903ab67 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -34,6 +34,14 @@ public:
AI static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
AI void store(void* ptr) const { vst1_f32((float*)ptr, fVec); }
+ AI static void Store2(void* dst, const SkNx& a, const SkNx& b) {  // Stores a and b to dst with one vst2_f32, the NEON 2-way interleaving store.
+ float32x2x2_t ab = {{  // bundle both 2-lane vectors into the pair type that vst2_f32 takes
+ a.fVec,
+ b.fVec,
+ }};
+ vst2_f32((float*) dst, ab);  // writes 4 floats in the order a[0], b[0], a[1], b[1]
+ }
+
AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) {
float32x2x3_t abc = {{
a.fVec,
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index b4ae0cbe2b..bd5c58e261 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -29,6 +29,11 @@ public:
AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); }
+ AI static void Store2(void* dst, const SkNx& a, const SkNx& b) {  // Stores the low two lanes of a and b to dst, interleaved.
+ auto vals = _mm_unpacklo_ps(a.fVec, b.fVec);  // interleave the low halves: {a[0], b[0], a[1], b[1]}
+ _mm_storeu_ps((float*)dst, vals);  // unaligned 128-bit store, so dst needs room for 4 floats but no particular alignment
+ }
+
AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) {
auto lo = _mm_setr_ps(a[0], b[0], c[0], a[1]),
hi = _mm_setr_ps(b[1], c[1], 0, 0);
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index 9901f7a41d..c27b88c12b 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -424,6 +424,17 @@ DEF_TEST(Sk4f_Load2, r) {
REPORTER_ASSERT(r, y[3] == 7);
}
+DEF_TEST(Sk2f_Store2, r) {  // Checks that Sk2f::Store2 writes its two arguments to memory lane-interleaved.
+ Sk2f p0{0, 2};  // lanes expected to land in dst[0] and dst[2]
+ Sk2f p1{1, 3};  // lanes expected to land in dst[1] and dst[3]
+ float dst[4];  // two Sk2f values of 2 floats each
+ Sk2f::Store2(dst, p0, p1);
+ REPORTER_ASSERT(r, dst[0] == 0);  // p0[0]
+ REPORTER_ASSERT(r, dst[1] == 1);  // p1[0]
+ REPORTER_ASSERT(r, dst[2] == 2);  // p0[1]
+ REPORTER_ASSERT(r, dst[3] == 3);  // p1[1]
+}
+
DEF_TEST(Sk2f_Store3, r) {
Sk2f p0{0, 3};
Sk2f p1{1, 4};