about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- src/core/SkNx.h 14
-rw-r--r-- src/opts/SkNx_neon.h 6
-rw-r--r-- src/opts/SkNx_sse.h 7
-rw-r--r-- tests/SkNxTest.cpp 17
4 files changed, 44 insertions, 0 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index 6957cb0d38..91c978b70b 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -78,6 +78,15 @@ struct SkNx {
*b = SkNx{bl, bh};
*c = SkNx{cl, ch};
}
+ AI static void Load2(const void* vptr, SkNx* a, SkNx* b) {  // Fills *a and *b via two half-width Half::Load2 calls.
+     const char* lo = static_cast<const char*>(vptr);   // start of the data read by the first Half::Load2
+     const char* hi = lo + 2*N/2*sizeof(T);             // byte offset at which the second Half::Load2 reads
+     Half aLo, bLo, aHi, bHi;                           // half-width results, filled in below
+     Half::Load2(lo, &aLo, &bLo);
+     Half::Load2(hi, &aHi, &bHi);
+     *a = SkNx{aLo, aHi};                               // join the two halves of each output
+     *b = SkNx{bLo, bHi};
+ }
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
auto ptr = (char*)vptr;
Half::Store4(ptr, a.fLo, b.fLo, c.fLo, d.fLo);
@@ -168,6 +177,11 @@ struct SkNx<1,T> {
*b = Load(ptr + 1*sizeof(T));
*c = Load(ptr + 2*sizeof(T));
}
+ AI static void Load2(const void* vptr, SkNx* a, SkNx* b) {  // Single-lane case: loads two consecutive T values.
+     const char* bytes = static_cast<const char*>(vptr);      // byte pointer so offsets are in sizeof(T) units
+     *a = Load(bytes);                                        // value at offset 0
+     *b = Load(bytes + sizeof(T));                            // value at offset sizeof(T)
+ }
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
auto ptr = (char*)vptr;
a.store(ptr + 0*sizeof(T));
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 16a32e11a2..b114f8f28a 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -120,6 +120,12 @@ public:
AI static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); }
AI void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); }
+ AI static void Load2(const void* ptr, SkNx* x, SkNx* y) {  // Loads 8 floats with vld2q_f32 and hands back its two vectors.
+     float32x4x2_t pair = vld2q_f32(static_cast<const float*>(ptr));  // NEON two-way de-interleaving load
+     *x = pair.val[0];   // first vector of the pair
+     *y = pair.val[1];   // second vector of the pair
+ }
+
AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) {
float32x4x4_t rgba = vld4q_f32((const float*) ptr);
*r = rgba.val[0];
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index a8483a089c..a5432495eb 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -84,6 +84,13 @@ public:
AI static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); }
AI void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); }
+ AI static void Load2(const void* ptr, SkNx* x, SkNx* y) {  // De-interleave 8 floats: even-indexed -> *x, odd-indexed -> *y.
+     __m128 lo = _mm_loadu_ps(((const float*)ptr) + 0),      // elements 0..3 (unaligned load)
+            hi = _mm_loadu_ps(((const float*)ptr) + 4);      // elements 4..7; both loads precede any store
+     *x = _mm_shuffle_ps(lo, hi, _MM_SHUFFLE(2,0,2,0));      // {lo[0], lo[2], hi[0], hi[2]} without scalar lane reads
+     *y = _mm_shuffle_ps(lo, hi, _MM_SHUFFLE(3,1,3,1));      // {lo[1], lo[3], hi[1], hi[3]}
+ }
+
AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) {
__m128 v0 = _mm_loadu_ps(((float*)ptr) + 0),
v1 = _mm_loadu_ps(((float*)ptr) + 4),
diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp
index afa6750799..069f1bc43e 100644
--- a/tests/SkNxTest.cpp
+++ b/tests/SkNxTest.cpp
@@ -406,3 +406,20 @@ DEF_TEST(SkNx_thenElse, r) {
REPORTER_ASSERT(r, fslo[0] == 1);
REPORTER_ASSERT(r, fslo[1] == -1);
}
+
+DEF_TEST(Sk4f_Load2, r) {  // Checks that Sk4f::Load2 splits 8 consecutive floats into even- and odd-indexed lanes.
+ float xy[8] = { 0,1,2,3,4,5,6,7 };  // each value equals its own index, so expected lanes are easy to read off
+
+ Sk4f x,y;
+ Sk4f::Load2(xy, &x,&y);
+
+ REPORTER_ASSERT(r, x[0] == 0);  // x is expected to hold xy[0], xy[2], xy[4], xy[6]
+ REPORTER_ASSERT(r, x[1] == 2);
+ REPORTER_ASSERT(r, x[2] == 4);
+ REPORTER_ASSERT(r, x[3] == 6);
+
+ REPORTER_ASSERT(r, y[0] == 1);  // y is expected to hold xy[1], xy[3], xy[5], xy[7]
+ REPORTER_ASSERT(r, y[1] == 3);
+ REPORTER_ASSERT(r, y[2] == 5);
+ REPORTER_ASSERT(r, y[3] == 7);
+}