diff options
-rw-r--r-- | src/core/SkNx.h | 14 | ||||
-rw-r--r-- | src/opts/SkNx_neon.h | 6 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 7 | ||||
-rw-r--r-- | tests/SkNxTest.cpp | 17 |
4 files changed, 44 insertions, 0 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h index 6957cb0d38..91c978b70b 100644 --- a/src/core/SkNx.h +++ b/src/core/SkNx.h @@ -78,6 +78,15 @@ struct SkNx { *b = SkNx{bl, bh}; *c = SkNx{cl, ch}; } + AI static void Load2(const void* vptr, SkNx* a, SkNx* b) { /* Splits 2*N interleaved T values at vptr (a0,b0,a1,b1,...) into *a and *b by running Load2 on each half-width vector. */ + auto ptr = (const char*)vptr; + Half al, bl, + ah, bh; + Half::Load2(ptr , &al, &bl); + Half::Load2(ptr + 2*N/2*sizeof(T), &ah, &bh); /* The first call consumed 2 channels of N/2 lanes each, so the second half starts 2*N/2*sizeof(T) bytes in. */ + *a = SkNx{al, ah}; + *b = SkNx{bl, bh}; /* Each output pairs its channel's first-half result with its second-half result. */ + } AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) { auto ptr = (char*)vptr; Half::Store4(ptr, a.fLo, b.fLo, c.fLo, d.fLo); @@ -168,6 +177,11 @@ struct SkNx<1,T> { *b = Load(ptr + 1*sizeof(T)); *c = Load(ptr + 2*sizeof(T)); } + AI static void Load2(const void* vptr, SkNx* a, SkNx* b) { /* Single-lane base case: *a is loaded from the first T at vptr and *b from the T right after it. */ + auto ptr = (const char*)vptr; + *a = Load(ptr + 0*sizeof(T)); + *b = Load(ptr + 1*sizeof(T)); + } AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) { auto ptr = (char*)vptr; a.store(ptr + 0*sizeof(T)); diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index 16a32e11a2..b114f8f28a 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -120,6 +120,12 @@ public: AI static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); } AI void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); } + AI static void Load2(const void* ptr, SkNx* x, SkNx* y) { /* De-interleaves the 8 floats at ptr into *x and *y. */ + float32x4x2_t xy = vld2q_f32((const float*) ptr); /* vld2q_f32 is a de-interleaving load: val[0] receives elements 0,2,4,6 and val[1] receives elements 1,3,5,7. */ + *x = xy.val[0]; + *y = xy.val[1]; + } + AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) { float32x4x4_t rgba = vld4q_f32((const float*) ptr); *r = rgba.val[0]; diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index a8483a089c..a5432495eb 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -84,6 +84,13 @@ public: AI static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); } AI void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); } + AI static void Load2(const void* ptr, 
SkNx* x, SkNx* y) { /* De-interleaves the 8 floats at ptr: even-indexed ones go to *x, odd-indexed ones go to *y. */ + SkNx lo = SkNx::Load((const float*)ptr+0), + hi = SkNx::Load((const float*)ptr+4); /* lo holds floats 0..3 and hi holds floats 4..7; Load is the _mm_loadu_ps wrapper shown above. */ + *x = SkNx{lo[0], lo[2], hi[0], hi[2]}; + *y = SkNx{lo[1], lo[3], hi[1], hi[3]}; + } + AI static void Load4(const void* ptr, SkNx* r, SkNx* g, SkNx* b, SkNx* a) { __m128 v0 = _mm_loadu_ps(((float*)ptr) + 0), v1 = _mm_loadu_ps(((float*)ptr) + 4), diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp index afa6750799..069f1bc43e 100644 --- a/tests/SkNxTest.cpp +++ b/tests/SkNxTest.cpp @@ -406,3 +406,20 @@ DEF_TEST(SkNx_thenElse, r) { REPORTER_ASSERT(r, fslo[0] == 1); REPORTER_ASSERT(r, fslo[1] == -1); } + +DEF_TEST(Sk4f_Load2, r) { /* Checks that Sk4f::Load2 sends the even-indexed floats to x and the odd-indexed floats to y. */ + float xy[8] = { 0,1,2,3,4,5,6,7 }; + + Sk4f x,y; + Sk4f::Load2(xy, &x,&y); + + REPORTER_ASSERT(r, x[0] == 0); + REPORTER_ASSERT(r, x[1] == 2); + REPORTER_ASSERT(r, x[2] == 4); + REPORTER_ASSERT(r, x[3] == 6); + + REPORTER_ASSERT(r, y[0] == 1); + REPORTER_ASSERT(r, y[1] == 3); + REPORTER_ASSERT(r, y[2] == 5); + REPORTER_ASSERT(r, y[3] == 7); +} |