diff options
-rw-r--r-- | src/core/SkNx.h | 11 | ||||
-rw-r--r-- | src/opts/SkNx_neon.h | 9 | ||||
-rw-r--r-- | src/opts/SkNx_sse.h | 7 | ||||
-rw-r--r-- | tests/SkNxTest.cpp | 14 |
4 files changed, 41 insertions, 0 deletions
diff --git a/src/core/SkNx.h b/src/core/SkNx.h index 91c978b70b..14af1186d4 100644 --- a/src/core/SkNx.h +++ b/src/core/SkNx.h @@ -92,6 +92,11 @@ struct SkNx { Half::Store4(ptr, a.fLo, b.fLo, c.fLo, d.fLo); Half::Store4(ptr + 4*N/2*sizeof(T), a.fHi, b.fHi, c.fHi, d.fHi); } + /* Store3: writes a, b, c through Half::Store3 in two passes -- the fLo halves at vptr, then the fHi halves at byte offset 3*N/2*sizeof(T), i.e. three half-width vectors of T past the start. */ AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) { + auto ptr = (char*)vptr; + Half::Store3(ptr, a.fLo, b.fLo, c.fLo); + Half::Store3(ptr + 3*N/2*sizeof(T), a.fHi, b.fHi, c.fHi); + } AI bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } AI bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } @@ -189,6 +194,12 @@ struct SkNx<1,T> { c.store(ptr + 2*sizeof(T)); d.store(ptr + 3*sizeof(T)); } + /* Store3, single-lane case: stores a, b, c into three consecutive sizeof(T)-byte slots starting at vptr. */ AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) { + auto ptr = (char*)vptr; + a.store(ptr + 0*sizeof(T)); + b.store(ptr + 1*sizeof(T)); + c.store(ptr + 2*sizeof(T)); + } AI bool anyTrue() const { return fVal != 0; } AI bool allTrue() const { return fVal != 0; } diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h index b114f8f28a..8d1a249530 100644 --- a/src/opts/SkNx_neon.h +++ b/src/opts/SkNx_neon.h @@ -34,6 +34,15 @@ public: AI static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); } AI void store(void* ptr) const { vst1_f32((float*)ptr, fVec); } + /* Store3: bundles the three fVec registers into a float32x2x3_t and hands it to vst3_f32, which performs the store to dst. NOTE(review): vst3 is the NEON 3-way interleaving store per the ARM intrinsics reference; the Sk2f_Store3 test in this patch expects the order a[0], b[0], c[0], a[1], b[1], c[1]. */ AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) { + float32x2x3_t abc = {{ + a.fVec, + b.fVec, + c.fVec, + }}; + vst3_f32((float*) dst, abc); + } + AI SkNx invert() const { float32x2_t est0 = vrecpe_f32(fVec), est1 = vmul_f32(vrecps_f32(est0, fVec), est0); diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h index a5432495eb..dc9a5944da 100644 --- a/src/opts/SkNx_sse.h +++ b/src/opts/SkNx_sse.h @@ -29,6 +29,13 @@ public: AI void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); } + /* Store3: lo = {a[0], b[0], c[0], a[1]} goes out with one unaligned 4-float store; hi carries b[1], c[1] in its low two lanes (the two zeros are padding) and _mm_storel_pi writes just those two floats at ((__m64*)dst) + 2, i.e. 16 bytes past dst. Six floats are written in total. */ AI static void Store3(void* dst, const SkNx& a, const SkNx& b, const SkNx& c) { + auto lo = _mm_setr_ps(a[0], b[0], c[0], a[1]), + hi = 
_mm_setr_ps(b[1], c[1], 0, 0); + _mm_storeu_ps((float*)dst, lo); + _mm_storel_pi(((__m64*)dst) + 2, hi); + } + AI SkNx operator - () const { return _mm_xor_ps(_mm_set1_ps(-0.0f), fVec); } AI SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } diff --git a/tests/SkNxTest.cpp b/tests/SkNxTest.cpp index 069f1bc43e..349ffb67e7 100644 --- a/tests/SkNxTest.cpp +++ b/tests/SkNxTest.cpp @@ -423,3 +423,17 @@ DEF_TEST(Sk4f_Load2, r) { REPORTER_ASSERT(r, y[2] == 5); REPORTER_ASSERT(r, y[3] == 7); } + +DEF_TEST(Sk2f_Store3, r) { /* p0, p1, p2 are chosen so that a 3-way interleaved store (p0[0], p1[0], p2[0], p0[1], p1[1], p2[1]) yields dst == {0, 1, 2, 3, 4, 5}. */ + Sk2f p0{0, 3}; + Sk2f p1{1, 4}; + Sk2f p2{2, 5}; + float dst[6]; + Sk2f::Store3(dst, p0, p1, p2); + REPORTER_ASSERT(r, dst[0] == 0); + REPORTER_ASSERT(r, dst[1] == 1); + REPORTER_ASSERT(r, dst[2] == 2); + REPORTER_ASSERT(r, dst[3] == 3); + REPORTER_ASSERT(r, dst[4] == 4); + REPORTER_ASSERT(r, dst[5] == 5); +} |