about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- bench/Sk4fBench.cpp 4
-rw-r--r-- src/core/SkColor.cpp 10
-rw-r--r-- src/core/SkGeometry.cpp 4
-rw-r--r-- src/core/SkGeometry.h 4
-rw-r--r-- src/core/SkMipMap.cpp 4
-rw-r--r-- src/core/SkNx.h 16
-rw-r--r-- src/core/SkRect.cpp 4
-rw-r--r-- src/core/SkScan_Hairline.cpp 8
-rw-r--r-- src/effects/SkColorMatrixFilter.cpp 8
-rw-r--r-- src/effects/gradients/SkLinearGradient.cpp 6
-rw-r--r-- src/opts/SkColorCubeFilter_opts.h 8
-rw-r--r-- src/opts/SkMatrix_opts.h 18
-rw-r--r-- src/opts/SkNx_avx.h 4
-rw-r--r-- src/opts/SkNx_neon.h 32
-rw-r--r-- src/opts/SkNx_sse.h 38
-rw-r--r-- src/opts/SkXfermode_opts.h 4
16 files changed, 90 insertions, 82 deletions
diff --git a/bench/Sk4fBench.cpp b/bench/Sk4fBench.cpp
index 1daa3f2973..712a65776d 100644
--- a/bench/Sk4fBench.cpp
+++ b/bench/Sk4fBench.cpp
@@ -33,8 +33,8 @@ struct Sk4fBytesRoundtripBench : public Benchmark {
for (int i = 0; i < loops; i++) {
uint32_t color = lcg_rand(&seed),
back;
- auto f = SkNx_cast<float>(Sk4b::Load((const uint8_t*)&color));
- SkNx_cast<uint8_t>(f).store((uint8_t*)&back);
+ auto f = SkNx_cast<float>(Sk4b::Load(&color));
+ SkNx_cast<uint8_t>(f).store(&back);
junk ^= back;
}
blackhole ^= junk;
diff --git a/src/core/SkColor.cpp b/src/core/SkColor.cpp
index cf6e0b2e9f..865fe0de62 100644
--- a/src/core/SkColor.cpp
+++ b/src/core/SkColor.cpp
@@ -105,16 +105,16 @@ SkColor SkHSVToColor(U8CPU a, const SkScalar hsv[3]) {
#include "SkNx.h"
SkPM4f SkPM4f::FromPMColor(SkPMColor c) {
- Sk4f value = SkNx_cast<float>(Sk4b::Load((const uint8_t*)&c));
+ Sk4f value = SkNx_cast<float>(Sk4b::Load(&c));
SkPM4f c4;
- (value * Sk4f(1.0f / 255)).store(c4.fVec);
+ (value * Sk4f(1.0f / 255)).store(&c4);
return c4;
}
SkColor4f SkColor4f::FromColor(SkColor c) {
- Sk4f value = SkNx_shuffle<3,2,1,0>(SkNx_cast<float>(Sk4b::Load((const uint8_t*)&c)));
+ Sk4f value = SkNx_shuffle<3,2,1,0>(SkNx_cast<float>(Sk4b::Load(&c)));
SkColor4f c4;
- (value * Sk4f(1.0f / 255)).store(c4.vec());
+ (value * Sk4f(1.0f / 255)).store(&c4);
return c4;
}
@@ -138,7 +138,7 @@ SkPM4f SkColor4f::premul() const {
#endif
SkPM4f pm4;
- dst.store(pm4.fVec);
+ dst.store(&pm4);
return pm4;
}
diff --git a/src/core/SkGeometry.cpp b/src/core/SkGeometry.cpp
index 629703a1ee..809ed199ad 100644
--- a/src/core/SkGeometry.cpp
+++ b/src/core/SkGeometry.cpp
@@ -11,7 +11,7 @@
static SkVector to_vector(const Sk2s& x) {
SkVector vector;
- x.store(&vector.fX);
+ x.store(&vector);
return vector;
}
@@ -1060,7 +1060,7 @@ SkVector SkConic::evalTangentAt(SkScalar t) const {
void SkConic::evalAt(SkScalar t, SkPoint* pt, SkVector* tangent) const {
SkASSERT(t >= 0 && t <= SK_Scalar1);
-
+
if (pt) {
*pt = this->evalAt(t);
}
diff --git a/src/core/SkGeometry.h b/src/core/SkGeometry.h
index 935967ea78..15f1e55195 100644
--- a/src/core/SkGeometry.h
+++ b/src/core/SkGeometry.h
@@ -12,12 +12,12 @@
#include "SkNx.h"
static inline Sk2s from_point(const SkPoint& point) {
- return Sk2s::Load(&point.fX);
+ return Sk2s::Load(&point);
}
static inline SkPoint to_point(const Sk2s& x) {
SkPoint point;
- x.store(&point.fX);
+ x.store(&point);
return point;
}
diff --git a/src/core/SkMipMap.cpp b/src/core/SkMipMap.cpp
index e105493262..08602b7e00 100644
--- a/src/core/SkMipMap.cpp
+++ b/src/core/SkMipMap.cpp
@@ -21,11 +21,11 @@ struct ColorTypeFilter_8888 {
typedef uint32_t Type;
#if defined(SKNX_IS_FAST)
static Sk4h Expand(uint32_t x) {
- return SkNx_cast<uint16_t>(Sk4b::Load((const uint8_t*)&x));
+ return SkNx_cast<uint16_t>(Sk4b::Load(&x));
}
static uint32_t Compact(const Sk4h& x) {
uint32_t r;
- SkNx_cast<uint8_t>(x).store((uint8_t*)&r);
+ SkNx_cast<uint8_t>(x).store(&r);
return r;
}
#else
diff --git a/src/core/SkNx.h b/src/core/SkNx.h
index 2bbd495ce5..4cb15dffe7 100644
--- a/src/core/SkNx.h
+++ b/src/core/SkNx.h
@@ -32,7 +32,8 @@ public:
SkNx() {}
SkNx(const SkNx<N/2, T>& lo, const SkNx<N/2, T>& hi) : fLo(lo), fHi(hi) {}
SkNx(T val) : fLo(val), fHi(val) {}
- static SkNx Load(const T vals[N]) {
+ static SkNx Load(const void* ptr) {
+ auto vals = (const T*)ptr;
return SkNx(SkNx<N/2,T>::Load(vals), SkNx<N/2,T>::Load(vals+N/2));
}
@@ -43,7 +44,8 @@ public:
T i, T j, T k, T l, T m, T n, T o, T p)
: fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); }
- void store(T vals[N]) const {
+ void store(void* ptr) const {
+ auto vals = (T*)ptr;
fLo.store(vals);
fHi.store(vals+N/2);
}
@@ -108,9 +110,15 @@ class SkNx<1,T> {
public:
SkNx() {}
SkNx(T val) : fVal(val) {}
- static SkNx Load(const T vals[1]) { return SkNx(vals[0]); }
+ static SkNx Load(const void* ptr) {
+ auto vals = (const T*)ptr;
+ return SkNx(vals[0]);
+ }
- void store(T vals[1]) const { vals[0] = fVal; }
+ void store(void* ptr) const {
+ auto vals = (T*) ptr;
+ vals[0] = fVal;
+ }
SkNx saturatedAdd(const SkNx& o) const {
SkASSERT((T)(~0) > 0); // TODO: support signed T
diff --git a/src/core/SkRect.cpp b/src/core/SkRect.cpp
index 6c44ea2544..f2060f8441 100644
--- a/src/core/SkRect.cpp
+++ b/src/core/SkRect.cpp
@@ -67,7 +67,7 @@ bool SkRect::setBoundsCheck(const SkPoint pts[], int count) {
pts += 1;
count -= 1;
} else {
- min = Sk4s::Load(&pts[0].fX);
+ min = Sk4s::Load(pts);
pts += 2;
count -= 2;
}
@@ -76,7 +76,7 @@ bool SkRect::setBoundsCheck(const SkPoint pts[], int count) {
count >>= 1;
for (int i = 0; i < count; ++i) {
- Sk4s xy = Sk4s::Load(&pts->fX);
+ Sk4s xy = Sk4s::Load(pts);
accum = accum * xy;
min = Sk4s::Min(min, xy);
max = Sk4s::Max(max, xy);
diff --git a/src/core/SkScan_Hairline.cpp b/src/core/SkScan_Hairline.cpp
index efccfc9ee8..a63220efef 100644
--- a/src/core/SkScan_Hairline.cpp
+++ b/src/core/SkScan_Hairline.cpp
@@ -233,7 +233,7 @@ static void hairquad(const SkPoint pts[3], const SkRegion* clip,
Sk2s C = coeff.fC;
for (int i = 1; i < lines; ++i) {
t = t + dt;
- ((A * t + B) * t + C).store(&tmp[i].fX);
+ ((A * t + B) * t + C).store(&tmp[i]);
}
tmp[lines] = pts[2];
lineproc(tmp, lines + 1, clip, blitter);
@@ -310,7 +310,7 @@ static void hair_cubic(const SkPoint pts[4], const SkRegion* clip, SkBlitter* bl
Sk2s D = coeff.fD;
for (int i = 1; i < lines; ++i) {
t = t + dt;
- (((A * t + B) * t + C) * t + D).store(&tmp[i].fX);
+ (((A * t + B) * t + C) * t + D).store(&tmp[i]);
}
tmp[lines] = pts[3];
lineproc(tmp, lines + 1, clip, blitter);
@@ -319,10 +319,10 @@ static void hair_cubic(const SkPoint pts[4], const SkRegion* clip, SkBlitter* bl
static SkRect compute_nocheck_cubic_bounds(const SkPoint pts[4]) {
SkASSERT(SkScalarsAreFinite(&pts[0].fX, 8));
- Sk2s min = Sk2s::Load(&pts[0].fX);
+ Sk2s min = Sk2s::Load(pts);
Sk2s max = min;
for (int i = 1; i < 4; ++i) {
- Sk2s pair = Sk2s::Load(&pts[i].fX);
+ Sk2s pair = Sk2s::Load(pts+i);
min = Sk2s::Min(min, pair);
max = Sk2s::Max(max, pair);
}
diff --git a/src/effects/SkColorMatrixFilter.cpp b/src/effects/SkColorMatrixFilter.cpp
index 4bc07b39db..a0878a5044 100644
--- a/src/effects/SkColorMatrixFilter.cpp
+++ b/src/effects/SkColorMatrixFilter.cpp
@@ -86,7 +86,7 @@ static Sk4f clamp_0_1(const Sk4f& x) {
static SkPMColor round(const Sk4f& x) {
SkPMColor c;
- SkNx_cast<uint8_t>(x * Sk4f(255) + Sk4f(0.5f)).store((uint8_t*)&c);
+ SkNx_cast<uint8_t>(x * Sk4f(255) + Sk4f(0.5f)).store(&c);
return c;
}
@@ -132,7 +132,7 @@ struct SkPMColorAdaptor {
return round(c4);
}
static Sk4f To4f(SkPMColor c) {
- return SkNx_cast<float>(Sk4b::Load((const uint8_t*)&c)) * Sk4f(1.0f/255);
+ return SkNx_cast<float>(Sk4b::Load(&c)) * Sk4f(1.0f/255);
}
};
void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor dst[]) const {
@@ -142,11 +142,11 @@ void SkColorMatrixFilter::filterSpan(const SkPMColor src[], int count, SkPMColor
struct SkPM4fAdaptor {
static SkPM4f From4f(const Sk4f& c4) {
SkPM4f c;
- c4.store(c.fVec);
+ c4.store(&c);
return c;
}
static Sk4f To4f(const SkPM4f& c) {
- return Sk4f::Load(c.fVec);
+ return Sk4f::Load(&c);
}
};
void SkColorMatrixFilter::filterSpan4f(const SkPM4f src[], int count, SkPM4f dst[]) const {
diff --git a/src/effects/gradients/SkLinearGradient.cpp b/src/effects/gradients/SkLinearGradient.cpp
index 8c74427bac..4a27a35de4 100644
--- a/src/effects/gradients/SkLinearGradient.cpp
+++ b/src/effects/gradients/SkLinearGradient.cpp
@@ -133,7 +133,7 @@ SkLinearGradient::LinearGradientContext::LinearGradientContext(
const Sk4f scale(1, 1, 1, paintAlpha);
for (int i = 0; i < count; ++i) {
uint32_t c = SkSwizzle_Color_to_PMColor(shader.fOrigColors[i]);
- rec[i].fColor = SkNx_cast<float>(Sk4b::Load((const uint8_t*)&c)) * scale;
+ rec[i].fColor = SkNx_cast<float>(Sk4b::Load(&c)) * scale;
if (i > 0) {
SkASSERT(rec[i - 1].fPos <= rec[i].fPos);
}
@@ -145,7 +145,7 @@ SkLinearGradient::LinearGradientContext::LinearGradientContext(
for (int i = 0; i < count; ++i) {
SkPMColor pmc = SkPreMultiplyColor(shader.fOrigColors[i]);
pmc = SkAlphaMulQ(pmc, alphaScale);
- rec[i].fColor = SkNx_cast<float>(Sk4b::Load((const uint8_t*)&pmc));
+ rec[i].fColor = SkNx_cast<float>(Sk4b::Load(&pmc));
if (i > 0) {
SkASSERT(rec[i - 1].fPos <= rec[i].fPos);
}
@@ -515,7 +515,7 @@ find_backward(const SkLinearGradient::LinearGradientContext::Rec rec[], float ti
template <bool apply_alpha> SkPMColor trunc_from_255(const Sk4f& x) {
SkPMColor c;
- SkNx_cast<uint8_t>(x).store((uint8_t*)&c);
+ SkNx_cast<uint8_t>(x).store(&c);
if (apply_alpha) {
c = SkPreMultiplyARGB(SkGetPackedA32(c), SkGetPackedR32(c),
SkGetPackedG32(c), SkGetPackedB32(c));
diff --git a/src/opts/SkColorCubeFilter_opts.h b/src/opts/SkColorCubeFilter_opts.h
index ab8d1d4c9b..e0c4c4abad 100644
--- a/src/opts/SkColorCubeFilter_opts.h
+++ b/src/opts/SkColorCubeFilter_opts.h
@@ -59,10 +59,10 @@ void color_cube_filter_span(const SkPMColor src[],
const SkColor lutColor10 = colorCube[ix + i10];
const SkColor lutColor11 = colorCube[ix + i11];
- Sk4f sum = SkNx_cast<float>(Sk4b::Load((const uint8_t*)&lutColor00)) * g0b0;
- sum = sum + SkNx_cast<float>(Sk4b::Load((const uint8_t*)&lutColor01)) * g0b1;
- sum = sum + SkNx_cast<float>(Sk4b::Load((const uint8_t*)&lutColor10)) * g1b0;
- sum = sum + SkNx_cast<float>(Sk4b::Load((const uint8_t*)&lutColor11)) * g1b1;
+ Sk4f sum = SkNx_cast<float>(Sk4b::Load(&lutColor00)) * g0b0;
+ sum = sum + SkNx_cast<float>(Sk4b::Load(&lutColor01)) * g0b1;
+ sum = sum + SkNx_cast<float>(Sk4b::Load(&lutColor10)) * g1b0;
+ sum = sum + SkNx_cast<float>(Sk4b::Load(&lutColor11)) * g1b1;
color = color + sum * Sk4f((float)colorToFactors[x][r]);
}
if (a != 255) {
diff --git a/src/opts/SkMatrix_opts.h b/src/opts/SkMatrix_opts.h
index 3fb2701e88..b3d3f618e0 100644
--- a/src/opts/SkMatrix_opts.h
+++ b/src/opts/SkMatrix_opts.h
@@ -27,14 +27,14 @@ static void matrix_translate(const SkMatrix& m, SkPoint* dst, const SkPoint* src
Sk4s trans4(tx, ty, tx, ty);
count >>= 1;
if (count & 1) {
- (Sk4s::Load(&src->fX) + trans4).store(&dst->fX);
+ (Sk4s::Load(src) + trans4).store(dst);
src += 2;
dst += 2;
}
count >>= 1;
for (int i = 0; i < count; ++i) {
- (Sk4s::Load(&src[0].fX) + trans4).store(&dst[0].fX);
- (Sk4s::Load(&src[2].fX) + trans4).store(&dst[2].fX);
+ (Sk4s::Load(src+0) + trans4).store(dst+0);
+ (Sk4s::Load(src+2) + trans4).store(dst+2);
src += 4;
dst += 4;
}
@@ -58,14 +58,14 @@ static void matrix_scale_translate(const SkMatrix& m, SkPoint* dst, const SkPoin
Sk4s scale4(sx, sy, sx, sy);
count >>= 1;
if (count & 1) {
- (Sk4s::Load(&src->fX) * scale4 + trans4).store(&dst->fX);
+ (Sk4s::Load(src) * scale4 + trans4).store(dst);
src += 2;
dst += 2;
}
count >>= 1;
for (int i = 0; i < count; ++i) {
- (Sk4s::Load(&src[0].fX) * scale4 + trans4).store(&dst[0].fX);
- (Sk4s::Load(&src[2].fX) * scale4 + trans4).store(&dst[2].fX);
+ (Sk4s::Load(src+0) * scale4 + trans4).store(dst+0);
+ (Sk4s::Load(src+2) * scale4 + trans4).store(dst+2);
src += 4;
dst += 4;
}
@@ -92,9 +92,9 @@ static void matrix_affine(const SkMatrix& m, SkPoint* dst, const SkPoint* src, i
Sk4s skew4(kx, ky, kx, ky); // applied to swizzle of src4
count >>= 1;
for (int i = 0; i < count; ++i) {
- Sk4s src4 = Sk4s::Load(&src->fX);
- Sk4s swz4(src[0].fY, src[0].fX, src[1].fY, src[1].fX); // need ABCD -> BADC
- (src4 * scale4 + swz4 * skew4 + trans4).store(&dst->fX);
+ Sk4s src4 = Sk4s::Load(src);
+ Sk4s swz4 = SkNx_shuffle<1,0,3,2>(src4); // y0 x0, y1 x1
+ (src4 * scale4 + swz4 * skew4 + trans4).store(dst);
src += 2;
dst += 2;
}
diff --git a/src/opts/SkNx_avx.h b/src/opts/SkNx_avx.h
index f635181a92..85a2110537 100644
--- a/src/opts/SkNx_avx.h
+++ b/src/opts/SkNx_avx.h
@@ -24,12 +24,12 @@ public:
SkNx() {}
SkNx(float val) : fVec(_mm256_set1_ps(val)) {}
- static SkNx Load(const float vals[8]) { return _mm256_loadu_ps(vals); }
+ static SkNx Load(const void* ptr) { return _mm256_loadu_ps((const float*)ptr); }
SkNx(float a, float b, float c, float d,
float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {}
- void store(float vals[8]) const { _mm256_storeu_ps(vals, fVec); }
+ void store(void* ptr) const { _mm256_storeu_ps((float*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return _mm256_add_ps(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return _mm256_sub_ps(fVec, o.fVec); }
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index 8adb276064..a4b7cd1a73 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -41,10 +41,10 @@ public:
SkNx() {}
SkNx(float val) : fVec(vdup_n_f32(val)) {}
- static SkNx Load(const float vals[2]) { return vld1_f32(vals); }
+ static SkNx Load(const void* ptr) { return vld1_f32((const float*)ptr); }
SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; }
- void store(float vals[2]) const { vst1_f32(vals, fVec); }
+ void store(void* ptr) const { vst1_f32((float*)ptr, fVec); }
SkNx approxInvert() const {
float32x2_t est0 = vrecpe_f32(fVec),
@@ -122,10 +122,10 @@ public:
SkNx() {}
SkNx(int val) : fVec(vdupq_n_s32(val)) {}
- static SkNx Load(const int vals[4]) { return vld1q_s32(vals); }
+ static SkNx Load(const void* ptr) { return vld1q_s32((const int*)ptr); }
SkNx(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }
- void store(int vals[4]) const { vst1q_s32(vals, fVec); }
+ void store(void* ptr) const { vst1q_s32((int*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); }
@@ -149,10 +149,10 @@ public:
SkNx() {}
SkNx(float val) : fVec(vdupq_n_f32(val)) {}
- static SkNx Load(const float vals[4]) { return vld1q_f32(vals); }
+ static SkNx Load(const void* ptr) { return vld1q_f32((const float*)ptr); }
SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
- void store(float vals[4]) const { vst1q_f32(vals, fVec); }
+ void store(void* ptr) const { vst1q_f32((float*)ptr, fVec); }
SkNx approxInvert() const {
float32x4_t est0 = vrecpeq_f32(fVec),
est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
@@ -240,13 +240,13 @@ public:
SkNx() {}
SkNx(uint16_t val) : fVec(vdup_n_u16(val)) {}
- static SkNx Load(const uint16_t vals[4]) { return vld1_u16(vals); }
+ static SkNx Load(const void* ptr) { return vld1_u16((const uint16_t*)ptr); }
SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) {
fVec = (uint16x4_t) { a,b,c,d };
}
- void store(uint16_t vals[4]) const { vst1_u16(vals, fVec); }
+ void store(void* ptr) const { vst1_u16((uint16_t*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); }
@@ -276,14 +276,14 @@ public:
SkNx() {}
SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {}
- static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); }
+ static SkNx Load(const void* ptr) { return vld1q_u16((const uint16_t*)ptr); }
SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
uint16_t e, uint16_t f, uint16_t g, uint16_t h) {
fVec = (uint16x8_t) { a,b,c,d, e,f,g,h };
}
- void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); }
+ void store(void* ptr) const { vst1q_u16((uint16_t*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return vaddq_u16(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return vsubq_u16(fVec, o.fVec); }
@@ -312,11 +312,11 @@ public:
SkNx(const uint8x8_t& vec) : fVec(vec) {}
SkNx() {}
- static SkNx Load(const uint8_t vals[4]) {
- return (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);
+ static SkNx Load(const void* ptr) {
+ return (uint8x8_t)vld1_dup_u32((const uint32_t*)ptr);
}
- void store(uint8_t vals[4]) const {
- return vst1_lane_u32((uint32_t*)vals, (uint32x2_t)fVec, 0);
+ void store(void* ptr) const {
+ return vst1_lane_u32((uint32_t*)ptr, (uint32x2_t)fVec, 0);
}
// TODO as needed
@@ -331,7 +331,7 @@ public:
SkNx() {}
SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {}
- static SkNx Load(const uint8_t vals[16]) { return vld1q_u8(vals); }
+ static SkNx Load(const void* ptr) { return vld1q_u8((const uint8_t*)ptr); }
SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
uint8_t e, uint8_t f, uint8_t g, uint8_t h,
@@ -340,7 +340,7 @@ public:
fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p };
}
- void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); }
+ void store(void* ptr) const { vst1q_u8((uint8_t*)ptr, fVec); }
SkNx saturatedAdd(const SkNx& o) const { return vqaddq_u8(fVec, o.fVec); }
diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
index a17d988ee7..71ecbfd89d 100644
--- a/src/opts/SkNx_sse.h
+++ b/src/opts/SkNx_sse.h
@@ -22,12 +22,12 @@ public:
SkNx() {}
SkNx(float val) : fVec(_mm_set1_ps(val)) {}
- static SkNx Load(const float vals[2]) {
- return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
+ static SkNx Load(const void* ptr) {
+ return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)ptr));
}
SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {}
- void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); }
+ void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
@@ -71,10 +71,10 @@ public:
SkNx() {}
SkNx(double val) : fVec(_mm_set1_pd(val)) {}
- static SkNx Load(const double vals[2]) { return _mm_loadu_pd(vals); }
+ static SkNx Load(const void* ptr) { return _mm_loadu_pd((const double*)ptr); }
SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {}
- void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); }
+ void store(void* ptr) const { _mm_storeu_pd((double*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); }
@@ -117,10 +117,10 @@ public:
SkNx() {}
SkNx(int val) : fVec(_mm_set1_epi32(val)) {}
- static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); }
+ static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {}
- void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
+ void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); }
@@ -155,11 +155,11 @@ public:
SkNx() {}
SkNx(float val) : fVec( _mm_set1_ps(val) ) {}
- static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); }
+ static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); }
SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
- void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
+ void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
@@ -210,10 +210,10 @@ public:
SkNx() {}
SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
- static SkNx Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); }
+ static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); }
SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {}
- void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); }
+ void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); }
@@ -237,11 +237,11 @@ public:
SkNx() {}
SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
- static SkNx Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); }
+ static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {}
- void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
+ void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); }
SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); }
@@ -278,8 +278,8 @@ public:
SkNx(const __m128i& vec) : fVec(vec) {}
SkNx() {}
- static SkNx Load(const uint8_t vals[4]) { return _mm_cvtsi32_si128(*(const int*)vals); }
- void store(uint8_t vals[4]) const { *(int*)vals = _mm_cvtsi128_si32(fVec); }
+ static SkNx Load(const void* ptr) { return _mm_cvtsi32_si128(*(const int*)ptr); }
+ void store(void* ptr) const { *(int*)ptr = _mm_cvtsi128_si32(fVec); }
// TODO as needed
@@ -292,8 +292,8 @@ public:
SkNx(const __m128i& vec) : fVec(vec) {}
SkNx() {}
- static SkNx Load(const uint8_t vals[8]) { return _mm_loadl_epi64((const __m128i*)vals); }
- void store(uint8_t vals[8]) const { _mm_storel_epi64((__m128i*)vals, fVec); }
+ static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); }
+ void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); }
// TODO as needed
@@ -307,14 +307,14 @@ public:
SkNx() {}
SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
- static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }
+ static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
uint8_t e, uint8_t f, uint8_t g, uint8_t h,
uint8_t i, uint8_t j, uint8_t k, uint8_t l,
uint8_t m, uint8_t n, uint8_t o, uint8_t p)
: fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {}
- void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
+ void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); }
diff --git a/src/opts/SkXfermode_opts.h b/src/opts/SkXfermode_opts.h
index 7b72bc3f80..f15094c98b 100644
--- a/src/opts/SkXfermode_opts.h
+++ b/src/opts/SkXfermode_opts.h
@@ -285,12 +285,12 @@ private:
}
static Sk4f Load(SkPMColor c) {
- return SkNx_cast<float>(Sk4b::Load((uint8_t*)&c)) * Sk4f(1.0f/255);
+ return SkNx_cast<float>(Sk4b::Load(&c)) * Sk4f(1.0f/255);
}
static SkPMColor Round(const Sk4f& f) {
SkPMColor c;
- SkNx_cast<uint8_t>(f * Sk4f(255) + Sk4f(0.5f)).store((uint8_t*)&c);
+ SkNx_cast<uint8_t>(f * Sk4f(255) + Sk4f(0.5f)).store(&c);
return c;
}