about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: mtklein <mtklein@chromium.org> 2015-03-25 13:43:34 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-03-25 13:43:34 -0700
commit: 15391ee4acaa092f52742f64968ad8046b74ca81 (patch)
tree: ace01376be31a5a759439e6cec52a4acc65bf4bd /src
parent: 2af858354d913397a6c316ef46a5d52d686e10ab (diff)
Update 4-at-a-time APIs.
There is no reason to require the 4 SkPMFloats (registers) to be adjacent. The only potential win in loads and stores comes from the SkPMColors being adjacent. Makes no difference to existing bench.

BUG=skia:
Review URL: https://codereview.chromium.org/1035583002
Diffstat (limited to 'src')
-rw-r--r-- src/core/SkPMFloat.h 15
-rw-r--r-- src/opts/SkPMFloat_SSE2.h 35
-rw-r--r-- src/opts/SkPMFloat_SSSE3.h 38
-rw-r--r-- src/opts/SkPMFloat_neon.h 33
-rw-r--r-- src/opts/SkPMFloat_none.h 33
5 files changed, 118 insertions, 36 deletions
diff --git a/src/core/SkPMFloat.h b/src/core/SkPMFloat.h
index 010974debe..1d034f049b 100644
--- a/src/core/SkPMFloat.h
+++ b/src/core/SkPMFloat.h
@@ -1,3 +1,10 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
#ifndef SkPM_DEFINED
#define SkPM_DEFINED
@@ -20,7 +27,7 @@ public:
static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
// May be more efficient than one at a time. No special alignment assumed for SkPMColors.
- static void From4PMColors(SkPMFloat[4], const SkPMColor[4]);
+ static void From4PMColors(const SkPMColor[4], SkPMFloat*, SkPMFloat*, SkPMFloat*, SkPMFloat*);
explicit SkPMFloat(SkPMColor);
SkPMFloat(float a, float r, float g, float b) {
@@ -51,8 +58,10 @@ public:
SkPMColor clamped() const; // Will clamp all values to [0, 255]. Then may assert isValid().
// 4-at-a-time versions of get() and clamped(). Like From4PMColors(), no alignment assumed.
- static void To4PMColors(SkPMColor[4], const SkPMFloat[4]);
- static void ClampTo4PMColors(SkPMColor[4], const SkPMFloat[4]);
+ static void To4PMColors(
+ const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, SkPMColor[4]);
+ static void ClampTo4PMColors(
+ const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, SkPMColor[4]);
bool isValid() const {
return this->a() >= 0 && this->a() <= 255
diff --git a/src/opts/SkPMFloat_SSE2.h b/src/opts/SkPMFloat_SSE2.h
index 2a85b1a74f..156c0c9897 100644
--- a/src/opts/SkPMFloat_SSE2.h
+++ b/src/opts/SkPMFloat_SSE2.h
@@ -1,3 +1,10 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {
fColors = that.fColors;
return *this;
@@ -34,25 +41,31 @@ inline SkPMColor SkPMFloat::clamped() const {
return c;
}
-inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors[4]) {
+inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],
+ SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {
// Haven't beaten this yet.
- for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); }
+ *a = FromPMColor(colors[0]);
+ *b = FromPMColor(colors[1]);
+ *c = FromPMColor(colors[2]);
+ *d = FromPMColor(colors[3]);
}
-inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
- SkASSERT(floats[0].isValid() && floats[1].isValid()
- && floats[2].isValid() && floats[3].isValid());
+inline void SkPMFloat::To4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
// Haven't beaten this yet.
- ClampTo4PMColors(colors, floats);
+ ClampTo4PMColors(a,b,c,d, colors);
}
-inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
+inline void SkPMFloat::ClampTo4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
// Same as _SSSE3.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
- __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[0].fColors)),
- c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[1].fColors)),
- c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[2].fColors)),
- c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[3].fColors));
+ __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors)),
+ c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors)),
+ c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors)),
+ c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors));
__m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),
_mm_packus_epi16(c2, c3));
_mm_storeu_si128((__m128i*)colors, c3210);
diff --git a/src/opts/SkPMFloat_SSSE3.h b/src/opts/SkPMFloat_SSSE3.h
index ab54caf3d4..fca4197ea0 100644
--- a/src/opts/SkPMFloat_SSSE3.h
+++ b/src/opts/SkPMFloat_SSSE3.h
@@ -1,3 +1,10 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {
fColors = that.fColors;
return *this;
@@ -41,23 +48,34 @@ inline SkPMColor SkPMFloat::clamped() const {
return c;
}
-inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors[4]) {
+inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],
+ SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {
// Haven't beaten this yet.
- for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); }
+ *a = FromPMColor(colors[0]);
+ *b = FromPMColor(colors[1]);
+ *c = FromPMColor(colors[2]);
+ *d = FromPMColor(colors[3]);
}
-inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
- // Haven't beaten this yet. Still faster than ClampTo4PMColors too.
- for (int i = 0; i < 4; i++) { colors[i] = floats[i].get(); }
+inline void SkPMFloat::To4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
+ // Haven't beaten this yet. Still faster than ClampTo4PMColors?
+ colors[0] = a.get();
+ colors[1] = b.get();
+ colors[2] = c.get();
+ colors[3] = d.get();
}
-inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
+inline void SkPMFloat::ClampTo4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
// Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses 8.
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
- __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[0].fColors)),
- c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[1].fColors)),
- c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[2].fColors)),
- c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), floats[3].fColors));
+ __m128i c0 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), a.fColors)),
+ c1 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), b.fColors)),
+ c2 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), c.fColors)),
+ c3 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), d.fColors));
__m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1),
_mm_packus_epi16(c2, c3));
_mm_storeu_si128((__m128i*)colors, c3210);
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h
index 6c9df37e51..780981bd95 100644
--- a/src/opts/SkPMFloat_neon.h
+++ b/src/opts/SkPMFloat_neon.h
@@ -1,3 +1,10 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {
fColors = that.fColors;
return *this;
@@ -41,14 +48,28 @@ inline SkPMColor SkPMFloat::clamped() const {
}
// TODO: we should be able to beat these loops on all three methods.
-inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors[4]) {
- for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); }
+inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],
+ SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {
+ *a = FromPMColor(colors[0]);
+ *b = FromPMColor(colors[1]);
+ *c = FromPMColor(colors[2]);
+ *d = FromPMColor(colors[3]);
}
-inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
- for (int i = 0; i < 4; i++) { colors[i] = floats[i].get(); }
+inline void SkPMFloat::To4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
+ colors[0] = a.get();
+ colors[1] = b.get();
+ colors[2] = c.get();
+ colors[3] = d.get();
}
-inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
- for (int i = 0; i < 4; i++) { colors[i] = floats[i].clamped(); }
+inline void SkPMFloat::ClampTo4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
+ colors[0] = a.clamped();
+ colors[1] = b.clamped();
+ colors[2] = c.clamped();
+ colors[3] = d.clamped();
}
diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h
index c47f8a3713..00705aa582 100644
--- a/src/opts/SkPMFloat_none.h
+++ b/src/opts/SkPMFloat_none.h
@@ -1,3 +1,10 @@
+/*
+ * Copyright 2015 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) {
for (int i = 0; i < 4; i++) { fColor[i] = that.fColor[i]; }
return *this;
@@ -28,14 +35,28 @@ inline SkPMColor SkPMFloat::clamped() const {
return SkPackARGB32(a+0.5f, r+0.5f, g+0.5f, b+0.5f);
}
-inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors[4]) {
- for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); }
+inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],
+ SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {
+ *a = FromPMColor(colors[0]);
+ *b = FromPMColor(colors[1]);
+ *c = FromPMColor(colors[2]);
+ *d = FromPMColor(colors[3]);
}
-inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
- for (int i = 0; i < 4; i++) { colors[i] = floats[i].get(); }
+inline void SkPMFloat::To4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
+ colors[0] = a.get();
+ colors[1] = b.get();
+ colors[2] = c.get();
+ colors[3] = d.get();
}
-inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
- for (int i = 0; i < 4; i++) { colors[i] = floats[i].clamped(); }
+inline void SkPMFloat::ClampTo4PMColors(
+ const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
+ SkPMColor colors[4]) {
+ colors[0] = a.clamped();
+ colors[1] = b.clamped();
+ colors[2] = c.clamped();
+ colors[3] = d.clamped();
}