aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
author: mtklein <mtklein@chromium.org> 2015-03-26 12:32:29 -0700
committer: Commit bot <commit-bot@chromium.org> 2015-03-26 12:32:29 -0700
commit: 3d4c4a5a9feff961c6ba70443fa40ea1ca0a503e (patch)
tree: 0e400cbdbd0b5dd8eee77d37c6e5373f5f2e1d3e /src
parent: d968a6f29e92230ee118ec9cdfff0329bf83d602 (diff)
SkPMFloat::trunc()
Add and test trunc(), which is what get() used to be before rounding. Using trunc() is a ~40% speedup on our linear gradient bench. #neon #floats BUG=skia:3592 #n5 #n9 CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus5-Adreno330-Arm7-Debug-Trybot;client.skia.android:Test-Android-Nexus9-TegraK1-Arm64-Release-Trybot Review URL: https://codereview.chromium.org/1032243002
Diffstat (limited to 'src')
-rw-r--r-- src/core/SkPMFloat.h 4
-rw-r--r-- src/opts/SkPMFloat_SSE2.h 10
-rw-r--r-- src/opts/SkPMFloat_SSSE3.h 11
-rw-r--r-- src/opts/SkPMFloat_neon.h 11
-rw-r--r-- src/opts/SkPMFloat_none.h 4
5 files changed, 32 insertions, 8 deletions
diff --git a/src/core/SkPMFloat.h b/src/core/SkPMFloat.h
index 1d034f049b..66262a8916 100644
--- a/src/core/SkPMFloat.h
+++ b/src/core/SkPMFloat.h
@@ -57,6 +57,10 @@ public:
SkPMColor get() const; // May SkASSERT(this->isValid()). Some implementations may clamp.
SkPMColor clamped() const; // Will clamp all values to [0, 255]. Then may assert isValid().
+ // Like get(), but truncates each component instead of rounding it.
+ // The domain of this function is (-1.0f, 256.0f). Values in (-1.0f, 0.0f] truncate to zero.
+ SkPMColor trunc() const;
+
// 4-at-a-time versions of get() and clamped(). Like From4PMColors(), no alignment assumed.
static void To4PMColors(
const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, SkPMColor[4]);
diff --git a/src/opts/SkPMFloat_SSE2.h b/src/opts/SkPMFloat_SSE2.h
index 156c0c9897..fa920d75d6 100644
--- a/src/opts/SkPMFloat_SSE2.h
+++ b/src/opts/SkPMFloat_SSE2.h
@@ -41,6 +41,16 @@ inline SkPMColor SkPMFloat::clamped() const {
return c;
}
+inline SkPMColor SkPMFloat::trunc() const {  // SSE2: pack fColors into an SkPMColor without adding 0.5f first.
+ // Basically, same as clamped(), but no rounding.
+ __m128i fix8_32 = _mm_cvttps_epi32(fColors),  // cvtt = truncating float -> int32 conversion, one per lane
+ fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),  // the 32-bit lanes are fed to a 16->8 bit unsigned-saturating pack
+ fix8 = _mm_packus_epi16(fix8_16, fix8_16);  // packing a second time narrows again; the result is read from the low 32 bits below
+ SkPMColor c = _mm_cvtsi128_si32(fix8);  // take the low 32 bits of the vector as the packed color
+ SkPMColorAssert(c);  // NOTE(review): presumably a debug-only validity check on the packed color - confirm
+ return c;
+}
+
inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],
SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {
// Haven't beaten this yet.
diff --git a/src/opts/SkPMFloat_SSSE3.h b/src/opts/SkPMFloat_SSSE3.h
index fca4197ea0..6ff6929d01 100644
--- a/src/opts/SkPMFloat_SSSE3.h
+++ b/src/opts/SkPMFloat_SSSE3.h
@@ -27,17 +27,20 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) {
SkASSERT(this->isValid());
}
-inline SkPMColor SkPMFloat::get() const {
- SkASSERT(this->isValid());
+inline SkPMColor SkPMFloat::trunc() const {  // SSSE3: pack fColors into an SkPMColor without adding 0.5f first.
const int _ = 255; // _ means to zero that byte.
- // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
- __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),
+ __m128i fix8_32 = _mm_cvttps_epi32(fColors),  // truncating float -> int32 conversion; no 0.5f bias is added here
fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0));  // gather bytes 0,4,8,12 (low byte of each lane) into the low 4 bytes; zero the rest
SkPMColor c = _mm_cvtsi128_si32(fix8);  // take the low 32 bits of the vector as the packed color
SkPMColorAssert(c);  // NOTE(review): presumably a debug-only validity check on the packed color - confirm
return c;
}
+inline SkPMColor SkPMFloat::get() const {  // Rounding conversion, expressed in terms of trunc().
+ SkASSERT(this->isValid());
+ return SkPMFloat(Sk4f(0.5f) + *this).trunc();  // add 0.5f to every component, then truncate, so that 0.5 rounds up
+}
+
inline SkPMColor SkPMFloat::clamped() const {
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
__m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h
index 780981bd95..e5b16f5e6f 100644
--- a/src/opts/SkPMFloat_neon.h
+++ b/src/opts/SkPMFloat_neon.h
@@ -26,10 +26,8 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) {
SkASSERT(this->isValid());
}
-inline SkPMColor SkPMFloat::get() const {
- SkASSERT(this->isValid());
- float32x4_t add_half = vaddq_f32(fColors, vdupq_n_f32(0.5f));
- uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
+inline SkPMColor SkPMFloat::trunc() const {
+ uint32x4_t fix8_32 = vcvtq_u32_f32(fColors); // vcvtq_u32_f32 truncates
uint16x4_t fix8_16 = vmovn_u32(fix8_32);
uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);
@@ -37,6 +35,11 @@ inline SkPMColor SkPMFloat::get() const {
return c;
}
+inline SkPMColor SkPMFloat::get() const {  // Rounding conversion, expressed in terms of trunc().
+ SkASSERT(this->isValid());
+ return SkPMFloat(Sk4f(0.5f) + *this).trunc();  // add 0.5f to every component, then truncate (replaces the manual vaddq_f32 + vcvtq_u32_f32 rounding)
+}
+
inline SkPMColor SkPMFloat::clamped() const {
float32x4_t add_half = vaddq_f32(fColors, vdupq_n_f32(0.5f));
uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h
index 00705aa582..86516b1875 100644
--- a/src/opts/SkPMFloat_none.h
+++ b/src/opts/SkPMFloat_none.h
@@ -18,6 +18,10 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) {
SkASSERT(this->isValid());
}
+inline SkPMColor SkPMFloat::trunc() const {  // Portable fallback: pack the four components without adding 0.5f first.
+ return SkPackARGB32(this->a(), this->r(), this->g(), this->b());  // NOTE(review): presumably the float components are implicitly truncated to integers at this call - confirm against SkPackARGB32's signature
+}
+
inline SkPMColor SkPMFloat::get() const {
SkASSERT(this->isValid());
return SkPackARGB32(this->a()+0.5f, this->r()+0.5f, this->g()+0.5f, this->b()+0.5f);