diff options
-rw-r--r-- | HASHTAGS | 2 | ||||
-rw-r--r-- | bench/PMFloatBench.cpp | 10 | ||||
-rw-r--r-- | src/core/SkPMFloat.h | 4 | ||||
-rw-r--r-- | src/opts/SkPMFloat_SSE2.h | 10 | ||||
-rw-r--r-- | src/opts/SkPMFloat_SSSE3.h | 11 | ||||
-rw-r--r-- | src/opts/SkPMFloat_neon.h | 11 | ||||
-rw-r--r-- | src/opts/SkPMFloat_none.h | 4 | ||||
-rw-r--r-- | tests/PMFloatTest.cpp | 3 |
8 files changed, 42 insertions, 13 deletions
@@ -14,6 +14,8 @@ notry,NOTRY=true nocommit,COMMIT=false floats,BUG=skia:3592 +neon,#n5,#n9 +n5,CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus5-Adreno330-Arm7-Debug-Trybot n7,CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus7-Tegra3-Arm7-Debug-Trybot n9,CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus9-TegraK1-Arm64-Release-Trybot diff --git a/bench/PMFloatBench.cpp b/bench/PMFloatBench.cpp index d748144c4b..09819e9962 100644 --- a/bench/PMFloatBench.cpp +++ b/bench/PMFloatBench.cpp @@ -112,15 +112,15 @@ struct PMFloatGradientBench : public Benchmark { dcdx4(dcdx+dcdx+dcdx+dcdx); for (int n = 0; n < loops; n++) { - Sk4f a = c0 + dc*fx, // TODO: add 0.5f, here call trunc() instead of get(). + Sk4f a = c0 + dc*fx + Sk4f(0.5f), // The +0.5f lets us call trunc() instead of get(). b = a + dcdx, c = b + dcdx, d = c + dcdx; for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) { - fDevice[i+0] = SkPMFloat(a).get(); - fDevice[i+1] = SkPMFloat(b).get(); - fDevice[i+2] = SkPMFloat(c).get(); - fDevice[i+3] = SkPMFloat(d).get(); + fDevice[i+0] = SkPMFloat(a).trunc(); + fDevice[i+1] = SkPMFloat(b).trunc(); + fDevice[i+2] = SkPMFloat(c).trunc(); + fDevice[i+3] = SkPMFloat(d).trunc(); a += dcdx4; b += dcdx4; c += dcdx4; diff --git a/src/core/SkPMFloat.h b/src/core/SkPMFloat.h index 1d034f049b..66262a8916 100644 --- a/src/core/SkPMFloat.h +++ b/src/core/SkPMFloat.h @@ -57,6 +57,10 @@ public: SkPMColor get() const; // May SkASSERT(this->isValid()). Some implementations may clamp. SkPMColor clamped() const; // Will clamp all values to [0, 255]. Then may assert isValid(). + // Like get(), but truncates instead of rounding. + // The domain of this function is (-1.0f, 256.0f). Values in (-1.0f, 0.0f] trunc to a zero. + SkPMColor trunc() const; + // 4-at-a-time versions of get() and clamped(). Like From4PMColors(), no alignment assumed. static void To4PMColors( const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, SkPMColor[4]); diff --git a/src/opts/SkPMFloat_SSE2.h b/src/opts/SkPMFloat_SSE2.h index 156c0c9897..fa920d75d6 100644 --- a/src/opts/SkPMFloat_SSE2.h +++ b/src/opts/SkPMFloat_SSE2.h @@ -41,6 +41,16 @@ inline SkPMColor SkPMFloat::clamped() const { return c; } +inline SkPMColor SkPMFloat::trunc() const { + // Basically, same as clamped(), but no rounding. + __m128i fix8_32 = _mm_cvttps_epi32(fColors), + fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), + fix8 = _mm_packus_epi16(fix8_16, fix8_16); + SkPMColor c = _mm_cvtsi128_si32(fix8); + SkPMColorAssert(c); + return c; +} + inline void SkPMFloat::From4PMColors(const SkPMColor colors[4], SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) { // Haven't beaten this yet. diff --git a/src/opts/SkPMFloat_SSSE3.h b/src/opts/SkPMFloat_SSSE3.h index fca4197ea0..6ff6929d01 100644 --- a/src/opts/SkPMFloat_SSSE3.h +++ b/src/opts/SkPMFloat_SSSE3.h @@ -27,17 +27,20 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { SkASSERT(this->isValid()); } -inline SkPMColor SkPMFloat::get() const { - SkASSERT(this->isValid()); +inline SkPMColor SkPMFloat::trunc() const { const int _ = 255; // _ means to zero that byte. - // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). - __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)), + __m128i fix8_32 = _mm_cvttps_epi32(fColors), fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0)); SkPMColor c = _mm_cvtsi128_si32(fix8); SkPMColorAssert(c); return c; } +inline SkPMColor SkPMFloat::get() const { + SkASSERT(this->isValid()); + return SkPMFloat(Sk4f(0.5f) + *this).trunc(); +} + inline SkPMColor SkPMFloat::clamped() const { // We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up). __m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)), diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h index 780981bd95..e5b16f5e6f 100644 --- a/src/opts/SkPMFloat_neon.h +++ b/src/opts/SkPMFloat_neon.h @@ -26,10 +26,8 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { SkASSERT(this->isValid()); } -inline SkPMColor SkPMFloat::get() const { - SkASSERT(this->isValid()); - float32x4_t add_half = vaddq_f32(fColors, vdupq_n_f32(0.5f)); - uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually +inline SkPMColor SkPMFloat::trunc() const { + uint32x4_t fix8_32 = vcvtq_u32_f32(fColors); // vcvtq_u32_f32 truncates uint16x4_t fix8_16 = vmovn_u32(fix8_32); uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0); @@ -37,6 +35,11 @@ inline SkPMColor SkPMFloat::get() const { return c; } +inline SkPMColor SkPMFloat::get() const { + SkASSERT(this->isValid()); + return SkPMFloat(Sk4f(0.5f) + *this).trunc(); +} + inline SkPMColor SkPMFloat::clamped() const { float32x4_t add_half = vaddq_f32(fColors, vdupq_n_f32(0.5f)); uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually diff --git a/src/opts/SkPMFloat_none.h b/src/opts/SkPMFloat_none.h index 00705aa582..86516b1875 100644 --- a/src/opts/SkPMFloat_none.h +++ b/src/opts/SkPMFloat_none.h @@ -18,6 +18,10 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { SkASSERT(this->isValid()); } +inline SkPMColor SkPMFloat::trunc() const { + return SkPackARGB32(this->a(), this->r(), this->g(), this->b()); +} + inline SkPMColor SkPMFloat::get() const { SkASSERT(this->isValid()); return SkPackARGB32(this->a()+0.5f, this->r()+0.5f, this->g()+0.5f, this->b()+0.5f); diff --git a/tests/PMFloatTest.cpp b/tests/PMFloatTest.cpp index 0f0d853c2b..309cd60f4a 100644 --- a/tests/PMFloatTest.cpp +++ b/tests/PMFloatTest.cpp @@ -22,6 +22,9 @@ DEF_TEST(SkPMFloat, r) { pmf = SkPMFloat(254.5f, 203.5f, 153.1f, 50.8f); REPORTER_ASSERT(r, c == pmf.get()); + pmf = SkPMFloat(255.9f, 204.01f, 153.0f, -0.9f); + REPORTER_ASSERT(r, SkPreMultiplyColor(0xFFCC9900) == pmf.trunc()); + // Test clamping. SkPMFloat clamped(SkPMFloat(510.0f, 153.0f, 1.0f, -0.2f).clamped()); REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, clamped.a())); |