aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/opts/SkPMFloat_neon.h
diff options
context:
space:
mode:
authorGravatar mtklein <mtklein@chromium.org>2015-03-03 08:57:07 -0800
committerGravatar Commit bot <commit-bot@chromium.org>2015-03-03 08:57:07 -0800
commit0aebf5d0d3a2aef38a71885c85303583fdeaad57 (patch)
tree1ec6aeef90ecc654f5040e7244f5c7246ce74690 /src/opts/SkPMFloat_neon.h
parentc2574f3657b1359496a4eba5b191961974b3a64f (diff)
Test and fix SkPMFloat rounding.
SSE rounds for free (that was a happy accident: it also has a truncating version). NEON does not, and obviously neither does the portable code, so both now add 0.5 before truncating.

NOPRESUBMIT=true
BUG=skia:
Review URL: https://codereview.chromium.org/974643002
Diffstat (limited to 'src/opts/SkPMFloat_neon.h')
-rw-r--r-- src/opts/SkPMFloat_neon.h 14
1 files changed, 8 insertions, 6 deletions
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h
index 15ba3a58e4..179c674550 100644
--- a/src/opts/SkPMFloat_neon.h
+++ b/src/opts/SkPMFloat_neon.h
@@ -19,18 +19,20 @@ inline void SkPMFloat::set(SkPMColor c) {
inline SkPMColor SkPMFloat::get() const {
SkASSERT(this->isValid());
- uint32x4_t fix8_32 = vcvtq_u32_f32(vld1q_f32(fColor));
- uint16x4_t fix8_16 = vmovn_u32(fix8_32);
- uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
+ float32x4_t add_half = vaddq_f32(vld1q_f32(fColor), vdupq_n_f32(0.5f));
+ uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
+ uint16x4_t fix8_16 = vmovn_u32(fix8_32);
+ uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);
SkPMColorAssert(c);
return c;
}
inline SkPMColor SkPMFloat::clamped() const {
- uint32x4_t fix8_32 = vcvtq_u32_f32(vld1q_f32(fColor));
- uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
- uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
+ float32x4_t add_half = vaddq_f32(vld1q_f32(fColor), vdupq_n_f32(0.5f));
+ uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
+ uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
+ uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);
SkPMColorAssert(c);
return c;