aboutsummaryrefslogtreecommitdiffhomepage
path: root/include/core
diff options
context:
space:
mode:
authorGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-11-08 20:14:16 +0000
committerGravatar commit-bot@chromium.org <commit-bot@chromium.org@2bbb7eff-a529-9590-31e7-b0007b416f81>2013-11-08 20:14:16 +0000
commit 11e5b972a984c7b4e09ba4dfeacc7bd805107c5a (patch)
tree 8b819ae42f35273010e987ce02363e381e6d2466 /include/core
parent 8f457e3230f1a4ce737f512ffbb5c919b8d02407 (diff)
Add sk_float_rsqrt with SSE + NEON fast paths.
Current numbers: N4: running bench [640 480] math_fastIsqrt NONRENDERING: cmsecs = 3.12 running bench [640 480] math_slowIsqrt NONRENDERING: cmsecs = 4.82 running bench [640 480] math_sk_float_rsqrt NONRENDERING: cmsecs = 1.99 Desktop: running bench [640 480] math_fastIsqrt NONRENDERING: cmsecs = 0.89 running bench [640 480] math_slowIsqrt NONRENDERING: cmsecs = 0.94 running bench [640 480] math_sk_float_rsqrt NONRENDERING: cmsecs = 0.09 Haven't found any other benches where this is a significant effect yet. BUG= R=reed@google.com Author: mtklein@google.com Review URL: https://codereview.chromium.org/60083014 git-svn-id: http://skia.googlecode.com/svn/trunk@12203 2bbb7eff-a529-9590-31e7-b0007b416f81
Diffstat (limited to 'include/core')
-rw-r--r-- include/core/SkFloatingPoint.h 41
-rw-r--r-- include/core/SkPoint.h 26
2 files changed, 53 insertions, 14 deletions
diff --git a/include/core/SkFloatingPoint.h b/include/core/SkFloatingPoint.h
index 44a3eef98d..7dfa9d8680 100644
--- a/include/core/SkFloatingPoint.h
+++ b/include/core/SkFloatingPoint.h
@@ -96,4 +96,45 @@ extern const uint32_t gIEEENegativeInfinity;
#define SK_FloatNaN (*SkTCast<const float*>(&gIEEENotANumber))
#define SK_FloatInfinity (*SkTCast<const float*>(&gIEEEInfinity))
#define SK_FloatNegativeInfinity (*SkTCast<const float*>(&gIEEENegativeInfinity))
+
+#if defined(__SSE__)
+#include <xmmintrin.h>
+#elif defined(__ARM_NEON__)
+#include <arm_neon.h>
+#endif
+
+// Fast, approximate inverse square root: returns an estimate of 1.0f / sk_float_sqrt(x).
+// Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON.
+static inline float sk_float_rsqrt(const float x) {
+// We want all this inlined, so the SIMD code is written inline here and selected at compile time;
+// when no SIMD support is known at compile time we just take the hit of the portable fallback.
+// This is going to be too fast to productively hide behind a function pointer.
+// The NEON and fallback paths refine their estimate with one step of Newton's method; the SSE path
+// returns the raw estimate. No refinement is faster, but very inaccurate. Two steps is more accurate, but slower than 1/sqrt.
+#if defined(__SSE__)
+ float result;
+ _mm_store_ss(&result, _mm_rsqrt_ss(_mm_set_ss(x))); // Hardware estimate, no Newton step.
+ return result;
+#elif defined(__ARM_NEON__)
+ // Get initial estimate.
+ const float32x2_t xx = vdup_n_f32(x); // x is duplicated into both lanes, so all work is done 2x.
+ float32x2_t estimate = vrsqrte_f32(xx);
+
+ // One step of Newton's method to refine.
+ const float32x2_t estimate_sq = vmul_f32(estimate, estimate);
+ estimate = vmul_f32(estimate, vrsqrts_f32(xx, estimate_sq));
+ return vget_lane_f32(estimate, 0); // Lane 1 would work just as well; the answer is in both lanes.
+#else
+ // Get initial estimate by manipulating the bit pattern of x as an integer.
+ int i = *SkTCast<int*>(&x);
+ i = 0x5f3759df - (i>>1); // Magic constant minus half the bit pattern.
+ float estimate = *SkTCast<float*>(&i);
+
+ // One step of Newton's method to refine: estimate * (1.5 - 0.5 * x * estimate^2).
+ const float estimate_sq = estimate*estimate;
+ estimate *= (1.5f-0.5f*x*estimate_sq);
+ return estimate;
+#endif
+}
+
#endif
diff --git a/include/core/SkPoint.h b/include/core/SkPoint.h
index b94f730ec2..caf26507ff 100644
--- a/include/core/SkPoint.h
+++ b/include/core/SkPoint.h
@@ -216,13 +216,10 @@ struct SK_API SkPoint {
* Return true if the computed length of the vector is >= the internal
* tolerance (used to avoid dividing by tiny values).
*/
- static bool CanNormalize(SkScalar dx, SkScalar dy)
-#ifdef SK_SCALAR_IS_FLOAT
- // Simple enough (and performance critical sometimes) so we inline it.
- { return (dx*dx + dy*dy) > (SK_ScalarNearlyZero * SK_ScalarNearlyZero); }
-#else
- ;
-#endif
+ static bool CanNormalize(SkScalar dx, SkScalar dy) {
+ // Simple enough (and performance critical sometimes) so we inline it.
+ return (dx*dx + dy*dy) > (SK_ScalarNearlyZero * SK_ScalarNearlyZero);
+ }
bool canNormalize() const {
return CanNormalize(fX, fY);
@@ -252,6 +249,14 @@ struct SK_API SkPoint {
*/
bool setLength(SkScalar x, SkScalar y, SkScalar length);
+ /** Same as setLength, but favoring speed over accuracy.
+ */
+ bool setLengthFast(SkScalar length);
+
+ /** Same as setLength, but favoring speed over accuracy.
+ */
+ bool setLengthFast(SkScalar x, SkScalar y, SkScalar length);
+
/** Scale the point's coordinates by scale, writing the answer into dst.
It is legal for dst == this.
*/
@@ -316,7 +321,6 @@ struct SK_API SkPoint {
* Returns true if both X and Y are finite (not infinity or NaN)
*/
bool isFinite() const {
-#ifdef SK_SCALAR_IS_FLOAT
SkScalar accum = 0;
accum *= fX;
accum *= fY;
@@ -327,12 +331,6 @@ struct SK_API SkPoint {
// value==value will be true iff value is not NaN
// TODO: is it faster to say !accum or accum==accum?
return accum == accum;
-#else
- // use bit-or for speed, since we don't care about short-circuting the
- // tests, and we expect the common case will be that we need to check all.
- int isNaN = (SK_FixedNaN == fX) | (SK_FixedNaN == fX));
- return !isNaN;
-#endif
}
/**