aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar reed <reed@google.com>2015-04-15 13:13:48 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-04-15 13:13:48 -0700
commita0246165eab9018d07afc09ff826ea4c40898ebc (patch)
tree1bb6a4ddd84871e812d7159c2d741373accc9dce
parent00b7e5eb973a1b1e4d1affa74fc0341e8c41e3da (diff)
Speed up hairline curves (quads and cubics)
/skia/trunk> cat ../old.txt
maxrss loops min median mean max stddev samples config bench
9M 1 4.28ms 4.32ms 4.36ms 4.67ms 3% ▄▁▁▃▂▂▁▁▂█ 8888 path_hairline_small_AA_cubic
9M 1 743µs 767µs 770µs 825µs 4% ▃▃▇▃▁▁▅▁█▁ 8888 path_hairline_small_AA_conic
9M 1 533µs 606µs 598µs 680µs 9% ▁▂▂█▆▇▇▄▂▂ 8888 path_hairline_small_AA_quad
9M 1 451µs 452µs 456µs 495µs 3% ▁▁▁▁█▁▁▁▁▁ 8888 path_hairline_small_AA_line

/skia/trunk> cat ../new.txt
maxrss loops min median mean max stddev samples config bench
9M 1 827µs 827µs 831µs 869µs 2% ▁▁▁▁▁▁▁█▁▁ 8888 path_hairline_small_AA_cubic
9M 1 515µs 517µs 517µs 518µs 0% ▇█▆▅▃▃▁▁▁▅ 8888 path_hairline_small_AA_conic
9M 1 310µs 311µs 315µs 332µs 2% ▂▁█▆▁▁▁▁▁▁ 8888 path_hairline_small_AA_quad
9M 1 254µs 254µs 258µs 276µs 3% ▁▁▁▁▁▁▁█▇▂ 8888 path_hairline_small_AA_line

Edited revert of https://codereview.chromium.org/1085013003

TBR=

Review URL: https://codereview.chromium.org/1078413003
-rw-r--r--src/core/SkBlitter.h10
-rw-r--r--src/core/SkBlitter_ARGB32.cpp57
-rw-r--r--src/core/SkCoreBlitters.h12
-rw-r--r--src/core/SkGeometry.cpp34
-rw-r--r--src/core/SkGeometry.h26
-rw-r--r--src/core/SkScan_Hairline.cpp134
6 files changed, 263 insertions, 10 deletions
diff --git a/src/core/SkBlitter.h b/src/core/SkBlitter.h
index 2d4a0defbf..8d9f7bcbed 100644
--- a/src/core/SkBlitter.h
+++ b/src/core/SkBlitter.h
@@ -8,6 +8,12 @@
#ifndef SkBlitter_DEFINED
#define SkBlitter_DEFINED
+#ifdef SK_SUPPORT_LEGACY_BLITANTIH2V2
+ #define SK_BLITANTIH2V2_VIRTUAL
+#else
+ #define SK_BLITANTIH2V2_VIRTUAL virtual
+#endif
+
#include "SkBitmap.h"
#include "SkBitmapProcShader.h"
#include "SkMask.h"
@@ -54,7 +60,7 @@ public:
virtual const SkBitmap* justAnOpaqueColor(uint32_t* value);
// (x, y), (x + 1, y)
- void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
+ SK_BLITANTIH2V2_VIRTUAL void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
int16_t runs[3];
uint8_t aa[2];
@@ -67,7 +73,7 @@ public:
}
// (x, y), (x, y + 1)
- void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
+ SK_BLITANTIH2V2_VIRTUAL void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
int16_t runs[2];
uint8_t aa[1];
diff --git a/src/core/SkBlitter_ARGB32.cpp b/src/core/SkBlitter_ARGB32.cpp
index bbad6c7926..caf85ddc1f 100644
--- a/src/core/SkBlitter_ARGB32.cpp
+++ b/src/core/SkBlitter_ARGB32.cpp
@@ -106,6 +106,25 @@ void SkARGB32_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
}
}
+#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2
+void SkARGB32_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
+ uint32_t* device = fDevice.getAddr32(x, y);
+ SkDEBUGCODE((void)fDevice.getAddr32(x + 1, y);)
+
+ device[0] = SkBlendARGB32(fPMColor, device[0], a0);
+ device[1] = SkBlendARGB32(fPMColor, device[1], a1);
+}
+
+void SkARGB32_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
+ uint32_t* device = fDevice.getAddr32(x, y);
+ SkDEBUGCODE((void)fDevice.getAddr32(x, y + 1);)
+
+ device[0] = SkBlendARGB32(fPMColor, device[0], a0);
+ device = (uint32_t*)((char*)device + fDevice.rowBytes());
+ device[0] = SkBlendARGB32(fPMColor, device[0], a1);
+}
+#endif
+
//////////////////////////////////////////////////////////////////////////////////////
#define solid_8_pixels(mask, dst, color) \
@@ -180,6 +199,25 @@ void SkARGB32_Opaque_Blitter::blitMask(const SkMask& mask,
}
}
+#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2
+void SkARGB32_Opaque_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
+ uint32_t* device = fDevice.getAddr32(x, y);
+ SkDEBUGCODE((void)fDevice.getAddr32(x + 1, y);)
+
+ device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
+ device[1] = SkFastFourByteInterp(fPMColor, device[1], a1);
+}
+
+void SkARGB32_Opaque_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
+ uint32_t* device = fDevice.getAddr32(x, y);
+ SkDEBUGCODE((void)fDevice.getAddr32(x, y + 1);)
+
+ device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
+ device = (uint32_t*)((char*)device + fDevice.rowBytes());
+ device[0] = SkFastFourByteInterp(fPMColor, device[0], a1);
+}
+#endif
+
///////////////////////////////////////////////////////////////////////////////
void SkARGB32_Blitter::blitV(int x, int y, int height, SkAlpha alpha) {
@@ -256,6 +294,25 @@ void SkARGB32_Black_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
}
}
+#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2
+void SkARGB32_Black_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
+ uint32_t* device = fDevice.getAddr32(x, y);
+ SkDEBUGCODE((void)fDevice.getAddr32(x + 1, y);)
+
+ device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
+ device[1] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[1], 256 - a1);
+}
+
+void SkARGB32_Black_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
+ uint32_t* device = fDevice.getAddr32(x, y);
+ SkDEBUGCODE((void)fDevice.getAddr32(x, y + 1);)
+
+ device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
+ device = (uint32_t*)((char*)device + fDevice.rowBytes());
+ device[0] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a1);
+}
+#endif
+
///////////////////////////////////////////////////////////////////////////////
// Special version of SkBlitRow::Factory32 that knows we're in kSrc_Mode,
diff --git a/src/core/SkCoreBlitters.h b/src/core/SkCoreBlitters.h
index f4a5d6f4db..acc18febed 100644
--- a/src/core/SkCoreBlitters.h
+++ b/src/core/SkCoreBlitters.h
@@ -120,6 +120,10 @@ public:
virtual void blitRect(int x, int y, int width, int height);
virtual void blitMask(const SkMask&, const SkIRect&);
virtual const SkBitmap* justAnOpaqueColor(uint32_t*);
+#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2
+ void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) override;
+ void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) override;
+#endif
protected:
SkColor fColor;
@@ -140,6 +144,10 @@ public:
SkARGB32_Opaque_Blitter(const SkBitmap& device, const SkPaint& paint)
: INHERITED(device, paint) { SkASSERT(paint.getAlpha() == 0xFF); }
virtual void blitMask(const SkMask&, const SkIRect&);
+#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2
+ void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) override;
+ void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) override;
+#endif
private:
typedef SkARGB32_Blitter INHERITED;
@@ -150,6 +158,10 @@ public:
SkARGB32_Black_Blitter(const SkBitmap& device, const SkPaint& paint)
: INHERITED(device, paint) {}
virtual void blitAntiH(int x, int y, const SkAlpha antialias[], const int16_t runs[]);
+#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2
+ void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) override;
+ void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) override;
+#endif
private:
typedef SkARGB32_Opaque_Blitter INHERITED;
diff --git a/src/core/SkGeometry.cpp b/src/core/SkGeometry.cpp
index 50af22de37..5979615425 100644
--- a/src/core/SkGeometry.cpp
+++ b/src/core/SkGeometry.cpp
@@ -9,6 +9,7 @@
#include "SkMatrix.h"
#include "SkNx.h"
+#if 0
static Sk2s from_point(const SkPoint& point) {
return Sk2s::Load(&point.fX);
}
@@ -18,6 +19,7 @@ static SkPoint to_point(const Sk2s& x) {
x.store(&point.fX);
return point;
}
+#endif
static SkVector to_vector(const Sk2s& x) {
SkVector vector;
@@ -135,6 +137,18 @@ static SkScalar eval_quad_derivative(const SkScalar src[], SkScalar t) {
return 2 * SkScalarMulAdd(A, t, B);
}
+void SkQuadToCoeff(const SkPoint pts[3], SkPoint coeff[3]) {
+ Sk2s p0 = from_point(pts[0]);
+ Sk2s p1 = from_point(pts[1]);
+ Sk2s p2 = from_point(pts[2]);
+
+ Sk2s p1minus2 = p1 - p0;
+
+ coeff[0] = to_point(p2 - p1 - p1 + p0); // A * t^2
+ coeff[1] = to_point(p1minus2 + p1minus2); // B * t
+ coeff[2] = pts[0]; // C
+}
+
void SkEvalQuadAt(const SkPoint src[3], SkScalar t, SkPoint* pt, SkVector* tangent) {
SkASSERT(src);
SkASSERT(t >= 0 && t <= SK_Scalar1);
@@ -452,6 +466,26 @@ void SkChopCubicAt(const SkPoint src[4], SkPoint dst[7], SkScalar t) {
dst[6] = src[3];
}
+void SkCubicToCoeff(const SkPoint pts[4], SkPoint coeff[4]) {
+ Sk2s p0 = from_point(pts[0]);
+ Sk2s p1 = from_point(pts[1]);
+ Sk2s p2 = from_point(pts[2]);
+ Sk2s p3 = from_point(pts[3]);
+
+ const Sk2s three(3);
+ Sk2s p1minusp2 = p1 - p2;
+
+ Sk2s D = p0;
+ Sk2s A = p3 + three * p1minusp2 - D;
+ Sk2s B = three * (D - p1minusp2 - p1);
+ Sk2s C = three * (p1 - D);
+
+ coeff[0] = to_point(A);
+ coeff[1] = to_point(B);
+ coeff[2] = to_point(C);
+ coeff[3] = to_point(D);
+}
+
/* http://code.google.com/p/skia/issues/detail?id=32
This test code would fail when we didn't check the return result of
diff --git a/src/core/SkGeometry.h b/src/core/SkGeometry.h
index bafde61155..9ddd91f750 100644
--- a/src/core/SkGeometry.h
+++ b/src/core/SkGeometry.h
@@ -9,6 +9,22 @@
#define SkGeometry_DEFINED
#include "SkMatrix.h"
+#include "SkNx.h"
+
+static inline Sk2s from_point(const SkPoint& point) {
+ return Sk2s::Load(&point.fX);
+}
+
+static inline SkPoint to_point(const Sk2s& x) {
+ SkPoint point;
+ x.store(&point.fX);
+ return point;
+}
+
+static inline Sk2s sk2s_cubic_eval(const Sk2s& A, const Sk2s& B, const Sk2s& C, const Sk2s& D,
+ const Sk2s& t) {
+ return ((A * t + B) * t + C) * t + D;
+}
/** Given a quadratic equation Ax^2 + Bx + C = 0, return 0, 1, 2 roots for the
equation.
@@ -25,6 +41,16 @@ SkPoint SkEvalQuadTangentAt(const SkPoint src[3], SkScalar t);
*/
void SkEvalQuadAt(const SkPoint src[3], SkScalar t, SkPoint* pt, SkVector* tangent = NULL);
+/**
+ * output is : eval(t) == coeff[0] * t^2 + coeff[1] * t + coeff[2]
+ */
+void SkQuadToCoeff(const SkPoint pts[3], SkPoint coeff[3]);
+
+/**
+ * output is : eval(t) == coeff[0] * t^3 + coeff[1] * t^2 + coeff[2] * t + coeff[3]
+ */
+void SkCubicToCoeff(const SkPoint pts[4], SkPoint coeff[4]);
+
/** Given a src quadratic bezier, chop it at the specified t value,
where 0 < t < 1, and return the two new quadratics in dst:
dst[0..2] and dst[2..4]
diff --git a/src/core/SkScan_Hairline.cpp b/src/core/SkScan_Hairline.cpp
index 0f2308b1f7..2ec051f8ab 100644
--- a/src/core/SkScan_Hairline.cpp
+++ b/src/core/SkScan_Hairline.cpp
@@ -1,4 +1,3 @@
-
/*
* Copyright 2006 The Android Open Source Project
*
@@ -6,7 +5,6 @@
* found in the LICENSE file.
*/
-
#include "SkScan.h"
#include "SkBlitter.h"
#include "SkRasterClip.h"
@@ -192,6 +190,10 @@ void SkScan::HairRect(const SkRect& rect, const SkRasterClip& clip,
#include "SkPath.h"
#include "SkGeometry.h"
+#include "SkNx.h"
+
+#define kMaxCubicSubdivideLevel 6
+#define kMaxQuadSubdivideLevel 5
static int compute_int_quad_dist(const SkPoint pts[3]) {
// compute the vector between the control point ([1]) and the middle of the
@@ -214,6 +216,9 @@ static int compute_int_quad_dist(const SkPoint pts[3]) {
static void hairquad(const SkPoint pts[3], const SkRegion* clip,
SkBlitter* blitter, int level, SkScan::HairRgnProc lineproc) {
+ SkASSERT(level <= kMaxQuadSubdivideLevel);
+
+#ifdef SK_SUPPORT_LEGACY_BLITANTIH2V2
if (level > 0) {
SkPoint tmp[5];
@@ -224,10 +229,113 @@ static void hairquad(const SkPoint pts[3], const SkRegion* clip,
SkPoint tmp[] = { pts[0], pts[2] };
lineproc(tmp, 2, clip, blitter);
}
+#else
+ SkPoint coeff[3];
+ SkQuadToCoeff(pts, coeff);
+
+ const int lines = 1 << level;
+ Sk2s t(0);
+ Sk2s dt(SK_Scalar1 / lines);
+
+ SkPoint tmp[(1 << kMaxQuadSubdivideLevel) + 1];
+ SkASSERT((unsigned)lines < SK_ARRAY_COUNT(tmp));
+
+ tmp[0] = pts[0];
+ Sk2s A = Sk2s::Load(&coeff[0].fX);
+ Sk2s B = Sk2s::Load(&coeff[1].fX);
+ Sk2s C = Sk2s::Load(&coeff[2].fX);
+ for (int i = 1; i < lines; ++i) {
+ t += dt;
+ ((A * t + B) * t + C).store(&tmp[i].fX);
+ }
+ tmp[lines] = pts[2];
+ lineproc(tmp, lines + 1, clip, blitter);
+#endif
}
-static void haircubic(const SkPoint pts[4], const SkRegion* clip,
+#ifndef SK_SUPPORT_LEGACY_BLITANTIH2V2
+static inline Sk2s abs(const Sk2s& value) {
+ return Sk2s::Max(value, -value);
+}
+
+static inline SkScalar max_component(const Sk2s& value) {
+ SkScalar components[2];
+ value.store(components);
+ return SkTMax(components[0], components[1]);
+}
+
+static inline int compute_cubic_segs(const SkPoint pts[4]) {
+ Sk2s p0 = from_point(pts[0]);
+ Sk2s p1 = from_point(pts[1]);
+ Sk2s p2 = from_point(pts[2]);
+ Sk2s p3 = from_point(pts[3]);
+
+ const Sk2s oneThird(1.0f / 3.0f);
+ const Sk2s twoThird(2.0f / 3.0f);
+
+ Sk2s p13 = oneThird * p3 + twoThird * p0;
+ Sk2s p23 = oneThird * p0 + twoThird * p3;
+
+ SkScalar diff = max_component(Sk2s::Max(abs(p1 - p13), abs(p2 - p23)));
+ SkScalar tol = SK_Scalar1 / 8;
+
+ for (int i = 0; i < kMaxCubicSubdivideLevel; ++i) {
+ if (diff < tol) {
+ return 1 << i;
+ }
+ tol *= 4;
+ }
+ return 1 << kMaxCubicSubdivideLevel;
+}
+
+static bool lt_90(SkPoint p0, SkPoint pivot, SkPoint p2) {
+ return SkVector::DotProduct(p0 - pivot, p2 - pivot) >= 0;
+}
+
+// The off-curve points are "inside" the limits of the on-curve pts
+static bool quick_cubic_niceness_check(const SkPoint pts[4]) {
+ return lt_90(pts[1], pts[0], pts[3]) &&
+ lt_90(pts[2], pts[0], pts[3]) &&
+ lt_90(pts[1], pts[3], pts[0]) &&
+ lt_90(pts[2], pts[3], pts[0]);
+}
+
+static void hair_cubic(const SkPoint pts[4], const SkRegion* clip, SkBlitter* blitter,
+ SkScan::HairRgnProc lineproc) {
+ const int lines = compute_cubic_segs(pts);
+ SkASSERT(lines > 0);
+ if (1 == lines) {
+ SkPoint tmp[2] = { pts[0], pts[3] };
+ lineproc(tmp, 2, clip, blitter);
+ return;
+ }
+
+ SkPoint coeff[4];
+ SkCubicToCoeff(pts, coeff);
+
+ const Sk2s dt(SK_Scalar1 / lines);
+ Sk2s t(0);
+
+ SkPoint tmp[(1 << kMaxCubicSubdivideLevel) + 1];
+ SkASSERT((unsigned)lines < SK_ARRAY_COUNT(tmp));
+
+ tmp[0] = pts[0];
+ Sk2s A = Sk2s::Load(&coeff[0].fX);
+ Sk2s B = Sk2s::Load(&coeff[1].fX);
+ Sk2s C = Sk2s::Load(&coeff[2].fX);
+ Sk2s D = Sk2s::Load(&coeff[3].fX);
+ for (int i = 1; i < lines; ++i) {
+ t += dt;
+ (((A * t + B) * t + C) * t + D).store(&tmp[i].fX);
+ }
+ tmp[lines] = pts[3];
+ lineproc(tmp, lines + 1, clip, blitter);
+}
+#endif
+
+static inline void haircubic(const SkPoint pts[4], const SkRegion* clip,
SkBlitter* blitter, int level, SkScan::HairRgnProc lineproc) {
+#ifdef SK_SUPPORT_LEGACY_BLITANTIH2V2
if (level > 0) {
SkPoint tmp[7];
@@ -238,10 +346,20 @@ static void haircubic(const SkPoint pts[4], const SkRegion* clip,
SkPoint tmp[] = { pts[0], pts[3] };
lineproc(tmp, 2, clip, blitter);
}
-}
+#else
+ if (quick_cubic_niceness_check(pts)) {
+ hair_cubic(pts, clip, blitter, lineproc);
+ } else {
+ SkPoint tmp[13];
+ SkScalar tValues[3];
-#define kMaxCubicSubdivideLevel 6
-#define kMaxQuadSubdivideLevel 5
+ int count = SkChopCubicAtMaxCurvature(pts, tmp, tValues);
+ for (int i = 0; i < count; i++) {
+ hair_cubic(&tmp[i * 3], clip, blitter, lineproc);
+ }
+ }
+#endif
+}
static int compute_quad_level(const SkPoint pts[3]) {
int d = compute_int_quad_dist(pts);
@@ -311,9 +429,9 @@ static void hair_path(const SkPath& path, const SkRasterClip& rclip, SkBlitter*
}
break;
}
- case SkPath::kCubic_Verb:
+ case SkPath::kCubic_Verb: {
haircubic(pts, clip, blitter, kMaxCubicSubdivideLevel, lineproc);
- break;
+ } break;
case SkPath::kClose_Verb:
break;
case SkPath::kDone_Verb: