aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Yuqian Li <liyuqian@google.com>2017-07-25 11:26:31 -0400
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-07-25 21:55:19 +0000
commitdf60e369a8fb83c5dafded3f2453defa423f174a (patch)
treedc5383b60c1efa6430756670e8b0618465bea857
parent3f475d948251e00a1525914469ee72e891bf436c (diff)
New analytic AA scan converter using delta (I call it DAA for now)
DAA is: 1. Much simpler than AAA. SkScan_AAAPath.cpp is about 1700 lines. SkScan_DAAPath.cpp is about 300 lines. The whole DAA CL is only about 800 lines. 2. Much faster than AAA for complicated paths. The speedup applies to the GL backend (including ccpr)! Here's the frame time of 'SampleApp --slide Chart' on a MacBook Pro: AAA-raster: 33ms DAA-raster: 21ms AAA-gl: 30ms DAA-gl: 20ms AAA-ccpr: 18ms DAA-ccpr: 12ms My Linux desktop doesn't have SSE3 so the speedup is smaller (~25% for Chart). I believe that DAA is so fast that I can enable it for any paths (AAA is not enabled by default for complicated paths because it is slow; hence our older supersampling scan converter is used for stroking on Chart for the AAA-xxx configs.) 3. The SkCoverageDelta is suitable for a threaded backend with out-of-order concurrent scan conversion, as commented in the source code. Maybe we can also just send deltas to the GPU. 4. Similar to most analytic path renderers, the quality is on the best ground-truth level, unless there are intersections within a pixel. The intersections look good to my eyes although theoretically they could be arbitrarily far from the ground truth (see my AAA slides). 5. For simple paths, such as circle, triangle, rrect, etc., DAA is slower than AAA. But DAA is faster than our older supersampling scan converter in most cases. As those simple paths usually don't constitute the bottleneck of a picture (skp or svg), I strongly recommend using DAA. 6. DAA also heavily favors blitMask so it may work quite well with SkRasterPipeline and SkRasterPipelineBlitter. Finally, please check https://skia-review.googlesource.com/c/22420/ which accelerates DAA by specializing blitCoverageDeltas for SkARGB32_Blitter and SkARGB32_Black_Blitter. It brings a little (<5%) speedup. But I couldn't figure out how to reduce the duplicate code so I don't intend to land it.
Bug: skia: Change-Id: I3b7ed6a727447922e645b1acb737a506e7c09a4c Reviewed-on: https://skia-review.googlesource.com/19666 Reviewed-by: Mike Reed <reed@google.com> Reviewed-by: Cary Clark <caryclark@google.com> Commit-Queue: Yuqian Li <liyuqian@google.com>
-rw-r--r--bench/nanobench.cpp4
-rw-r--r--dm/DM.cpp1
-rw-r--r--gn/core.gni3
-rw-r--r--samplecode/SampleApp.cpp20
-rw-r--r--src/core/SkAAClip.cpp4
-rw-r--r--src/core/SkAnalyticEdge.h57
-rw-r--r--src/core/SkBlitter.cpp104
-rw-r--r--src/core/SkBlitter.h10
-rw-r--r--src/core/SkCoverageDelta.cpp144
-rw-r--r--src/core/SkCoverageDelta.h218
-rw-r--r--src/core/SkEdgeBuilder.cpp84
-rw-r--r--src/core/SkEdgeBuilder.h43
-rw-r--r--src/core/SkScan.cpp4
-rw-r--r--src/core/SkScan.h4
-rw-r--r--src/core/SkScan_AAAPath.cpp3
-rw-r--r--src/core/SkScan_AntiPath.cpp18
-rw-r--r--src/core/SkScan_DAAPath.cpp352
-rw-r--r--tools/flags/SkCommonFlags.cpp5
-rw-r--r--tools/flags/SkCommonFlags.h2
19 files changed, 1026 insertions, 54 deletions
diff --git a/bench/nanobench.cpp b/bench/nanobench.cpp
index 196cb220d0..798dd0d727 100644
--- a/bench/nanobench.cpp
+++ b/bench/nanobench.cpp
@@ -1199,7 +1199,11 @@ int main(int argc, char** argv) {
}
gSkUseAnalyticAA = FLAGS_analyticAA;
+ gSkUseDeltaAA = FLAGS_deltaAA;
+ if (FLAGS_forceDeltaAA) {
+ gSkForceDeltaAA = true;
+ }
if (FLAGS_forceAnalyticAA) {
gSkForceAnalyticAA = true;
}
diff --git a/dm/DM.cpp b/dm/DM.cpp
index 698a79c962..8be3fbc05e 100644
--- a/dm/DM.cpp
+++ b/dm/DM.cpp
@@ -1289,6 +1289,7 @@ int main(int argc, char** argv) {
setup_crash_handler();
gSkUseAnalyticAA = FLAGS_analyticAA;
+ gSkUseDeltaAA = FLAGS_deltaAA;
if (FLAGS_forceAnalyticAA) {
gSkForceAnalyticAA = true;
diff --git a/gn/core.gni b/gn/core.gni
index d67a26b71b..ab8824122e 100644
--- a/gn/core.gni
+++ b/gn/core.gni
@@ -59,6 +59,8 @@ skia_core_sources = [
"$_src/core/SkCachedData.cpp",
"$_src/core/SkCanvas.cpp",
"$_src/core/SkCanvasPriv.h",
+ "$_src/core/SkCoverageDelta.h",
+ "$_src/core/SkCoverageDelta.cpp",
"$_src/core/SkClipStack.cpp",
"$_src/core/SkClipStack.h",
"$_src/core/SkClipStackDevice.cpp",
@@ -267,6 +269,7 @@ skia_core_sources = [
"$_src/core/SkScan.h",
"$_src/core/SkScanPriv.h",
"$_src/core/SkScan_AAAPath.cpp",
+ "$_src/core/SkScan_DAAPath.cpp",
"$_src/core/SkScan_AntiPath.cpp",
"$_src/core/SkScan_Antihair.cpp",
"$_src/core/SkScan_Hairline.cpp",
diff --git a/samplecode/SampleApp.cpp b/samplecode/SampleApp.cpp
index c36e2e8604..0bd4435118 100644
--- a/samplecode/SampleApp.cpp
+++ b/samplecode/SampleApp.cpp
@@ -1888,11 +1888,17 @@ bool SampleWindow::onHandleChar(SkUnichar uni) {
this->resetFPS();
break;
case 'A':
- if (gSkUseAnalyticAA.load() && !gSkForceAnalyticAA.load()) {
+ if (!gSkUseAnalyticAA) {
+ gSkUseAnalyticAA = true;
+ } else if (!gSkForceAnalyticAA && !gSkUseDeltaAA) {
gSkForceAnalyticAA = true;
- } else {
- gSkUseAnalyticAA = !gSkUseAnalyticAA.load();
+ } else if (!gSkUseDeltaAA) {
gSkForceAnalyticAA = false;
+ gSkUseDeltaAA = true;
+ } else if (!gSkForceDeltaAA) {
+ gSkForceDeltaAA = true;
+ } else {
+ gSkUseAnalyticAA = gSkForceAnalyticAA = gSkUseDeltaAA = gSkForceDeltaAA = false;
}
this->inval(nullptr);
this->updateTitle();
@@ -2255,7 +2261,13 @@ void SampleWindow::updateTitle() {
title.prependf("[T%d/%d] ", gSampleWindow->getTiles(), gSampleWindow->getThreads());
}
- if (gSkUseAnalyticAA) {
+ if (gSkUseDeltaAA) {
+ if (gSkForceDeltaAA) {
+ title.prepend("<FDAA> ");
+ } else {
+ title.prepend("<DAA> ");
+ }
+ } else if (gSkUseAnalyticAA) {
if (gSkForceAnalyticAA) {
title.prepend("<FAAA> ");
} else {
diff --git a/src/core/SkAAClip.cpp b/src/core/SkAAClip.cpp
index 467939b4e7..100ac9c646 100644
--- a/src/core/SkAAClip.cpp
+++ b/src/core/SkAAClip.cpp
@@ -1421,7 +1421,9 @@ bool SkAAClip::setPath(const SkPath& path, const SkRegion* clip, bool doAA) {
BuilderBlitter blitter(&builder);
if (doAA) {
- if (gSkUseAnalyticAA.load()) {
+ if (!path.isConvex() && gSkUseDeltaAA.load()) {
+ SkScan::DAAFillPath(path, snugClip, &blitter, true);
+ } else if (gSkUseAnalyticAA.load()) {
SkScan::AAAFillPath(path, snugClip, &blitter, true);
} else {
SkScan::AntiFillPath(path, snugClip, &blitter, true);
diff --git a/src/core/SkAnalyticEdge.h b/src/core/SkAnalyticEdge.h
index 8ecc489191..533d769c55 100644
--- a/src/core/SkAnalyticEdge.h
+++ b/src/core/SkAnalyticEdge.h
@@ -178,4 +178,61 @@ bool SkAnalyticEdge::setLine(const SkPoint& p0, const SkPoint& p1) {
return true;
}
+struct SkBezier {
+ int fCount; // 2 line, 3 quad, 4 cubic
+ SkPoint fP0;
+ SkPoint fP1;
+
+    // See if, after left shift, convert to SkFDot6, and round, the top and bottom y are equal.
+ // If so, the edge will be empty.
+ static inline bool IsEmpty(SkScalar y0, SkScalar y1, int shift = 2) {
+ SkScalar scale = (1 << (shift + 6));
+ return SkFDot6Round(int(y0 * scale)) == SkFDot6Round(int(y1 * scale));
+ }
+};
+
+struct SkLine : public SkBezier {
+ bool set(const SkPoint pts[2]){
+ if (IsEmpty(pts[0].fY, pts[1].fY)) {
+ return false;
+ }
+ fCount = 2;
+ fP0 = pts[0];
+ fP1 = pts[1];
+ return true;
+ }
+};
+
+struct SkQuad : public SkBezier {
+ SkPoint fP2;
+
+ bool set(const SkPoint pts[3]){
+ if (IsEmpty(pts[0].fY, pts[2].fY)) {
+ return false;
+ }
+ fCount = 3;
+ fP0 = pts[0];
+ fP1 = pts[1];
+ fP2 = pts[2];
+ return true;
+ }
+};
+
+struct SkCubic : public SkBezier {
+ SkPoint fP2;
+ SkPoint fP3;
+
+ bool set(const SkPoint pts[4]){
+ if (IsEmpty(pts[0].fY, pts[3].fY)) {
+ return false;
+ }
+ fCount = 4;
+ fP0 = pts[0];
+ fP1 = pts[1];
+ fP2 = pts[2];
+ fP3 = pts[3];
+ return true;
+ }
+};
+
#endif
diff --git a/src/core/SkBlitter.cpp b/src/core/SkBlitter.cpp
index 5abb08553a..6bd48bef85 100644
--- a/src/core/SkBlitter.cpp
+++ b/src/core/SkBlitter.cpp
@@ -41,6 +41,110 @@ void SkBlitter::blitAntiH(int x, int y, const SkAlpha antialias[],
}
*/
+inline static SkAlpha ScalarToAlpha(SkScalar a) {
+ return (SkAlpha)(a * 255);
+}
+
+void SkBlitter::blitFatAntiRect(const SkRect& rect) {
+ SkIRect bounds = rect.roundOut();
+ SkASSERT(bounds.width() >= 3 && bounds.height() >= 3);
+
+ int runSize = bounds.width() + 1; // +1 so we can set runs[bounds.width()] = 0
+ void* storage = this->allocBlitMemory(runSize * (sizeof(int16_t) + sizeof(SkAlpha)));
+ int16_t* runs = reinterpret_cast<int16_t*>(storage);
+ SkAlpha* alphas = reinterpret_cast<SkAlpha*>(runs + runSize);
+
+ runs[0] = 1;
+ runs[1] = bounds.width() - 2;
+ runs[bounds.width() - 1] = 1;
+ runs[bounds.width()] = 0;
+
+ SkScalar partialL = bounds.fLeft + 1 - rect.fLeft;
+ SkScalar partialR = rect.fRight - (bounds.fRight - 1);
+ SkScalar partialT = bounds.fTop + 1 - rect.fTop;
+ SkScalar partialB = rect.fBottom - (bounds.fBottom - 1);
+
+ alphas[0] = ScalarToAlpha(partialL * partialT);
+ alphas[1] = ScalarToAlpha(partialT);
+ alphas[bounds.width() - 1] = ScalarToAlpha(partialR * partialT);
+ this->blitAntiH(bounds.fLeft, bounds.fTop, alphas, runs);
+
+ this->blitAntiRect(bounds.fLeft, bounds.fTop + 1, bounds.width() - 2, bounds.height() - 2,
+ ScalarToAlpha(partialL), ScalarToAlpha(partialR));
+
+ alphas[0] = ScalarToAlpha(partialL * partialB);
+ alphas[1] = ScalarToAlpha(partialB);
+ alphas[bounds.width() - 1] = ScalarToAlpha(partialR * partialB);
+ this->blitAntiH(bounds.fLeft, bounds.fBottom - 1, alphas, runs);
+}
+
+void SkBlitter::blitCoverageDeltas(SkCoverageDeltaList* deltas, const SkIRect& clip,
+ bool isEvenOdd, bool isInverse, bool isConvex) {
+ int runSize = clip.width() + 1; // +1 so we can set runs[clip.width()] = 0
+ void* storage = this->allocBlitMemory(runSize * (sizeof(int16_t) + sizeof(SkAlpha)));
+ int16_t* runs = reinterpret_cast<int16_t*>(storage);
+ SkAlpha* alphas = reinterpret_cast<SkAlpha*>(runs + runSize);
+ runs[clip.width()] = 0; // we must set the last run to 0 so blitAntiH can stop there
+
+ bool canUseMask = !deltas->forceRLE() &&
+ SkCoverageDeltaMask::CanHandle(SkIRect::MakeLTRB(0, 0, clip.width(), 1));
+ const SkAntiRect& antiRect = deltas->getAntiRect();
+ for(int y = deltas->top(); y < deltas->bottom(); ++y) {
+ // If antiRect is non-empty and we're at its top row, blit it and skip to the bottom
+ if (antiRect.fHeight && y == antiRect.fY) {
+ this->blitAntiRect(antiRect.fX, antiRect.fY, antiRect.fWidth, antiRect.fHeight,
+ antiRect.fLeftAlpha, antiRect.fRightAlpha);
+ y += antiRect.fHeight - 1; // -1 because ++y in the for loop
+ continue;
+ }
+
+ // If there are too many deltas, sorting will be slow. Using a mask will be much faster.
+ // This is such an important optimization that will bring ~2x speedup for benches like
+ // path_fill_small_long_line and path_stroke_small_sawtooth.
+ if (canUseMask && !deltas->sorted(y) && deltas->count(y) << 3 >= clip.width()) {
+ SkIRect rowIR = SkIRect::MakeLTRB(clip.fLeft, y, clip.fRight, y + 1);
+ SkCoverageDeltaMask mask(rowIR);
+ for(int i = 0; i < deltas->count(y); ++i) {
+ const SkCoverageDelta& delta = deltas->getDelta(y, i);
+ mask.addDelta(delta.fX, y, delta.fDelta);
+ }
+ mask.convertCoverageToAlpha(isEvenOdd, isInverse, isConvex);
+ this->blitMask(mask.prepareSkMask(), rowIR);
+ continue;
+ }
+
+ // The normal flow of blitting deltas starts from here. First sort deltas.
+ deltas->sort(y);
+
+ int i = 0; // init delta index to 0
+ int lastX = clip.fLeft; // init x to clip.fLeft
+ SkFixed coverage = 0; // init coverage to 0
+
+ // skip deltas with x less than clip.fLeft; they must be precision errors
+ for(; i < deltas->count(y) && deltas->getDelta(y, i).fX < clip.fLeft; ++i);
+ for(; i < deltas->count(y) && deltas->getDelta(y, i).fX < clip.fRight; ++i) {
+ const SkCoverageDelta& delta = deltas->getDelta(y, i);
+ SkASSERT(delta.fX >= lastX); // delta must be x sorted
+ if (delta.fX > lastX) { // we have proceeded to a new x (different from lastX)
+ SkAlpha alpha = isConvex ? ConvexCoverageToAlpha(coverage, isInverse)
+ : CoverageToAlpha(coverage, isEvenOdd, isInverse);
+ alphas[lastX - clip.fLeft] = alpha; // set alpha at lastX
+ runs[lastX - clip.fLeft] = delta.fX - lastX; // set the run length
+ lastX = delta.fX; // now set lastX to current x
+ }
+ coverage += delta.fDelta; // cumulate coverage with the current delta
+ }
+
+ // Set the alpha and run length from the right-most delta to the right clip boundary
+ SkAlpha alpha = isConvex ? ConvexCoverageToAlpha(coverage, isInverse)
+ : CoverageToAlpha(coverage, isEvenOdd, isInverse);
+ alphas[lastX - clip.fLeft] = alpha;
+ runs[lastX - clip.fLeft] = clip.fRight - lastX;
+
+ this->blitAntiH(clip.fLeft, y, alphas, runs); // finally blit the current row
+ }
+}
+
void SkBlitter::blitV(int x, int y, int height, SkAlpha alpha) {
if (alpha == 255) {
this->blitRect(x, y, 1, height);
diff --git a/src/core/SkBlitter.h b/src/core/SkBlitter.h
index 921e8f367b..c280ac37b7 100644
--- a/src/core/SkBlitter.h
+++ b/src/core/SkBlitter.h
@@ -11,6 +11,7 @@
#include "SkAutoMalloc.h"
#include "SkBitmapProcShader.h"
#include "SkColor.h"
+#include "SkCoverageDelta.h"
#include "SkRect.h"
#include "SkRegion.h"
#include "SkShaderBase.h"
@@ -31,6 +32,12 @@ class SkBlitter {
public:
virtual ~SkBlitter();
+    // The actual blitter may speed up the process by rewriting this in a more efficient way.
+ // For example, one may avoid some virtual blitAntiH calls by directly calling
+ // SkBlitRow::Color32.
+ virtual void blitCoverageDeltas(SkCoverageDeltaList* deltas, const SkIRect& clip,
+ bool isEvenOdd, bool isInverse, bool isConvex);
+
/// Blit a horizontal run of one or more pixels.
virtual void blitH(int x, int y, int width) = 0;
@@ -62,6 +69,9 @@ public:
virtual void blitAntiRect(int x, int y, int width, int height,
SkAlpha leftAlpha, SkAlpha rightAlpha);
+ // Blit a rect in AA with size at least 3 x 3 (small rect has too many edge cases...)
+ void blitFatAntiRect(const SkRect& rect);
+
/// Blit a pattern of pixels defined by a rectangle-clipped mask;
/// typically used for text.
virtual void blitMask(const SkMask&, const SkIRect& clip);
diff --git a/src/core/SkCoverageDelta.cpp b/src/core/SkCoverageDelta.cpp
new file mode 100644
index 0000000000..8f109cec1a
--- /dev/null
+++ b/src/core/SkCoverageDelta.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkCoverageDelta.h"
+
+SkCoverageDeltaList::SkCoverageDeltaList(SkCoverageDeltaAllocator* alloc, int top, int bottom,
+ bool forceRLE) {
+ fAlloc = alloc;
+ fTop = top;
+ fBottom = bottom;
+ fForceRLE = forceRLE;
+
+ // Init the anti-rect to be empty
+ fAntiRect.fY = bottom;
+ fAntiRect.fHeight = 0;
+
+ if (bottom - top <= RESERVED_HEIGHT) {
+ fSorted = fReservedSorted;
+ fCounts = fReservedCounts;
+ fMaxCounts = fReservedMaxCounts;
+ fRows = fReservedRows - top;
+ fRows[top] = fReservedStorage;
+ } else {
+ fSorted = fAlloc->makeArrayDefault<bool>(bottom - top);
+ fCounts = fAlloc->makeArrayDefault<int>((bottom - top) * 2);
+ fMaxCounts = fCounts + bottom - top;
+ fRows = fAlloc->makeArrayDefault<SkCoverageDelta*>(bottom - top) - top;
+ fRows[top] = fAlloc->makeArrayDefault<SkCoverageDelta>(INIT_ROW_SIZE * (bottom - top));
+ }
+
+ memset(fSorted, true, bottom - top);
+ memset(fCounts, 0, sizeof(int) * (bottom - top));
+
+ // Minus top so we can directly use fCounts[y] instead of fCounts[y - fTop].
+ // Same for fMaxCounts, fRows, and fSorted.
+ fSorted -= top;
+ fCounts -= top;
+ fMaxCounts -= top;
+
+ for(int y = top; y < bottom; ++y) {
+ fMaxCounts[y] = INIT_ROW_SIZE;
+ }
+ for(int y = top + 1; y < bottom; ++y) {
+ fRows[y] = fRows[y - 1] + INIT_ROW_SIZE;
+ }
+}
+
+int SkCoverageDeltaMask::ExpandWidth(int width) {
+ int result = width + PADDING * 2;
+ return result + (SIMD_WIDTH - result % SIMD_WIDTH) % SIMD_WIDTH;
+}
+
+bool SkCoverageDeltaMask::CanHandle(const SkIRect& bounds) {
+ // Expand width so we don't have to worry about the boundary
+ return ExpandWidth(bounds.width()) * bounds.height() + PADDING * 2 < MAX_MASK_SIZE;
+}
+
+bool SkCoverageDeltaMask::Suitable(const SkIRect& bounds) {
+ return bounds.width() <= SUITABLE_WIDTH && CanHandle(bounds);
+}
+
+SkCoverageDeltaMask::SkCoverageDeltaMask(const SkIRect& bounds) : fBounds(bounds) {
+ SkASSERT(CanHandle(bounds));
+
+ // Init the anti-rect to be empty
+ fAntiRect.fY = fBounds.fBottom;
+ fAntiRect.fHeight = 0;
+
+ fExpandedWidth = ExpandWidth(fBounds.width());
+
+ // Add PADDING columns so we may access fDeltas[index(-PADDING, 0)]
+ // Minus index(fBounds.fLeft, fBounds.fTop) so we can directly access fDeltas[index(x, y)]
+ fDeltas = fDeltaStorage + PADDING - this->index(fBounds.fLeft, fBounds.fTop);
+
+ memset(fDeltaStorage, 0, (fExpandedWidth * bounds.height() + PADDING * 2) * sizeof(SkFixed));;
+}
+
+void SkCoverageDeltaMask::convertCoverageToAlpha(bool isEvenOdd, bool isInverse, bool isConvex) {
+ SkFixed* deltaRow = &this->delta(fBounds.fLeft, fBounds.fTop);
+ SkAlpha* maskRow = fMask;
+ for(int iy = 0; iy < fBounds.height(); ++iy) {
+ // If we're inside fAntiRect, blit it to the mask and advance to its bottom
+ if (fAntiRect.fHeight && iy == fAntiRect.fY - fBounds.fTop) {
+ // Blit the mask
+ int L = fAntiRect.fX - fBounds.fLeft;
+ for(int i = 0; i < fAntiRect.fHeight; ++i) {
+ SkAlpha* tMask = maskRow + L;
+ if (fAntiRect.fLeftAlpha) {
+ tMask[0] = fAntiRect.fLeftAlpha;
+ }
+ memset(tMask + 1, 0xff, fAntiRect.fWidth);
+ if (fAntiRect.fRightAlpha) {
+ tMask[fAntiRect.fWidth + 1] = fAntiRect.fRightAlpha;
+ }
+ maskRow += fBounds.width();
+ }
+
+ // Advance to the bottom (maskRow is already advanced to the bottom).
+ deltaRow += fExpandedWidth * fAntiRect.fHeight;
+ iy += fAntiRect.fHeight - 1; // -1 because we'll ++iy after continue
+ continue;
+ }
+
+ // Otherwise, cumulate deltas into coverages, and convert them into alphas
+ SkFixed c[SIMD_WIDTH] = {0}; // prepare SIMD_WIDTH coverages at a time
+ for(int ix = 0; ix < fExpandedWidth; ix += SIMD_WIDTH) {
+ // Future todo: is it faster to process SIMD_WIDTH rows at a time so we can use SIMD
+ // for coverage accumulation?
+
+ // Cumulate deltas to get SIMD_WIDTH new coverages
+ c[0] = c[SIMD_WIDTH - 1] + deltaRow[ix];
+ for(int j = 1; j < SIMD_WIDTH; ++j) {
+ c[j] = c[j - 1] + deltaRow[ix + j];
+ }
+
+ // My SIMD CoverageToAlpha seems to be only faster with SSSE3.
+ // (On linux, even with -mavx2, my SIMD still seems to be slow...)
+    // Even with only SSE2, it's still faster to do SIMD_WIDTH non-SIMD computations at one
+ // time (i.e., SIMD_WIDTH = 8 is faster than SIMD_WIDTH = 1 even if SK_CPU_SSE_LEVEL is
+ // less than SK_CPU_SSE_LEVEL_SSSE3). Maybe the compiler is doing some SIMD by itself.
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
+ using SkNi = SkNx<SIMD_WIDTH, int>;
+
+ SkNi cn = SkNi::Load(c);
+ SkNi an = isConvex ? ConvexCoverageToAlpha(cn, isInverse)
+ : CoverageToAlpha(cn, isEvenOdd, isInverse);
+ SkNx_cast<SkAlpha>(an).store(maskRow + ix);
+#else
+ for(int j = 0; j < SIMD_WIDTH; ++j) {
+ maskRow[ix + j] = isConvex ? ConvexCoverageToAlpha(c[j], isInverse)
+ : CoverageToAlpha(c[j], isEvenOdd, isInverse);
+ }
+#endif
+ }
+
+ // Finally, advance to the next row
+ deltaRow += fExpandedWidth;
+ maskRow += fBounds.width();
+ }
+}
diff --git a/src/core/SkCoverageDelta.h b/src/core/SkCoverageDelta.h
new file mode 100644
index 0000000000..e0ecb9d7e6
--- /dev/null
+++ b/src/core/SkCoverageDelta.h
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkCoverageDelta_DEFINED
+#define SkCoverageDelta_DEFINED
+
+#include "SkArenaAlloc.h"
+#include "SkFixed.h"
+#include "SkMask.h"
+#include "SkTSort.h"
+#include "SkUtils.h"
+
+// Future todo: maybe we can make fX and fDelta 16-bit long to speed it up a little bit.
+struct SkCoverageDelta {
+ int fX; // the y coordinate will be implied in SkCoverageDeltaList
+ SkFixed fDelta; // the amount that the alpha changed
+
+ // Sort according to fX
+ bool operator<(const SkCoverageDelta& other) const {
+ return fX < other.fX;
+ }
+};
+
+// All the arguments needed for SkBlitter::blitAntiRect
+struct SkAntiRect {
+ int fX;
+ int fY;
+ int fWidth;
+ int fHeight;
+ SkAlpha fLeftAlpha;
+ SkAlpha fRightAlpha;
+};
+
+using SkCoverageDeltaAllocator = SkSTArenaAlloc<256>;
+
+// A list of SkCoverageDelta with y from top() to bottom().
+// For each row y, there are count(y) number of deltas.
+// You can ask whether they are sorted or not by sorted(y), and you can sort them by sort(y).
+// Once sorted, getDelta(y, i) should return the i-th leftmost delta on row y.
+class SkCoverageDeltaList {
+public:
+ // We can store INIT_ROW_SIZE deltas per row (i.e., per y-scanline) initially
+ static constexpr int INIT_ROW_SIZE = 32;
+ static constexpr int RESERVED_HEIGHT = 128; // reserve this many rows on stack memory
+
+ SkCoverageDeltaList(SkCoverageDeltaAllocator* alloc, int top, int bottom, bool forceRLE);
+
+ inline int top() const { return fTop; }
+ inline int bottom() const { return fBottom; }
+ inline bool forceRLE() const { return fForceRLE; }
+ inline int count(int y) const { this->checkY(y); return fCounts[y]; }
+ inline bool sorted(int y) const { this->checkY(y); return fSorted[y]; }
+ inline void addDelta(int x, int y, SkFixed delta) { this->push_back(y, {x, delta}); }
+
+ inline const SkCoverageDelta& getDelta(int y, int i) const {
+ this->checkY(y);
+ SkASSERT(i < fCounts[y]);
+ return fRows[y][i];
+ }
+
+ // It might be better to sort right before blitting to make the memory hot
+ inline void sort(int y) {
+ this->checkY(y);
+ if (!fSorted[y]) {
+ SkTQSort(fRows[y], fRows[y] + fCounts[y] - 1);
+ fSorted[y] = true;
+ }
+ }
+
+ inline const SkAntiRect& getAntiRect() const { return fAntiRect; }
+ inline void setAntiRect(int x, int y, int width, int height,
+ SkAlpha leftAlpha, SkAlpha rightAlpha) {
+ fAntiRect = {x, y, width, height, leftAlpha, rightAlpha};
+ }
+
+ inline void push_back(int y, const SkCoverageDelta& delta) {
+ this->checkY(y);
+ if (fCounts[y] == fMaxCounts[y]) {
+ fMaxCounts[y] *= 2;
+ SkCoverageDelta* newRow = fAlloc->makeArrayDefault<SkCoverageDelta>(fMaxCounts[y]);
+ memcpy(newRow, fRows[y], sizeof(SkCoverageDelta) * fCounts[y]);
+ fRows[y] = newRow;
+ }
+ SkASSERT(fCounts[y] < fMaxCounts[y]);
+ fRows[y][fCounts[y]++] = delta;
+ fSorted[y] = fSorted[y] && (fCounts[y] == 1 || delta.fX >= fRows[y][fCounts[y] - 2].fX);
+ }
+
+private:
+ SkCoverageDeltaAllocator* fAlloc;
+ SkCoverageDelta** fRows;
+ bool* fSorted;
+ int* fCounts;
+ int* fMaxCounts;
+ int fTop;
+ int fBottom;
+ SkAntiRect fAntiRect;
+ bool fForceRLE;
+
+ SkCoverageDelta fReservedStorage[RESERVED_HEIGHT * INIT_ROW_SIZE];
+ SkCoverageDelta* fReservedRows[RESERVED_HEIGHT];
+ bool fReservedSorted[RESERVED_HEIGHT];
+ int fReservedCounts[RESERVED_HEIGHT];
+ int fReservedMaxCounts[RESERVED_HEIGHT];
+
+ inline void checkY(int y) const { SkASSERT(y >= fTop && y < fBottom); }
+};
+
+class SkCoverageDeltaMask {
+public:
+ // 1 for precision error, 1 for boundary delta (e.g., -SK_Fixed1 at fBounds.fRight + 1)
+ static constexpr int PADDING = 2;
+
+ static constexpr int SIMD_WIDTH = 8;
+ static constexpr int SUITABLE_WIDTH = 32;
+ static constexpr int MAX_MASK_SIZE = 2048;
+
+ // Expand PADDING on both sides, and make it a multiple of SIMD_WIDTH
+ static int ExpandWidth(int width);
+ static bool CanHandle(const SkIRect& bounds); // whether bounds fits into MAX_MASK_SIZE
+ static bool Suitable(const SkIRect& bounds); // CanHandle(bounds) && width <= SUITABLE_WIDTH
+
+ SkCoverageDeltaMask(const SkIRect& bounds);
+
+ inline int top() const { return fBounds.fTop; }
+ inline int bottom() const { return fBounds.fBottom; }
+ inline SkAlpha* getMask() { return fMask; }
+ inline const SkIRect& getBounds() const { return fBounds; }
+
+ inline void addDelta (int x, int y, SkFixed delta) { this->delta(x, y) += delta; }
+ inline SkFixed& delta (int x, int y) {
+ this->checkX(x);
+ this->checkY(y);
+ return fDeltas[this->index(x, y)];
+ }
+
+ inline void setAntiRect(int x, int y, int width, int height,
+ SkAlpha leftAlpha, SkAlpha rightAlpha) {
+ fAntiRect = {x, y, width, height, leftAlpha, rightAlpha};
+ }
+
+ inline SkMask prepareSkMask() {
+ SkMask mask;
+ mask.fImage = fMask;
+ mask.fBounds = fBounds;
+ mask.fRowBytes = fBounds.width();
+ mask.fFormat = SkMask::kA8_Format;
+ return mask;
+ }
+
+ void convertCoverageToAlpha(bool isEvenOdd, bool isInverse, bool isConvex);
+
+private:
+ SkIRect fBounds;
+ SkFixed fDeltaStorage[MAX_MASK_SIZE];
+ SkFixed* fDeltas;
+ SkAlpha fMask[MAX_MASK_SIZE];
+ int fExpandedWidth;
+ SkAntiRect fAntiRect;
+
+ inline int index(int x, int y) const { return y * fExpandedWidth + x; }
+ inline void checkY(int y) const { SkASSERT(y >= fBounds.fTop && y < fBounds.fBottom); }
+ inline void checkX(int x) const {
+ SkASSERT(x >= fBounds.fLeft - PADDING && x < fBounds.fRight + PADDING);
+ }
+};
+
+static SK_ALWAYS_INLINE SkAlpha CoverageToAlpha(SkFixed coverage, bool isEvenOdd, bool isInverse) {
+ SkAlpha result;
+ if (isEvenOdd) {
+ SkFixed mod17 = coverage & 0x1ffff;
+ SkFixed mod16 = coverage & 0xffff;
+ result = SkTPin(SkAbs32((mod16 << 1) - mod17) >> 8, 0, 255);
+ } else {
+ result = SkTPin(SkAbs32(coverage) >> 8, 0, 255);
+ }
+ return isInverse ? 255 - result : result;
+}
+
+template<typename T>
+static SK_ALWAYS_INLINE T CoverageToAlpha(T coverage, bool isEvenOdd, bool isInverse) {
+ T t0(0), t255(255);
+ T result;
+ if (isEvenOdd) {
+ T mod17 = coverage & 0x1ffff;
+ T mod16 = coverage & 0xffff;
+ result = ((mod16 << 1) - mod17).abs() >> 8;
+ } else {
+ result = coverage.abs() >> 8;;
+ }
+ result = T::Min(result, t255);
+ result = T::Max(result, t0);
+ return isInverse ? 255 - result : result;
+}
+
+// For convex paths (including inverse mode), the coverage is guaranteed to be
+// between [-SK_Fixed1, SK_Fixed1] so we can skip isEvenOdd and SkTPin.
+static SK_ALWAYS_INLINE SkAlpha ConvexCoverageToAlpha(SkFixed coverage, bool isInverse) {
+ SkASSERT(coverage >= -SK_Fixed1 && coverage <= SK_Fixed1);
+ int result = SkAbs32(coverage) >> 8;
+ result -= (result >> 8); // 256 to 255
+ return isInverse ? 255 - result : result;
+}
+
+template<typename T>
+static SK_ALWAYS_INLINE T ConvexCoverageToAlpha(T coverage, bool isInverse) {
+ // allTrue is not implemented
+ // SkASSERT((coverage >= 0).allTrue() && (coverage <= SK_Fixed1).allTrue());
+ T result = coverage.abs() >> 8;
+ result -= (result >> 8); // 256 to 255
+ return isInverse ? 255 - result : result;
+}
+
+#endif
diff --git a/src/core/SkEdgeBuilder.cpp b/src/core/SkEdgeBuilder.cpp
index 385193040b..7a02187470 100644
--- a/src/core/SkEdgeBuilder.cpp
+++ b/src/core/SkEdgeBuilder.cpp
@@ -65,7 +65,7 @@ static inline bool approximatelyEqual(SkFixed a, SkFixed b) {
SkEdgeBuilder::Combine SkEdgeBuilder::CombineVertical(
const SkAnalyticEdge* edge, SkAnalyticEdge* last) {
- SkASSERT(fAnalyticAA);
+ SkASSERT(fEdgeType == kAnalyticEdge);
if (last->fCurveCount || last->fDX || edge->fX != last->fX) {
return kNo_Combine;
}
@@ -115,12 +115,17 @@ bool SkEdgeBuilder::vertical_line(const SkEdge* edge) {
}
bool SkEdgeBuilder::vertical_line(const SkAnalyticEdge* edge) {
- SkASSERT(fAnalyticAA);
+ SkASSERT(fEdgeType == kAnalyticEdge);
return !edge->fDX && !edge->fCurveCount;
}
void SkEdgeBuilder::addLine(const SkPoint pts[]) {
- if (fAnalyticAA) {
+ if (fEdgeType == kBezier) {
+ SkLine* line = fAlloc.make<SkLine>();
+ if (line->set(pts)) {
+ fList.push(line);
+ }
+ } else if (fEdgeType == kAnalyticEdge) {
SkAnalyticEdge* edge = fAlloc.make<SkAnalyticEdge>();
if (edge->setLine(pts[0], pts[1])) {
if (vertical_line(edge) && fList.count()) {
@@ -160,7 +165,12 @@ unallocate_edge:
}
void SkEdgeBuilder::addQuad(const SkPoint pts[]) {
- if (fAnalyticAA) {
+ if (fEdgeType == kBezier) {
+ SkQuad* quad = fAlloc.make<SkQuad>();
+ if (quad->set(pts)) {
+ fList.push(quad);
+ }
+ } else if (fEdgeType == kAnalyticEdge) {
SkAnalyticQuadraticEdge* edge = fAlloc.make<SkAnalyticQuadraticEdge>();
if (edge->setQuadratic(pts)) {
fList.push(edge);
@@ -178,7 +188,12 @@ void SkEdgeBuilder::addQuad(const SkPoint pts[]) {
}
void SkEdgeBuilder::addCubic(const SkPoint pts[]) {
- if (fAnalyticAA) {
+ if (fEdgeType == kBezier) {
+ SkCubic* cubic = fAlloc.make<SkCubic>();
+ if (cubic->set(pts)) {
+ fList.push(cubic);
+ }
+ } else if (fEdgeType == kAnalyticEdge) {
SkAnalyticCubicEdge* edge = fAlloc.make<SkAnalyticCubicEdge>();
if (edge->setCubic(pts)) {
fList.push(edge);
@@ -232,7 +247,7 @@ SkEdgeBuilder::Combine SkEdgeBuilder::checkVertical(const SkEdge* edge, SkEdge**
SkEdgeBuilder::Combine SkEdgeBuilder::checkVertical(const SkAnalyticEdge* edge,
SkAnalyticEdge** edgePtr) {
- SkASSERT(fAnalyticAA);
+ SkASSERT(fEdgeType == kAnalyticEdge);
return !vertical_line(edge) || edgePtr <= (SkAnalyticEdge**)fEdgeList ? kNo_Combine :
CombineVertical(edge, edgePtr[-1]);
}
@@ -251,9 +266,22 @@ int SkEdgeBuilder::buildPoly(const SkPath& path, const SkIRect* iclip, int shift
maxEdgeCount *= SkLineClipper::kMaxClippedLineSegments;
}
- size_t edgeSize = fAnalyticAA ? sizeof(SkAnalyticEdge) : sizeof(SkEdge);
- char* edge = fAnalyticAA ? (char*)fAlloc.makeArrayDefault<SkAnalyticEdge>(maxEdgeCount)
- : (char*)fAlloc.makeArrayDefault<SkEdge>(maxEdgeCount);
+ size_t edgeSize;
+ char* edge;
+ switch (fEdgeType) {
+ case kEdge:
+ edgeSize = sizeof(SkEdge);
+ edge = (char*)fAlloc.makeArrayDefault<SkEdge>(maxEdgeCount);
+ break;
+ case kAnalyticEdge:
+ edgeSize = sizeof(SkAnalyticEdge);
+ edge = (char*)fAlloc.makeArrayDefault<SkAnalyticEdge>(maxEdgeCount);
+ break;
+ case kBezier:
+ edgeSize = sizeof(SkLine);
+ edge = (char*)fAlloc.makeArrayDefault<SkLine>(maxEdgeCount);
+ break;
+ }
SkDEBUGCODE(char* edgeStart = edge);
char** edgePtr = fAlloc.makeArrayDefault<char*>(maxEdgeCount);
@@ -275,20 +303,7 @@ int SkEdgeBuilder::buildPoly(const SkPath& path, const SkIRect* iclip, int shift
int lineCount = SkLineClipper::ClipLine(pts, clip, lines, canCullToTheRight);
SkASSERT(lineCount <= SkLineClipper::kMaxClippedLineSegments);
for (int i = 0; i < lineCount; i++) {
- bool setLineResult = fAnalyticAA ?
- ((SkAnalyticEdge*)edge)->setLine(lines[i], lines[i + 1]) :
- ((SkEdge*)edge)->setLine(lines[i], lines[i + 1], shiftUp);
- if (setLineResult) {
- Combine combine = fAnalyticAA ?
- checkVertical((SkAnalyticEdge*)edge, (SkAnalyticEdge**)edgePtr) :
- checkVertical((SkEdge*)edge, (SkEdge**)edgePtr);
- if (kNo_Combine == combine) {
- *edgePtr++ = edge;
- edge += edgeSize;
- } else if (kTotal_Combine == combine) {
- --edgePtr;
- }
- }
+ this->addPolyLine(lines + i, edge, edgeSize, edgePtr, shiftUp);
}
break;
}
@@ -306,20 +321,7 @@ int SkEdgeBuilder::buildPoly(const SkPath& path, const SkIRect* iclip, int shift
// the corresponding line/quad/cubic verbs
break;
case SkPath::kLine_Verb: {
- bool setLineResult = fAnalyticAA ?
- ((SkAnalyticEdge*)edge)->setLine(pts[0], pts[1]) :
- ((SkEdge*)edge)->setLine(pts[0], pts[1], shiftUp);
- if (setLineResult) {
- Combine combine = fAnalyticAA ?
- checkVertical((SkAnalyticEdge*)edge, (SkAnalyticEdge**)edgePtr) :
- checkVertical((SkEdge*)edge, (SkEdge**)edgePtr);
- if (kNo_Combine == combine) {
- *edgePtr++ = edge;
- edge += edgeSize;
- } else if (kTotal_Combine == combine) {
- --edgePtr;
- }
- }
+ this->addPolyLine(pts, edge, edgeSize, edgePtr, shiftUp);
break;
}
default:
@@ -342,11 +344,11 @@ static void handle_quad(SkEdgeBuilder* builder, const SkPoint pts[3]) {
}
int SkEdgeBuilder::build(const SkPath& path, const SkIRect* iclip, int shiftUp,
- bool canCullToTheRight, bool analyticAA) {
+ bool canCullToTheRight, EdgeType edgeType) {
fAlloc.reset();
fList.reset();
fShiftUp = shiftUp;
- fAnalyticAA = analyticAA;
+ fEdgeType = edgeType;
if (SkPath::kLine_SegmentMask == path.getSegmentMasks()) {
return this->buildPoly(path, iclip, shiftUp, canCullToTheRight);
@@ -443,12 +445,12 @@ int SkEdgeBuilder::build(const SkPath& path, const SkIRect* iclip, int shiftUp,
}
int SkEdgeBuilder::build_edges(const SkPath& path, const SkIRect* shiftedClip,
- int shiftEdgesUp, bool pathContainedInClip, bool analyticAA) {
+ int shiftEdgesUp, bool pathContainedInClip, EdgeType edgeType) {
// If we're convex, then we need both edges, even the right edge is past the clip
const bool canCullToTheRight = !path.isConvex();
const SkIRect* builderClip = pathContainedInClip ? nullptr : shiftedClip;
- int count = this->build(path, builderClip, shiftEdgesUp, canCullToTheRight, analyticAA);
+ int count = this->build(path, builderClip, shiftEdgesUp, canCullToTheRight, edgeType);
SkASSERT(count >= 0);
// canCullToRight == false should imply count != 1
diff --git a/src/core/SkEdgeBuilder.h b/src/core/SkEdgeBuilder.h
index 2f2b35104f..b876fcf5e5 100644
--- a/src/core/SkEdgeBuilder.h
+++ b/src/core/SkEdgeBuilder.h
@@ -10,6 +10,8 @@
#include "SkArenaAlloc.h"
#include "SkRect.h"
#include "SkTDArray.h"
+#include "SkEdge.h"
+#include "SkAnalyticEdge.h"
struct SkEdge;
struct SkAnalyticEdge;
@@ -18,18 +20,27 @@ class SkPath;
class SkEdgeBuilder {
public:
+ enum EdgeType {
+ kEdge,
+ kAnalyticEdge,
+ kBezier
+ };
+
+ // static constexpr int kEdgeSizes[3] = {sizeof(SkEdge), sizeof(SkAnalyticEdge), sizeof(SkBezier)};
+
SkEdgeBuilder();
// returns the number of built edges. The array of those edge pointers
// is returned from edgeList().
int build(const SkPath& path, const SkIRect* clip, int shiftUp, bool clipToTheRight,
- bool analyticAA = false);
+ EdgeType edgeType = kEdge);
int build_edges(const SkPath& path, const SkIRect* shiftedClip,
- int shiftEdgesUp, bool pathContainedInClip, bool analyticAA = false);
+ int shiftEdgesUp, bool pathContainedInClip, EdgeType edgeType = kEdge);
SkEdge** edgeList() { return (SkEdge**)fEdgeList; }
SkAnalyticEdge** analyticEdgeList() { return (SkAnalyticEdge**)fEdgeList; }
+ SkBezier** bezierList() { return (SkBezier**)fEdgeList; }
private:
enum Combine {
@@ -57,7 +68,7 @@ private:
void** fEdgeList;
int fShiftUp;
- bool fAnalyticAA;
+ EdgeType fEdgeType;
public:
void addLine(const SkPoint pts[]);
@@ -66,6 +77,32 @@ public:
void addClipper(SkEdgeClipper*);
int buildPoly(const SkPath& path, const SkIRect* clip, int shiftUp, bool clipToTheRight);
+
+ inline void addPolyLine(SkPoint pts[], char* &edge, size_t edgeSize, char** &edgePtr,
+ int shiftUp) {
+ if (fEdgeType == kBezier) {
+ if (((SkLine*)edge)->set(pts)) {
+ *edgePtr++ = edge;
+ edge += edgeSize;
+ }
+ return;
+ }
+ bool analyticAA = fEdgeType == kAnalyticEdge;
+ bool setLineResult = analyticAA ?
+ ((SkAnalyticEdge*)edge)->setLine(pts[0], pts[1]) :
+ ((SkEdge*)edge)->setLine(pts[0], pts[1], shiftUp);
+ if (setLineResult) {
+ Combine combine = analyticAA ?
+ checkVertical((SkAnalyticEdge*)edge, (SkAnalyticEdge**)edgePtr) :
+ checkVertical((SkEdge*)edge, (SkEdge**)edgePtr);
+ if (kNo_Combine == combine) {
+ *edgePtr++ = edge;
+ edge += edgeSize;
+ } else if (kTotal_Combine == combine) {
+ --edgePtr;
+ }
+ }
+ }
};
#endif
diff --git a/src/core/SkScan.cpp b/src/core/SkScan.cpp
index c2f00f3a84..b62c972a9f 100644
--- a/src/core/SkScan.cpp
+++ b/src/core/SkScan.cpp
@@ -18,6 +18,10 @@
std::atomic<bool> gSkForceAnalyticAA{false};
+std::atomic<bool> gSkUseDeltaAA{false};
+
+std::atomic<bool> gSkForceDeltaAA{false};
+
static inline void blitrect(SkBlitter* blitter, const SkIRect& r) {
blitter->blitRect(r.fLeft, r.fTop, r.width(), r.height());
}
diff --git a/src/core/SkScan.h b/src/core/SkScan.h
index e5c6047187..8bb9d1f86d 100644
--- a/src/core/SkScan.h
+++ b/src/core/SkScan.h
@@ -23,6 +23,8 @@ class SkPath;
*/
typedef SkIRect SkXRect;
+extern std::atomic<bool> gSkUseDeltaAA;
+extern std::atomic<bool> gSkForceDeltaAA;
extern std::atomic<bool> gSkUseAnalyticAA;
extern std::atomic<bool> gSkForceAnalyticAA;
@@ -89,6 +91,8 @@ private:
static void AntiHairLineRgn(const SkPoint[], int count, const SkRegion*, SkBlitter*);
static void AAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
bool forceRLE = false); // SkAAClip uses forceRLE
+ static void DAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
+ bool forceRLE = false);
};
/** Assign an SkXRect from a SkIRect, by promoting the src rect's coordinates
diff --git a/src/core/SkScan_AAAPath.cpp b/src/core/SkScan_AAAPath.cpp
index 8917c7ffc6..0da39d522c 100644
--- a/src/core/SkScan_AAAPath.cpp
+++ b/src/core/SkScan_AAAPath.cpp
@@ -1575,7 +1575,8 @@ static SK_ALWAYS_INLINE void aaa_fill_path(const SkPath& path, const SkIRect& cl
SkASSERT(blitter);
SkEdgeBuilder builder;
- int count = builder.build_edges(path, &clipRect, 0, pathContainedInClip, true);
+ int count = builder.build_edges(path, &clipRect, 0, pathContainedInClip,
+ SkEdgeBuilder::kAnalyticEdge);
SkAnalyticEdge** list = builder.analyticEdgeList();
SkIRect rect = clipRect;
diff --git a/src/core/SkScan_AntiPath.cpp b/src/core/SkScan_AntiPath.cpp
index 69b5ba1fce..2a92b42360 100644
--- a/src/core/SkScan_AntiPath.cpp
+++ b/src/core/SkScan_AntiPath.cpp
@@ -634,6 +634,14 @@ void SkScan::FillPath(const SkPath& path, const SkRasterClip& clip,
}
}
+static bool suitableForDAA(const SkPath& path) {
+    if (gSkForceDeltaAA.load()) { // --forceDeltaAA forces DAA for every path
+        return true;
+    }
+    const SkRect& bounds = path.getBounds();
+    return !path.isConvex() && path.countPoints() >= SkTMax(bounds.width(), bounds.height()) / 8; // DAA pays off on concave, point-dense paths
+}
+
static bool suitableForAAA(const SkPath& path) {
if (gSkForceAnalyticAA.load()) {
return true;
@@ -659,9 +667,11 @@ void SkScan::AntiFillPath(const SkPath& path, const SkRasterClip& clip,
using FillPathProc = void(*)(const SkPath&, const SkRegion&, SkBlitter*, bool);
FillPathProc fillPathProc = &SkScan::AntiFillPath;
- // Do not use AAA if path is too complicated:
- // there won't be any speedup or significant visual improvement.
- if (gSkUseAnalyticAA.load() && suitableForAAA(path)) {
+ if (gSkUseDeltaAA.load() && suitableForDAA(path)) {
+ fillPathProc = &SkScan::DAAFillPath;
+ } else if (gSkUseAnalyticAA.load() && suitableForAAA(path)) {
+ // Do not use AAA if path is too complicated:
+ // there won't be any speedup or significant visual improvement.
fillPathProc = &SkScan::AAAFillPath;
}
@@ -673,6 +683,6 @@ void SkScan::AntiFillPath(const SkPath& path, const SkRasterClip& clip,
tmp.setRect(clip.getBounds());
aaBlitter.init(blitter, &clip.aaRgn());
- fillPathProc(path, tmp, &aaBlitter, true);
+ fillPathProc(path, tmp, &aaBlitter, true); // SkAAClipBlitter can blitMask, why forceRLE?
}
}
diff --git a/src/core/SkScan_DAAPath.cpp b/src/core/SkScan_DAAPath.cpp
new file mode 100644
index 0000000000..295d621446
--- /dev/null
+++ b/src/core/SkScan_DAAPath.cpp
@@ -0,0 +1,352 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkAnalyticEdge.h"
+#include "SkAntiRun.h"
+#include "SkAutoMalloc.h"
+#include "SkBlitter.h"
+#include "SkCoverageDelta.h"
+#include "SkEdge.h"
+#include "SkEdgeBuilder.h"
+#include "SkGeometry.h"
+#include "SkMask.h"
+#include "SkPath.h"
+#include "SkQuadClipper.h"
+#include "SkRasterClip.h"
+#include "SkRegion.h"
+#include "SkScan.h"
+#include "SkScanPriv.h"
+#include "SkTSort.h"
+#include "SkTemplates.h"
+#include "SkUtils.h"
+
+///////////////////////////////////////////////////////////////////////////////
+
+/*
+
+DAA stands for Delta-based Anti-Aliasing.
+
+This is an improved version of our analytic AA algorithm (in SkScan_AAAPath.cpp)
+which first scan convert paths into coverage deltas (this step can happen out of order,
+and we don't seem to need to worry about intersections, clamping, etc.),
+and then use a single blitter run to convert all those deltas into the final alphas.
+
+Before we go to the final blitter run, we'll use SkFixed for all delta values so we
+don't ever have to worry about under/overflow.
+
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+
+// The following helper functions are the same as those from SkScan_AAAPath.cpp
+// except that we use SkFixed everywhere instead of SkAlpha.
+
+static inline SkFixed SkFixedMul_lowprec(SkFixed a, SkFixed b) {
+ return (a >> 8) * (b >> 8);
+}
+
+// Return the alpha of a trapezoid whose height is 1
+static inline SkFixed trapezoidToAlpha(SkFixed l1, SkFixed l2) {
+ SkASSERT(l1 >= 0 && l2 >= 0);
+ return (l1 + l2) >> 1;
+}
+
+// The alpha of right-triangle (a, a*b)
+static inline SkFixed partialTriangleToAlpha(SkFixed a, SkFixed b) {
+ SkASSERT(a <= SK_Fixed1);
+ // SkFixedMul(SkFixedMul(a, a), b) >> 1
+ // return ((((a >> 8) * (a >> 8)) >> 8) * (b >> 8)) >> 1;
+ return (a >> 11) * (a >> 11) * (b >> 11);
+}
+
+static inline SkFixed getPartialAlpha(SkFixed alpha, SkFixed partialMultiplier) {
+ // DAA should not be so sensitive to the precision (compared to AAA).
+ return SkFixedMul_lowprec(alpha, partialMultiplier);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+template<bool isPartial, class Deltas>
+static inline void add_coverage_delta_segment(int y, SkFixed rowHeight, const SkAnalyticEdge* edge,
+ SkFixed nextX, Deltas* deltas) { // rowHeight=fullAlpha
+ SkASSERT(rowHeight <= SK_Fixed1 && rowHeight >= 0);
+
+ // Let's see if multiplying sign is faster than multiplying edge->fWinding.
+ // (Compiler should be able to optimize multiplication with 1/-1?)
+ int sign = edge->fWinding == 1 ? 1 : -1;
+
+ SkFixed l = SkTMin(edge->fX, nextX);
+ SkFixed r = edge->fX + nextX - l;
+ int L = SkFixedFloorToInt(l);
+ int R = SkFixedCeilToInt(r);
+ int len = R - L;
+
+ switch (len) {
+ case 0: {
+ deltas->addDelta(L, y, rowHeight * sign);
+ return;
+ }
+ case 1: {
+ SkFixed fixedR = SkIntToFixed(R);
+ SkFixed alpha = trapezoidToAlpha(fixedR - l, fixedR - r);
+ if (isPartial) {
+ alpha = getPartialAlpha(alpha, rowHeight);
+ }
+ deltas->addDelta(L, y, alpha * sign);
+ deltas->addDelta(L + 1, y, (rowHeight - alpha) * sign);
+ return;
+ }
+ case 2: {
+ SkFixed middle = SkIntToFixed(L + 1);
+ SkFixed x1 = middle - l;
+ SkFixed x2 = r - middle;
+ SkFixed alpha1 = partialTriangleToAlpha(x1, edge->fDY);
+ SkFixed alpha2 = rowHeight - partialTriangleToAlpha(x2, edge->fDY);
+ deltas->addDelta(L, y, alpha1 * sign);
+ deltas->addDelta(L + 1, y, (alpha2 - alpha1) * sign);
+ deltas->addDelta(L + 2, y, (rowHeight - alpha2) * sign);
+ return;
+ }
+ }
+
+ // When len > 2, computations are similar to computeAlphaAboveLine in SkScan_AAAPath.cpp
+ SkFixed dY = edge->fDY;
+ SkFixed fixedL = SkIntToFixed(L);
+ SkFixed fixedR = SkIntToFixed(R);
+ SkFixed first = SK_Fixed1 + fixedL - l; // horizontal edge of the left-most triangle
+ SkFixed last = r - (fixedR - SK_Fixed1); // horizontal edge of the right-most triangle
+ SkFixed firstH = SkFixedMul_lowprec(first, dY); // vertical edge of the left-most triangle
+
+ SkFixed alpha0 = SkFixedMul_lowprec(first, firstH) >> 1; // triangle alpha
+ SkFixed alpha1 = firstH + (dY >> 1); // rectangle plus triangle
+ deltas->addDelta(L, y, alpha0 * sign);
+ deltas->addDelta(L + 1, y, (alpha1 - alpha0) * sign);
+ for(int i = 2; i < len - 1; ++i) {
+ deltas->addDelta(L + i, y, dY * sign); // the increment is always a rect of height = dY
+ }
+
+ SkFixed alphaR2 = alpha1 + dY * (len - 3); // the alpha at R - 2
+ SkFixed lastAlpha = rowHeight - partialTriangleToAlpha(last, dY); // the alpha at R - 1
+ deltas->addDelta(R - 1, y, (lastAlpha - alphaR2) * sign);
+ deltas->addDelta(R, y, (rowHeight - lastAlpha) * sign);
+}
+
+class XLessThan {
+public:
+ bool operator()(const SkBezier* a, const SkBezier* b) {
+ return a->fP0.fX + a->fP1.fX < b->fP0.fX + b->fP1.fX;
+ }
+};
+
+class YLessThan {
+public:
+ bool operator()(const SkBezier* a, const SkBezier* b) {
+ return a->fP0.fY + a->fP1.fY < b->fP0.fY + b->fP1.fY;
+ }
+};
+
+template<class Deltas> static SK_ALWAYS_INLINE
+void gen_alpha_deltas(const SkPath& path, const SkRegion& clipRgn, Deltas& result,
+ SkBlitter* blitter, bool skipRect, bool pathContainedInClip) {
+ // 1. Build edges
+ SkEdgeBuilder builder;
+ SkIRect ir = path.getBounds().roundOut();
+ const SkIRect& clipRect = clipRgn.getBounds();
+ int count = builder.build_edges(path, &clipRect, 0, pathContainedInClip,
+ SkEdgeBuilder::kBezier);
+ if (count == 0) {
+ return;
+ }
+ SkBezier** list = builder.bezierList();
+
+ // 2. Try to find the rect part because blitAntiRect is so much faster than blitCoverageDeltas
+ int rectTop = ir.fBottom; // the rect is initialized to be empty as top = bot
+ int rectBot = ir.fBottom;
+    if (skipRect) { // only find the rect if skipRect == true
+ YLessThan lessThan; // sort edges in YX order
+ SkTQSort(list, list + count - 1, lessThan);
+ for(int i = 0; i < count - 1; ++i) {
+ SkBezier* lb = list[i];
+ SkBezier* rb = list[i + 1];
+
+ bool lDX0 = lb->fP0.fX == lb->fP1.fX;
+ bool rDX0 = rb->fP0.fX == rb->fP1.fX;
+ if (!lDX0 || !rDX0) { // make sure that the edges are vertical
+ continue;
+ }
+
+ SkAnalyticEdge l, r;
+ l.setLine(lb->fP0, lb->fP1);
+ r.setLine(rb->fP0, rb->fP1);
+
+ SkFixed xorUpperY = l.fUpperY ^ r.fUpperY;
+ SkFixed xorLowerY = l.fLowerY ^ r.fLowerY;
+ if ((xorUpperY | xorLowerY) == 0) { // equal upperY and lowerY
+ rectTop = SkFixedCeilToInt(l.fUpperY);
+ rectBot = SkFixedFloorToInt(l.fLowerY);
+ if (rectBot > rectTop) { // if bot == top, the rect is too short for blitAntiRect
+ int L = SkFixedCeilToInt(l.fUpperX);
+ int R = SkFixedFloorToInt(r.fUpperX);
+ if (L <= R) {
+ SkAlpha la = (SkIntToFixed(L) - l.fUpperX) >> 8;
+ SkAlpha ra = (r.fUpperX - SkIntToFixed(R)) >> 8;
+ result.setAntiRect(L - 1, rectTop, R - L, rectBot - rectTop, la, ra);
+                } else { // too thin to use blitAntiRect; reset the rect region to be empty
+ rectTop = rectBot = ir.fBottom;
+ }
+ }
+ break;
+ }
+
+ }
+ }
+
+ // 3. Sort edges in x so we may need less sorting for delta based on x. This only helps
+ // SkCoverageDeltaList. And we don't want to sort more than SORT_THRESHOLD edges where
+ // the log(count) factor of the quick sort may become a bottleneck; when there are so
+ // many edges, we're unlikely to make deltas sorted anyway.
+ constexpr int SORT_THRESHOLD = 4096;
+ if (std::is_same<Deltas, SkCoverageDeltaList>::value && count < SORT_THRESHOLD) {
+ XLessThan lessThan;
+ SkTQSort(list, list + count - 1, lessThan);
+ }
+
+    // Future todo: parallelize and SIMD the following code.
+ // 4. iterate through edges and generate deltas
+ for(int index = 0; index < count; ++index) {
+ SkAnalyticCubicEdge storage;
+ SkASSERT(sizeof(SkAnalyticQuadraticEdge) >= sizeof(SkAnalyticEdge));
+ SkASSERT(sizeof(SkAnalyticCubicEdge) >= sizeof(SkAnalyticQuadraticEdge));
+
+ SkBezier* bezier = list[index];
+ SkAnalyticEdge* currE = &storage;
+ bool edgeSet = false;
+
+ switch (bezier->fCount) {
+ case 2: {
+ edgeSet = currE->setLine(bezier->fP0, bezier->fP1);
+ break;
+ }
+ case 3: {
+ SkQuad* quad = static_cast<SkQuad*>(bezier);
+ SkPoint pts[3] = {quad->fP0, quad->fP1, quad->fP2};
+ edgeSet = static_cast<SkAnalyticQuadraticEdge*>(currE)->setQuadratic(pts);
+ break;
+ }
+ case 4: {
+ SkCubic* cubic = static_cast<SkCubic*>(bezier);
+ SkPoint pts[4] = {cubic->fP0, cubic->fP1, cubic->fP2, cubic->fP3};
+ edgeSet = static_cast<SkAnalyticCubicEdge*>(currE)->setCubic(pts);
+ break;
+ }
+ }
+
+ if (!edgeSet) {
+ continue;
+ }
+
+ do {
+ currE->fX = currE->fUpperX;
+
+ SkFixed upperFloor = SkFixedFloorToFixed(currE->fUpperY);
+ SkFixed lowerCeil = SkFixedCeilToFixed(currE->fLowerY);
+ int iy = SkFixedFloorToInt(upperFloor);
+
+ if (lowerCeil <= upperFloor + SK_Fixed1) { // only one row is affected by the currE
+ SkFixed rowHeight = currE->fLowerY - currE->fUpperY;
+ SkFixed nextX = currE->fX + SkFixedMul(currE->fDX, rowHeight);
+ add_coverage_delta_segment<true>(iy, rowHeight, currE, nextX, &result);
+ continue;
+ }
+
+ // check first row
+ SkFixed rowHeight = upperFloor + SK_Fixed1 - currE->fUpperY;
+ SkFixed nextX;
+ if (rowHeight != SK_Fixed1) { // it's a partial row
+ nextX = currE->fX + SkFixedMul(currE->fDX, rowHeight);
+ add_coverage_delta_segment<true>(iy, rowHeight, currE, nextX, &result);
+ } else { // it's a full row so we can leave it to the while loop
+ iy--; // compensate the iy++ in the while loop
+ nextX = currE->fX;
+ }
+
+ while (true) { // process the full rows in the middle
+ iy++;
+ SkFixed y = SkIntToFixed(iy);
+ currE->fX = nextX;
+ nextX += currE->fDX;
+
+ if (y + SK_Fixed1 > currE->fLowerY) {
+ break; // no full rows left, break
+ }
+
+ // Check whether we're in the rect part that will be covered by blitAntiRect
+ if (iy >= rectTop && iy < rectBot) {
+ SkASSERT(currE->fDX == 0); // If yes, we must be on an edge with fDX = 0.
+ iy = rectBot - 1; // Skip the rect part by advancing iy to the bottom.
+ continue;
+ }
+
+ // Add current edge's coverage deltas on this full row
+ add_coverage_delta_segment<false>(iy, SK_Fixed1, currE, nextX, &result);
+ }
+
+ // last partial row
+ if (SkIntToFixed(iy) < currE->fLowerY) {
+ rowHeight = currE->fLowerY - SkIntToFixed(iy);
+ nextX = currE->fX + SkFixedMul(currE->fDX, rowHeight);
+ add_coverage_delta_segment<true>(iy, rowHeight, currE, nextX, &result);
+ }
+ } while (currE->update(currE->fLowerY));
+ }
+}
+
+void SkScan::DAAFillPath(const SkPath& path, const SkRegion& origClip, SkBlitter* blitter,
+ bool forceRLE) {
+
+ FillPathFunc fillPathFunc = [](const SkPath& path, SkBlitter* blitter, bool isInverse,
+ const SkIRect& ir, const SkRegion* clipRgn, const SkIRect* clipRect, bool forceRLE){
+ bool isEvenOdd = path.getFillType() & 1;
+ bool isConvex = path.isConvex();
+ bool skipRect = isConvex && !isInverse;
+
+ const SkIRect& clipBounds = clipRgn->getBounds();
+ SkIRect clippedIR = ir;
+ clippedIR.intersect(clipBounds);
+
+ SkRect rect;
+ if (!isInverse && path.isRect(&rect) && clippedIR.height() >= 3 && clippedIR.width() >= 3) {
+ // The overhead of even constructing SkCoverageDeltaList/Mask is too big. So just blit.
+ bool nonEmpty = rect.intersect(SkRect::MakeFromIRect(clipBounds));
+ SkASSERT(nonEmpty); // do_fill_path should have already handled the empty case
+ if (nonEmpty) {
+ blitter->blitFatAntiRect(rect);
+ }
+ return;
+ }
+
+ // Only blitter->blitXXX need to be done in order in the threaded backend.
+ // Everything before can be done out of order in the threaded backend.
+ if (!forceRLE && !isInverse && SkCoverageDeltaMask::Suitable(clippedIR)) {
+ SkCoverageDeltaMask deltaMask(clippedIR);
+ gen_alpha_deltas(path, *clipRgn, deltaMask, blitter, skipRect, clipRect == nullptr);
+ deltaMask.convertCoverageToAlpha(isEvenOdd, isInverse, isConvex);
+ blitter->blitMask(deltaMask.prepareSkMask(), clippedIR);
+ } else {
+ SkCoverageDeltaAllocator alloc;
+ SkCoverageDeltaList deltaList(&alloc, clippedIR.fTop, clippedIR.fBottom, forceRLE);
+ gen_alpha_deltas(path, *clipRgn, deltaList, blitter, skipRect, clipRect == nullptr);
+ blitter->blitCoverageDeltas(&deltaList, clipBounds, isEvenOdd, isInverse, isConvex);
+ }
+ };
+
+ // For threaded backend with out-of-order init-once (and therefore out-of-order do_fill_path),
+ // we probably have to take care of the blitRegion, sk_blit_above, sk_blit_below in do_fill_path
+    // to maintain the draw order. If we do that, be careful that blitRect may throw an exception if
+ // the rect is empty.
+ do_fill_path(path, origClip, blitter, forceRLE, 2, std::move(fillPathFunc));
+}
diff --git a/tools/flags/SkCommonFlags.cpp b/tools/flags/SkCommonFlags.cpp
index 0148bcd820..19afd10909 100644
--- a/tools/flags/SkCommonFlags.cpp
+++ b/tools/flags/SkCommonFlags.cpp
@@ -72,6 +72,11 @@ DEFINE_bool(forceAnalyticAA, false, "Force analytic anti-aliasing even if the pa
"whether it's concave or convex, we consider a path complicated"
"if its number of points is comparable to its resolution.");
+DEFINE_bool(deltaAA, false,
+        "If true, use delta anti-aliasing in suitable cases (it overrides forceAnalyticAA).");
+
+DEFINE_bool(forceDeltaAA, false, "Force delta anti-aliasing for all paths.");
+
bool CollectImages(SkCommandLineFlags::StringArray images, SkTArray<SkString>* output) {
SkASSERT(output);
diff --git a/tools/flags/SkCommonFlags.h b/tools/flags/SkCommonFlags.h
index 92ac141dad..d35e9a8d51 100644
--- a/tools/flags/SkCommonFlags.h
+++ b/tools/flags/SkCommonFlags.h
@@ -34,6 +34,8 @@ DECLARE_string(writePath);
DECLARE_bool(pre_log);
DECLARE_bool(analyticAA);
DECLARE_bool(forceAnalyticAA);
+DECLARE_bool(deltaAA);
+DECLARE_bool(forceDeltaAA);
DECLARE_string(key);
DECLARE_string(properties);