CCPR: Recalculate inflection/intersection points on GPU

Recalculate these points on the GPU instead of investing the code complexity and memory bandwidth in passing them around.

Bug: skia:
Change-Id: Ib6fc7af49363469de91b4c59baf238015045d79c
Reviewed-on: https://skia-review.googlesource.com/39540
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Chris Dalton <csmartdalton@google.com>
author: Chris Dalton <csmartdalton@google.com> 2017-08-28 11:25:00 -0600
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-08-28 18:15:27 +0000
commit: a2090e7240c90510dde6296485815e132abeadf5 (patch)
tree: 170182e038aec7329f3deaed66b214846c71b834 /src/gpu
parent: 3c2d09f89ae119de506722f550a6e28305d4813f (diff)
4 files changed, 30 insertions, 42 deletions
diff --git a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp
index f2d27e8269..f943f67f6c 100644
--- a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp
+++ b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.cpp
@@ -194,8 +194,6 @@ void MaxBufferItems::countPathItems(GrCCPRCoverageOpsBuilder::ScissorMode scisso
                 currFanPts += kMaxCubicSegments;
                 // Each cubic segment has two control points.
                 fMaxControlPoints += kMaxCubicSegments * 2;
-                // Each cubic segment also emits two root t,s values as "control points".
-                fMaxControlPoints += kMaxCubicSegments * 2;
                 maxPrimitives.fMaxCubics += kMaxCubicSegments;
                 // The cubic may also turn out to be a quadratic. While we over-allocate by a fair
                 // amount, this is still a relatively small amount of space compared to the atlas.
@@ -379,8 +377,7 @@ void GrCCPRCoverageOpsBuilder::cubicTo(SkPoint controlPt1, SkPoint controlPt2, S
         }
 
         // (This might put ts0/ts1 out of order, but it doesn't matter anymore at this point.)
-        this->emitCubicSegment(type, chopped.first(),
-                               to_skpoint(t[1 - x], s[1 - x] * chopT), to_skpoint(1, 1));
+        this->emitCubicSegment(type, chopped.first());
         t[x] = 0;
         s[x] = 1;
 
@@ -391,25 +388,20 @@ void GrCCPRCoverageOpsBuilder::cubicTo(SkPoint controlPt1, SkPoint controlPt2, S
         C = chopped.second();
     }
 
-    this->emitCubicSegment(type, C, to_skpoint(t[0], s[0]), to_skpoint(t[1], s[1]));
+    this->emitCubicSegment(type, C);
 }
 
-void GrCCPRCoverageOpsBuilder::emitCubicSegment(SkCubicType type, const SkDCubic& C,
-                                                const SkPoint& ts0, const SkPoint& ts1) {
+void GrCCPRCoverageOpsBuilder::emitCubicSegment(SkCubicType type, const SkDCubic& C) {
     SkASSERT(fCurrPathIndices.fSerpentines < fCurrPathIndices.fLoops);
 
     fPointsData[fControlPtsIdx++] = to_skpoint(C[1]);
     fPointsData[fControlPtsIdx++] = to_skpoint(C[2]);
     this->fanTo(to_skpoint(C[3]));
 
-    // Also emit the cubic's root t,s values as "control points".
-    fPointsData[fControlPtsIdx++] = ts0;
-    fPointsData[fControlPtsIdx++] = ts1;
-
     // Serpentines grow up from the front, and loops grow down from the back.
     fInstanceData[SkCubicType::kLoop != type ?
                   fCurrPathIndices.fSerpentines++ : --fCurrPathIndices.fLoops].fCubicData = {
-        fControlPtsIdx - 4,
+        fControlPtsIdx - 2,
         fFanPtsIdx - 2
     };
 }
diff --git a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h
index 710ee88206..7648ef3c0f 100644
--- a/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h
+++ b/src/gpu/ccpr/GrCCPRCoverageOpsBuilder.h
@@ -119,7 +119,7 @@ private:
     void fanTo(const SkPoint& pt);
     void quadraticTo(SkPoint controlPt, SkPoint endPt);
     void cubicTo(SkPoint controlPt1, SkPoint controlPt2, SkPoint endPt);
-    void emitCubicSegment(SkCubicType, const SkDCubic&, const SkPoint& ts0, const SkPoint& ts1);
+    void emitCubicSegment(SkCubicType, const SkDCubic&);
     void closeContour();
     void emitHierarchicalFan(int32_t indices[], int count);
     SkDEBUGCODE(void validate();)
diff --git a/src/gpu/ccpr/GrCCPRCubicProcessor.cpp b/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
index 5f6f759c6d..c978468891 100644
--- a/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
+++ b/src/gpu/ccpr/GrCCPRCubicProcessor.cpp
@@ -64,12 +64,6 @@ void GrCCPRCubicProcessor::onEmitVertexShader(const GrCCPRCoverageProcessor& pro
     v->codeAppendf("%s.z = determinant(N) * sign(%s.x) * sign(%s.z);",
                    fInset.vsOut(), rtAdjust, rtAdjust);
 
-    // Fetch one of the t,s klm root values for the geometry shader.
-    v->codeAppendf("%s = ", fTS.vsOut());
-    v->appendTexelFetch(pointsBuffer,
-                        SkStringPrintf("%s.x + 2 + sk_VertexID/2", proc.instanceAttrib()).c_str());
-    v->codeAppend ("[sk_VertexID % 2];");
-
     // Emit the vertex position.
     v->codeAppendf("highp float2 self = bezierpts[0] + %s;", atlasOffset);
     gpArgs->fPositionVar.set(kVec2f_GrSLType, "self");
@@ -156,35 +150,40 @@ void GrCCPRCubicProcessor::onEmitGeometryShader(GrGLSLGeometryBuilder* g, const
     g->codeAppend (    "}");
     g->codeAppend ("}");
 
+    // Find the cubic's power basis coefficients.
+    g->codeAppend ("highp float2x4 C = float4x4(-1,  3, -3,  1, "
+                                               " 3, -6,  3,  0, "
+                                               "-3,  3,  0,  0, "
+                                               " 1,  0,  0,  0) * transpose(bezierpts);");
+
+    // Find the cubic's inflection function.
+    g->codeAppend ("highp float D3 = +determinant(float2x2(C[0].yz, C[1].yz));");
+    g->codeAppend ("highp float D2 = -determinant(float2x2(C[0].xz, C[1].xz));");
+    g->codeAppend ("highp float D1 = +determinant(float2x2(C));");
+
     // Calculate the KLM matrix.
     g->declareGlobal(fKLMMatrix);
     g->codeAppend ("highp float4 K, L, M;");
+    g->codeAppend ("highp float2 l, m;");
+    g->codeAppend ("highp float discr = 3*D2*D2 - 4*D1*D3;");
     if (Type::kSerpentine == fType) {
-        g->codeAppend ("highp float2 l,m;");
-        g->codeAppendf("l.ts = float2(%s[0], %s[1]);", fTS.gsIn(), fTS.gsIn());
-        g->codeAppendf("m.ts = float2(%s[2], %s[3]);", fTS.gsIn(), fTS.gsIn());
+        // This math also works out for the "cusp" and "cusp at infinity" cases.
+        g->codeAppend ("highp float q = 3*D2 + sign(D2) * sqrt(max(3*discr, 0));");
+        g->codeAppend ("l.ts = normalize(float2(q, 6*D1));");
+        g->codeAppend ("m.ts = discr <= 0 ? l.ts : normalize(float2(2*D3, q));");
         g->codeAppend ("K = float4(0, l.s * m.s, -l.t * m.s - m.t * l.s, l.t * m.t);");
         g->codeAppend ("L = float4(-1,3,-3,1) * l.ssst * l.sstt * l.sttt;");
         g->codeAppend ("M = float4(-1,3,-3,1) * m.ssst * m.sstt * m.sttt;");
-
     } else {
-        g->codeAppend ("highp float2 d,e;");
-        g->codeAppendf("d.ts = float2(%s[0], %s[1]);", fTS.gsIn(), fTS.gsIn());
-        g->codeAppendf("e.ts = float2(%s[2], %s[3]);", fTS.gsIn(), fTS.gsIn());
-        g->codeAppend ("highp float4 dxe = float4(d.s * e.s, d.s * e.t, d.t * e.s, d.t * e.t);");
-        g->codeAppend ("K = float4(0, dxe.x, -dxe.y - dxe.z, dxe.w);");
-        g->codeAppend ("L = float4(-1,1,-1,1) * d.sstt * (dxe.xyzw + float4(0, 2*dxe.zy, 0));");
-        g->codeAppend ("M = float4(-1,1,-1,1) * e.sstt * (dxe.xzyw + float4(0, 2*dxe.yz, 0));");
+        g->codeAppend ("highp float q = D2 + sign(D2) * sqrt(max(-discr, 0));");
+        g->codeAppend ("l.ts = normalize(float2(q, 2*D1));");
+        g->codeAppend ("m.ts = discr >= 0 ? l.ts : normalize(float2(2 * (D2*D2 - D3*D1), D1*q));");
+        g->codeAppend ("highp float4 lxm = float4(l.s * m.s, l.s * m.t, l.t * m.s, l.t * m.t);");
+        g->codeAppend ("K = float4(0, lxm.x, -lxm.y - lxm.z, lxm.w);");
+        g->codeAppend ("L = float4(-1,1,-1,1) * l.sstt * (lxm.xyzw + float4(0, 2*lxm.zy, 0));");
+        g->codeAppend ("M = float4(-1,1,-1,1) * m.sstt * (lxm.xzyw + float4(0, 2*lxm.yz, 0));");
     }
-
-    g->codeAppend ("highp float2x4 C = float4x4(-1,  3, -3,  1, "
-                                               " 3, -6,  3,  0, "
-                                               "-3,  3,  0,  0, "
-                                               " 1,  0,  0,  0) * transpose(bezierpts);");
-
-    g->codeAppend ("highp float2 absdet = abs(C[0].xx * C[1].zy - C[1].xx * C[0].zy);");
-    g->codeAppend ("lowp int middlerow = absdet[0] > absdet[1] ? 2 : 1;");
-
+    g->codeAppend ("lowp int middlerow = abs(D2) > abs(D1) ? 2 : 1;");
     g->codeAppend ("highp float3x3 CI = inverse(float3x3(C[0][0], C[0][middlerow], C[0][3], "
                                                         "C[1][0], C[1][middlerow], C[1][3], "
                                                         "      0,               0,       1));");
diff --git a/src/gpu/ccpr/GrCCPRCubicProcessor.h b/src/gpu/ccpr/GrCCPRCubicProcessor.h
index f31dad793e..26ff9ac127 100644
--- a/src/gpu/ccpr/GrCCPRCubicProcessor.h
+++ b/src/gpu/ccpr/GrCCPRCubicProcessor.h
@@ -47,14 +47,12 @@ public:
             : INHERITED(CoverageType::kShader)
             , fType(type)
             , fInset(kVec3f_GrSLType)
-            , fTS(kFloat_GrSLType)
             , fKLMMatrix("klm_matrix", kMat33f_GrSLType, GrShaderVar::kNonArray,
                          kHigh_GrSLPrecision)
             , fKLMDerivatives("klm_derivatives", kVec2f_GrSLType, 3, kHigh_GrSLPrecision) {}
 
     void resetVaryings(GrGLSLVaryingHandler* varyingHandler) override {
         varyingHandler->addVarying("insets", &fInset, kHigh_GrSLPrecision);
-        varyingHandler->addVarying("ts", &fTS, kHigh_GrSLPrecision);
     }
 
     void onEmitVertexShader(const GrCCPRCoverageProcessor&, GrGLSLVertexBuilder*,
@@ -70,7 +68,6 @@ protected:
 
     const Type        fType;
     GrGLSLVertToGeo   fInset;
-    GrGLSLVertToGeo   fTS;
     GrShaderVar       fKLMMatrix;
     GrShaderVar       fKLMDerivatives;
author	Chris Dalton <csmartdalton@google.com>	2017-08-28 11:25:00 -0600
committer	Skia Commit-Bot <skia-commit-bot@chromium.org>	2017-08-28 18:15:27 +0000
commit	a2090e7240c90510dde6296485815e132abeadf5 (patch)
tree	170182e038aec7329f3deaed66b214846c71b834 /src/gpu
parent	3c2d09f89ae119de506722f550a6e28305d4813f (diff)