Allow compiler to optimize applying quadratic UV matrix to verts

Code Review: http://codereview.appspot.com/5833048/ git-svn-id: http://skia.googlecode.com/svn/trunk@3398 2bbb7eff-a529-9590-31e7-b0007b416f81
author: bsalomon@google.com <bsalomon@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> 2012-03-15 13:51:08 +0000
committer: bsalomon@google.com <bsalomon@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> 2012-03-15 13:51:08 +0000
commit: 1971317bb43580330a9e7e9a1c09c5025fe84aac (patch)
tree: bf0aa1899f5d47350a8283cc564f5d3813ababaf /src
parent: 7816a4e840d7b5703d8b90731f80c2d88170d7f9 (diff)
4 files changed, 95 insertions, 59 deletions
diff --git a/src/gpu/GrAAConvexPathRenderer.cpp b/src/gpu/GrAAConvexPathRenderer.cpp
index d1715e9079..22a1bb5070 100644
--- a/src/gpu/GrAAConvexPathRenderer.cpp
+++ b/src/gpu/GrAAConvexPathRenderer.cpp
@@ -404,12 +404,8 @@ void create_vertices(const SegmentArray&  segments,
             verts[v + 4].fD1 = -GR_ScalarMax/100;
             verts[v + 5].fD1 = -GR_ScalarMax/100;
 
-            GrMatrix toUV;
-            GrPathUtils::quadDesignSpaceToUVCoordsMatrix(qpts, &toUV);
-            toUV.mapPointsWithStride(&verts[v].fUV,
-                                     &verts[v].fPos,
-                                     sizeof(QuadVertex),
-                                     6);
+            GrPathUtils::QuadUVMatrix toUV(qpts);
+            toUV.apply<6, sizeof(QuadVertex), sizeof(GrPoint)>(verts + v);
 
             idxs[i + 0] = v + 3;
             idxs[i + 1] = v + 1;
diff --git a/src/gpu/GrAAHairLinePathRenderer.cpp b/src/gpu/GrAAHairLinePathRenderer.cpp
index 527c18b36b..f24ce7b40d 100644
--- a/src/gpu/GrAAHairLinePathRenderer.cpp
+++ b/src/gpu/GrAAHairLinePathRenderer.cpp
@@ -362,8 +362,7 @@ void bloat_quad(const SkPoint qpts[3], const GrMatrix* toDevice,
     SkPoint c = qpts[2];
 
     // this should be in the src space, not dev coords, when we have perspective
-    SkMatrix DevToUV;
-    GrPathUtils::quadDesignSpaceToUVCoordsMatrix(qpts, &DevToUV);
+    GrPathUtils::QuadUVMatrix DevToUV(qpts);
 
     if (toDevice) {
         toDevice->mapPoints(&a, 1);
@@ -427,8 +426,7 @@ void bloat_quad(const SkPoint qpts[3], const GrMatrix* toDevice,
     if (toSrc) {
         toSrc->mapPointsWithStride(&verts[0].fPos, sizeof(Vertex), kVertsPerQuad);
     }
-    DevToUV.mapPointsWithStride(&verts[0].fQuadCoord,
-                                &verts[0].fPos, sizeof(Vertex), kVertsPerQuad);
+    DevToUV.apply<kVertsPerQuad, sizeof(Vertex), sizeof(GrPoint)>(verts);
 }
 
 void add_quads(const SkPoint p[3],
diff --git a/src/gpu/GrPathUtils.cpp b/src/gpu/GrPathUtils.cpp
index c32ee8e30a..cecc514f35 100644
--- a/src/gpu/GrPathUtils.cpp
+++ b/src/gpu/GrPathUtils.cpp
@@ -187,44 +187,13 @@ int GrPathUtils::worstCasePointCount(const GrPath& path, int* subpaths,
     return pointCount;
 }
 
-namespace {
-// The matrix computed for quadDesignSpaceToUVCoordsMatrix should never really
-// have perspective and we really want to avoid perspective matrix muls.
-//  However, the first two entries of the perspective row may be really close to
-// 0 and the third may not be 1 due to a scale on the entire matrix.
-inline void fixup_matrix(GrMatrix* mat) {
-#ifndef SK_SCALAR_IS_FLOAT
-    GrCrash("Expected scalar is float.");
-#endif
-     static const GrScalar gTOL = 1.f / 100.f;
-    GrAssert(GrScalarAbs(mat->get(SkMatrix::kMPersp0)) < gTOL);
-    GrAssert(GrScalarAbs(mat->get(SkMatrix::kMPersp1)) < gTOL);
-    float m33 = mat->get(SkMatrix::kMPersp2);
-    if (1.f != m33) {
-        m33 = 1.f / m33;
-        mat->setAll(m33 * mat->get(SkMatrix::kMScaleX),
-                    m33 * mat->get(SkMatrix::kMSkewX),
-                    m33 * mat->get(SkMatrix::kMTransX),
-                    m33 * mat->get(SkMatrix::kMSkewY),
-                    m33 * mat->get(SkMatrix::kMScaleY),
-                    m33 * mat->get(SkMatrix::kMTransY),
-                    0.f, 0.f, 1.f);
-    } else {
-        mat->setPerspX(0);
-        mat->setPerspY(0);
-    }
-}
-}
-
-// Compute a matrix that goes from the 2d space coordinates to UV space where
-// u^2-v = 0 specifies the quad.
-void GrPathUtils::quadDesignSpaceToUVCoordsMatrix(const SkPoint qPts[3],
-                                                  GrMatrix* matrix) {
+void GrPathUtils::QuadUVMatrix::set(const GrPoint qPts[3]) {
     // can't make this static, no cons :(
     SkMatrix UVpts;
 #ifndef SK_SCALAR_IS_FLOAT
     GrCrash("Expected scalar is float.");
 #endif
+    SkMatrix m;
     // We want M such that M * xy_pt = uv_pt
     // We know M * control_pts = [0  1/2 1]
     //                           [0  0   1]
@@ -233,10 +202,10 @@ void GrPathUtils::quadDesignSpaceToUVCoordsMatrix(const SkPoint qPts[3],
     UVpts.setAll(0,   0.5f,  1.f,
                  0,   0,     1.f,
                  1.f, 1.f,   1.f);
-    matrix->setAll(qPts[0].fX, qPts[1].fX, qPts[2].fX,
-                   qPts[0].fY, qPts[1].fY, qPts[2].fY,
-                   1.f,        1.f,        1.f);
-    if (!matrix->invert(matrix)) {
+    m.setAll(qPts[0].fX, qPts[1].fX, qPts[2].fX,
+             qPts[0].fY, qPts[1].fY, qPts[2].fY,
+             1.f,        1.f,        1.f);
+    if (!m.invert(&m)) {
         // The quad is degenerate. Hopefully this is rare. Find the pts that are
         // farthest apart to compute a line (unless it is really a pt).
         SkScalar maxD = qPts[0].distanceToSqd(qPts[1]);
@@ -260,19 +229,46 @@ void GrPathUtils::quadDesignSpaceToUVCoordsMatrix(const SkPoint qPts[3],
             // case.
             lineVec.setOrthog(lineVec, GrPoint::kLeft_Side);
             lineVec.dot(qPts[0]);
-            matrix->setAll(0, 0, 0,
-                           lineVec.fX, lineVec.fY, -lineVec.dot(qPts[maxEdge]),
-                           0, 0, 1.f);
+            // first row
+            fM[0] = 0;
+            fM[1] = 0;
+            fM[2] = 0;
+            // second row
+            fM[3] = lineVec.fX;
+            fM[4] = lineVec.fY;
+            fM[5] = -lineVec.dot(qPts[maxEdge]);
         } else {
             // It's a point. It should cover zero area. Just set the matrix such
             // that (u, v) will always be far away from the quad.
-            matrix->setAll(0, 0, 100 * SK_Scalar1,
-                           0, 0, 100 * SK_Scalar1,
-                           0, 0, 1.f);
+            fM[0] = 0; fM[1] = 0; fM[2] = 100.f;
+            fM[3] = 0; fM[4] = 0; fM[5] = 100.f;
         }
     } else {
-        matrix->postConcat(UVpts);
-        fixup_matrix(matrix);
+        m.postConcat(UVpts);
+
+        // The matrix should not have perspective.
+        static const GrScalar gTOL = 1.f / 100.f;
+        GrAssert(GrScalarAbs(m.get(SkMatrix::kMPersp0)) < gTOL);
+        GrAssert(GrScalarAbs(m.get(SkMatrix::kMPersp1)) < gTOL);
+
+        // It may not be normalized to have 1.0 in the bottom right
+        float m33 = m.get(SkMatrix::kMPersp2);
+        if (1.f != m33) {
+            m33 = 1.f / m33;
+            fM[0] = m33 * m.get(SkMatrix::kMScaleX);
+            fM[1] = m33 * m.get(SkMatrix::kMSkewX);
+            fM[2] = m33 * m.get(SkMatrix::kMTransX);
+            fM[3] = m33 * m.get(SkMatrix::kMSkewY);
+            fM[4] = m33 * m.get(SkMatrix::kMScaleY);
+            fM[5] = m33 * m.get(SkMatrix::kMTransY);
+        } else {
+            fM[0] = m.get(SkMatrix::kMScaleX);
+            fM[1] = m.get(SkMatrix::kMSkewX);
+            fM[2] = m.get(SkMatrix::kMTransX);
+            fM[3] = m.get(SkMatrix::kMSkewY);
+            fM[4] = m.get(SkMatrix::kMScaleY);
+            fM[5] = m.get(SkMatrix::kMTransY);
+        }
     }
 }
 
diff --git a/src/gpu/GrPathUtils.h b/src/gpu/GrPathUtils.h
index df2e16c57c..403f03ae16 100644
--- a/src/gpu/GrPathUtils.h
+++ b/src/gpu/GrPathUtils.h
@@ -27,18 +27,22 @@ namespace GrPathUtils {
     int worstCasePointCount(const GrPath&,
                             int* subpaths,
                             GrScalar tol);
+
     /// Since we divide by tol if we're computing exact worst-case bounds,
     /// very small tolerances will be increased to gMinCurveTol.
     uint32_t quadraticPointCount(const GrPoint points[], GrScalar tol);
+
     uint32_t generateQuadraticPoints(const GrPoint& p0,
                                      const GrPoint& p1,
                                      const GrPoint& p2,
                                      GrScalar tolSqd,
                                      GrPoint** points,
                                      uint32_t pointsLeft);
+
     /// Since we divide by tol if we're computing exact worst-case bounds,
     /// very small tolerances will be increased to gMinCurveTol.
     uint32_t cubicPointCount(const GrPoint points[], GrScalar tol);
+
     uint32_t generateCubicPoints(const GrPoint& p0,
                                  const GrPoint& p1,
                                  const GrPoint& p2,
@@ -46,10 +50,52 @@ namespace GrPathUtils {
                                  GrScalar tolSqd,
                                  GrPoint** points,
                                  uint32_t pointsLeft);
-    // Compute a matrix that goes from the 2d space coordinates to UV space
-    // where u^2-v = 0 specifies the quad.
-    void quadDesignSpaceToUVCoordsMatrix(const GrPoint qPts[3],
-                                         GrMatrix* matrix);
+
+    // A 2x3 matrix that goes from the 2d space coordinates to UV space where
+    // u^2-v = 0 specifies the quad. The matrix is determined by the control
+    // points of the quadratic.
+    class QuadUVMatrix {
+    public:
+        QuadUVMatrix() {};
+        // Initialize the matrix from the control pts
+        QuadUVMatrix(const GrPoint controlPts[3]) { this->set(controlPts); }
+        void set(const GrPoint controlPts[3]);
+
+        /**
+         * Applies the matrix to vertex positions to compute UV coords. This
+         * has been templated so that the compiler can easliy unroll the loop
+         * and reorder to avoid stalling for loads. The assumption is that a
+         * path renderer will have a small fixed number of vertices that it
+         * uploads for each quad.
+         *
+         * N is the number of vertices.
+         * STRIDE is the size of each vertex.
+         * UV_OFFSET is the offset of the UV values within each vertex.
+         * vertices is a pointer to the first vertex.
+         */
+        template <int N, size_t STRIDE, size_t UV_OFFSET>
+        void apply(const void* vertices) {
+            intptr_t xyPtr = reinterpret_cast<intptr_t>(vertices);
+            intptr_t uvPtr = reinterpret_cast<intptr_t>(vertices) + UV_OFFSET;
+            float sx = fM[0];
+            float kx = fM[1];
+            float tx = fM[2];
+            float ky = fM[3];
+            float sy = fM[4];
+            float ty = fM[5];
+            for (int i = 0; i < N; ++i) {
+                const GrPoint* xy = reinterpret_cast<const GrPoint*>(xyPtr);
+                GrPoint* uv = reinterpret_cast<GrPoint*>(uvPtr);
+                uv->fX = sx * xy->fX + kx * xy->fY + tx;
+                uv->fY = ky * xy->fX + sy * xy->fY + ty;
+                xyPtr += STRIDE;
+                uvPtr += STRIDE;
+            }
+        }
+    private:
+        float fM[6];
+    };
+
     // Converts a cubic into a sequence of quads. If working in device space
     // use tolScale = 1, otherwise set based on stretchiness of the matrix. The
     // result is sets of 3 points in quads (TODO: share endpoints in returned
author	bsalomon@google.com <bsalomon@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>	2012-03-15 13:51:08 +0000
committer	bsalomon@google.com <bsalomon@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81>	2012-03-15 13:51:08 +0000
commit	1971317bb43580330a9e7e9a1c09c5025fe84aac (patch)
tree	bf0aa1899f5d47350a8283cc564f5d3813ababaf /src
parent	7816a4e840d7b5703d8b90731f80c2d88170d7f9 (diff)