aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/gpu/GrPath.cpp
diff options
context:
space:
mode:
authorGravatar kkinnunen <kkinnunen@nvidia.com>2015-05-21 00:37:30 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2015-05-21 00:37:30 -0700
commit070e01056acdd9980619e71e2da390efb94e912e (patch)
treeaa2eb790a9170813e396a4f593bc6eeec85efdca /src/gpu/GrPath.cpp
parentf2539d50f911914af0f80f0092ff8c654869e650 (diff)
Improve caching of special case paths in GrStencilAndCoverPathRenderer
Cache lines and oval paths in their own cache domains. Skia has many hard-to-replace codepaths that create volatile paths out of lines or ovals. Results for amd64: desk_tigersvg.skp_1 3.06ms -> 3.07ms 1x tabl_cnet.skp_1 2.3ms -> 2.3ms 1x desk_baidu.skp_1 8.99ms -> 8.98ms 1x desk_weather.skp_1 4.58ms -> 4.57ms 1x desk_samoasvg.skp_1 12.3ms -> 12.3ms 1x tabl_gamedeksiam.skp_1 15.8ms -> 15.7ms 1x desk_chalkboard.skp_1 14.9ms -> 14.9ms 1x desk_mapsvg.skp_1 6.57ms -> 6.54ms 1x desk_wordpress.skp_1 2.2ms -> 2.19ms 1x tabl_slashdot.skp_1 6.91ms -> 6.84ms 0.99x desk_linkedin.skp_1 7.7ms -> 7.58ms 0.98x desk_googlespreadsheet.skp_1 58.7ms -> 57.7ms 0.98x tabl_ukwsj.skp_1 7.67ms -> 7.53ms 0.98x tabl_engadget.skp_1 4.71ms -> 4.61ms 0.98x desk_carsvg.skp_1 116ms -> 114ms 0.98x tabl_nytimes.skp_1 3.71ms -> 3.61ms 0.97x desk_googlespreadsheetdashed.skp_1 25.8ms -> 24.9ms 0.97x tabl_hsfi.skp_1 5.3ms -> 5.1ms 0.96x tabl_techmeme.skp_1 2.9ms -> 2.8ms 0.96x tabl_cnn.skp_1 6.88ms -> 6.62ms 0.96x desk_espn.skp_1 9.12ms -> 8.64ms 0.95x desk_gws.skp_1 5.15ms -> 4.88ms 0.95x tabl_gspro.skp_1 4.48ms -> 4.2ms 0.94x desk_yahooanswers.skp_1 6.84ms -> 6.3ms 0.92x tabl_pravda.skp_1 11.1ms -> 10.1ms 0.91x tabl_sahadan.skp_1 11.8ms -> 10.7ms 0.91x desk_wowwiki.skp_1 8.48ms -> 7.58ms 0.89x tabl_googleblog.skp_1 6ms -> 5.34ms 0.89x tabl_worldjournal.skp_1 6.08ms -> 5.35ms 0.88x desk_booking.skp_1 15.3ms -> 13.4ms 0.88x tabl_androidpolice.skp_1 15.5ms -> 13.5ms 0.87x desk_twitter.skp_1 12.2ms -> 10.6ms 0.87x tabl_nofolo.skp_1 5.49ms -> 4.76ms 0.87x desk_mobilenews.skp_1 22ms -> 18.9ms 0.86x desk_forecastio.skp_1 9.47ms -> 8.05ms 0.85x tabl_culturalsolutions.skp_1 6.21ms -> 5.28ms 0.85x desk_youtube.skp_1 16.1ms -> 13.5ms 0.84x tabl_mlb.skp_1 9.75ms -> 8.01ms 0.82x tabl_digg.skp_1 5.2ms -> 4.22ms 0.81x desk_blogger.skp_1 10.2ms -> 8.24ms 0.81x desk_gmailthread.skp_1 26.8ms -> 21.6ms 0.81x desk_googleplus.skp_1 10.5ms -> 8.39ms 0.8x tabl_frantzen.skp_1 4.55ms -> 3.58ms 0.79x desk_pinterest.skp_1 
8.85ms -> 6.88ms 0.78x desk_ebay.skp_1 10.5ms -> 8.15ms 0.77x tabl_transformice.skp_1 4.93ms -> 3.5ms 0.71x Results for arm_v7_neon: desk_samoasvg.skp_1 13.9ms -> 14.6ms 1.05x desk_mapsvg.skp_1 8.31ms -> 8.75ms 1.05x tabl_deviantart.skp_1 1.41ms -> 1.45ms 1.02x desk_weather.skp_1 3.8ms -> 3.88ms 1.02x desk_sfgate.skp_1 3.06ms -> 3.1ms 1.01x desk_css3gradients.skp_1 2.78ms -> 2.79ms 1x desk_espn.skp_1 6.52ms -> 6.43ms 0.99x desk_gws.skp_1 4.16ms -> 4.09ms 0.98x tabl_cnn.skp_1 4.66ms -> 4.58ms 0.98x tabl_hsfi.skp_1 3.49ms -> 3.42ms 0.98x tabl_cuteoverload.skp_1 2.41ms -> 2.35ms 0.98x desk_yahooanswers.skp_1 5.28ms -> 5.14ms 0.97x desk_carsvg.skp_1 90.8ms -> 87.9ms 0.97x tabl_gspro.skp_1 2.81ms -> 2.71ms 0.96x desk_wowwiki.skp_1 5.85ms -> 5.63ms 0.96x tabl_pravda.skp_1 7.8ms -> 7.5ms 0.96x desk_twitter.skp_1 8.14ms -> 7.8ms 0.96x tabl_androidpolice.skp_1 10.4ms -> 9.96ms 0.96x tabl_googleblog.skp_1 4.06ms -> 3.83ms 0.95x desk_mobilenews.skp_1 15.2ms -> 14ms 0.93x desk_booking.skp_1 9.89ms -> 9.08ms 0.92x desk_forecastio.skp_1 6.16ms -> 5.65ms 0.92x desk_blogger.skp_1 6.17ms -> 5.66ms 0.92x tabl_digg.skp_1 3.73ms -> 3.41ms 0.91x tabl_nofolo.skp_1 3.82ms -> 3.47ms 0.91x tabl_worldjournal.skp_1 4.24ms -> 3.84ms 0.9x desk_youtube.skp_1 10.5ms -> 9.39ms 0.9x desk_googleplus.skp_1 7.01ms -> 6.19ms 0.88x tabl_mlb.skp_1 5.91ms -> 5.22ms 0.88x tabl_googlecalendar.skp_1 10.7ms -> 9.44ms 0.88x desk_gmailthread.skp_1 19.2ms -> 16.8ms 0.88x desk_ebay.skp_1 5.68ms -> 4.93ms 0.87x desk_pinterest.skp_1 5.99ms -> 5.08ms 0.85x desk_googlehome.skp_1 3.31ms -> 2.71ms 0.82x tabl_transformice.skp_1 3.03ms -> 2.44ms 0.81x desk_amazon.skp_1 6.05ms -> 4.84ms 0.8x desk_facebook.skp_1 12.6ms -> 9.62ms 0.76x Review URL: https://codereview.chromium.org/1120023005
Diffstat (limited to 'src/gpu/GrPath.cpp')
-rw-r--r--src/gpu/GrPath.cpp162
1 files changed, 158 insertions, 4 deletions
diff --git a/src/gpu/GrPath.cpp b/src/gpu/GrPath.cpp
index e76bdf2466..5b75683628 100644
--- a/src/gpu/GrPath.cpp
+++ b/src/gpu/GrPath.cpp
@@ -7,14 +7,168 @@
#include "GrPath.h"
-void GrPath::ComputeKey(const SkPath& path, const GrStrokeInfo& stroke, GrUniqueKey* key) {
- static const GrUniqueKey::Domain kPathDomain = GrUniqueKey::GenerateDomain();
+namespace {
+// Verb count limit for generating path key from content of a volatile path.
+// The value should accommodate at least simple rects and rrects.
+static const int kSimpleVolatilePathVerbLimit = 10;
+
+// Builds the unique key for a path that consists of a single line segment.
+// Returns false, before anything is written to |key|, when path.isLine() rejects the path.
+// Key layout: slot 0 holds the fill type, the following slots hold the two end points copied
+// bytewise, and the stroke fragment (if its size is non-zero) follows the base data.
+inline static bool compute_key_for_line_path(const SkPath& path, const GrStrokeInfo& stroke,
+ GrUniqueKey* key) {
+ SkPoint pts[2];
+ if (!path.isLine(pts)) {
+ return false;
+ }
+ // The points are copied into uint32_t slots, so their size must be a whole number of slots.
+ SK_COMPILE_ASSERT((sizeof(pts) % sizeof(uint32_t)) == 0 && sizeof(pts) > sizeof(uint32_t),
+ pts_needs_padding);
+
+ const int kBaseData32Cnt = 1 + sizeof(pts) / sizeof(uint32_t);
+ int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
+ // Domain generated once and used only for line-path keys.
+ static const GrUniqueKey::Domain kLinePathDomain = GrUniqueKey::GenerateDomain();
+ GrUniqueKey::Builder builder(key, kLinePathDomain, kBaseData32Cnt + strokeDataCnt);
+ builder[0] = path.getFillType();
+ memcpy(&builder[1], &pts, sizeof(pts));
+ if (strokeDataCnt > 0) {
+ stroke.asUniqueKeyFragment(&builder[kBaseData32Cnt]);
+ }
+ return true;
+}
+
+inline static bool compute_key_for_oval_path(const SkPath& path, const GrStrokeInfo& stroke,
+ GrUniqueKey* key) { // Builds the unique key for an oval path; returns false if not an oval.
+ SkRect rect;
+ if (!path.isOval(&rect)) { // Bail out before anything is written to |key|.
+ return false;
+ }
+ SK_COMPILE_ASSERT((sizeof(rect) % sizeof(uint32_t)) == 0 && sizeof(rect) > sizeof(uint32_t),
+ rect_needs_padding);
+
+ const int kBaseData32Cnt = 1 + sizeof(rect) / sizeof(uint32_t); // Fill type slot + rect slots.
int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
- GrUniqueKey::Builder builder(key, kPathDomain, 2 + strokeDataCnt);
+ static const GrUniqueKey::Domain kOvalPathDomain = GrUniqueKey::GenerateDomain();
+ GrUniqueKey::Builder builder(key, kOvalPathDomain, kBaseData32Cnt + strokeDataCnt);
+ builder[0] = path.getFillType(); // Slot 0: fill type.
+ memcpy(&builder[1], &rect, sizeof(rect)); // Slots 1..: rect as filled in by isOval(), bytewise.
+ if (strokeDataCnt > 0) { // Stroke fragment, when present, follows the base data.
+ stroke.asUniqueKeyFragment(&builder[kBaseData32Cnt]);
+ }
+ return true;
+}
+
+// Encodes the full path data into the unique key for very small, volatile paths. This is
+// typically hit when clipping stencils the clip stack. The intention is that this handles rects
+// too, since SkPath::isRect seems to do a non-trivial amount of work.
+// Returns false, before anything is written to |key|, when the path is not volatile, has more
+// than kSimpleVolatilePathVerbLimit verbs, or reports a negative verb or point count.
+inline static bool compute_key_for_simple_path(const SkPath& path, const GrStrokeInfo& stroke,
+ GrUniqueKey* key) {
+ if (!path.isVolatile()) {
+ return false;
+ }
+
+ // Reject negative counts explicitly: verbCnt is a signed int, so the upper-bound comparison
+ // alone would let a negative value through to the size computations below.
+ const int verbCnt = path.countVerbs();
+ if (verbCnt < 0 || verbCnt > kSimpleVolatilePathVerbLimit) {
+ return false;
+ }
+
+ // If somebody goes wild with the constant, it might cause an overflow.
+ SK_COMPILE_ASSERT(kSimpleVolatilePathVerbLimit <= 100,
+ big_simple_volatile_path_verb_limit_may_cause_overflow);
+
+ const int pointCnt = path.countPoints();
+ if (pointCnt < 0) {
+ SkASSERT(false);
+ return false;
+ }
+
+ // Construct counts that align as uint32_t counts.
+#define ARRAY_DATA32_COUNT(array_type, count) \
+ static_cast<int>((((count) * sizeof(array_type) + sizeof(uint32_t) - 1) / sizeof(uint32_t)))
+
+ const int verbData32Cnt = ARRAY_DATA32_COUNT(uint8_t, verbCnt);
+ const int pointData32Cnt = ARRAY_DATA32_COUNT(SkPoint, pointCnt);
+
+#undef ARRAY_DATA32_COUNT
+
+ // The unique key data is a "message" with following fragments:
+ // 0) domain, key length, uint32_t for fill type and uint32_t for verbCnt
+ // (fragment 0, fixed size)
+ // 1) verb and point data (varying size)
+ // 2) stroke data (varying size)
+
+ const int baseData32Cnt = 2 + verbData32Cnt + pointData32Cnt;
+ const int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
+ static const GrUniqueKey::Domain kSimpleVolatilePathDomain = GrUniqueKey::GenerateDomain();
+ GrUniqueKey::Builder builder(key, kSimpleVolatilePathDomain, baseData32Cnt + strokeDataCnt);
+ int i = 0;
+ builder[i++] = path.getFillType();
+
+ // Serialize the verbCnt to make the whole message unambiguous.
+ // We serialize two variable length fragments to the message:
+ // * verb and point data (fragment 1)
+ // * stroke data (fragment 2)
+ // "Proof:"
+ // Verb count establishes unambiguous verb data.
+ // Unambiguous verb data establishes unambiguous point data, making fragment 1 unambiguous.
+ // Unambiguous fragment 1 establishes unambiguous fragment 2, since the length of the message
+ // has been established.
+
+ builder[i++] = SkToU32(verbCnt); // The range is checked above, so the cast is ok.
+
+ // Fill the last uint32_t with 0 first, since the last uint8_ts of the uint32_t may be
+ // uninitialized. This does not produce ambiguous verb data, since we have serialized the exact
+ // verb count.
+ if (verbData32Cnt != static_cast<int>((verbCnt * sizeof(uint8_t) / sizeof(uint32_t)))) {
+ builder[i + verbData32Cnt - 1] = 0;
+ }
+ path.getVerbs(reinterpret_cast<uint8_t*>(&builder[i]), verbCnt);
+ i += verbData32Cnt;
+
+ SK_COMPILE_ASSERT(((sizeof(SkPoint) % sizeof(uint32_t)) == 0) &&
+ sizeof(SkPoint) > sizeof(uint32_t), skpoint_array_needs_padding);
+
+ // Here we assume getPoints does a memcpy, so that we do not need to worry about the alignment.
+ path.getPoints(reinterpret_cast<SkPoint*>(&builder[i]), pointCnt);
+ // i is only consulted by the assert below, so it is advanced in debug builds only.
+ SkDEBUGCODE(i += pointData32Cnt);
+
+ SkASSERT(i == baseData32Cnt);
+ if (strokeDataCnt > 0) {
+ stroke.asUniqueKeyFragment(&builder[baseData32Cnt]);
+ }
+ return true;
+}
+
+inline static void compute_key_for_general_path(const SkPath& path, const GrStrokeInfo& stroke,
+ GrUniqueKey* key) { // Keys the path by its generation ID and fill type, plus stroke data.
+ const int kBaseData32Cnt = 2; // Two base slots: generation ID and fill type.
+ int strokeDataCnt = stroke.computeUniqueKeyFragmentData32Cnt();
+ static const GrUniqueKey::Domain kGeneralPathDomain = GrUniqueKey::GenerateDomain();
+ GrUniqueKey::Builder builder(key, kGeneralPathDomain, kBaseData32Cnt + strokeDataCnt);
builder[0] = path.getGenerationID(); // Slot 0: generation ID of the path.
builder[1] = path.getFillType(); // Slot 1: fill type.
if (strokeDataCnt > 0) { // Stroke fragment, when present, follows the base data.
- stroke.asUniqueKeyFragment(&builder[2]);
+ stroke.asUniqueKeyFragment(&builder[kBaseData32Cnt]);
+ }
+}
+
+}
+
+void GrPath::ComputeKey(const SkPath& path, const GrStrokeInfo& stroke, GrUniqueKey* key,
+ bool* outIsVolatile) { // Tries the key builders in order: line, oval, simple, then general.
+ if (compute_key_for_line_path(path, stroke, key)) {
+ *outIsVolatile = false; // A line key was built; reported as non-volatile.
+ return;
}
+
+ if (compute_key_for_oval_path(path, stroke, key)) {
+ *outIsVolatile = false; // An oval key was built; reported as non-volatile.
+ return;
+ }
+
+ if (compute_key_for_simple_path(path, stroke, key)) {
+ *outIsVolatile = false; // A content-based key was built; reported as non-volatile.
+ return;
+ }
+
+ compute_key_for_general_path(path, stroke, key); // Fallback: always produces a key.
+ *outIsVolatile = path.isVolatile(); // Only the fallback key passes on the path's own flag.
}