aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar herb <herb@google.com>2016-07-22 14:06:27 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-07-22 14:06:27 -0700
commit86a6c6d8bdb1055ee2d0689eeaa4a814402e69dd (patch)
treea38062e21f41537ddb99178b164e08278ff3de45
parent9c1d45d986f3c58593fde0fd62ab22e056dd3881 (diff)
In the current code, tiling and bilerp sampling are strongly tied together. They can be separated by taking advantage of the observation that, when a sample point is translated into filter points in the bilerp stage, the filter points will be at most 0.5 outside the tile. This allows simplified repositioning for the various tiling modes: clamp and mirror use min and max, while repeat maps max -> 0 and 0 -> max. This allows bilerp to simply treat the filter points that fall off the tile. This allows tiling and bilerp sampling to be totally separate.
This CL has several parts that are intertwined: * move pin/wrap functionality into BilerpSampler. * remove the nearest neighbor and bilerp tilers. * create a simplified general tiler. * remove the pipeline virtual calls bilerpEdge and bilerpSpan because everything works off sample points now. * redo all the bilerp sampling to use the new local methods to wrap/pin. * introduce a new medium rate sample that handles spans with 1 < |dx| < 2. This change improves the performance as displayed below: Most of the top 25 desktop pages improve or stay the same. A few are worse, but close to the noise floor. In addition, this change produces about 3% smaller code. old time new time new/old 13274693 8414645 0.633886 top25desk_google_com_search_q_c.skp_1 4946466 3258018 0.658656 top25desk_wordpress.skp_1 6977187 5737584 0.822335 top25desk_youtube_com.skp_1 3770021 3296831 0.874486 top25desk_google_com__hl_en_q_b.skp_1 8890813 8600143 0.967307 top25desk_answers_yahoo_com.skp_1 3178974 3094300 0.973364 top25desk_facebook.skp_1 8871835 8711260 0.981901 top25desk_twitter.skp_1 838509 829290 0.989005 top25desk_blogger.skp_1 2821870 2801111 0.992644 top25desk_plus_google_com_11003.skp_1 511978 509530 0.995219 top25desk_techcrunch_com.skp_1 2408588 2397435 0.995369 top25desk_ebay_com.skp_1 4446919 4448004 1.00024 top25desk_espn.skp_1 2863241 2875696 1.00435 top25desk_google_com_calendar_.skp_1 7170086 7208447 1.00535 top25desk_booking_com.skp_1 7356109 7417776 1.00838 top25desk_pinterest.skp_1 5265591 5340392 1.01421 top25desk_weather_com.skp_1 5675244 5774144 1.01743 top25desk_sports_yahoo_com_.skp_1 1048531 1067663 1.01825 top25desk_games_yahoo_com.skp_1 2075501 2115131 1.01909 top25desk_amazon_com.skp_1 4262170 4370441 1.0254 top25desk_news_yahoo_com.skp_1 3789319 3897996 1.02868 top25desk_docs___1_open_documen.skp_1 919336 949979 1.03333 top25desk_wikipedia__1_tab_.skp_1 4274454 4489369 1.05028 top25desk_mail_google_com_mail_.skp_1 4149326 4376556 1.05476 top25desk_linkedin.skp_1 BUG=skia:5566 
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2134893002 CQ_INCLUDE_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot;master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Committed: https://skia.googlesource.com/skia/+/8602ede5fdfa721dcad4dcb11db028c1c24265f1 Review-Url: https://codereview.chromium.org/2134893002
-rw-r--r--src/core/SkBitmapProcShader.h2
-rw-r--r--src/core/SkLinearBitmapPipeline.cpp242
-rw-r--r--src/core/SkLinearBitmapPipeline.h4
-rw-r--r--src/core/SkLinearBitmapPipeline_core.h33
-rw-r--r--src/core/SkLinearBitmapPipeline_sample.h783
5 files changed, 557 insertions, 507 deletions
diff --git a/src/core/SkBitmapProcShader.h b/src/core/SkBitmapProcShader.h
index f21e3d6adc..624034851d 100644
--- a/src/core/SkBitmapProcShader.h
+++ b/src/core/SkBitmapProcShader.h
@@ -54,7 +54,7 @@ private:
typedef SkShader INHERITED;
};
-enum {kSkBlitterContextSize = 3200};
+enum {kSkBlitterContextSize = 3332};
// Commonly used allocator. It currently is only used to allocate up to 3 objects. The total
// bytes requested is calculated using one of our large shaders, its context size plus the size of
diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp
index 088e829345..44a3d77195 100644
--- a/src/core/SkLinearBitmapPipeline.cpp
+++ b/src/core/SkLinearBitmapPipeline.cpp
@@ -165,15 +165,14 @@ static SkLinearBitmapPipeline::PointProcessorInterface* choose_matrix(
// Tile Stage
template<typename XStrategy, typename YStrategy, typename Next>
-class NearestTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface {
+class CombinedTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface {
public:
- template <typename... Args>
- NearestTileStage(Next* next, SkISize dimensions)
+ CombinedTileStage(Next* next, SkISize dimensions)
: fNext{next}
, fXStrategy{dimensions.width()}
, fYStrategy{dimensions.height()}{ }
- NearestTileStage(Next* next, const NearestTileStage& stage)
+ CombinedTileStage(Next* next, const CombinedTileStage& stage)
: fNext{next}
, fXStrategy{stage.fXStrategy}
, fYStrategy{stage.fYStrategy} { }
@@ -195,187 +194,52 @@ public:
SkASSERT(!span.isEmpty());
SkPoint start; SkScalar length; int count;
std::tie(start, length, count) = span;
- SkScalar x = X(start);
- SkScalar y = fYStrategy.tileY(Y(start));
- Span yAdjustedSpan{{x, y}, length, count};
- if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) {
- span_fallback(span, this);
- }
- }
-
-private:
- Next* const fNext;
- XStrategy fXStrategy;
- YStrategy fYStrategy;
-};
-
-template<typename XStrategy, typename YStrategy, typename Next>
-class BilerpTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface {
-public:
- template <typename... Args>
- BilerpTileStage(Next* next, SkISize dimensions)
- : fNext{next}
- , fXMax(dimensions.width())
- , fYMax(dimensions.height())
- , fXStrategy{dimensions.width()}
- , fYStrategy{dimensions.height()} { }
-
- BilerpTileStage(Next* next, const BilerpTileStage& stage)
- : fNext{next}
- , fXMax{stage.fXMax}
- , fYMax{stage.fYMax}
- , fXStrategy{stage.fXStrategy}
- , fYStrategy{stage.fYStrategy} { }
- void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
- fXStrategy.tileXPoints(&xs);
- fYStrategy.tileYPoints(&ys);
- // TODO: check to see if xs and ys are in range then just call pointListFew on next.
- if (n >= 1) this->bilerpPoint(xs[0], ys[0]);
- if (n >= 2) this->bilerpPoint(xs[1], ys[1]);
- if (n >= 3) this->bilerpPoint(xs[2], ys[2]);
- }
-
- void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
- fXStrategy.tileXPoints(&xs);
- fYStrategy.tileYPoints(&ys);
- // TODO: check to see if xs and ys are in range then just call pointList4 on next.
- this->bilerpPoint(xs[0], ys[0]);
- this->bilerpPoint(xs[1], ys[1]);
- this->bilerpPoint(xs[2], ys[2]);
- this->bilerpPoint(xs[3], ys[3]);
- }
-
- struct Wrapper {
- void pointSpan(Span span) {
- processor->breakIntoEdges(span);
- }
-
- void repeatSpan(Span span, int32_t repeatCount) {
- while (repeatCount --> 0) {
- processor->pointSpan(span);
- }
+ if (span.count() == 1) {
+ // DANGER:
+ // The explicit casts from float to Sk4f are not usually necessary, but are here to
+ // work around an MSVC 2015u2 c++ code generation bug. This is tracked using skia bug
+ // 5566.
+ this->pointListFew(1, Sk4f{span.startX()}, Sk4f{span.startY()});
+ return;
}
- BilerpTileStage* processor;
- };
-
- // The span you pass must not be empty.
- void pointSpan(Span span) override {
- SkASSERT(!span.isEmpty());
+ SkScalar x = X(start);
+ SkScalar y = fYStrategy.tileY(Y(start));
+ Span yAdjustedSpan{{x, y}, length, count};
- Wrapper wrapper = {this};
- if (!fXStrategy.maybeProcessSpan(span, &wrapper)) {
+ if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) {
span_fallback(span, this);
}
}
private:
- void bilerpPoint(SkScalar x, SkScalar y) {
- Sk4f txs = Sk4f{x} + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f};
- Sk4f tys = Sk4f{y} + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f};
- fXStrategy.tileXPoints(&txs);
- fYStrategy.tileYPoints(&tys);
- fNext->bilerpEdge(txs, tys);
- }
-
- void handleEdges(Span span, SkScalar dx) {
- SkPoint start; SkScalar length; int count;
- std::tie(start, length, count) = span;
- SkScalar x = X(start);
- SkScalar y = Y(start);
- SkScalar tiledY = fYStrategy.tileY(y);
- while (count > 0) {
- this->bilerpPoint(x, tiledY);
- x += dx;
- count -= 1;
- }
- }
-
- void yProcessSpan(Span span) {
- SkScalar tiledY = fYStrategy.tileY(span.startY());
- if (0.5f <= tiledY && tiledY < fYMax - 0.5f ) {
- Span tiledSpan{{span.startX(), tiledY}, span.length(), span.count()};
- fNext->pointSpan(tiledSpan);
- } else {
- // Convert to the Y0 bilerp sample set by shifting by -0.5f. Then tile that new y
- // value and shift it back resulting in the working Y0. Do the same thing with Y1 but
- // in the opposite direction.
- SkScalar y0 = fYStrategy.tileY(span.startY() - 0.5f) + 0.5f;
- SkScalar y1 = fYStrategy.tileY(span.startY() + 0.5f) - 0.5f;
- Span newSpan{{span.startX(), y0}, span.length(), span.count()};
- fNext->bilerpSpan(newSpan, y1);
- }
- }
- void breakIntoEdges(Span span) {
- if (span.count() == 1) {
- this->bilerpPoint(span.startX(), span.startY());
- } else if (span.length() == 0) {
- yProcessSpan(span);
- } else {
- SkScalar dx = span.length() / (span.count() - 1);
- if (span.length() > 0) {
- Span leftBorder = span.breakAt(0.5f, dx);
- if (!leftBorder.isEmpty()) {
- this->handleEdges(leftBorder, dx);
- }
- Span center = span.breakAt(fXMax - 0.5f, dx);
- if (!center.isEmpty()) {
- this->yProcessSpan(center);
- }
-
- if (!span.isEmpty()) {
- this->handleEdges(span, dx);
- }
- } else {
- Span center = span.breakAt(fXMax + 0.5f, dx);
- if (!span.isEmpty()) {
- this->handleEdges(span, dx);
- }
- Span leftEdge = center.breakAt(0.5f, dx);
- if (!center.isEmpty()) {
- this->yProcessSpan(center);
- }
- if (!leftEdge.isEmpty()) {
- this->handleEdges(leftEdge, dx);
- }
-
- }
- }
- }
-
Next* const fNext;
- SkScalar fXMax;
- SkScalar fYMax;
XStrategy fXStrategy;
YStrategy fYStrategy;
};
-template <typename XStrategy, typename YStrategy, typename Next>
-void make_tile_stage(
- SkFilterQuality filterQuality, SkISize dimensions,
- Next* next, SkLinearBitmapPipeline::TileStage* tileStage) {
- if (filterQuality == kNone_SkFilterQuality) {
- tileStage->initStage<NearestTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
- } else {
- tileStage->initStage<BilerpTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
- }
-}
-template <typename XStrategy>
+template <typename XStrategy, typename Next>
void choose_tiler_ymode(
SkShader::TileMode yMode, SkFilterQuality filterQuality, SkISize dimensions,
- SkLinearBitmapPipeline::SampleProcessorInterface* next,
+ Next* next,
SkLinearBitmapPipeline::TileStage* tileStage) {
switch (yMode) {
- case SkShader::kClamp_TileMode:
- make_tile_stage<XStrategy, YClampStrategy>(filterQuality, dimensions, next, tileStage);
+ case SkShader::kClamp_TileMode: {
+ using Tiler = CombinedTileStage<XStrategy, YClampStrategy, Next>;
+ tileStage->initStage<Tiler>(next, dimensions);
break;
- case SkShader::kRepeat_TileMode:
- make_tile_stage<XStrategy, YRepeatStrategy>(filterQuality, dimensions, next, tileStage);
+ }
+ case SkShader::kRepeat_TileMode: {
+ using Tiler = CombinedTileStage<XStrategy, YRepeatStrategy, Next>;
+ tileStage->initStage<Tiler>(next, dimensions);
break;
- case SkShader::kMirror_TileMode:
- make_tile_stage<XStrategy, YMirrorStrategy>(filterQuality, dimensions, next, tileStage);
+ }
+ case SkShader::kMirror_TileMode: {
+ using Tiler = CombinedTileStage<XStrategy, YMirrorStrategy, Next>;
+ tileStage->initStage<Tiler>(next, dimensions);
break;
+ }
}
};
@@ -467,10 +331,6 @@ public:
fDest = dest;
}
- void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); }
-
- void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); }
-
void setDestination(void* dst, int count) override {
fDest = static_cast<uint32_t*>(dst);
fEnd = fDest + count;
@@ -538,10 +398,6 @@ public:
SkASSERT(fDest <= fEnd);
}
- void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); }
-
- void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); }
-
void setDestination(void* dst, int count) override {
SkASSERT(count > 0);
fDest = static_cast<uint32_t*>(dst);
@@ -582,12 +438,9 @@ static SkLinearBitmapPipeline::PixelAccessorInterface* choose_specific_accessor(
}
}
-template<template <typename, typename> class Sampler>
-static SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler_base(
- Blender* next,
+static SkLinearBitmapPipeline::PixelAccessorInterface* choose_pixel_accessor(
const SkPixmap& srcPixmap,
const SkColor A8TintColor,
- SkLinearBitmapPipeline::SampleStage* sampleStage,
SkLinearBitmapPipeline::Accessor* accessor)
{
const SkImageInfo& imageInfo = srcPixmap.info();
@@ -629,19 +482,19 @@ static SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler_ba
break;
}
- using S = Sampler<PixelAccessorShim, Blender>;
- sampleStage->initStage<S>(next, pixelAccessor);
- return sampleStage->get();
+ return pixelAccessor;
}
SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler(
Blender* next,
SkFilterQuality filterQuality,
+ SkShader::TileMode xTile, SkShader::TileMode yTile,
const SkPixmap& srcPixmap,
const SkColor A8TintColor,
SkLinearBitmapPipeline::SampleStage* sampleStage,
SkLinearBitmapPipeline::Accessor* accessor) {
const SkImageInfo& imageInfo = srcPixmap.info();
+ SkISize dimensions = imageInfo.dimensions();
// Special case samplers with fully expanded templates
if (imageInfo.gammaCloseToSRGB()) {
@@ -670,14 +523,14 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler(
using S =
BilerpSampler<
PixelAccessor<kN32_SkColorType, kSRGB_SkGammaType>, Blender>;
- sampleStage->initStage<S>(next, srcPixmap);
+ sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap);
return sampleStage->get();
}
case kIndex_8_SkColorType: {
using S =
BilerpSampler<
PixelAccessor<kIndex_8_SkColorType, kSRGB_SkGammaType>, Blender>;
- sampleStage->initStage<S>(next, srcPixmap);
+ sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap);
return sampleStage->get();
}
default:
@@ -686,14 +539,16 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler(
}
}
+ auto pixelAccessor = choose_pixel_accessor(srcPixmap, A8TintColor, accessor);
// General cases.
if (filterQuality == kNone_SkFilterQuality) {
- return choose_pixel_sampler_base<NearestNeighborSampler>(
- next, srcPixmap, A8TintColor, sampleStage, accessor);
+ using S = NearestNeighborSampler<PixelAccessorShim, Blender>;
+ sampleStage->initStage<S>(next, pixelAccessor);
} else {
- return choose_pixel_sampler_base<BilerpSampler>(
- next, srcPixmap, A8TintColor, sampleStage, accessor);
+ using S = BilerpSampler<PixelAccessorShim, Blender>;
+ sampleStage->initStage<S>(next, dimensions, xTile, yTile, pixelAccessor);
}
+ return sampleStage->get();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -705,17 +560,17 @@ public:
SrcFPPixel(const SrcFPPixel& Blender) : fPostAlpha(Blender.fPostAlpha) {}
void SK_VECTORCALL blendPixel(Sk4f pixel) override {
SkASSERT(fDst + 1 <= fEnd );
- SrcPixel(fDst, pixel, 0);
+ this->srcPixel(fDst, pixel, 0);
fDst += 1;
}
void SK_VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) override {
SkASSERT(fDst + 4 <= fEnd);
SkPM4f* dst = fDst;
- SrcPixel(dst, p0, 0);
- SrcPixel(dst, p1, 1);
- SrcPixel(dst, p2, 2);
- SrcPixel(dst, p3, 3);
+ this->srcPixel(dst, p0, 0);
+ this->srcPixel(dst, p1, 1);
+ this->srcPixel(dst, p2, 2);
+ this->srcPixel(dst, p3, 3);
fDst += 4;
}
@@ -725,7 +580,9 @@ public:
}
private:
- void SK_VECTORCALL SrcPixel(SkPM4f* dst, Sk4f pixel, int index) {
+ void SK_VECTORCALL srcPixel(SkPM4f* dst, Sk4f pixel, int index) {
+ check_pixel(pixel);
+
Sk4f newPixel = pixel;
if (alphaType == kUnpremul_SkAlphaType) {
newPixel = Premultiply(pixel);
@@ -797,7 +654,8 @@ SkLinearBitmapPipeline::SkLinearBitmapPipeline(
// identity matrix, the matrix stage is skipped, and the tilerStage is the first stage.
auto blenderStage = choose_blender_for_shading(alphaType, postAlpha, &fBlenderStage);
auto samplerStage = choose_pixel_sampler(
- blenderStage, filterQuality, srcPixmap, paintColor, &fSampleStage, &fAccessor);
+ blenderStage, filterQuality, xTile, yTile,
+ srcPixmap, paintColor, &fSampleStage, &fAccessor);
auto tilerStage = choose_tiler(samplerStage, dimensions, xTile, yTile,
filterQuality, dx, &fTileStage);
fFirstStage = choose_matrix(tilerStage, adjustedInverse, &fMatrixStage);
diff --git a/src/core/SkLinearBitmapPipeline.h b/src/core/SkLinearBitmapPipeline.h
index b0f7e9dd20..91b573df5d 100644
--- a/src/core/SkLinearBitmapPipeline.h
+++ b/src/core/SkLinearBitmapPipeline.h
@@ -133,9 +133,9 @@ public:
// These values were generated by the assert above in Stage::init{Sink|Stage}.
using MatrixStage = Stage<PointProcessorInterface, 160, PointProcessorInterface>;
using TileStage = Stage<PointProcessorInterface, 160, SampleProcessorInterface>;
- using SampleStage = Stage<SampleProcessorInterface, 100, BlendProcessorInterface>;
+ using SampleStage = Stage<SampleProcessorInterface, 160, BlendProcessorInterface>;
using BlenderStage = Stage<BlendProcessorInterface, 40>;
- using Accessor = PolyMemory<PixelAccessorInterface, 48>;
+ using Accessor = PolyMemory<PixelAccessorInterface, 64>;
private:
PointProcessorInterface* fFirstStage;
diff --git a/src/core/SkLinearBitmapPipeline_core.h b/src/core/SkLinearBitmapPipeline_core.h
index 2c39a38320..5ef6fcab5b 100644
--- a/src/core/SkLinearBitmapPipeline_core.h
+++ b/src/core/SkLinearBitmapPipeline_core.h
@@ -178,6 +178,15 @@ void span_fallback(Span span, Stage* stage) {
stage->pointListFew(count, xs, ys);
}
}
+
+inline Sk4f SK_VECTORCALL check_pixel(const Sk4f& pixel) {
+ SkASSERTF(0.0f <= pixel[0] && pixel[0] <= 1.0f, "pixel[0]: %f", pixel[0]);
+ SkASSERTF(0.0f <= pixel[1] && pixel[1] <= 1.0f, "pixel[1]: %f", pixel[1]);
+ SkASSERTF(0.0f <= pixel[2] && pixel[2] <= 1.0f, "pixel[2]: %f", pixel[2]);
+ SkASSERTF(0.0f <= pixel[3] && pixel[3] <= 1.0f, "pixel[3]: %f", pixel[3]);
+ return pixel;
+}
+
} // namespace
class SkLinearBitmapPipeline::PointProcessorInterface {
@@ -201,26 +210,6 @@ public:
// Used for nearest neighbor when scale factor is 1.0. The span can just be repeated with no
// edge pixel alignment problems. This is for handling a very common case.
virtual void repeatSpan(Span span, int32_t repeatCount) = 0;
-
- // The x's and y's are setup in the following order:
- // +--------+--------+
- // | | |
- // | px00 | px10 |
- // | 0 | 1 |
- // +--------+--------+
- // | | |
- // | px01 | px11 |
- // | 2 | 3 |
- // +--------+--------+
- // These pixels coordinates are arranged in the following order in xs and ys:
- // px00 px10 px01 px11
- virtual void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) = 0;
-
- // A span represents sample points that have been mapped from destination space to source
- // space. Each sample point is then expanded to the four bilerp points by add +/- 0.5. The
- // resulting Y values my be off the tile. When y +/- 0.5 are more than 1 apart because of
- // tiling, the second Y is used to denote the retiled Y value.
- virtual void bilerpSpan(Span span, SkScalar y) = 0;
};
class SkLinearBitmapPipeline::DestinationInterface {
@@ -243,10 +232,10 @@ class SkLinearBitmapPipeline::PixelAccessorInterface {
public:
virtual ~PixelAccessorInterface() { }
virtual void SK_VECTORCALL getFewPixels(
- int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0;
+ int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0;
virtual void SK_VECTORCALL get4Pixels(
- Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0;
+ Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0;
virtual void get4Pixels(
const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0;
diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h
index 759075b3e5..20057cc992 100644
--- a/src/core/SkLinearBitmapPipeline_sample.h
+++ b/src/core/SkLinearBitmapPipeline_sample.h
@@ -40,7 +40,7 @@ namespace {
// * px11 -> xy
// So x * y is calculated first and then used to calculate all the other factors.
static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
- Sk4f px01, Sk4f px11) {
+ Sk4f px01, Sk4f px11) {
// Calculate fractional xs and ys.
Sk4s fxs = xs - xs.floor();
Sk4s fys = ys - ys.floor();
@@ -134,20 +134,21 @@ template <SkGammaType gammaType>
class PixelConverter<kIndex_8_SkColorType, gammaType> {
public:
using Element = uint8_t;
- PixelConverter(const SkPixmap& srcPixmap) {
+ PixelConverter(const SkPixmap& srcPixmap)
+ : fColorTableSize(srcPixmap.ctable()->count()){
SkColorTable* skColorTable = srcPixmap.ctable();
SkASSERT(skColorTable != nullptr);
fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
- for (int i = 0; i < skColorTable->count(); i++) {
+ for (int i = 0; i < fColorTableSize; i++) {
fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]);
}
}
- PixelConverter(const PixelConverter& strategy) {
+ PixelConverter(const PixelConverter& strategy)
+ : fColorTableSize{strategy.fColorTableSize}{
fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
- // TODO: figure out the count.
- for (int i = 0; i < 256; i++) {
+ for (int i = 0; i < fColorTableSize; i++) {
fColorTable[i] = strategy.fColorTable[i];
}
}
@@ -158,9 +159,9 @@ public:
private:
static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
-
- SkAutoMalloc fColorTableStorage{kColorTableSize};
- Sk4f* fColorTable;
+ const int fColorTableSize;
+ SkAutoMalloc fColorTableStorage{kColorTableSize};
+ Sk4f* fColorTable;
};
template <SkGammaType gammaType>
@@ -194,12 +195,12 @@ public:
: fPixelAccessor(accessor) { }
void SK_VECTORCALL getFewPixels(
- int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
+ int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
}
void SK_VECTORCALL get4Pixels(
- Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
+ Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
}
@@ -237,10 +238,8 @@ public:
, fConverter{srcPixmap, std::move<Args>(args)...} { }
void SK_VECTORCALL getFewPixels (
- int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
- Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
- Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
- Sk4i bufferLoc = YIs * fWidth + XIs;
+ int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
+ Sk4i bufferLoc = ys * fWidth + xs;
switch (n) {
case 3:
*px2 = this->getPixelAt(bufferLoc[2]);
@@ -254,10 +253,8 @@ public:
}
void SK_VECTORCALL get4Pixels(
- Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
- Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
- Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
- Sk4i bufferLoc = YIs * fWidth + XIs;
+ Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
+ Sk4i bufferLoc = ys * fWidth + xs;
*px0 = this->getPixelAt(bufferLoc[0]);
*px1 = this->getPixelAt(bufferLoc[1]);
*px2 = this->getPixelAt(bufferLoc[2]);
@@ -330,6 +327,7 @@ static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
}
}
+// -- NearestNeighborSampler -----------------------------------------------------------------------
// NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
template<typename Accessor, typename Next>
class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
@@ -345,7 +343,7 @@ public:
void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
SkASSERT(0 < n && n < 4);
Sk4f px0, px1, px2;
- fAccessor.getFewPixels(n, xs, ys, &px0, &px1, &px2);
+ fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
if (n >= 1) fNext->blendPixel(px0);
if (n >= 2) fNext->blendPixel(px1);
if (n >= 3) fNext->blendPixel(px2);
@@ -353,7 +351,7 @@ public:
void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
Sk4f px0, px1, px2, px3;
- fAccessor.get4Pixels(xs, ys, &px0, &px1, &px2, &px3);
+ fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
fNext->blend4Pixels(px0, px1, px2, px3);
}
@@ -380,21 +378,11 @@ public:
}
}
- void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override {
- SkFAIL("Using nearest neighbor sampler, but calling a bilerpEdge.");
- }
-
- void bilerpSpan(Span span, SkScalar y) override {
- SkFAIL("Using nearest neighbor sampler, but calling a bilerpSpan.");
- }
-
private:
// When moving through source space more slowly than dst space (zoomed in),
// we'll be sampling from the same source pixel more than once.
void spanSlowRate(Span span) {
- SkPoint start;
- SkScalar length;
- int count;
+ SkPoint start; SkScalar length; int count;
std::tie(start, length, count) = span;
SkScalar x = X(start);
SkFixed fx = SkScalarToFixed(x);
@@ -451,35 +439,82 @@ private:
Accessor fAccessor;
};
+// Reposition a filter-point coordinate that may be off the tile by at most one pixel.
+// Given the tile mode edgeType, the integer pixel coordinate vs, and the largest valid coordinate
+// vMax, either pin vs (clamp and mirror) or wrap it (repeat) to generate the right pixel.
+// The value vs must be on the interval [-1, vMax + 1]. The result is on the interval [0, vMax].
+// Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
+static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
+    SkASSERT(-1 <= vs && vs <= vMax + 1);
+    switch (edgeType) {
+        case SkShader::kClamp_TileMode:
+        case SkShader::kMirror_TileMode:
+            vs = std::max(vs, 0);      // Pin a value below the tile to the first pixel.
+            vs = std::min(vs, vMax);   // Pin a value above the tile to the last pixel.
+            break;
+        case SkShader::kRepeat_TileMode:
+            vs = (vs <= vMax) ? vs : 0;    // One past the last pixel wraps to the first pixel.
+            vs = (vs >= 0) ? vs : vMax;    // One before the first pixel wraps to the last pixel.
+            break;
+    }
+    SkASSERT(0 <= vs && vs <= vMax);
+    return vs;
+}
+
+// From a sample point coordinate on the tile, return the top or left filter value.
+// The result r is in the range (0, 1]. Since it represents the weight given to the top or
+// left element, if x == 0.5 the filter value is 1.0.
+// The input sample point must be on the tile, therefore it must be >= 0.
+static SkScalar sample_to_filter(SkScalar x) {
+    SkASSERT(x >= 0.0f);
+    // The usual form of the top or left edge is x - .5, but since we are working on the unit
+    // square, x + .5 works just as well. This also guarantees that v > 0.0, allowing the use
+    // of trunc.
+    SkScalar v = x + 0.5f;
+    // Produce the top or left offset, a value on the range [0, 1).
+    SkScalar f = v - SkScalarTruncToScalar(v);
+    // Produce the filter value, which is on the range (0, 1].
+    SkScalar r = 1.0f - f;
+    SkASSERT(0.0f < r && r <= 1.0f);
+    return r;
+}
+
// -- BilerpSampler --------------------------------------------------------------------------------
// BilerpSampler - use a bilerp filter to create runs of destination pixels.
+// Note: in the code below, there are two types of points
+// * sample points - these are the points passed in by pointList* and Spans.
+// * filter points - are created from a sample point to form the coordinates of the points
+// to use in the filter and to generate the filter values.
template<typename Accessor, typename Next>
class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
public:
template<typename... Args>
- BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
- : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
+ BilerpSampler(
+ SkLinearBitmapPipeline::BlendProcessorInterface* next,
+ SkISize dimensions,
+ SkShader::TileMode xTile, SkShader::TileMode yTile,
+ Args&& ... args
+ )
+ : fNext{next}
+ , fXEdgeType{xTile}
+ , fXMax{dimensions.width() - 1}
+ , fYEdgeType{yTile}
+ , fYMax{dimensions.height() - 1}
+ , fAccessor{std::forward<Args>(args)...} { }
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
const BilerpSampler& sampler)
- : fNext{next}, fAccessor{sampler.fAccessor} { }
-
- Sk4f bilerpNonEdgePixel(SkScalar x, SkScalar y) {
- Sk4f px00, px10, px01, px11;
-
- // bilerp4() expects xs, ys are the top-lefts of the 2x2 kernel.
- Sk4f xs = Sk4f{x} - 0.5f;
- Sk4f ys = Sk4f{y} - 0.5f;
- Sk4f sampleXs = xs + Sk4f{0.0f, 1.0f, 0.0f, 1.0f};
- Sk4f sampleYs = ys + Sk4f{0.0f, 0.0f, 1.0f, 1.0f};
- fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
- return bilerp4(xs, ys, px00, px10, px01, px11);
- }
+ : fNext{next}
+ , fXEdgeType{sampler.fXEdgeType}
+ , fXMax{sampler.fXMax}
+ , fYEdgeType{sampler.fYEdgeType}
+ , fYMax{sampler.fYMax}
+ , fAccessor{sampler.fAccessor} { }
void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
SkASSERT(0 < n && n < 4);
auto bilerpPixel = [&](int index) {
- return this->bilerpNonEdgePixel(xs[index], ys[index]);
+ return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
};
if (n >= 1) fNext->blendPixel(bilerpPixel(0));
@@ -489,308 +524,484 @@ public:
void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
auto bilerpPixel = [&](int index) {
- return this->bilerpNonEdgePixel(xs[index], ys[index]);
+ return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
};
fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
}
void pointSpan(Span span) override {
- this->bilerpSpan(span, span.startY());
- }
-
- void repeatSpan(Span span, int32_t repeatCount) override {
- while (repeatCount > 0) {
- this->pointSpan(span);
- repeatCount--;
- }
- }
-
- void SK_VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) override {
- Sk4f px00, px10, px01, px11;
- Sk4f xs = Sk4f{sampleXs[0]};
- Sk4f ys = Sk4f{sampleYs[0]};
- fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
- Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11);
- fNext->blendPixel(pixel);
- }
-
- void bilerpSpan(Span span, SkScalar y) override {
SkASSERT(!span.isEmpty());
SkPoint start;
SkScalar length;
int count;
std::tie(start, length, count) = span;
+
+ // Nothing to do.
+ if (count == 0) {
+ return;
+ }
+
+ // Trivial case. No sample points are generated other than start.
+ if (count == 1) {
+ fNext->blendPixel(this->bilerpSamplePoint(start));
+ return;
+ }
+
+ // Note: the following code could be done in terms of dx = length / (count -1), but that
+ // would introduce a divide that is not needed for the most common dx == 1 cases.
SkScalar absLength = SkScalarAbs(length);
if (absLength == 0.0f) {
- this->spanZeroRate(span, y);
+ // |dx| == 0
+ // length is zero, so clamp an edge pixel.
+ this->spanZeroRate(span);
} else if (absLength < (count - 1)) {
- this->spanSlowRate(span, y);
+ // 0 < |dx| < 1.
+ this->spanSlowRate(span);
} else if (absLength == (count - 1)) {
- if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) {
- if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) {
- src_strategy_blend(span, fNext, &fAccessor);
- } else {
- this->spanUnitRateAlignedX(span, y);
- }
+ // |dx| == 1.
+ if (sample_to_filter(span.startX()) == 1.0f
+ && sample_to_filter(span.startY()) == 1.0f) {
+ // All the pixels are aligned with the dest; go fast.
+ src_strategy_blend(span, fNext, &fAccessor);
} else {
- this->spanUnitRate(span, y);
+ // There are some sub-pixel offsets, so bilerp.
+ this->spanUnitRate(span);
}
+ } else if (absLength < 2.0f * (count - 1)) {
+ // 1 < |dx| < 2.
+ this->spanMediumRate(span);
} else {
- this->spanFastRate(span, y);
+ // |dx| >= 2.
+ this->spanFastRate(span);
+ }
+ }
+
+ void repeatSpan(Span span, int32_t repeatCount) override {
+ while (repeatCount > 0) {
+ this->pointSpan(span);
+ repeatCount--;
}
}
private:
- void spanZeroRate(Span span, SkScalar y1) {
- SkScalar y0 = span.startY() - 0.5f;
- y1 += 0.5f;
- int iy0 = SkScalarFloorToInt(y0);
- SkScalar filterY1 = y0 - iy0;
- SkScalar filterY0 = 1.0f - filterY1;
- int iy1 = SkScalarFloorToInt(y1);
- int ix = SkScalarFloorToInt(span.startX());
- Sk4f pixelY0 = fAccessor.getPixelFromRow(fAccessor.row(iy0), ix);
- Sk4f pixelY1 = fAccessor.getPixelFromRow(fAccessor.row(iy1), ix);
- Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1;
- int count = span.count();
+
+ // Convert a sample point to the points used by the filter.
+ void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
+ // May be less than zero. Be careful to use Floor.
+ int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
+ // Always greater than zero. Use the faster Trunc.
+ int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
+ int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
+ int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
+
+ *filterXs = Sk4i{x0, x1, x0, x1};
+ *filterYs = Sk4i{y0, y0, y1, y1};
+ }
+
+ // Given a sample point, generate a color by bilerping the four filter points.
+ Sk4f bilerpSamplePoint(SkPoint sample) {
+ Sk4i iXs, iYs;
+ filterPoints(sample, &iXs, &iYs);
+ Sk4f px00, px10, px01, px11;
+ fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
+ return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
+ }
+
+ // Get two pixels at x from row0 and row1.
+ void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
+ *px0 = fAccessor.getPixelFromRow(row0, x);
+ *px1 = fAccessor.getPixelFromRow(row1, x);
+ }
+
+ // |dx| == 0. This code assumes that length is zero.
+ void spanZeroRate(Span span) {
+ SkPoint start; SkScalar length; int count;
+ std::tie(start, length, count) = span;
+ SkASSERT(length == 0.0f);
+
+ // Filter for the blending of the top and bottom pixels.
+ SkScalar filterY = sample_to_filter(Y(start));
+
+ // Generate the four filter points from the sample point start. Generate the row* values.
+ Sk4i iXs, iYs;
+ this->filterPoints(start, &iXs, &iYs);
+ const void* const row0 = fAccessor.row(iYs[0]);
+ const void* const row1 = fAccessor.row(iYs[2]);
+
+ // Get the two pixels that make up the clamping pixel.
+ Sk4f pxTop, pxBottom;
+ this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
+ Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
+
while (count >= 4) {
- fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel);
+ fNext->blend4Pixels(pixel, pixel, pixel, pixel);
count -= 4;
}
while (count > 0) {
- fNext->blendPixel(filterPixel);
+ fNext->blendPixel(pixel);
count -= 1;
}
}
- // When moving through source space more slowly than dst space (zoomed in),
- // we'll be sampling from the same source pixel more than once.
- void spanSlowRate(Span span, SkScalar ry1) {
- SkPoint start;
- SkScalar length;
- int count;
+ // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
+ // computation. In particular, several destination pixels may be generated from the same four
+ // source pixels.
+ // In the following code a "part" is a combination of two pixels from the same column of the
+ // filter.
+ void spanSlowRate(Span span) {
+ SkPoint start; SkScalar length; int count;
std::tie(start, length, count) = span;
- SkFixed fx = SkScalarToFixed(X(start)-0.5f);
- SkFixed fdx = SkScalarToFixed(length / (count - 1));
+ // Calculate the distance between each sample point.
+ const SkScalar dx = length / (count - 1);
+ SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
+
+ // Generate the filter values for the top-left corner.
+ // Note: these values are in filter space; this has implications about how to adjust
+ // these values at each step. For example, as the sample point increases, the filter
+ // value decreases, this is because the filter and position are related by
+ // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
+ // direction of the sample point which is increasing by dx.
+ SkScalar filterX = sample_to_filter(X(start));
+ SkScalar filterY = sample_to_filter(Y(start));
+
+ // Generate the four filter points from the sample point start. Generate the row* values.
+ Sk4i iXs, iYs;
+ this->filterPoints(start, &iXs, &iYs);
+ const void* const row0 = fAccessor.row(iYs[0]);
+ const void* const row1 = fAccessor.row(iYs[2]);
+
+ // Generate part of the filter value at xColumn.
+ auto partAtColumn = [&](int xColumn) {
+ int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
+ Sk4f pxTop, pxBottom;
+ this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
+ return pxTop * filterY + (1.0f - filterY) * pxBottom;
+ };
- Sk4f xAdjust;
- if (fdx >= 0) {
- xAdjust = Sk4f{-1.0f};
- } else {
- xAdjust = Sk4f{1.0f};
- }
- int ix = SkFixedFloorToInt(fx);
- int ioldx = ix;
- Sk4f x{SkFixedToScalar(fx) - ix};
- Sk4f dx{SkFixedToScalar(fdx)};
- SkScalar ry0 = Y(start) - 0.5f;
- ry1 += 0.5f;
- SkScalar yFloor = std::floor(ry0);
- Sk4f y1 = Sk4f{ry0 - yFloor};
- Sk4f y0 = Sk4f{1.0f} - y1;
- const void* const row0 = fAccessor.row(SkScalarFloorToInt(ry0));
- const void* const row1 = fAccessor.row(SkScalarFloorToInt(ry1));
- Sk4f fpixel00 = y0 * fAccessor.getPixelFromRow(row0, ix);
- Sk4f fpixel01 = y1 * fAccessor.getPixelFromRow(row1, ix);
- Sk4f fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1);
- Sk4f fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1);
- auto getNextPixel = [&]() {
- if (ix != ioldx) {
- fpixel00 = fpixel10;
- fpixel01 = fpixel11;
- fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1);
- fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1);
- ioldx = ix;
- x = x + xAdjust;
- }
+ // The leftPart is made up of two pixels from the left column of the filter, right part
+ // is similar. The top and bottom pixels in the *Part are created as a linear blend of
+ // the top and bottom pixels using filterY. See the partAtColumn function above.
+ Sk4f leftPart = partAtColumn(iXs[0]);
+ Sk4f rightPart = partAtColumn(iXs[1]);
- Sk4f x0, x1;
- x0 = Sk4f{1.0f} - x;
- x1 = x;
- Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11);
- fx += fdx;
- ix = SkFixedFloorToInt(fx);
- x = x + dx;
- return fpixel;
+ // Create a destination color by blending together a left and right part using filterX.
+ auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
+ Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
+ return check_pixel(pixel);
};
- while (count >= 4) {
- Sk4f fpixel0 = getNextPixel();
- Sk4f fpixel1 = getNextPixel();
- Sk4f fpixel2 = getNextPixel();
- Sk4f fpixel3 = getNextPixel();
+ // Send the first pixel to the destination. This simplifies the loop structure so that no
+ // extra pixels are fetched for the last iteration of the loop.
+ fNext->blendPixel(bilerp(leftPart, rightPart));
+ count -= 1;
+
+ if (dx > 0.0f) {
+ // * positive direction - generate destination pixels by sliding the filter from left
+ // to right.
+ int rightPartCursor = iXs[1];
+
+ // Advance the filter from left to right. Remember that moving the top-left corner of
+ // the filter to the right actually makes the filter value smaller.
+ auto advanceFilter = [&]() {
+ filterX -= dx;
+ if (filterX <= 0.0f) {
+ filterX += 1.0f;
+ leftPart = rightPart;
+ rightPartCursor += 1;
+ rightPart = partAtColumn(rightPartCursor);
+ }
+ SkASSERT(0.0f < filterX && filterX <= 1.0f);
- fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3);
- count -= 4;
- }
+ return bilerp(leftPart, rightPart);
+ };
- while (count > 0) {
- fNext->blendPixel(getNextPixel());
+ while (count >= 4) {
+ Sk4f px0 = advanceFilter(),
+ px1 = advanceFilter(),
+ px2 = advanceFilter(),
+ px3 = advanceFilter();
+ fNext->blend4Pixels(px0, px1, px2, px3);
+ count -= 4;
+ }
- count -= 1;
+ while (count > 0) {
+ fNext->blendPixel(advanceFilter());
+ count -= 1;
+ }
+ } else {
+ // * negative direction - generate destination pixels by sliding the filter from
+ // right to left.
+ int leftPartCursor = iXs[0];
+
+ // Advance the filter from right to left. Remember that moving the top-left corner of
+ // the filter to the left actually makes the filter value larger.
+ auto advanceFilter = [&]() {
+ // Remember, dx < 0 therefore this adds |dx| to filterX.
+ filterX -= dx;
+ // At this point filterX may be > 1, and needs to be wrapped back on to the filter
+ // interval, and the next column in the filter is calculated.
+ if (filterX > 1.0f) {
+ filterX -= 1.0f;
+ rightPart = leftPart;
+ leftPartCursor -= 1;
+ leftPart = partAtColumn(leftPartCursor);
+ }
+ SkASSERT(0.0f < filterX && filterX <= 1.0f);
+
+ return bilerp(leftPart, rightPart);
+ };
+
+ while (count >= 4) {
+ Sk4f px0 = advanceFilter(),
+ px1 = advanceFilter(),
+ px2 = advanceFilter(),
+ px3 = advanceFilter();
+ fNext->blend4Pixels(px0, px1, px2, px3);
+ count -= 4;
+ }
+
+ while (count > 0) {
+ fNext->blendPixel(advanceFilter());
+ count -= 1;
+ }
}
}
- // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
- // We'll never re-use pixels, but we can at least load contiguous pixels.
- void spanUnitRate(Span span, SkScalar y1) {
- y1 += 0.5f;
- SkScalar y0 = span.startY() - 0.5f;
- int iy0 = SkScalarFloorToInt(y0);
- SkScalar filterY1 = y0 - iy0;
- SkScalar filterY0 = 1.0f - filterY1;
- int iy1 = SkScalarFloorToInt(y1);
- const void* rowY0 = fAccessor.row(iy0);
- const void* rowY1 = fAccessor.row(iy1);
- SkScalar x0 = span.startX() - 0.5f;
- int ix0 = SkScalarFloorToInt(x0);
- SkScalar filterX1 = x0 - ix0;
- SkScalar filterX0 = 1.0f - filterX1;
-
- auto getPixelY0 = [&]() {
- Sk4f px = fAccessor.getPixelFromRow(rowY0, ix0);
- return px * filterY0;
- };
-
- auto getPixelY1 = [&]() {
- Sk4f px = fAccessor.getPixelFromRow(rowY1, ix0);
- return px * filterY1;
+ // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
+ // Every filter part is used for two destination pixels, and the code can bulk load four
+ // pixels at a time.
+ void spanUnitRate(Span span) {
+ SkPoint start; SkScalar length; int count;
+ std::tie(start, length, count) = span;
+ SkASSERT(SkScalarAbs(length) == (count - 1));
+
+ // Calculate the four filter points of start, and use the two different Y values to
+ // generate the row pointers.
+ Sk4i iXs, iYs;
+ filterPoints(start, &iXs, &iYs);
+ const void* row0 = fAccessor.row(iYs[0]);
+ const void* row1 = fAccessor.row(iYs[2]);
+
+ // Calculate the filter values for the top-left filter element.
+ const SkScalar filterX = sample_to_filter(X(start));
+ const SkScalar filterY = sample_to_filter(Y(start));
+
+ // Generate part of the filter value at xColumn.
+ auto partAtColumn = [&](int xColumn) {
+ int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
+ Sk4f pxTop, pxBottom;
+ this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
+ return pxTop * filterY + (1.0f - filterY) * pxBottom;
};
- auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
- fAccessor.get4Pixels(rowY0, ix, px0, px1, px2, px3);
- *px0 = *px0 * filterY0;
- *px1 = *px1 * filterY0;
- *px2 = *px2 * filterY0;
- *px3 = *px3 * filterY0;
+ auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
+ // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
+ // otherwise be careful.
+ if (0 <= ix && ix <= fXMax - 3) {
+ Sk4f px00, px10, px20, px30,
+ px01, px11, px21, px31;
+ fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
+ fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
+ *part0 = filterY * px00 + (1.0f - filterY) * px01;
+ *part1 = filterY * px10 + (1.0f - filterY) * px11;
+ *part2 = filterY * px20 + (1.0f - filterY) * px21;
+ *part3 = filterY * px30 + (1.0f - filterY) * px31;
+ } else {
+ *part0 = partAtColumn(ix + 0);
+ *part1 = partAtColumn(ix + 1);
+ *part2 = partAtColumn(ix + 2);
+ *part3 = partAtColumn(ix + 3);
+ }
};
- auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) {
- fAccessor.get4Pixels(rowY1, ix, px0, px1, px2, px3);
- *px0 = *px0 * filterY1;
- *px1 = *px1 * filterY1;
- *px2 = *px2 * filterY1;
- *px3 = *px3 * filterY1;
+ auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
+ return part0 * filterX + part1 * (1.0f - filterX);
};
- auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) {
- return pixelX0 * filterX0 + pixelX1 * filterX1;
- };
+ if (length > 0) {
+ // * positive direction - generate destination pixels by sliding the filter from left
+ // to right.
- // Mid making 4 unit rate.
- Sk4f pxB = getPixelY0() + getPixelY1();
- if (span.length() > 0) {
- int count = span.count();
+ // overlapPart is the filter part from the end of the previous four pixels used at
+ // the start of the next four pixels.
+ Sk4f overlapPart = partAtColumn(iXs[0]);
+ int rightColumnCursor = iXs[1];
while (count >= 4) {
- Sk4f px00, px10, px20, px30;
- get4PixelsY0(ix0, &px00, &px10, &px20, &px30);
- Sk4f px01, px11, px21, px31;
- get4PixelsY1(ix0, &px01, &px11, &px21, &px31);
- Sk4f pxS0 = px00 + px01;
- Sk4f px0 = lerp(pxB, pxS0);
- Sk4f pxS1 = px10 + px11;
- Sk4f px1 = lerp(pxS0, pxS1);
- Sk4f pxS2 = px20 + px21;
- Sk4f px2 = lerp(pxS1, pxS2);
- Sk4f pxS3 = px30 + px31;
- Sk4f px3 = lerp(pxS2, pxS3);
- pxB = pxS3;
+ Sk4f part0, part1, part2, part3;
+ get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
+ Sk4f px0 = bilerp(overlapPart, part0);
+ Sk4f px1 = bilerp(part0, part1);
+ Sk4f px2 = bilerp(part1, part2);
+ Sk4f px3 = bilerp(part2, part3);
+ overlapPart = part3;
fNext->blend4Pixels(px0, px1, px2, px3);
- ix0 += 4;
+ rightColumnCursor += 4;
count -= 4;
}
+
while (count > 0) {
- Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0);
- Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0);
+ Sk4f rightPart = partAtColumn(rightColumnCursor);
- fNext->blendPixel(lerp(pixelY0, pixelY1));
- ix0 += 1;
+ fNext->blendPixel(bilerp(overlapPart, rightPart));
+ overlapPart = rightPart;
+ rightColumnCursor += 1;
count -= 1;
}
} else {
- int count = span.count();
+ // * negative direction - generate destination pixels by sliding the filter from
+ // right to left.
+ Sk4f overlapPart = partAtColumn(iXs[1]);
+ int leftColumnCursor = iXs[0];
+
while (count >= 4) {
- Sk4f px00, px10, px20, px30;
- get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30);
- Sk4f px01, px11, px21, px31;
- get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31);
- Sk4f pxS3 = px30 + px31;
- Sk4f px0 = lerp(pxS3, pxB);
- Sk4f pxS2 = px20 + px21;
- Sk4f px1 = lerp(pxS2, pxS3);
- Sk4f pxS1 = px10 + px11;
- Sk4f px2 = lerp(pxS1, pxS2);
- Sk4f pxS0 = px00 + px01;
- Sk4f px3 = lerp(pxS0, pxS1);
- pxB = pxS0;
+ Sk4f part0, part1, part2, part3;
+ get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
+ Sk4f px0 = bilerp(part0, overlapPart);
+ Sk4f px1 = bilerp(part1, part0);
+ Sk4f px2 = bilerp(part2, part1);
+ Sk4f px3 = bilerp(part3, part2);
+ overlapPart = part3;
fNext->blend4Pixels(px0, px1, px2, px3);
- ix0 -= 4;
+ leftColumnCursor -= 4;
count -= 4;
}
+
while (count > 0) {
- Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0);
- Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0);
+ Sk4f leftPart = partAtColumn(leftColumnCursor);
- fNext->blendPixel(lerp(pixelY0, pixelY1));
- ix0 -= 1;
+ fNext->blendPixel(bilerp(leftPart, overlapPart));
+ overlapPart = leftPart;
+ leftColumnCursor -= 1;
count -= 1;
}
}
}
- void spanUnitRateAlignedX(Span span, SkScalar y1) {
- SkScalar y0 = span.startY() - 0.5f;
- y1 += 0.5f;
- int iy0 = SkScalarFloorToInt(y0);
- SkScalar filterY1 = y0 - iy0;
- SkScalar filterY0 = 1.0f - filterY1;
- int iy1 = SkScalarFloorToInt(y1);
- int ix = SkScalarFloorToInt(span.startX());
- const void* rowY0 = fAccessor.row(iy0);
- const void* rowY1 = fAccessor.row(iy1);
- auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) {
- return *pixelY0 * filterY0 + *pixelY1 * filterY1;
+ // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
+ // still slow enough to take advantage of previous calculations.
+ void spanMediumRate(Span span) {
+ SkPoint start; SkScalar length; int count;
+ std::tie(start, length, count) = span;
+
+ // Calculate the distance between each sample point.
+ const SkScalar dx = length / (count - 1);
+ SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
+
+ // Generate the filter values for the top-left corner.
+ // Note: these values are in filter space; this has implications about how to adjust
+ // these values at each step. For example, as the sample point increases, the filter
+ // value decreases, this is because the filter and position are related by
+ // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
+ // direction of the sample point which is increasing by dx.
+ SkScalar filterX = sample_to_filter(X(start));
+ SkScalar filterY = sample_to_filter(Y(start));
+
+ // Generate the four filter points from the sample point start. Generate the row* values.
+ Sk4i iXs, iYs;
+ this->filterPoints(start, &iXs, &iYs);
+ const void* const row0 = fAccessor.row(iYs[0]);
+ const void* const row1 = fAccessor.row(iYs[2]);
+
+ // Generate part of the filter value at xColumn.
+ auto partAtColumn = [&](int xColumn) {
+ int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
+ Sk4f pxTop, pxBottom;
+ this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
+ return pxTop * filterY + (1.0f - filterY) * pxBottom;
+ };
+
+ // The leftPart is made up of two pixels from the left column of the filter, right part
+ // is similar. The top and bottom pixels in the *Part are created as a linear blend of
+ // the top and bottom pixels using filterY. See the partAtColumn function above.
+ Sk4f leftPart = partAtColumn(iXs[0]);
+ Sk4f rightPart = partAtColumn(iXs[1]);
+
+ // Create a destination color by blending together a left and right part using filterX.
+ auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
+ Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
+ return check_pixel(pixel);
};
- if (span.length() > 0) {
- int count = span.count();
+ // Send the first pixel to the destination. This simplifies the loop structure so that no
+ // extra pixels are fetched for the last iteration of the loop.
+ fNext->blendPixel(bilerp(leftPart, rightPart));
+ count -= 1;
+
+ if (dx > 0.0f) {
+ // * positive direction - generate destination pixels by sliding the filter from left
+ // to right.
+ int rightPartCursor = iXs[1];
+
+ // Advance the filter from left to right. Remember that moving the top-left corner of
+ // the filter to the right actually makes the filter value smaller.
+ auto advanceFilter = [&]() {
+ filterX -= dx;
+ // At this point filterX is less than zero, but might actually be less than -1.
+ if (filterX > -1.0f) {
+ filterX += 1.0f;
+ leftPart = rightPart;
+ rightPartCursor += 1;
+ rightPart = partAtColumn(rightPartCursor);
+ } else {
+ filterX += 2.0f;
+ rightPartCursor += 2;
+ leftPart = partAtColumn(rightPartCursor - 1);
+ rightPart = partAtColumn(rightPartCursor);
+ }
+ SkASSERT(0.0f < filterX && filterX <= 1.0f);
+
+ return bilerp(leftPart, rightPart);
+ };
+
while (count >= 4) {
- Sk4f px00, px10, px20, px30;
- fAccessor.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30);
- Sk4f px01, px11, px21, px31;
- fAccessor.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31);
- fNext->blend4Pixels(
- lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
- ix += 4;
+ Sk4f px0 = advanceFilter(),
+ px1 = advanceFilter(),
+ px2 = advanceFilter(),
+ px3 = advanceFilter();
+ fNext->blend4Pixels(px0, px1, px2, px3);
count -= 4;
}
- while (count > 0) {
- Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix);
- Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix);
- fNext->blendPixel(lerp(&pixelY0, &pixelY1));
- ix += 1;
+ while (count > 0) {
+ fNext->blendPixel(advanceFilter());
count -= 1;
}
} else {
- int count = span.count();
+ // * negative direction - generate destination pixels by sliding the filter from
+ // right to left.
+ int leftPartCursor = iXs[0];
+
+ // Advance the filter from right to left. With 1 < |dx| < 2 the top-left corner of the
+ // filter moves left by either one or two columns per step. Returns the blended pixel
+ // for the new filter position.
+ auto advanceFilter = [&]() {
+ // Remember, dx < 0 therefore this adds |dx| to filterX.
+ filterX -= dx;
+ // At this point, filterX is greater than one, but may actually be greater than two.
+ // Use <= so that filterX == 2 wraps to 1 and stays on the (0, 1] filter interval.
+ if (filterX <= 2.0f) {
+ // Moved one column: the old left part becomes the right part.
+ filterX -= 1.0f;
+ rightPart = leftPart;
+ leftPartCursor -= 1;
+ leftPart = partAtColumn(leftPartCursor);
+ } else {
+ // Moved two columns: neither old part can be reused. The right column of the
+ // filter is one to the right of the left column.
+ filterX -= 2.0f;
+ leftPartCursor -= 2;
+ rightPart = partAtColumn(leftPartCursor + 1);
+ leftPart = partAtColumn(leftPartCursor);
+ }
+ SkASSERT(0.0f < filterX && filterX <= 1.0f);
+ return bilerp(leftPart, rightPart);
+ };
+
while (count >= 4) {
- Sk4f px00, px10, px20, px30;
- fAccessor.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00);
- Sk4f px01, px11, px21, px31;
- fAccessor.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01);
- fNext->blend4Pixels(
- lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
- ix -= 4;
+ Sk4f px0 = advanceFilter(),
+ px1 = advanceFilter(),
+ px2 = advanceFilter(),
+ px3 = advanceFilter();
+ fNext->blend4Pixels(px0, px1, px2, px3);
count -= 4;
}
- while (count > 0) {
- Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix);
- Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix);
- fNext->blendPixel(lerp(&pixelY0, &pixelY1));
- ix -= 1;
+ while (count > 0) {
+ fNext->blendPixel(advanceFilter());
count -= 1;
}
}
@@ -798,34 +1009,26 @@ private:
// We're moving through source space faster than dst (zoomed out),
// so we'll never reuse a source pixel or be able to do contiguous loads.
- void spanFastRate(Span span, SkScalar y1) {
- SkPoint start;
- SkScalar length;
- int count;
+ void spanFastRate(Span span) {
+ SkPoint start; SkScalar length; int count;
std::tie(start, length, count) = span;
SkScalar x = X(start);
SkScalar y = Y(start);
- // In this sampler, it is assumed that if span.StartY() and y1 are the same then both
- // y-lines are on the same tile.
- if (y == y1) {
- // Both y-lines are on the same tile.
- span_fallback(span, this);
- } else {
- // The y-lines are on different tiles.
- SkScalar dx = length / (count - 1);
- Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f};
- while (count > 0) {
- Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x};
- this->bilerpEdge(xs, ys);
- x += dx;
- count -= 1;
- }
+ SkScalar dx = length / (count - 1);
+ while (count > 0) {
+ fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
+ x += dx;
+ count -= 1;
}
}
- Next* const fNext;
- Accessor fAccessor;
+ Next* const fNext;
+ const SkShader::TileMode fXEdgeType;
+ const int fXMax;
+ const SkShader::TileMode fYEdgeType;
+ const int fYMax;
+ Accessor fAccessor;
};
} // namespace