aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar jcgregorio <jcgregorio@google.com>2016-07-22 05:40:58 -0700
committerGravatar Commit bot <commit-bot@chromium.org>2016-07-22 05:40:58 -0700
commitda626aadcced1b10a1f9d4d10b17e3ed429ba81b (patch)
tree2982adcf208832e9fbafd20128ceec20499450b2
parent2259c5f65015f2ba61c5ce9d0a4410bfa7d3eedf (diff)
Revert of Redo Tiling (patchset #14 id:260001 of https://codereview.chromium.org/2134893002/ )
Reason for revert: Crashing on Win with: Caught exception 3221225477 EXCEPTION_ACCESS_VIOLATION, was running: unit test GrShape srgb gm shadertext2 srgb gm shallow_gradient_conical srgb gm shallow_gradient_sweep srgb gm shallow_gradient_linear_nodither step returned non-zero exit code: -1073741819 https://status.skia.org/?commit_label=author&filter=search&search_value=Test-Win-MSVC-GCE-CPU-AVX2-x86-Release Original issue's description: > In the current code, tiling and bilerp sampling are strongly tied together. They can be separated by taking advantage of the observation that, when translating a sample point into filter points in the bilerp stage, the filter points will be at most 0.5 outside the tile. This allows simplified repositioning for the various tiling modes; clamp and mirror use min and max while repeat has max -> 0 and 0 -> max. This allows bilerp to simply treat the filter points that fall off the tile. This allows tiling and bilerp sampling to be totally separate. > > This CL has several parts that are intertwined: > * move pin/wrap functionality into BilerpSampler. > * remove the nearest neighbor and bilerp tilers > * create a simplified general tiler > * remove the pipeline virtual calls bilerpEdge and bilerpSpan because everything works off sample points now. > * redo all the bilerp sampling to use the new local methods to wrap/pin. > * introduce a new medium rate sample that handles spans with 1 < |dx| < 2. > > This change improves the performance as displayed below: > Most of the top 25 desktop pages improve or are the same. A few are worse, but close to the noise floor. In addition, this change has about 3% smaller code. 
> > old time new time new/old > 13274693 8414645 0.633886 top25desk_google_com_search_q_c.skp_1 > 4946466 3258018 0.658656 top25desk_wordpress.skp_1 > 6977187 5737584 0.822335 top25desk_youtube_com.skp_1 > 3770021 3296831 0.874486 top25desk_google_com__hl_en_q_b.skp_1 > 8890813 8600143 0.967307 top25desk_answers_yahoo_com.skp_1 > 3178974 3094300 0.973364 top25desk_facebook.skp_1 > 8871835 8711260 0.981901 top25desk_twitter.skp_1 > 838509 829290 0.989005 top25desk_blogger.skp_1 > 2821870 2801111 0.992644 top25desk_plus_google_com_11003.skp_1 > 511978 509530 0.995219 top25desk_techcrunch_com.skp_1 > 2408588 2397435 0.995369 top25desk_ebay_com.skp_1 > 4446919 4448004 1.00024 top25desk_espn.skp_1 > 2863241 2875696 1.00435 top25desk_google_com_calendar_.skp_1 > 7170086 7208447 1.00535 top25desk_booking_com.skp_1 > 7356109 7417776 1.00838 top25desk_pinterest.skp_1 > 5265591 5340392 1.01421 top25desk_weather_com.skp_1 > 5675244 5774144 1.01743 top25desk_sports_yahoo_com_.skp_1 > 1048531 1067663 1.01825 top25desk_games_yahoo_com.skp_1 > 2075501 2115131 1.01909 top25desk_amazon_com.skp_1 > 4262170 4370441 1.0254 top25desk_news_yahoo_com.skp_1 > 3789319 3897996 1.02868 top25desk_docs___1_open_documen.skp_1 > 919336 949979 1.03333 top25desk_wikipedia__1_tab_.skp_1 > 4274454 4489369 1.05028 top25desk_mail_google_com_mail_.skp_1 > 4149326 4376556 1.05476 top25desk_linkedin.skp_1 > > BUG=skia: > GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2134893002 > CQ_INCLUDE_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot;master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot > > Committed: https://skia.googlesource.com/skia/+/8602ede5fdfa721dcad4dcb11db028c1c24265f1 TBR=mtklein@google.com,herb@google.com # Skipping CQ checks because original CL landed less than 1 days ago. NOPRESUBMIT=true NOTREECHECKS=true NOTRY=true BUG=skia: Review-Url: https://codereview.chromium.org/2174793002
-rw-r--r--src/core/SkBitmapProcShader.h2
-rw-r--r--src/core/SkLinearBitmapPipeline.cpp238
-rw-r--r--src/core/SkLinearBitmapPipeline.h4
-rw-r--r--src/core/SkLinearBitmapPipeline_core.h33
-rw-r--r--src/core/SkLinearBitmapPipeline_sample.h783
5 files changed, 507 insertions, 553 deletions
diff --git a/src/core/SkBitmapProcShader.h b/src/core/SkBitmapProcShader.h
index 67b005ac05..a4591c7355 100644
--- a/src/core/SkBitmapProcShader.h
+++ b/src/core/SkBitmapProcShader.h
@@ -56,7 +56,7 @@ private:
typedef SkShader INHERITED;
};
-enum {kSkBlitterContextSize = 3332};
+enum {kSkBlitterContextSize = 3200};
// Commonly used allocator. It currently is only used to allocate up to 3 objects. The total
// bytes requested is calculated using one of our large shaders, its context size plus the size of
diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp
index 0122765709..088e829345 100644
--- a/src/core/SkLinearBitmapPipeline.cpp
+++ b/src/core/SkLinearBitmapPipeline.cpp
@@ -165,14 +165,15 @@ static SkLinearBitmapPipeline::PointProcessorInterface* choose_matrix(
// Tile Stage
template<typename XStrategy, typename YStrategy, typename Next>
-class CombinedTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface {
+class NearestTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface {
public:
- CombinedTileStage(Next* next, SkISize dimensions)
+ template <typename... Args>
+ NearestTileStage(Next* next, SkISize dimensions)
: fNext{next}
, fXStrategy{dimensions.width()}
, fYStrategy{dimensions.height()}{ }
- CombinedTileStage(Next* next, const CombinedTileStage& stage)
+ NearestTileStage(Next* next, const NearestTileStage& stage)
: fNext{next}
, fXStrategy{stage.fXStrategy}
, fYStrategy{stage.fYStrategy} { }
@@ -194,16 +195,9 @@ public:
SkASSERT(!span.isEmpty());
SkPoint start; SkScalar length; int count;
std::tie(start, length, count) = span;
-
- if (span.count() == 1) {
- this->pointListFew(1, span.startX(), span.startY());
- return;
- }
-
SkScalar x = X(start);
SkScalar y = fYStrategy.tileY(Y(start));
Span yAdjustedSpan{{x, y}, length, count};
-
if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) {
span_fallback(span, this);
}
@@ -215,27 +209,173 @@ private:
YStrategy fYStrategy;
};
-template <typename XStrategy, typename Next>
+template<typename XStrategy, typename YStrategy, typename Next>
+class BilerpTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface {
+public:
+ template <typename... Args>
+ BilerpTileStage(Next* next, SkISize dimensions)
+ : fNext{next}
+ , fXMax(dimensions.width())
+ , fYMax(dimensions.height())
+ , fXStrategy{dimensions.width()}
+ , fYStrategy{dimensions.height()} { }
+
+ BilerpTileStage(Next* next, const BilerpTileStage& stage)
+ : fNext{next}
+ , fXMax{stage.fXMax}
+ , fYMax{stage.fYMax}
+ , fXStrategy{stage.fXStrategy}
+ , fYStrategy{stage.fYStrategy} { }
+
+ void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
+ fXStrategy.tileXPoints(&xs);
+ fYStrategy.tileYPoints(&ys);
+ // TODO: check to see if xs and ys are in range then just call pointListFew on next.
+ if (n >= 1) this->bilerpPoint(xs[0], ys[0]);
+ if (n >= 2) this->bilerpPoint(xs[1], ys[1]);
+ if (n >= 3) this->bilerpPoint(xs[2], ys[2]);
+ }
+
+ void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
+ fXStrategy.tileXPoints(&xs);
+ fYStrategy.tileYPoints(&ys);
+ // TODO: check to see if xs and ys are in range then just call pointList4 on next.
+ this->bilerpPoint(xs[0], ys[0]);
+ this->bilerpPoint(xs[1], ys[1]);
+ this->bilerpPoint(xs[2], ys[2]);
+ this->bilerpPoint(xs[3], ys[3]);
+ }
+
+ struct Wrapper {
+ void pointSpan(Span span) {
+ processor->breakIntoEdges(span);
+ }
+
+ void repeatSpan(Span span, int32_t repeatCount) {
+ while (repeatCount --> 0) {
+ processor->pointSpan(span);
+ }
+ }
+
+ BilerpTileStage* processor;
+ };
+
+ // The span you pass must not be empty.
+ void pointSpan(Span span) override {
+ SkASSERT(!span.isEmpty());
+
+ Wrapper wrapper = {this};
+ if (!fXStrategy.maybeProcessSpan(span, &wrapper)) {
+ span_fallback(span, this);
+ }
+ }
+
+private:
+ void bilerpPoint(SkScalar x, SkScalar y) {
+ Sk4f txs = Sk4f{x} + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f};
+ Sk4f tys = Sk4f{y} + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f};
+ fXStrategy.tileXPoints(&txs);
+ fYStrategy.tileYPoints(&tys);
+ fNext->bilerpEdge(txs, tys);
+ }
+
+ void handleEdges(Span span, SkScalar dx) {
+ SkPoint start; SkScalar length; int count;
+ std::tie(start, length, count) = span;
+ SkScalar x = X(start);
+ SkScalar y = Y(start);
+ SkScalar tiledY = fYStrategy.tileY(y);
+ while (count > 0) {
+ this->bilerpPoint(x, tiledY);
+ x += dx;
+ count -= 1;
+ }
+ }
+
+ void yProcessSpan(Span span) {
+ SkScalar tiledY = fYStrategy.tileY(span.startY());
+ if (0.5f <= tiledY && tiledY < fYMax - 0.5f ) {
+ Span tiledSpan{{span.startX(), tiledY}, span.length(), span.count()};
+ fNext->pointSpan(tiledSpan);
+ } else {
+ // Convert to the Y0 bilerp sample set by shifting by -0.5f. Then tile that new y
+ // value and shift it back resulting in the working Y0. Do the same thing with Y1 but
+ // in the opposite direction.
+ SkScalar y0 = fYStrategy.tileY(span.startY() - 0.5f) + 0.5f;
+ SkScalar y1 = fYStrategy.tileY(span.startY() + 0.5f) - 0.5f;
+ Span newSpan{{span.startX(), y0}, span.length(), span.count()};
+ fNext->bilerpSpan(newSpan, y1);
+ }
+ }
+ void breakIntoEdges(Span span) {
+ if (span.count() == 1) {
+ this->bilerpPoint(span.startX(), span.startY());
+ } else if (span.length() == 0) {
+ yProcessSpan(span);
+ } else {
+ SkScalar dx = span.length() / (span.count() - 1);
+ if (span.length() > 0) {
+ Span leftBorder = span.breakAt(0.5f, dx);
+ if (!leftBorder.isEmpty()) {
+ this->handleEdges(leftBorder, dx);
+ }
+ Span center = span.breakAt(fXMax - 0.5f, dx);
+ if (!center.isEmpty()) {
+ this->yProcessSpan(center);
+ }
+
+ if (!span.isEmpty()) {
+ this->handleEdges(span, dx);
+ }
+ } else {
+ Span center = span.breakAt(fXMax + 0.5f, dx);
+ if (!span.isEmpty()) {
+ this->handleEdges(span, dx);
+ }
+ Span leftEdge = center.breakAt(0.5f, dx);
+ if (!center.isEmpty()) {
+ this->yProcessSpan(center);
+ }
+ if (!leftEdge.isEmpty()) {
+ this->handleEdges(leftEdge, dx);
+ }
+
+ }
+ }
+ }
+
+ Next* const fNext;
+ SkScalar fXMax;
+ SkScalar fYMax;
+ XStrategy fXStrategy;
+ YStrategy fYStrategy;
+};
+
+template <typename XStrategy, typename YStrategy, typename Next>
+void make_tile_stage(
+ SkFilterQuality filterQuality, SkISize dimensions,
+ Next* next, SkLinearBitmapPipeline::TileStage* tileStage) {
+ if (filterQuality == kNone_SkFilterQuality) {
+ tileStage->initStage<NearestTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
+ } else {
+ tileStage->initStage<BilerpTileStage<XStrategy, YStrategy, Next>>(next, dimensions);
+ }
+}
+template <typename XStrategy>
void choose_tiler_ymode(
SkShader::TileMode yMode, SkFilterQuality filterQuality, SkISize dimensions,
- Next* next,
+ SkLinearBitmapPipeline::SampleProcessorInterface* next,
SkLinearBitmapPipeline::TileStage* tileStage) {
switch (yMode) {
- case SkShader::kClamp_TileMode: {
- using Tiler = CombinedTileStage<XStrategy, YClampStrategy, Next>;
- tileStage->initStage<Tiler>(next, dimensions);
+ case SkShader::kClamp_TileMode:
+ make_tile_stage<XStrategy, YClampStrategy>(filterQuality, dimensions, next, tileStage);
break;
- }
- case SkShader::kRepeat_TileMode: {
- using Tiler = CombinedTileStage<XStrategy, YRepeatStrategy, Next>;
- tileStage->initStage<Tiler>(next, dimensions);
+ case SkShader::kRepeat_TileMode:
+ make_tile_stage<XStrategy, YRepeatStrategy>(filterQuality, dimensions, next, tileStage);
break;
- }
- case SkShader::kMirror_TileMode: {
- using Tiler = CombinedTileStage<XStrategy, YMirrorStrategy, Next>;
- tileStage->initStage<Tiler>(next, dimensions);
+ case SkShader::kMirror_TileMode:
+ make_tile_stage<XStrategy, YMirrorStrategy>(filterQuality, dimensions, next, tileStage);
break;
- }
}
};
@@ -327,6 +467,10 @@ public:
fDest = dest;
}
+ void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); }
+
+ void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); }
+
void setDestination(void* dst, int count) override {
fDest = static_cast<uint32_t*>(dst);
fEnd = fDest + count;
@@ -394,6 +538,10 @@ public:
SkASSERT(fDest <= fEnd);
}
+ void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); }
+
+ void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); }
+
void setDestination(void* dst, int count) override {
SkASSERT(count > 0);
fDest = static_cast<uint32_t*>(dst);
@@ -434,9 +582,12 @@ static SkLinearBitmapPipeline::PixelAccessorInterface* choose_specific_accessor(
}
}
-static SkLinearBitmapPipeline::PixelAccessorInterface* choose_pixel_accessor(
+template<template <typename, typename> class Sampler>
+static SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler_base(
+ Blender* next,
const SkPixmap& srcPixmap,
const SkColor A8TintColor,
+ SkLinearBitmapPipeline::SampleStage* sampleStage,
SkLinearBitmapPipeline::Accessor* accessor)
{
const SkImageInfo& imageInfo = srcPixmap.info();
@@ -478,19 +629,19 @@ static SkLinearBitmapPipeline::PixelAccessorInterface* choose_pixel_accessor(
break;
}
- return pixelAccessor;
+ using S = Sampler<PixelAccessorShim, Blender>;
+ sampleStage->initStage<S>(next, pixelAccessor);
+ return sampleStage->get();
}
SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler(
Blender* next,
SkFilterQuality filterQuality,
- SkShader::TileMode xTile, SkShader::TileMode yTile,
const SkPixmap& srcPixmap,
const SkColor A8TintColor,
SkLinearBitmapPipeline::SampleStage* sampleStage,
SkLinearBitmapPipeline::Accessor* accessor) {
const SkImageInfo& imageInfo = srcPixmap.info();
- SkISize dimensions = imageInfo.dimensions();
// Special case samplers with fully expanded templates
if (imageInfo.gammaCloseToSRGB()) {
@@ -519,14 +670,14 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler(
using S =
BilerpSampler<
PixelAccessor<kN32_SkColorType, kSRGB_SkGammaType>, Blender>;
- sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap);
+ sampleStage->initStage<S>(next, srcPixmap);
return sampleStage->get();
}
case kIndex_8_SkColorType: {
using S =
BilerpSampler<
PixelAccessor<kIndex_8_SkColorType, kSRGB_SkGammaType>, Blender>;
- sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap);
+ sampleStage->initStage<S>(next, srcPixmap);
return sampleStage->get();
}
default:
@@ -535,16 +686,14 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler(
}
}
- auto pixelAccessor = choose_pixel_accessor(srcPixmap, A8TintColor, accessor);
// General cases.
if (filterQuality == kNone_SkFilterQuality) {
- using S = NearestNeighborSampler<PixelAccessorShim, Blender>;
- sampleStage->initStage<S>(next, pixelAccessor);
+ return choose_pixel_sampler_base<NearestNeighborSampler>(
+ next, srcPixmap, A8TintColor, sampleStage, accessor);
} else {
- using S = BilerpSampler<PixelAccessorShim, Blender>;
- sampleStage->initStage<S>(next, dimensions, xTile, yTile, pixelAccessor);
+ return choose_pixel_sampler_base<BilerpSampler>(
+ next, srcPixmap, A8TintColor, sampleStage, accessor);
}
- return sampleStage->get();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -556,17 +705,17 @@ public:
SrcFPPixel(const SrcFPPixel& Blender) : fPostAlpha(Blender.fPostAlpha) {}
void SK_VECTORCALL blendPixel(Sk4f pixel) override {
SkASSERT(fDst + 1 <= fEnd );
- this->srcPixel(fDst, pixel, 0);
+ SrcPixel(fDst, pixel, 0);
fDst += 1;
}
void SK_VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) override {
SkASSERT(fDst + 4 <= fEnd);
SkPM4f* dst = fDst;
- this->srcPixel(dst, p0, 0);
- this->srcPixel(dst, p1, 1);
- this->srcPixel(dst, p2, 2);
- this->srcPixel(dst, p3, 3);
+ SrcPixel(dst, p0, 0);
+ SrcPixel(dst, p1, 1);
+ SrcPixel(dst, p2, 2);
+ SrcPixel(dst, p3, 3);
fDst += 4;
}
@@ -576,9 +725,7 @@ public:
}
private:
- void SK_VECTORCALL srcPixel(SkPM4f* dst, Sk4f pixel, int index) {
- check_pixel(pixel);
-
+ void SK_VECTORCALL SrcPixel(SkPM4f* dst, Sk4f pixel, int index) {
Sk4f newPixel = pixel;
if (alphaType == kUnpremul_SkAlphaType) {
newPixel = Premultiply(pixel);
@@ -650,8 +797,7 @@ SkLinearBitmapPipeline::SkLinearBitmapPipeline(
// identity matrix, the matrix stage is skipped, and the tilerStage is the first stage.
auto blenderStage = choose_blender_for_shading(alphaType, postAlpha, &fBlenderStage);
auto samplerStage = choose_pixel_sampler(
- blenderStage, filterQuality, xTile, yTile,
- srcPixmap, paintColor, &fSampleStage, &fAccessor);
+ blenderStage, filterQuality, srcPixmap, paintColor, &fSampleStage, &fAccessor);
auto tilerStage = choose_tiler(samplerStage, dimensions, xTile, yTile,
filterQuality, dx, &fTileStage);
fFirstStage = choose_matrix(tilerStage, adjustedInverse, &fMatrixStage);
diff --git a/src/core/SkLinearBitmapPipeline.h b/src/core/SkLinearBitmapPipeline.h
index 91b573df5d..b0f7e9dd20 100644
--- a/src/core/SkLinearBitmapPipeline.h
+++ b/src/core/SkLinearBitmapPipeline.h
@@ -133,9 +133,9 @@ public:
// These values were generated by the assert above in Stage::init{Sink|Stage}.
using MatrixStage = Stage<PointProcessorInterface, 160, PointProcessorInterface>;
using TileStage = Stage<PointProcessorInterface, 160, SampleProcessorInterface>;
- using SampleStage = Stage<SampleProcessorInterface, 160, BlendProcessorInterface>;
+ using SampleStage = Stage<SampleProcessorInterface, 100, BlendProcessorInterface>;
using BlenderStage = Stage<BlendProcessorInterface, 40>;
- using Accessor = PolyMemory<PixelAccessorInterface, 64>;
+ using Accessor = PolyMemory<PixelAccessorInterface, 48>;
private:
PointProcessorInterface* fFirstStage;
diff --git a/src/core/SkLinearBitmapPipeline_core.h b/src/core/SkLinearBitmapPipeline_core.h
index cf120eec65..2c39a38320 100644
--- a/src/core/SkLinearBitmapPipeline_core.h
+++ b/src/core/SkLinearBitmapPipeline_core.h
@@ -178,15 +178,6 @@ void span_fallback(Span span, Stage* stage) {
stage->pointListFew(count, xs, ys);
}
}
-
-inline Sk4f check_pixel(Sk4f& pixel) {
- SkASSERTF(0.0f <= pixel[0] && pixel[0] <= 1.0f, "pixel[0]: %f", pixel[0]);
- SkASSERTF(0.0f <= pixel[1] && pixel[1] <= 1.0f, "pixel[1]: %f", pixel[1]);
- SkASSERTF(0.0f <= pixel[2] && pixel[2] <= 1.0f, "pixel[2]: %f", pixel[2]);
- SkASSERTF(0.0f <= pixel[3] && pixel[3] <= 1.0f, "pixel[3]: %f", pixel[3]);
- return pixel;
-}
-
} // namespace
class SkLinearBitmapPipeline::PointProcessorInterface {
@@ -210,6 +201,26 @@ public:
// Used for nearest neighbor when scale factor is 1.0. The span can just be repeated with no
// edge pixel alignment problems. This is for handling a very common case.
virtual void repeatSpan(Span span, int32_t repeatCount) = 0;
+
+ // The x's and y's are set up in the following order:
+ // +--------+--------+
+ // | | |
+ // | px00 | px10 |
+ // | 0 | 1 |
+ // +--------+--------+
+ // | | |
+ // | px01 | px11 |
+ // | 2 | 3 |
+ // +--------+--------+
+ // These pixel coordinates are arranged in the following order in xs and ys:
+ // px00 px10 px01 px11
+ virtual void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) = 0;
+
+ // A span represents sample points that have been mapped from destination space to source
+ // space. Each sample point is then expanded to the four bilerp points by adding +/- 0.5. The
+ // resulting Y values may be off the tile. When y +/- 0.5 are more than 1 apart because of
+ // tiling, the second Y is used to denote the retiled Y value.
+ virtual void bilerpSpan(Span span, SkScalar y) = 0;
};
class SkLinearBitmapPipeline::DestinationInterface {
@@ -232,10 +243,10 @@ class SkLinearBitmapPipeline::PixelAccessorInterface {
public:
virtual ~PixelAccessorInterface() { }
virtual void SK_VECTORCALL getFewPixels(
- int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0;
+ int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0;
virtual void SK_VECTORCALL get4Pixels(
- Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0;
+ Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0;
virtual void get4Pixels(
const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0;
diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h
index 5421758297..759075b3e5 100644
--- a/src/core/SkLinearBitmapPipeline_sample.h
+++ b/src/core/SkLinearBitmapPipeline_sample.h
@@ -40,7 +40,7 @@ namespace {
// * px11 -> xy
// So x * y is calculated first and then used to calculate all the other factors.
static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10,
- Sk4f px01, Sk4f px11) {
+ Sk4f px01, Sk4f px11) {
// Calculate fractional xs and ys.
Sk4s fxs = xs - xs.floor();
Sk4s fys = ys - ys.floor();
@@ -134,21 +134,20 @@ template <SkGammaType gammaType>
class PixelConverter<kIndex_8_SkColorType, gammaType> {
public:
using Element = uint8_t;
- PixelConverter(const SkPixmap& srcPixmap)
- : fColorTableSize(srcPixmap.ctable()->count()){
+ PixelConverter(const SkPixmap& srcPixmap) {
SkColorTable* skColorTable = srcPixmap.ctable();
SkASSERT(skColorTable != nullptr);
fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
- for (int i = 0; i < fColorTableSize; i++) {
+ for (int i = 0; i < skColorTable->count(); i++) {
fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]);
}
}
- PixelConverter(const PixelConverter& strategy)
- : fColorTableSize{strategy.fColorTableSize}{
+ PixelConverter(const PixelConverter& strategy) {
fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get());
- for (int i = 0; i < fColorTableSize; i++) {
+ // TODO: figure out the count.
+ for (int i = 0; i < 256; i++) {
fColorTable[i] = strategy.fColorTable[i];
}
}
@@ -159,9 +158,9 @@ public:
private:
static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12;
- const int fColorTableSize;
- SkAutoMalloc fColorTableStorage{kColorTableSize};
- Sk4f* fColorTable;
+
+ SkAutoMalloc fColorTableStorage{kColorTableSize};
+ Sk4f* fColorTable;
};
template <SkGammaType gammaType>
@@ -195,12 +194,12 @@ public:
: fPixelAccessor(accessor) { }
void SK_VECTORCALL getFewPixels(
- int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
+ int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const {
fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2);
}
void SK_VECTORCALL get4Pixels(
- Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
+ Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const {
fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3);
}
@@ -238,8 +237,10 @@ public:
, fConverter{srcPixmap, std::move<Args>(args)...} { }
void SK_VECTORCALL getFewPixels (
- int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
- Sk4i bufferLoc = ys * fWidth + xs;
+ int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override {
+ Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
+ Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
+ Sk4i bufferLoc = YIs * fWidth + XIs;
switch (n) {
case 3:
*px2 = this->getPixelAt(bufferLoc[2]);
@@ -253,8 +254,10 @@ public:
}
void SK_VECTORCALL get4Pixels(
- Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
- Sk4i bufferLoc = ys * fWidth + xs;
+ Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override {
+ Sk4i XIs = SkNx_cast<int, SkScalar>(xs);
+ Sk4i YIs = SkNx_cast<int, SkScalar>(ys);
+ Sk4i bufferLoc = YIs * fWidth + XIs;
*px0 = this->getPixelAt(bufferLoc[0]);
*px1 = this->getPixelAt(bufferLoc[1]);
*px2 = this->getPixelAt(bufferLoc[2]);
@@ -327,7 +330,6 @@ static void src_strategy_blend(Span span, Next* next, Strategy* strategy) {
}
}
-// -- NearestNeighborSampler -----------------------------------------------------------------------
// NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels.
template<typename Accessor, typename Next>
class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
@@ -343,7 +345,7 @@ public:
void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
SkASSERT(0 < n && n < 4);
Sk4f px0, px1, px2;
- fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2);
+ fAccessor.getFewPixels(n, xs, ys, &px0, &px1, &px2);
if (n >= 1) fNext->blendPixel(px0);
if (n >= 2) fNext->blendPixel(px1);
if (n >= 3) fNext->blendPixel(px2);
@@ -351,7 +353,7 @@ public:
void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
Sk4f px0, px1, px2, px3;
- fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3);
+ fAccessor.get4Pixels(xs, ys, &px0, &px1, &px2, &px3);
fNext->blend4Pixels(px0, px1, px2, px3);
}
@@ -378,11 +380,21 @@ public:
}
}
+ void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override {
+ SkFAIL("Using nearest neighbor sampler, but calling a bilerpEdge.");
+ }
+
+ void bilerpSpan(Span span, SkScalar y) override {
+ SkFAIL("Using nearest neighbor sampler, but calling a bilerpSpan.");
+ }
+
private:
// When moving through source space more slowly than dst space (zoomed in),
// we'll be sampling from the same source pixel more than once.
void spanSlowRate(Span span) {
- SkPoint start; SkScalar length; int count;
+ SkPoint start;
+ SkScalar length;
+ int count;
std::tie(start, length, count) = span;
SkScalar x = X(start);
SkFixed fx = SkScalarToFixed(x);
@@ -439,82 +451,35 @@ private:
Accessor fAccessor;
};
-// From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge
-// vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to
-// generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value
-// on the interval [0, vMax].
-// Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel.
-static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) {
- SkASSERT(-1 <= vs && vs <= vMax + 1)
- switch (edgeType) {
- case SkShader::kClamp_TileMode:
- case SkShader::kMirror_TileMode:
- vs = std::max(vs, 0);
- vs = std::min(vs, vMax);
- break;
- case SkShader::kRepeat_TileMode:
- vs = (vs <= vMax) ? vs : 0;
- vs = (vs >= 0) ? vs : vMax;
- break;
- }
- SkASSERT(0 <= vs && vs <= vMax);
- return vs;
-}
-
-// From a sample point on the tile, return the top or left filter value.
-// The result r should be in the range (0, 1]. Since this represents the weight given to the top
-// left element, then if x == 0.5 the filter value should be 1.0.
-// The input sample point must be on the tile, therefore it must be >= 0.
-static SkScalar sample_to_filter(SkScalar x) {
- SkASSERT(x >= 0.0f);
- // The usual form of the top or left edge is x - .5, but since we are working on the unit
- // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use
- // of trunc.
- SkScalar v = x + 0.5f;
- // Produce the top or left offset a value on the range [0, 1).
- SkScalar f = v - SkScalarTruncToScalar(v);
- // Produce the filter value which is on the range (0, 1].
- SkScalar r = 1.0f - f;
- SkASSERT(0.0f < r && r <= 1.0f);
- return r;
-}
-
// -- BilerpSampler --------------------------------------------------------------------------------
// BilerpSampler - use a bilerp filter to create runs of destination pixels.
-// Note: in the code below, there are two types of points
-// * sample points - these are the points passed in by pointList* and Spans.
-// * filter points - are created from a sample point to form the coordinates of the points
-// to use in the filter and to generate the filter values.
template<typename Accessor, typename Next>
class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface {
public:
template<typename... Args>
- BilerpSampler(
- SkLinearBitmapPipeline::BlendProcessorInterface* next,
- SkISize dimensions,
- SkShader::TileMode xTile, SkShader::TileMode yTile,
- Args&& ... args
- )
- : fNext{next}
- , fXEdgeType{xTile}
- , fXMax{dimensions.width() - 1}
- , fYEdgeType{yTile}
- , fYMax{dimensions.height() - 1}
- , fAccessor{std::forward<Args>(args)...} { }
+ BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args)
+ : fNext{next}, fAccessor{std::forward<Args>(args)...} { }
BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next,
const BilerpSampler& sampler)
- : fNext{next}
- , fXEdgeType{sampler.fXEdgeType}
- , fXMax{sampler.fXMax}
- , fYEdgeType{sampler.fYEdgeType}
- , fYMax{sampler.fYMax}
- , fAccessor{sampler.fAccessor} { }
+ : fNext{next}, fAccessor{sampler.fAccessor} { }
+
+ Sk4f bilerpNonEdgePixel(SkScalar x, SkScalar y) {
+ Sk4f px00, px10, px01, px11;
+
+ // bilerp4() expects xs, ys are the top-lefts of the 2x2 kernel.
+ Sk4f xs = Sk4f{x} - 0.5f;
+ Sk4f ys = Sk4f{y} - 0.5f;
+ Sk4f sampleXs = xs + Sk4f{0.0f, 1.0f, 0.0f, 1.0f};
+ Sk4f sampleYs = ys + Sk4f{0.0f, 0.0f, 1.0f, 1.0f};
+ fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
+ return bilerp4(xs, ys, px00, px10, px01, px11);
+ }
void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override {
SkASSERT(0 < n && n < 4);
auto bilerpPixel = [&](int index) {
- return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
+ return this->bilerpNonEdgePixel(xs[index], ys[index]);
};
if (n >= 1) fNext->blendPixel(bilerpPixel(0));
@@ -524,484 +489,308 @@ public:
void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override {
auto bilerpPixel = [&](int index) {
- return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]});
+ return this->bilerpNonEdgePixel(xs[index], ys[index]);
};
fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3));
}
void pointSpan(Span span) override {
+ this->bilerpSpan(span, span.startY());
+ }
+
+ void repeatSpan(Span span, int32_t repeatCount) override {
+ while (repeatCount > 0) {
+ this->pointSpan(span);
+ repeatCount--;
+ }
+ }
+
+ void SK_VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) override {
+ Sk4f px00, px10, px01, px11;
+ Sk4f xs = Sk4f{sampleXs[0]};
+ Sk4f ys = Sk4f{sampleYs[0]};
+ fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11);
+ Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11);
+ fNext->blendPixel(pixel);
+ }
+
+ void bilerpSpan(Span span, SkScalar y) override { // Picks a span routine by comparing |length| with (count - 1); y is forwarded untouched.
SkASSERT(!span.isEmpty());
SkPoint start;
SkScalar length;
int count;
std::tie(start, length, count) = span;
-
- // Nothing to do.
- if (count == 0) {
- return;
- }
-
- // Trivial case. No sample points are generated other than start.
- if (count == 1) {
- fNext->blendPixel(this->bilerpSamplePoint(start));
- return;
- }
-
- // Note: the following code could be done in terms of dx = length / (count -1), but that
- // would introduce a divide that is not needed for the most common dx == 1 cases.
SkScalar absLength = SkScalarAbs(length);
if (absLength == 0.0f) {
- // |dx| == 0
- // length is zero, so clamp an edge pixel.
- this->spanZeroRate(span);
+ this->spanZeroRate(span, y); // zero length: every sample lands on the same point
} else if (absLength < (count - 1)) {
- // 0 < |dx| < 1.
- this->spanSlowRate(span);
+ this->spanSlowRate(span, y); // |length| < count - 1: less than one source pixel per sample
} else if (absLength == (count - 1)) {
- // |dx| == 1.
- if (sample_to_filter(span.startX()) == 1.0f
- && sample_to_filter(span.startY()) == 1.0f) {
- // All the pixels are aligned with the dest; go fast.
- src_strategy_blend(span, fNext, &fAccessor);
+ if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) { // startX - 0.5 is a whole number
+ if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) { // startY - 0.5 is a whole number too
+ src_strategy_blend(span, fNext, &fAccessor); // no fractional offset in x or y; presumably an unfiltered path -- TODO confirm
+ } else {
+ this->spanUnitRateAlignedX(span, y); // only y has a fractional offset
+ }
} else {
- // There is some sub-pixel offsets, so bilerp.
- this->spanUnitRate(span);
+ this->spanUnitRate(span, y); // x has a fractional offset
}
- } else if (absLength < 2.0f * (count - 1)) {
- // 1 < |dx| < 2.
- this->spanMediumRate(span);
} else {
- // |dx| >= 2.
- this->spanFastRate(span);
- }
- }
-
- void repeatSpan(Span span, int32_t repeatCount) override {
- while (repeatCount > 0) {
- this->pointSpan(span);
- repeatCount--;
+ this->spanFastRate(span, y); // |length| > count - 1: more than one source pixel per sample
}
}
private:
-
- // Convert a sample point to the points used by the filter.
- void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) {
- // May be less than zero. Be careful to use Floor.
- int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax);
- // Always greater than zero. Use the faster Trunc.
- int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax);
- int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax);
- int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax);
-
- *filterXs = Sk4i{x0, x1, x0, x1};
- *filterYs = Sk4i{y0, y0, y1, y1};
- }
-
- // Given a sample point, generate a color by bilerping the four filter points.
- Sk4f bilerpSamplePoint(SkPoint sample) {
- Sk4i iXs, iYs;
- filterPoints(sample, &iXs, &iYs);
- Sk4f px00, px10, px01, px11;
- fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11);
- return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11);
- }
-
- // Get two pixels at x from row0 and row1.
- void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) {
- *px0 = fAccessor.getPixelFromRow(row0, x);
- *px1 = fAccessor.getPixelFromRow(row1, x);
- }
-
- // |dx| == 0. This code assumes that length is zero.
- void spanZeroRate(Span span) {
- SkPoint start; SkScalar length; int count;
- std::tie(start, length, count) = span;
- SkASSERT(length == 0.0f);
-
- // Filter for the blending of the top and bottom pixels.
- SkScalar filterY = sample_to_filter(Y(start));
-
- // Generate the four filter points from the sample point start. Generate the row* values.
- Sk4i iXs, iYs;
- this->filterPoints(start, &iXs, &iYs);
- const void* const row0 = fAccessor.row(iYs[0]);
- const void* const row1 = fAccessor.row(iYs[2]);
-
- // Get the two pixels that make up the clamping pixel.
- Sk4f pxTop, pxBottom;
- this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom);
- Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom;
-
+ void spanZeroRate(Span span, SkScalar y1) { // Blends one column vertically and emits that pixel span.count() times.
+ SkScalar y0 = span.startY() - 0.5f; // y of the first filter row
+ y1 += 0.5f; // y of the second filter row
+ int iy0 = SkScalarFloorToInt(y0);
+ SkScalar filterY1 = y0 - iy0; // fractional part of y0: weight of the second row
+ SkScalar filterY0 = 1.0f - filterY1; // weight of the first row
+ int iy1 = SkScalarFloorToInt(y1);
+ int ix = SkScalarFloorToInt(span.startX()); // single column; no horizontal blend is done here
+ Sk4f pixelY0 = fAccessor.getPixelFromRow(fAccessor.row(iy0), ix);
+ Sk4f pixelY1 = fAccessor.getPixelFromRow(fAccessor.row(iy1), ix);
+ Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1; // computed once, reused for every output pixel
+ int count = span.count();
while (count >= 4) {
- fNext->blend4Pixels(pixel, pixel, pixel, pixel);
+ fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel); // four at a time while possible
count -= 4;
}
while (count > 0) {
- fNext->blendPixel(pixel);
+ fNext->blendPixel(filterPixel); // remaining 0..3 pixels
count -= 1;
}
}
- // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce
- // computation. In particular, several destination pixels maybe generated from the same four
- // source pixels.
- // In the following code a "part" is a combination of two pixels from the same column of the
- // filter.
- void spanSlowRate(Span span) {
- SkPoint start; SkScalar length; int count;
+ // When moving through source space more slowly than dst space (zoomed in), consecutive
+ // samples fall on the same source pixels, so the four y-weighted taps are cached between samples.
+ void spanSlowRate(Span span, SkScalar ry1) { // ry1 (+0.5) selects the second filter row.
+ SkPoint start;
+ SkScalar length;
+ int count;
std::tie(start, length, count) = span;
+ SkFixed fx = SkScalarToFixed(X(start)-0.5f); // fixed-point x of the left filter column
- // Calculate the distance between each sample point.
- const SkScalar dx = length / (count - 1);
- SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f);
-
- // Generate the filter values for the top-left corner.
- // Note: these values are in filter space; this has implications about how to adjust
- // these values at each step. For example, as the sample point increases, the filter
- // value decreases, this is because the filter and position are related by
- // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
- // direction of the sample point which is increasing by dx.
- SkScalar filterX = sample_to_filter(X(start));
- SkScalar filterY = sample_to_filter(Y(start));
-
- // Generate the four filter points from the sample point start. Generate the row* values.
- Sk4i iXs, iYs;
- this->filterPoints(start, &iXs, &iYs);
- const void* const row0 = fAccessor.row(iYs[0]);
- const void* const row1 = fAccessor.row(iYs[2]);
-
- // Generate part of the filter value at xColumn.
- auto partAtColumn = [&](int xColumn) {
- int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
- Sk4f pxTop, pxBottom;
- this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
- return pxTop * filterY + (1.0f - filterY) * pxBottom;
- };
+ SkFixed fdx = SkScalarToFixed(length / (count - 1)); // fixed-point step per output pixel; assumes count > 1 -- TODO confirm
- // The leftPart is made up of two pixels from the left column of the filter, right part
- // is similar. The top and bottom pixels in the *Part are created as a linear blend of
- // the top and bottom pixels using filterY. See the partAtColumn function above.
- Sk4f leftPart = partAtColumn(iXs[0]);
- Sk4f rightPart = partAtColumn(iXs[1]);
+ Sk4f xAdjust; // correction applied to the fractional x whenever the integer column changes
+ if (fdx >= 0) {
+ xAdjust = Sk4f{-1.0f};
+ } else {
+ xAdjust = Sk4f{1.0f};
+ }
+ int ix = SkFixedFloorToInt(fx); // integer column of the left tap
+ int ioldx = ix; // column the cached taps were loaded for
+ Sk4f x{SkFixedToScalar(fx) - ix}; // fractional x: weight of the right column
+ Sk4f dx{SkFixedToScalar(fdx)};
+ SkScalar ry0 = Y(start) - 0.5f; // y of the first filter row
+ ry1 += 0.5f; // y of the second filter row
+ SkScalar yFloor = std::floor(ry0);
+ Sk4f y1 = Sk4f{ry0 - yFloor}; // weight of the second row
+ Sk4f y0 = Sk4f{1.0f} - y1; // weight of the first row
+ const void* const row0 = fAccessor.row(SkScalarFloorToInt(ry0));
+ const void* const row1 = fAccessor.row(SkScalarFloorToInt(ry1));
+ Sk4f fpixel00 = y0 * fAccessor.getPixelFromRow(row0, ix); // the four taps are stored already multiplied by their row weight
+ Sk4f fpixel01 = y1 * fAccessor.getPixelFromRow(row1, ix);
+ Sk4f fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1);
+ Sk4f fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1);
+ auto getNextPixel = [&]() { // returns the filtered pixel for the current x, then advances fx/ix/x by one step
+ if (ix != ioldx) { // the previous step moved to another column: refresh the cached taps
+ fpixel00 = fpixel10; // NOTE(review): old right column becomes the left one; this matches fdx > 0 -- confirm behavior for fdx < 0
+ fpixel01 = fpixel11;
+ fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1);
+ fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1);
+ ioldx = ix;
+ x = x + xAdjust; // bring the fractional x back after the column change
+ }
- // Create a destination color by blending together a left and right part using filterX.
- auto bilerp = [&]() {
- Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
- return check_pixel(pixel);
+ Sk4f x0, x1;
+ x0 = Sk4f{1.0f} - x; // weight of the left column
+ x1 = x; // weight of the right column
+ Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11); // horizontal blend of the two y-blended columns
+ fx += fdx;
+ ix = SkFixedFloorToInt(fx);
+ x = x + dx;
+ return fpixel;
};
- // Send the first pixel to the destination. This simplifies the loop structure so that no
- // extra pixels are fetched for the last iteration of the loop.
- fNext->blendPixel(bilerp());
- count -= 1;
-
- if (dx > 0.0f) {
- // * positive direction - generate destination pixels by sliding the filter from left
- // to right.
- int rightPartCursor = iXs[1];
-
- // Advance the filter from left to right. Remember that moving the top-left corner of
- // the filter to the right actually makes the filter value smaller.
- auto advanceFilter = [&]() {
- filterX -= dx;
- if (filterX <= 0.0f) {
- filterX += 1.0f;
- leftPart = rightPart;
- rightPartCursor += 1;
- rightPart = partAtColumn(rightPartCursor);
- }
- SkASSERT(0.0f < filterX && filterX <= 1.0f);
-
- return bilerp();
- };
-
- while (count >= 4) {
- Sk4f px0 = advanceFilter(),
- px1 = advanceFilter(),
- px2 = advanceFilter(),
- px3 = advanceFilter();
- fNext->blend4Pixels(px0, px1, px2, px3);
- count -= 4;
- }
-
- while (count > 0) {
- fNext->blendPixel(advanceFilter());
- count -= 1;
- }
- } else {
- // * negative direction - generate destination pixels by sliding the filter from
- // right to left.
- int leftPartCursor = iXs[0];
-
- // Advance the filter from right to left. Remember that moving the top-left corner of
- // the filter to the left actually makes the filter value larger.
- auto advanceFilter = [&]() {
- // Remember, dx < 0 therefore this adds |dx| to filterX.
- filterX -= dx;
- // At this point filterX may be > 1, and needs to be wrapped back on to the filter
- // interval, and the next column in the filter is calculated.
- if (filterX > 1.0f) {
- filterX -= 1.0f;
- rightPart = leftPart;
- leftPartCursor -= 1;
- leftPart = partAtColumn(leftPartCursor);
- }
- SkASSERT(0.0f < filterX && filterX <= 1.0f);
+ while (count >= 4) { // emit four pixels per call while at least four remain
+ Sk4f fpixel0 = getNextPixel();
+ Sk4f fpixel1 = getNextPixel();
+ Sk4f fpixel2 = getNextPixel();
+ Sk4f fpixel3 = getNextPixel();
- return bilerp();
- };
+ fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3);
+ count -= 4;
+ }
- while (count >= 4) {
- Sk4f px0 = advanceFilter(),
- px1 = advanceFilter(),
- px2 = advanceFilter(),
- px3 = advanceFilter();
- fNext->blend4Pixels(px0, px1, px2, px3);
- count -= 4;
- }
+ while (count > 0) { // remaining 0..3 pixels
+ fNext->blendPixel(getNextPixel());
- while (count > 0) {
- fNext->blendPixel(advanceFilter());
- count -= 1;
- }
+ count -= 1;
}
}
- // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel.
- // Every filter part is used for two destination pixels, and the code can bulk load four
- // pixels at a time.
- void spanUnitRate(Span span) {
- SkPoint start; SkScalar length; int count;
- std::tie(start, length, count) = span;
- SkASSERT(SkScalarAbs(length) == (count - 1));
-
- // Calculate the four filter points of start, and use the two different Y values to
- // generate the row pointers.
- Sk4i iXs, iYs;
- filterPoints(start, &iXs, &iYs);
- const void* row0 = fAccessor.row(iYs[0]);
- const void* row1 = fAccessor.row(iYs[2]);
-
- // Calculate the filter values for the top-left filter element.
- const SkScalar filterX = sample_to_filter(X(start));
- const SkScalar filterY = sample_to_filter(Y(start));
-
- // Generate part of the filter value at xColumn.
- auto partAtColumn = [&](int xColumn) {
- int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
- Sk4f pxTop, pxBottom;
- this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
- return pxTop * filterY + (1.0f - filterY) * pxBottom;
+ // We're moving through source space at a rate of 1 source pixel per 1 dst pixel.
+ // Contiguous pixels are loaded four at a time; only the previous column's blend (pxB) is carried over.
+ void spanUnitRate(Span span, SkScalar y1) { // y1 (+0.5) selects the second filter row.
+ y1 += 0.5f; // y of the second filter row
+ SkScalar y0 = span.startY() - 0.5f; // y of the first filter row
+ int iy0 = SkScalarFloorToInt(y0);
+ SkScalar filterY1 = y0 - iy0; // weight of the second row
+ SkScalar filterY0 = 1.0f - filterY1; // weight of the first row
+ int iy1 = SkScalarFloorToInt(y1);
+ const void* rowY0 = fAccessor.row(iy0);
+ const void* rowY1 = fAccessor.row(iy1);
+ SkScalar x0 = span.startX() - 0.5f; // x of the left filter column
+ int ix0 = SkScalarFloorToInt(x0); // column cursor; advanced by the loops below
+ SkScalar filterX1 = x0 - ix0; // weight of lerp()'s second argument
+ SkScalar filterX0 = 1.0f - filterX1; // weight of lerp()'s first argument
+
+ auto getPixelY0 = [&]() { // first-row pixel at the current ix0, scaled by its row weight
+ Sk4f px = fAccessor.getPixelFromRow(rowY0, ix0);
+ return px * filterY0;
};
- auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
- // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
- // otherwise be careful.
- if (0 <= ix && ix <= fXMax - 3) {
- Sk4f px00, px10, px20, px30,
- px01, px11, px21, px31;
- fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
- fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
- *part0 = filterY * px00 + (1.0f - filterY) * px01;
- *part1 = filterY * px10 + (1.0f - filterY) * px11;
- *part2 = filterY * px20 + (1.0f - filterY) * px21;
- *part3 = filterY * px30 + (1.0f - filterY) * px31;
- } else {
- *part0 = partAtColumn(ix + 0);
- *part1 = partAtColumn(ix + 1);
- *part2 = partAtColumn(ix + 2);
- *part3 = partAtColumn(ix + 3);
- }
+ auto getPixelY1 = [&]() { // second-row pixel at the current ix0, scaled by its row weight
+ Sk4f px = fAccessor.getPixelFromRow(rowY1, ix0);
+ return px * filterY1;
+ };
+
+ auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { // four first-row pixels from ix, each scaled by filterY0
+ fAccessor.get4Pixels(rowY0, ix, px0, px1, px2, px3);
+ *px0 = *px0 * filterY0;
+ *px1 = *px1 * filterY0;
+ *px2 = *px2 * filterY0;
+ *px3 = *px3 * filterY0;
};
- auto bilerp = [&](Sk4f& part0, Sk4f& part1) {
- return part0 * filterX + part1 * (1.0f - filterX);
+ auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { // four second-row pixels from ix, each scaled by filterY1
+ fAccessor.get4Pixels(rowY1, ix, px0, px1, px2, px3);
+ *px0 = *px0 * filterY1;
+ *px1 = *px1 * filterY1;
+ *px2 = *px2 * filterY1;
+ *px3 = *px3 * filterY1;
};
- if (length > 0) {
- // * positive direction - generate destination pixels by sliding the filter from left
- // to right.
+ auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) { // horizontal blend of two values using the X weights
+ return pixelX0 * filterX0 + pixelX1 * filterX1;
+ };
- // overlapPart is the filter part from the end of the previous four pixels used at
- // the start of the next four pixels.
- Sk4f overlapPart = partAtColumn(iXs[0]);
- int rightColumnCursor = iXs[1];
+ // pxB is the y-blended column carried from one group of four into the next; seeded from column ix0.
+ Sk4f pxB = getPixelY0() + getPixelY1();
+ if (span.length() > 0) { // positive direction: the column cursor increases
+ int count = span.count();
while (count >= 4) {
- Sk4f part0, part1, part2, part3;
- get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
- Sk4f px0 = bilerp(overlapPart, part0);
- Sk4f px1 = bilerp(part0, part1);
- Sk4f px2 = bilerp(part1, part2);
- Sk4f px3 = bilerp(part2, part3);
- overlapPart = part3;
+ Sk4f px00, px10, px20, px30;
+ get4PixelsY0(ix0, &px00, &px10, &px20, &px30);
+ Sk4f px01, px11, px21, px31;
+ get4PixelsY1(ix0, &px01, &px11, &px21, &px31);
+ Sk4f pxS0 = px00 + px01; // pxS<n>: sum of the two weighted rows for column n of this group
+ Sk4f px0 = lerp(pxB, pxS0); // each output blends the previous column with the current one
+ Sk4f pxS1 = px10 + px11;
+ Sk4f px1 = lerp(pxS0, pxS1);
+ Sk4f pxS2 = px20 + px21;
+ Sk4f px2 = lerp(pxS1, pxS2);
+ Sk4f pxS3 = px30 + px31;
+ Sk4f px3 = lerp(pxS2, pxS3);
+ pxB = pxS3; // last column of this group feeds the next group
fNext->blend4Pixels(px0, px1, px2, px3);
- rightColumnCursor += 4;
+ ix0 += 4;
count -= 4;
}
-
while (count > 0) {
- Sk4f rightPart = partAtColumn(rightColumnCursor);
+ Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0);
+ Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0);
- fNext->blendPixel(bilerp(overlapPart, rightPart));
- overlapPart = rightPart;
- rightColumnCursor += 1;
+ fNext->blendPixel(lerp(pixelY0, pixelY1)); // NOTE(review): lerp() applies the X weights, but both arguments are row samples of one column and pxB is unused -- confirm intent
+ ix0 += 1;
count -= 1;
}
} else {
- // * negative direction - generate destination pixels by sliding the filter from
- // right to left.
- Sk4f overlapPart = partAtColumn(iXs[1]);
- int leftColumnCursor = iXs[0];
-
+ int count = span.count(); // negative direction: the column cursor decreases
while (count >= 4) {
- Sk4f part0, part1, part2, part3;
- get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
- Sk4f px0 = bilerp(part0, overlapPart);
- Sk4f px1 = bilerp(part1, part0);
- Sk4f px2 = bilerp(part2, part1);
- Sk4f px3 = bilerp(part3, part2);
- overlapPart = part3;
+ Sk4f px00, px10, px20, px30;
+ get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30); // load starts 3 columns before ix0
+ Sk4f px01, px11, px21, px31;
+ get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31);
+ Sk4f pxS3 = px30 + px31; // the group is consumed from its last column back to its first
+ Sk4f px0 = lerp(pxS3, pxB);
+ Sk4f pxS2 = px20 + px21;
+ Sk4f px1 = lerp(pxS2, pxS3);
+ Sk4f pxS1 = px10 + px11;
+ Sk4f px2 = lerp(pxS1, pxS2);
+ Sk4f pxS0 = px00 + px01;
+ Sk4f px3 = lerp(pxS0, pxS1);
+ pxB = pxS0; // first column of this group feeds the next group
fNext->blend4Pixels(px0, px1, px2, px3);
- leftColumnCursor -= 4;
+ ix0 -= 4;
count -= 4;
}
-
while (count > 0) {
- Sk4f leftPart = partAtColumn(leftColumnCursor);
+ Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0);
+ Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0);
- fNext->blendPixel(bilerp(leftPart, overlapPart));
- overlapPart = leftPart;
- leftColumnCursor -= 1;
+ fNext->blendPixel(lerp(pixelY0, pixelY1)); // NOTE(review): same X-weights-on-row-samples pattern as the positive-direction remainder loop -- confirm intent
+ ix0 -= 1;
count -= 1;
}
}
}
- // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
- // still slow enough to take advantage of previous calculations.
- void spanMediumRate(Span span) {
- SkPoint start; SkScalar length; int count;
- std::tie(start, length, count) = span;
-
- // Calculate the distance between each sample point.
- const SkScalar dx = length / (count - 1);
- SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
-
- // Generate the filter values for the top-left corner.
- // Note: these values are in filter space; this has implications about how to adjust
- // these values at each step. For example, as the sample point increases, the filter
- // value decreases, this is because the filter and position are related by
- // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
- // direction of the sample point which is increasing by dx.
- SkScalar filterX = sample_to_filter(X(start));
- SkScalar filterY = sample_to_filter(Y(start));
-
- // Generate the four filter points from the sample point start. Generate the row* values.
- Sk4i iXs, iYs;
- this->filterPoints(start, &iXs, &iYs);
- const void* const row0 = fAccessor.row(iYs[0]);
- const void* const row1 = fAccessor.row(iYs[2]);
-
- // Generate part of the filter value at xColumn.
- auto partAtColumn = [&](int xColumn) {
- int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
- Sk4f pxTop, pxBottom;
- this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
- return pxTop * filterY + (1.0f - filterY) * pxBottom;
- };
-
- // The leftPart is made up of two pixels from the left column of the filter, right part
- // is similar. The top and bottom pixels in the *Part are created as a linear blend of
- // the top and bottom pixels using filterY. See the nextPart function below.
- Sk4f leftPart = partAtColumn(iXs[0]);
- Sk4f rightPart = partAtColumn(iXs[1]);
-
- // Create a destination color by blending together a left and right part using filterX.
- auto bilerp = [&]() {
- Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
- return check_pixel(pixel);
+ void spanUnitRateAlignedX(Span span, SkScalar y1) {
+ SkScalar y0 = span.startY() - 0.5f;
+ y1 += 0.5f;
+ int iy0 = SkScalarFloorToInt(y0);
+ SkScalar filterY1 = y0 - iy0;
+ SkScalar filterY0 = 1.0f - filterY1;
+ int iy1 = SkScalarFloorToInt(y1);
+ int ix = SkScalarFloorToInt(span.startX());
+ const void* rowY0 = fAccessor.row(iy0);
+ const void* rowY1 = fAccessor.row(iy1);
+ auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) {
+ return *pixelY0 * filterY0 + *pixelY1 * filterY1;
};
- // Send the first pixel to the destination. This simplifies the loop structure so that no
- // extra pixels are fetched for the last iteration of the loop.
- fNext->blendPixel(bilerp());
- count -= 1;
-
- if (dx > 0.0f) {
- // * positive direction - generate destination pixels by sliding the filter from left
- // to right.
- int rightPartCursor = iXs[1];
-
- // Advance the filter from left to right. Remember that moving the top-left corner of
- // the filter to the right actually makes the filter value smaller.
- auto advanceFilter = [&]() {
- filterX -= dx;
- // At this point filterX is less than zero, but might actually be less than -1.
- if (filterX > -1.0f) {
- filterX += 1.0f;
- leftPart = rightPart;
- rightPartCursor += 1;
- rightPart = partAtColumn(rightPartCursor);
- } else {
- filterX += 2.0f;
- rightPartCursor += 2;
- leftPart = partAtColumn(rightPartCursor - 1);
- rightPart = partAtColumn(rightPartCursor);
- }
- SkASSERT(0.0f < filterX && filterX <= 1.0f);
-
- return bilerp();
- };
-
+ if (span.length() > 0) {
+ int count = span.count();
while (count >= 4) {
- Sk4f px0 = advanceFilter(),
- px1 = advanceFilter(),
- px2 = advanceFilter(),
- px3 = advanceFilter();
- fNext->blend4Pixels(px0, px1, px2, px3);
+ Sk4f px00, px10, px20, px30;
+ fAccessor.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30);
+ Sk4f px01, px11, px21, px31;
+ fAccessor.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31);
+ fNext->blend4Pixels(
+ lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
+ ix += 4;
count -= 4;
}
-
while (count > 0) {
- fNext->blendPixel(advanceFilter());
+ Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix);
+ Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix);
+
+ fNext->blendPixel(lerp(&pixelY0, &pixelY1));
+ ix += 1;
count -= 1;
}
} else {
- // * negative direction - generate destination pixels by sliding the filter from
- // right to left.
- int leftPartCursor = iXs[0];
-
- auto advanceFilter = [&]() {
- // Remember, dx < 0 therefore this adds |dx| to filterX.
- filterX -= dx;
- // At this point, filterX is greater than one, but may actually be greater than two.
- if (filterX < 2.0f) {
- filterX -= 1.0f;
- rightPart = leftPart;
- leftPartCursor -= 1;
- leftPart = partAtColumn(leftPartCursor);
- } else {
- filterX -= 2.0f;
- leftPartCursor -= 2;
- rightPart = partAtColumn(leftPartCursor - 1);
- leftPart = partAtColumn(leftPartCursor);
- }
- SkASSERT(0.0f < filterX && filterX <= 1.0f);
- return bilerp();
- };
-
+ int count = span.count();
while (count >= 4) {
- Sk4f px0 = advanceFilter(),
- px1 = advanceFilter(),
- px2 = advanceFilter(),
- px3 = advanceFilter();
- fNext->blend4Pixels(px0, px1, px2, px3);
+ Sk4f px00, px10, px20, px30;
+ fAccessor.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00);
+ Sk4f px01, px11, px21, px31;
+ fAccessor.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01);
+ fNext->blend4Pixels(
+ lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31));
+ ix -= 4;
count -= 4;
}
-
while (count > 0) {
- fNext->blendPixel(advanceFilter());
+ Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix);
+ Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix);
+
+ fNext->blendPixel(lerp(&pixelY0, &pixelY1));
+ ix -= 1;
count -= 1;
}
}
@@ -1009,26 +798,34 @@ private:
// We're moving through source space faster than dst (zoomed out),
// so we'll never reuse a source pixel or be able to do contiguous loads.
- void spanFastRate(Span span) {
- SkPoint start; SkScalar length; int count;
+ void spanFastRate(Span span, SkScalar y1) { // y1 (+0.5) is the y of the second filter row.
+ SkPoint start;
+ SkScalar length;
+ int count;
std::tie(start, length, count) = span;
SkScalar x = X(start);
SkScalar y = Y(start);
- SkScalar dx = length / (count - 1);
- while (count > 0) {
- fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
- x += dx;
- count -= 1;
+ // In this sampler, it is assumed that if span.startY() and y1 are the same then both
+ // y-lines are on the same tile.
+ if (y == y1) {
+ // Both y-lines are on the same tile.
+ span_fallback(span, this); // presumably feeds the span back through this sampler's point-list entry points -- TODO confirm
+ } else {
+ // The y-lines are on different tiles.
+ SkScalar dx = length / (count - 1); // step between samples; assumes count > 1 -- TODO confirm
+ Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f}; // first two taps use the span's row, last two use y1's row
+ while (count > 0) {
+ Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x}; // taps half a pixel to either side of x
+ this->bilerpEdge(xs, ys); // one output pixel per sample point
+ x += dx;
+ count -= 1;
+ }
}
}
- Next* const fNext;
- const SkShader::TileMode fXEdgeType;
- const int fXMax;
- const SkShader::TileMode fYEdgeType;
- const int fYMax;
- Accessor fAccessor;
+ Next* const fNext;
+ Accessor fAccessor;
};
} // namespace