diff options
author | jcgregorio <jcgregorio@google.com> | 2016-07-22 05:40:58 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2016-07-22 05:40:58 -0700 |
commit | da626aadcced1b10a1f9d4d10b17e3ed429ba81b (patch) | |
tree | 2982adcf208832e9fbafd20128ceec20499450b2 | |
parent | 2259c5f65015f2ba61c5ce9d0a4410bfa7d3eedf (diff) |
Revert of Redo Tiling (patchset #14 id:260001 of https://codereview.chromium.org/2134893002/ )
Reason for revert:
Crashing on Win with:
Caught exception 3221225477 EXCEPTION_ACCESS_VIOLATION, was running:
unit test GrShape
srgb gm shadertext2
srgb gm shallow_gradient_conical
srgb gm shallow_gradient_sweep
srgb gm shallow_gradient_linear_nodither
step returned non-zero exit code: -1073741819
https://status.skia.org/?commit_label=author&filter=search&search_value=Test-Win-MSVC-GCE-CPU-AVX2-x86-Release
Original issue's description:
> In the current code, tiling and bilerp sampling are strongly tied together. They can be separated by taking advantage of the observation that, when a sample point is translated into filter points in the bilerp stage, the filter points will be at most 0.5 outside the tile. This allows simplified repositioning for the various tiling modes: clamp and mirror use min and max, while repeat maps max -> 0 and 0 -> max. This allows bilerp to simply handle the filter points that fall off the tile itself. This allows tiling and bilerp sampling to be totally separate.
>
> This CL has several parts that are intertwined:
> * move pin/wrap functionality into BilerpSampler.
> * remove the nearest neighbor and bilerp tilers
> * create a simplified general tiler
> * remove the pipeline virtual calls bilerpEdge and bilerpSpan because everything works off of sample points now.
> * redo all the bilerp sampling to use the new local methods to wrap/pin.
> * introduce a new medium rate sample that handles spans with 1 < |dx| < 2.
>
> This change improves the performance as displayed below:
> Most of the top 25 desktop pages improve or stay the same. A few are worse, but close to the noise floor. In addition, this change has about 3% smaller code.
>
> old time new time new/old
> 13274693 8414645 0.633886 top25desk_google_com_search_q_c.skp_1
> 4946466 3258018 0.658656 top25desk_wordpress.skp_1
> 6977187 5737584 0.822335 top25desk_youtube_com.skp_1
> 3770021 3296831 0.874486 top25desk_google_com__hl_en_q_b.skp_1
> 8890813 8600143 0.967307 top25desk_answers_yahoo_com.skp_1
> 3178974 3094300 0.973364 top25desk_facebook.skp_1
> 8871835 8711260 0.981901 top25desk_twitter.skp_1
> 838509 829290 0.989005 top25desk_blogger.skp_1
> 2821870 2801111 0.992644 top25desk_plus_google_com_11003.skp_1
> 511978 509530 0.995219 top25desk_techcrunch_com.skp_1
> 2408588 2397435 0.995369 top25desk_ebay_com.skp_1
> 4446919 4448004 1.00024 top25desk_espn.skp_1
> 2863241 2875696 1.00435 top25desk_google_com_calendar_.skp_1
> 7170086 7208447 1.00535 top25desk_booking_com.skp_1
> 7356109 7417776 1.00838 top25desk_pinterest.skp_1
> 5265591 5340392 1.01421 top25desk_weather_com.skp_1
> 5675244 5774144 1.01743 top25desk_sports_yahoo_com_.skp_1
> 1048531 1067663 1.01825 top25desk_games_yahoo_com.skp_1
> 2075501 2115131 1.01909 top25desk_amazon_com.skp_1
> 4262170 4370441 1.0254 top25desk_news_yahoo_com.skp_1
> 3789319 3897996 1.02868 top25desk_docs___1_open_documen.skp_1
> 919336 949979 1.03333 top25desk_wikipedia__1_tab_.skp_1
> 4274454 4489369 1.05028 top25desk_mail_google_com_mail_.skp_1
> 4149326 4376556 1.05476 top25desk_linkedin.skp_1
>
> BUG=skia:
> GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2134893002
> CQ_INCLUDE_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot;master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
>
> Committed: https://skia.googlesource.com/skia/+/8602ede5fdfa721dcad4dcb11db028c1c24265f1
TBR=mtklein@google.com,herb@google.com
# Skipping CQ checks because original CL landed less than 1 days ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=skia:
Review-Url: https://codereview.chromium.org/2174793002
-rw-r--r-- | src/core/SkBitmapProcShader.h | 2 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline.cpp | 238 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline.h | 4 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_core.h | 33 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_sample.h | 783 |
5 files changed, 507 insertions, 553 deletions
diff --git a/src/core/SkBitmapProcShader.h b/src/core/SkBitmapProcShader.h index 67b005ac05..a4591c7355 100644 --- a/src/core/SkBitmapProcShader.h +++ b/src/core/SkBitmapProcShader.h @@ -56,7 +56,7 @@ private: typedef SkShader INHERITED; }; -enum {kSkBlitterContextSize = 3332}; +enum {kSkBlitterContextSize = 3200}; // Commonly used allocator. It currently is only used to allocate up to 3 objects. The total // bytes requested is calculated using one of our large shaders, its context size plus the size of diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp index 0122765709..088e829345 100644 --- a/src/core/SkLinearBitmapPipeline.cpp +++ b/src/core/SkLinearBitmapPipeline.cpp @@ -165,14 +165,15 @@ static SkLinearBitmapPipeline::PointProcessorInterface* choose_matrix( // Tile Stage template<typename XStrategy, typename YStrategy, typename Next> -class CombinedTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { +class NearestTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { public: - CombinedTileStage(Next* next, SkISize dimensions) + template <typename... 
Args> + NearestTileStage(Next* next, SkISize dimensions) : fNext{next} , fXStrategy{dimensions.width()} , fYStrategy{dimensions.height()}{ } - CombinedTileStage(Next* next, const CombinedTileStage& stage) + NearestTileStage(Next* next, const NearestTileStage& stage) : fNext{next} , fXStrategy{stage.fXStrategy} , fYStrategy{stage.fYStrategy} { } @@ -194,16 +195,9 @@ public: SkASSERT(!span.isEmpty()); SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; - - if (span.count() == 1) { - this->pointListFew(1, span.startX(), span.startY()); - return; - } - SkScalar x = X(start); SkScalar y = fYStrategy.tileY(Y(start)); Span yAdjustedSpan{{x, y}, length, count}; - if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) { span_fallback(span, this); } @@ -215,27 +209,173 @@ private: YStrategy fYStrategy; }; -template <typename XStrategy, typename Next> +template<typename XStrategy, typename YStrategy, typename Next> +class BilerpTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { +public: + template <typename... Args> + BilerpTileStage(Next* next, SkISize dimensions) + : fNext{next} + , fXMax(dimensions.width()) + , fYMax(dimensions.height()) + , fXStrategy{dimensions.width()} + , fYStrategy{dimensions.height()} { } + + BilerpTileStage(Next* next, const BilerpTileStage& stage) + : fNext{next} + , fXMax{stage.fXMax} + , fYMax{stage.fYMax} + , fXStrategy{stage.fXStrategy} + , fYStrategy{stage.fYStrategy} { } + + void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + // TODO: check to see if xs and ys are in range then just call pointListFew on next. 
+ if (n >= 1) this->bilerpPoint(xs[0], ys[0]); + if (n >= 2) this->bilerpPoint(xs[1], ys[1]); + if (n >= 3) this->bilerpPoint(xs[2], ys[2]); + } + + void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + // TODO: check to see if xs and ys are in range then just call pointList4 on next. + this->bilerpPoint(xs[0], ys[0]); + this->bilerpPoint(xs[1], ys[1]); + this->bilerpPoint(xs[2], ys[2]); + this->bilerpPoint(xs[3], ys[3]); + } + + struct Wrapper { + void pointSpan(Span span) { + processor->breakIntoEdges(span); + } + + void repeatSpan(Span span, int32_t repeatCount) { + while (repeatCount --> 0) { + processor->pointSpan(span); + } + } + + BilerpTileStage* processor; + }; + + // The span you pass must not be empty. + void pointSpan(Span span) override { + SkASSERT(!span.isEmpty()); + + Wrapper wrapper = {this}; + if (!fXStrategy.maybeProcessSpan(span, &wrapper)) { + span_fallback(span, this); + } + } + +private: + void bilerpPoint(SkScalar x, SkScalar y) { + Sk4f txs = Sk4f{x} + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f}; + Sk4f tys = Sk4f{y} + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f}; + fXStrategy.tileXPoints(&txs); + fYStrategy.tileYPoints(&tys); + fNext->bilerpEdge(txs, tys); + } + + void handleEdges(Span span, SkScalar dx) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + SkScalar x = X(start); + SkScalar y = Y(start); + SkScalar tiledY = fYStrategy.tileY(y); + while (count > 0) { + this->bilerpPoint(x, tiledY); + x += dx; + count -= 1; + } + } + + void yProcessSpan(Span span) { + SkScalar tiledY = fYStrategy.tileY(span.startY()); + if (0.5f <= tiledY && tiledY < fYMax - 0.5f ) { + Span tiledSpan{{span.startX(), tiledY}, span.length(), span.count()}; + fNext->pointSpan(tiledSpan); + } else { + // Convert to the Y0 bilerp sample set by shifting by -0.5f. Then tile that new y + // value and shift it back resulting in the working Y0. 
Do the same thing with Y1 but + // in the opposite direction. + SkScalar y0 = fYStrategy.tileY(span.startY() - 0.5f) + 0.5f; + SkScalar y1 = fYStrategy.tileY(span.startY() + 0.5f) - 0.5f; + Span newSpan{{span.startX(), y0}, span.length(), span.count()}; + fNext->bilerpSpan(newSpan, y1); + } + } + void breakIntoEdges(Span span) { + if (span.count() == 1) { + this->bilerpPoint(span.startX(), span.startY()); + } else if (span.length() == 0) { + yProcessSpan(span); + } else { + SkScalar dx = span.length() / (span.count() - 1); + if (span.length() > 0) { + Span leftBorder = span.breakAt(0.5f, dx); + if (!leftBorder.isEmpty()) { + this->handleEdges(leftBorder, dx); + } + Span center = span.breakAt(fXMax - 0.5f, dx); + if (!center.isEmpty()) { + this->yProcessSpan(center); + } + + if (!span.isEmpty()) { + this->handleEdges(span, dx); + } + } else { + Span center = span.breakAt(fXMax + 0.5f, dx); + if (!span.isEmpty()) { + this->handleEdges(span, dx); + } + Span leftEdge = center.breakAt(0.5f, dx); + if (!center.isEmpty()) { + this->yProcessSpan(center); + } + if (!leftEdge.isEmpty()) { + this->handleEdges(leftEdge, dx); + } + + } + } + } + + Next* const fNext; + SkScalar fXMax; + SkScalar fYMax; + XStrategy fXStrategy; + YStrategy fYStrategy; +}; + +template <typename XStrategy, typename YStrategy, typename Next> +void make_tile_stage( + SkFilterQuality filterQuality, SkISize dimensions, + Next* next, SkLinearBitmapPipeline::TileStage* tileStage) { + if (filterQuality == kNone_SkFilterQuality) { + tileStage->initStage<NearestTileStage<XStrategy, YStrategy, Next>>(next, dimensions); + } else { + tileStage->initStage<BilerpTileStage<XStrategy, YStrategy, Next>>(next, dimensions); + } +} +template <typename XStrategy> void choose_tiler_ymode( SkShader::TileMode yMode, SkFilterQuality filterQuality, SkISize dimensions, - Next* next, + SkLinearBitmapPipeline::SampleProcessorInterface* next, SkLinearBitmapPipeline::TileStage* tileStage) { switch (yMode) { - case 
SkShader::kClamp_TileMode: { - using Tiler = CombinedTileStage<XStrategy, YClampStrategy, Next>; - tileStage->initStage<Tiler>(next, dimensions); + case SkShader::kClamp_TileMode: + make_tile_stage<XStrategy, YClampStrategy>(filterQuality, dimensions, next, tileStage); break; - } - case SkShader::kRepeat_TileMode: { - using Tiler = CombinedTileStage<XStrategy, YRepeatStrategy, Next>; - tileStage->initStage<Tiler>(next, dimensions); + case SkShader::kRepeat_TileMode: + make_tile_stage<XStrategy, YRepeatStrategy>(filterQuality, dimensions, next, tileStage); break; - } - case SkShader::kMirror_TileMode: { - using Tiler = CombinedTileStage<XStrategy, YMirrorStrategy, Next>; - tileStage->initStage<Tiler>(next, dimensions); + case SkShader::kMirror_TileMode: + make_tile_stage<XStrategy, YMirrorStrategy>(filterQuality, dimensions, next, tileStage); break; - } } }; @@ -327,6 +467,10 @@ public: fDest = dest; } + void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); } + + void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); } + void setDestination(void* dst, int count) override { fDest = static_cast<uint32_t*>(dst); fEnd = fDest + count; @@ -394,6 +538,10 @@ public: SkASSERT(fDest <= fEnd); } + void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); } + + void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); } + void setDestination(void* dst, int count) override { SkASSERT(count > 0); fDest = static_cast<uint32_t*>(dst); @@ -434,9 +582,12 @@ static SkLinearBitmapPipeline::PixelAccessorInterface* choose_specific_accessor( } } -static SkLinearBitmapPipeline::PixelAccessorInterface* choose_pixel_accessor( +template<template <typename, typename> class Sampler> +static SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler_base( + Blender* next, const SkPixmap& srcPixmap, const SkColor A8TintColor, + SkLinearBitmapPipeline::SampleStage* sampleStage, 
SkLinearBitmapPipeline::Accessor* accessor) { const SkImageInfo& imageInfo = srcPixmap.info(); @@ -478,19 +629,19 @@ static SkLinearBitmapPipeline::PixelAccessorInterface* choose_pixel_accessor( break; } - return pixelAccessor; + using S = Sampler<PixelAccessorShim, Blender>; + sampleStage->initStage<S>(next, pixelAccessor); + return sampleStage->get(); } SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler( Blender* next, SkFilterQuality filterQuality, - SkShader::TileMode xTile, SkShader::TileMode yTile, const SkPixmap& srcPixmap, const SkColor A8TintColor, SkLinearBitmapPipeline::SampleStage* sampleStage, SkLinearBitmapPipeline::Accessor* accessor) { const SkImageInfo& imageInfo = srcPixmap.info(); - SkISize dimensions = imageInfo.dimensions(); // Special case samplers with fully expanded templates if (imageInfo.gammaCloseToSRGB()) { @@ -519,14 +670,14 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler( using S = BilerpSampler< PixelAccessor<kN32_SkColorType, kSRGB_SkGammaType>, Blender>; - sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap); + sampleStage->initStage<S>(next, srcPixmap); return sampleStage->get(); } case kIndex_8_SkColorType: { using S = BilerpSampler< PixelAccessor<kIndex_8_SkColorType, kSRGB_SkGammaType>, Blender>; - sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap); + sampleStage->initStage<S>(next, srcPixmap); return sampleStage->get(); } default: @@ -535,16 +686,14 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler( } } - auto pixelAccessor = choose_pixel_accessor(srcPixmap, A8TintColor, accessor); // General cases. 
if (filterQuality == kNone_SkFilterQuality) { - using S = NearestNeighborSampler<PixelAccessorShim, Blender>; - sampleStage->initStage<S>(next, pixelAccessor); + return choose_pixel_sampler_base<NearestNeighborSampler>( + next, srcPixmap, A8TintColor, sampleStage, accessor); } else { - using S = BilerpSampler<PixelAccessorShim, Blender>; - sampleStage->initStage<S>(next, dimensions, xTile, yTile, pixelAccessor); + return choose_pixel_sampler_base<BilerpSampler>( + next, srcPixmap, A8TintColor, sampleStage, accessor); } - return sampleStage->get(); } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -556,17 +705,17 @@ public: SrcFPPixel(const SrcFPPixel& Blender) : fPostAlpha(Blender.fPostAlpha) {} void SK_VECTORCALL blendPixel(Sk4f pixel) override { SkASSERT(fDst + 1 <= fEnd ); - this->srcPixel(fDst, pixel, 0); + SrcPixel(fDst, pixel, 0); fDst += 1; } void SK_VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) override { SkASSERT(fDst + 4 <= fEnd); SkPM4f* dst = fDst; - this->srcPixel(dst, p0, 0); - this->srcPixel(dst, p1, 1); - this->srcPixel(dst, p2, 2); - this->srcPixel(dst, p3, 3); + SrcPixel(dst, p0, 0); + SrcPixel(dst, p1, 1); + SrcPixel(dst, p2, 2); + SrcPixel(dst, p3, 3); fDst += 4; } @@ -576,9 +725,7 @@ public: } private: - void SK_VECTORCALL srcPixel(SkPM4f* dst, Sk4f pixel, int index) { - check_pixel(pixel); - + void SK_VECTORCALL SrcPixel(SkPM4f* dst, Sk4f pixel, int index) { Sk4f newPixel = pixel; if (alphaType == kUnpremul_SkAlphaType) { newPixel = Premultiply(pixel); @@ -650,8 +797,7 @@ SkLinearBitmapPipeline::SkLinearBitmapPipeline( // identity matrix, the matrix stage is skipped, and the tilerStage is the first stage. 
auto blenderStage = choose_blender_for_shading(alphaType, postAlpha, &fBlenderStage); auto samplerStage = choose_pixel_sampler( - blenderStage, filterQuality, xTile, yTile, - srcPixmap, paintColor, &fSampleStage, &fAccessor); + blenderStage, filterQuality, srcPixmap, paintColor, &fSampleStage, &fAccessor); auto tilerStage = choose_tiler(samplerStage, dimensions, xTile, yTile, filterQuality, dx, &fTileStage); fFirstStage = choose_matrix(tilerStage, adjustedInverse, &fMatrixStage); diff --git a/src/core/SkLinearBitmapPipeline.h b/src/core/SkLinearBitmapPipeline.h index 91b573df5d..b0f7e9dd20 100644 --- a/src/core/SkLinearBitmapPipeline.h +++ b/src/core/SkLinearBitmapPipeline.h @@ -133,9 +133,9 @@ public: // These values were generated by the assert above in Stage::init{Sink|Stage}. using MatrixStage = Stage<PointProcessorInterface, 160, PointProcessorInterface>; using TileStage = Stage<PointProcessorInterface, 160, SampleProcessorInterface>; - using SampleStage = Stage<SampleProcessorInterface, 160, BlendProcessorInterface>; + using SampleStage = Stage<SampleProcessorInterface, 100, BlendProcessorInterface>; using BlenderStage = Stage<BlendProcessorInterface, 40>; - using Accessor = PolyMemory<PixelAccessorInterface, 64>; + using Accessor = PolyMemory<PixelAccessorInterface, 48>; private: PointProcessorInterface* fFirstStage; diff --git a/src/core/SkLinearBitmapPipeline_core.h b/src/core/SkLinearBitmapPipeline_core.h index cf120eec65..2c39a38320 100644 --- a/src/core/SkLinearBitmapPipeline_core.h +++ b/src/core/SkLinearBitmapPipeline_core.h @@ -178,15 +178,6 @@ void span_fallback(Span span, Stage* stage) { stage->pointListFew(count, xs, ys); } } - -inline Sk4f check_pixel(Sk4f& pixel) { - SkASSERTF(0.0f <= pixel[0] && pixel[0] <= 1.0f, "pixel[0]: %f", pixel[0]); - SkASSERTF(0.0f <= pixel[1] && pixel[1] <= 1.0f, "pixel[1]: %f", pixel[1]); - SkASSERTF(0.0f <= pixel[2] && pixel[2] <= 1.0f, "pixel[2]: %f", pixel[2]); - SkASSERTF(0.0f <= pixel[3] && pixel[3] <= 1.0f, 
"pixel[3]: %f", pixel[3]); - return pixel; -} - } // namespace class SkLinearBitmapPipeline::PointProcessorInterface { @@ -210,6 +201,26 @@ public: // Used for nearest neighbor when scale factor is 1.0. The span can just be repeated with no // edge pixel alignment problems. This is for handling a very common case. virtual void repeatSpan(Span span, int32_t repeatCount) = 0; + + // The x's and y's are setup in the following order: + // +--------+--------+ + // | | | + // | px00 | px10 | + // | 0 | 1 | + // +--------+--------+ + // | | | + // | px01 | px11 | + // | 2 | 3 | + // +--------+--------+ + // These pixels coordinates are arranged in the following order in xs and ys: + // px00 px10 px01 px11 + virtual void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) = 0; + + // A span represents sample points that have been mapped from destination space to source + // space. Each sample point is then expanded to the four bilerp points by add +/- 0.5. The + // resulting Y values my be off the tile. When y +/- 0.5 are more than 1 apart because of + // tiling, the second Y is used to denote the retiled Y value. 
+ virtual void bilerpSpan(Span span, SkScalar y) = 0; }; class SkLinearBitmapPipeline::DestinationInterface { @@ -232,10 +243,10 @@ class SkLinearBitmapPipeline::PixelAccessorInterface { public: virtual ~PixelAccessorInterface() { } virtual void SK_VECTORCALL getFewPixels( - int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0; + int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0; virtual void SK_VECTORCALL get4Pixels( - Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; + Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; virtual void get4Pixels( const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h index 5421758297..759075b3e5 100644 --- a/src/core/SkLinearBitmapPipeline_sample.h +++ b/src/core/SkLinearBitmapPipeline_sample.h @@ -40,7 +40,7 @@ namespace { // * px11 -> xy // So x * y is calculated first and then used to calculate all the other factors. static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, - Sk4f px01, Sk4f px11) { + Sk4f px01, Sk4f px11) { // Calculate fractional xs and ys. 
Sk4s fxs = xs - xs.floor(); Sk4s fys = ys - ys.floor(); @@ -134,21 +134,20 @@ template <SkGammaType gammaType> class PixelConverter<kIndex_8_SkColorType, gammaType> { public: using Element = uint8_t; - PixelConverter(const SkPixmap& srcPixmap) - : fColorTableSize(srcPixmap.ctable()->count()){ + PixelConverter(const SkPixmap& srcPixmap) { SkColorTable* skColorTable = srcPixmap.ctable(); SkASSERT(skColorTable != nullptr); fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); - for (int i = 0; i < fColorTableSize; i++) { + for (int i = 0; i < skColorTable->count(); i++) { fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]); } } - PixelConverter(const PixelConverter& strategy) - : fColorTableSize{strategy.fColorTableSize}{ + PixelConverter(const PixelConverter& strategy) { fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); - for (int i = 0; i < fColorTableSize; i++) { + // TODO: figure out the count. + for (int i = 0; i < 256; i++) { fColorTable[i] = strategy.fColorTable[i]; } } @@ -159,9 +158,9 @@ public: private: static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; - const int fColorTableSize; - SkAutoMalloc fColorTableStorage{kColorTableSize}; - Sk4f* fColorTable; + + SkAutoMalloc fColorTableStorage{kColorTableSize}; + Sk4f* fColorTable; }; template <SkGammaType gammaType> @@ -195,12 +194,12 @@ public: : fPixelAccessor(accessor) { } void SK_VECTORCALL getFewPixels( - int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { + int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); } void SK_VECTORCALL get4Pixels( - Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { + Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); } @@ -238,8 +237,10 @@ public: , fConverter{srcPixmap, std::move<Args>(args)...} { } void SK_VECTORCALL getFewPixels ( - int n, Sk4i 
xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { - Sk4i bufferLoc = ys * fWidth + xs; + int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { + Sk4i XIs = SkNx_cast<int, SkScalar>(xs); + Sk4i YIs = SkNx_cast<int, SkScalar>(ys); + Sk4i bufferLoc = YIs * fWidth + XIs; switch (n) { case 3: *px2 = this->getPixelAt(bufferLoc[2]); @@ -253,8 +254,10 @@ public: } void SK_VECTORCALL get4Pixels( - Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { - Sk4i bufferLoc = ys * fWidth + xs; + Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { + Sk4i XIs = SkNx_cast<int, SkScalar>(xs); + Sk4i YIs = SkNx_cast<int, SkScalar>(ys); + Sk4i bufferLoc = YIs * fWidth + XIs; *px0 = this->getPixelAt(bufferLoc[0]); *px1 = this->getPixelAt(bufferLoc[1]); *px2 = this->getPixelAt(bufferLoc[2]); @@ -327,7 +330,6 @@ static void src_strategy_blend(Span span, Next* next, Strategy* strategy) { } } -// -- NearestNeighborSampler ----------------------------------------------------------------------- // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels. 
template<typename Accessor, typename Next> class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { @@ -343,7 +345,7 @@ public: void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { SkASSERT(0 < n && n < 4); Sk4f px0, px1, px2; - fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2); + fAccessor.getFewPixels(n, xs, ys, &px0, &px1, &px2); if (n >= 1) fNext->blendPixel(px0); if (n >= 2) fNext->blendPixel(px1); if (n >= 3) fNext->blendPixel(px2); @@ -351,7 +353,7 @@ public: void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { Sk4f px0, px1, px2, px3; - fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3); + fAccessor.get4Pixels(xs, ys, &px0, &px1, &px2, &px3); fNext->blend4Pixels(px0, px1, px2, px3); } @@ -378,11 +380,21 @@ public: } } + void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { + SkFAIL("Using nearest neighbor sampler, but calling a bilerpEdge."); + } + + void bilerpSpan(Span span, SkScalar y) override { + SkFAIL("Using nearest neighbor sampler, but calling a bilerpSpan."); + } + private: // When moving through source space more slowly than dst space (zoomed in), // we'll be sampling from the same source pixel more than once. void spanSlowRate(Span span) { - SkPoint start; SkScalar length; int count; + SkPoint start; + SkScalar length; + int count; std::tie(start, length, count) = span; SkScalar x = X(start); SkFixed fx = SkScalarToFixed(x); @@ -439,82 +451,35 @@ private: Accessor fAccessor; }; -// From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge -// vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to -// generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value -// on the interval [0, vMax]. -// Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel. 
-static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) { - SkASSERT(-1 <= vs && vs <= vMax + 1) - switch (edgeType) { - case SkShader::kClamp_TileMode: - case SkShader::kMirror_TileMode: - vs = std::max(vs, 0); - vs = std::min(vs, vMax); - break; - case SkShader::kRepeat_TileMode: - vs = (vs <= vMax) ? vs : 0; - vs = (vs >= 0) ? vs : vMax; - break; - } - SkASSERT(0 <= vs && vs <= vMax); - return vs; -} - -// From a sample point on the tile, return the top or left filter value. -// The result r should be in the range (0, 1]. Since this represents the weight given to the top -// left element, then if x == 0.5 the filter value should be 1.0. -// The input sample point must be on the tile, therefore it must be >= 0. -static SkScalar sample_to_filter(SkScalar x) { - SkASSERT(x >= 0.0f); - // The usual form of the top or left edge is x - .5, but since we are working on the unit - // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use - // of trunc. - SkScalar v = x + 0.5f; - // Produce the top or left offset a value on the range [0, 1). - SkScalar f = v - SkScalarTruncToScalar(v); - // Produce the filter value which is on the range (0, 1]. - SkScalar r = 1.0f - f; - SkASSERT(0.0f < r && r <= 1.0f); - return r; -} - // -- BilerpSampler -------------------------------------------------------------------------------- // BilerpSampler - use a bilerp filter to create runs of destination pixels. -// Note: in the code below, there are two types of points -// * sample points - these are the points passed in by pointList* and Spans. -// * filter points - are created from a sample point to form the coordinates of the points -// to use in the filter and to generate the filter values. template<typename Accessor, typename Next> class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { public: template<typename... 
Args> - BilerpSampler( - SkLinearBitmapPipeline::BlendProcessorInterface* next, - SkISize dimensions, - SkShader::TileMode xTile, SkShader::TileMode yTile, - Args&& ... args - ) - : fNext{next} - , fXEdgeType{xTile} - , fXMax{dimensions.width() - 1} - , fYEdgeType{yTile} - , fYMax{dimensions.height() - 1} - , fAccessor{std::forward<Args>(args)...} { } + BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args) + : fNext{next}, fAccessor{std::forward<Args>(args)...} { } BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, const BilerpSampler& sampler) - : fNext{next} - , fXEdgeType{sampler.fXEdgeType} - , fXMax{sampler.fXMax} - , fYEdgeType{sampler.fYEdgeType} - , fYMax{sampler.fYMax} - , fAccessor{sampler.fAccessor} { } + : fNext{next}, fAccessor{sampler.fAccessor} { } + + Sk4f bilerpNonEdgePixel(SkScalar x, SkScalar y) { + Sk4f px00, px10, px01, px11; + + // bilerp4() expects xs, ys are the top-lefts of the 2x2 kernel. + Sk4f xs = Sk4f{x} - 0.5f; + Sk4f ys = Sk4f{y} - 0.5f; + Sk4f sampleXs = xs + Sk4f{0.0f, 1.0f, 0.0f, 1.0f}; + Sk4f sampleYs = ys + Sk4f{0.0f, 0.0f, 1.0f, 1.0f}; + fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); + return bilerp4(xs, ys, px00, px10, px01, px11); + } void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { SkASSERT(0 < n && n < 4); auto bilerpPixel = [&](int index) { - return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); + return this->bilerpNonEdgePixel(xs[index], ys[index]); }; if (n >= 1) fNext->blendPixel(bilerpPixel(0)); @@ -524,484 +489,308 @@ public: void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { auto bilerpPixel = [&](int index) { - return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); + return this->bilerpNonEdgePixel(xs[index], ys[index]); }; fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3)); } void pointSpan(Span span) override { + this->bilerpSpan(span, span.startY()); + } + + void 
repeatSpan(Span span, int32_t repeatCount) override { + while (repeatCount > 0) { + this->pointSpan(span); + repeatCount--; + } + } + + void SK_VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) override { + Sk4f px00, px10, px01, px11; + Sk4f xs = Sk4f{sampleXs[0]}; + Sk4f ys = Sk4f{sampleYs[0]}; + fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); + Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11); + fNext->blendPixel(pixel); + } + + void bilerpSpan(Span span, SkScalar y) override { SkASSERT(!span.isEmpty()); SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; - - // Nothing to do. - if (count == 0) { - return; - } - - // Trivial case. No sample points are generated other than start. - if (count == 1) { - fNext->blendPixel(this->bilerpSamplePoint(start)); - return; - } - - // Note: the following code could be done in terms of dx = length / (count -1), but that - // would introduce a divide that is not needed for the most common dx == 1 cases. SkScalar absLength = SkScalarAbs(length); if (absLength == 0.0f) { - // |dx| == 0 - // length is zero, so clamp an edge pixel. - this->spanZeroRate(span); + this->spanZeroRate(span, y); } else if (absLength < (count - 1)) { - // 0 < |dx| < 1. - this->spanSlowRate(span); + this->spanSlowRate(span, y); } else if (absLength == (count - 1)) { - // |dx| == 1. - if (sample_to_filter(span.startX()) == 1.0f - && sample_to_filter(span.startY()) == 1.0f) { - // All the pixels are aligned with the dest; go fast. - src_strategy_blend(span, fNext, &fAccessor); + if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) { + if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) { + src_strategy_blend(span, fNext, &fAccessor); + } else { + this->spanUnitRateAlignedX(span, y); + } } else { - // There is some sub-pixel offsets, so bilerp. - this->spanUnitRate(span); + this->spanUnitRate(span, y); } - } else if (absLength < 2.0f * (count - 1)) { - // 1 < |dx| < 2. 
- this->spanMediumRate(span); } else { - // |dx| >= 2. - this->spanFastRate(span); - } - } - - void repeatSpan(Span span, int32_t repeatCount) override { - while (repeatCount > 0) { - this->pointSpan(span); - repeatCount--; + this->spanFastRate(span, y); } } private: - - // Convert a sample point to the points used by the filter. - void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) { - // May be less than zero. Be careful to use Floor. - int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax); - // Always greater than zero. Use the faster Trunc. - int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax); - int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax); - int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax); - - *filterXs = Sk4i{x0, x1, x0, x1}; - *filterYs = Sk4i{y0, y0, y1, y1}; - } - - // Given a sample point, generate a color by bilerping the four filter points. - Sk4f bilerpSamplePoint(SkPoint sample) { - Sk4i iXs, iYs; - filterPoints(sample, &iXs, &iYs); - Sk4f px00, px10, px01, px11; - fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11); - return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11); - } - - // Get two pixels at x from row0 and row1. - void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) { - *px0 = fAccessor.getPixelFromRow(row0, x); - *px1 = fAccessor.getPixelFromRow(row1, x); - } - - // |dx| == 0. This code assumes that length is zero. - void spanZeroRate(Span span) { - SkPoint start; SkScalar length; int count; - std::tie(start, length, count) = span; - SkASSERT(length == 0.0f); - - // Filter for the blending of the top and bottom pixels. - SkScalar filterY = sample_to_filter(Y(start)); - - // Generate the four filter points from the sample point start. Generate the row* values. 
- Sk4i iXs, iYs; - this->filterPoints(start, &iXs, &iYs); - const void* const row0 = fAccessor.row(iYs[0]); - const void* const row1 = fAccessor.row(iYs[2]); - - // Get the two pixels that make up the clamping pixel. - Sk4f pxTop, pxBottom; - this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom); - Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom; - + void spanZeroRate(Span span, SkScalar y1) { + SkScalar y0 = span.startY() - 0.5f; + y1 += 0.5f; + int iy0 = SkScalarFloorToInt(y0); + SkScalar filterY1 = y0 - iy0; + SkScalar filterY0 = 1.0f - filterY1; + int iy1 = SkScalarFloorToInt(y1); + int ix = SkScalarFloorToInt(span.startX()); + Sk4f pixelY0 = fAccessor.getPixelFromRow(fAccessor.row(iy0), ix); + Sk4f pixelY1 = fAccessor.getPixelFromRow(fAccessor.row(iy1), ix); + Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1; + int count = span.count(); while (count >= 4) { - fNext->blend4Pixels(pixel, pixel, pixel, pixel); + fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel); count -= 4; } while (count > 0) { - fNext->blendPixel(pixel); + fNext->blendPixel(filterPixel); count -= 1; } } - // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce - // computation. In particular, several destination pixels maybe generated from the same four - // source pixels. - // In the following code a "part" is a combination of two pixels from the same column of the - // filter. - void spanSlowRate(Span span) { - SkPoint start; SkScalar length; int count; + // When moving through source space more slowly than dst space (zoomed in), + // we'll be sampling from the same source pixel more than once. + void spanSlowRate(Span span, SkScalar ry1) { + SkPoint start; + SkScalar length; + int count; std::tie(start, length, count) = span; + SkFixed fx = SkScalarToFixed(X(start)-0.5f); - // Calculate the distance between each sample point. 
- const SkScalar dx = length / (count - 1); - SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f); - - // Generate the filter values for the top-left corner. - // Note: these values are in filter space; this has implications about how to adjust - // these values at each step. For example, as the sample point increases, the filter - // value decreases, this is because the filter and position are related by - // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite - // direction of the sample point which is increasing by dx. - SkScalar filterX = sample_to_filter(X(start)); - SkScalar filterY = sample_to_filter(Y(start)); - - // Generate the four filter points from the sample point start. Generate the row* values. - Sk4i iXs, iYs; - this->filterPoints(start, &iXs, &iYs); - const void* const row0 = fAccessor.row(iYs[0]); - const void* const row1 = fAccessor.row(iYs[2]); - - // Generate part of the filter value at xColumn. - auto partAtColumn = [&](int xColumn) { - int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); - Sk4f pxTop, pxBottom; - this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); - return pxTop * filterY + (1.0f - filterY) * pxBottom; - }; + SkFixed fdx = SkScalarToFixed(length / (count - 1)); - // The leftPart is made up of two pixels from the left column of the filter, right part - // is similar. The top and bottom pixels in the *Part are created as a linear blend of - // the top and bottom pixels using filterY. See the partAtColumn function above. 
- Sk4f leftPart = partAtColumn(iXs[0]); - Sk4f rightPart = partAtColumn(iXs[1]); + Sk4f xAdjust; + if (fdx >= 0) { + xAdjust = Sk4f{-1.0f}; + } else { + xAdjust = Sk4f{1.0f}; + } + int ix = SkFixedFloorToInt(fx); + int ioldx = ix; + Sk4f x{SkFixedToScalar(fx) - ix}; + Sk4f dx{SkFixedToScalar(fdx)}; + SkScalar ry0 = Y(start) - 0.5f; + ry1 += 0.5f; + SkScalar yFloor = std::floor(ry0); + Sk4f y1 = Sk4f{ry0 - yFloor}; + Sk4f y0 = Sk4f{1.0f} - y1; + const void* const row0 = fAccessor.row(SkScalarFloorToInt(ry0)); + const void* const row1 = fAccessor.row(SkScalarFloorToInt(ry1)); + Sk4f fpixel00 = y0 * fAccessor.getPixelFromRow(row0, ix); + Sk4f fpixel01 = y1 * fAccessor.getPixelFromRow(row1, ix); + Sk4f fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); + Sk4f fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); + auto getNextPixel = [&]() { + if (ix != ioldx) { + fpixel00 = fpixel10; + fpixel01 = fpixel11; + fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); + fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); + ioldx = ix; + x = x + xAdjust; + } - // Create a destination color by blending together a left and right part using filterX. - auto bilerp = [&]() { - Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); - return check_pixel(pixel); + Sk4f x0, x1; + x0 = Sk4f{1.0f} - x; + x1 = x; + Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11); + fx += fdx; + ix = SkFixedFloorToInt(fx); + x = x + dx; + return fpixel; }; - // Send the first pixel to the destination. This simplifies the loop structure so that no - // extra pixels are fetched for the last iteration of the loop. - fNext->blendPixel(bilerp()); - count -= 1; - - if (dx > 0.0f) { - // * positive direction - generate destination pixels by sliding the filter from left - // to right. - int rightPartCursor = iXs[1]; - - // Advance the filter from left to right. 
Remember that moving the top-left corner of - // the filter to the right actually makes the filter value smaller. - auto advanceFilter = [&]() { - filterX -= dx; - if (filterX <= 0.0f) { - filterX += 1.0f; - leftPart = rightPart; - rightPartCursor += 1; - rightPart = partAtColumn(rightPartCursor); - } - SkASSERT(0.0f < filterX && filterX <= 1.0f); - - return bilerp(); - }; - - while (count >= 4) { - Sk4f px0 = advanceFilter(), - px1 = advanceFilter(), - px2 = advanceFilter(), - px3 = advanceFilter(); - fNext->blend4Pixels(px0, px1, px2, px3); - count -= 4; - } - - while (count > 0) { - fNext->blendPixel(advanceFilter()); - count -= 1; - } - } else { - // * negative direction - generate destination pixels by sliding the filter from - // right to left. - int leftPartCursor = iXs[0]; - - // Advance the filter from right to left. Remember that moving the top-left corner of - // the filter to the left actually makes the filter value larger. - auto advanceFilter = [&]() { - // Remember, dx < 0 therefore this adds |dx| to filterX. - filterX -= dx; - // At this point filterX may be > 1, and needs to be wrapped back on to the filter - // interval, and the next column in the filter is calculated. 
- if (filterX > 1.0f) { - filterX -= 1.0f; - rightPart = leftPart; - leftPartCursor -= 1; - leftPart = partAtColumn(leftPartCursor); - } - SkASSERT(0.0f < filterX && filterX <= 1.0f); + while (count >= 4) { + Sk4f fpixel0 = getNextPixel(); + Sk4f fpixel1 = getNextPixel(); + Sk4f fpixel2 = getNextPixel(); + Sk4f fpixel3 = getNextPixel(); - return bilerp(); - }; + fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3); + count -= 4; + } - while (count >= 4) { - Sk4f px0 = advanceFilter(), - px1 = advanceFilter(), - px2 = advanceFilter(), - px3 = advanceFilter(); - fNext->blend4Pixels(px0, px1, px2, px3); - count -= 4; - } + while (count > 0) { + fNext->blendPixel(getNextPixel()); - while (count > 0) { - fNext->blendPixel(advanceFilter()); - count -= 1; - } + count -= 1; } } - // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel. - // Every filter part is used for two destination pixels, and the code can bulk load four - // pixels at a time. - void spanUnitRate(Span span) { - SkPoint start; SkScalar length; int count; - std::tie(start, length, count) = span; - SkASSERT(SkScalarAbs(length) == (count - 1)); - - // Calculate the four filter points of start, and use the two different Y values to - // generate the row pointers. - Sk4i iXs, iYs; - filterPoints(start, &iXs, &iYs); - const void* row0 = fAccessor.row(iYs[0]); - const void* row1 = fAccessor.row(iYs[2]); - - // Calculate the filter values for the top-left filter element. - const SkScalar filterX = sample_to_filter(X(start)); - const SkScalar filterY = sample_to_filter(Y(start)); - - // Generate part of the filter value at xColumn. - auto partAtColumn = [&](int xColumn) { - int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); - Sk4f pxTop, pxBottom; - this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); - return pxTop * filterY + (1.0f - filterY) * pxBottom; + // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. 
+ // We'll never re-use pixels, but we can at least load contiguous pixels. + void spanUnitRate(Span span, SkScalar y1) { + y1 += 0.5f; + SkScalar y0 = span.startY() - 0.5f; + int iy0 = SkScalarFloorToInt(y0); + SkScalar filterY1 = y0 - iy0; + SkScalar filterY0 = 1.0f - filterY1; + int iy1 = SkScalarFloorToInt(y1); + const void* rowY0 = fAccessor.row(iy0); + const void* rowY1 = fAccessor.row(iy1); + SkScalar x0 = span.startX() - 0.5f; + int ix0 = SkScalarFloorToInt(x0); + SkScalar filterX1 = x0 - ix0; + SkScalar filterX0 = 1.0f - filterX1; + + auto getPixelY0 = [&]() { + Sk4f px = fAccessor.getPixelFromRow(rowY0, ix0); + return px * filterY0; }; - auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) { - // Check if the pixels needed are near the edges. If not go fast using bulk pixels, - // otherwise be careful. - if (0 <= ix && ix <= fXMax - 3) { - Sk4f px00, px10, px20, px30, - px01, px11, px21, px31; - fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30); - fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31); - *part0 = filterY * px00 + (1.0f - filterY) * px01; - *part1 = filterY * px10 + (1.0f - filterY) * px11; - *part2 = filterY * px20 + (1.0f - filterY) * px21; - *part3 = filterY * px30 + (1.0f - filterY) * px31; - } else { - *part0 = partAtColumn(ix + 0); - *part1 = partAtColumn(ix + 1); - *part2 = partAtColumn(ix + 2); - *part3 = partAtColumn(ix + 3); - } + auto getPixelY1 = [&]() { + Sk4f px = fAccessor.getPixelFromRow(rowY1, ix0); + return px * filterY1; + }; + + auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { + fAccessor.get4Pixels(rowY0, ix, px0, px1, px2, px3); + *px0 = *px0 * filterY0; + *px1 = *px1 * filterY0; + *px2 = *px2 * filterY0; + *px3 = *px3 * filterY0; }; - auto bilerp = [&](Sk4f& part0, Sk4f& part1) { - return part0 * filterX + part1 * (1.0f - filterX); + auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { + fAccessor.get4Pixels(rowY1, ix, px0, 
px1, px2, px3); + *px0 = *px0 * filterY1; + *px1 = *px1 * filterY1; + *px2 = *px2 * filterY1; + *px3 = *px3 * filterY1; }; - if (length > 0) { - // * positive direction - generate destination pixels by sliding the filter from left - // to right. + auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) { + return pixelX0 * filterX0 + pixelX1 * filterX1; + }; - // overlapPart is the filter part from the end of the previous four pixels used at - // the start of the next four pixels. - Sk4f overlapPart = partAtColumn(iXs[0]); - int rightColumnCursor = iXs[1]; + // Mid making 4 unit rate. + Sk4f pxB = getPixelY0() + getPixelY1(); + if (span.length() > 0) { + int count = span.count(); while (count >= 4) { - Sk4f part0, part1, part2, part3; - get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3); - Sk4f px0 = bilerp(overlapPart, part0); - Sk4f px1 = bilerp(part0, part1); - Sk4f px2 = bilerp(part1, part2); - Sk4f px3 = bilerp(part2, part3); - overlapPart = part3; + Sk4f px00, px10, px20, px30; + get4PixelsY0(ix0, &px00, &px10, &px20, &px30); + Sk4f px01, px11, px21, px31; + get4PixelsY1(ix0, &px01, &px11, &px21, &px31); + Sk4f pxS0 = px00 + px01; + Sk4f px0 = lerp(pxB, pxS0); + Sk4f pxS1 = px10 + px11; + Sk4f px1 = lerp(pxS0, pxS1); + Sk4f pxS2 = px20 + px21; + Sk4f px2 = lerp(pxS1, pxS2); + Sk4f pxS3 = px30 + px31; + Sk4f px3 = lerp(pxS2, pxS3); + pxB = pxS3; fNext->blend4Pixels(px0, px1, px2, px3); - rightColumnCursor += 4; + ix0 += 4; count -= 4; } - while (count > 0) { - Sk4f rightPart = partAtColumn(rightColumnCursor); + Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0); + Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0); - fNext->blendPixel(bilerp(overlapPart, rightPart)); - overlapPart = rightPart; - rightColumnCursor += 1; + fNext->blendPixel(lerp(pixelY0, pixelY1)); + ix0 += 1; count -= 1; } } else { - // * negative direction - generate destination pixels by sliding the filter from - // right to left. 
- Sk4f overlapPart = partAtColumn(iXs[1]); - int leftColumnCursor = iXs[0]; - + int count = span.count(); while (count >= 4) { - Sk4f part0, part1, part2, part3; - get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0); - Sk4f px0 = bilerp(part0, overlapPart); - Sk4f px1 = bilerp(part1, part0); - Sk4f px2 = bilerp(part2, part1); - Sk4f px3 = bilerp(part3, part2); - overlapPart = part3; + Sk4f px00, px10, px20, px30; + get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30); + Sk4f px01, px11, px21, px31; + get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31); + Sk4f pxS3 = px30 + px31; + Sk4f px0 = lerp(pxS3, pxB); + Sk4f pxS2 = px20 + px21; + Sk4f px1 = lerp(pxS2, pxS3); + Sk4f pxS1 = px10 + px11; + Sk4f px2 = lerp(pxS1, pxS2); + Sk4f pxS0 = px00 + px01; + Sk4f px3 = lerp(pxS0, pxS1); + pxB = pxS0; fNext->blend4Pixels(px0, px1, px2, px3); - leftColumnCursor -= 4; + ix0 -= 4; count -= 4; } - while (count > 0) { - Sk4f leftPart = partAtColumn(leftColumnCursor); + Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0); + Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0); - fNext->blendPixel(bilerp(leftPart, overlapPart)); - overlapPart = leftPart; - leftColumnCursor -= 1; + fNext->blendPixel(lerp(pixelY0, pixelY1)); + ix0 -= 1; count -= 1; } } } - // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but - // still slow enough to take advantage of previous calculations. - void spanMediumRate(Span span) { - SkPoint start; SkScalar length; int count; - std::tie(start, length, count) = span; - - // Calculate the distance between each sample point. - const SkScalar dx = length / (count - 1); - SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f)); - - // Generate the filter values for the top-left corner. - // Note: these values are in filter space; this has implications about how to adjust - // these values at each step. 
For example, as the sample point increases, the filter - // value decreases, this is because the filter and position are related by - // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite - // direction of the sample point which is increasing by dx. - SkScalar filterX = sample_to_filter(X(start)); - SkScalar filterY = sample_to_filter(Y(start)); - - // Generate the four filter points from the sample point start. Generate the row* values. - Sk4i iXs, iYs; - this->filterPoints(start, &iXs, &iYs); - const void* const row0 = fAccessor.row(iYs[0]); - const void* const row1 = fAccessor.row(iYs[2]); - - // Generate part of the filter value at xColumn. - auto partAtColumn = [&](int xColumn) { - int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); - Sk4f pxTop, pxBottom; - this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); - return pxTop * filterY + (1.0f - filterY) * pxBottom; - }; - - // The leftPart is made up of two pixels from the left column of the filter, right part - // is similar. The top and bottom pixels in the *Part are created as a linear blend of - // the top and bottom pixels using filterY. See the nextPart function below. - Sk4f leftPart = partAtColumn(iXs[0]); - Sk4f rightPart = partAtColumn(iXs[1]); - - // Create a destination color by blending together a left and right part using filterX. 
- auto bilerp = [&]() { - Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); - return check_pixel(pixel); + void spanUnitRateAlignedX(Span span, SkScalar y1) { + SkScalar y0 = span.startY() - 0.5f; + y1 += 0.5f; + int iy0 = SkScalarFloorToInt(y0); + SkScalar filterY1 = y0 - iy0; + SkScalar filterY0 = 1.0f - filterY1; + int iy1 = SkScalarFloorToInt(y1); + int ix = SkScalarFloorToInt(span.startX()); + const void* rowY0 = fAccessor.row(iy0); + const void* rowY1 = fAccessor.row(iy1); + auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) { + return *pixelY0 * filterY0 + *pixelY1 * filterY1; }; - // Send the first pixel to the destination. This simplifies the loop structure so that no - // extra pixels are fetched for the last iteration of the loop. - fNext->blendPixel(bilerp()); - count -= 1; - - if (dx > 0.0f) { - // * positive direction - generate destination pixels by sliding the filter from left - // to right. - int rightPartCursor = iXs[1]; - - // Advance the filter from left to right. Remember that moving the top-left corner of - // the filter to the right actually makes the filter value smaller. - auto advanceFilter = [&]() { - filterX -= dx; - // At this point filterX is less than zero, but might actually be less than -1. 
- if (filterX > -1.0f) { - filterX += 1.0f; - leftPart = rightPart; - rightPartCursor += 1; - rightPart = partAtColumn(rightPartCursor); - } else { - filterX += 2.0f; - rightPartCursor += 2; - leftPart = partAtColumn(rightPartCursor - 1); - rightPart = partAtColumn(rightPartCursor); - } - SkASSERT(0.0f < filterX && filterX <= 1.0f); - - return bilerp(); - }; - + if (span.length() > 0) { + int count = span.count(); while (count >= 4) { - Sk4f px0 = advanceFilter(), - px1 = advanceFilter(), - px2 = advanceFilter(), - px3 = advanceFilter(); - fNext->blend4Pixels(px0, px1, px2, px3); + Sk4f px00, px10, px20, px30; + fAccessor.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30); + Sk4f px01, px11, px21, px31; + fAccessor.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31); + fNext->blend4Pixels( + lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); + ix += 4; count -= 4; } - while (count > 0) { - fNext->blendPixel(advanceFilter()); + Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); + Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); + + fNext->blendPixel(lerp(&pixelY0, &pixelY1)); + ix += 1; count -= 1; } } else { - // * negative direction - generate destination pixels by sliding the filter from - // right to left. - int leftPartCursor = iXs[0]; - - auto advanceFilter = [&]() { - // Remember, dx < 0 therefore this adds |dx| to filterX. - filterX -= dx; - // At this point, filterX is greater than one, but may actually be greater than two. 
- if (filterX < 2.0f) { - filterX -= 1.0f; - rightPart = leftPart; - leftPartCursor -= 1; - leftPart = partAtColumn(leftPartCursor); - } else { - filterX -= 2.0f; - leftPartCursor -= 2; - rightPart = partAtColumn(leftPartCursor - 1); - leftPart = partAtColumn(leftPartCursor); - } - SkASSERT(0.0f < filterX && filterX <= 1.0f); - return bilerp(); - }; - + int count = span.count(); while (count >= 4) { - Sk4f px0 = advanceFilter(), - px1 = advanceFilter(), - px2 = advanceFilter(), - px3 = advanceFilter(); - fNext->blend4Pixels(px0, px1, px2, px3); + Sk4f px00, px10, px20, px30; + fAccessor.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00); + Sk4f px01, px11, px21, px31; + fAccessor.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01); + fNext->blend4Pixels( + lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); + ix -= 4; count -= 4; } - while (count > 0) { - fNext->blendPixel(advanceFilter()); + Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); + Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); + + fNext->blendPixel(lerp(&pixelY0, &pixelY1)); + ix -= 1; count -= 1; } } @@ -1009,26 +798,34 @@ private: // We're moving through source space faster than dst (zoomed out), // so we'll never reuse a source pixel or be able to do contiguous loads. - void spanFastRate(Span span) { - SkPoint start; SkScalar length; int count; + void spanFastRate(Span span, SkScalar y1) { + SkPoint start; + SkScalar length; + int count; std::tie(start, length, count) = span; SkScalar x = X(start); SkScalar y = Y(start); - SkScalar dx = length / (count - 1); - while (count > 0) { - fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y})); - x += dx; - count -= 1; + // In this sampler, it is assumed that if span.StartY() and y1 are the same then both + // y-lines are on the same tile. + if (y == y1) { + // Both y-lines are on the same tile. + span_fallback(span, this); + } else { + // The y-lines are on different tiles. 
+ SkScalar dx = length / (count - 1); + Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f}; + while (count > 0) { + Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x}; + this->bilerpEdge(xs, ys); + x += dx; + count -= 1; + } } } - Next* const fNext; - const SkShader::TileMode fXEdgeType; - const int fXMax; - const SkShader::TileMode fYEdgeType; - const int fYMax; - Accessor fAccessor; + Next* const fNext; + Accessor fAccessor; }; } // namespace |