diff options
-rw-r--r-- | src/core/SkBitmapProcShader.h | 2 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline.cpp | 238 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline.h | 4 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_core.h | 33 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_sample.h | 783 |
5 files changed, 553 insertions, 507 deletions
diff --git a/src/core/SkBitmapProcShader.h b/src/core/SkBitmapProcShader.h index a4591c7355..67b005ac05 100644 --- a/src/core/SkBitmapProcShader.h +++ b/src/core/SkBitmapProcShader.h @@ -56,7 +56,7 @@ private: typedef SkShader INHERITED; }; -enum {kSkBlitterContextSize = 3200}; +enum {kSkBlitterContextSize = 3332}; // Commonly used allocator. It currently is only used to allocate up to 3 objects. The total // bytes requested is calculated using one of our large shaders, its context size plus the size of diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp index 088e829345..0122765709 100644 --- a/src/core/SkLinearBitmapPipeline.cpp +++ b/src/core/SkLinearBitmapPipeline.cpp @@ -165,15 +165,14 @@ static SkLinearBitmapPipeline::PointProcessorInterface* choose_matrix( // Tile Stage template<typename XStrategy, typename YStrategy, typename Next> -class NearestTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { +class CombinedTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { public: - template <typename... Args> - NearestTileStage(Next* next, SkISize dimensions) + CombinedTileStage(Next* next, SkISize dimensions) : fNext{next} , fXStrategy{dimensions.width()} , fYStrategy{dimensions.height()}{ } - NearestTileStage(Next* next, const NearestTileStage& stage) + CombinedTileStage(Next* next, const CombinedTileStage& stage) : fNext{next} , fXStrategy{stage.fXStrategy} , fYStrategy{stage.fYStrategy} { } @@ -195,187 +194,48 @@ public: SkASSERT(!span.isEmpty()); SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; - SkScalar x = X(start); - SkScalar y = fYStrategy.tileY(Y(start)); - Span yAdjustedSpan{{x, y}, length, count}; - if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) { - span_fallback(span, this); - } - } - -private: - Next* const fNext; - XStrategy fXStrategy; - YStrategy fYStrategy; -}; - -template<typename XStrategy, typename YStrategy, typename Next> -class BilerpTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { -public: - template <typename... Args> - BilerpTileStage(Next* next, SkISize dimensions) - : fNext{next} - , fXMax(dimensions.width()) - , fYMax(dimensions.height()) - , fXStrategy{dimensions.width()} - , fYStrategy{dimensions.height()} { } - - BilerpTileStage(Next* next, const BilerpTileStage& stage) - : fNext{next} - , fXMax{stage.fXMax} - , fYMax{stage.fYMax} - , fXStrategy{stage.fXStrategy} - , fYStrategy{stage.fYStrategy} { } - void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { - fXStrategy.tileXPoints(&xs); - fYStrategy.tileYPoints(&ys); - // TODO: check to see if xs and ys are in range then just call pointListFew on next. - if (n >= 1) this->bilerpPoint(xs[0], ys[0]); - if (n >= 2) this->bilerpPoint(xs[1], ys[1]); - if (n >= 3) this->bilerpPoint(xs[2], ys[2]); - } - - void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { - fXStrategy.tileXPoints(&xs); - fYStrategy.tileYPoints(&ys); - // TODO: check to see if xs and ys are in range then just call pointList4 on next. - this->bilerpPoint(xs[0], ys[0]); - this->bilerpPoint(xs[1], ys[1]); - this->bilerpPoint(xs[2], ys[2]); - this->bilerpPoint(xs[3], ys[3]); - } - - struct Wrapper { - void pointSpan(Span span) { - processor->breakIntoEdges(span); - } - - void repeatSpan(Span span, int32_t repeatCount) { - while (repeatCount --> 0) { - processor->pointSpan(span); - } + if (span.count() == 1) { + this->pointListFew(1, span.startX(), span.startY()); + return; } - BilerpTileStage* processor; - }; - - // The span you pass must not be empty. - void pointSpan(Span span) override { - SkASSERT(!span.isEmpty()); + SkScalar x = X(start); + SkScalar y = fYStrategy.tileY(Y(start)); + Span yAdjustedSpan{{x, y}, length, count}; - Wrapper wrapper = {this}; - if (!fXStrategy.maybeProcessSpan(span, &wrapper)) { + if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) { span_fallback(span, this); } } private: - void bilerpPoint(SkScalar x, SkScalar y) { - Sk4f txs = Sk4f{x} + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f}; - Sk4f tys = Sk4f{y} + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f}; - fXStrategy.tileXPoints(&txs); - fYStrategy.tileYPoints(&tys); - fNext->bilerpEdge(txs, tys); - } - - void handleEdges(Span span, SkScalar dx) { - SkPoint start; SkScalar length; int count; - std::tie(start, length, count) = span; - SkScalar x = X(start); - SkScalar y = Y(start); - SkScalar tiledY = fYStrategy.tileY(y); - while (count > 0) { - this->bilerpPoint(x, tiledY); - x += dx; - count -= 1; - } - } - - void yProcessSpan(Span span) { - SkScalar tiledY = fYStrategy.tileY(span.startY()); - if (0.5f <= tiledY && tiledY < fYMax - 0.5f ) { - Span tiledSpan{{span.startX(), tiledY}, span.length(), span.count()}; - fNext->pointSpan(tiledSpan); - } else { - // Convert to the Y0 bilerp sample set by shifting by -0.5f. Then tile that new y - // value and shift it back resulting in the working Y0. Do the same thing with Y1 but - // in the opposite direction. - SkScalar y0 = fYStrategy.tileY(span.startY() - 0.5f) + 0.5f; - SkScalar y1 = fYStrategy.tileY(span.startY() + 0.5f) - 0.5f; - Span newSpan{{span.startX(), y0}, span.length(), span.count()}; - fNext->bilerpSpan(newSpan, y1); - } - } - void breakIntoEdges(Span span) { - if (span.count() == 1) { - this->bilerpPoint(span.startX(), span.startY()); - } else if (span.length() == 0) { - yProcessSpan(span); - } else { - SkScalar dx = span.length() / (span.count() - 1); - if (span.length() > 0) { - Span leftBorder = span.breakAt(0.5f, dx); - if (!leftBorder.isEmpty()) { - this->handleEdges(leftBorder, dx); - } - Span center = span.breakAt(fXMax - 0.5f, dx); - if (!center.isEmpty()) { - this->yProcessSpan(center); - } - - if (!span.isEmpty()) { - this->handleEdges(span, dx); - } - } else { - Span center = span.breakAt(fXMax + 0.5f, dx); - if (!span.isEmpty()) { - this->handleEdges(span, dx); - } - Span leftEdge = center.breakAt(0.5f, dx); - if (!center.isEmpty()) { - this->yProcessSpan(center); - } - if (!leftEdge.isEmpty()) { - this->handleEdges(leftEdge, dx); - } - - } - } - } - Next* const fNext; - SkScalar fXMax; - SkScalar fYMax; XStrategy fXStrategy; YStrategy fYStrategy; }; -template <typename XStrategy, typename YStrategy, typename Next> -void make_tile_stage( - SkFilterQuality filterQuality, SkISize dimensions, - Next* next, SkLinearBitmapPipeline::TileStage* tileStage) { - if (filterQuality == kNone_SkFilterQuality) { - tileStage->initStage<NearestTileStage<XStrategy, YStrategy, Next>>(next, dimensions); - } else { - tileStage->initStage<BilerpTileStage<XStrategy, YStrategy, Next>>(next, dimensions); - } -} -template <typename XStrategy> +template <typename XStrategy, typename Next> void choose_tiler_ymode( SkShader::TileMode yMode, SkFilterQuality filterQuality, SkISize dimensions, - SkLinearBitmapPipeline::SampleProcessorInterface* next, + Next* next, SkLinearBitmapPipeline::TileStage* tileStage) { switch (yMode) { - case SkShader::kClamp_TileMode: - make_tile_stage<XStrategy, YClampStrategy>(filterQuality, dimensions, next, tileStage); + case SkShader::kClamp_TileMode: { + using Tiler = CombinedTileStage<XStrategy, YClampStrategy, Next>; + tileStage->initStage<Tiler>(next, dimensions); break; - case SkShader::kRepeat_TileMode: - make_tile_stage<XStrategy, YRepeatStrategy>(filterQuality, dimensions, next, tileStage); + } + case SkShader::kRepeat_TileMode: { + using Tiler = CombinedTileStage<XStrategy, YRepeatStrategy, Next>; + tileStage->initStage<Tiler>(next, dimensions); break; - case SkShader::kMirror_TileMode: - make_tile_stage<XStrategy, YMirrorStrategy>(filterQuality, dimensions, next, tileStage); + } + case SkShader::kMirror_TileMode: { + using Tiler = CombinedTileStage<XStrategy, YMirrorStrategy, Next>; + tileStage->initStage<Tiler>(next, dimensions); break; + } } }; @@ -467,10 +327,6 @@ public: fDest = dest; } - void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); } - - void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); } - void setDestination(void* dst, int count) override { fDest = static_cast<uint32_t*>(dst); fEnd = fDest + count; @@ -538,10 +394,6 @@ public: SkASSERT(fDest <= fEnd); } - void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { SkFAIL("Not Implemented"); } - - void bilerpSpan(Span span, SkScalar y) override { SkFAIL("Not Implemented"); } - void setDestination(void* dst, int count) override { SkASSERT(count > 0); fDest = static_cast<uint32_t*>(dst); @@ -582,12 +434,9 @@ static SkLinearBitmapPipeline::PixelAccessorInterface* choose_specific_accessor( } } -template<template <typename, typename> class Sampler> -static SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler_base( - Blender* next, +static SkLinearBitmapPipeline::PixelAccessorInterface* choose_pixel_accessor( const SkPixmap& srcPixmap, const SkColor A8TintColor, - SkLinearBitmapPipeline::SampleStage* sampleStage, SkLinearBitmapPipeline::Accessor* accessor) { const SkImageInfo& imageInfo = srcPixmap.info(); @@ -629,19 +478,19 @@ static SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler_ba break; } - using S = Sampler<PixelAccessorShim, Blender>; - sampleStage->initStage<S>(next, pixelAccessor); - return sampleStage->get(); + return pixelAccessor; } SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler( Blender* next, SkFilterQuality filterQuality, + SkShader::TileMode xTile, SkShader::TileMode yTile, const SkPixmap& srcPixmap, const SkColor A8TintColor, SkLinearBitmapPipeline::SampleStage* sampleStage, SkLinearBitmapPipeline::Accessor* accessor) { const SkImageInfo& imageInfo = srcPixmap.info(); + SkISize dimensions = imageInfo.dimensions(); // Special case samplers with fully expanded templates if (imageInfo.gammaCloseToSRGB()) { @@ -670,14 +519,14 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler( using S = BilerpSampler< PixelAccessor<kN32_SkColorType, kSRGB_SkGammaType>, Blender>; - sampleStage->initStage<S>(next, srcPixmap); + sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap); return sampleStage->get(); } case kIndex_8_SkColorType: { using S = BilerpSampler< PixelAccessor<kIndex_8_SkColorType, kSRGB_SkGammaType>, Blender>; - sampleStage->initStage<S>(next, srcPixmap); + sampleStage->initStage<S>(next, dimensions, xTile, yTile, srcPixmap); return sampleStage->get(); } default: @@ -686,14 +535,16 @@ SkLinearBitmapPipeline::SampleProcessorInterface* choose_pixel_sampler( } } + auto pixelAccessor = choose_pixel_accessor(srcPixmap, A8TintColor, accessor); // General cases. if (filterQuality == kNone_SkFilterQuality) { - return choose_pixel_sampler_base<NearestNeighborSampler>( - next, srcPixmap, A8TintColor, sampleStage, accessor); + using S = NearestNeighborSampler<PixelAccessorShim, Blender>; + sampleStage->initStage<S>(next, pixelAccessor); } else { - return choose_pixel_sampler_base<BilerpSampler>( - next, srcPixmap, A8TintColor, sampleStage, accessor); + using S = BilerpSampler<PixelAccessorShim, Blender>; + sampleStage->initStage<S>(next, dimensions, xTile, yTile, pixelAccessor); } + return sampleStage->get(); } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -705,17 +556,17 @@ public: SrcFPPixel(const SrcFPPixel& Blender) : fPostAlpha(Blender.fPostAlpha) {} void SK_VECTORCALL blendPixel(Sk4f pixel) override { SkASSERT(fDst + 1 <= fEnd ); - SrcPixel(fDst, pixel, 0); + this->srcPixel(fDst, pixel, 0); fDst += 1; } void SK_VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) override { SkASSERT(fDst + 4 <= fEnd); SkPM4f* dst = fDst; - SrcPixel(dst, p0, 0); - SrcPixel(dst, p1, 1); - SrcPixel(dst, p2, 2); - SrcPixel(dst, p3, 3); + this->srcPixel(dst, p0, 0); + this->srcPixel(dst, p1, 1); + this->srcPixel(dst, p2, 2); + this->srcPixel(dst, p3, 3); fDst += 4; } @@ -725,7 +576,9 @@ public: } private: - void SK_VECTORCALL SrcPixel(SkPM4f* dst, Sk4f pixel, int index) { + void SK_VECTORCALL srcPixel(SkPM4f* dst, Sk4f pixel, int index) { + check_pixel(pixel); + Sk4f newPixel = pixel; if (alphaType == kUnpremul_SkAlphaType) { newPixel = Premultiply(pixel); @@ -797,7 +650,8 @@ SkLinearBitmapPipeline::SkLinearBitmapPipeline( // identity matrix, the matrix stage is skipped, and the tilerStage is the first stage. auto blenderStage = choose_blender_for_shading(alphaType, postAlpha, &fBlenderStage); auto samplerStage = choose_pixel_sampler( - blenderStage, filterQuality, srcPixmap, paintColor, &fSampleStage, &fAccessor); + blenderStage, filterQuality, xTile, yTile, + srcPixmap, paintColor, &fSampleStage, &fAccessor); auto tilerStage = choose_tiler(samplerStage, dimensions, xTile, yTile, filterQuality, dx, &fTileStage); fFirstStage = choose_matrix(tilerStage, adjustedInverse, &fMatrixStage); diff --git a/src/core/SkLinearBitmapPipeline.h b/src/core/SkLinearBitmapPipeline.h index b0f7e9dd20..91b573df5d 100644 --- a/src/core/SkLinearBitmapPipeline.h +++ b/src/core/SkLinearBitmapPipeline.h @@ -133,9 +133,9 @@ public: // These values were generated by the assert above in Stage::init{Sink|Stage}. using MatrixStage = Stage<PointProcessorInterface, 160, PointProcessorInterface>; using TileStage = Stage<PointProcessorInterface, 160, SampleProcessorInterface>; - using SampleStage = Stage<SampleProcessorInterface, 100, BlendProcessorInterface>; + using SampleStage = Stage<SampleProcessorInterface, 160, BlendProcessorInterface>; using BlenderStage = Stage<BlendProcessorInterface, 40>; - using Accessor = PolyMemory<PixelAccessorInterface, 48>; + using Accessor = PolyMemory<PixelAccessorInterface, 64>; private: PointProcessorInterface* fFirstStage; diff --git a/src/core/SkLinearBitmapPipeline_core.h b/src/core/SkLinearBitmapPipeline_core.h index 2c39a38320..cf120eec65 100644 --- a/src/core/SkLinearBitmapPipeline_core.h +++ b/src/core/SkLinearBitmapPipeline_core.h @@ -178,6 +178,15 @@ void span_fallback(Span span, Stage* stage) { stage->pointListFew(count, xs, ys); } } + +inline Sk4f check_pixel(Sk4f& pixel) { + SkASSERTF(0.0f <= pixel[0] && pixel[0] <= 1.0f, "pixel[0]: %f", pixel[0]); + SkASSERTF(0.0f <= pixel[1] && pixel[1] <= 1.0f, "pixel[1]: %f", pixel[1]); + SkASSERTF(0.0f <= pixel[2] && pixel[2] <= 1.0f, "pixel[2]: %f", pixel[2]); + SkASSERTF(0.0f <= pixel[3] && pixel[3] <= 1.0f, "pixel[3]: %f", pixel[3]); + return pixel; +} + } // namespace class SkLinearBitmapPipeline::PointProcessorInterface { @@ -201,26 +210,6 @@ public: // Used for nearest neighbor when scale factor is 1.0. The span can just be repeated with no // edge pixel alignment problems. This is for handling a very common case. virtual void repeatSpan(Span span, int32_t repeatCount) = 0; - - // The x's and y's are setup in the following order: - // +--------+--------+ - // | | | - // | px00 | px10 | - // | 0 | 1 | - // +--------+--------+ - // | | | - // | px01 | px11 | - // | 2 | 3 | - // +--------+--------+ - // These pixels coordinates are arranged in the following order in xs and ys: - // px00 px10 px01 px11 - virtual void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) = 0; - - // A span represents sample points that have been mapped from destination space to source - // space. Each sample point is then expanded to the four bilerp points by add +/- 0.5. The - // resulting Y values my be off the tile. When y +/- 0.5 are more than 1 apart because of - // tiling, the second Y is used to denote the retiled Y value. - virtual void bilerpSpan(Span span, SkScalar y) = 0; }; class SkLinearBitmapPipeline::DestinationInterface { @@ -243,10 +232,10 @@ class SkLinearBitmapPipeline::PixelAccessorInterface { public: virtual ~PixelAccessorInterface() { } virtual void SK_VECTORCALL getFewPixels( - int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0; + int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0; virtual void SK_VECTORCALL get4Pixels( - Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; + Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; virtual void get4Pixels( const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h index 759075b3e5..5421758297 100644 --- a/src/core/SkLinearBitmapPipeline_sample.h +++ b/src/core/SkLinearBitmapPipeline_sample.h @@ -40,7 +40,7 @@ namespace { // * px11 -> xy // So x * y is calculated first and then used to calculate all the other factors. static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, - Sk4f px01, Sk4f px11) { + Sk4f px01, Sk4f px11) { // Calculate fractional xs and ys. Sk4s fxs = xs - xs.floor(); Sk4s fys = ys - ys.floor(); @@ -134,20 +134,21 @@ template <SkGammaType gammaType> class PixelConverter<kIndex_8_SkColorType, gammaType> { public: using Element = uint8_t; - PixelConverter(const SkPixmap& srcPixmap) { + PixelConverter(const SkPixmap& srcPixmap) + : fColorTableSize(srcPixmap.ctable()->count()){ SkColorTable* skColorTable = srcPixmap.ctable(); SkASSERT(skColorTable != nullptr); fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); - for (int i = 0; i < skColorTable->count(); i++) { + for (int i = 0; i < fColorTableSize; i++) { fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]); } } - PixelConverter(const PixelConverter& strategy) { + PixelConverter(const PixelConverter& strategy) + : fColorTableSize{strategy.fColorTableSize}{ fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); - // TODO: figure out the count. - for (int i = 0; i < 256; i++) { + for (int i = 0; i < fColorTableSize; i++) { fColorTable[i] = strategy.fColorTable[i]; } } @@ -158,9 +159,9 @@ public: private: static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; - - SkAutoMalloc fColorTableStorage{kColorTableSize}; - Sk4f* fColorTable; + const int fColorTableSize; + SkAutoMalloc fColorTableStorage{kColorTableSize}; + Sk4f* fColorTable; }; template <SkGammaType gammaType> @@ -194,12 +195,12 @@ public: : fPixelAccessor(accessor) { } void SK_VECTORCALL getFewPixels( - int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { + int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); } void SK_VECTORCALL get4Pixels( - Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { + Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); } @@ -237,10 +238,8 @@ public: , fConverter{srcPixmap, std::move<Args>(args)...} { } void SK_VECTORCALL getFewPixels ( - int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { - Sk4i XIs = SkNx_cast<int, SkScalar>(xs); - Sk4i YIs = SkNx_cast<int, SkScalar>(ys); - Sk4i bufferLoc = YIs * fWidth + XIs; + int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { + Sk4i bufferLoc = ys * fWidth + xs; switch (n) { case 3: *px2 = this->getPixelAt(bufferLoc[2]); @@ -254,10 +253,8 @@ public: } void SK_VECTORCALL get4Pixels( - Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { - Sk4i XIs = SkNx_cast<int, SkScalar>(xs); - Sk4i YIs = SkNx_cast<int, SkScalar>(ys); - Sk4i bufferLoc = YIs * fWidth + XIs; + Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { + Sk4i bufferLoc = ys * fWidth + xs; *px0 = this->getPixelAt(bufferLoc[0]); *px1 = this->getPixelAt(bufferLoc[1]); *px2 = this->getPixelAt(bufferLoc[2]); @@ -330,6 +327,7 @@ static void src_strategy_blend(Span span, Next* next, Strategy* strategy) { } } +// -- NearestNeighborSampler ----------------------------------------------------------------------- // NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels. template<typename Accessor, typename Next> class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { @@ -345,7 +343,7 @@ public: void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { SkASSERT(0 < n && n < 4); Sk4f px0, px1, px2; - fAccessor.getFewPixels(n, xs, ys, &px0, &px1, &px2); + fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2); if (n >= 1) fNext->blendPixel(px0); if (n >= 2) fNext->blendPixel(px1); if (n >= 3) fNext->blendPixel(px2); @@ -353,7 +351,7 @@ public: void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { Sk4f px0, px1, px2, px3; - fAccessor.get4Pixels(xs, ys, &px0, &px1, &px2, &px3); + fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3); fNext->blend4Pixels(px0, px1, px2, px3); } @@ -380,21 +378,11 @@ public: } } - void SK_VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { - SkFAIL("Using nearest neighbor sampler, but calling a bilerpEdge."); - } - - void bilerpSpan(Span span, SkScalar y) override { - SkFAIL("Using nearest neighbor sampler, but calling a bilerpSpan."); - } - private: // When moving through source space more slowly than dst space (zoomed in), // we'll be sampling from the same source pixel more than once. void spanSlowRate(Span span) { - SkPoint start; - SkScalar length; - int count; + SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; SkScalar x = X(start); SkFixed fx = SkScalarToFixed(x); @@ -451,35 +439,82 @@ private: Accessor fAccessor; }; +// From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge +// vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to +// generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value +// on the interval [0, vMax]. +// Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel. +static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) { + SkASSERT(-1 <= vs && vs <= vMax + 1) + switch (edgeType) { + case SkShader::kClamp_TileMode: + case SkShader::kMirror_TileMode: + vs = std::max(vs, 0); + vs = std::min(vs, vMax); + break; + case SkShader::kRepeat_TileMode: + vs = (vs <= vMax) ? vs : 0; + vs = (vs >= 0) ? vs : vMax; + break; + } + SkASSERT(0 <= vs && vs <= vMax); + return vs; +} + +// From a sample point on the tile, return the top or left filter value. +// The result r should be in the range (0, 1]. Since this represents the weight given to the top +// left element, then if x == 0.5 the filter value should be 1.0. +// The input sample point must be on the tile, therefore it must be >= 0. +static SkScalar sample_to_filter(SkScalar x) { + SkASSERT(x >= 0.0f); + // The usual form of the top or left edge is x - .5, but since we are working on the unit + // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use + // of trunc. + SkScalar v = x + 0.5f; + // Produce the top or left offset a value on the range [0, 1). + SkScalar f = v - SkScalarTruncToScalar(v); + // Produce the filter value which is on the range (0, 1]. + SkScalar r = 1.0f - f; + SkASSERT(0.0f < r && r <= 1.0f); + return r; +} + // -- BilerpSampler -------------------------------------------------------------------------------- // BilerpSampler - use a bilerp filter to create runs of destination pixels. +// Note: in the code below, there are two types of points +// * sample points - these are the points passed in by pointList* and Spans. +// * filter points - are created from a sample point to form the coordinates of the points +// to use in the filter and to generate the filter values. template<typename Accessor, typename Next> class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { public: template<typename... Args> - BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... args) - : fNext{next}, fAccessor{std::forward<Args>(args)...} { } + BilerpSampler( + SkLinearBitmapPipeline::BlendProcessorInterface* next, + SkISize dimensions, + SkShader::TileMode xTile, SkShader::TileMode yTile, + Args&& ... args + ) + : fNext{next} + , fXEdgeType{xTile} + , fXMax{dimensions.width() - 1} + , fYEdgeType{yTile} + , fYMax{dimensions.height() - 1} + , fAccessor{std::forward<Args>(args)...} { } BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, const BilerpSampler& sampler) - : fNext{next}, fAccessor{sampler.fAccessor} { } - - Sk4f bilerpNonEdgePixel(SkScalar x, SkScalar y) { - Sk4f px00, px10, px01, px11; - - // bilerp4() expects xs, ys are the top-lefts of the 2x2 kernel. - Sk4f xs = Sk4f{x} - 0.5f; - Sk4f ys = Sk4f{y} - 0.5f; - Sk4f sampleXs = xs + Sk4f{0.0f, 1.0f, 0.0f, 1.0f}; - Sk4f sampleYs = ys + Sk4f{0.0f, 0.0f, 1.0f, 1.0f}; - fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); - return bilerp4(xs, ys, px00, px10, px01, px11); - } + : fNext{next} + , fXEdgeType{sampler.fXEdgeType} + , fXMax{sampler.fXMax} + , fYEdgeType{sampler.fYEdgeType} + , fYMax{sampler.fYMax} + , fAccessor{sampler.fAccessor} { } void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { SkASSERT(0 < n && n < 4); auto bilerpPixel = [&](int index) { - return this->bilerpNonEdgePixel(xs[index], ys[index]); + return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); }; if (n >= 1) fNext->blendPixel(bilerpPixel(0)); @@ -489,308 +524,484 @@ public: void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { auto bilerpPixel = [&](int index) { - return this->bilerpNonEdgePixel(xs[index], ys[index]); + return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); }; fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3)); } void pointSpan(Span span) override { - this->bilerpSpan(span, span.startY()); - } - - void repeatSpan(Span span, int32_t repeatCount) override { - while (repeatCount > 0) { - this->pointSpan(span); - repeatCount--; - } - } - - void SK_VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) override { - Sk4f px00, px10, px01, px11; - Sk4f xs = Sk4f{sampleXs[0]}; - Sk4f ys = Sk4f{sampleYs[0]}; - fAccessor.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); - Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11); - fNext->blendPixel(pixel); - } - - void bilerpSpan(Span span, SkScalar y) override { SkASSERT(!span.isEmpty()); SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; + + // Nothing to do. + if (count == 0) { + return; + } + + // Trivial case. No sample points are generated other than start. + if (count == 1) { + fNext->blendPixel(this->bilerpSamplePoint(start)); + return; + } + + // Note: the following code could be done in terms of dx = length / (count -1), but that + // would introduce a divide that is not needed for the most common dx == 1 cases. SkScalar absLength = SkScalarAbs(length); if (absLength == 0.0f) { - this->spanZeroRate(span, y); + // |dx| == 0 + // length is zero, so clamp an edge pixel. + this->spanZeroRate(span); } else if (absLength < (count - 1)) { - this->spanSlowRate(span, y); + // 0 < |dx| < 1. + this->spanSlowRate(span); } else if (absLength == (count - 1)) { - if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) { - if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) { - src_strategy_blend(span, fNext, &fAccessor); - } else { - this->spanUnitRateAlignedX(span, y); - } + // |dx| == 1. + if (sample_to_filter(span.startX()) == 1.0f + && sample_to_filter(span.startY()) == 1.0f) { + // All the pixels are aligned with the dest; go fast. + src_strategy_blend(span, fNext, &fAccessor); } else { - this->spanUnitRate(span, y); + // There is some sub-pixel offsets, so bilerp. + this->spanUnitRate(span); } + } else if (absLength < 2.0f * (count - 1)) { + // 1 < |dx| < 2. + this->spanMediumRate(span); } else { - this->spanFastRate(span, y); + // |dx| >= 2. + this->spanFastRate(span); + } + } + + void repeatSpan(Span span, int32_t repeatCount) override { + while (repeatCount > 0) { + this->pointSpan(span); + repeatCount--; } } private: - void spanZeroRate(Span span, SkScalar y1) { - SkScalar y0 = span.startY() - 0.5f; - y1 += 0.5f; - int iy0 = SkScalarFloorToInt(y0); - SkScalar filterY1 = y0 - iy0; - SkScalar filterY0 = 1.0f - filterY1; - int iy1 = SkScalarFloorToInt(y1); - int ix = SkScalarFloorToInt(span.startX()); - Sk4f pixelY0 = fAccessor.getPixelFromRow(fAccessor.row(iy0), ix); - Sk4f pixelY1 = fAccessor.getPixelFromRow(fAccessor.row(iy1), ix); - Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1; - int count = span.count(); + + // Convert a sample point to the points used by the filter. + void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) { + // May be less than zero. Be careful to use Floor. + int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax); + // Always greater than zero. Use the faster Trunc. + int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax); + int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax); + int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax); + + *filterXs = Sk4i{x0, x1, x0, x1}; + *filterYs = Sk4i{y0, y0, y1, y1}; + } + + // Given a sample point, generate a color by bilerping the four filter points. + Sk4f bilerpSamplePoint(SkPoint sample) { + Sk4i iXs, iYs; + filterPoints(sample, &iXs, &iYs); + Sk4f px00, px10, px01, px11; + fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11); + return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11); + } + + // Get two pixels at x from row0 and row1. + void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) { + *px0 = fAccessor.getPixelFromRow(row0, x); + *px1 = fAccessor.getPixelFromRow(row1, x); + } + + // |dx| == 0. This code assumes that length is zero. + void spanZeroRate(Span span) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + SkASSERT(length == 0.0f); + + // Filter for the blending of the top and bottom pixels. + SkScalar filterY = sample_to_filter(Y(start)); + + // Generate the four filter points from the sample point start. Generate the row* values. + Sk4i iXs, iYs; + this->filterPoints(start, &iXs, &iYs); + const void* const row0 = fAccessor.row(iYs[0]); + const void* const row1 = fAccessor.row(iYs[2]); + + // Get the two pixels that make up the clamping pixel. + Sk4f pxTop, pxBottom; + this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom); + Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom; + while (count >= 4) { - fNext->blend4Pixels(filterPixel, filterPixel, filterPixel, filterPixel); + fNext->blend4Pixels(pixel, pixel, pixel, pixel); count -= 4; } while (count > 0) { - fNext->blendPixel(filterPixel); + fNext->blendPixel(pixel); count -= 1; } } - // When moving through source space more slowly than dst space (zoomed in), - // we'll be sampling from the same source pixel more than once. - void spanSlowRate(Span span, SkScalar ry1) { - SkPoint start; - SkScalar length; - int count; + // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce + // computation. In particular, several destination pixels maybe generated from the same four + // source pixels. + // In the following code a "part" is a combination of two pixels from the same column of the + // filter. + void spanSlowRate(Span span) { + SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; - SkFixed fx = SkScalarToFixed(X(start)-0.5f); - SkFixed fdx = SkScalarToFixed(length / (count - 1)); + // Calculate the distance between each sample point. + const SkScalar dx = length / (count - 1); + SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f); + + // Generate the filter values for the top-left corner. + // Note: these values are in filter space; this has implications about how to adjust + // these values at each step. For example, as the sample point increases, the filter + // value decreases, this is because the filter and position are related by + // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite + // direction of the sample point which is increasing by dx. + SkScalar filterX = sample_to_filter(X(start)); + SkScalar filterY = sample_to_filter(Y(start)); + + // Generate the four filter points from the sample point start. Generate the row* values. + Sk4i iXs, iYs; + this->filterPoints(start, &iXs, &iYs); + const void* const row0 = fAccessor.row(iYs[0]); + const void* const row1 = fAccessor.row(iYs[2]); + + // Generate part of the filter value at xColumn. + auto partAtColumn = [&](int xColumn) { + int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); + Sk4f pxTop, pxBottom; + this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); + return pxTop * filterY + (1.0f - filterY) * pxBottom; + }; - Sk4f xAdjust; - if (fdx >= 0) { - xAdjust = Sk4f{-1.0f}; - } else { - xAdjust = Sk4f{1.0f}; - } - int ix = SkFixedFloorToInt(fx); - int ioldx = ix; - Sk4f x{SkFixedToScalar(fx) - ix}; - Sk4f dx{SkFixedToScalar(fdx)}; - SkScalar ry0 = Y(start) - 0.5f; - ry1 += 0.5f; - SkScalar yFloor = std::floor(ry0); - Sk4f y1 = Sk4f{ry0 - yFloor}; - Sk4f y0 = Sk4f{1.0f} - y1; - const void* const row0 = fAccessor.row(SkScalarFloorToInt(ry0)); - const void* const row1 = fAccessor.row(SkScalarFloorToInt(ry1)); - Sk4f fpixel00 = y0 * fAccessor.getPixelFromRow(row0, ix); - Sk4f fpixel01 = y1 * fAccessor.getPixelFromRow(row1, ix); - Sk4f fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); - Sk4f fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); - auto getNextPixel = [&]() { - if (ix != ioldx) { - fpixel00 = fpixel10; - fpixel01 = fpixel11; - fpixel10 = y0 * fAccessor.getPixelFromRow(row0, ix + 1); - fpixel11 = y1 * fAccessor.getPixelFromRow(row1, ix + 1); - ioldx = ix; - x = x + xAdjust; - } + // The leftPart is made up of two pixels from the left column of the filter, right part + // is similar. The top and bottom pixels in the *Part are created as a linear blend of + // the top and bottom pixels using filterY. See the partAtColumn function above. + Sk4f leftPart = partAtColumn(iXs[0]); + Sk4f rightPart = partAtColumn(iXs[1]); - Sk4f x0, x1; - x0 = Sk4f{1.0f} - x; - x1 = x; - Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11); - fx += fdx; - ix = SkFixedFloorToInt(fx); - x = x + dx; - return fpixel; + // Create a destination color by blending together a left and right part using filterX. + auto bilerp = [&]() { + Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); + return check_pixel(pixel); }; - while (count >= 4) { - Sk4f fpixel0 = getNextPixel(); - Sk4f fpixel1 = getNextPixel(); - Sk4f fpixel2 = getNextPixel(); - Sk4f fpixel3 = getNextPixel(); + // Send the first pixel to the destination. This simplifies the loop structure so that no + // extra pixels are fetched for the last iteration of the loop. + fNext->blendPixel(bilerp()); + count -= 1; + + if (dx > 0.0f) { + // * positive direction - generate destination pixels by sliding the filter from left + // to right. + int rightPartCursor = iXs[1]; + + // Advance the filter from left to right. Remember that moving the top-left corner of + // the filter to the right actually makes the filter value smaller. + auto advanceFilter = [&]() { + filterX -= dx; + if (filterX <= 0.0f) { + filterX += 1.0f; + leftPart = rightPart; + rightPartCursor += 1; + rightPart = partAtColumn(rightPartCursor); + } + SkASSERT(0.0f < filterX && filterX <= 1.0f); - fNext->blend4Pixels(fpixel0, fpixel1, fpixel2, fpixel3); - count -= 4; - } + return bilerp(); + }; - while (count > 0) { - fNext->blendPixel(getNextPixel()); + while (count >= 4) { + Sk4f px0 = advanceFilter(), + px1 = advanceFilter(), + px2 = advanceFilter(), + px3 = advanceFilter(); + fNext->blend4Pixels(px0, px1, px2, px3); + count -= 4; + } - count -= 1; + while (count > 0) { + fNext->blendPixel(advanceFilter()); + count -= 1; + } + } else { + // * negative direction - generate destination pixels by sliding the filter from + // right to left. + int leftPartCursor = iXs[0]; + + // Advance the filter from right to left. Remember that moving the top-left corner of + // the filter to the left actually makes the filter value larger. + auto advanceFilter = [&]() { + // Remember, dx < 0 therefore this adds |dx| to filterX. + filterX -= dx; + // At this point filterX may be > 1, and needs to be wrapped back on to the filter + // interval, and the next column in the filter is calculated. + if (filterX > 1.0f) { + filterX -= 1.0f; + rightPart = leftPart; + leftPartCursor -= 1; + leftPart = partAtColumn(leftPartCursor); + } + SkASSERT(0.0f < filterX && filterX <= 1.0f); + + return bilerp(); + }; + + while (count >= 4) { + Sk4f px0 = advanceFilter(), + px1 = advanceFilter(), + px2 = advanceFilter(), + px3 = advanceFilter(); + fNext->blend4Pixels(px0, px1, px2, px3); + count -= 4; + } + + while (count > 0) { + fNext->blendPixel(advanceFilter()); + count -= 1; + } } } - // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. - // We'll never re-use pixels, but we can at least load contiguous pixels. - void spanUnitRate(Span span, SkScalar y1) { - y1 += 0.5f; - SkScalar y0 = span.startY() - 0.5f; - int iy0 = SkScalarFloorToInt(y0); - SkScalar filterY1 = y0 - iy0; - SkScalar filterY0 = 1.0f - filterY1; - int iy1 = SkScalarFloorToInt(y1); - const void* rowY0 = fAccessor.row(iy0); - const void* rowY1 = fAccessor.row(iy1); - SkScalar x0 = span.startX() - 0.5f; - int ix0 = SkScalarFloorToInt(x0); - SkScalar filterX1 = x0 - ix0; - SkScalar filterX0 = 1.0f - filterX1; - - auto getPixelY0 = [&]() { - Sk4f px = fAccessor.getPixelFromRow(rowY0, ix0); - return px * filterY0; - }; - - auto getPixelY1 = [&]() { - Sk4f px = fAccessor.getPixelFromRow(rowY1, ix0); - return px * filterY1; + // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel. + // Every filter part is used for two destination pixels, and the code can bulk load four + // pixels at a time. + void spanUnitRate(Span span) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + SkASSERT(SkScalarAbs(length) == (count - 1)); + + // Calculate the four filter points of start, and use the two different Y values to + // generate the row pointers. + Sk4i iXs, iYs; + filterPoints(start, &iXs, &iYs); + const void* row0 = fAccessor.row(iYs[0]); + const void* row1 = fAccessor.row(iYs[2]); + + // Calculate the filter values for the top-left filter element. + const SkScalar filterX = sample_to_filter(X(start)); + const SkScalar filterY = sample_to_filter(Y(start)); + + // Generate part of the filter value at xColumn. + auto partAtColumn = [&](int xColumn) { + int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); + Sk4f pxTop, pxBottom; + this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); + return pxTop * filterY + (1.0f - filterY) * pxBottom; }; - auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { - fAccessor.get4Pixels(rowY0, ix, px0, px1, px2, px3); - *px0 = *px0 * filterY0; - *px1 = *px1 * filterY0; - *px2 = *px2 * filterY0; - *px3 = *px3 * filterY0; + auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) { + // Check if the pixels needed are near the edges. If not go fast using bulk pixels, + // otherwise be careful. + if (0 <= ix && ix <= fXMax - 3) { + Sk4f px00, px10, px20, px30, + px01, px11, px21, px31; + fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30); + fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31); + *part0 = filterY * px00 + (1.0f - filterY) * px01; + *part1 = filterY * px10 + (1.0f - filterY) * px11; + *part2 = filterY * px20 + (1.0f - filterY) * px21; + *part3 = filterY * px30 + (1.0f - filterY) * px31; + } else { + *part0 = partAtColumn(ix + 0); + *part1 = partAtColumn(ix + 1); + *part2 = partAtColumn(ix + 2); + *part3 = partAtColumn(ix + 3); + } }; - auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { - fAccessor.get4Pixels(rowY1, ix, px0, px1, px2, px3); - *px0 = *px0 * filterY1; - *px1 = *px1 * filterY1; - *px2 = *px2 * filterY1; - *px3 = *px3 * filterY1; + auto bilerp = [&](Sk4f& part0, Sk4f& part1) { + return part0 * filterX + part1 * (1.0f - filterX); }; - auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) { - return pixelX0 * filterX0 + pixelX1 * filterX1; - }; + if (length > 0) { + // * positive direction - generate destination pixels by sliding the filter from left + // to right. - // Mid making 4 unit rate. - Sk4f pxB = getPixelY0() + getPixelY1(); - if (span.length() > 0) { - int count = span.count(); + // overlapPart is the filter part from the end of the previous four pixels used at + // the start of the next four pixels. + Sk4f overlapPart = partAtColumn(iXs[0]); + int rightColumnCursor = iXs[1]; while (count >= 4) { - Sk4f px00, px10, px20, px30; - get4PixelsY0(ix0, &px00, &px10, &px20, &px30); - Sk4f px01, px11, px21, px31; - get4PixelsY1(ix0, &px01, &px11, &px21, &px31); - Sk4f pxS0 = px00 + px01; - Sk4f px0 = lerp(pxB, pxS0); - Sk4f pxS1 = px10 + px11; - Sk4f px1 = lerp(pxS0, pxS1); - Sk4f pxS2 = px20 + px21; - Sk4f px2 = lerp(pxS1, pxS2); - Sk4f pxS3 = px30 + px31; - Sk4f px3 = lerp(pxS2, pxS3); - pxB = pxS3; + Sk4f part0, part1, part2, part3; + get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3); + Sk4f px0 = bilerp(overlapPart, part0); + Sk4f px1 = bilerp(part0, part1); + Sk4f px2 = bilerp(part1, part2); + Sk4f px3 = bilerp(part2, part3); + overlapPart = part3; fNext->blend4Pixels(px0, px1, px2, px3); - ix0 += 4; + rightColumnCursor += 4; count -= 4; } + while (count > 0) { - Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0); - Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0); + Sk4f rightPart = partAtColumn(rightColumnCursor); - fNext->blendPixel(lerp(pixelY0, pixelY1)); - ix0 += 1; + fNext->blendPixel(bilerp(overlapPart, rightPart)); + overlapPart = rightPart; + rightColumnCursor += 1; count -= 1; } } else { - int count = span.count(); + // * negative direction - generate destination pixels by sliding the filter from + // right to left. + Sk4f overlapPart = partAtColumn(iXs[1]); + int leftColumnCursor = iXs[0]; + while (count >= 4) { - Sk4f px00, px10, px20, px30; - get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30); - Sk4f px01, px11, px21, px31; - get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31); - Sk4f pxS3 = px30 + px31; - Sk4f px0 = lerp(pxS3, pxB); - Sk4f pxS2 = px20 + px21; - Sk4f px1 = lerp(pxS2, pxS3); - Sk4f pxS1 = px10 + px11; - Sk4f px2 = lerp(pxS1, pxS2); - Sk4f pxS0 = px00 + px01; - Sk4f px3 = lerp(pxS0, pxS1); - pxB = pxS0; + Sk4f part0, part1, part2, part3; + get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0); + Sk4f px0 = bilerp(part0, overlapPart); + Sk4f px1 = bilerp(part1, part0); + Sk4f px2 = bilerp(part2, part1); + Sk4f px3 = bilerp(part3, part2); + overlapPart = part3; fNext->blend4Pixels(px0, px1, px2, px3); - ix0 -= 4; + leftColumnCursor -= 4; count -= 4; } + while (count > 0) { - Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix0); - Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix0); + Sk4f leftPart = partAtColumn(leftColumnCursor); - fNext->blendPixel(lerp(pixelY0, pixelY1)); - ix0 -= 1; + fNext->blendPixel(bilerp(leftPart, overlapPart)); + overlapPart = leftPart; + leftColumnCursor -= 1; count -= 1; } } } - void spanUnitRateAlignedX(Span span, SkScalar y1) { - SkScalar y0 = span.startY() - 0.5f; - y1 += 0.5f; - int iy0 = SkScalarFloorToInt(y0); - SkScalar filterY1 = y0 - iy0; - SkScalar filterY0 = 1.0f - filterY1; - int iy1 = SkScalarFloorToInt(y1); - int ix = SkScalarFloorToInt(span.startX()); - const void* rowY0 = fAccessor.row(iy0); - const void* rowY1 = fAccessor.row(iy1); - auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) { - return *pixelY0 * filterY0 + *pixelY1 * filterY1; + // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but + // still slow enough to take advantage of previous calculations. + void spanMediumRate(Span span) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + + // Calculate the distance between each sample point. + const SkScalar dx = length / (count - 1); + SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f)); + + // Generate the filter values for the top-left corner. + // Note: these values are in filter space; this has implications about how to adjust + // these values at each step. For example, as the sample point increases, the filter + // value decreases, this is because the filter and position are related by + // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite + // direction of the sample point which is increasing by dx. + SkScalar filterX = sample_to_filter(X(start)); + SkScalar filterY = sample_to_filter(Y(start)); + + // Generate the four filter points from the sample point start. Generate the row* values. + Sk4i iXs, iYs; + this->filterPoints(start, &iXs, &iYs); + const void* const row0 = fAccessor.row(iYs[0]); + const void* const row1 = fAccessor.row(iYs[2]); + + // Generate part of the filter value at xColumn. + auto partAtColumn = [&](int xColumn) { + int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); + Sk4f pxTop, pxBottom; + this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); + return pxTop * filterY + (1.0f - filterY) * pxBottom; + }; + + // The leftPart is made up of two pixels from the left column of the filter, right part + // is similar. The top and bottom pixels in the *Part are created as a linear blend of + // the top and bottom pixels using filterY. See the nextPart function below. + Sk4f leftPart = partAtColumn(iXs[0]); + Sk4f rightPart = partAtColumn(iXs[1]); + + // Create a destination color by blending together a left and right part using filterX. + auto bilerp = [&]() { + Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); + return check_pixel(pixel); }; - if (span.length() > 0) { - int count = span.count(); + // Send the first pixel to the destination. This simplifies the loop structure so that no + // extra pixels are fetched for the last iteration of the loop. + fNext->blendPixel(bilerp()); + count -= 1; + + if (dx > 0.0f) { + // * positive direction - generate destination pixels by sliding the filter from left + // to right. + int rightPartCursor = iXs[1]; + + // Advance the filter from left to right. Remember that moving the top-left corner of + // the filter to the right actually makes the filter value smaller. + auto advanceFilter = [&]() { + filterX -= dx; + // At this point filterX is less than zero, but might actually be less than -1. + if (filterX > -1.0f) { + filterX += 1.0f; + leftPart = rightPart; + rightPartCursor += 1; + rightPart = partAtColumn(rightPartCursor); + } else { + filterX += 2.0f; + rightPartCursor += 2; + leftPart = partAtColumn(rightPartCursor - 1); + rightPart = partAtColumn(rightPartCursor); + } + SkASSERT(0.0f < filterX && filterX <= 1.0f); + + return bilerp(); + }; + while (count >= 4) { - Sk4f px00, px10, px20, px30; - fAccessor.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30); - Sk4f px01, px11, px21, px31; - fAccessor.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31); - fNext->blend4Pixels( - lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); - ix += 4; + Sk4f px0 = advanceFilter(), + px1 = advanceFilter(), + px2 = advanceFilter(), + px3 = advanceFilter(); + fNext->blend4Pixels(px0, px1, px2, px3); count -= 4; } - while (count > 0) { - Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); - Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); - fNext->blendPixel(lerp(&pixelY0, &pixelY1)); - ix += 1; + while (count > 0) { + fNext->blendPixel(advanceFilter()); count -= 1; } } else { - int count = span.count(); + // * negative direction - generate destination pixels by sliding the filter from + // right to left. + int leftPartCursor = iXs[0]; + + auto advanceFilter = [&]() { + // Remember, dx < 0 therefore this adds |dx| to filterX. + filterX -= dx; + // At this point, filterX is greater than one, but may actually be greater than two. + if (filterX < 2.0f) { + filterX -= 1.0f; + rightPart = leftPart; + leftPartCursor -= 1; + leftPart = partAtColumn(leftPartCursor); + } else { + filterX -= 2.0f; + leftPartCursor -= 2; + rightPart = partAtColumn(leftPartCursor - 1); + leftPart = partAtColumn(leftPartCursor); + } + SkASSERT(0.0f < filterX && filterX <= 1.0f); + return bilerp(); + }; + while (count >= 4) { - Sk4f px00, px10, px20, px30; - fAccessor.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00); - Sk4f px01, px11, px21, px31; - fAccessor.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01); - fNext->blend4Pixels( - lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); - ix -= 4; + Sk4f px0 = advanceFilter(), + px1 = advanceFilter(), + px2 = advanceFilter(), + px3 = advanceFilter(); + fNext->blend4Pixels(px0, px1, px2, px3); count -= 4; } - while (count > 0) { - Sk4f pixelY0 = fAccessor.getPixelFromRow(rowY0, ix); - Sk4f pixelY1 = fAccessor.getPixelFromRow(rowY1, ix); - fNext->blendPixel(lerp(&pixelY0, &pixelY1)); - ix -= 1; + while (count > 0) { + fNext->blendPixel(advanceFilter()); count -= 1; } } @@ -798,34 +1009,26 @@ private: // We're moving through source space faster than dst (zoomed out), // so we'll never reuse a source pixel or be able to do contiguous loads. - void spanFastRate(Span span, SkScalar y1) { - SkPoint start; - SkScalar length; - int count; + void spanFastRate(Span span) { + SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; SkScalar x = X(start); SkScalar y = Y(start); - // In this sampler, it is assumed that if span.StartY() and y1 are the same then both - // y-lines are on the same tile. - if (y == y1) { - // Both y-lines are on the same tile. - span_fallback(span, this); - } else { - // The y-lines are on different tiles. - SkScalar dx = length / (count - 1); - Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f}; - while (count > 0) { - Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x}; - this->bilerpEdge(xs, ys); - x += dx; - count -= 1; - } + SkScalar dx = length / (count - 1); + while (count > 0) { + fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y})); + x += dx; + count -= 1; } } - Next* const fNext; - Accessor fAccessor; + Next* const fNext; + const SkShader::TileMode fXEdgeType; + const int fXMax; + const SkShader::TileMode fYEdgeType; + const int fYMax; + Accessor fAccessor; }; } // namespace |