diff options
author | 2016-03-23 09:00:33 -0700 | |
---|---|---|
committer | 2016-03-23 09:00:33 -0700 | |
commit | 6eff52afb458bf6702a715d88611fd571544ef73 (patch) | |
tree | a2ffdedd2b0b1fe888dbc3faff6b1f894803f5c2 /src | |
parent | 0b8321e19b565f3a13af85b55f046c0a74396a7d (diff) |
WIP: experimental bilerp pipeline.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1775963002
Review URL: https://codereview.chromium.org/1775963002
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkBitmapProcShader.cpp | 29 | ||||
-rw-r--r-- | src/core/SkBitmapProcState.cpp | 7 | ||||
-rw-r--r-- | src/core/SkBitmapProcState.h | 4 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline.cpp | 652 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline.h | 17 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_core.h | 89 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_matrix.h | 28 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_sample.h | 644 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_tile.h | 258 |
9 files changed, 1198 insertions, 530 deletions
diff --git a/src/core/SkBitmapProcShader.cpp b/src/core/SkBitmapProcShader.cpp index 4813e01d54..fb58f542cc 100644 --- a/src/core/SkBitmapProcShader.cpp +++ b/src/core/SkBitmapProcShader.cpp @@ -46,7 +46,7 @@ public: ~BitmapProcInfoContext() override { fInfo->~SkBitmapProcInfo(); } - + uint32_t getFlags() const override { return fFlags; } private: @@ -123,8 +123,10 @@ public: { // Need to ensure that our pipeline is created at a 16byte aligned address fPipeline = (SkLinearBitmapPipeline*)SkAlign16((intptr_t)fStorage); - new (fPipeline) SkLinearBitmapPipeline(info->fInvMatrix, info->fFilterQuality, + float alpha = SkColorGetA(info->fPaintColor) / 255.0f; + new (fPipeline) SkLinearBitmapPipeline(info->fRealInvMatrix, info->fFilterQuality, info->fTileModeX, info->fTileModeY, + alpha, info->fPixmap); // To implement the old shadeSpan entry-point, we need to efficiently convert our native @@ -175,7 +177,8 @@ static bool choose_linear_pipeline(const SkShader::ContextRec& rec, const SkImag // These src attributes are not supported in the new 4f context (yet) // if (srcInfo.bytesPerPixel() < 4 || - kRGBA_F16_SkColorType == srcInfo.colorType()) { + kRGBA_F16_SkColorType == srcInfo.colorType() || + kIndex_8_SkColorType == srcInfo.colorType()) { return false; } @@ -211,25 +214,13 @@ SkShader::Context* SkBitmapProcShader::MakeContext(const SkShader& shader, return nullptr; } - // Decide if we can/want to use the new linear pipeine + // Decide if we can/want to use the new linear pipeline bool useLinearPipeline = choose_linear_pipeline(rec, provider.info()); - // New code doesn't support Mirror (YET), so we detect that here. - // - if (SkShader::kMirror_TileMode == tmx || SkShader::kMirror_TileMode == tmy) { - useLinearPipeline = false; - } - - // New code doesn't support Mirror (YET), so we detect that here. - // - if (totalInverse.hasPerspective()) { - useLinearPipeline = false; - } - // // For now, only enable locally since we are hitting some crashers on the test bots // - useLinearPipeline = false; + //useLinearPipeline = false; if (useLinearPipeline) { void* infoStorage = (char*)storage + sizeof(LinearPipelineContext); @@ -238,6 +229,10 @@ SkShader::Context* SkBitmapProcShader::MakeContext(const SkShader& shader, info->~SkBitmapProcInfo(); return nullptr; } + if (info->fPixmap.colorType() != kRGBA_8888_SkColorType + && info->fPixmap.colorType() != kBGRA_8888_SkColorType) { + return nullptr; + } return new (storage) LinearPipelineContext(shader, rec, info); } else { void* stateStorage = (char*)storage + sizeof(BitmapProcShaderContext); diff --git a/src/core/SkBitmapProcState.cpp b/src/core/SkBitmapProcState.cpp index c1692881dc..ab2321254b 100644 --- a/src/core/SkBitmapProcState.cpp +++ b/src/core/SkBitmapProcState.cpp @@ -138,6 +138,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) { } fPixmap = fBMState->pixmap(); fInvMatrix = fBMState->invMatrix(); + fRealInvMatrix = fBMState->invMatrix(); fPaintColor = paint.getColor(); fFilterQuality = fBMState->quality(); SkASSERT(fPixmap.addr()); @@ -198,7 +199,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) { fFilterQuality = kNone_SkFilterQuality; } } - + return true; } @@ -332,7 +333,7 @@ bool SkBitmapProcState::chooseScanlineProcs(bool trivialMatrix, bool clampClamp) S4444_alpha_D32_filter_DXDY, S4444_opaque_D32_filter_DX, S4444_alpha_D32_filter_DX, - + // A8 treats alpha/opaque the same (equally efficient) SA8_alpha_D32_nofilter_DXDY, SA8_alpha_D32_nofilter_DXDY, @@ -342,7 +343,7 @@ bool SkBitmapProcState::chooseScanlineProcs(bool trivialMatrix, bool clampClamp) SA8_alpha_D32_filter_DXDY, SA8_alpha_D32_filter_DX, SA8_alpha_D32_filter_DX, - + // todo: possibly specialize on opaqueness SG8_alpha_D32_nofilter_DXDY, SG8_alpha_D32_nofilter_DXDY, diff --git a/src/core/SkBitmapProcState.h b/src/core/SkBitmapProcState.h index 26e8db8858..40dc31a5e0 100644 --- a/src/core/SkBitmapProcState.h +++ b/src/core/SkBitmapProcState.h @@ -35,7 +35,9 @@ struct SkBitmapProcInfo { const SkBitmapProvider fProvider; SkPixmap fPixmap; - SkMatrix fInvMatrix; // copy of what is in fBMState, can we remove the dup? + SkMatrix fInvMatrix; // This changes based on tile mode. + // TODO: combine fInvMatrix and fRealInvMatrix. + SkMatrix fRealInvMatrix; // The actual inverse matrix. SkColor fPaintColor; SkShader::TileMode fTileModeX; SkShader::TileMode fTileModeY; diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp index 4c21180a16..3a9a0196f5 100644 --- a/src/core/SkLinearBitmapPipeline.cpp +++ b/src/core/SkLinearBitmapPipeline.cpp @@ -17,12 +17,20 @@ #include "SkLinearBitmapPipeline_core.h" #include "SkLinearBitmapPipeline_matrix.h" #include "SkLinearBitmapPipeline_tile.h" +#include "SkLinearBitmapPipeline_sample.h" class SkLinearBitmapPipeline::PointProcessorInterface { public: virtual ~PointProcessorInterface() { } + // Take the first n (where 0 < n && n < 4) items from xs and ys and sample those points. For + // nearest neighbor, that means just taking the floor xs and ys. For bilerp, this means + // to expand the bilerp filter around the point and sample using that filter. virtual void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) = 0; + // Same as pointListFew, but n = 4. virtual void VECTORCALL pointList4(Sk4s xs, Sk4s ys) = 0; + // A span is a compact form of sample points that are obtained by mapping points from + // destination space to source space. This is used for horizontal lines only, and is mainly + // used to take advantage of memory coherence for horizontal spans. virtual void pointSpan(Span span) = 0; }; @@ -41,8 +49,13 @@ public: // +--------+--------+ // These pixels coordinates are arranged in the following order in xs and ys: // px00 px10 px01 px11 - virtual void VECTORCALL bilerpList(Sk4s xs, Sk4s ys) = 0; - virtual void bilerpSpan(BilerpSpan span) = 0; + virtual void VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) = 0; + + // A span represents sample points that have been mapped from destination space to source + // space. Each sample point is then expanded to the four bilerp points by add +/- 0.5. The + // resulting Y values my be off the tile. When y +/- 0.5 are more than 1 apart because of + // tiling, the second Y is used to denote the retiled Y value. + virtual void bilerpSpan(Span span, SkScalar y) = 0; }; class SkLinearBitmapPipeline::PixelPlacerInterface { @@ -54,6 +67,9 @@ public: }; namespace { + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Matrix Stage // PointProcessor uses a strategy to help complete the work of the different stages. The strategy // must implement the following methods: // * processPoints(xs, ys) - must mutate the xs and ys for the stage. @@ -64,10 +80,10 @@ namespace { // maybeProcessSpan - returns false if it can not process the span and needs to fallback to // point lists for processing. template<typename Strategy, typename Next> -class PointProcessor final : public SkLinearBitmapPipeline::PointProcessorInterface { +class MatrixStage final : public SkLinearBitmapPipeline::PointProcessorInterface { public: template <typename... Args> - PointProcessor(Next* next, Args&&... args) + MatrixStage(Next* next, Args&&... args) : fNext{next} , fStrategy{std::forward<Args>(args)...}{ } @@ -94,66 +110,31 @@ private: Strategy fStrategy; }; -// See PointProcessor for responsibilities of Strategy. -template<typename Strategy, typename Next> -class BilerpProcessor final : public SkLinearBitmapPipeline::BilerpProcessorInterface { -public: - template <typename... Args> - BilerpProcessor(Next* next, Args&&... args) - : fNext{next} - , fStrategy{std::forward<Args>(args)...}{ } - - void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { - fStrategy.processPoints(&xs, &ys); - fNext->pointListFew(n, xs, ys); - } - - void VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { - fStrategy.processPoints(&xs, &ys); - fNext->pointList4(xs, ys); - } - - void VECTORCALL bilerpList(Sk4s xs, Sk4s ys) override { - fStrategy.processPoints(&xs, &ys); - fNext->bilerpList(xs, ys); - } - - void pointSpan(Span span) override { - SkASSERT(!span.isEmpty()); - if (!fStrategy.maybeProcessSpan(span, fNext)) { - span_fallback(span, this); - } - } - - void bilerpSpan(BilerpSpan bSpan) override { - SkASSERT(!bSpan.isEmpty()); - if (!fStrategy.maybeProcessBilerpSpan(bSpan, fNext)) { - bilerp_span_fallback(bSpan, this); - } - } - -private: - Next* const fNext; - Strategy fStrategy; -}; +template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> +using TranslateMatrix = MatrixStage<TranslateMatrixStrategy, Next>; -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Matrix Stage template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> -using TranslateMatrix = PointProcessor<TranslateMatrixStrategy, Next>; +using ScaleMatrix = MatrixStage<ScaleMatrixStrategy, Next>; template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> -using ScaleMatrix = PointProcessor<ScaleMatrixStrategy, Next>; +using AffineMatrix = MatrixStage<AffineMatrixStrategy, Next>; template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> -using AffineMatrix = PointProcessor<AffineMatrixStrategy, Next>; +using PerspectiveMatrix = MatrixStage<PerspectiveMatrixStrategy, Next>; + static SkLinearBitmapPipeline::PointProcessorInterface* choose_matrix( SkLinearBitmapPipeline::PointProcessorInterface* next, const SkMatrix& inverse, SkLinearBitmapPipeline::MatrixStage* matrixProc) { if (inverse.hasPerspective()) { - SkFAIL("Not implemented."); + matrixProc->Initialize<PerspectiveMatrix<>>( + next, + SkVector{inverse.getTranslateX(), inverse.getTranslateY()}, + SkVector{inverse.getScaleX(), inverse.getScaleY()}, + SkVector{inverse.getSkewX(), inverse.getSkewY()}, + SkVector{inverse.getPerspX(), inverse.getPerspY()}, + inverse.get(SkMatrix::kMPersp2)); } else if (inverse.getSkewX() != 0.0f || inverse.getSkewY() != 0.0f) { matrixProc->Initialize<AffineMatrix<>>( next, @@ -176,370 +157,305 @@ static SkLinearBitmapPipeline::PointProcessorInterface* choose_matrix( } //////////////////////////////////////////////////////////////////////////////////////////////////// -// Bilerp Expansion Stage -template <typename Next = SkLinearBitmapPipeline::BilerpProcessorInterface> -class ExpandBilerp final : public SkLinearBitmapPipeline::PointProcessorInterface { +// Tile Stage + +template<typename XStrategy, typename YStrategy, typename Next> +class NearestTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { public: - ExpandBilerp(Next* next) : fNext{next} { } + template <typename... Args> + NearestTileStage(Next* next, SkISize dimensions) + : fNext{next} + , fXStrategy{dimensions.width()} + , fYStrategy{dimensions.height()}{ } void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { - SkASSERT(0 < n && n < 4); - // px00 px10 px01 px11 - const Sk4s kXOffsets{-0.5f, 0.5f, -0.5f, 0.5f}, - kYOffsets{-0.5f, -0.5f, 0.5f, 0.5f}; - if (n >= 1) fNext->bilerpList(Sk4s{xs[0]} + kXOffsets, Sk4s{ys[0]} + kYOffsets); - if (n >= 2) fNext->bilerpList(Sk4s{xs[1]} + kXOffsets, Sk4s{ys[1]} + kYOffsets); - if (n >= 3) fNext->bilerpList(Sk4s{xs[2]} + kXOffsets, Sk4s{ys[2]} + kYOffsets); + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + fNext->pointListFew(n, xs, ys); } - void VECTORCALL pointList4(Sk4f xs, Sk4f ys) override { - // px00 px10 px01 px11 - const Sk4f kXOffsets{-0.5f, 0.5f, -0.5f, 0.5f}, - kYOffsets{-0.5f, -0.5f, 0.5f, 0.5f}; - fNext->bilerpList(Sk4s{xs[0]} + kXOffsets, Sk4s{ys[0]} + kYOffsets); - fNext->bilerpList(Sk4s{xs[1]} + kXOffsets, Sk4s{ys[1]} + kYOffsets); - fNext->bilerpList(Sk4s{xs[2]} + kXOffsets, Sk4s{ys[2]} + kYOffsets); - fNext->bilerpList(Sk4s{xs[3]} + kXOffsets, Sk4s{ys[3]} + kYOffsets); + void VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + fNext->pointList4(xs, ys); } + // The span you pass must not be empty. void pointSpan(Span span) override { SkASSERT(!span.isEmpty()); SkPoint start; SkScalar length; int count; std::tie(start, length, count) = span; - // Adjust the span so that it is in the correct phase with the pixel. - BilerpSpan bSpan{X(start) - 0.5f, Y(start) - 0.5f, Y(start) + 0.5f, length, count}; - fNext->bilerpSpan(bSpan); + SkScalar x = X(start); + SkScalar y = fYStrategy.tileY(Y(start)); + Span yAdjustedSpan{{x, y}, length, count}; + if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) { + span_fallback(span, this); + } } private: Next* const fNext; + XStrategy fXStrategy; + YStrategy fYStrategy; }; -static SkLinearBitmapPipeline::PointProcessorInterface* choose_filter( - SkLinearBitmapPipeline::BilerpProcessorInterface* next, - SkFilterQuality filterQuailty, - SkLinearBitmapPipeline::FilterStage* filterProc) { - if (SkFilterQuality::kNone_SkFilterQuality == filterQuailty) { - return next; - } else { - filterProc->Initialize<ExpandBilerp<>>(next); - return filterProc->get(); +template<typename XStrategy, typename YStrategy, typename Next> +class BilerpTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { +public: + template <typename... Args> + BilerpTileStage(Next* next, SkISize dimensions) + : fXMax(dimensions.width()) + , fYMax(dimensions.height()) + , fNext{next} + , fXStrategy{dimensions.width()} + , fYStrategy{dimensions.height()}{ } + + void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + // TODO: check to see if xs and ys are in range then just call pointListFew on next. + if (n >= 1) this->bilerpPoint(xs[0], ys[0]); + if (n >= 2) this->bilerpPoint(xs[1], ys[1]); + if (n >= 3) this->bilerpPoint(xs[2], ys[2]); } -} -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Tile Stage -template <typename Next = SkLinearBitmapPipeline::BilerpProcessorInterface> -using Clamp = BilerpProcessor<ClampStrategy, Next>; + void VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + // TODO: check to see if xs and ys are in range then just call pointList4 on next. + this->bilerpPoint(xs[0], ys[0]); + this->bilerpPoint(xs[1], ys[1]); + this->bilerpPoint(xs[2], ys[2]); + this->bilerpPoint(xs[3], ys[3]); + } + + struct Wrapper { + void pointSpan(Span span) { + processor->breakIntoEdges(span); + } -template <typename Next = SkLinearBitmapPipeline::BilerpProcessorInterface> -using Repeat = BilerpProcessor<RepeatStrategy, Next>; + BilerpTileStage* processor; + }; -static SkLinearBitmapPipeline::BilerpProcessorInterface* choose_tiler( - SkLinearBitmapPipeline::BilerpProcessorInterface* next, - SkSize dimensions, - SkShader::TileMode xMode, - SkShader::TileMode yMode, - SkLinearBitmapPipeline::TileStage* tileProcXOrBoth, - SkLinearBitmapPipeline::TileStage* tileProcY) { - if (xMode == yMode) { - switch (xMode) { - case SkShader::kClamp_TileMode: - tileProcXOrBoth->Initialize<Clamp<>>(next, dimensions); - break; - case SkShader::kRepeat_TileMode: - tileProcXOrBoth->Initialize<Repeat<>>(next, dimensions); - break; - case SkShader::kMirror_TileMode: - SkFAIL("Not implemented."); - break; - } - } else { - switch (yMode) { - case SkShader::kClamp_TileMode: - tileProcY->Initialize<Clamp<>>(next, Y(dimensions)); - break; - case SkShader::kRepeat_TileMode: - tileProcY->Initialize<Repeat<>>(next, Y(dimensions)); - break; - case SkShader::kMirror_TileMode: - SkFAIL("Not implemented."); - break; - } - switch (xMode) { - case SkShader::kClamp_TileMode: - tileProcXOrBoth->Initialize<Clamp<>>(tileProcY->get(), X(dimensions)); - break; - case SkShader::kRepeat_TileMode: - tileProcXOrBoth->Initialize<Repeat<>>(tileProcY->get(), X(dimensions)); - break; - case SkShader::kMirror_TileMode: - SkFAIL("Not implemented."); - break; + // The span you pass must not be empty. + void pointSpan(Span span) override { + SkASSERT(!span.isEmpty()); + + Wrapper wrapper = {this}; + if (!fXStrategy.maybeProcessSpan(span, &wrapper)) { + span_fallback(span, this); } } - return tileProcXOrBoth->get(); -} -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Source Sampling Stage -class sRGBFast { -public: - static Sk4s VECTORCALL sRGBToLinear(Sk4s pixel) { - Sk4s l = pixel * pixel; - return Sk4s{l[0], l[1], l[2], pixel[3]}; +private: + void bilerpPoint(SkScalar x, SkScalar y) { + Sk4f txs = Sk4f{x} + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f}; + Sk4f tys = Sk4f{y} + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f}; + fXStrategy.tileXPoints(&txs); + fYStrategy.tileYPoints(&tys); + fNext->bilerpEdge(txs, tys); } -}; -enum class ColorOrder { - kRGBA = false, - kBGRA = true, -}; -template <SkColorProfileType colorProfile, ColorOrder colorOrder> -class Pixel8888 { -public: - Pixel8888(int width, const uint32_t* src) : fSrc{src}, fWidth{width}{ } - Pixel8888(const SkPixmap& srcPixmap) - : fSrc{srcPixmap.addr32()} - , fWidth{static_cast<int>(srcPixmap.rowBytes() / 4)} { } - - void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) { - Sk4i XIs = SkNx_cast<int, SkScalar>(xs); - Sk4i YIs = SkNx_cast<int, SkScalar>(ys); - Sk4i bufferLoc = YIs * fWidth + XIs; - switch (n) { - case 3: - *px2 = this->getPixel(fSrc, bufferLoc[2]); - case 2: - *px1 = this->getPixel(fSrc, bufferLoc[1]); - case 1: - *px0 = this->getPixel(fSrc, bufferLoc[0]); - default: - break; + void handleEdges(Span span, SkScalar dx) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + SkScalar x = X(start); + SkScalar y = Y(start); + SkScalar tiledY = fYStrategy.tileY(y); + while (count > 0) { + this->bilerpPoint(x, tiledY); + x += dx; + count -= 1; } } - void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { - Sk4i XIs = SkNx_cast<int, SkScalar>(xs); - Sk4i YIs = SkNx_cast<int, SkScalar>(ys); - Sk4i bufferLoc = YIs * fWidth + XIs; - *px0 = this->getPixel(fSrc, bufferLoc[0]); - *px1 = this->getPixel(fSrc, bufferLoc[1]); - *px2 = this->getPixel(fSrc, bufferLoc[2]); - *px3 = this->getPixel(fSrc, bufferLoc[3]); - } - - void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { - const uint32_t* src = static_cast<const uint32_t*>(vsrc); - *px0 = this->getPixel(src, index + 0); - *px1 = this->getPixel(src, index + 1); - *px2 = this->getPixel(src, index + 2); - *px3 = this->getPixel(src, index + 3); + void yProcessSpan(Span span) { + SkScalar tiledY = fYStrategy.tileY(span.startY()); + if (0.5f <= tiledY && tiledY < fYMax - 0.5f ) { + Span tiledSpan{{span.startX(), tiledY}, span.length(), span.count()}; + fNext->pointSpan(tiledSpan); + } else { + // Convert to the Y0 bilerp sample set by shifting by -0.5f. Then tile that new y + // value and shift it back resulting in the working Y0. Do the same thing with Y1 but + // in the opposite direction. + SkScalar y0 = fYStrategy.tileY(span.startY() - 0.5f) + 0.5f; + SkScalar y1 = fYStrategy.tileY(span.startY() + 0.5f) - 0.5f; + Span newSpan{{span.startX(), y0}, span.length(), span.count()}; + fNext->bilerpSpan(newSpan, y1); + } } + void breakIntoEdges(Span span) { + if (span.length() == 0) { + yProcessSpan(span); + } else { + SkScalar dx = span.length() / (span.count() - 1); + if (span.length() > 0) { + Span leftBorder = span.breakAt(0.5f, dx); + if (!leftBorder.isEmpty()) { + this->handleEdges(leftBorder, dx); + } + Span center = span.breakAt(fXMax - 0.5f, dx); + if (!center.isEmpty()) { + this->yProcessSpan(center); + } + + if (!span.isEmpty()) { + this->handleEdges(span, dx); + } + } else { + Span center = span.breakAt(fXMax + 0.5f, dx); + if (!span.isEmpty()) { + this->handleEdges(span, dx); + } + Span leftEdge = center.breakAt(0.5f, dx); + if (!center.isEmpty()) { + this->yProcessSpan(center); + } + if (!leftEdge.isEmpty()) { + this->handleEdges(leftEdge, dx); + } - Sk4f getPixel(const void* vsrc, int index) { - const uint32_t* src = static_cast<const uint32_t*>(vsrc); - Sk4b bytePixel = Sk4b::Load((uint8_t *)(&src[index])); - Sk4f pixel = SkNx_cast<float, uint8_t>(bytePixel); - if (colorOrder == ColorOrder::kBGRA) { - pixel = SkNx_shuffle<2, 1, 0, 3>(pixel); - } - pixel = pixel * Sk4f{1.0f/255.0f}; - if (colorProfile == kSRGB_SkColorProfileType) { - pixel = sRGBFast::sRGBToLinear(pixel); + } } - return pixel; } - const uint32_t* row(int y) { return fSrc + y * fWidth[0]; } + SkScalar fXMax; + SkScalar fYMax; + Next* const fNext; + XStrategy fXStrategy; + YStrategy fYStrategy; +}; -private: - const uint32_t* const fSrc; - const Sk4i fWidth; +template <typename XStrategy, typename YStrategy, typename Next> +void make_tile_stage( + SkFilterQuality filterQuality, SkISize dimensions, + Next* next, SkLinearBitmapPipeline::TileStage* tileStage) { + if (filterQuality == kNone_SkFilterQuality) { + tileStage->Initialize<NearestTileStage<XStrategy, YStrategy, Next>>(next, dimensions); + } else { + tileStage->Initialize<BilerpTileStage<XStrategy, YStrategy, Next>>(next, dimensions); + } +} +template <typename XStrategy> +void choose_tiler_ymode( + SkShader::TileMode yMode, SkFilterQuality filterQuality, SkISize dimensions, + SkLinearBitmapPipeline::BilerpProcessorInterface* next, + SkLinearBitmapPipeline::TileStage* tileStage) { + switch (yMode) { + case SkShader::kClamp_TileMode: + make_tile_stage<XStrategy, YClampStrategy>(filterQuality, dimensions, next, tileStage); + break; + case SkShader::kRepeat_TileMode: + make_tile_stage<XStrategy, YRepeatStrategy>(filterQuality, dimensions, next, tileStage); + break; + case SkShader::kMirror_TileMode: + make_tile_stage<XStrategy, YMirrorStrategy>(filterQuality, dimensions, next, tileStage); + break; + } }; -// Explaination of the math: -// 1 - x x -// +--------+--------+ -// | | | -// 1 - y | px00 | px10 | -// | | | -// +--------+--------+ -// | | | -// y | px01 | px11 | -// | | | -// +--------+--------+ -// -// -// Given a pixelxy each is multiplied by a different factor derived from the fractional part of x -// and y: -// * px00 -> (1 - x)(1 - y) = 1 - x - y + xy -// * px10 -> x(1 - y) = x - xy -// * px01 -> (1 - x)y = y - xy -// * px11 -> xy -// So x * y is calculated first and then used to calculate all the other factors. -static Sk4s VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, - Sk4f px01, Sk4f px11) { - // Calculate fractional xs and ys. - Sk4s fxs = xs - xs.floor(); - Sk4s fys = ys - ys.floor(); - Sk4s fxys{fxs * fys}; - Sk4f sum = px11 * fxys; - sum = sum + px01 * (fys - fxys); - sum = sum + px10 * (fxs - fxys); - sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); - return sum; +static SkLinearBitmapPipeline::PointProcessorInterface* choose_tiler( + SkLinearBitmapPipeline::BilerpProcessorInterface* next, + SkISize dimensions, + SkShader::TileMode xMode, + SkShader::TileMode yMode, + SkFilterQuality filterQuality, + SkLinearBitmapPipeline::TileStage* tileStage) { + switch (xMode) { + case SkShader::kClamp_TileMode: + choose_tiler_ymode<XClampStrategy>(yMode, filterQuality, dimensions, next, tileStage); + break; + case SkShader::kRepeat_TileMode: + choose_tiler_ymode<XRepeatStrategy>(yMode, filterQuality, dimensions, next, tileStage); + break; + case SkShader::kMirror_TileMode: + choose_tiler_ymode<XMirrorStrategy>(yMode, filterQuality, dimensions, next, tileStage); + break; + } + + return tileStage->get(); } -template <typename SourceStrategy> -class Sampler final : public SkLinearBitmapPipeline::BilerpProcessorInterface { + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Source Sampling Stage +template <typename SourceStrategy, typename Next> +class NearestNeighborSampler final : public SkLinearBitmapPipeline::BilerpProcessorInterface { public: template <typename... Args> - Sampler(SkLinearBitmapPipeline::PixelPlacerInterface* next, Args&&... args) - : fNext{next} - , fStrategy{std::forward<Args>(args)...} { } + NearestNeighborSampler(Next* next, Args&&... args) + : fSampler{next, std::forward<Args>(args)...} { } void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { - SkASSERT(0 < n && n < 4); - Sk4f px0, px1, px2; - fStrategy.getFewPixels(n, xs, ys, &px0, &px1, &px2); - if (n >= 1) fNext->placePixel(px0); - if (n >= 2) fNext->placePixel(px1); - if (n >= 3) fNext->placePixel(px2); + fSampler.nearestListFew(n, xs, ys); } - void VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { - Sk4f px0, px1, px2, px3; - fStrategy.get4Pixels(xs, ys, &px0, &px1, &px2, &px3); - fNext->place4Pixels(px0, px1, px2, px3); + fSampler.nearestList4(xs, ys); } - - void VECTORCALL bilerpList(Sk4s xs, Sk4s ys) override { - Sk4f px00, px10, px01, px11; - fStrategy.get4Pixels(xs, ys, &px00, &px10, &px01, &px11); - Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11); - fNext->placePixel(pixel); + void pointSpan(Span span) override { + fSampler.nearestSpan(span); + } + void VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { + SkFAIL("Using nearest neighbor sampler, but calling a bilerpEdge."); } - void pointSpan(Span span) override { - SkASSERT(!span.isEmpty()); - SkPoint start; SkScalar length; int count; - std::tie(start, length, count) = span; - if (length < (count - 1)) { - this->pointSpanSlowRate(span); - } else if (length == (count - 1)) { - this->pointSpanUnitRate(span); - } else { - this->pointSpanFastRate(span); - } + virtual void bilerpSpan(Span span, SkScalar y) override { + SkFAIL("Using nearest neighbor sampler, but calling a bilerpSpan."); } private: - // When moving through source space more slowly than dst space (zoomed in), - // we'll be sampling from the same source pixel more than once. - void pointSpanSlowRate(Span span) { - SkPoint start; SkScalar length; int count; - std::tie(start, length, count) = span; - SkScalar x = X(start); - SkFixed fx = SkScalarToFixed(x); - SkScalar dx = length / (count - 1); - SkFixed fdx = SkScalarToFixed(dx); - - const void* row = fStrategy.row((int)std::floor(Y(start))); - SkLinearBitmapPipeline::PixelPlacerInterface* next = fNext; - - int ix = SkFixedFloorToInt(fx); - int prevIX = ix; - Sk4f fpixel = fStrategy.getPixel(row, ix); - - // When dx is less than one, each pixel is used more than once. Using the fixed point fx - // allows the code to quickly check that the same pixel is being used. The code uses this - // same pixel check to do the sRGB and normalization only once. - auto getNextPixel = [&]() { - if (ix != prevIX) { - fpixel = fStrategy.getPixel(row, ix); - prevIX = ix; - } - fx += fdx; - ix = SkFixedFloorToInt(fx); - return fpixel; - }; - - while (count >= 4) { - Sk4f px0 = getNextPixel(); - Sk4f px1 = getNextPixel(); - Sk4f px2 = getNextPixel(); - Sk4f px3 = getNextPixel(); - next->place4Pixels(px0, px1, px2, px3); - count -= 4; - } - while (count > 0) { - next->placePixel(getNextPixel()); - count -= 1; - } - } + GeneralSampler<SourceStrategy, Next> fSampler; +}; - // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. - // We'll never re-use pixels, but we can at least load contiguous pixels. - void pointSpanUnitRate(Span span) { - SkPoint start; SkScalar length; int count; - std::tie(start, length, count) = span; - int ix = SkScalarFloorToInt(X(start)); - const void* row = fStrategy.row((int)std::floor(Y(start))); - SkLinearBitmapPipeline::PixelPlacerInterface* next = fNext; - while (count >= 4) { - Sk4f px0, px1, px2, px3; - fStrategy.get4Pixels(row, ix, &px0, &px1, &px2, &px3); - next->place4Pixels(px0, px1, px2, px3); - ix += 4; - count -= 4; - } +template <typename SourceStrategy, typename Next> +class BilerpSampler final : public SkLinearBitmapPipeline::BilerpProcessorInterface { +public: + template <typename... Args> + BilerpSampler(Next* next, Args&&... args) + : fSampler{next, std::forward<Args>(args)...} { } - while (count > 0) { - next->placePixel(fStrategy.getPixel(row, ix)); - ix += 1; - count -= 1; - } + void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + fSampler.bilerpListFew(n, xs, ys); } - - // We're moving through source space faster than dst (zoomed out), - // so we'll never reuse a source pixel or be able to do contiguous loads. - void pointSpanFastRate(Span span) { - span_fallback(span, this); + void VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + fSampler.bilerpList4(xs, ys); + } + void pointSpan(Span span) override { + fSampler.bilerpSpan(span); + } + void VECTORCALL bilerpEdge(Sk4s xs, Sk4s ys) override { + fSampler.bilerpEdge(xs, ys); } - void bilerpSpan(BilerpSpan span) override { - bilerp_span_fallback(span, this); + virtual void bilerpSpan(Span span, SkScalar y) override { + fSampler.bilerpSpanWithY(span, y); } private: - SkLinearBitmapPipeline::PixelPlacerInterface* const fNext; - SourceStrategy fStrategy; + GeneralSampler<SourceStrategy, Next> fSampler; }; -using Pixel8888SRGB = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kRGBA>; -using Pixel8888LRGB = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kRGBA>; -using Pixel8888SBGR = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kBGRA>; -using Pixel8888LBGR = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kBGRA>; +using Placer = SkLinearBitmapPipeline::PixelPlacerInterface; -static SkLinearBitmapPipeline::BilerpProcessorInterface* choose_pixel_sampler( - SkLinearBitmapPipeline::PixelPlacerInterface* next, +template<template <typename, typename> class Sampler> +static SkLinearBitmapPipeline::BilerpProcessorInterface* choose_pixel_sampler_base( + Placer* next, const SkPixmap& srcPixmap, SkLinearBitmapPipeline::SampleStage* sampleStage) { const SkImageInfo& imageInfo = srcPixmap.info(); switch (imageInfo.colorType()) { case kRGBA_8888_SkColorType: if (imageInfo.profileType() == kSRGB_SkColorProfileType) { - sampleStage->Initialize<Sampler<Pixel8888SRGB>>(next, srcPixmap); + sampleStage->Initialize<Sampler<Pixel8888SRGB, Placer>>(next, srcPixmap); } else { - sampleStage->Initialize<Sampler<Pixel8888LRGB>>(next, srcPixmap); + sampleStage->Initialize<Sampler<Pixel8888LRGB, Placer>>(next, srcPixmap); } break; case kBGRA_8888_SkColorType: if (imageInfo.profileType() == kSRGB_SkColorProfileType) { - sampleStage->Initialize<Sampler<Pixel8888SBGR>>(next, srcPixmap); + sampleStage->Initialize<Sampler<Pixel8888SBGR, Placer>>(next, srcPixmap); } else { - sampleStage->Initialize<Sampler<Pixel8888LBGR>>(next, srcPixmap); + sampleStage->Initialize<Sampler<Pixel8888LBGR, Placer>>(next, srcPixmap); } break; default: @@ -549,11 +465,24 @@ static SkLinearBitmapPipeline::BilerpProcessorInterface* choose_pixel_sampler( return sampleStage->get(); } +SkLinearBitmapPipeline::BilerpProcessorInterface* choose_pixel_sampler( + Placer* next, + SkFilterQuality filterQuality, + const SkPixmap& srcPixmap, + SkLinearBitmapPipeline::SampleStage* sampleStage) { + if (filterQuality == kNone_SkFilterQuality) { + return choose_pixel_sampler_base<NearestNeighborSampler>(next, srcPixmap, sampleStage); + } else { + return choose_pixel_sampler_base<BilerpSampler>(next, srcPixmap, sampleStage); + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// // Pixel Placement Stage template <SkAlphaType alphaType> class PlaceFPPixel final : public SkLinearBitmapPipeline::PixelPlacerInterface { public: + PlaceFPPixel(float postAlpha) : fPostAlpha{postAlpha} { } void VECTORCALL placePixel(Sk4f pixel) override { PlacePixel(fDst, pixel, 0); fDst += 1; @@ -573,11 +502,12 @@ public: } private: - static void VECTORCALL PlacePixel(SkPM4f* dst, Sk4f pixel, int index) { + void VECTORCALL PlacePixel(SkPM4f* dst, Sk4f pixel, int index) { Sk4f newPixel = pixel; if (alphaType == kUnpremul_SkAlphaType) { newPixel = Premultiply(pixel); } + newPixel = newPixel * fPostAlpha; newPixel.store(dst + index); } static Sk4f VECTORCALL Premultiply(Sk4f pixel) { @@ -586,16 +516,18 @@ private: } SkPM4f* fDst; + Sk4f fPostAlpha; }; static SkLinearBitmapPipeline::PixelPlacerInterface* choose_pixel_placer( SkAlphaType alphaType, + float postAlpha, SkLinearBitmapPipeline::PixelStage* placerStage) { if (alphaType == kUnpremul_SkAlphaType) { - placerStage->Initialize<PlaceFPPixel<kUnpremul_SkAlphaType>>(); + placerStage->Initialize<PlaceFPPixel<kUnpremul_SkAlphaType>>(postAlpha); } else { // kOpaque_SkAlphaType is treated the same as kPremul_SkAlphaType - placerStage->Initialize<PlaceFPPixel<kPremul_SkAlphaType>>(); + placerStage->Initialize<PlaceFPPixel<kPremul_SkAlphaType>>(postAlpha); } return placerStage->get(); } @@ -608,18 +540,31 @@ SkLinearBitmapPipeline::SkLinearBitmapPipeline( const SkMatrix& inverse, SkFilterQuality filterQuality, SkShader::TileMode xTile, SkShader::TileMode yTile, + float postAlpha, const SkPixmap& srcPixmap) { - SkSize size = SkSize::Make(srcPixmap.width(), srcPixmap.height()); + SkISize dimensions = srcPixmap.info().dimensions(); const SkImageInfo& srcImageInfo = srcPixmap.info(); + SkMatrix adjustedInverse = inverse; + if (filterQuality == kNone_SkFilterQuality) { + if (inverse.getScaleX() >= 0.0f) { + adjustedInverse.setTranslateX( + nextafterf(inverse.getTranslateX(), std::floor(inverse.getTranslateX()))); + } + if (inverse.getScaleY() >= 0.0f) { + adjustedInverse.setTranslateY( + nextafterf(inverse.getTranslateY(), std::floor(inverse.getTranslateY()))); + } + } + // As the stages are built, the chooser function may skip a stage. For example, with the // identity matrix, the matrix stage is skipped, and the tilerStage is the first stage. - auto placementStage = choose_pixel_placer(srcImageInfo.alphaType(), &fPixelStage); - auto samplerStage = choose_pixel_sampler(placementStage, srcPixmap, &fSampleStage); - auto tilerStage = choose_tiler(samplerStage, size, xTile, yTile, &fTileXOrBothStage, - &fTileYStage); - auto filterStage = choose_filter(tilerStage, filterQuality, &fFilterStage); - fFirstStage = choose_matrix(filterStage, inverse, &fMatrixStage); + auto placementStage = choose_pixel_placer(srcImageInfo.alphaType(), postAlpha, &fPixelStage); + auto samplerStage = choose_pixel_sampler(placementStage, + filterQuality, srcPixmap, &fSampleStage); + auto tilerStage = choose_tiler(samplerStage, + dimensions, xTile, yTile, filterQuality, &fTiler); + fFirstStage = choose_matrix(tilerStage, adjustedInverse, &fMatrixStage); } void SkLinearBitmapPipeline::shadeSpan4f(int x, int y, SkPM4f* dst, int count) { @@ -629,5 +574,6 @@ void SkLinearBitmapPipeline::shadeSpan4f(int x, int y, SkPM4f* dst, int count) { // math correct through the different stages. Count is the number of pixel to produce. // Since the code samples at pixel centers, length is the distance from the center of the // first pixel to the center of the last pixel. This implies that length is count-1. - fFirstStage->pointSpan(Span{SkPoint{x + 0.5f, y + 0.5f}, count - 1.0f, count}); + fFirstStage->pointSpan(Span{{x + 0.5f, y + 0.5f}, count - 1.0f, count}); } + diff --git a/src/core/SkLinearBitmapPipeline.h b/src/core/SkLinearBitmapPipeline.h index c65b7538aa..7efdd1c6fb 100644 --- a/src/core/SkLinearBitmapPipeline.h +++ b/src/core/SkLinearBitmapPipeline.h @@ -21,6 +21,7 @@ public: const SkMatrix& inverse, SkFilterQuality filterQuality, SkShader::TileMode xTile, SkShader::TileMode yTile, + float postAlpha, const SkPixmap& srcPixmap); ~SkLinearBitmapPipeline(); @@ -33,7 +34,7 @@ public: ~PolymorphicUnion() { if (fIsInitialized) { - get()->~Base(); + this->get()->~Base(); } } @@ -47,8 +48,8 @@ public: }; Base* get() const { return reinterpret_cast<Base*>(&fSpace); } - Base* operator->() const { return get(); } - Base& operator*() const { return *get(); } + Base* operator->() const { return this->get(); } + Base& operator*() const { return *(this->get()); } private: struct SK_STRUCT_ALIGN(16) Space { @@ -62,18 +63,16 @@ public: class BilerpProcessorInterface; class PixelPlacerInterface; - using MatrixStage = PolymorphicUnion<PointProcessorInterface, 112>; - using FilterStage = PolymorphicUnion<PointProcessorInterface, 8>; - using TileStage = PolymorphicUnion<BilerpProcessorInterface, 96>; + // These values were generated by the assert above in PolymorphicUnion. + using MatrixStage = PolymorphicUnion<PointProcessorInterface, 160>; + using TileStage = PolymorphicUnion<PointProcessorInterface, 160>; using SampleStage = PolymorphicUnion<BilerpProcessorInterface, 80>; using PixelStage = PolymorphicUnion<PixelPlacerInterface, 80>; private: PointProcessorInterface* fFirstStage; MatrixStage fMatrixStage; - FilterStage fFilterStage; - TileStage fTileXOrBothStage; - TileStage fTileYStage; + TileStage fTiler; SampleStage fSampleStage; PixelStage fPixelStage; }; diff --git a/src/core/SkLinearBitmapPipeline_core.h b/src/core/SkLinearBitmapPipeline_core.h index 0541f3cce5..2759f0bb8c 100644 --- a/src/core/SkLinearBitmapPipeline_core.h +++ b/src/core/SkLinearBitmapPipeline_core.h @@ -10,6 +10,16 @@ #include <cmath> +// New bilerp strategy: +// Pass through on bilerpList4 and bilerpListFew (analogs to pointList), introduce bilerpEdge +// which takes 4 points. If the sample spans an edge, then break it into a bilerpEdge. Bilerp +// span then becomes a normal span except in special cases where an extra Y is given. The bilerp +// need to stay single point calculations until the tile layer. +// TODO: +// - edge span predicate. +// - introduce new point API +// - Add tile for new api. + // Tweak ABI of functions that pass Sk4f by value to pass them via registers. #if defined(_MSC_VER) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 #define VECTORCALL __vectorcall @@ -65,12 +75,13 @@ public: } bool isEmpty() const { return 0 == fCount; } + void clear() { fCount = 0; } + int count() const { return fCount; } SkScalar length() const { return fLength; } SkScalar startX() const { return X(fStart); } - SkScalar endX() const { return startX() + length(); } - void clear() { - fCount = 0; - } + SkScalar endX() const { return this->startX() + this->length(); } + SkScalar startY() const { return Y(fStart); } + Span emptySpan() { return Span{{0.0, 0.0}, 0.0f, 0}; } bool completelyWithin(SkScalar xMin, SkScalar xMax) const { SkScalar sMin, sMax; @@ -88,17 +99,14 @@ public: SkASSERT(dx != 0.0f); if (this->isEmpty()) { - return Span{{0.0, 0.0}, 0.0f, 0}; + return this->emptySpan(); } int dxSteps = SkScalarFloorToInt((breakX - this->startX()) / dx); - // Calculate the values for the span to cleave off. - SkScalar newLength = dxSteps * dx; - if (dxSteps < 0) { // The span is wholly after breakX. - return Span{{0.0, 0.0}, 0.0f, 0}; + return this->emptySpan(); } else if (dxSteps >= fCount) { // The span is wholly before breakX. Span answer = *this; @@ -106,6 +114,9 @@ public: return answer; } + // Calculate the values for the span to cleave off. + SkScalar newLength = dxSteps * dx; + // If the last (or first if count = 1) sample lands directly on the boundary. Include it // when dx < 0 and exclude it when dx > 0. // Reasoning: @@ -113,15 +124,16 @@ public: // pixel is after the boundary. // dx < 0: The sample point on the boundary is part of the current span because the // entire pixel is before the boundary. - if (startX() + newLength == breakX && dx > 0) { - if (dxSteps != 0) { + if (this->startX() + newLength == breakX && dx > 0) { + if (dxSteps > 0) { dxSteps -= 1; newLength -= dx; } else { - return Span{{0.0, 0.0}, 0.0f, 0}; + return this->emptySpan(); } } + // Calculate new span parameters SkPoint newStart = fStart; int newCount = dxSteps + 1; SkASSERT(newCount > 0); @@ -146,39 +158,6 @@ private: int fCount; }; -// BilerpSpans are similar to Spans, but they represent four source samples converting to single -// destination pixel per count. The pixels for the four samples are collect along two horizontal -// lines; one starting at {x, y0} and the other starting at {x, y1}. There are two distinct lines -// to deal with the edge case of the tile mode. For example, y0 may be at the last y position in -// a tile while y1 would be at the first. -// The step of a Bilerp (dx) is still length / (count - 1) and the start to the next sample is -// still dx * count, but the bounds are complicated by the sampling kernel so that the pixels -// touched are from x to x + length + 1. -class BilerpSpan { -public: - BilerpSpan(SkScalar x, SkScalar y0, SkScalar y1, SkScalar length, int count) - : fX{x}, fY0{y0}, fY1{y1}, fLength{length}, fCount{count} { - SkASSERT(count >= 0); - SkASSERT(std::isfinite(length)); - SkASSERT(std::isfinite(x)); - SkASSERT(std::isfinite(y0)); - SkASSERT(std::isfinite(y1)); - } - - operator std::tuple<SkScalar&, SkScalar&, SkScalar&, SkScalar&, int&>() { - return std::tie(fX, fY0, fY1, fLength, fCount); - } - - bool isEmpty() const { return 0 == fCount; } - -private: - SkScalar fX; - SkScalar fY0; - SkScalar fY1; - SkScalar fLength; - int fCount; -}; - template<typename Stage> void span_fallback(Span span, Stage* stage) { SkPoint start; @@ -206,26 +185,6 @@ void span_fallback(Span span, Stage* stage) { stage->pointListFew(count, xs, ys); } } - -template <typename Next> -void bilerp_span_fallback(BilerpSpan span, Next* next) { - SkScalar x, y0, y1; SkScalar length; int count; - std::tie(x, y0, y1, length, count) = span; - - SkASSERT(!span.isEmpty()); - float dx = length / (count - 1); - - Sk4f xs = Sk4f{x} + Sk4f{0.0f, 1.0f, 0.0f, 1.0f}; - Sk4f ys = Sk4f{y0, y0, y1, y1}; - - // If count == 1 then dx will be inf or NaN, but that is ok because the resulting addition is - // never used. - while (count > 0) { - next->bilerpList(xs, ys); - xs = xs + dx; - count -= 1; - } -} } // namespace #endif // SkLinearBitmapPipeline_core_DEFINED diff --git a/src/core/SkLinearBitmapPipeline_matrix.h b/src/core/SkLinearBitmapPipeline_matrix.h index b1bd81f163..d194d0729a 100644 --- a/src/core/SkLinearBitmapPipeline_matrix.h +++ b/src/core/SkLinearBitmapPipeline_matrix.h @@ -85,6 +85,34 @@ private: const Sk4s fXSkew, fYSkew; }; +class PerspectiveMatrixStrategy { +public: + PerspectiveMatrixStrategy(SkVector offset, SkVector scale, SkVector skew, + SkVector zSkew, SkScalar zOffset) + : fXOffset{X(offset)}, fYOffset{Y(offset)}, fZOffset{zOffset} + , fXScale{X(scale)}, fYScale{Y(scale)} + , fXSkew{X(skew)}, fYSkew{Y(skew)}, fZXSkew{X(zSkew)}, fZYSkew{Y(zSkew)} { } + void processPoints(Sk4s* xs, Sk4s* ys) { + Sk4s newXs = fXScale * *xs + fXSkew * *ys + fXOffset; + Sk4s newYs = fYSkew * *xs + fYScale * *ys + fYOffset; + Sk4s newZs = fZXSkew * *xs + fZYSkew * *ys + fZOffset; + + *xs = newXs / newZs; + *ys = newYs / newZs; + } + + template <typename Next> + bool maybeProcessSpan(Span span, Next* next) { + return false; + } + +private: + const Sk4s fXOffset, fYOffset, fZOffset; + const Sk4s fXScale, fYScale; + const Sk4s fXSkew, fYSkew, fZXSkew, fZYSkew; +}; + + } // namespace #endif // SkLinearBitmapPipeline_matrix_DEFINED diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h new file mode 100644 index 0000000000..2115379368 --- /dev/null +++ b/src/core/SkLinearBitmapPipeline_sample.h @@ -0,0 +1,644 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkLinearBitmapPipeline_sampler_DEFINED +#define SkLinearBitmapPipeline_sampler_DEFINED + +#include "SkLinearBitmapPipeline_core.h" +#include <tuple> + +namespace { +// Explaination of the math: +// 1 - x x +// +--------+--------+ +// | | | +// 1 - y | px00 | px10 | +// | | | +// +--------+--------+ +// | | | +// y | px01 | px11 | +// | | | +// +--------+--------+ +// +// +// Given a pixelxy each is multiplied by a different factor derived from the fractional part of x +// and y: +// * px00 -> (1 - x)(1 - y) = 1 - x - y + xy +// * px10 -> x(1 - y) = x - xy +// * px01 -> (1 - x)y = y - xy +// * px11 -> xy +// So x * y is calculated first and then used to calculate all the other factors. +static Sk4s VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, + Sk4f px01, Sk4f px11) { + // Calculate fractional xs and ys. + Sk4s fxs = xs - xs.floor(); + Sk4s fys = ys - ys.floor(); + Sk4s fxys{fxs * fys}; + Sk4f sum = px11 * fxys; + sum = sum + px01 * (fys - fxys); + sum = sum + px10 * (fxs - fxys); + sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); + return sum; +} + +// The GeneralSampler class +template<typename SourceStrategy, typename Next> +class GeneralSampler { +public: + template<typename... Args> + GeneralSampler(SkLinearBitmapPipeline::PixelPlacerInterface* next, Args&& ... args) + : fNext{next}, fStrategy{std::forward<Args>(args)...} { } + + void VECTORCALL nearestListFew(int n, Sk4s xs, Sk4s ys) { + SkASSERT(0 < n && n < 4); + Sk4f px0, px1, px2; + fStrategy.getFewPixels(n, xs, ys, &px0, &px1, &px2); + if (n >= 1) fNext->placePixel(px0); + if (n >= 2) fNext->placePixel(px1); + if (n >= 3) fNext->placePixel(px2); + } + + void VECTORCALL nearestList4(Sk4s xs, Sk4s ys) { + Sk4f px0, px1, px2, px3; + fStrategy.get4Pixels(xs, ys, &px0, &px1, &px2, &px3); + fNext->place4Pixels(px0, px1, px2, px3); + } + + void nearestSpan(Span span) { + SkASSERT(!span.isEmpty()); + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + SkScalar absLength = SkScalarAbs(length); + if (absLength < (count - 1)) { + this->nearestSpanSlowRate(span); + } else if (absLength == (count - 1)) { + this->nearestSpanUnitRate(span); + } else { + this->nearestSpanFastRate(span); + } + } + + Sk4f bilerNonEdgePixel(SkScalar x, SkScalar y) { + Sk4f px00, px10, px01, px11; + Sk4f xs = Sk4f{x}; + Sk4f ys = Sk4f{y}; + Sk4f sampleXs = xs + Sk4f{-0.5f, 0.5f, -0.5f, 0.5f}; + Sk4f sampleYs = ys + Sk4f{-0.5f, -0.5f, 0.5f, 0.5f}; + fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); + return bilerp4(xs, ys, px00, px10, px01, px11); + } + + void VECTORCALL bilerpListFew(int n, Sk4s xs, Sk4s ys) { + SkASSERT(0 < n && n < 4); + auto bilerpPixel = [&](int index) { + return this->bilerNonEdgePixel(xs[index], ys[index]); + }; + + if (n >= 1) fNext->placePixel(bilerpPixel(0)); + if (n >= 2) fNext->placePixel(bilerpPixel(1)); + if (n >= 3) fNext->placePixel(bilerpPixel(2)); + } + + void VECTORCALL bilerpList4(Sk4s xs, Sk4s ys) { + auto bilerpPixel = [&](int index) { + return this->bilerNonEdgePixel(xs[index], ys[index]); + }; + fNext->place4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3)); + } + + void VECTORCALL bilerpEdge(Sk4s sampleXs, Sk4s sampleYs) { + Sk4f px00, px10, px01, px11; + Sk4f xs = Sk4f{sampleXs[0]}; + Sk4f ys = Sk4f{sampleYs[0]}; + fStrategy.get4Pixels(sampleXs, sampleYs, &px00, &px10, &px01, &px11); + Sk4f pixel = bilerp4(xs, ys, px00, px10, px01, px11); + fNext->placePixel(pixel); + } + + void bilerpSpan(Span span) { + this->bilerpSpanWithY(span, span.startY()); + } + + void bilerpSpanWithY(Span span, SkScalar y) { + SkASSERT(!span.isEmpty()); + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + SkScalar absLength = SkScalarAbs(length); + if (absLength == 0.0f) { + this->bilerpSpanZeroRate(span, y); + } else if (absLength < (count - 1)) { + this->bilerpSpanSlowRate(span, y); + } else if (absLength == (count - 1)) { + if (std::fmod(span.startX() - 0.5f, 1.0f) == 0.0f) { + if (std::fmod(span.startY() - 0.5f, 1.0f) == 0.0f) { + this->nearestSpanUnitRate(span); + } else { + this->bilerpSpanUnitRateAlignedX(span, y); + } + } else { + this->bilerpSpanUnitRate(span, y); + } + } else { + this->bilerpSpanFastRate(span, y); + } + } + +private: + // When moving through source space more slowly than dst space (zoomed in), + // we'll be sampling from the same source pixel more than once. + void nearestSpanSlowRate(Span span) { + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + SkScalar x = X(start); + SkFixed fx = SkScalarToFixed(x); + SkScalar dx = length / (count - 1); + SkFixed fdx = SkScalarToFixed(dx); + + const void* row = fStrategy.row((int)std::floor(Y(start))); + Next* next = fNext; + + int ix = SkFixedFloorToInt(fx); + int prevIX = ix; + Sk4f fpixel = fStrategy.getPixel(row, ix); + + // When dx is less than one, each pixel is used more than once. Using the fixed point fx + // allows the code to quickly check that the same pixel is being used. The code uses this + // same pixel check to do the sRGB and normalization only once. + auto getNextPixel = [&]() { + if (ix != prevIX) { + fpixel = fStrategy.getPixel(row, ix); + prevIX = ix; + } + fx += fdx; + ix = SkFixedFloorToInt(fx); + return fpixel; + }; + + while (count >= 4) { + Sk4f px0 = getNextPixel(); + Sk4f px1 = getNextPixel(); + Sk4f px2 = getNextPixel(); + Sk4f px3 = getNextPixel(); + next->place4Pixels(px0, px1, px2, px3); + count -= 4; + } + while (count > 0) { + next->placePixel(getNextPixel()); + count -= 1; + } + } + + // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. + // We'll never re-use pixels, but we can at least load contiguous pixels. + void nearestSpanUnitRate(Span span) { + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + int ix = SkScalarFloorToInt(X(start)); + const void* row = fStrategy.row((int)std::floor(Y(start))); + Next* next = fNext; + if (length > 0) { + while (count >= 4) { + Sk4f px0, px1, px2, px3; + fStrategy.get4Pixels(row, ix, &px0, &px1, &px2, &px3); + next->place4Pixels(px0, px1, px2, px3); + ix += 4; + count -= 4; + } + + while (count > 0) { + next->placePixel(fStrategy.getPixel(row, ix)); + ix += 1; + count -= 1; + } + } else { + while (count >= 4) { + Sk4f px0, px1, px2, px3; + fStrategy.get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0); + next->place4Pixels(px0, px1, px2, px3); + ix -= 4; + count -= 4; + } + + while (count > 0) { + next->placePixel(fStrategy.getPixel(row, ix)); + ix -= 1; + count -= 1; + } + } + } + + // We're moving through source space faster than dst (zoomed out), + // so we'll never reuse a source pixel or be able to do contiguous loads. + void nearestSpanFastRate(Span span) { + struct NearestWrapper { + void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) { + fSampler.nearestListFew(n, xs, ys); + } + + void VECTORCALL pointList4(Sk4s xs, Sk4s ys) { + fSampler.nearestList4(xs, ys); + } + + GeneralSampler& fSampler; + }; + NearestWrapper wrapper{*this}; + span_fallback(span, &wrapper); + } + + void bilerpSpanZeroRate(Span span, SkScalar y1) { + SkScalar y0 = span.startY() - 0.5f; + y1 += 0.5f; + int iy0 = SkScalarFloorToInt(y0); + SkScalar filterY1 = y0 - iy0; + SkScalar filterY0 = 1.0f - filterY1; + int iy1 = SkScalarFloorToInt(y1); + int ix = SkScalarFloorToInt(span.startX()); + Sk4f pixelY0 = fStrategy.getPixel(fStrategy.row(iy0), ix); + Sk4f pixelY1 = fStrategy.getPixel(fStrategy.row(iy1), ix); + Sk4f filterPixel = pixelY0 * filterY0 + pixelY1 * filterY1; + int count = span.count(); + while (count >= 4) { + fNext->place4Pixels(filterPixel, filterPixel, filterPixel, filterPixel); + count -= 4; + } + while (count > 0) { + fNext->placePixel(filterPixel); + count -= 1; + } + } + + // When moving through source space more slowly than dst space (zoomed in), + // we'll be sampling from the same source pixel more than once. + void bilerpSpanSlowRate(Span span, SkScalar ry1) { + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + SkFixed fx = SkScalarToFixed(X(start) + -0.5f); + + SkFixed fdx = SkScalarToFixed(length / (count - 1)); + //start = start + SkPoint{-0.5f, -0.5f}; + + Sk4f xAdjust; + if (fdx >= 0) { + xAdjust = Sk4f{-1.0f}; + } else { + xAdjust = Sk4f{1.0f}; + } + int ix = SkFixedFloorToInt(fx); + int ioldx = ix; + Sk4f x{SkFixedToScalar(fx) - ix}; + Sk4f dx{SkFixedToScalar(fdx)}; + SkScalar ry0 = Y(start) - 0.5f; + ry1 += 0.5f; + SkScalar yFloor = std::floor(ry0); + Sk4f y1 = Sk4f{ry0 - yFloor}; + Sk4f y0 = Sk4f{1.0f} - y1; + const uint32_t* const row0 = fStrategy.row(SkScalarFloorToInt(ry0)); + const uint32_t* const row1 = fStrategy.row(SkScalarFloorToInt(ry1)); + Sk4f fpixel00 = y0 * fStrategy.getPixel(row0, ix); + Sk4f fpixel01 = y1 * fStrategy.getPixel(row1, ix); + Sk4f fpixel10 = y0 * fStrategy.getPixel(row0, ix + 1); + Sk4f fpixel11 = y1 * fStrategy.getPixel(row1, ix + 1); + auto getNextPixel = [&]() { + if (ix != ioldx) { + fpixel00 = fpixel10; + fpixel01 = fpixel11; + fpixel10 = y0 * fStrategy.getPixel(row0, ix + 1); + fpixel11 = y1 * fStrategy.getPixel(row1, ix + 1); + ioldx = ix; + x = x + xAdjust; + } + + Sk4f x0, x1; + x0 = Sk4f{1.0f} - x; + x1 = x; + Sk4f fpixel = x0 * (fpixel00 + fpixel01) + x1 * (fpixel10 + fpixel11); + fx += fdx; + ix = SkFixedFloorToInt(fx); + x = x + dx; + return fpixel; + }; + + while (count >= 4) { + Sk4f fpixel0 = getNextPixel(); + Sk4f fpixel1 = getNextPixel(); + Sk4f fpixel2 = getNextPixel(); + Sk4f fpixel3 = getNextPixel(); + + fNext->place4Pixels(fpixel0, fpixel1, fpixel2, fpixel3); + count -= 4; + } + + while (count > 0) { + fNext->placePixel(getNextPixel()); + + count -= 1; + } + } + + // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. + // We'll never re-use pixels, but we can at least load contiguous pixels. + void bilerpSpanUnitRate(Span span, SkScalar y1) { + y1 += 0.5f; + SkScalar y0 = span.startY() - 0.5f; + int iy0 = SkScalarFloorToInt(y0); + SkScalar filterY1 = y0 - iy0; + SkScalar filterY0 = 1.0f - filterY1; + int iy1 = SkScalarFloorToInt(y1); + const void* rowY0 = fStrategy.row(iy0); + const void* rowY1 = fStrategy.row(iy1); + SkScalar x0 = span.startX() - 0.5f; + int ix0 = SkScalarFloorToInt(x0); + SkScalar filterX1 = x0 - ix0; + SkScalar filterX0 = 1.0f - filterX1; + + auto getPixelY0 = [&]() { + Sk4f px = fStrategy.getPixel(rowY0, ix0); + return px * filterY0; + }; + + auto getPixelY1 = [&]() { + Sk4f px = fStrategy.getPixel(rowY1, ix0); + return px * filterY1; + }; + + auto get4PixelsY0 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { + fStrategy.get4Pixels(rowY0, ix, px0, px1, px2, px3); + *px0 = *px0 * filterY0; + *px1 = *px1 * filterY0; + *px2 = *px2 * filterY0; + *px3 = *px3 * filterY0; + }; + + auto get4PixelsY1 = [&](int ix, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { + fStrategy.get4Pixels(rowY1, ix, px0, px1, px2, px3); + *px0 = *px0 * filterY1; + *px1 = *px1 * filterY1; + *px2 = *px2 * filterY1; + *px3 = *px3 * filterY1; + }; + + auto lerp = [&](Sk4f& pixelX0, Sk4f& pixelX1) { + return pixelX0 * filterX0 + pixelX1 * filterX1; + }; + + // Mid making 4 unit rate. + Sk4f pxB = getPixelY0() + getPixelY1(); + if (span.length() > 0) { + int count = span.count(); + while (count >= 4) { + Sk4f px00, px10, px20, px30; + get4PixelsY0(ix0, &px00, &px10, &px20, &px30); + Sk4f px01, px11, px21, px31; + get4PixelsY1(ix0, &px01, &px11, &px21, &px31); + Sk4f pxS0 = px00 + px01; + Sk4f px0 = lerp(pxB, pxS0); + Sk4f pxS1 = px10 + px11; + Sk4f px1 = lerp(pxS0, pxS1); + Sk4f pxS2 = px20 + px21; + Sk4f px2 = lerp(pxS1, pxS2); + Sk4f pxS3 = px30 + px31; + Sk4f px3 = lerp(pxS2, pxS3); + pxB = pxS3; + fNext->place4Pixels( + px0, + px1, + px2, + px3); + ix0 += 4; + count -= 4; + } + while (count > 0) { + Sk4f pixelY0 = fStrategy.getPixel(rowY0, ix0); + Sk4f pixelY1 = fStrategy.getPixel(rowY1, ix0); + + fNext->placePixel(lerp(pixelY0, pixelY1)); + ix0 += 1; + count -= 1; + } + } else { + int count = span.count(); + while (count >= 4) { + Sk4f px00, px10, px20, px30; + get4PixelsY0(ix0 - 3, &px00, &px10, &px20, &px30); + Sk4f px01, px11, px21, px31; + get4PixelsY1(ix0 - 3, &px01, &px11, &px21, &px31); + Sk4f pxS3 = px30 + px31; + Sk4f px0 = lerp(pxS3, pxB); + Sk4f pxS2 = px20 + px21; + Sk4f px1 = lerp(pxS2, pxS3); + Sk4f pxS1 = px10 + px11; + Sk4f px2 = lerp(pxS1, pxS2); + Sk4f pxS0 = px00 + px01; + Sk4f px3 = lerp(pxS0, pxS1); + pxB = pxS0; + fNext->place4Pixels( + px0, + px1, + px2, + px3); + ix0 -= 4; + count -= 4; + } + while (count > 0) { + Sk4f pixelY0 = fStrategy.getPixel(rowY0, ix0); + Sk4f pixelY1 = fStrategy.getPixel(rowY1, ix0); + + fNext->placePixel(lerp(pixelY0, pixelY1)); + ix0 -= 1; + count -= 1; + } + } + } + + void bilerpSpanUnitRateAlignedX(Span span, SkScalar y1) { + SkScalar y0 = span.startY() - 0.5f; + y1 += 0.5f; + int iy0 = SkScalarFloorToInt(y0); + SkScalar filterY1 = y0 - iy0; + SkScalar filterY0 = 1.0f - filterY1; + int iy1 = SkScalarFloorToInt(y1); + int ix = SkScalarFloorToInt(span.startX()); + const void* rowY0 = fStrategy.row(iy0); + const void* rowY1 = fStrategy.row(iy1); + auto lerp = [&](Sk4f* pixelY0, Sk4f* pixelY1) { + return *pixelY0 * filterY0 + *pixelY1 * filterY1; + }; + + if (span.length() > 0) { + int count = span.count(); + while (count >= 4) { + Sk4f px00, px10, px20, px30; + fStrategy.get4Pixels(rowY0, ix, &px00, &px10, &px20, &px30); + Sk4f px01, px11, px21, px31; + fStrategy.get4Pixels(rowY1, ix, &px01, &px11, &px21, &px31); + fNext->place4Pixels( + lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); + ix += 4; + count -= 4; + } + while (count > 0) { + Sk4f pixelY0 = fStrategy.getPixel(rowY0, ix); + Sk4f pixelY1 = fStrategy.getPixel(rowY1, ix); + + fNext->placePixel(lerp(&pixelY0, &pixelY1)); + ix += 1; + count -= 1; + } + } else { + int count = span.count(); + while (count >= 4) { + Sk4f px00, px10, px20, px30; + fStrategy.get4Pixels(rowY0, ix - 3, &px30, &px20, &px10, &px00); + Sk4f px01, px11, px21, px31; + fStrategy.get4Pixels(rowY1, ix - 3, &px31, &px21, &px11, &px01); + fNext->place4Pixels( + lerp(&px00, &px01), lerp(&px10, &px11), lerp(&px20, &px21), lerp(&px30, &px31)); + ix -= 4; + count -= 4; + } + while (count > 0) { + Sk4f pixelY0 = fStrategy.getPixel(rowY0, ix); + Sk4f pixelY1 = fStrategy.getPixel(rowY1, ix); + + fNext->placePixel(lerp(&pixelY0, &pixelY1)); + ix -= 1; + count -= 1; + } + } + } + + // We're moving through source space faster than dst (zoomed out), + // so we'll never reuse a source pixel or be able to do contiguous loads. + void bilerpSpanFastRate(Span span, SkScalar y1) { + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + SkScalar x = X(start); + SkScalar y = Y(start); + if (false && y == y1) { + struct BilerpWrapper { + void VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) { + fSampler.bilerpListFew(n, xs, ys); + } + + void VECTORCALL pointList4(Sk4s xs, Sk4s ys) { + fSampler.bilerpList4(xs, ys); + } + + GeneralSampler& fSampler; + }; + BilerpWrapper wrapper{*this}; + span_fallback(span, &wrapper); + } else { + SkScalar dx = length / (count - 1); + Sk4f ys = {y - 0.5f, y - 0.5f, y1 + 0.5f, y1 + 0.5f}; + while (count > 0) { + Sk4f xs = Sk4f{-0.5f, 0.5f, -0.5f, 0.5f} + Sk4f{x}; + this->bilerpEdge(xs, ys); + x += dx; + count -= 1; + } + } + } + + Next* const fNext; + SourceStrategy fStrategy; +}; + +class sRGBFast { +public: + static Sk4s VECTORCALL sRGBToLinear(Sk4s pixel) { + Sk4s l = pixel * pixel; + return Sk4s{l[0], l[1], l[2], pixel[3]}; + } +}; + +enum class ColorOrder { + kRGBA = false, + kBGRA = true, +}; +template <SkColorProfileType colorProfile, ColorOrder colorOrder> +class Pixel8888 { +public: + Pixel8888(int width, const uint32_t* src) : fSrc{src}, fWidth{width}{ } + Pixel8888(const SkPixmap& srcPixmap) + : fSrc{srcPixmap.addr32()} + , fWidth{static_cast<int>(srcPixmap.rowBytes() / 4)} { } + + void VECTORCALL getFewPixels(int n, Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) { + Sk4i XIs = SkNx_cast<int, SkScalar>(xs); + Sk4i YIs = SkNx_cast<int, SkScalar>(ys); + Sk4i bufferLoc = YIs * fWidth + XIs; + switch (n) { + case 3: + *px2 = this->getPixel(fSrc, bufferLoc[2]); + case 2: + *px1 = this->getPixel(fSrc, bufferLoc[1]); + case 1: + *px0 = this->getPixel(fSrc, bufferLoc[0]); + default: + break; + } + } + + void VECTORCALL get4Pixels(Sk4s xs, Sk4s ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { + Sk4i XIs = SkNx_cast<int, SkScalar>(xs); + Sk4i YIs = SkNx_cast<int, SkScalar>(ys); + Sk4i bufferLoc = YIs * fWidth + XIs; + *px0 = this->getPixel(fSrc, bufferLoc[0]); + *px1 = this->getPixel(fSrc, bufferLoc[1]); + *px2 = this->getPixel(fSrc, bufferLoc[2]); + *px3 = this->getPixel(fSrc, bufferLoc[3]); + } + + void get4Pixels(const void* vsrc, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) { + const uint32_t* src = static_cast<const uint32_t*>(vsrc); + *px0 = this->getPixel(src, index + 0); + *px1 = this->getPixel(src, index + 1); + *px2 = this->getPixel(src, index + 2); + *px3 = this->getPixel(src, index + 3); + } + + Sk4f getPixel(const void* vsrc, int index) { + const uint32_t* src = static_cast<const uint32_t*>(vsrc); + Sk4b bytePixel = Sk4b::Load((uint8_t *)(&src[index])); + Sk4f pixel = SkNx_cast<float, uint8_t>(bytePixel); + if (colorOrder == ColorOrder::kBGRA) { + pixel = SkNx_shuffle<2, 1, 0, 3>(pixel); + } + pixel = pixel * Sk4f{1.0f/255.0f}; + if (colorProfile == kSRGB_SkColorProfileType) { + pixel = sRGBFast::sRGBToLinear(pixel); + } + return pixel; + } + + const uint32_t* row(int y) { return fSrc + y * fWidth[0]; } + +private: + const uint32_t* const fSrc; + const Sk4i fWidth; +}; +using Pixel8888SRGB = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kRGBA>; +using Pixel8888LRGB = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kRGBA>; +using Pixel8888SBGR = Pixel8888<kSRGB_SkColorProfileType, ColorOrder::kBGRA>; +using Pixel8888LBGR = Pixel8888<kLinear_SkColorProfileType, ColorOrder::kBGRA>; +} // namespace + +#endif // SkLinearBitmapPipeline_sampler_DEFINED diff --git a/src/core/SkLinearBitmapPipeline_tile.h b/src/core/SkLinearBitmapPipeline_tile.h index 761e3c57eb..60cc2a5ef0 100644 --- a/src/core/SkLinearBitmapPipeline_tile.h +++ b/src/core/SkLinearBitmapPipeline_tile.h @@ -15,39 +15,30 @@ #include <limits> namespace { -class ClampStrategy { +class XClampStrategy { public: - ClampStrategy(X max) - : fXMin{0.0f}, fXMax{max - 1.0f} { } + XClampStrategy(int32_t max) + : fXsMax{SkScalar(max - 0.5f)} + , fXMax{SkScalar(max)} { } - ClampStrategy(Y max) - : fYMin{0.0f}, fYMax{max - 1.0f} { } - - ClampStrategy(SkSize max) - : fXMin{0.0f}, fYMin{0.0f}, fXMax{X(max) - 1.0f}, fYMax{Y(max) - 1.0f} { } - - void processPoints(Sk4s* xs, Sk4s* ys) { - *xs = Sk4s::Min(Sk4s::Max(*xs, fXMin), fXMax); - *ys = Sk4s::Min(Sk4s::Max(*ys, fYMin), fYMax); + void tileXPoints(Sk4s* xs) { + *xs = Sk4s::Min(Sk4s::Max(*xs, 0.0f), fXsMax); + SkASSERT(0 <= (*xs)[0] && (*xs)[0] < fXMax); + SkASSERT(0 <= (*xs)[1] && (*xs)[1] < fXMax); + SkASSERT(0 <= (*xs)[2] && (*xs)[2] < fXMax); + SkASSERT(0 <= (*xs)[3] && (*xs)[3] < fXMax); } template<typename Next> bool maybeProcessSpan(Span originalSpan, Next* next) { SkASSERT(!originalSpan.isEmpty()); - SkPoint start; - SkScalar length; - int count; + SkPoint start; SkScalar length; int count; std::tie(start, length, count) = originalSpan; - SkScalar xMin = fXMin[0]; - SkScalar xMax = fXMax[0] + 1.0f; - SkScalar yMin = fYMin[0]; - SkScalar yMax = fYMax[0]; SkScalar x = X(start); - SkScalar y = std::min(std::max<SkScalar>(yMin, Y(start)), yMax); - + SkScalar y = Y(start); Span span{{x, y}, length, count}; - if (span.completelyWithin(xMin, xMax)) { + if (span.completelyWithin(0.0f, fXMax)) { next->pointSpan(span); return true; } @@ -85,84 +76,100 @@ public: // * Over - for the portion of the span > xMax, take the color at pixel {xMax-1, y} and // use it to fill in the rest of the destination pixels. if (dx >= 0) { - Span leftClamped = span.breakAt(xMin, dx); + Span leftClamped = span.breakAt(0.0f, dx); if (!leftClamped.isEmpty()) { - leftClamped.clampToSinglePixel({xMin, y}); + leftClamped.clampToSinglePixel({0.0f, y}); next->pointSpan(leftClamped); } - Span middle = span.breakAt(xMax, dx); - if (!middle.isEmpty()) { - next->pointSpan(middle); + Span center = span.breakAt(fXMax, dx); + if (!center.isEmpty()) { + next->pointSpan(center); } if (!span.isEmpty()) { - span.clampToSinglePixel({xMax - 1, y}); + span.clampToSinglePixel({fXMax - 1, y}); next->pointSpan(span); } } else { - Span rightClamped = span.breakAt(xMax, dx); + Span center = span.breakAt(fXMax, dx); - if (!rightClamped.isEmpty()) { - rightClamped.clampToSinglePixel({xMax - 1, y}); - next->pointSpan(rightClamped); - } - Span middle = span.breakAt(xMin, dx); - if (!middle.isEmpty()) { - next->pointSpan(middle); - } if (!span.isEmpty()) { - span.clampToSinglePixel({xMin, y}); + span.clampToSinglePixel({fXMax - 1, y}); next->pointSpan(span); } + Span leftEdge = center.breakAt(0.0f, dx); + if (!center.isEmpty()) { + next->pointSpan(center); + } + if (!leftEdge.isEmpty()) { + leftEdge.clampToSinglePixel({0.0f, y}); + next->pointSpan(leftEdge); + } } return true; } - template <typename Next> - bool maybeProcessBilerpSpan(BilerpSpan bSpan, Next* next) { - return false; - } - private: - const Sk4s fXMin{SK_FloatNegativeInfinity}; - const Sk4s fYMin{SK_FloatNegativeInfinity}; - const Sk4s fXMax{SK_FloatInfinity}; - const Sk4s fYMax{SK_FloatInfinity}; + const Sk4s fXsMax; + const SkScalar fXMax; }; -class RepeatStrategy { +class YClampStrategy { public: - RepeatStrategy(X max) : fXMax{max}, fXInvMax{1.0f / max} { } + YClampStrategy(int32_t max) + : fYMax{SkScalar(max) - 0.5f} + , fYsMax{SkScalar(max) - 0.5f} { } - RepeatStrategy(Y max) : fYMax{max}, fYInvMax{1.0f / max} { } + void tileYPoints(Sk4s* ys) { + *ys = Sk4s::Min(Sk4s::Max(*ys, 0.0f), fYsMax); + SkASSERT(0 <= (*ys)[0] && (*ys)[0] <= fYMax); + SkASSERT(0 <= (*ys)[1] && (*ys)[1] <= fYMax); + SkASSERT(0 <= (*ys)[2] && (*ys)[2] <= fYMax); + SkASSERT(0 <= (*ys)[3] && (*ys)[3] <= fYMax); + } - RepeatStrategy(SkSize max) - : fXMax{X(max)}, fXInvMax{1.0f / X(max)}, fYMax{Y(max)}, fYInvMax{1.0f / Y(max)} { } + SkScalar tileY(SkScalar y) { + return std::min(std::max<SkScalar>(0.0f, y), fYMax); + } + +private: + const SkScalar fYMax; + const Sk4s fYsMax; +}; + +SkScalar tile_mod(SkScalar x, SkScalar base) { + return x - SkScalarFloorToScalar(x / base) * base; +} - void processPoints(Sk4s* xs, Sk4s* ys) { - Sk4s divX = (*xs * fXInvMax).floor(); - Sk4s divY = (*ys * fYInvMax).floor(); - Sk4s baseX = (divX * fXMax); - Sk4s baseY = (divY * fYMax); - *xs = *xs - baseX; - *ys = *ys - baseY; +class XRepeatStrategy { +public: + XRepeatStrategy(int32_t max) + : fXMax{SkScalar(max)} + , fXsMax{SkScalar(max)} + , fXsCap{SkScalar(nextafterf(SkScalar(max), 0.0f))} + , fXsInvMax{1.0f / SkScalar(max)} { } + + void tileXPoints(Sk4s* xs) { + Sk4s divX = *xs * fXsInvMax; + Sk4s modX = *xs - divX.floor() * fXsMax; + *xs = Sk4s::Min(fXsCap, modX); + SkASSERT(0 <= (*xs)[0] && (*xs)[0] < fXMax); + SkASSERT(0 <= (*xs)[1] && (*xs)[1] < fXMax); + SkASSERT(0 <= (*xs)[2] && (*xs)[2] < fXMax); + SkASSERT(0 <= (*xs)[3] && (*xs)[3] < fXMax); } template<typename Next> bool maybeProcessSpan(Span originalSpan, Next* next) { SkASSERT(!originalSpan.isEmpty()); - SkPoint start; - SkScalar length; - int count; + SkPoint start; SkScalar length; int count; std::tie(start, length, count) = originalSpan; // Make x and y in range on the tile. - SkScalar x = TileMod(X(start), fXMax[0]); - SkScalar y = TileMod(Y(start), fYMax[0]); - SkScalar xMax = fXMax[0]; - SkScalar xMin = 0.0f; + SkScalar x = tile_mod(X(start), fXMax); + SkScalar y = Y(start); SkScalar dx = length / (count - 1); // No need trying to go fast because the steps are larger than a tile or there is one point. - if (SkScalarAbs(dx) >= xMax || count <= 1) { + if (SkScalarAbs(dx) >= fXMax || count <= 1) { return false; } @@ -199,16 +206,16 @@ public: Span span({x, y}, length, count); if (dx > 0) { - while (!span.isEmpty() && span.endX() >= xMax) { - Span toDraw = span.breakAt(xMax, dx); + while (!span.isEmpty() && span.endX() >= fXMax) { + Span toDraw = span.breakAt(fXMax, dx); next->pointSpan(toDraw); - span.offset(-xMax); + span.offset(-fXMax); } } else { - while (!span.isEmpty() && span.endX() < xMin) { - Span toDraw = span.breakAt(xMin, dx); + while (!span.isEmpty() && span.endX() < 0.0f) { + Span toDraw = span.breakAt(0.0f, dx); next->pointSpan(toDraw); - span.offset(xMax); + span.offset(fXMax); } } @@ -220,19 +227,106 @@ public: return true; } - template <typename Next> - bool maybeProcessBilerpSpan(BilerpSpan bSpan, Next* next) { - return false; +private: + const SkScalar fXMax; + const Sk4s fXsMax; + const Sk4s fXsCap; + const Sk4s fXsInvMax; +}; + +class YRepeatStrategy { +public: + YRepeatStrategy(int32_t max) + : fYMax{SkScalar(max)} + , fYsMax{SkScalar(max)} + , fYsInvMax{1.0f / SkScalar(max)} { } + + void tileYPoints(Sk4s* ys) { + Sk4s divY = *ys * fYsInvMax; + Sk4s modY = *ys - divY.floor() * fYsMax; + *ys = modY; + SkASSERT(0 <= (*ys)[0] && (*ys)[0] < fYMax); + SkASSERT(0 <= (*ys)[1] && (*ys)[1] < fYMax); + SkASSERT(0 <= (*ys)[2] && (*ys)[2] < fYMax); + SkASSERT(0 <= (*ys)[3] && (*ys)[3] < fYMax); + } + + SkScalar tileY(SkScalar y) { + SkScalar answer = tile_mod(y, fYMax); + SkASSERT(0 <= answer && answer < fYMax); + return answer; + } + +private: + const SkScalar fYMax; + const Sk4s fYsMax; + const Sk4s fYsInvMax; +}; +// max = 40 +// mq2[x_] := Abs[(x - 40) - Floor[(x - 40)/80] * 80 - 40] +class XMirrorStrategy { +public: + XMirrorStrategy(int32_t max) + : fXsMax{SkScalar(max)} + , fXsCap{SkScalar(nextafterf(SkScalar(max), 0.0f))} + , fXsDoubleInvMax{1.0f / (2.0f * SkScalar(max))} { } + + void tileXPoints(Sk4s* xs) { + Sk4f bias = *xs - fXsMax; + Sk4f div = bias * fXsDoubleInvMax; + Sk4f mod = bias - div.floor() * 2.0f * fXsMax; + Sk4f unbias = mod - fXsMax; + *xs = Sk4f::Min(unbias.abs(), fXsCap); + SkASSERT(0 <= (*xs)[0] && (*xs)[0] < fXsMax[0]); + SkASSERT(0 <= (*xs)[1] && (*xs)[1] < fXsMax[0]); + SkASSERT(0 <= (*xs)[2] && (*xs)[2] < fXsMax[0]); + SkASSERT(0 <= (*xs)[3] && (*xs)[3] < fXsMax[0]); } + template <typename Next> + bool maybeProcessSpan(Span originalSpan, Next* next) { return false; } + private: - SkScalar TileMod(SkScalar x, SkScalar base) { - return x - std::floor(x / base) * base; + Sk4f fXsMax; + Sk4f fXsCap; + Sk4f fXsDoubleInvMax; +}; + +class YMirrorStrategy { +public: + YMirrorStrategy(int32_t max) + : fYMax{SkScalar(max)} + , fYsMax{SkScalar(max)} + , fYsCap{nextafterf(SkScalar(max), 0.0f)} + , fYsDoubleInvMax{1.0f / (2.0f * SkScalar(max))} { } + + void tileYPoints(Sk4s* ys) { + Sk4f bias = *ys - fYsMax; + Sk4f div = bias * fYsDoubleInvMax; + Sk4f mod = bias - div.floor() * 2.0f * fYsMax; + Sk4f unbias = mod - fYsMax; + *ys = Sk4f::Min(unbias.abs(), fYsCap); + SkASSERT(0 <= (*ys)[0] && (*ys)[0] < fYMax); + SkASSERT(0 <= (*ys)[1] && (*ys)[1] < fYMax); + SkASSERT(0 <= (*ys)[2] && (*ys)[2] < fYMax); + SkASSERT(0 <= (*ys)[3] && (*ys)[3] < fYMax); } - const Sk4s fXMax{0.0f}; - const Sk4s fXInvMax{0.0f}; - const Sk4s fYMax{0.0f}; - const Sk4s fYInvMax{0.0f}; + + SkScalar tileY(SkScalar y) { + SkScalar bias = y - fYMax; + SkScalar div = bias * fYsDoubleInvMax[0]; + SkScalar mod = bias - SkScalarFloorToScalar(div) * 2.0f * fYMax; + SkScalar unbias = mod - fYMax; + SkScalar answer = SkMinScalar(SkScalarAbs(unbias), fYsCap[0]); + SkASSERT(0 <= answer && answer < fYMax); + return answer; + }; + +private: + SkScalar fYMax; + Sk4f fYsMax; + Sk4f fYsCap; + Sk4f fYsDoubleInvMax; }; } // namespace |