diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/SkLinearBitmapPipeline.cpp | 686 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline.h | 112 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_core.h | 255 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_matrix.h | 118 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_sample.h | 1041 | ||||
-rw-r--r-- | src/core/SkLinearBitmapPipeline_tile.h | 412 | ||||
-rw-r--r-- | src/shaders/SkBitmapProcShader.cpp | 94 | ||||
-rw-r--r-- | src/shaders/SkBitmapProcShader.h | 1 |
8 files changed, 2714 insertions, 5 deletions
diff --git a/src/core/SkLinearBitmapPipeline.cpp b/src/core/SkLinearBitmapPipeline.cpp new file mode 100644 index 0000000000..cf2dfdc09f --- /dev/null +++ b/src/core/SkLinearBitmapPipeline.cpp @@ -0,0 +1,686 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "SkLinearBitmapPipeline.h" + +#include <algorithm> +#include <cmath> +#include <limits> +#include <tuple> + +#include "SkArenaAlloc.h" +#include "SkLinearBitmapPipeline_core.h" +#include "SkLinearBitmapPipeline_matrix.h" +#include "SkLinearBitmapPipeline_tile.h" +#include "SkLinearBitmapPipeline_sample.h" +#include "SkNx.h" +#include "SkOpts.h" +#include "SkPM4f.h" + +namespace { + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Matrix Stage +// MatrixStage uses a strategy to help complete the work of the different matrix stages. The strategy +// must implement the following methods: +// * processPoints(xs, ys) - must mutate the xs and ys for the stage. +// * maybeProcessSpan(span, next) - processes a span, i.e. a horizontal series of pixels +// to work over. +// span - encapsulation of span. +// next - a pointer to the next stage. +// maybeProcessSpan - returns false if it cannot process the span and the caller needs to +// fall back to point lists for processing. +template<typename Strategy, typename Next> +class MatrixStage final : public SkLinearBitmapPipeline::PointProcessorInterface { +public: + template <typename... Args> + MatrixStage(Next* next, Args&&... 
args) + : fNext{next} + , fStrategy{std::forward<Args>(args)...}{ } + + MatrixStage(Next* next, MatrixStage* stage) + : fNext{next} + , fStrategy{stage->fStrategy} { } + + void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + fStrategy.processPoints(&xs, &ys); + fNext->pointListFew(n, xs, ys); + } + + void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + fStrategy.processPoints(&xs, &ys); + fNext->pointList4(xs, ys); + } + + // The span you pass must not be empty. Falls back to point lists if the strategy declines. + void pointSpan(Span span) override { + SkASSERT(!span.isEmpty()); + if (!fStrategy.maybeProcessSpan(span, fNext)) { + span_fallback(span, this); + } + } + +private: + Next* const fNext; + Strategy fStrategy; +}; + +template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> +using TranslateMatrix = MatrixStage<TranslateMatrixStrategy, Next>; + +template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> +using ScaleMatrix = MatrixStage<ScaleMatrixStrategy, Next>; + +template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> +using AffineMatrix = MatrixStage<AffineMatrixStrategy, Next>; + +template <typename Next = SkLinearBitmapPipeline::PointProcessorInterface> +using PerspectiveMatrix = MatrixStage<PerspectiveMatrixStrategy, Next>; + + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Tile Stage - tiles points with separate X and Y strategies before passing them to the next stage. + +template<typename XStrategy, typename YStrategy, typename Next> +class CombinedTileStage final : public SkLinearBitmapPipeline::PointProcessorInterface { +public: + CombinedTileStage(Next* next, SkISize dimensions) + : fNext{next} + , fXStrategy{dimensions.width()} + , fYStrategy{dimensions.height()}{ } + + CombinedTileStage(Next* next, CombinedTileStage* stage) + : fNext{next} + , fXStrategy{stage->fXStrategy} + , fYStrategy{stage->fYStrategy} { } + + void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + 
fNext->pointListFew(n, xs, ys); + } + + void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + fXStrategy.tileXPoints(&xs); + fYStrategy.tileYPoints(&ys); + fNext->pointList4(xs, ys); + } + + // The span you pass must not be empty. + void pointSpan(Span span) override { + SkASSERT(!span.isEmpty()); + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + + if (span.count() == 1) { + // DANGER: + // The explicit casts from float to Sk4f are not usually necessary, but are here to + // work around an MSVC 2015u2 c++ code generation bug. This is tracked using skia bug + // 5566. + this->pointListFew(1, Sk4f{span.startX()}, Sk4f{span.startY()}); + return; + } + + SkScalar x = X(start); + SkScalar y = fYStrategy.tileY(Y(start)); + Span yAdjustedSpan{{x, y}, length, count}; + + if (!fXStrategy.maybeProcessSpan(yAdjustedSpan, fNext)) { + span_fallback(span, this); + } + } + +private: + Next* const fNext; + XStrategy fXStrategy; + YStrategy fYStrategy; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Specialized Samplers + +// RGBA8888UnitRepeatSrc - A sampler that takes advantage of the fact that src and destination +// are the same format and do not need any transformations in pixel space. Therefore, there is no +// need to convert them to HiFi pixel format. +class RGBA8888UnitRepeatSrc final : public SkLinearBitmapPipeline::SampleProcessorInterface, + public SkLinearBitmapPipeline::DestinationInterface { +public: + RGBA8888UnitRepeatSrc(const uint32_t* src, int32_t width) + : fSrc{src}, fWidth{width} { } + + void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + SkASSERT(fDest + n <= fEnd); + // At this point xs and ys should be >= 0, so trunc is the same as floor. 
+ Sk4i iXs = SkNx_cast<int>(xs); + Sk4i iYs = SkNx_cast<int>(ys); + + if (n >= 1) *fDest++ = *this->pixelAddress(iXs[0], iYs[0]); + if (n >= 2) *fDest++ = *this->pixelAddress(iXs[1], iYs[1]); + if (n >= 3) *fDest++ = *this->pixelAddress(iXs[2], iYs[2]); + } + + void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + SkASSERT(fDest + 4 <= fEnd); + Sk4i iXs = SkNx_cast<int>(xs); + Sk4i iYs = SkNx_cast<int>(ys); + *fDest++ = *this->pixelAddress(iXs[0], iYs[0]); + *fDest++ = *this->pixelAddress(iXs[1], iYs[1]); + *fDest++ = *this->pixelAddress(iXs[2], iYs[2]); + *fDest++ = *this->pixelAddress(iXs[3], iYs[3]); + } + + void pointSpan(Span span) override { + SkASSERT(fDest + span.count() <= fEnd); + if (span.length() != 0.0f) { + int32_t x = SkScalarTruncToInt(span.startX()); + int32_t y = SkScalarTruncToInt(span.startY()); + const uint32_t* src = this->pixelAddress(x, y); + memmove(fDest, src, span.count() * sizeof(uint32_t)); + fDest += span.count(); + } + } + + void repeatSpan(Span span, int32_t repeatCount) override { + SkASSERT(fDest + span.count() * repeatCount <= fEnd); + + int32_t x = SkScalarTruncToInt(span.startX()); + int32_t y = SkScalarTruncToInt(span.startY()); + const uint32_t* src = this->pixelAddress(x, y); + uint32_t* dest = fDest; + while (repeatCount --> 0) { + memmove(dest, src, span.count() * sizeof(uint32_t)); + dest += span.count(); + } + fDest = dest; + } + + void setDestination(void* dst, int count) override { + fDest = static_cast<uint32_t*>(dst); + fEnd = fDest + count; + } + +private: + const uint32_t* pixelAddress(int32_t x, int32_t y) { + return &fSrc[fWidth * y + x]; + } + const uint32_t* const fSrc; + const int32_t fWidth; + uint32_t* fDest; + uint32_t* fEnd; +}; + +// RGBA8888UnitRepeatSrcOver - A sampler that takes advantage of the fact that the src and +// destination are the same format, so pixels need no conversion to the HiFi pixel format. The +// source is blended onto the destination with SkOpts::srcover_srgb_srgb. 
+class RGBA8888UnitRepeatSrcOver final : public SkLinearBitmapPipeline::SampleProcessorInterface, + public SkLinearBitmapPipeline::DestinationInterface { +public: + RGBA8888UnitRepeatSrcOver(const uint32_t* src, int32_t width) + : fSrc{src}, fWidth{width} { } + + void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + SkASSERT(fDest + n <= fEnd); + // At this point xs and ys should be >= 0, so trunc is the same as floor. + Sk4i iXs = SkNx_cast<int>(xs); + Sk4i iYs = SkNx_cast<int>(ys); + + if (n >= 1) blendPixelAt(iXs[0], iYs[0]); + if (n >= 2) blendPixelAt(iXs[1], iYs[1]); + if (n >= 3) blendPixelAt(iXs[2], iYs[2]); + } + + void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + SkASSERT(fDest + 4 <= fEnd); + Sk4i iXs = SkNx_cast<int>(xs); + Sk4i iYs = SkNx_cast<int>(ys); + blendPixelAt(iXs[0], iYs[0]); + blendPixelAt(iXs[1], iYs[1]); + blendPixelAt(iXs[2], iYs[2]); + blendPixelAt(iXs[3], iYs[3]); + } + + void pointSpan(Span span) override { + if (span.length() != 0.0f) { + this->repeatSpan(span, 1); + } + } + + void repeatSpan(Span span, int32_t repeatCount) override { + SkASSERT(fDest + span.count() * repeatCount <= fEnd); + SkASSERT(span.count() > 0); + SkASSERT(repeatCount > 0); + + int32_t x = (int32_t)span.startX(); + int32_t y = (int32_t)span.startY(); + const uint32_t* beginSpan = this->pixelAddress(x, y); + + SkOpts::srcover_srgb_srgb(fDest, beginSpan, span.count() * repeatCount, span.count()); + + fDest += span.count() * repeatCount; + + SkASSERT(fDest <= fEnd); + } + + void setDestination(void* dst, int count) override { + SkASSERT(count > 0); + fDest = static_cast<uint32_t*>(dst); + fEnd = fDest + count; + } + +private: + const uint32_t* pixelAddress(int32_t x, int32_t y) { + return &fSrc[fWidth * y + x]; + } + + void blendPixelAt(int32_t x, int32_t y) { + const uint32_t* src = this->pixelAddress(x, y); + SkOpts::srcover_srgb_srgb(fDest, src, 1, 1); + fDest += 1; + } + + const uint32_t* const fSrc; + const int32_t fWidth; + uint32_t* 
fDest; + uint32_t* fEnd; +}; + +using Blender = SkLinearBitmapPipeline::BlendProcessorInterface; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Pixel Blender Stage - SrcFPPixel premultiplies if needed, scales by postAlpha and stores. +template <SkAlphaType alphaType> +class SrcFPPixel final : public Blender { +public: + SrcFPPixel(float postAlpha) : fPostAlpha{postAlpha} { } + SrcFPPixel(const SrcFPPixel& Blender) : fPostAlpha(Blender.fPostAlpha) {} + void SK_VECTORCALL blendPixel(Sk4f pixel) override { + SkASSERT(fDst + 1 <= fEnd ); + this->srcPixel(fDst, pixel, 0); + fDst += 1; + } + + void SK_VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) override { + SkASSERT(fDst + 4 <= fEnd); + SkPM4f* dst = fDst; + this->srcPixel(dst, p0, 0); + this->srcPixel(dst, p1, 1); + this->srcPixel(dst, p2, 2); + this->srcPixel(dst, p3, 3); + fDst += 4; + } + + void setDestination(void* dst, int count) override { + fDst = static_cast<SkPM4f*>(dst); + fEnd = fDst + count; + } + +private: + void SK_VECTORCALL srcPixel(SkPM4f* dst, Sk4f pixel, int index) { + check_pixel(pixel); + + Sk4f newPixel = pixel; + if (alphaType == kUnpremul_SkAlphaType) { + newPixel = Premultiply(pixel); + } + newPixel = newPixel * fPostAlpha; + newPixel.store(dst + index); + } + static Sk4f SK_VECTORCALL Premultiply(Sk4f pixel) { + float alpha = pixel[3]; + return pixel * Sk4f{alpha, alpha, alpha, 1.0f}; + } + + SkPM4f* fDst; + SkPM4f* fEnd; + float fPostAlpha; +}; + +} // namespace + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// SkLinearBitmapPipeline +SkLinearBitmapPipeline::~SkLinearBitmapPipeline() {} + +SkLinearBitmapPipeline::SkLinearBitmapPipeline( + const SkMatrix& inverse, + SkFilterQuality filterQuality, + SkShader::TileMode xTile, SkShader::TileMode yTile, + SkColor paintColor, + const SkPixmap& srcPixmap, + SkArenaAlloc* allocator) +{ + SkISize dimensions = srcPixmap.info().dimensions(); + const SkImageInfo& srcImageInfo = 
srcPixmap.info(); + + SkMatrix adjustedInverse = inverse; + if (filterQuality == kNone_SkFilterQuality) { + if (inverse.getScaleX() >= 0.0f) { + adjustedInverse.setTranslateX( + nextafterf(inverse.getTranslateX(), std::floor(inverse.getTranslateX()))); + } + if (inverse.getScaleY() >= 0.0f) { + adjustedInverse.setTranslateY( + nextafterf(inverse.getTranslateY(), std::floor(inverse.getTranslateY()))); + } + } + + SkScalar dx = adjustedInverse.getScaleX(); + + // If it is an index 8 color type, the sampler converts to unpremul for better fidelity. + SkAlphaType alphaType = srcImageInfo.alphaType(); + if (srcPixmap.colorType() == kIndex_8_SkColorType) { + alphaType = kUnpremul_SkAlphaType; + } + + float postAlpha = SkColorGetA(paintColor) * (1.0f / 255.0f); + // As the stages are built, the chooser function may skip a stage. For example, with the + // identity matrix, the matrix stage is skipped, and the tilerStage is the first stage. + auto blenderStage = this->chooseBlenderForShading(alphaType, postAlpha, allocator); + auto samplerStage = this->chooseSampler( + blenderStage, filterQuality, xTile, yTile, srcPixmap, paintColor, allocator); + auto tilerStage = this->chooseTiler( + samplerStage, dimensions, xTile, yTile, filterQuality, dx, allocator); + fFirstStage = this->chooseMatrix(tilerStage, adjustedInverse, allocator); + fLastStage = blenderStage; +} + +SkLinearBitmapPipeline::SkLinearBitmapPipeline( + const SkLinearBitmapPipeline& pipeline, + const SkPixmap& srcPixmap, + SkBlendMode mode, + const SkImageInfo& dstInfo, + SkArenaAlloc* allocator) +{ + SkASSERT(mode == SkBlendMode::kSrc || mode == SkBlendMode::kSrcOver); + SkASSERT(srcPixmap.info().colorType() == dstInfo.colorType() + && srcPixmap.info().colorType() == kRGBA_8888_SkColorType); + + SampleProcessorInterface* sampleStage; + if (mode == SkBlendMode::kSrc) { + auto sampler = allocator->make<RGBA8888UnitRepeatSrc>( + srcPixmap.writable_addr32(0, 0), srcPixmap.rowBytes() / 4); + sampleStage = sampler; + 
fLastStage = sampler; + } else { + auto sampler = allocator->make<RGBA8888UnitRepeatSrcOver>( + srcPixmap.writable_addr32(0, 0), srcPixmap.rowBytes() / 4); + sampleStage = sampler; + fLastStage = sampler; + } + + auto tilerStage = pipeline.fTileStageCloner(sampleStage, allocator); + auto matrixStage = pipeline.fMatrixStageCloner(tilerStage, allocator); + fFirstStage = matrixStage; +} + +void SkLinearBitmapPipeline::shadeSpan4f(int x, int y, SkPM4f* dst, int count) { + SkASSERT(count > 0); + this->blitSpan(x, y, dst, count); +} + +void SkLinearBitmapPipeline::blitSpan(int x, int y, void* dst, int count) { + SkASSERT(count > 0); + fLastStage->setDestination(dst, count); + + // The count and length arguments start out in a precise relation in order to keep the + // math correct through the different stages. Count is the number of pixels to produce. + // Since the code samples at pixel centers, length is the distance from the center of the + // first pixel to the center of the last pixel. This implies that length is count-1. 
+ fFirstStage->pointSpan(Span{{x + 0.5f, y + 0.5f}, count - 1.0f, count}); +} + +SkLinearBitmapPipeline::PointProcessorInterface* +SkLinearBitmapPipeline::chooseMatrix( + PointProcessorInterface* next, + const SkMatrix& inverse, + SkArenaAlloc* allocator) +{ + if (inverse.hasPerspective()) { + auto matrixStage = allocator->make<PerspectiveMatrix<>>( + next, + SkVector{inverse.getTranslateX(), inverse.getTranslateY()}, + SkVector{inverse.getScaleX(), inverse.getScaleY()}, + SkVector{inverse.getSkewX(), inverse.getSkewY()}, + SkVector{inverse.getPerspX(), inverse.getPerspY()}, + inverse.get(SkMatrix::kMPersp2)); + fMatrixStageCloner = + [matrixStage](PointProcessorInterface* cloneNext, SkArenaAlloc* memory) { + return memory->make<PerspectiveMatrix<>>(cloneNext, matrixStage); + }; + return matrixStage; + } else if (inverse.getSkewX() != 0.0f || inverse.getSkewY() != 0.0f) { + auto matrixStage = allocator->make<AffineMatrix<>>( + next, + SkVector{inverse.getTranslateX(), inverse.getTranslateY()}, + SkVector{inverse.getScaleX(), inverse.getScaleY()}, + SkVector{inverse.getSkewX(), inverse.getSkewY()}); + fMatrixStageCloner = + [matrixStage](PointProcessorInterface* cloneNext, SkArenaAlloc* memory) { + return memory->make<AffineMatrix<>>(cloneNext, matrixStage); + }; + return matrixStage; + } else if (inverse.getScaleX() != 1.0f || inverse.getScaleY() != 1.0f) { + auto matrixStage = allocator->make<ScaleMatrix<>>( + next, + SkVector{inverse.getTranslateX(), inverse.getTranslateY()}, + SkVector{inverse.getScaleX(), inverse.getScaleY()}); + fMatrixStageCloner = + [matrixStage](PointProcessorInterface* cloneNext, SkArenaAlloc* memory) { + return memory->make<ScaleMatrix<>>(cloneNext, matrixStage); + }; + return matrixStage; + } else if (inverse.getTranslateX() != 0.0f || inverse.getTranslateY() != 0.0f) { + auto matrixStage = allocator->make<TranslateMatrix<>>( + next, + SkVector{inverse.getTranslateX(), inverse.getTranslateY()}); + fMatrixStageCloner = + 
[matrixStage](PointProcessorInterface* cloneNext, SkArenaAlloc* memory) { + return memory->make<TranslateMatrix<>>(cloneNext, matrixStage); + }; + return matrixStage; + } else { + fMatrixStageCloner = [](PointProcessorInterface* cloneNext, SkArenaAlloc* memory) { + return cloneNext; + }; + return next; + } +} + +template <typename Tiler> +SkLinearBitmapPipeline::PointProcessorInterface* SkLinearBitmapPipeline::createTiler( + SampleProcessorInterface* next, + SkISize dimensions, + SkArenaAlloc* allocator) +{ + auto tilerStage = allocator->make<Tiler>(next, dimensions); + fTileStageCloner = + [tilerStage](SampleProcessorInterface* cloneNext, + SkArenaAlloc* memory) -> PointProcessorInterface* { + return memory->make<Tiler>(cloneNext, tilerStage); + }; + return tilerStage; +} + +template <typename XStrategy> +SkLinearBitmapPipeline::PointProcessorInterface* SkLinearBitmapPipeline::chooseTilerYMode( + SampleProcessorInterface* next, + SkShader::TileMode yMode, + SkISize dimensions, + SkArenaAlloc* allocator) +{ + switch (yMode) { + case SkShader::kClamp_TileMode: { + using Tiler = CombinedTileStage<XStrategy, YClampStrategy, SampleProcessorInterface>; + return this->createTiler<Tiler>(next, dimensions, allocator); + } + case SkShader::kRepeat_TileMode: { + using Tiler = CombinedTileStage<XStrategy, YRepeatStrategy, SampleProcessorInterface>; + return this->createTiler<Tiler>(next, dimensions, allocator); + } + case SkShader::kMirror_TileMode: { + using Tiler = CombinedTileStage<XStrategy, YMirrorStrategy, SampleProcessorInterface>; + return this->createTiler<Tiler>(next, dimensions, allocator); + } + } + + // Should never get here. 
+ SkFAIL("Not all Y tile cases covered."); + return nullptr; +} + +SkLinearBitmapPipeline::PointProcessorInterface* SkLinearBitmapPipeline::chooseTiler( + SampleProcessorInterface* next, + SkISize dimensions, + SkShader::TileMode xMode, + SkShader::TileMode yMode, + SkFilterQuality filterQuality, + SkScalar dx, + SkArenaAlloc* allocator) +{ + switch (xMode) { + case SkShader::kClamp_TileMode: + return this->chooseTilerYMode<XClampStrategy>(next, yMode, dimensions, allocator); + case SkShader::kRepeat_TileMode: + if (dx == 1.0f && filterQuality == kNone_SkFilterQuality) { + return this->chooseTilerYMode<XRepeatUnitScaleStrategy>( + next, yMode, dimensions, allocator); + } else { + return this->chooseTilerYMode<XRepeatStrategy>( + next, yMode, dimensions, allocator); + } + case SkShader::kMirror_TileMode: + return this->chooseTilerYMode<XMirrorStrategy>(next, yMode, dimensions, allocator); + } + + // Should never get here. + SkFAIL("Not all X tile cases covered."); + return nullptr; +} + +template <SkColorType colorType> +SkLinearBitmapPipeline::PixelAccessorInterface* + SkLinearBitmapPipeline::chooseSpecificAccessor( + const SkPixmap& srcPixmap, + SkArenaAlloc* allocator) +{ + if (srcPixmap.info().gammaCloseToSRGB()) { + using Accessor = PixelAccessor<colorType, kSRGB_SkGammaType>; + return allocator->make<Accessor>(srcPixmap); + } else { + using Accessor = PixelAccessor<colorType, kLinear_SkGammaType>; + return allocator->make<Accessor>(srcPixmap); + } +} + +SkLinearBitmapPipeline::PixelAccessorInterface* SkLinearBitmapPipeline::choosePixelAccessor( + const SkPixmap& srcPixmap, + const SkColor A8TintColor, + SkArenaAlloc* allocator) +{ + const SkImageInfo& imageInfo = srcPixmap.info(); + + switch (imageInfo.colorType()) { + case kAlpha_8_SkColorType: { + using Accessor = PixelAccessor<kAlpha_8_SkColorType, kLinear_SkGammaType>; + return allocator->make<Accessor>(srcPixmap, A8TintColor); + } + case kARGB_4444_SkColorType: + return 
this->chooseSpecificAccessor<kARGB_4444_SkColorType>(srcPixmap, allocator); + case kRGB_565_SkColorType: + return this->chooseSpecificAccessor<kRGB_565_SkColorType>(srcPixmap, allocator); + case kRGBA_8888_SkColorType: + return this->chooseSpecificAccessor<kRGBA_8888_SkColorType>(srcPixmap, allocator); + case kBGRA_8888_SkColorType: + return this->chooseSpecificAccessor<kBGRA_8888_SkColorType>(srcPixmap, allocator); + case kIndex_8_SkColorType: + return this->chooseSpecificAccessor<kIndex_8_SkColorType>(srcPixmap, allocator); + case kGray_8_SkColorType: + return this->chooseSpecificAccessor<kGray_8_SkColorType>(srcPixmap, allocator); + case kRGBA_F16_SkColorType: { + using Accessor = PixelAccessor<kRGBA_F16_SkColorType, kLinear_SkGammaType>; + return allocator->make<Accessor>(srcPixmap); + } + default: + // Should never get here. + SkFAIL("Pixel source not supported."); + return nullptr; + } +} + +SkLinearBitmapPipeline::SampleProcessorInterface* SkLinearBitmapPipeline::chooseSampler( + Blender* next, + SkFilterQuality filterQuality, + SkShader::TileMode xTile, SkShader::TileMode yTile, + const SkPixmap& srcPixmap, + const SkColor A8TintColor, + SkArenaAlloc* allocator) +{ + const SkImageInfo& imageInfo = srcPixmap.info(); + SkISize dimensions = imageInfo.dimensions(); + + // Special case samplers with fully expanded templates + if (imageInfo.gammaCloseToSRGB()) { + if (filterQuality == kNone_SkFilterQuality) { + switch (imageInfo.colorType()) { + case kN32_SkColorType: { + using Sampler = + NearestNeighborSampler< + PixelAccessor<kN32_SkColorType, kSRGB_SkGammaType>, Blender>; + return allocator->make<Sampler>(next, srcPixmap); + } + case kIndex_8_SkColorType: { + using Sampler = + NearestNeighborSampler< + PixelAccessor<kIndex_8_SkColorType, kSRGB_SkGammaType>, Blender>; + return allocator->make<Sampler>(next, srcPixmap); + } + default: + break; + } + } else { + switch (imageInfo.colorType()) { + case kN32_SkColorType: { + using Sampler = + BilerpSampler< + 
PixelAccessor<kN32_SkColorType, kSRGB_SkGammaType>, Blender>; + return allocator->make<Sampler>(next, dimensions, xTile, yTile, srcPixmap); + } + case kIndex_8_SkColorType: { + using Sampler = + BilerpSampler< + PixelAccessor<kIndex_8_SkColorType, kSRGB_SkGammaType>, Blender>; + return allocator->make<Sampler>(next, dimensions, xTile, yTile, srcPixmap); + } + default: + break; + } + } + } + + auto pixelAccessor = this->choosePixelAccessor(srcPixmap, A8TintColor, allocator); + // General cases. + if (filterQuality == kNone_SkFilterQuality) { + using Sampler = NearestNeighborSampler<PixelAccessorShim, Blender>; + return allocator->make<Sampler>(next, pixelAccessor); + } else { + using Sampler = BilerpSampler<PixelAccessorShim, Blender>; + return allocator->make<Sampler>(next, dimensions, xTile, yTile, pixelAccessor); + } +} + +Blender* SkLinearBitmapPipeline::chooseBlenderForShading( + SkAlphaType alphaType, + float postAlpha, + SkArenaAlloc* allocator) +{ + if (alphaType == kUnpremul_SkAlphaType) { + return allocator->make<SrcFPPixel<kUnpremul_SkAlphaType>>(postAlpha); + } else { + // kOpaque_SkAlphaType is treated the same as kPremul_SkAlphaType + return allocator->make<SrcFPPixel<kPremul_SkAlphaType>>(postAlpha); + } +} diff --git a/src/core/SkLinearBitmapPipeline.h b/src/core/SkLinearBitmapPipeline.h new file mode 100644 index 0000000000..6f6e2ae602 --- /dev/null +++ b/src/core/SkLinearBitmapPipeline.h @@ -0,0 +1,112 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkLinearBitmapPipeline_DEFINED +#define SkLinearBitmapPipeline_DEFINED + +#include "SkArenaAlloc.h" +#include "SkColor.h" +#include "SkImageInfo.h" +#include "SkMatrix.h" +#include "SkShader.h" + +class SkEmbeddableLinearPipeline; + +enum SkGammaType { + kLinear_SkGammaType, + kSRGB_SkGammaType, +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////// +// SkLinearBitmapPipeline - encapsulates all the machinery for doing floating point pixel +// processing in a linear color space. +// Note: this class has unusual alignment requirements due to its use of SIMD instructions. The +// class SkEmbeddableLinearPipeline, forward-declared above, is meant to manage these requirements. +class SkLinearBitmapPipeline { +public: + SkLinearBitmapPipeline( + const SkMatrix& inverse, + SkFilterQuality filterQuality, + SkShader::TileMode xTile, SkShader::TileMode yTile, + SkColor paintColor, + const SkPixmap& srcPixmap, + SkArenaAlloc* allocator); + + SkLinearBitmapPipeline( + const SkLinearBitmapPipeline& pipeline, + const SkPixmap& srcPixmap, + SkBlendMode, + const SkImageInfo& dstInfo, + SkArenaAlloc* allocator); + + ~SkLinearBitmapPipeline(); + + void shadeSpan4f(int x, int y, SkPM4f* dst, int count); + void blitSpan(int32_t x, int32_t y, void* dst, int count); + + class PointProcessorInterface; + class SampleProcessorInterface; + class BlendProcessorInterface; + class DestinationInterface; + class PixelAccessorInterface; + + using MatrixCloner = + std::function<PointProcessorInterface* (PointProcessorInterface*, SkArenaAlloc*)>; + using TilerCloner = + std::function<PointProcessorInterface* (SampleProcessorInterface*, SkArenaAlloc*)>; + + PointProcessorInterface* chooseMatrix( + PointProcessorInterface* next, + const SkMatrix& inverse, + SkArenaAlloc* allocator); + + template <typename Tiler> + PointProcessorInterface* createTiler(SampleProcessorInterface* next, SkISize dimensions, + SkArenaAlloc* allocator); + + template <typename XStrategy> 
+ PointProcessorInterface* chooseTilerYMode( + SampleProcessorInterface* next, SkShader::TileMode yMode, SkISize dimensions, + SkArenaAlloc* allocator); + + PointProcessorInterface* chooseTiler( + SampleProcessorInterface* next, + SkISize dimensions, + SkShader::TileMode xMode, SkShader::TileMode yMode, + SkFilterQuality filterQuality, + SkScalar dx, + SkArenaAlloc* allocator); + + template <SkColorType colorType> + PixelAccessorInterface* chooseSpecificAccessor(const SkPixmap& srcPixmap, + SkArenaAlloc* allocator); + + PixelAccessorInterface* choosePixelAccessor( + const SkPixmap& srcPixmap, + const SkColor A8TintColor, + SkArenaAlloc* allocator); + + SampleProcessorInterface* chooseSampler( + BlendProcessorInterface* next, + SkFilterQuality filterQuality, + SkShader::TileMode xTile, SkShader::TileMode yTile, + const SkPixmap& srcPixmap, + const SkColor A8TintColor, + SkArenaAlloc* allocator); + + BlendProcessorInterface* chooseBlenderForShading( + SkAlphaType alphaType, + float postAlpha, + SkArenaAlloc* allocator); + + PointProcessorInterface* fFirstStage; + MatrixCloner fMatrixStageCloner; + TilerCloner fTileStageCloner; + DestinationInterface* fLastStage; +}; + +#endif // SkLinearBitmapPipeline_DEFINED diff --git a/src/core/SkLinearBitmapPipeline_core.h b/src/core/SkLinearBitmapPipeline_core.h new file mode 100644 index 0000000000..ce6c05b752 --- /dev/null +++ b/src/core/SkLinearBitmapPipeline_core.h @@ -0,0 +1,255 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkLinearBitmapPipeline_core_DEFINED +#define SkLinearBitmapPipeline_core_DEFINED + +#include <algorithm> +#include <cmath> +#include "SkNx.h" + +// New bilerp strategy: +// Pass through on bilerpList4 and bilerpListFew (analogs to pointList), introduce bilerpEdge +// which takes 4 points. If the sample spans an edge, then break it into a bilerpEdge. 
Bilerp +// span then becomes a normal span except in special cases where an extra Y is given. The bilerp +// needs to stay single point calculations until the tile layer. +// TODO: +// - edge span predicate. +// - introduce new point API +// - Add tile for new api. + +namespace { +struct X { + explicit X(SkScalar val) : fVal{val} { } + explicit X(SkPoint pt) : fVal{pt.fX} { } + explicit X(SkSize s) : fVal{s.fWidth} { } + explicit X(SkISize s) : fVal((SkScalar)s.fWidth) { } + operator SkScalar () const {return fVal;} +private: + SkScalar fVal; +}; + +struct Y { + explicit Y(SkScalar val) : fVal{val} { } + explicit Y(SkPoint pt) : fVal{pt.fY} { } + explicit Y(SkSize s) : fVal{s.fHeight} { } + explicit Y(SkISize s) : fVal((SkScalar)s.fHeight) { } + operator SkScalar () const {return fVal;} +private: + SkScalar fVal; +}; + +// The Span class enables efficient processing of horizontal spans of pixels. +// * start - the point where to start the span. +// * length - the distance to traverse in source space (first to last sample point). +// * count - the number of pixels to produce in destination space. +// Both start and length are mapped through the inverse matrix to produce values in source +// space. After the matrix operation, the tilers may break the spans up into smaller spans. +// The tilers can produce spans that seem nonsensical. +// * The clamp tiler can create spans with length of 0. This indicates to copy an edge pixel out +// to the edge of the destination scan. +// * The mirror tiler can produce spans with negative length. This indicates that the source +// should be traversed in the opposite direction to the destination pixels. 
+class Span { +public: + Span(SkPoint start, SkScalar length, int count) + : fStart(start) + , fLength(length) + , fCount{count} { + SkASSERT(std::isfinite(length)); + } + + operator std::tuple<SkPoint&, SkScalar&, int&>() { + return std::tie(fStart, fLength, fCount); + } + + bool isEmpty() const { return 0 == fCount; } + void clear() { fCount = 0; } + int count() const { return fCount; } + SkScalar length() const { return fLength; } + SkScalar startX() const { return X(fStart); } + SkScalar endX() const { return this->startX() + this->length(); } + SkScalar startY() const { return Y(fStart); } + Span emptySpan() { return Span{{0.0, 0.0}, 0.0f, 0}; } + + bool completelyWithin(SkScalar xMin, SkScalar xMax) const { + SkScalar sMin, sMax; + std::tie(sMin, sMax) = std::minmax(startX(), endX()); + return xMin <= sMin && sMax < xMax; + } + + void offset(SkScalar offsetX) { + fStart.offset(offsetX, 0.0f); + } + + Span breakAt(SkScalar breakX, SkScalar dx) { + SkASSERT(std::isfinite(breakX)); + SkASSERT(std::isfinite(dx)); + SkASSERT(dx != 0.0f); + + if (this->isEmpty()) { + return this->emptySpan(); + } + + int dxSteps = SkScalarFloorToInt((breakX - this->startX()) / dx); + + if (dxSteps < 0) { + // The span is wholly after breakX. + return this->emptySpan(); + } else if (dxSteps >= fCount) { + // The span is wholly before breakX. + Span answer = *this; + this->clear(); + return answer; + } + + // Calculate the values for the span to cleave off. + SkScalar newLength = dxSteps * dx; + + // If the last (or first if count = 1) sample lands directly on the boundary. Include it + // when dx < 0 and exclude it when dx > 0. + // Reasoning: + // dx > 0: The sample point on the boundary is part of the next span because the entire + // pixel is after the boundary. + // dx < 0: The sample point on the boundary is part of the current span because the + // entire pixel is before the boundary. 
+ if (this->startX() + newLength == breakX && dx > 0) { + if (dxSteps > 0) { + dxSteps -= 1; + newLength -= dx; + } else { + return this->emptySpan(); + } + } + + // Calculate new span parameters + SkPoint newStart = fStart; + int newCount = dxSteps + 1; + SkASSERT(newCount > 0); + + // Update this span to reflect the break. + SkScalar lengthToStart = newLength + dx; + fLength -= lengthToStart; + fCount -= newCount; + fStart = {this->startX() + lengthToStart, Y(fStart)}; + + return Span{newStart, newLength, newCount}; + } + + void clampToSinglePixel(SkPoint pixel) { + fStart = pixel; + fLength = 0.0f; + } + +private: + SkPoint fStart; + SkScalar fLength; + int fCount; +}; + +template<typename Stage> +void span_fallback(Span span, Stage* stage) { + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + Sk4f startXs{X(start)}; + Sk4f ys{Y(start)}; + Sk4f mults = {0.0f, 1.0f, 2.0f, 3.0f}; + + // Initializing this is not needed, but some compilers can't figure this out. + Sk4s dXs{0.0f}; + if (count > 1) { + SkScalar dx = length / (count - 1); + dXs = Sk4f{dx}; + } + + // Instead of using xs = xs + dx every round, this uses xs = i * dx + X(start). This + // eliminates the rounding error for the sum. 
+ Sk4f xs = startXs + mults * dXs; + while (count >= 4) { + stage->pointList4(xs, ys); + + mults += Sk4f{4.0f}; + xs = mults * dXs + startXs; + count -= 4; + } + + if (count > 0) { + stage->pointListFew(count, xs, ys); + } +} + +inline Sk4f SK_VECTORCALL check_pixel(const Sk4f& pixel) { + SkASSERTF(0.0f <= pixel[0] && pixel[0] <= 1.0f, "pixel[0]: %f", pixel[0]); + SkASSERTF(0.0f <= pixel[1] && pixel[1] <= 1.0f, "pixel[1]: %f", pixel[1]); + SkASSERTF(0.0f <= pixel[2] && pixel[2] <= 1.0f, "pixel[2]: %f", pixel[2]); + SkASSERTF(0.0f <= pixel[3] && pixel[3] <= 1.0f, "pixel[3]: %f", pixel[3]); + return pixel; +} + +} // namespace + +class SkLinearBitmapPipeline::PointProcessorInterface { +public: + virtual ~PointProcessorInterface() { } + // Take the first n (where 0 < n && n < 4) items from xs and ys and sample those points. For + // nearest neighbor, that means just taking the floor xs and ys. For bilerp, this means + // to expand the bilerp filter around the point and sample using that filter. + virtual void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) = 0; + // Same as pointListFew, but n = 4. + virtual void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) = 0; + // A span is a compact form of sample points that are obtained by mapping points from + // destination space to source space. This is used for horizontal lines only, and is mainly + // used to take advantage of memory coherence for horizontal spans. + virtual void pointSpan(Span span) = 0; +}; + +class SkLinearBitmapPipeline::SampleProcessorInterface + : public SkLinearBitmapPipeline::PointProcessorInterface { +public: + // Used for nearest neighbor when scale factor is 1.0. The span can just be repeated with no + // edge pixel alignment problems. This is for handling a very common case. 
+ virtual void repeatSpan(Span span, int32_t repeatCount) = 0; +}; + +class SkLinearBitmapPipeline::DestinationInterface { +public: + virtual ~DestinationInterface() { } + // Count is normally not needed, but in these early stages of development it is useful to + // check bounds. + // TODO(herb): 4/6/2016 - remove count when code is stable. + virtual void setDestination(void* dst, int count) = 0; +}; + +class SkLinearBitmapPipeline::BlendProcessorInterface + : public SkLinearBitmapPipeline::DestinationInterface { +public: + virtual void SK_VECTORCALL blendPixel(Sk4f pixel0) = 0; + virtual void SK_VECTORCALL blend4Pixels(Sk4f p0, Sk4f p1, Sk4f p2, Sk4f p3) = 0; +}; + +class SkLinearBitmapPipeline::PixelAccessorInterface { +public: + virtual ~PixelAccessorInterface() { } + virtual void SK_VECTORCALL getFewPixels( + int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const = 0; + + virtual void SK_VECTORCALL get4Pixels( + Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; + + virtual void get4Pixels( + const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const = 0; + + virtual Sk4f getPixelFromRow(const void* row, int index) const = 0; + + virtual Sk4f getPixelAt(int index) const = 0; + + virtual const void* row(int y) const = 0; +}; + +#endif // SkLinearBitmapPipeline_core_DEFINED diff --git a/src/core/SkLinearBitmapPipeline_matrix.h b/src/core/SkLinearBitmapPipeline_matrix.h new file mode 100644 index 0000000000..78f723148e --- /dev/null +++ b/src/core/SkLinearBitmapPipeline_matrix.h @@ -0,0 +1,118 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkLinearBitmapPipeline_matrix_DEFINED +#define SkLinearBitmapPipeline_matrix_DEFINED + +#include "SkLinearBitmapPipeline_core.h" + +namespace { +class TranslateMatrixStrategy { +public: + TranslateMatrixStrategy(SkVector offset) + : fXOffset{X(offset)} + , fYOffset{Y(offset)} { } + + void processPoints(Sk4s* xs, Sk4s* ys) const { + *xs = *xs + fXOffset; + *ys = *ys + fYOffset; + } + + template <typename Next> + bool maybeProcessSpan(Span span, Next* next) const { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + next->pointSpan(Span{start + SkPoint{fXOffset, fYOffset}, length, count}); + return true; + } + +private: + const SkScalar fXOffset, fYOffset; +}; + +class ScaleMatrixStrategy { +public: + ScaleMatrixStrategy(SkVector offset, SkVector scale) + : fXOffset{X(offset)}, fYOffset{Y(offset)} + , fXScale{X(scale)}, fYScale{Y(scale)} { } + void processPoints(Sk4s* xs, Sk4s* ys) const { + *xs = *xs * fXScale + fXOffset; + *ys = *ys * fYScale + fYOffset; + } + + template <typename Next> + bool maybeProcessSpan(Span span, Next* next) const { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + SkPoint newStart = + SkPoint{X(start) * fXScale + fXOffset, Y(start) * fYScale + fYOffset}; + SkScalar newLength = length * fXScale; + next->pointSpan(Span{newStart, newLength, count}); + return true; + } + +private: + const SkScalar fXOffset, fYOffset; + const SkScalar fXScale, fYScale; +}; + +class AffineMatrixStrategy { +public: + AffineMatrixStrategy(SkVector offset, SkVector scale, SkVector skew) + : fXOffset{X(offset)}, fYOffset{Y(offset)} + , fXScale{X(scale)}, fYScale{Y(scale)} + , fXSkew{X(skew)}, fYSkew{Y(skew)} { } + void processPoints(Sk4s* xs, Sk4s* ys) const { + Sk4s newXs = fXScale * *xs + fXSkew * *ys + fXOffset; + Sk4s newYs = fYSkew * *xs + fYScale * *ys + fYOffset; + + *xs = newXs; + *ys = newYs; + } + + template <typename Next> + bool maybeProcessSpan(Span span, Next* 
next) const { + return false; + } + +private: + const SkScalar fXOffset, fYOffset; + const SkScalar fXScale, fYScale; + const SkScalar fXSkew, fYSkew; +}; + +class PerspectiveMatrixStrategy { +public: + PerspectiveMatrixStrategy(SkVector offset, SkVector scale, SkVector skew, + SkVector zSkew, SkScalar zOffset) + : fXOffset{X(offset)}, fYOffset{Y(offset)}, fZOffset{zOffset} + , fXScale{X(scale)}, fYScale{Y(scale)} + , fXSkew{X(skew)}, fYSkew{Y(skew)}, fZXSkew{X(zSkew)}, fZYSkew{Y(zSkew)} { } + void processPoints(Sk4s* xs, Sk4s* ys) const { + Sk4s newXs = fXScale * *xs + fXSkew * *ys + fXOffset; + Sk4s newYs = fYSkew * *xs + fYScale * *ys + fYOffset; + Sk4s newZs = fZXSkew * *xs + fZYSkew * *ys + fZOffset; + + *xs = newXs / newZs; + *ys = newYs / newZs; + } + + template <typename Next> + bool maybeProcessSpan(Span span, Next* next) const { + return false; + } + +private: + const SkScalar fXOffset, fYOffset, fZOffset; + const SkScalar fXScale, fYScale; + const SkScalar fXSkew, fYSkew, fZXSkew, fZYSkew; +}; + + +} // namespace + +#endif // SkLinearBitmapPipeline_matrix_DEFINED diff --git a/src/core/SkLinearBitmapPipeline_sample.h b/src/core/SkLinearBitmapPipeline_sample.h new file mode 100644 index 0000000000..a7f5d7383e --- /dev/null +++ b/src/core/SkLinearBitmapPipeline_sample.h @@ -0,0 +1,1041 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#ifndef SkLinearBitmapPipeline_sampler_DEFINED +#define SkLinearBitmapPipeline_sampler_DEFINED + +#include <tuple> + +#include "SkAutoMalloc.h" +#include "SkColor.h" +#include "SkColorPriv.h" +#include "SkFixed.h" // for SkFixed1 only. Don't use SkFixed in this file. 
+#include "SkHalf.h" +#include "SkLinearBitmapPipeline_core.h" +#include "SkNx.h" +#include "SkPM4fPriv.h" + +namespace { +// Explaination of the math: +// 1 - x x +// +--------+--------+ +// | | | +// 1 - y | px00 | px10 | +// | | | +// +--------+--------+ +// | | | +// y | px01 | px11 | +// | | | +// +--------+--------+ +// +// +// Given a pixelxy each is multiplied by a different factor derived from the fractional part of x +// and y: +// * px00 -> (1 - x)(1 - y) = 1 - x - y + xy +// * px10 -> x(1 - y) = x - xy +// * px01 -> (1 - x)y = y - xy +// * px11 -> xy +// So x * y is calculated first and then used to calculate all the other factors. +static Sk4s SK_VECTORCALL bilerp4(Sk4s xs, Sk4s ys, Sk4f px00, Sk4f px10, + Sk4f px01, Sk4f px11) { + // Calculate fractional xs and ys. + Sk4s fxs = xs - xs.floor(); + Sk4s fys = ys - ys.floor(); + Sk4s fxys{fxs * fys}; + Sk4f sum = px11 * fxys; + sum = sum + px01 * (fys - fxys); + sum = sum + px10 * (fxs - fxys); + sum = sum + px00 * (Sk4f{1.0f} - fxs - fys + fxys); + return sum; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// PixelGetter is the lowest level interface to the source data. There is a PixelConverter for each +// of the different SkColorTypes. +template <SkColorType, SkGammaType> class PixelConverter; + +// Alpha handling: +// The alpha from the paint (tintColor) is used in the blend part of the pipeline to modulate +// the entire bitmap. So, the tint color is given an alpha of 1.0 so that the later alpha can +// modulate this color later. 
+template <> +class PixelConverter<kAlpha_8_SkColorType, kLinear_SkGammaType> { +public: + using Element = uint8_t; + PixelConverter(const SkPixmap& srcPixmap, SkColor tintColor) { + fTintColor = SkColor4f::FromColor(tintColor); + fTintColor.fA = 1.0f; + } + + Sk4f toSk4f(const Element pixel) const { + return Sk4f::Load(&fTintColor) * (pixel * (1.0f/255.0f)); + } + +private: + SkColor4f fTintColor; +}; + +template <SkGammaType gammaType> +static inline Sk4f pmcolor_to_rgba(SkPMColor pixel) { + return swizzle_rb_if_bgra( + (gammaType == kSRGB_SkGammaType) ? Sk4f_fromS32(pixel) + : Sk4f_fromL32(pixel)); +} + +template <SkGammaType gammaType> +class PixelConverter<kRGB_565_SkColorType, gammaType> { +public: + using Element = uint16_t; + PixelConverter(const SkPixmap& srcPixmap) { } + + Sk4f toSk4f(Element pixel) const { + return pmcolor_to_rgba<gammaType>(SkPixel16ToPixel32(pixel)); + } +}; + +template <SkGammaType gammaType> +class PixelConverter<kARGB_4444_SkColorType, gammaType> { +public: + using Element = uint16_t; + PixelConverter(const SkPixmap& srcPixmap) { } + + Sk4f toSk4f(Element pixel) const { + return pmcolor_to_rgba<gammaType>(SkPixel4444ToPixel32(pixel)); + } +}; + +template <SkGammaType gammaType> +class PixelConverter<kRGBA_8888_SkColorType, gammaType> { +public: + using Element = uint32_t; + PixelConverter(const SkPixmap& srcPixmap) { } + + Sk4f toSk4f(Element pixel) const { + return gammaType == kSRGB_SkGammaType + ? Sk4f_fromS32(pixel) + : Sk4f_fromL32(pixel); + } +}; + +template <SkGammaType gammaType> +class PixelConverter<kBGRA_8888_SkColorType, gammaType> { +public: + using Element = uint32_t; + PixelConverter(const SkPixmap& srcPixmap) { } + + Sk4f toSk4f(Element pixel) const { + return swizzle_rb( + gammaType == kSRGB_SkGammaType ? 
Sk4f_fromS32(pixel) : Sk4f_fromL32(pixel)); + } +}; + +template <SkGammaType gammaType> +class PixelConverter<kIndex_8_SkColorType, gammaType> { +public: + using Element = uint8_t; + PixelConverter(const SkPixmap& srcPixmap) + : fColorTableSize(srcPixmap.ctable()->count()){ + SkColorTable* skColorTable = srcPixmap.ctable(); + SkASSERT(skColorTable != nullptr); + + fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); + for (int i = 0; i < fColorTableSize; i++) { + fColorTable[i] = pmcolor_to_rgba<gammaType>((*skColorTable)[i]); + } + } + + PixelConverter(const PixelConverter& strategy) + : fColorTableSize{strategy.fColorTableSize}{ + fColorTable = (Sk4f*)SkAlign16((intptr_t)fColorTableStorage.get()); + for (int i = 0; i < fColorTableSize; i++) { + fColorTable[i] = strategy.fColorTable[i]; + } + } + + Sk4f toSk4f(Element index) const { + return fColorTable[index]; + } + +private: + static const size_t kColorTableSize = sizeof(Sk4f[256]) + 12; + const int fColorTableSize; + SkAutoMalloc fColorTableStorage{kColorTableSize}; + Sk4f* fColorTable; +}; + +template <SkGammaType gammaType> +class PixelConverter<kGray_8_SkColorType, gammaType> { +public: + using Element = uint8_t; + PixelConverter(const SkPixmap& srcPixmap) { } + + Sk4f toSk4f(Element pixel) const { + float gray = (gammaType == kSRGB_SkGammaType) + ? 
sk_linear_from_srgb[pixel] + : pixel * (1/255.0f); + return {gray, gray, gray, 1.0f}; + } +}; + +template <> +class PixelConverter<kRGBA_F16_SkColorType, kLinear_SkGammaType> { +public: + using Element = uint64_t; + PixelConverter(const SkPixmap& srcPixmap) { } + + Sk4f toSk4f(const Element pixel) const { + return SkHalfToFloat_finite_ftz(pixel); + } +}; + +class PixelAccessorShim { +public: + explicit PixelAccessorShim(SkLinearBitmapPipeline::PixelAccessorInterface* accessor) + : fPixelAccessor(accessor) { } + + void SK_VECTORCALL getFewPixels( + int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const { + fPixelAccessor->getFewPixels(n, xs, ys, px0, px1, px2); + } + + void SK_VECTORCALL get4Pixels( + Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { + fPixelAccessor->get4Pixels(xs, ys, px0, px1, px2, px3); + } + + void get4Pixels( + const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const { + fPixelAccessor->get4Pixels(src, index, px0, px1, px2, px3); + } + + Sk4f getPixelFromRow(const void* row, int index) const { + return fPixelAccessor->getPixelFromRow(row, index); + } + + Sk4f getPixelAt(int index) const { + return fPixelAccessor->getPixelAt(index); + } + + const void* row(int y) const { + return fPixelAccessor->row(y); + } + +private: + SkLinearBitmapPipeline::PixelAccessorInterface* const fPixelAccessor; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// PixelAccessor handles all the same plumbing for all the PixelGetters. +template <SkColorType colorType, SkGammaType gammaType> +class PixelAccessor final : public SkLinearBitmapPipeline::PixelAccessorInterface { + using Element = typename PixelConverter<colorType, gammaType>::Element; +public: + template <typename... Args> + PixelAccessor(const SkPixmap& srcPixmap, Args&&... 
args) + : fSrc{static_cast<const Element*>(srcPixmap.addr())} + , fWidth{srcPixmap.rowBytesAsPixels()} + , fConverter{srcPixmap, std::move<Args>(args)...} { } + + void SK_VECTORCALL getFewPixels ( + int n, Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2) const override { + Sk4i bufferLoc = ys * fWidth + xs; + switch (n) { + case 3: + *px2 = this->getPixelAt(bufferLoc[2]); + case 2: + *px1 = this->getPixelAt(bufferLoc[1]); + case 1: + *px0 = this->getPixelAt(bufferLoc[0]); + default: + break; + } + } + + void SK_VECTORCALL get4Pixels( + Sk4i xs, Sk4i ys, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { + Sk4i bufferLoc = ys * fWidth + xs; + *px0 = this->getPixelAt(bufferLoc[0]); + *px1 = this->getPixelAt(bufferLoc[1]); + *px2 = this->getPixelAt(bufferLoc[2]); + *px3 = this->getPixelAt(bufferLoc[3]); + } + + void get4Pixels( + const void* src, int index, Sk4f* px0, Sk4f* px1, Sk4f* px2, Sk4f* px3) const override { + *px0 = this->getPixelFromRow(src, index + 0); + *px1 = this->getPixelFromRow(src, index + 1); + *px2 = this->getPixelFromRow(src, index + 2); + *px3 = this->getPixelFromRow(src, index + 3); + } + + Sk4f getPixelFromRow(const void* row, int index) const override { + const Element* src = static_cast<const Element*>(row); + return fConverter.toSk4f(src[index]); + } + + Sk4f getPixelAt(int index) const override { + return this->getPixelFromRow(fSrc, index); + } + + const void* row(int y) const override { return fSrc + y * fWidth; } + +private: + const Element* const fSrc; + const int fWidth; + PixelConverter<colorType, gammaType> fConverter; +}; + +// We're moving through source space at a rate of 1 source pixel per 1 dst pixel. +// We'll never re-use pixels, but we can at least load contiguous pixels. 
+template <typename Next, typename Strategy> +static void src_strategy_blend(Span span, Next* next, Strategy* strategy) { + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + int ix = SkScalarFloorToInt(X(start)); + const void* row = strategy->row((int)std::floor(Y(start))); + if (length > 0) { + while (count >= 4) { + Sk4f px0, px1, px2, px3; + strategy->get4Pixels(row, ix, &px0, &px1, &px2, &px3); + next->blend4Pixels(px0, px1, px2, px3); + ix += 4; + count -= 4; + } + + while (count > 0) { + next->blendPixel(strategy->getPixelFromRow(row, ix)); + ix += 1; + count -= 1; + } + } else { + while (count >= 4) { + Sk4f px0, px1, px2, px3; + strategy->get4Pixels(row, ix - 3, &px3, &px2, &px1, &px0); + next->blend4Pixels(px0, px1, px2, px3); + ix -= 4; + count -= 4; + } + + while (count > 0) { + next->blendPixel(strategy->getPixelFromRow(row, ix)); + ix -= 1; + count -= 1; + } + } +} + +// -- NearestNeighborSampler ----------------------------------------------------------------------- +// NearestNeighborSampler - use nearest neighbor filtering to create runs of destination pixels. +template<typename Accessor, typename Next> +class NearestNeighborSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { +public: + template<typename... Args> + NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, Args&& ... 
args) + : fNext{next}, fAccessor{std::forward<Args>(args)...} { } + + NearestNeighborSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, + const NearestNeighborSampler& sampler) + : fNext{next}, fAccessor{sampler.fAccessor} { } + + void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + SkASSERT(0 < n && n < 4); + Sk4f px0, px1, px2; + fAccessor.getFewPixels(n, SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2); + if (n >= 1) fNext->blendPixel(px0); + if (n >= 2) fNext->blendPixel(px1); + if (n >= 3) fNext->blendPixel(px2); + } + + void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + Sk4f px0, px1, px2, px3; + fAccessor.get4Pixels(SkNx_cast<int>(xs), SkNx_cast<int>(ys), &px0, &px1, &px2, &px3); + fNext->blend4Pixels(px0, px1, px2, px3); + } + + void pointSpan(Span span) override { + SkASSERT(!span.isEmpty()); + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + SkScalar absLength = SkScalarAbs(length); + if (absLength < (count - 1)) { + this->spanSlowRate(span); + } else if (absLength == (count - 1)) { + src_strategy_blend(span, fNext, &fAccessor); + } else { + this->spanFastRate(span); + } + } + + void repeatSpan(Span span, int32_t repeatCount) override { + while (repeatCount > 0) { + this->pointSpan(span); + repeatCount--; + } + } + +private: + // When moving through source space more slowly than dst space (zoomed in), + // we'll be sampling from the same source pixel more than once. + void spanSlowRate(Span span) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + SkScalar x = X(start); + // fx is a fixed 48.16 number. + int64_t fx = static_cast<int64_t>(x * SK_Fixed1); + SkScalar dx = length / (count - 1); + // fdx is a fixed 48.16 number. 
+ int64_t fdx = static_cast<int64_t>(dx * SK_Fixed1); + + const void* row = fAccessor.row((int)std::floor(Y(start))); + Next* next = fNext; + + int64_t ix = fx >> 16; + int64_t prevIX = ix; + Sk4f fpixel = fAccessor.getPixelFromRow(row, ix); + + // When dx is less than one, each pixel is used more than once. Using the fixed point fx + // allows the code to quickly check that the same pixel is being used. The code uses this + // same pixel check to do the sRGB and normalization only once. + auto getNextPixel = [&]() { + if (ix != prevIX) { + fpixel = fAccessor.getPixelFromRow(row, ix); + prevIX = ix; + } + fx += fdx; + ix = fx >> 16; + return fpixel; + }; + + while (count >= 4) { + Sk4f px0 = getNextPixel(); + Sk4f px1 = getNextPixel(); + Sk4f px2 = getNextPixel(); + Sk4f px3 = getNextPixel(); + next->blend4Pixels(px0, px1, px2, px3); + count -= 4; + } + while (count > 0) { + next->blendPixel(getNextPixel()); + count -= 1; + } + } + + // We're moving through source space at a rate of 1 source pixel per 1 dst pixel. + // We'll never re-use pixels, but we can at least load contiguous pixels. + void spanUnitRate(Span span) { + src_strategy_blend(span, fNext, &fAccessor); + } + + // We're moving through source space faster than dst (zoomed out), + // so we'll never reuse a source pixel or be able to do contiguous loads. + void spanFastRate(Span span) { + span_fallback(span, this); + } + + Next* const fNext; + Accessor fAccessor; +}; + +// From an edgeType, the integer value of a pixel vs, and the integer value of the extreme edge +// vMax, take the point which might be off the tile by one pixel and either wrap it or pin it to +// generate the right pixel. The value vs is on the interval [-1, vMax + 1]. It produces a value +// on the interval [0, vMax]. +// Note: vMax is not width or height, but width-1 or height-1 because it is the largest valid pixel. 
+static inline int adjust_edge(SkShader::TileMode edgeType, int vs, int vMax) { + SkASSERT(-1 <= vs && vs <= vMax + 1); + switch (edgeType) { + case SkShader::kClamp_TileMode: + case SkShader::kMirror_TileMode: + vs = std::max(vs, 0); + vs = std::min(vs, vMax); + break; + case SkShader::kRepeat_TileMode: + vs = (vs <= vMax) ? vs : 0; + vs = (vs >= 0) ? vs : vMax; + break; + } + SkASSERT(0 <= vs && vs <= vMax); + return vs; +} + +// From a sample point on the tile, return the top or left filter value. +// The result r should be in the range (0, 1]. Since this represents the weight given to the top +// left element, then if x == 0.5 the filter value should be 1.0. +// The input sample point must be on the tile, therefore it must be >= 0. +static SkScalar sample_to_filter(SkScalar x) { + SkASSERT(x >= 0.0f); + // The usual form of the top or left edge is x - .5, but since we are working on the unit + // square, then x + .5 works just as well. This also guarantees that v > 0.0 allowing the use + // of trunc. + SkScalar v = x + 0.5f; + // Produce the top or left offset a value on the range [0, 1). + SkScalar f = v - SkScalarTruncToScalar(v); + // Produce the filter value which is on the range (0, 1]. + SkScalar r = 1.0f - f; + SkASSERT(0.0f < r && r <= 1.0f); + return r; +} + +// -- BilerpSampler -------------------------------------------------------------------------------- +// BilerpSampler - use a bilerp filter to create runs of destination pixels. +// Note: in the code below, there are two types of points +// * sample points - these are the points passed in by pointList* and Spans. +// * filter points - are created from a sample point to form the coordinates of the points +// to use in the filter and to generate the filter values. +template<typename Accessor, typename Next> +class BilerpSampler : public SkLinearBitmapPipeline::SampleProcessorInterface { +public: + template<typename... 
Args> + BilerpSampler( + SkLinearBitmapPipeline::BlendProcessorInterface* next, + SkISize dimensions, + SkShader::TileMode xTile, SkShader::TileMode yTile, + Args&& ... args + ) + : fNext{next} + , fXEdgeType{xTile} + , fXMax{dimensions.width() - 1} + , fYEdgeType{yTile} + , fYMax{dimensions.height() - 1} + , fAccessor{std::forward<Args>(args)...} { } + + BilerpSampler(SkLinearBitmapPipeline::BlendProcessorInterface* next, + const BilerpSampler& sampler) + : fNext{next} + , fXEdgeType{sampler.fXEdgeType} + , fXMax{sampler.fXMax} + , fYEdgeType{sampler.fYEdgeType} + , fYMax{sampler.fYMax} + , fAccessor{sampler.fAccessor} { } + + void SK_VECTORCALL pointListFew(int n, Sk4s xs, Sk4s ys) override { + SkASSERT(0 < n && n < 4); + auto bilerpPixel = [&](int index) { + return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); + }; + + if (n >= 1) fNext->blendPixel(bilerpPixel(0)); + if (n >= 2) fNext->blendPixel(bilerpPixel(1)); + if (n >= 3) fNext->blendPixel(bilerpPixel(2)); + } + + void SK_VECTORCALL pointList4(Sk4s xs, Sk4s ys) override { + auto bilerpPixel = [&](int index) { + return this->bilerpSamplePoint(SkPoint{xs[index], ys[index]}); + }; + fNext->blend4Pixels(bilerpPixel(0), bilerpPixel(1), bilerpPixel(2), bilerpPixel(3)); + } + + void pointSpan(Span span) override { + SkASSERT(!span.isEmpty()); + SkPoint start; + SkScalar length; + int count; + std::tie(start, length, count) = span; + + // Nothing to do. + if (count == 0) { + return; + } + + // Trivial case. No sample points are generated other than start. + if (count == 1) { + fNext->blendPixel(this->bilerpSamplePoint(start)); + return; + } + + // Note: the following code could be done in terms of dx = length / (count -1), but that + // would introduce a divide that is not needed for the most common dx == 1 cases. + SkScalar absLength = SkScalarAbs(length); + if (absLength == 0.0f) { + // |dx| == 0 + // length is zero, so clamp an edge pixel. 
+ this->spanZeroRate(span); + } else if (absLength < (count - 1)) { + // 0 < |dx| < 1. + this->spanSlowRate(span); + } else if (absLength == (count - 1)) { + // |dx| == 1. + if (sample_to_filter(span.startX()) == 1.0f + && sample_to_filter(span.startY()) == 1.0f) { + // All the pixels are aligned with the dest; go fast. + src_strategy_blend(span, fNext, &fAccessor); + } else { + // There is some sub-pixel offsets, so bilerp. + this->spanUnitRate(span); + } + } else if (absLength < 2.0f * (count - 1)) { + // 1 < |dx| < 2. + this->spanMediumRate(span); + } else { + // |dx| >= 2. + this->spanFastRate(span); + } + } + + void repeatSpan(Span span, int32_t repeatCount) override { + while (repeatCount > 0) { + this->pointSpan(span); + repeatCount--; + } + } + +private: + + // Convert a sample point to the points used by the filter. + void filterPoints(SkPoint sample, Sk4i* filterXs, Sk4i* filterYs) { + // May be less than zero. Be careful to use Floor. + int x0 = adjust_edge(fXEdgeType, SkScalarFloorToInt(X(sample) - 0.5), fXMax); + // Always greater than zero. Use the faster Trunc. + int x1 = adjust_edge(fXEdgeType, SkScalarTruncToInt(X(sample) + 0.5), fXMax); + int y0 = adjust_edge(fYEdgeType, SkScalarFloorToInt(Y(sample) - 0.5), fYMax); + int y1 = adjust_edge(fYEdgeType, SkScalarTruncToInt(Y(sample) + 0.5), fYMax); + + *filterXs = Sk4i{x0, x1, x0, x1}; + *filterYs = Sk4i{y0, y0, y1, y1}; + } + + // Given a sample point, generate a color by bilerping the four filter points. + Sk4f bilerpSamplePoint(SkPoint sample) { + Sk4i iXs, iYs; + filterPoints(sample, &iXs, &iYs); + Sk4f px00, px10, px01, px11; + fAccessor.get4Pixels(iXs, iYs, &px00, &px10, &px01, &px11); + return bilerp4(Sk4f{X(sample) - 0.5f}, Sk4f{Y(sample) - 0.5f}, px00, px10, px01, px11); + } + + // Get two pixels at x from row0 and row1. 
+ void get2PixelColumn(const void* row0, const void* row1, int x, Sk4f* px0, Sk4f* px1) { + *px0 = fAccessor.getPixelFromRow(row0, x); + *px1 = fAccessor.getPixelFromRow(row1, x); + } + + // |dx| == 0. This code assumes that length is zero. + void spanZeroRate(Span span) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + SkASSERT(length == 0.0f); + + // Filter for the blending of the top and bottom pixels. + SkScalar filterY = sample_to_filter(Y(start)); + + // Generate the four filter points from the sample point start. Generate the row* values. + Sk4i iXs, iYs; + this->filterPoints(start, &iXs, &iYs); + const void* const row0 = fAccessor.row(iYs[0]); + const void* const row1 = fAccessor.row(iYs[2]); + + // Get the two pixels that make up the clamping pixel. + Sk4f pxTop, pxBottom; + this->get2PixelColumn(row0, row1, SkScalarFloorToInt(X(start)), &pxTop, &pxBottom); + Sk4f pixel = pxTop * filterY + (1.0f - filterY) * pxBottom; + + while (count >= 4) { + fNext->blend4Pixels(pixel, pixel, pixel, pixel); + count -= 4; + } + while (count > 0) { + fNext->blendPixel(pixel); + count -= 1; + } + } + + // 0 < |dx| < 1. This code reuses the calculations from previous pixels to reduce + // computation. In particular, several destination pixels maybe generated from the same four + // source pixels. + // In the following code a "part" is a combination of two pixels from the same column of the + // filter. + void spanSlowRate(Span span) { + SkPoint start; SkScalar length; int count; + std::tie(start, length, count) = span; + + // Calculate the distance between each sample point. + const SkScalar dx = length / (count - 1); + SkASSERT(-1.0f < dx && dx < 1.0f && dx != 0.0f); + + // Generate the filter values for the top-left corner. + // Note: these values are in filter space; this has implications about how to adjust + // these values at each step. 
For example, as the sample point increases, the filter + // value decreases, this is because the filter and position are related by + // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite + // direction of the sample point which is increasing by dx. + SkScalar filterX = sample_to_filter(X(start)); + SkScalar filterY = sample_to_filter(Y(start)); + + // Generate the four filter points from the sample point start. Generate the row* values. + Sk4i iXs, iYs; + this->filterPoints(start, &iXs, &iYs); + const void* const row0 = fAccessor.row(iYs[0]); + const void* const row1 = fAccessor.row(iYs[2]); + + // Generate part of the filter value at xColumn. + auto partAtColumn = [&](int xColumn) { + int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax); + Sk4f pxTop, pxBottom; + this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom); + return pxTop * filterY + (1.0f - filterY) * pxBottom; + }; + + // The leftPart is made up of two pixels from the left column of the filter, right part + // is similar. The top and bottom pixels in the *Part are created as a linear blend of + // the top and bottom pixels using filterY. See the partAtColumn function above. + Sk4f leftPart = partAtColumn(iXs[0]); + Sk4f rightPart = partAtColumn(iXs[1]); + + // Create a destination color by blending together a left and right part using filterX. + auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) { + Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX); + return check_pixel(pixel); + }; + + // Send the first pixel to the destination. This simplifies the loop structure so that no + // extra pixels are fetched for the last iteration of the loop. + fNext->blendPixel(bilerp(leftPart, rightPart)); + count -= 1; + + if (dx > 0.0f) { + // * positive direction - generate destination pixels by sliding the filter from left + // to right. + int rightPartCursor = iXs[1]; + + // Advance the filter from left to right. 
Remember that moving the top-left corner of + // the filter to the right actually makes the filter value smaller. + auto advanceFilter = [&]() { + filterX -= dx; + if (filterX <= 0.0f) { + filterX += 1.0f; + leftPart = rightPart; + rightPartCursor += 1; + rightPart = partAtColumn(rightPartCursor); + } + SkASSERT(0.0f < filterX && filterX <= 1.0f); + + return bilerp(leftPart, rightPart); + }; + + while (count >= 4) { + Sk4f px0 = advanceFilter(), + px1 = advanceFilter(), + px2 = advanceFilter(), + px3 = advanceFilter(); + fNext->blend4Pixels(px0, px1, px2, px3); + count -= 4; + } + + while (count > 0) { + fNext->blendPixel(advanceFilter()); + count -= 1; + } + } else { + // * negative direction - generate destination pixels by sliding the filter from + // right to left. + int leftPartCursor = iXs[0]; + + // Advance the filter from right to left. Remember that moving the top-left corner of + // the filter to the left actually makes the filter value larger. + auto advanceFilter = [&]() { + // Remember, dx < 0 therefore this adds |dx| to filterX. + filterX -= dx; + // At this point filterX may be > 1, and needs to be wrapped back on to the filter + // interval, and the next column in the filter is calculated. + if (filterX > 1.0f) { + filterX -= 1.0f; + rightPart = leftPart; + leftPartCursor -= 1; + leftPart = partAtColumn(leftPartCursor); + } + SkASSERT(0.0f < filterX && filterX <= 1.0f); + + return bilerp(leftPart, rightPart); + }; + + while (count >= 4) { + Sk4f px0 = advanceFilter(), + px1 = advanceFilter(), + px2 = advanceFilter(), + px3 = advanceFilter(); + fNext->blend4Pixels(px0, px1, px2, px3); + count -= 4; + } + + while (count > 0) { + fNext->blendPixel(advanceFilter()); + count -= 1; + } + } + } + + // |dx| == 1. Moving through source space at a rate of 1 source pixel per 1 dst pixel. + // Every filter part is used for two destination pixels, and the code can bulk load four + // pixels at a time. 
+    void spanUnitRate(Span span) {
+        SkPoint start; SkScalar length; int count;
+        std::tie(start, length, count) = span;
+        SkASSERT(SkScalarAbs(length) == (count - 1));
+
+        // Calculate the four filter points of start, and use the two different Y values to
+        // generate the row pointers.
+        Sk4i iXs, iYs;
+        filterPoints(start, &iXs, &iYs);
+        const void* row0 = fAccessor.row(iYs[0]);
+        const void* row1 = fAccessor.row(iYs[2]);
+
+        // Calculate the filter values for the top-left filter element.
+        const SkScalar filterX = sample_to_filter(X(start));
+        const SkScalar filterY = sample_to_filter(Y(start));
+
+        // Generate part of the filter value at xColumn.
+        auto partAtColumn = [&](int xColumn) {
+            int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
+            Sk4f pxTop, pxBottom;
+            this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
+            return pxTop * filterY + (1.0f - filterY) * pxBottom;
+        };
+
+        auto get4Parts = [&](int ix, Sk4f* part0, Sk4f* part1, Sk4f* part2, Sk4f* part3) {
+            // Check if the pixels needed are near the edges. If not go fast using bulk pixels,
+            // otherwise be careful.
+            if (0 <= ix && ix <= fXMax - 3) {
+                Sk4f px00, px10, px20, px30,
+                     px01, px11, px21, px31;
+                fAccessor.get4Pixels(row0, ix, &px00, &px10, &px20, &px30);
+                fAccessor.get4Pixels(row1, ix, &px01, &px11, &px21, &px31);
+                *part0 = filterY * px00 + (1.0f - filterY) * px01;
+                *part1 = filterY * px10 + (1.0f - filterY) * px11;
+                *part2 = filterY * px20 + (1.0f - filterY) * px21;
+                *part3 = filterY * px30 + (1.0f - filterY) * px31;
+            } else {
+                *part0 = partAtColumn(ix + 0);
+                *part1 = partAtColumn(ix + 1);
+                *part2 = partAtColumn(ix + 2);
+                *part3 = partAtColumn(ix + 3);
+            }
+        };
+
+        auto bilerp = [&](const Sk4f& part0, const Sk4f& part1) {
+            return part0 * filterX + part1 * (1.0f - filterX);
+        };
+
+        if (length > 0) {
+            // * positive direction - generate destination pixels by sliding the filter from left
+            //   to right.
+
+            // overlapPart is the filter part from the end of the previous four pixels used at
+            // the start of the next four pixels.
+            Sk4f overlapPart = partAtColumn(iXs[0]);
+            int rightColumnCursor = iXs[1];
+            while (count >= 4) {
+                Sk4f part0, part1, part2, part3;
+                get4Parts(rightColumnCursor, &part0, &part1, &part2, &part3);
+                Sk4f px0 = bilerp(overlapPart, part0);
+                Sk4f px1 = bilerp(part0, part1);
+                Sk4f px2 = bilerp(part1, part2);
+                Sk4f px3 = bilerp(part2, part3);
+                overlapPart = part3;
+                fNext->blend4Pixels(px0, px1, px2, px3);
+                rightColumnCursor += 4;
+                count -= 4;
+            }
+
+            while (count > 0) {
+                Sk4f rightPart = partAtColumn(rightColumnCursor);
+
+                fNext->blendPixel(bilerp(overlapPart, rightPart));
+                overlapPart = rightPart;
+                rightColumnCursor += 1;
+                count -= 1;
+            }
+        } else {
+            // * negative direction - generate destination pixels by sliding the filter from
+            //   right to left.
+            Sk4f overlapPart = partAtColumn(iXs[1]);
+            int leftColumnCursor = iXs[0];
+
+            while (count >= 4) {
+                Sk4f part0, part1, part2, part3;
+                get4Parts(leftColumnCursor - 3, &part3, &part2, &part1, &part0);
+                Sk4f px0 = bilerp(part0, overlapPart);
+                Sk4f px1 = bilerp(part1, part0);
+                Sk4f px2 = bilerp(part2, part1);
+                Sk4f px3 = bilerp(part3, part2);
+                overlapPart = part3;
+                fNext->blend4Pixels(px0, px1, px2, px3);
+                leftColumnCursor -= 4;
+                count -= 4;
+            }
+
+            while (count > 0) {
+                Sk4f leftPart = partAtColumn(leftColumnCursor);
+
+                fNext->blendPixel(bilerp(leftPart, overlapPart));
+                overlapPart = leftPart;
+                leftColumnCursor -= 1;
+                count -= 1;
+            }
+        }
+    }
+
+    // 1 < |dx| < 2. Going through the source pixels at a faster rate than the dest pixels, but
+    // still slow enough to take advantage of previous calculations.
+    void spanMediumRate(Span span) {
+        SkPoint start; SkScalar length; int count;
+        std::tie(start, length, count) = span;
+
+        // Calculate the distance between each sample point.
+        const SkScalar dx = length / (count - 1);
+        SkASSERT((-2.0f < dx && dx < -1.0f) || (1.0f < dx && dx < 2.0f));
+
+        // Generate the filter values for the top-left corner.
+        // Note: these values are in filter space; this has implications about how to adjust
+        // these values at each step. For example, as the sample point increases, the filter
+        // value decreases, this is because the filter and position are related by
+        // (1 - (X(sample) - .5)) % 1. The (1 - stuff) causes the filter to move in the opposite
+        // direction of the sample point which is increasing by dx.
+        SkScalar filterX = sample_to_filter(X(start));
+        SkScalar filterY = sample_to_filter(Y(start));
+
+        // Generate the four filter points from the sample point start. Generate the row* values.
+        Sk4i iXs, iYs;
+        this->filterPoints(start, &iXs, &iYs);
+        const void* const row0 = fAccessor.row(iYs[0]);
+        const void* const row1 = fAccessor.row(iYs[2]);
+
+        // Generate part of the filter value at xColumn.
+        auto partAtColumn = [&](int xColumn) {
+            int adjustedColumn = adjust_edge(fXEdgeType, xColumn, fXMax);
+            Sk4f pxTop, pxBottom;
+            this->get2PixelColumn(row0, row1, adjustedColumn, &pxTop, &pxBottom);
+            return pxTop * filterY + (1.0f - filterY) * pxBottom;
+        };
+
+        // The leftPart is made up of two pixels from the left column of the filter, right part
+        // is similar. The top and bottom pixels in the *Part are created as a linear blend of
+        // the top and bottom pixels using filterY. See the partAtColumn function above.
+        Sk4f leftPart = partAtColumn(iXs[0]);
+        Sk4f rightPart = partAtColumn(iXs[1]);
+
+        // Create a destination color by blending together a left and right part using filterX.
+        auto bilerp = [&](const Sk4f& leftPart, const Sk4f& rightPart) {
+            Sk4f pixel = leftPart * filterX + rightPart * (1.0f - filterX);
+            return check_pixel(pixel);
+        };
+
+        // Send the first pixel to the destination. This simplifies the loop structure so that no
+        // extra pixels are fetched for the last iteration of the loop.
+        fNext->blendPixel(bilerp(leftPart, rightPart));
+        count -= 1;
+
+        if (dx > 0.0f) {
+            // * positive direction - generate destination pixels by sliding the filter from left
+            //   to right.
+            int rightPartCursor = iXs[1];
+
+            // Advance the filter from left to right. Remember that moving the top-left corner of
+            // the filter to the right actually makes the filter value smaller.
+            auto advanceFilter = [&]() {
+                filterX -= dx;
+                // At this point filterX is less than zero, but might actually be less than -1.
+                if (filterX > -1.0f) {
+                    filterX += 1.0f;
+                    leftPart = rightPart;
+                    rightPartCursor += 1;
+                    rightPart = partAtColumn(rightPartCursor);
+                } else {
+                    filterX += 2.0f;
+                    rightPartCursor += 2;
+                    leftPart = partAtColumn(rightPartCursor - 1);
+                    rightPart = partAtColumn(rightPartCursor);
+                }
+                SkASSERT(0.0f < filterX && filterX <= 1.0f);
+
+                return bilerp(leftPart, rightPart);
+            };
+
+            while (count >= 4) {
+                Sk4f px0 = advanceFilter(),
+                     px1 = advanceFilter(),
+                     px2 = advanceFilter(),
+                     px3 = advanceFilter();
+                fNext->blend4Pixels(px0, px1, px2, px3);
+                count -= 4;
+            }
+
+            while (count > 0) {
+                fNext->blendPixel(advanceFilter());
+                count -= 1;
+            }
+        } else {
+            // * negative direction - generate destination pixels by sliding the filter from
+            //   right to left.
+            int leftPartCursor = iXs[0];
+
+            auto advanceFilter = [&]() {
+                // Remember, dx < 0 therefore this adds |dx| to filterX.
+                filterX -= dx;
+                // filterX is now in (1, 3). Keep exactly 2 on the one-column path so it wraps to 1.
+                if (filterX <= 2.0f) {
+                    filterX -= 1.0f;
+                    rightPart = leftPart;
+                    leftPartCursor -= 1;
+                    leftPart = partAtColumn(leftPartCursor);
+                } else {
+                    filterX -= 2.0f;
+                    leftPartCursor -= 2;
+                    rightPart = partAtColumn(leftPartCursor + 1);  // column right of leftPart
+                    leftPart = partAtColumn(leftPartCursor);
+                }
+                SkASSERT(0.0f < filterX && filterX <= 1.0f);
+                return bilerp(leftPart, rightPart);
+            };
+
+            while (count >= 4) {
+                Sk4f px0 = advanceFilter(),
+                     px1 = advanceFilter(),
+                     px2 = advanceFilter(),
+                     px3 = advanceFilter();
+                fNext->blend4Pixels(px0, px1, px2, px3);
+                count -= 4;
+            }
+
+            while (count > 0) {
+                fNext->blendPixel(advanceFilter());
+                count -= 1;
+            }
+        }
+    }
+
+    // We're moving through source space faster than dst (zoomed out),
+    // so we'll never reuse a source pixel or be able to do contiguous loads.
+    void spanFastRate(Span span) {
+        SkPoint start; SkScalar length; int count;
+        std::tie(start, length, count) = span;
+        SkScalar x = X(start);
+        SkScalar y = Y(start);
+
+        SkScalar dx = count > 1 ? length / (count - 1) : 0.0f;  // single sample: no step
+        while (count > 0) {
+            fNext->blendPixel(this->bilerpSamplePoint(SkPoint{x, y}));
+            x += dx;
+            count -= 1;
+        }
+    }
+
+    Next* const              fNext;
+    const SkShader::TileMode fXEdgeType;
+    const int                fXMax;
+    const SkShader::TileMode fYEdgeType;
+    const int                fYMax;
+    Accessor                 fAccessor;
+};
+
+}  // namespace
+
+#endif  // SkLinearBitmapPipeline_sampler_DEFINED
diff --git a/src/core/SkLinearBitmapPipeline_tile.h b/src/core/SkLinearBitmapPipeline_tile.h
new file mode 100644
index 0000000000..e18f7a1a5d
--- /dev/null
+++ b/src/core/SkLinearBitmapPipeline_tile.h
@@ -0,0 +1,412 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#ifndef SkLinearBitmapPipeline_tile_DEFINED
+#define SkLinearBitmapPipeline_tile_DEFINED
+
+#include "SkLinearBitmapPipeline_core.h"
+#include "SkPM4f.h"
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+namespace {
+
+void assertTiled(const Sk4s& vs, SkScalar vMax) {
+    SkASSERT(0 <= vs[0] && vs[0] < vMax);
+    SkASSERT(0 <= vs[1] && vs[1] < vMax);
+    SkASSERT(0 <= vs[2] && vs[2] < vMax);
+    SkASSERT(0 <= vs[3] && vs[3] < vMax);
+}
+
+/*
+ * Clamp in the X direction.
+ * Observations:
+ *   * sample point border - if the sample point is <= 0.5 or >= Max - 0.5 then the pixel
+ *     value should be a border color. For this case, create the span using clampToSinglePixel.
+ */
+class XClampStrategy {
+public:
+    XClampStrategy(int32_t max)
+        : fXMaxPixel{SkScalar(max - SK_ScalarHalf)}
+        , fXMax{SkScalar(max)} { }
+
+    void tileXPoints(Sk4s* xs) {
+        *xs = Sk4s::Min(Sk4s::Max(*xs, SK_ScalarHalf), fXMaxPixel);
+        assertTiled(*xs, fXMax);
+    }
+
+    template<typename Next>
+    bool maybeProcessSpan(Span originalSpan, Next* next) {
+        SkASSERT(!originalSpan.isEmpty());
+        SkPoint start; SkScalar length; int count;
+        std::tie(start, length, count) = originalSpan;
+        SkScalar x = X(start);
+        SkScalar y = Y(start);
+        Span span{{x, y}, length, count};
+
+        if (span.completelyWithin(0.0f, fXMax)) {
+            next->pointSpan(span);
+            return true;
+        }
+        if (1 == count || 0.0f == length) {
+            return false;
+        }
+
+        SkScalar dx = length / (count - 1);
+
+        //    A                 B     C
+        // +-------+-------+-------++-------+-------+-------+     +-------+-------++------
+        // |  *---*|---*---|*---*--||-*---*-|---*---|*---...|     |--*---*|---*---||*---*....
+        // |       |       |       ||       |       |       | ... |       |       ||
+        // |       |       |       ||       |       |       |     |       |       ||
+        // +-------+-------+-------++-------+-------+-------+     +-------+-------++------
+        //                         ^                                              ^
+        //                         | xMin                                  xMax-1 | xMax
+        //
+        //     *---*---*---... - track of samples. * = sample
+        //
+        //     +-+                                 ||
+        //     | |  - pixels in source space.      || - tile border.
+        //     +-+                                 ||
+        //
+        // The length from A to B is the length in source space or 4 * dx or (count - 1) * dx
+        // where dx is the distance between samples. There are 5 destination pixels
+        // corresponding to 5 samples specified in the A, B span. The distance from A to the next
+        // span starting at C is 5 * dx, so count * dx.
+        // Remember, count is the number of pixels needed for the destination and the number of
+        // samples.
+        // Overall Strategy:
+        // * Under - for portions of the span < xMin, take the color at pixel {xMin, y} and use it
+        //   to fill in the 5 pixels sampled from A to B.
+        // * Middle - for the portion of the span between xMin and xMax sample normally.
+        // * Over - for the portion of the span > xMax, take the color at pixel {xMax-1, y} and
+        //   use it to fill in the rest of the destination pixels.
+        if (dx >= 0) {
+            Span leftClamped = span.breakAt(SK_ScalarHalf, dx);
+            if (!leftClamped.isEmpty()) {
+                leftClamped.clampToSinglePixel({SK_ScalarHalf, y});
+                next->pointSpan(leftClamped);
+            }
+            Span center = span.breakAt(fXMax, dx);
+            if (!center.isEmpty()) {
+                next->pointSpan(center);
+            }
+            if (!span.isEmpty()) {
+                span.clampToSinglePixel({fXMaxPixel, y});
+                next->pointSpan(span);
+            }
+        } else {
+            Span rightClamped = span.breakAt(fXMax, dx);
+            if (!rightClamped.isEmpty()) {
+                rightClamped.clampToSinglePixel({fXMaxPixel, y});
+                next->pointSpan(rightClamped);
+            }
+            Span center = span.breakAt(SK_ScalarHalf, dx);
+            if (!center.isEmpty()) {
+                next->pointSpan(center);
+            }
+            if (!span.isEmpty()) {
+                span.clampToSinglePixel({SK_ScalarHalf, y});
+                next->pointSpan(span);
+            }
+        }
+        return true;
+    }
+
+private:
+    const SkScalar fXMaxPixel;
+    const SkScalar fXMax;
+};
+
+class YClampStrategy {
+public:
+    YClampStrategy(int32_t max)
+        : fYMaxPixel{SkScalar(max) - SK_ScalarHalf} { }
+
+    void tileYPoints(Sk4s* ys) {
+        *ys = Sk4s::Min(Sk4s::Max(*ys, SK_ScalarHalf), fYMaxPixel);
+        assertTiled(*ys, fYMaxPixel + SK_ScalarHalf);
+    }
+
+    SkScalar tileY(SkScalar y) {
+        Sk4f ys{y};
+        tileYPoints(&ys);
+        return ys[0];
+    }
+
+private:
+    const SkScalar fYMaxPixel;
+};
+
+SkScalar tile_mod(SkScalar x, SkScalar base, SkScalar cap) {
+    // When x is a negative number *very* close to zero, the difference becomes 0 - (-base) = base
+    // which is an out of bound value. The min() corrects these problematic values.
+    return std::min(x - SkScalarFloorToScalar(x / base) * base, cap);
+}
+
+class XRepeatStrategy {
+public:
+    XRepeatStrategy(int32_t max)
+        : fXMax{SkScalar(max)}
+        , fXCap{SkScalar(nextafterf(SkScalar(max), 0.0f))}
+        , fXInvMax{1.0f / SkScalar(max)} { }
+
+    void tileXPoints(Sk4s* xs) {
+        Sk4s divX = *xs * fXInvMax;
+        Sk4s modX = *xs - divX.floor() * fXMax;
+        *xs = Sk4s::Min(fXCap, modX);
+        assertTiled(*xs, fXMax);
+    }
+
+    template<typename Next>
+    bool maybeProcessSpan(Span originalSpan, Next* next) {
+        SkASSERT(!originalSpan.isEmpty());
+        SkPoint start; SkScalar length; int count;
+        std::tie(start, length, count) = originalSpan;
+        // Make x in range on the tile; y is passed through untouched.
+        SkScalar x = tile_mod(X(start), fXMax, fXCap);
+        SkScalar y = Y(start);
+        SkScalar dx = count > 1 ? length / (count - 1) : 0.0f;  // avoid x/0 for a lone point
+
+        // No need trying to go fast because the steps are larger than a tile or there is one point.
+        if (SkScalarAbs(dx) >= fXMax || count <= 1) {
+            return false;
+        }
+
+        //             A        B     C                  D                Z
+        // +-------+-------+-------++-------+-------+-------++     +-------+-------++------
+        // |       |   *---|*---*--||-*---*-|---*---|*---*--||     |--*---*|       ||
+        // |       |       |       ||       |       |       || ... |       |       ||
+        // |       |       |       ||       |       |       ||     |       |       ||
+        // +-------+-------+-------++-------+-------+-------++     +-------+-------++------
+        //                         ^^                       ^^                     ^^
+        //                    xMax || xMin             xMax || xMin           xMax || xMin
+        //
+        //     *---*---*---... - track of samples. * = sample
+        //
+        //     +-+                                 ||
+        //     | |  - pixels in source space.      || - tile border.
+        //     +-+                                 ||
+        //
+        //
+        // The given span starts at A and continues on through several tiles to sample point Z.
+        // The idea is to break this into several spans one on each tile the entire span
+        // intersects. The A to B span only covers a partial tile and has a count of 3 and the
+        // distance from A to B is (count - 1) * dx or 2 * dx. The distance from A to the start of
+        // the next span is count * dx or 3 * dx. Span C to D covers an entire tile and has a count
+        // of 5 and a length of 4 * dx. Remember, count is the number of pixels needed for the
+        // destination and the number of samples.
+        //
+        // Overall Strategy:
+        // While the span hangs over the edge of the tile, draw the span covering the tile then
+        // slide the span over to the next tile.
+
+        // The guard could have been count > 0, but then a bunch of math would be done in the
+        // common case.
+
+        Span span({x, y}, length, count);
+        if (dx > 0) {
+            while (!span.isEmpty() && span.endX() >= fXMax) {
+                Span toDraw = span.breakAt(fXMax, dx);
+                next->pointSpan(toDraw);
+                span.offset(-fXMax);
+            }
+        } else {
+            while (!span.isEmpty() && span.endX() < 0.0f) {
+                Span toDraw = span.breakAt(0.0f, dx);
+                next->pointSpan(toDraw);
+                span.offset(fXMax);
+            }
+        }
+
+        // All on a single tile.
+        if (!span.isEmpty()) {
+            next->pointSpan(span);
+        }
+
+        return true;
+    }
+
+private:
+    const SkScalar fXMax;
+    const SkScalar fXCap;
+    const SkScalar fXInvMax;
+};
+
+// The XRepeatUnitScaleStrategy exploits the situation where dx = 1.0. The main advantage is that
+// the relationship between the sample points and the source pixels does not change from tile to
+// repeated tile. This allows the tiler to calculate the span once and re-use it for each
+// repeated tile. This is later exploited by some samplers to avoid converting pixels to linear
+// space allowing the use of memmove to place pixel in the destination.
+class XRepeatUnitScaleStrategy {
+public:
+    XRepeatUnitScaleStrategy(int32_t max)
+        : fXMax{SkScalar(max)}
+        , fXCap{SkScalar(nextafterf(SkScalar(max), 0.0f))}
+        , fXInvMax{1.0f / SkScalar(max)} { }
+
+    void tileXPoints(Sk4s* xs) {
+        Sk4s divX = *xs * fXInvMax;
+        Sk4s modX = *xs - divX.floor() * fXMax;
+        *xs = Sk4s::Min(fXCap, modX);
+        assertTiled(*xs, fXMax);
+    }
+
+    template<typename Next>
+    bool maybeProcessSpan(Span originalSpan, Next* next) {
+        SkASSERT(!originalSpan.isEmpty());
+        SkPoint start; SkScalar length; int count;
+        std::tie(start, length, count) = originalSpan;
+        // Make x in range on the tile; y is passed through untouched.
+        SkScalar x = tile_mod(X(start), fXMax, fXCap);
+        SkScalar y = Y(start);
+
+        // No need trying to go fast when the tile is a single pixel wide or there is one point.
+        if (fXMax == 1 || count <= 1) {
+            return false;
+        }
+
+        // x should be on the tile.
+        SkASSERT(0.0f <= x && x < fXMax);
+        Span span({x, y}, length, count);
+
+        if (SkScalarFloorToScalar(x) != 0.0f) {
+            Span toDraw = span.breakAt(fXMax, 1.0f);
+            SkASSERT(0.0f <= toDraw.startX() && toDraw.endX() < fXMax);
+            next->pointSpan(toDraw);
+            span.offset(-fXMax);
+        }
+
+        // All of the span could have been on the first tile. If so, then no work to do.
+        if (span.isEmpty()) return true;
+
+        // At this point the span should be aligned to zero.
+        SkASSERT(SkScalarFloorToScalar(span.startX()) == 0.0f);
+
+        // Note: The span length has an unintuitive relation to the tile width. The tile width is
+        // a half open interval [tb, te), but the span is a closed interval [sb, se]. In order to
+        // compare the two, you need to convert the span to a half open interval. This is done by
+        // adding dx to se. So, the span becomes: [sb, se + dx). Hence the + 1.0f below.
+        SkScalar div = (span.length() + 1.0f) / fXMax;
+        int32_t repeatCount = SkScalarFloorToInt(div);
+        Span repeatableSpan{{0.0f, y}, fXMax - 1.0f, SkScalarFloorToInt(fXMax)};
+
+        // Repeat the center section.
+        SkASSERT(0.0f <= repeatableSpan.startX() && repeatableSpan.endX() < fXMax);
+        if (repeatCount > 0) {
+            next->repeatSpan(repeatableSpan, repeatCount);
+        }
+
+        // Calculate the advance past the center portion.
+        SkScalar advance = SkScalar(repeatCount) * fXMax;
+
+        // Discard the part already emitted by the repeats; span keeps whatever is left over.
+        span.breakAt(advance, 1.0f);
+
+        // All on a single tile.
+        if (!span.isEmpty()) {
+            span.offset(-advance);
+            SkASSERT(0.0f <= span.startX() && span.endX() < fXMax);
+            next->pointSpan(span);
+        }
+
+        return true;
+    }
+
+private:
+    const SkScalar fXMax;
+    const SkScalar fXCap;
+    const SkScalar fXInvMax;
+};
+
+class YRepeatStrategy {
+public:
+    YRepeatStrategy(int32_t max)
+        : fYMax{SkScalar(max)}
+        , fYCap{SkScalar(nextafterf(SkScalar(max), 0.0f))}
+        , fYsInvMax{1.0f / SkScalar(max)} { }
+
+    void tileYPoints(Sk4s* ys) {
+        Sk4s divY = *ys * fYsInvMax;
+        Sk4s modY = *ys - divY.floor() * fYMax;
+        *ys = Sk4s::Min(fYCap, modY);
+        assertTiled(*ys, fYMax);
+    }
+
+    SkScalar tileY(SkScalar y) {
+        SkScalar answer = tile_mod(y, fYMax, fYCap);
+        SkASSERT(0 <= answer && answer < fYMax);
+        return answer;
+    }
+
+private:
+    const SkScalar fYMax;
+    const SkScalar fYCap;
+    const SkScalar fYsInvMax;
+};
+// Mirror tiling folds x back onto the tile. Mathematica sketch of the fold for max = 40:
+// mq2[x_] := Abs[(x - 40) - Floor[(x - 40)/80] * 80 - 40]
+class XMirrorStrategy {
+public:
+    XMirrorStrategy(int32_t max)
+        : fXMax{SkScalar(max)}
+        , fXCap{SkScalar(nextafterf(SkScalar(max), 0.0f))}
+        , fXDoubleInvMax{1.0f / (2.0f * SkScalar(max))} { }
+
+    void tileXPoints(Sk4s* xs) {
+        Sk4f bias   = *xs - fXMax;
+        Sk4f div    = bias * fXDoubleInvMax;
+        Sk4f mod    = bias - div.floor() * 2.0f * fXMax;
+        Sk4f unbias = mod - fXMax;
+        *xs = Sk4f::Min(unbias.abs(), fXCap);
+        assertTiled(*xs, fXMax);
+    }
+
+    template <typename Next>
+    bool maybeProcessSpan(Span originalSpan, Next* next) { return false; }
+
+private:
+    const SkScalar fXMax;
+    const SkScalar fXCap;
+    const SkScalar fXDoubleInvMax;
+};
+
+class YMirrorStrategy {
+public:
+    YMirrorStrategy(int32_t max)
+        : fYMax{SkScalar(max)}
+        , fYCap{nextafterf(SkScalar(max), 0.0f)}
+        , fYDoubleInvMax{1.0f / (2.0f * SkScalar(max))} { }
+
+    void tileYPoints(Sk4s* ys) {
+        Sk4f bias   = *ys - fYMax;
+        Sk4f div    = bias * fYDoubleInvMax;
+        Sk4f mod    = bias - div.floor() * 2.0f * fYMax;
+        Sk4f unbias = mod - fYMax;
+        *ys = Sk4f::Min(unbias.abs(), fYCap);
+        assertTiled(*ys, fYMax);
+    }
+
+    SkScalar tileY(SkScalar y) {
+        SkScalar bias   = y - fYMax;
+        SkScalar div    = bias * fYDoubleInvMax;
+        SkScalar mod    = bias - SkScalarFloorToScalar(div) * 2.0f * fYMax;
+        SkScalar unbias = mod - fYMax;
+        SkScalar answer = SkMinScalar(SkScalarAbs(unbias), fYCap);
+        SkASSERT(0 <= answer && answer < fYMax);
+        return answer;
+    }
+
+private:
+    const SkScalar fYMax;
+    const SkScalar fYCap;
+    const SkScalar fYDoubleInvMax;
+};
+
+}  // namespace
+#endif  // SkLinearBitmapPipeline_tile_DEFINED
diff --git a/src/shaders/SkBitmapProcShader.cpp b/src/shaders/SkBitmapProcShader.cpp
index 1a87491bf4..91697e2f1b 100644
--- a/src/shaders/SkBitmapProcShader.cpp
+++ b/src/shaders/SkBitmapProcShader.cpp
@@ -100,6 +100,79 @@ private:
 };
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////
+#include "SkLinearBitmapPipeline.h"
+#include "SkPM4f.h"
+
+class LinearPipelineContext : public BitmapProcInfoContext {
+public:
+    LinearPipelineContext(const SkShaderBase& shader, const SkShaderBase::ContextRec& rec,
+                          SkBitmapProcInfo* info, SkArenaAlloc* alloc)
+        : INHERITED(shader, rec, info), fAllocator{alloc}
+    {
+        // Save things off in case we need to build a blitter pipeline.
+        fSrcPixmap = info->fPixmap;
+        fAlpha = SkColorGetA(info->fPaintColor) / 255.0f;
+        fFilterQuality = info->fFilterQuality;
+        fMatrixTypeMask = info->fRealInvMatrix.getType();
+
+        fShaderPipeline = alloc->make<SkLinearBitmapPipeline>(
+            info->fRealInvMatrix, info->fFilterQuality,
+            info->fTileModeX, info->fTileModeY,
+            info->fPaintColor,
+            info->fPixmap,
+            fAllocator);
+    }
+
+    void shadeSpan4f(int x, int y, SkPM4f dstC[], int count) override {
+        fShaderPipeline->shadeSpan4f(x, y, dstC, count);
+    }
+
+    void shadeSpan(int x, int y, SkPMColor dstC[], int count) override {
+        const int N = 128;
+        SkPM4f  tmp[N];
+
+        while (count > 0) {
+            const int n = SkTMin(count, N);
+            fShaderPipeline->shadeSpan4f(x, y, tmp, n);
+            // now convert to SkPMColor
+            for (int i = 0; i < n; ++i) {
+                dstC[i] = Sk4f_toL32(tmp[i].to4f_pmorder());
+            }
+            dstC += n;
+            x += n;
+            count -= n;
+        }
+    }
+
+private:
+    // Store the allocator from the context creation in case we are asked to build a blitter.
+    SkArenaAlloc*           fAllocator;
+    SkLinearBitmapPipeline* fShaderPipeline;
+    SkPixmap                fSrcPixmap;
+    float                   fAlpha;
+    SkMatrix::TypeMask      fMatrixTypeMask;
+    SkFilterQuality         fFilterQuality;
+
+    typedef BitmapProcInfoContext INHERITED;
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+static bool choose_linear_pipeline(const SkShaderBase::ContextRec& rec, const SkImageInfo& srcInfo) {
+    // Either context could be used here, so respect the caller's preferred destination type,
+    // but always take the linear pipeline when the source is unpremul or is non-native 32-bit.
+    bool needsPremul = srcInfo.alphaType() == kUnpremul_SkAlphaType;
+    bool needsSwizzle = srcInfo.bytesPerPixel() == 4 && srcInfo.colorType() != kN32_SkColorType;
+    return SkShaderBase::ContextRec::kPM4f_DstType == rec.fPreferredDstType
+           || needsPremul || needsSwizzle;
+}
+
+size_t SkBitmapProcLegacyShader::ContextSize(const ContextRec& rec, const SkImageInfo& srcInfo) {
+    size_t size0 = sizeof(BitmapProcShaderContext) + sizeof(SkBitmapProcState);
+    size_t size1 = sizeof(LinearPipelineContext) + sizeof(SkBitmapProcInfo);
+    size_t s = SkTMax(size0, size1);
+    return s;
+}
 
 SkShaderBase::Context* SkBitmapProcLegacyShader::MakeContext(
     const SkShaderBase& shader, TileMode tmx, TileMode tmy,
@@ -111,10 +184,21 @@ SkShaderBase::Context* SkBitmapProcLegacyShader::MakeContext(
         return nullptr;
     }
 
-    SkBitmapProcState* state = alloc->make<SkBitmapProcState>(provider, tmx, tmy);
-    if (!state->setup(totalInverse, *rec.fPaint)) {
-        return nullptr;
-    }
-    return alloc->make<BitmapProcShaderContext>(shader, rec, state);
+    // Decide if we can/want to use the new linear pipeline
+    bool useLinearPipeline = choose_linear_pipeline(rec, provider.info());
+    if (useLinearPipeline) {
+        SkBitmapProcInfo* info = alloc->make<SkBitmapProcInfo>(provider, tmx, tmy);
+        if (!info->init(totalInverse, *rec.fPaint)) {
+            return nullptr;
+        }
+
+        return alloc->make<LinearPipelineContext>(shader, rec, info, alloc);
+    } else {
+        SkBitmapProcState* state = alloc->make<SkBitmapProcState>(provider, tmx, tmy);
+        if (!state->setup(totalInverse, *rec.fPaint)) {
+            return nullptr;
+        }
+        return alloc->make<BitmapProcShaderContext>(shader, rec, state);
+    }
 }
 
diff --git a/src/shaders/SkBitmapProcShader.h b/src/shaders/SkBitmapProcShader.h
index 7c5cdcfb8d..2a2599cb1d 100644
--- a/src/shaders/SkBitmapProcShader.h
+++ b/src/shaders/SkBitmapProcShader.h
@@ -16,6 +16,7 @@ class SkBitmapProcLegacyShader : public SkShaderBase {
 private:
     friend class SkImageShader;
 
+    static size_t ContextSize(const ContextRec&, const SkImageInfo& srcInfo);
     static Context* MakeContext(const SkShaderBase&, TileMode tmx, TileMode tmy,
                                 const SkBitmapProvider&, const ContextRec&, SkArenaAlloc* alloc);
|