/* * Copyright 2017 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #include "SkMaskBlurFilter.h" #include #include #include "SkArenaAlloc.h" #include "SkNx.h" #include "SkSafeMath.h" static const double kPi = 3.14159265358979323846264338327950288; static constexpr double kSmallSigma = 2.0; class BlurScanInterface { public: virtual ~BlurScanInterface() = default; virtual void blur(const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const = 0; virtual bool canBlur4() { return false; } virtual void blur4Transpose( const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const { SK_ABORT("This should not be called."); } }; class PlanningInterface { public: virtual ~PlanningInterface() = default; virtual size_t bufferSize() const = 0; virtual size_t border() const = 0; virtual bool needsBlur() const = 0; virtual BlurScanInterface* makeBlurScan( SkArenaAlloc* alloc, size_t width, uint32_t* buffer) const = 0; }; class None final : public PlanningInterface { public: None() = default; size_t bufferSize() const override { return 0; } size_t border() const override { return 0; } bool needsBlur() const override { return false; } BlurScanInterface* makeBlurScan( SkArenaAlloc* alloc, size_t width, uint32_t* buffer) const override { SK_ABORT("Should never be called."); return nullptr; } }; class PlanBox final : public PlanningInterface { public: explicit PlanBox(double sigma) { // Calculate the radius from sigma. Taken from the old code until something better is // figured out. auto possibleRadius = 1.5 * sigma - 0.5; auto radius = std::max(std::numeric_limits::epsilon(), possibleRadius); auto outerRadius = std::ceil(radius); auto outerWindow = 2 * outerRadius + 1; auto outerFactor = (1 - (outerRadius - radius)) / outerWindow; fOuterWeight = static_cast(round(outerFactor * (1ull << 24))); auto innerRadius = outerRadius - 1; auto innerWindow = 2 * innerRadius + 1; auto innerFactor = (1 - (radius - innerRadius)) / innerWindow; fInnerWeight = static_cast(round(innerFactor * (1ull << 24))); // Sliding window is defined by the relationship between the outer and inner widows. // In the single window case, you add the element on the right, and subtract the element on // the left. But, because two windows are used, this relationship is more complicated; an // element is added from the right of the outer window, and subtracted from the left of the // inner window. Because innerWindow = outerWindow - 2, the distance between // the left and right in the two window case is outerWindow - 1. fSlidingWindow = static_cast(outerWindow - 1); } size_t bufferSize() const override { return fSlidingWindow * (sizeof(Sk4u) / sizeof(uint32_t)); } // Remember that sliding window = window - 1. Therefore, radius = sliding window / 2. size_t border() const override { return fSlidingWindow / 2; } bool needsBlur() const override { return true; } BlurScanInterface* makeBlurScan( SkArenaAlloc* alloc, size_t width, uint32_t* buffer) const override { size_t noChangeCount; size_t trailingEdgeZeroCount; // The relation between the slidingWindow and the width dictates two operating modes. // * width >= slidingWindow - both sides of the window are contained in the image while // scanning. Therefore, we assume that slidingWindow zeros are consumed on the trailing // edge of the window. After this count, then both edges are traversing the image. // * slidingWindow > width - both sides of the window are off the image while scanning // the middle. The front edge of the window can only travel width until it falls off the // image. At this point, both edges of the window are off the image consuming zeros // and therefore, the destination value does not change. The scan produces unchanged // values until the trailing edge of the window enters the image. This count is // slidingWindow - width. if (width >= fSlidingWindow) { noChangeCount = 0; trailingEdgeZeroCount = fSlidingWindow; } else { noChangeCount = fSlidingWindow - width; trailingEdgeZeroCount = width; } Sk4u* sk4uBuffer = reinterpret_cast(buffer); return alloc->make(fOuterWeight, fInnerWeight, noChangeCount, trailingEdgeZeroCount, sk4uBuffer, sk4uBuffer + fSlidingWindow); } private: class Box final : public BlurScanInterface { public: Box(uint32_t outerWeight, uint32_t innerWeight, size_t noChangeCount, size_t trailingEdgeZeroCount, Sk4u* buffer, Sk4u* bufferEnd) : fOuterWeight{outerWeight} , fInnerWeight{innerWeight} , fNoChangeCount{noChangeCount} , fTrailingEdgeZeroCount{trailingEdgeZeroCount} , fBuffer{buffer} , fBufferEnd{bufferEnd} { } void blur(const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const override { auto rightOuter = src; auto dstCursor = dst; auto interpolateSums = [this](uint32_t outerSum, uint32_t innerSum) { return SkTo( (fOuterWeight * outerSum + fInnerWeight * innerSum + kHalf) >> 24); }; uint32_t outerSum = 0; uint32_t innerSum = 0; for (size_t i = 0; i < fTrailingEdgeZeroCount; i++) { innerSum = outerSum; outerSum += *rightOuter; *dstCursor = interpolateSums(outerSum, innerSum); rightOuter += srcStride; dstCursor += dstStride; } // slidingWindow > width for (size_t i = 0; i < fNoChangeCount; i++) { *dstCursor = interpolateSums(outerSum, innerSum);; dstCursor += dstStride; } // width > slidingWindow auto leftInner = src; while (rightOuter < srcEnd) { innerSum = outerSum - *leftInner; outerSum += *rightOuter; *dstCursor = interpolateSums(outerSum, innerSum); outerSum -= *leftInner; rightOuter += srcStride; leftInner += srcStride; dstCursor += dstStride; } auto leftOuter = srcEnd; dstCursor = dstEnd; outerSum = 0; for (size_t i = 0; i < fTrailingEdgeZeroCount; i++) { leftOuter -= srcStride; dstCursor -= dstStride; innerSum = outerSum; outerSum += *leftOuter; *dstCursor = interpolateSums(outerSum, innerSum); } } bool canBlur4() override { return true; } // NB this is a transposing scan. The next src is src+1, and the next down is // src+srcStride. void blur4Transpose( const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const override { auto rightOuter = src; auto dstCursor = dst; Sk4u* const bufferStart = fBuffer; Sk4u* bufferCursor = bufferStart; Sk4u* const bufferEnd = fBufferEnd; const Sk4u outerWeight(SkTo(fOuterWeight)); const Sk4u innerWeight(SkTo(fInnerWeight)); auto load = [](const uint8_t* cursor, size_t stride) -> Sk4u { return Sk4u(cursor[0*stride], cursor[1*stride], cursor[2*stride], cursor[3*stride]); }; auto interpolateSums = [&] (const Sk4u& outerSum, const Sk4u& innerSum) { return SkNx_cast( (outerSum * outerWeight + innerSum * innerWeight + kHalf) >> 24); }; Sk4u outerSum = 0; Sk4u innerSum = 0; for (size_t i = 0; i < fTrailingEdgeZeroCount; i++) { innerSum = outerSum; Sk4u leadingEdge = load(rightOuter, srcStride); outerSum += leadingEdge; Sk4b blurred = interpolateSums(outerSum, innerSum); blurred.store(dstCursor); leadingEdge.store(bufferCursor); bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart; rightOuter += 1; dstCursor += dstStride; } // slidingWindow > width for (size_t i = 0; i < fNoChangeCount; i++) { Sk4b blurred = interpolateSums(outerSum, innerSum); blurred.store(dstCursor); dstCursor += dstStride; } // width > slidingWindow auto leftInner = src; while (rightOuter < srcEnd) { Sk4u trailEdge = Sk4u::Load(bufferCursor); Sk4u leadingEdge = load(rightOuter, srcStride); innerSum = outerSum - trailEdge; outerSum += leadingEdge; Sk4b blurred = interpolateSums(outerSum, innerSum); blurred.store(dstCursor); outerSum -= trailEdge; leadingEdge.store(bufferCursor); bufferCursor = (bufferCursor + 1) < bufferEnd ? bufferCursor + 1 : bufferStart; rightOuter += 1; leftInner += 1; dstCursor += dstStride; } auto leftOuter = srcEnd; dstCursor = dstEnd; outerSum = 0; for (size_t i = 0; i < fTrailingEdgeZeroCount; i++) { leftOuter -= 1; dstCursor -= dstStride; innerSum = outerSum; outerSum += load(leftOuter, srcStride); Sk4b blurred = interpolateSums(outerSum, innerSum); blurred.store(dstCursor); } } private: static constexpr uint32_t kHalf = static_cast(1) << 23; const uint32_t fOuterWeight; const uint32_t fInnerWeight; const size_t fNoChangeCount; const size_t fTrailingEdgeZeroCount; Sk4u* const fBuffer; Sk4u* const fBufferEnd; }; private: uint32_t fOuterWeight; uint32_t fInnerWeight; size_t fSlidingWindow; }; class PlanGauss final : public PlanningInterface { public: explicit PlanGauss(double sigma) { auto possibleWindow = static_cast(floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5)); auto window = std::max(static_cast(1), possibleWindow); fPass0Size = window - 1; fPass1Size = window - 1; fPass2Size = (window & 1) == 1 ? window - 1 : window; // Calculating the border is tricky. I will go through the odd case which is simpler, and // then through the even case. Given a stack of filters seven wide for the odd case of // three passes. // // S // aaaAaaa // bbbBbbb // cccCccc // D // // The furthest changed pixel is when the filters are in the following configuration. // // S // aaaAaaa // bbbBbbb // cccCccc // D // // The A pixel is calculated using the value S, the B uses A, and the C uses B, and // finally D is C. So, with a window size of seven the border is nine. In general, the // border is 3*((window - 1)/2). // // For even cases the filter stack is more complicated. The spec specifies two passes // of even filters and a final pass of odd filters. A stack for a width of six looks like // this. // // S // aaaAaa // bbBbbb // cccCccc // D // // The furthest pixel looks like this. // // S // aaaAaa // bbBbbb // cccCccc // D // // For a window of size, the border value is seven. In general the border is 3 * // (window/2) -1. fBorder = (window & 1) == 1 ? 3 * ((window - 1) / 2) : 3 * (window / 2) - 1; fSlidingWindow = 2 * fBorder + 1; // If the window is odd then the divisor is just window ^ 3 otherwise, // it is window * window * (window + 1) = window ^ 2 + window ^ 3; auto window2 = window * window; auto window3 = window2 * window; auto divisor = (window & 1) == 1 ? window3 : window3 + window2; fWeight = static_cast(round(1.0 / divisor * (1ull << 32))); } size_t bufferSize() const override { return fPass0Size + fPass1Size + fPass2Size; } size_t border() const override { return fBorder; } bool needsBlur() const override { return true; } BlurScanInterface* makeBlurScan( SkArenaAlloc* alloc, size_t width, uint32_t* buffer) const override { uint32_t* buffer0, *buffer0End, *buffer1, *buffer1End, *buffer2, *buffer2End; buffer0 = buffer; buffer0End = buffer1 = buffer0 + fPass0Size; buffer1End = buffer2 = buffer1 + fPass1Size; buffer2End = buffer2 + fPass2Size; size_t noChangeCount = fSlidingWindow > width ? fSlidingWindow - width : 0; return alloc->make( fWeight, noChangeCount, buffer0, buffer0End, buffer1, buffer1End, buffer2, buffer2End); } public: class Gauss final : public BlurScanInterface { public: Gauss(uint64_t weight, size_t noChangeCount, uint32_t* buffer0, uint32_t* buffer0End, uint32_t* buffer1, uint32_t* buffer1End, uint32_t* buffer2, uint32_t* buffer2End) : fWeight{weight} , fNoChangeCount{noChangeCount} , fBuffer0{buffer0} , fBuffer0End{buffer0End} , fBuffer1{buffer1} , fBuffer1End{buffer1End} , fBuffer2{buffer2} , fBuffer2End{buffer2End} { } void blur(const uint8_t* src, size_t srcStride, const uint8_t* srcEnd, uint8_t* dst, size_t dstStride, uint8_t* dstEnd) const override { auto buffer0Cursor = fBuffer0; auto buffer1Cursor = fBuffer1; auto buffer2Cursor = fBuffer2; std::memset(fBuffer0, 0x00, (fBuffer2End - fBuffer0) * sizeof(*fBuffer0)); uint32_t sum0 = 0; uint32_t sum1 = 0; uint32_t sum2 = 0; // Consume the source generating pixels. for (auto srcCursor = src; srcCursor < srcEnd; dst += dstStride, srcCursor += srcStride) { uint32_t leadingEdge = *srcCursor; sum0 += leadingEdge; sum1 += sum0; sum2 += sum1; *dst = this->finalScale(sum2); sum2 -= *buffer2Cursor; *buffer2Cursor = sum1; buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2; sum1 -= *buffer1Cursor; *buffer1Cursor = sum0; buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1; sum0 -= *buffer0Cursor; *buffer0Cursor = leadingEdge; buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0; } // The leading edge is off the right side of the mask. for (size_t i = 0; i < fNoChangeCount; i++) { uint32_t leadingEdge = 0; sum0 += leadingEdge; sum1 += sum0; sum2 += sum1; *dst = this->finalScale(sum2); sum2 -= *buffer2Cursor; *buffer2Cursor = sum1; buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2; sum1 -= *buffer1Cursor; *buffer1Cursor = sum0; buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1; sum0 -= *buffer0Cursor; *buffer0Cursor = leadingEdge; buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0; dst += dstStride; } // Starting from the right, fill in the rest of the buffer. std::memset(fBuffer0, 0, (fBuffer2End - fBuffer0) * sizeof(*fBuffer0)); sum0 = sum1 = sum2 = 0; uint8_t* dstCursor = dstEnd; const uint8_t* srcCursor = srcEnd; while (dstCursor > dst) { dstCursor -= dstStride; srcCursor -= srcStride; uint32_t leadingEdge = *srcCursor; sum0 += leadingEdge; sum1 += sum0; sum2 += sum1; *dstCursor = this->finalScale(sum2); sum2 -= *buffer2Cursor; *buffer2Cursor = sum1; buffer2Cursor = (buffer2Cursor + 1) < fBuffer2End ? buffer2Cursor + 1 : fBuffer2; sum1 -= *buffer1Cursor; *buffer1Cursor = sum0; buffer1Cursor = (buffer1Cursor + 1) < fBuffer1End ? buffer1Cursor + 1 : fBuffer1; sum0 -= *buffer0Cursor; *buffer0Cursor = leadingEdge; buffer0Cursor = (buffer0Cursor + 1) < fBuffer0End ? buffer0Cursor + 1 : fBuffer0; } } private: static constexpr uint64_t kHalf = static_cast(1) << 31; uint8_t finalScale(uint32_t sum) const { return SkTo((fWeight * sum + kHalf) >> 32); } uint64_t fWeight; size_t fNoChangeCount; uint32_t* fBuffer0; uint32_t* fBuffer0End; uint32_t* fBuffer1; uint32_t* fBuffer1End; uint32_t* fBuffer2; uint32_t* fBuffer2End; }; uint64_t fWeight; size_t fBorder; size_t fSlidingWindow; size_t fPass0Size; size_t fPass1Size; size_t fPass2Size; }; static PlanningInterface* make_plan(SkArenaAlloc* alloc, double sigma) { PlanningInterface* plan = nullptr; if (3 * sigma <= 1) { plan = alloc->make(); } else if (sigma < kSmallSigma) { plan = alloc->make(sigma); } else { plan = alloc->make(sigma); } return plan; }; // NB 136 is the largest sigma that will not cause a buffer full of 255 mask values to overflow // using the Gauss filter. It also limits the size of buffers used hold intermediate values. // Explanation of maximums: // sum0 = window * 255 // sum1 = window * sum0 -> window * window * 255 // sum2 = window * sum1 -> window * window * window * 255 -> window^3 * 255 // // The value window^3 * 255 must fit in a uint32_t. So, // window^3 < 2^32. window = 255. // // window = floor(sigma * 3 * sqrt(2 * kPi) / 4 + 0.5) // For window <= 255, the largest value for sigma is 136. SkMaskBlurFilter::SkMaskBlurFilter(double sigmaW, double sigmaH) : fSigmaW{SkTPin(sigmaW, 0.0, 136.0)} , fSigmaH{SkTPin(sigmaH, 0.0, 136.0)} { SkASSERT(sigmaW >= 0); SkASSERT(sigmaH >= 0); } bool SkMaskBlurFilter::hasNoBlur() const { return (3 * fSigmaW <= 1) && (3 * fSigmaH <= 1); } static SkMask prepare_destination(int radiusX, int radiusY, const SkMask& src) { SkSafeMath safe; SkMask dst; // dstW = srcW + 2 * radiusX; int dstW = safe.add(src.fBounds.width(), safe.add(radiusX, radiusX)); // dstH = srcH + 2 * radiusY; int dstH = safe.add(src.fBounds.height(), safe.add(radiusY, radiusY)); dst.fBounds.set(0, 0, dstW, dstH); dst.fBounds.offset(src.fBounds.x(), src.fBounds.y()); dst.fBounds.offset(-SkTo(radiusX), -SkTo(radiusY)); dst.fImage = nullptr; dst.fRowBytes = SkTo(dstW); dst.fFormat = SkMask::kA8_Format; size_t toAlloc = safe.mul(dstW, dstH); if (safe && src.fImage != nullptr) { dst.fImage = SkMask::AllocImage(toAlloc); } return dst; } SkIPoint SkMaskBlurFilter::blur(const SkMask& src, SkMask* dst) const { // 1024 is a place holder guess until more analysis can be done. SkSTArenaAlloc<1024> alloc; PlanningInterface* planW = make_plan(&alloc, fSigmaW); PlanningInterface* planH = make_plan(&alloc, fSigmaH); size_t borderW = planW->border(), borderH = planH->border(); *dst = prepare_destination(borderW, borderH, src); if (src.fImage == nullptr) { return {SkTo(borderW), SkTo(borderH)}; } if (dst->fImage == nullptr) { dst->fBounds.setEmpty(); return {0, 0}; } auto srcW = SkTo(src.fBounds.width()); auto srcH = SkTo(src.fBounds.height()); size_t dstW = dst->fBounds.width(), dstH = dst->fBounds.height(); auto bufferSize = std::max(planW->bufferSize(), planH->bufferSize()); auto buffer = alloc.makeArrayDefault(bufferSize); if (planW->needsBlur() && planH->needsBlur()) { // Blur both directions. size_t tmpW = srcH; size_t tmpH = dstW; auto tmp = alloc.makeArrayDefault(tmpW * tmpH); // Blur horizontally, and transpose. auto scanW = planW->makeBlurScan(&alloc, srcW, buffer); size_t y = 0; if (scanW->canBlur4() && srcH > 4) { for (;y + 4 <= srcH; y += 4) { auto srcStart = &src.fImage[y * src.fRowBytes]; auto tmpStart = &tmp[y]; scanW->blur4Transpose(srcStart, src.fRowBytes, srcStart + srcW, tmpStart, tmpW, tmpStart + tmpW * tmpH); } } for (;y < srcH; y++) { auto srcStart = &src.fImage[y * src.fRowBytes]; auto tmpStart = &tmp[y]; scanW->blur(srcStart, 1, srcStart + srcW, tmpStart, tmpW, tmpStart + tmpW * tmpH); } // Blur vertically (scan in memory order because of the transposition), // and transpose back to the original orientation. auto scanH = planH->makeBlurScan(&alloc, tmpW, buffer); y = 0; if (scanH->canBlur4() && tmpH > 4) { for (;y + 4 <= tmpH; y += 4) { auto tmpStart = &tmp[y * tmpW]; auto dstStart = &dst->fImage[y]; scanH->blur4Transpose( tmpStart, tmpW, tmpStart + tmpW, dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH); } } for (;y < tmpH; y++) { auto tmpStart = &tmp[y * tmpW]; auto dstStart = &dst->fImage[y]; scanH->blur(tmpStart, 1, tmpStart + tmpW, dstStart, dst->fRowBytes, dstStart + dst->fRowBytes * dstH); } } else if (planW->needsBlur()) { // Blur only horizontally. auto scanW = planW->makeBlurScan(&alloc, srcW, buffer); for (size_t y = 0; y < srcH; y++) { auto srcStart = &src.fImage[y * src.fRowBytes]; auto dstStart = &dst->fImage[y * dst->fRowBytes]; scanW->blur(srcStart, 1, srcStart + srcW, dstStart, 1, dstStart + dstW); } } else if (planH->needsBlur()) { // Blur only vertically. auto srcEnd = &src.fImage[src.fRowBytes * srcH]; auto dstEnd = &dst->fImage[dst->fRowBytes * dstH]; auto scanH = planH->makeBlurScan(&alloc, srcH, buffer); for (size_t x = 0; x < srcW; x++) { auto srcStart = &src.fImage[x]; auto dstStart = &dst->fImage[x]; scanH->blur(srcStart, src.fRowBytes, srcEnd, dstStart, dst->fRowBytes, dstEnd); } } else { // Copy to dst. No Blur. SkASSERT(false); // should not get here for (size_t y = 0; y < srcH; y++) { std::memcpy(&dst->fImage[y * dst->fRowBytes], &src.fImage[y * src.fRowBytes], dstW); } } return {SkTo(borderW), SkTo(borderH)}; }